457 lines
12 KiB
Diff
457 lines
12 KiB
Diff
From 2b8826c6582ddaa48a1f6be3487d5de7e9bd42be Mon Sep 17 00:00:00 2001
|
|
From: Yang Shen <shenyang39@huawei.com>
|
|
Date: Fri, 10 Nov 2023 11:52:29 +0800
|
|
Subject: [PATCH 60/85] uadk/tools - fix lz77_zstd performance test for nosva
|
|
mode
|
|
|
|
1. lz77_zstd needs a large buffer to output literals and sequences.
|
|
2. For nosva mode, the output buffer needs to be allocated by wd_alloc_blk
|
|
from the block memory pool.
|
|
|
|
Signed-off-by: Yang Shen <shenyang39@huawei.com>
|
|
---
|
|
uadk_tool/benchmark/zip_uadk_benchmark.c | 67 ++++------
|
|
uadk_tool/benchmark/zip_wd_benchmark.c | 160 ++++++-----------------
|
|
2 files changed, 66 insertions(+), 161 deletions(-)
|
|
|
|
diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c
|
|
index 44746f6..7c96edf 100644
|
|
--- a/uadk_tool/benchmark/zip_uadk_benchmark.c
|
|
+++ b/uadk_tool/benchmark/zip_uadk_benchmark.c
|
|
@@ -15,7 +15,7 @@
|
|
#define DECOMP_LEN_RATE 2
|
|
#define MAX_POOL_LENTH_COMP 512
|
|
#define COMPRESSION_RATIO_FACTOR 0.7
|
|
-
|
|
+#define CHUNK_SIZE (128 * 1024)
|
|
struct uadk_bd {
|
|
u8 *src;
|
|
u8 *dst;
|
|
@@ -623,16 +623,12 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg)
|
|
{
|
|
thread_data *pdata = (thread_data *)arg;
|
|
struct wd_comp_sess_setup comp_setup = {0};
|
|
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
|
|
- ZSTD_inBuffer zstd_input = {0};
|
|
- ZSTD_outBuffer zstd_output = {0};
|
|
COMP_TUPLE_TAG *ftuple = NULL;
|
|
struct bd_pool *uadk_pool;
|
|
struct wd_comp_req creq;
|
|
- char *hw_buff_out = NULL;
|
|
- size_t fse_size;
|
|
handle_t h_sess;
|
|
- u32 first_len = 0;
|
|
+ void *src, *dst;
|
|
+ u32 in_len = 0;
|
|
u32 out_len = 0;
|
|
u32 count = 0;
|
|
int ret, i;
|
|
@@ -654,7 +650,6 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg)
|
|
return NULL;
|
|
|
|
creq.op_type = pdata->optype;
|
|
- creq.src_len = g_pktlen;
|
|
out_len = uadk_pool->bds[0].dst_len;
|
|
|
|
creq.cb = NULL;
|
|
@@ -665,53 +660,45 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg)
|
|
if (!ftuple)
|
|
goto fse_err;
|
|
|
|
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
|
|
- if (!hw_buff_out)
|
|
- goto hw_buff_err;
|
|
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
|
|
-
|
|
while(1) {
|
|
i = count % MAX_POOL_LENTH_COMP;
|
|
- creq.src = uadk_pool->bds[i].src;
|
|
- creq.dst = &hw_buff_out[i]; //temp out
|
|
- creq.src_len = uadk_pool->bds[i].src_len;
|
|
- creq.dst_len = out_len;
|
|
- creq.priv = &ftuple[i];
|
|
+ src = uadk_pool->bds[i].src;
|
|
+ dst = uadk_pool->bds[i].dst;
|
|
+ in_len = uadk_pool->bds[0].src_len;
|
|
+ out_len = uadk_pool->bds[0].dst_len;
|
|
+
|
|
+ while (in_len > 0) {
|
|
+ creq.src_len = in_len > CHUNK_SIZE ? CHUNK_SIZE : in_len;
|
|
+ creq.dst_len = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len;
|
|
+ creq.src = src;
|
|
+ creq.dst = dst;
|
|
+ creq.priv = &ftuple[i];
|
|
+
|
|
+ ret = wd_do_comp_strm(h_sess, &creq);
|
|
+ if (ret < 0 || creq.status == WD_IN_EPARA) {
|
|
+ ZIP_TST_PRT("wd comp, invalid or incomplete data! "
|
|
+ "ret(%d), req.status(%u)\n", ret, creq.status);
|
|
+ break;
|
|
+ }
|
|
|
|
- ret = wd_do_comp_strm(h_sess, &creq);
|
|
- if (ret < 0 || creq.status == WD_IN_EPARA) {
|
|
- ZIP_TST_PRT("wd comp, invalid or incomplete data! "
|
|
- "ret(%d), req.status(%u)\n", ret, creq.status);
|
|
- break;
|
|
+ src += CHUNK_SIZE;
|
|
+ in_len -= CHUNK_SIZE;
|
|
+ dst += 2 * CHUNK_SIZE;
|
|
+ out_len -= 2 * CHUNK_SIZE;
|
|
}
|
|
|
|
count++;
|
|
- zstd_input.src = creq.src;
|
|
- zstd_input.size = creq.src_len;
|
|
- zstd_input.pos = 0;
|
|
- zstd_output.dst = uadk_pool->bds[i].dst;
|
|
- zstd_output.size = out_len;
|
|
- zstd_output.pos = 0;
|
|
- fse_size = zstd_soft_fse(creq.priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
|
|
|
|
- uadk_pool->bds[i].dst_len = fse_size;
|
|
- if (unlikely(i == 0))
|
|
- first_len = fse_size;
|
|
if (get_run_state() == 0)
|
|
break;
|
|
}
|
|
|
|
-hw_buff_err:
|
|
- free(hw_buff_out);
|
|
-fse_err:
|
|
free(ftuple);
|
|
+fse_err:
|
|
wd_comp_free_sess(h_sess);
|
|
|
|
cal_avg_latency(count);
|
|
- if (pdata->optype == WD_DIR_COMPRESS)
|
|
- add_recv_data(count, g_pktlen);
|
|
- else
|
|
- add_recv_data(count, first_len);
|
|
+ add_recv_data(count, g_pktlen);
|
|
|
|
return NULL;
|
|
}
|
|
diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c
|
|
index 61f6c69..b25f1fb 100644
|
|
--- a/uadk_tool/benchmark/zip_wd_benchmark.c
|
|
+++ b/uadk_tool/benchmark/zip_wd_benchmark.c
|
|
@@ -20,6 +20,7 @@
|
|
#define DECOMP_LEN_RATE 2
|
|
#define COMPRESSION_RATIO_FACTOR 0.7
|
|
#define MAX_POOL_LENTH_COMP 512
|
|
+#define CHUNK_SIZE (128 * 1024)
|
|
|
|
#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
|
|
#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
|
|
@@ -75,18 +76,6 @@ struct zip_file_head {
|
|
static unsigned int g_thread_num;
|
|
static unsigned int g_pktlen;
|
|
|
|
-#ifndef ZLIB_FSE
|
|
-static ZSTD_CCtx* zstd_soft_fse_init(unsigned int level)
|
|
-{
|
|
- return NULL;
|
|
-}
|
|
-
|
|
-static int zstd_soft_fse(void *Ftuple, ZSTD_inBuffer *input, ZSTD_outBuffer *output, ZSTD_CCtx * cctx, ZSTD_EndDirective cmode)
|
|
-{
|
|
- return input->size;
|
|
-}
|
|
-#endif
|
|
-
|
|
static int save_file_data(const char *alg, u32 pkg_len, u32 optype)
|
|
{
|
|
struct zip_file_head *fhead = NULL;
|
|
@@ -430,26 +419,12 @@ static void zip_lz77_async_cb(const void *message, void *data)
|
|
{
|
|
const struct wcrypto_comp_msg *cbmsg = message;
|
|
struct zip_async_tag *tag = data;
|
|
- ZSTD_CCtx *cctx = tag->cctx;
|
|
- ZSTD_inBuffer zstd_input;
|
|
- ZSTD_outBuffer zstd_output;
|
|
- struct wd_bd *bd_pool;
|
|
int td_id = tag->td_id;
|
|
+ struct wd_bd *bd_pool;
|
|
int idx = tag->bd_idx;
|
|
- size_t fse_size;
|
|
|
|
bd_pool = g_thread_queue.bd_res[td_id].bds;
|
|
bd_pool[idx].dst_len = cbmsg->produced;
|
|
-
|
|
- zstd_input.src = cbmsg->src;
|
|
- zstd_input.size = cbmsg->in_size;
|
|
- zstd_input.pos = 0;
|
|
- zstd_output.dst = bd_pool[idx].dst;
|
|
- zstd_output.size = tag->cm_len;
|
|
- zstd_output.pos = 0;
|
|
- fse_size = zstd_soft_fse(tag->priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
|
|
-
|
|
- bd_pool[idx].dst_len = fse_size;
|
|
}
|
|
|
|
static void zip_async_cb(const void *message, void *data)
|
|
@@ -501,18 +476,12 @@ recv_error:
|
|
static void *zip_wd_blk_lz77_sync_run(void *arg)
|
|
{
|
|
thread_data *pdata = (thread_data *)arg;
|
|
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
|
|
- ZSTD_inBuffer zstd_input = {0};
|
|
- ZSTD_outBuffer zstd_output = {0};
|
|
COMP_TUPLE_TAG *ftuple = NULL;
|
|
struct wcrypto_comp_ctx_setup comp_setup;
|
|
struct wcrypto_comp_op_data opdata;
|
|
struct wcrypto_comp_ctx *ctx;
|
|
struct wd_queue *queue;
|
|
struct wd_bd *bd_pool;
|
|
- u8 *hw_buff_out = NULL;
|
|
- size_t fse_size;
|
|
- u32 first_len = 0;
|
|
u32 out_len = 0;
|
|
u32 count = 0;
|
|
int ret, i;
|
|
@@ -557,15 +526,10 @@ static void *zip_wd_blk_lz77_sync_run(void *arg)
|
|
if (!ftuple)
|
|
goto fse_err;
|
|
|
|
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
|
|
- if (!hw_buff_out)
|
|
- goto hw_buff_err;
|
|
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
|
|
-
|
|
while(1) {
|
|
i = count % MAX_POOL_LENTH_COMP;
|
|
opdata.in = bd_pool[i].src;
|
|
- opdata.out = &hw_buff_out[i]; //temp out
|
|
+ opdata.out = bd_pool[i].dst;
|
|
opdata.in_len = bd_pool[i].src_len;
|
|
opdata.avail_out = out_len;
|
|
opdata.priv = &ftuple[i];
|
|
@@ -576,32 +540,17 @@ static void *zip_wd_blk_lz77_sync_run(void *arg)
|
|
break;
|
|
|
|
count++;
|
|
- zstd_input.src = opdata.in;
|
|
- zstd_input.size = opdata.in_len;
|
|
- zstd_input.pos = 0;
|
|
- zstd_output.dst = bd_pool[i].dst;
|
|
- zstd_output.size = out_len;
|
|
- zstd_output.pos = 0;
|
|
- fse_size = zstd_soft_fse(opdata.priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
|
|
-
|
|
- bd_pool[i].dst_len = fse_size;
|
|
- if (unlikely(i == 0))
|
|
- first_len = fse_size;
|
|
+
|
|
if (get_run_state() == 0)
|
|
break;
|
|
}
|
|
|
|
-hw_buff_err:
|
|
- free(hw_buff_out);
|
|
-fse_err:
|
|
free(ftuple);
|
|
+fse_err:
|
|
wcrypto_del_comp_ctx(ctx);
|
|
|
|
cal_avg_latency(count);
|
|
- if (pdata->optype == WCRYPTO_DEFLATE)
|
|
- add_recv_data(count, g_pktlen);
|
|
- else
|
|
- add_recv_data(count, first_len);
|
|
+ add_recv_data(count, g_pktlen);
|
|
|
|
return NULL;
|
|
}
|
|
@@ -609,18 +558,14 @@ fse_err:
|
|
static void *zip_wd_stm_lz77_sync_run(void *arg)
|
|
{
|
|
thread_data *pdata = (thread_data *)arg;
|
|
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
|
|
- ZSTD_inBuffer zstd_input = {0};
|
|
- ZSTD_outBuffer zstd_output = {0};
|
|
COMP_TUPLE_TAG *ftuple = NULL;
|
|
struct wcrypto_comp_ctx_setup comp_setup;
|
|
struct wcrypto_comp_op_data opdata;
|
|
struct wcrypto_comp_ctx *ctx;
|
|
struct wd_queue *queue;
|
|
struct wd_bd *bd_pool;
|
|
- u8 *hw_buff_out = NULL;
|
|
- size_t fse_size;
|
|
- u32 first_len = 0;
|
|
+ void *src, *dst;
|
|
+ u32 in_len = 0;
|
|
u32 out_len = 0;
|
|
u32 count = 0;
|
|
int ret, i;
|
|
@@ -659,60 +604,48 @@ static void *zip_wd_stm_lz77_sync_run(void *arg)
|
|
else
|
|
opdata.flush = WCRYPTO_FINISH;
|
|
|
|
- out_len = bd_pool[0].dst_len;
|
|
-
|
|
ftuple = malloc(sizeof(COMP_TUPLE_TAG) * MAX_POOL_LENTH_COMP);
|
|
if (!ftuple)
|
|
goto fse_err;
|
|
|
|
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
|
|
- if (!hw_buff_out)
|
|
- goto hw_buff_err;
|
|
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
|
|
-
|
|
while(1) {
|
|
i = count % MAX_POOL_LENTH_COMP;
|
|
- opdata.in = bd_pool[i].src;
|
|
- opdata.out = &hw_buff_out[i]; //temp out
|
|
- opdata.in_len = bd_pool[i].src_len;
|
|
- opdata.avail_out = out_len;
|
|
- opdata.priv = &ftuple[i];
|
|
-
|
|
- ret = wcrypto_do_comp(ctx, &opdata, NULL);
|
|
- if (ret || opdata.status == WCRYPTO_DECOMP_END_NOSPACE ||
|
|
- opdata.status == WD_IN_EPARA || opdata.status == WD_VERIFY_ERR) {
|
|
- ZIP_TST_PRT("wd comp, invalid or incomplete data! "
|
|
- "ret(%d), req.status(%u)\n", ret, opdata.status);
|
|
- break;
|
|
+ src = bd_pool[i].src;
|
|
+ dst = bd_pool[i].dst;
|
|
+ in_len = bd_pool[i].src_len;
|
|
+ out_len = bd_pool[i].dst_len;
|
|
+
|
|
+ while (in_len > 0) {
|
|
+ opdata.in_len = in_len > CHUNK_SIZE ? CHUNK_SIZE : in_len;
|
|
+ opdata.avail_out = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len;
|
|
+ opdata.in = src;
|
|
+ opdata.out = dst;
|
|
+ opdata.priv = &ftuple[i];
|
|
+
|
|
+ ret = wcrypto_do_comp(ctx, &opdata, NULL);
|
|
+ if (ret || opdata.status == WCRYPTO_DECOMP_END_NOSPACE ||
|
|
+ opdata.status == WD_IN_EPARA || opdata.status == WD_VERIFY_ERR) {
|
|
+ ZIP_TST_PRT("wd comp, invalid or incomplete data! "
|
|
+ "ret(%d), req.status(%u)\n", ret, opdata.status);
|
|
+ break;
|
|
+ }
|
|
+ src += CHUNK_SIZE;
|
|
+ in_len -= CHUNK_SIZE;
|
|
+ dst += 2 * CHUNK_SIZE;
|
|
+ out_len -= 2 * CHUNK_SIZE;
|
|
}
|
|
-
|
|
count++;
|
|
- zstd_input.src = opdata.in;
|
|
- zstd_input.size = opdata.in_len;
|
|
- zstd_input.pos = 0;
|
|
- zstd_output.dst = opdata.out;
|
|
- zstd_output.size = out_len;
|
|
- zstd_output.pos = 0;
|
|
- fse_size = zstd_soft_fse(opdata.priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
|
|
-
|
|
- bd_pool[i].dst_len = fse_size;
|
|
- if (unlikely(i == 0))
|
|
- first_len = fse_size;
|
|
+
|
|
if (get_run_state() == 0)
|
|
break;
|
|
}
|
|
|
|
-hw_buff_err:
|
|
- free(hw_buff_out);
|
|
-fse_err:
|
|
free(ftuple);
|
|
+fse_err:
|
|
wcrypto_del_comp_ctx(ctx);
|
|
|
|
cal_avg_latency(count);
|
|
- if (pdata->optype == WCRYPTO_DEFLATE)
|
|
- add_recv_data(count, g_pktlen);
|
|
- else
|
|
- add_recv_data(count, first_len);
|
|
+ add_recv_data(count, g_pktlen);
|
|
|
|
return NULL;
|
|
}
|
|
@@ -720,18 +653,16 @@ fse_err:
|
|
static void *zip_wd_blk_lz77_async_run(void *arg)
|
|
{
|
|
thread_data *pdata = (thread_data *)arg;
|
|
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
|
|
- COMP_TUPLE_TAG *ftuple = NULL;
|
|
struct wcrypto_comp_ctx_setup comp_setup;
|
|
struct wcrypto_comp_op_data opdata;
|
|
+ COMP_TUPLE_TAG *ftuple = NULL;
|
|
struct wcrypto_comp_ctx *ctx;
|
|
struct zip_async_tag *tag;
|
|
- u8 *hw_buff_out = NULL;
|
|
struct wd_queue *queue;
|
|
struct wd_bd *bd_pool;
|
|
u32 out_len = 0;
|
|
- u32 count = 0;
|
|
u32 try_cnt = 0;
|
|
+ u32 count = 0;
|
|
int ret, i;
|
|
|
|
if (pdata->td_id > g_thread_num)
|
|
@@ -760,14 +691,9 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
|
|
if (!ctx)
|
|
return NULL;
|
|
|
|
- opdata.stream_pos = WCRYPTO_COMP_STREAM_NEW;
|
|
opdata.alg_type = pdata->alg;
|
|
opdata.priv = NULL;
|
|
opdata.status = 0;
|
|
- if (pdata->optype == WCRYPTO_INFLATE)
|
|
- opdata.flush = WCRYPTO_SYNC_FLUSH;
|
|
- else
|
|
- opdata.flush = WCRYPTO_FINISH;
|
|
|
|
out_len = bd_pool[0].dst_len;
|
|
|
|
@@ -775,11 +701,6 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
|
|
if (!ftuple)
|
|
goto fse_err;
|
|
|
|
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
|
|
- if (!hw_buff_out)
|
|
- goto hw_buff_err;
|
|
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
|
|
-
|
|
tag = malloc(sizeof(*tag) * MAX_POOL_LENTH_COMP);
|
|
if (!tag) {
|
|
ZIP_TST_PRT("failed to malloc zip tag!\n");
|
|
@@ -793,7 +714,7 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
|
|
try_cnt = 0;
|
|
i = count % MAX_POOL_LENTH_COMP;
|
|
opdata.in = bd_pool[i].src;
|
|
- opdata.out = &hw_buff_out[i]; //temp out
|
|
+ opdata.out = bd_pool[i].dst; //temp out
|
|
opdata.in_len = bd_pool[i].src_len;
|
|
opdata.avail_out = out_len;
|
|
opdata.priv = &ftuple[i];
|
|
@@ -802,7 +723,6 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
|
|
tag[i].ctx = ctx;
|
|
tag[i].td_id = pdata->td_id;
|
|
tag[i].cm_len = out_len;
|
|
- tag[i].cctx = cctx;
|
|
tag[i].priv = opdata.priv;
|
|
|
|
ret = wcrypto_do_comp(ctx, &opdata, &tag[i]);
|
|
@@ -827,12 +747,10 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
|
|
usleep(SEND_USLEEP);
|
|
}
|
|
|
|
-tag_err:
|
|
free(tag);
|
|
-hw_buff_err:
|
|
- free(hw_buff_out);
|
|
-fse_err:
|
|
+tag_err:
|
|
free(ftuple);
|
|
+fse_err:
|
|
wcrypto_del_comp_ctx(ctx);
|
|
add_send_complete();
|
|
|
|
--
|
|
2.25.1
|
|
|