libwd/0060-uadk-tools-fix-lz77_zstd-performance-test-for-nosva-.patch
2023-11-23 10:41:04 +08:00

457 lines
12 KiB
Diff

From 2b8826c6582ddaa48a1f6be3487d5de7e9bd42be Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Fri, 10 Nov 2023 11:52:29 +0800
Subject: [PATCH 60/85] uadk/tools - fix lz77_zstd performance test for nosva
mode
1. lz77_zstd needs a large buffer to output literals and sequences.
2. For nosva mode, the output buffer needs to be allocated by wd_alloc_blk
from the block memory pool.
Signed-off-by: Yang Shen <shenyang39@huawei.com>
---
uadk_tool/benchmark/zip_uadk_benchmark.c | 67 ++++------
uadk_tool/benchmark/zip_wd_benchmark.c | 160 ++++++-----------------
2 files changed, 66 insertions(+), 161 deletions(-)
diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c
index 44746f6..7c96edf 100644
--- a/uadk_tool/benchmark/zip_uadk_benchmark.c
+++ b/uadk_tool/benchmark/zip_uadk_benchmark.c
@@ -15,7 +15,7 @@
#define DECOMP_LEN_RATE 2
#define MAX_POOL_LENTH_COMP 512
#define COMPRESSION_RATIO_FACTOR 0.7
-
+#define CHUNK_SIZE (128 * 1024)
struct uadk_bd {
u8 *src;
u8 *dst;
@@ -623,16 +623,12 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg)
{
thread_data *pdata = (thread_data *)arg;
struct wd_comp_sess_setup comp_setup = {0};
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
- ZSTD_inBuffer zstd_input = {0};
- ZSTD_outBuffer zstd_output = {0};
COMP_TUPLE_TAG *ftuple = NULL;
struct bd_pool *uadk_pool;
struct wd_comp_req creq;
- char *hw_buff_out = NULL;
- size_t fse_size;
handle_t h_sess;
- u32 first_len = 0;
+ void *src, *dst;
+ u32 in_len = 0;
u32 out_len = 0;
u32 count = 0;
int ret, i;
@@ -654,7 +650,6 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg)
return NULL;
creq.op_type = pdata->optype;
- creq.src_len = g_pktlen;
out_len = uadk_pool->bds[0].dst_len;
creq.cb = NULL;
@@ -665,53 +660,45 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg)
if (!ftuple)
goto fse_err;
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
- if (!hw_buff_out)
- goto hw_buff_err;
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
-
while(1) {
i = count % MAX_POOL_LENTH_COMP;
- creq.src = uadk_pool->bds[i].src;
- creq.dst = &hw_buff_out[i]; //temp out
- creq.src_len = uadk_pool->bds[i].src_len;
- creq.dst_len = out_len;
- creq.priv = &ftuple[i];
+ src = uadk_pool->bds[i].src;
+ dst = uadk_pool->bds[i].dst;
+ in_len = uadk_pool->bds[0].src_len;
+ out_len = uadk_pool->bds[0].dst_len;
+
+ while (in_len > 0) {
+ creq.src_len = in_len > CHUNK_SIZE ? CHUNK_SIZE : in_len;
+ creq.dst_len = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len;
+ creq.src = src;
+ creq.dst = dst;
+ creq.priv = &ftuple[i];
+
+ ret = wd_do_comp_strm(h_sess, &creq);
+ if (ret < 0 || creq.status == WD_IN_EPARA) {
+ ZIP_TST_PRT("wd comp, invalid or incomplete data! "
+ "ret(%d), req.status(%u)\n", ret, creq.status);
+ break;
+ }
- ret = wd_do_comp_strm(h_sess, &creq);
- if (ret < 0 || creq.status == WD_IN_EPARA) {
- ZIP_TST_PRT("wd comp, invalid or incomplete data! "
- "ret(%d), req.status(%u)\n", ret, creq.status);
- break;
+ src += CHUNK_SIZE;
+ in_len -= CHUNK_SIZE;
+ dst += 2 * CHUNK_SIZE;
+ out_len -= 2 * CHUNK_SIZE;
}
count++;
- zstd_input.src = creq.src;
- zstd_input.size = creq.src_len;
- zstd_input.pos = 0;
- zstd_output.dst = uadk_pool->bds[i].dst;
- zstd_output.size = out_len;
- zstd_output.pos = 0;
- fse_size = zstd_soft_fse(creq.priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
- uadk_pool->bds[i].dst_len = fse_size;
- if (unlikely(i == 0))
- first_len = fse_size;
if (get_run_state() == 0)
break;
}
-hw_buff_err:
- free(hw_buff_out);
-fse_err:
free(ftuple);
+fse_err:
wd_comp_free_sess(h_sess);
cal_avg_latency(count);
- if (pdata->optype == WD_DIR_COMPRESS)
- add_recv_data(count, g_pktlen);
- else
- add_recv_data(count, first_len);
+ add_recv_data(count, g_pktlen);
return NULL;
}
diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c
index 61f6c69..b25f1fb 100644
--- a/uadk_tool/benchmark/zip_wd_benchmark.c
+++ b/uadk_tool/benchmark/zip_wd_benchmark.c
@@ -20,6 +20,7 @@
#define DECOMP_LEN_RATE 2
#define COMPRESSION_RATIO_FACTOR 0.7
#define MAX_POOL_LENTH_COMP 512
+#define CHUNK_SIZE (128 * 1024)
#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
@@ -75,18 +76,6 @@ struct zip_file_head {
static unsigned int g_thread_num;
static unsigned int g_pktlen;
-#ifndef ZLIB_FSE
-static ZSTD_CCtx* zstd_soft_fse_init(unsigned int level)
-{
- return NULL;
-}
-
-static int zstd_soft_fse(void *Ftuple, ZSTD_inBuffer *input, ZSTD_outBuffer *output, ZSTD_CCtx * cctx, ZSTD_EndDirective cmode)
-{
- return input->size;
-}
-#endif
-
static int save_file_data(const char *alg, u32 pkg_len, u32 optype)
{
struct zip_file_head *fhead = NULL;
@@ -430,26 +419,12 @@ static void zip_lz77_async_cb(const void *message, void *data)
{
const struct wcrypto_comp_msg *cbmsg = message;
struct zip_async_tag *tag = data;
- ZSTD_CCtx *cctx = tag->cctx;
- ZSTD_inBuffer zstd_input;
- ZSTD_outBuffer zstd_output;
- struct wd_bd *bd_pool;
int td_id = tag->td_id;
+ struct wd_bd *bd_pool;
int idx = tag->bd_idx;
- size_t fse_size;
bd_pool = g_thread_queue.bd_res[td_id].bds;
bd_pool[idx].dst_len = cbmsg->produced;
-
- zstd_input.src = cbmsg->src;
- zstd_input.size = cbmsg->in_size;
- zstd_input.pos = 0;
- zstd_output.dst = bd_pool[idx].dst;
- zstd_output.size = tag->cm_len;
- zstd_output.pos = 0;
- fse_size = zstd_soft_fse(tag->priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
-
- bd_pool[idx].dst_len = fse_size;
}
static void zip_async_cb(const void *message, void *data)
@@ -501,18 +476,12 @@ recv_error:
static void *zip_wd_blk_lz77_sync_run(void *arg)
{
thread_data *pdata = (thread_data *)arg;
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
- ZSTD_inBuffer zstd_input = {0};
- ZSTD_outBuffer zstd_output = {0};
COMP_TUPLE_TAG *ftuple = NULL;
struct wcrypto_comp_ctx_setup comp_setup;
struct wcrypto_comp_op_data opdata;
struct wcrypto_comp_ctx *ctx;
struct wd_queue *queue;
struct wd_bd *bd_pool;
- u8 *hw_buff_out = NULL;
- size_t fse_size;
- u32 first_len = 0;
u32 out_len = 0;
u32 count = 0;
int ret, i;
@@ -557,15 +526,10 @@ static void *zip_wd_blk_lz77_sync_run(void *arg)
if (!ftuple)
goto fse_err;
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
- if (!hw_buff_out)
- goto hw_buff_err;
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
-
while(1) {
i = count % MAX_POOL_LENTH_COMP;
opdata.in = bd_pool[i].src;
- opdata.out = &hw_buff_out[i]; //temp out
+ opdata.out = bd_pool[i].dst;
opdata.in_len = bd_pool[i].src_len;
opdata.avail_out = out_len;
opdata.priv = &ftuple[i];
@@ -576,32 +540,17 @@ static void *zip_wd_blk_lz77_sync_run(void *arg)
break;
count++;
- zstd_input.src = opdata.in;
- zstd_input.size = opdata.in_len;
- zstd_input.pos = 0;
- zstd_output.dst = bd_pool[i].dst;
- zstd_output.size = out_len;
- zstd_output.pos = 0;
- fse_size = zstd_soft_fse(opdata.priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
-
- bd_pool[i].dst_len = fse_size;
- if (unlikely(i == 0))
- first_len = fse_size;
+
if (get_run_state() == 0)
break;
}
-hw_buff_err:
- free(hw_buff_out);
-fse_err:
free(ftuple);
+fse_err:
wcrypto_del_comp_ctx(ctx);
cal_avg_latency(count);
- if (pdata->optype == WCRYPTO_DEFLATE)
- add_recv_data(count, g_pktlen);
- else
- add_recv_data(count, first_len);
+ add_recv_data(count, g_pktlen);
return NULL;
}
@@ -609,18 +558,14 @@ fse_err:
static void *zip_wd_stm_lz77_sync_run(void *arg)
{
thread_data *pdata = (thread_data *)arg;
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
- ZSTD_inBuffer zstd_input = {0};
- ZSTD_outBuffer zstd_output = {0};
COMP_TUPLE_TAG *ftuple = NULL;
struct wcrypto_comp_ctx_setup comp_setup;
struct wcrypto_comp_op_data opdata;
struct wcrypto_comp_ctx *ctx;
struct wd_queue *queue;
struct wd_bd *bd_pool;
- u8 *hw_buff_out = NULL;
- size_t fse_size;
- u32 first_len = 0;
+ void *src, *dst;
+ u32 in_len = 0;
u32 out_len = 0;
u32 count = 0;
int ret, i;
@@ -659,60 +604,48 @@ static void *zip_wd_stm_lz77_sync_run(void *arg)
else
opdata.flush = WCRYPTO_FINISH;
- out_len = bd_pool[0].dst_len;
-
ftuple = malloc(sizeof(COMP_TUPLE_TAG) * MAX_POOL_LENTH_COMP);
if (!ftuple)
goto fse_err;
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
- if (!hw_buff_out)
- goto hw_buff_err;
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
-
while(1) {
i = count % MAX_POOL_LENTH_COMP;
- opdata.in = bd_pool[i].src;
- opdata.out = &hw_buff_out[i]; //temp out
- opdata.in_len = bd_pool[i].src_len;
- opdata.avail_out = out_len;
- opdata.priv = &ftuple[i];
-
- ret = wcrypto_do_comp(ctx, &opdata, NULL);
- if (ret || opdata.status == WCRYPTO_DECOMP_END_NOSPACE ||
- opdata.status == WD_IN_EPARA || opdata.status == WD_VERIFY_ERR) {
- ZIP_TST_PRT("wd comp, invalid or incomplete data! "
- "ret(%d), req.status(%u)\n", ret, opdata.status);
- break;
+ src = bd_pool[i].src;
+ dst = bd_pool[i].dst;
+ in_len = bd_pool[i].src_len;
+ out_len = bd_pool[i].dst_len;
+
+ while (in_len > 0) {
+ opdata.in_len = in_len > CHUNK_SIZE ? CHUNK_SIZE : in_len;
+ opdata.avail_out = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len;
+ opdata.in = src;
+ opdata.out = dst;
+ opdata.priv = &ftuple[i];
+
+ ret = wcrypto_do_comp(ctx, &opdata, NULL);
+ if (ret || opdata.status == WCRYPTO_DECOMP_END_NOSPACE ||
+ opdata.status == WD_IN_EPARA || opdata.status == WD_VERIFY_ERR) {
+ ZIP_TST_PRT("wd comp, invalid or incomplete data! "
+ "ret(%d), req.status(%u)\n", ret, opdata.status);
+ break;
+ }
+ src += CHUNK_SIZE;
+ in_len -= CHUNK_SIZE;
+ dst += 2 * CHUNK_SIZE;
+ out_len -= 2 * CHUNK_SIZE;
}
-
count++;
- zstd_input.src = opdata.in;
- zstd_input.size = opdata.in_len;
- zstd_input.pos = 0;
- zstd_output.dst = opdata.out;
- zstd_output.size = out_len;
- zstd_output.pos = 0;
- fse_size = zstd_soft_fse(opdata.priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end);
-
- bd_pool[i].dst_len = fse_size;
- if (unlikely(i == 0))
- first_len = fse_size;
+
if (get_run_state() == 0)
break;
}
-hw_buff_err:
- free(hw_buff_out);
-fse_err:
free(ftuple);
+fse_err:
wcrypto_del_comp_ctx(ctx);
cal_avg_latency(count);
- if (pdata->optype == WCRYPTO_DEFLATE)
- add_recv_data(count, g_pktlen);
- else
- add_recv_data(count, first_len);
+ add_recv_data(count, g_pktlen);
return NULL;
}
@@ -720,18 +653,16 @@ fse_err:
static void *zip_wd_blk_lz77_async_run(void *arg)
{
thread_data *pdata = (thread_data *)arg;
- ZSTD_CCtx *cctx = zstd_soft_fse_init(15);
- COMP_TUPLE_TAG *ftuple = NULL;
struct wcrypto_comp_ctx_setup comp_setup;
struct wcrypto_comp_op_data opdata;
+ COMP_TUPLE_TAG *ftuple = NULL;
struct wcrypto_comp_ctx *ctx;
struct zip_async_tag *tag;
- u8 *hw_buff_out = NULL;
struct wd_queue *queue;
struct wd_bd *bd_pool;
u32 out_len = 0;
- u32 count = 0;
u32 try_cnt = 0;
+ u32 count = 0;
int ret, i;
if (pdata->td_id > g_thread_num)
@@ -760,14 +691,9 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
if (!ctx)
return NULL;
- opdata.stream_pos = WCRYPTO_COMP_STREAM_NEW;
opdata.alg_type = pdata->alg;
opdata.priv = NULL;
opdata.status = 0;
- if (pdata->optype == WCRYPTO_INFLATE)
- opdata.flush = WCRYPTO_SYNC_FLUSH;
- else
- opdata.flush = WCRYPTO_FINISH;
out_len = bd_pool[0].dst_len;
@@ -775,11 +701,6 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
if (!ftuple)
goto fse_err;
- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP);
- if (!hw_buff_out)
- goto hw_buff_err;
- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP);
-
tag = malloc(sizeof(*tag) * MAX_POOL_LENTH_COMP);
if (!tag) {
ZIP_TST_PRT("failed to malloc zip tag!\n");
@@ -793,7 +714,7 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
try_cnt = 0;
i = count % MAX_POOL_LENTH_COMP;
opdata.in = bd_pool[i].src;
- opdata.out = &hw_buff_out[i]; //temp out
+ opdata.out = bd_pool[i].dst; //temp out
opdata.in_len = bd_pool[i].src_len;
opdata.avail_out = out_len;
opdata.priv = &ftuple[i];
@@ -802,7 +723,6 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
tag[i].ctx = ctx;
tag[i].td_id = pdata->td_id;
tag[i].cm_len = out_len;
- tag[i].cctx = cctx;
tag[i].priv = opdata.priv;
ret = wcrypto_do_comp(ctx, &opdata, &tag[i]);
@@ -827,12 +747,10 @@ static void *zip_wd_blk_lz77_async_run(void *arg)
usleep(SEND_USLEEP);
}
-tag_err:
free(tag);
-hw_buff_err:
- free(hw_buff_out);
-fse_err:
+tag_err:
free(ftuple);
+fse_err:
wcrypto_del_comp_ctx(ctx);
add_send_complete();
--
2.25.1