From 3cbf3c85645d7ec43aa149c1bcf346a556bbf30a Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 8 Mar 2022 15:44:00 +0800 Subject: [PATCH 33/36] digest: improve the digest performance 1. The memset function found to be a hotspot function by perf. so should remove the memset in io path. it can improve three times performance. 2. add some branch predictor function to improve a little performance. 3. use the uadk_memory_cpy can improve 5% performance. Signed-off-by: Kai Ye --- src/Makefile.am | 4 +-- src/uadk.h | 1 + src/uadk_digest.c | 17 ++++++------- src/uadk_utils.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++ src/uadk_utils.h | 24 ++++++++++++++++++ 5 files changed, 98 insertions(+), 11 deletions(-) create mode 100644 src/uadk_utils.c create mode 100644 src/uadk_utils.h diff --git a/src/Makefile.am b/src/Makefile.am index 636f559..75595aa 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -7,8 +7,8 @@ UADK_ENGINE_VERSION = -version-number ${MAJOR}:${MINOR}:${REVISION} lib_LTLIBRARIES=uadk_engine.la -uadk_engine_la_SOURCES=e_uadk.c uadk_cipher.c uadk_digest.c uadk_async.c uadk_rsa.c \ - uadk_sm2.c uadk_pkey.c uadk_dh.c uadk_ec.c uadk_ecx.c +uadk_engine_la_SOURCES=uadk_utils.c e_uadk.c uadk_cipher.c uadk_digest.c uadk_async.c \ + uadk_rsa.c uadk_sm2.c uadk_pkey.c uadk_dh.c uadk_ec.c uadk_ecx.c uadk_engine_la_LIBADD=-ldl -lwd -lwd_crypto -lpthread uadk_engine_la_LDFLAGS=-module $(UADK_ENGINE_VERSION) diff --git a/src/uadk.h b/src/uadk.h index 0f9b0be..384e035 100644 --- a/src/uadk.h +++ b/src/uadk.h @@ -19,6 +19,7 @@ #include #include #include +#include "uadk_utils.h" #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ENV_STRING_LEN 256 diff --git a/src/uadk_digest.c b/src/uadk_digest.c index ad24168..355917d 100644 --- a/src/uadk_digest.c +++ b/src/uadk_digest.c @@ -493,7 +493,7 @@ static int uadk_e_digest_init(EVP_MD_CTX *ctx) priv->state = SEC_DIGEST_INIT; ret = uadk_e_init_digest(); - if (!ret) { + if (unlikely(!ret)) { priv->switch_flag = 
UADK_DO_SOFT; fprintf(stderr, "uadk failed to initialize digest.\n"); goto soft_init; @@ -507,7 +507,7 @@ static int uadk_e_digest_init(EVP_MD_CTX *ctx) } } - if (i == digest_counts) { + if (unlikely(i == digest_counts)) { fprintf(stderr, "failed to setup the private ctx.\n"); return 0; } @@ -519,11 +519,10 @@ static int uadk_e_digest_init(EVP_MD_CTX *ctx) return 0; priv->data = malloc(DIGEST_BLOCK_SIZE); - if (!priv->data) { + if (unlikely(!priv->data)) { wd_digest_free_sess(priv->sess); return 0; } - memset(priv->data, 0, DIGEST_BLOCK_SIZE); priv->switch_threshold = sec_digest_get_sw_threshold(nid); @@ -546,7 +545,8 @@ static int digest_update_inner(EVP_MD_CTX *ctx, const void *data, size_t data_le while (priv->last_update_bufflen + left_len > DIGEST_BLOCK_SIZE) { copy_to_bufflen = DIGEST_BLOCK_SIZE - priv->last_update_bufflen; - memcpy(priv->data + priv->last_update_bufflen, tmpdata, copy_to_bufflen); + uadk_memcpy(priv->data + priv->last_update_bufflen, tmpdata, + copy_to_bufflen); priv->last_update_bufflen = DIGEST_BLOCK_SIZE; priv->req.in_bytes = DIGEST_BLOCK_SIZE; @@ -567,10 +567,9 @@ static int digest_update_inner(EVP_MD_CTX *ctx, const void *data, size_t data_le } priv->last_update_bufflen = 0; - memset(priv->data, 0, DIGEST_BLOCK_SIZE); if (left_len <= DIGEST_BLOCK_SIZE) { priv->last_update_bufflen = left_len; - memcpy(priv->data, tmpdata, priv->last_update_bufflen); + uadk_memcpy(priv->data, tmpdata, priv->last_update_bufflen); break; } } @@ -604,7 +603,7 @@ static int uadk_e_digest_update(EVP_MD_CTX *ctx, const void *data, size_t data_l goto soft_update; if (priv->last_update_bufflen + data_len <= DIGEST_BLOCK_SIZE) { - memcpy(priv->data + priv->last_update_bufflen, data, data_len); + uadk_memcpy(priv->data + priv->last_update_bufflen, data, data_len); priv->last_update_bufflen += data_len; return 1; } @@ -697,7 +696,7 @@ static int uadk_e_digest_final(EVP_MD_CTX *ctx, unsigned char *digest) priv->e_nid = EVP_MD_nid(EVP_MD_CTX_md(ctx)); ret = 
async_setup_async_event_notification(&op);
-	if (!ret) {
+	if (unlikely(!ret)) {
 		fprintf(stderr, "failed to setup async event notification.\n");
 		return 0;
 	}
diff --git a/src/uadk_utils.c b/src/uadk_utils.c
new file mode 100644
index 0000000..2b34b3a
--- /dev/null
+++ b/src/uadk_utils.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2020-2022 Huawei Technologies Co.,Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include "uadk_utils.h"
+
+#define UADK_MEM_IMPROVE_THRESHOLD 1024
+/* AArch64 q-register copy; callers pass len >= UADK_MEM_IMPROVE_THRESHOLD. */
+static void *memcpy_large(void *dstpp, const void *srcpp, size_t len)
+{
+	__asm__ __volatile__(
+		"add x4, %[src], %[count]\n\t"
+		"add x5, %[res], %[count]\n\t"
+		"ldr q0, [%[src]]\n\t"
+		"str q0, [%[res]]\n\t"
+		"sub %[count], %[count], 80\n\t"
+		"and x14, %[src], 15\n\t"
+		"bic %[src], %[src], 15\n\t"
+		"sub x3, %[res], x14\n\t"
+		"add %[count], %[count], x14\n\t"
+		/* Loop: 64 bytes per pass from the 16-byte-aligned src. */
+		"1:\n\t"
+		"ldp q0, q1, [%[src], 16]\n\t"
+		"stp q0, q1, [x3, 16]\n\t"
+		"ldp q0, q1, [%[src], 48]\n\t"
+		"stp q0, q1, [x3, 48]\n\t"
+		"add %[src], %[src], 64\n\t"
+		"add x3, x3, 64\n\t"
+		"subs %[count], %[count], 64\n\t"
+		"b.hi 1b\n\t"
+		/* Tail: last 64 bytes; may overlap bytes already copied. */
+		"ldp q0, q1, [x4, -64]\n\t"
+		"stp q0, q1, [x5, -64]\n\t"
+		"ldp q0, q1, [x4, -32]\n\t"
+		"stp q0, q1, [x5, -32]\n\t"
+		/* src and count are modified above: read-write operands. */
+		: [res] "+r"(dstpp), [src] "+r"(srcpp), [count] "+r"(len)
+		:
+		: "x3", "x4", "x5", "x14", "q0", "q1", "cc", "memory"
+	);
+
+	return dstpp;
+}
+
+void *uadk_memcpy(void *dstpp, const void *srcpp, size_t len)
+{
+	if (len >= UADK_MEM_IMPROVE_THRESHOLD)
+		return memcpy_large(dstpp, srcpp, len);
+	else
+		return memcpy(dstpp, srcpp, len);
+}
diff --git a/src/uadk_utils.h b/src/uadk_utils.h
new file mode 100644
index 0000000..a16536b
--- /dev/null
+++ b/src/uadk_utils.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2020-2022 Huawei Technologies Co.,Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#ifndef UADK_UTILS
+#define UADK_UTILS
+#include
+#include
+#include
+
+void *uadk_memcpy(void *dstpp, const void *srcpp, size_t len);
+#endif
-- 
2.24.4