uadk_engine/0033-digest-improve-the-digest-performance.patch
Wenkai Lin e73c02af5a engine: update uadk engine source
Backport uadk engine patch from linaro.

Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com>
(cherry picked from commit 4f6b3e8022834a09849728415c9ea73d7fdb794a)
2022-03-21 16:54:55 +08:00

227 lines
7.2 KiB
Diff

From 3cbf3c85645d7ec43aa149c1bcf346a556bbf30a Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Tue, 8 Mar 2022 15:44:00 +0800
Subject: [PATCH 33/36] digest: improve the digest performance
1. The memset function found to be a hotspot function by perf. so should
remove the memset in io path. it can improve three times
performance.
2. add some branch predictor function to improve a little performance.
3. use the uadk_memory_cpy can improve 5% performance.
Signed-off-by: Kai Ye <yekai13@huawei.com>
---
src/Makefile.am | 4 +--
src/uadk.h | 1 +
src/uadk_digest.c | 17 ++++++-------
src/uadk_utils.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
src/uadk_utils.h | 24 ++++++++++++++++++
5 files changed, 98 insertions(+), 11 deletions(-)
create mode 100644 src/uadk_utils.c
create mode 100644 src/uadk_utils.h
diff --git a/src/Makefile.am b/src/Makefile.am
index 636f559..75595aa 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -7,8 +7,8 @@ UADK_ENGINE_VERSION = -version-number ${MAJOR}:${MINOR}:${REVISION}
lib_LTLIBRARIES=uadk_engine.la
-uadk_engine_la_SOURCES=e_uadk.c uadk_cipher.c uadk_digest.c uadk_async.c uadk_rsa.c \
- uadk_sm2.c uadk_pkey.c uadk_dh.c uadk_ec.c uadk_ecx.c
+uadk_engine_la_SOURCES=uadk_utils.c e_uadk.c uadk_cipher.c uadk_digest.c uadk_async.c \
+ uadk_rsa.c uadk_sm2.c uadk_pkey.c uadk_dh.c uadk_ec.c uadk_ecx.c
uadk_engine_la_LIBADD=-ldl -lwd -lwd_crypto -lpthread
uadk_engine_la_LDFLAGS=-module $(UADK_ENGINE_VERSION)
diff --git a/src/uadk.h b/src/uadk.h
index 0f9b0be..384e035 100644
--- a/src/uadk.h
+++ b/src/uadk.h
@@ -19,6 +19,7 @@
#include <openssl/engine.h>
#include <uadk/wd.h>
#include <uadk/wd_sched.h>
+#include "uadk_utils.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define ENV_STRING_LEN 256
diff --git a/src/uadk_digest.c b/src/uadk_digest.c
index ad24168..355917d 100644
--- a/src/uadk_digest.c
+++ b/src/uadk_digest.c
@@ -493,7 +493,7 @@ static int uadk_e_digest_init(EVP_MD_CTX *ctx)
priv->state = SEC_DIGEST_INIT;
ret = uadk_e_init_digest();
- if (!ret) {
+ if (unlikely(!ret)) {
priv->switch_flag = UADK_DO_SOFT;
fprintf(stderr, "uadk failed to initialize digest.\n");
goto soft_init;
@@ -507,7 +507,7 @@ static int uadk_e_digest_init(EVP_MD_CTX *ctx)
}
}
- if (i == digest_counts) {
+ if (unlikely(i == digest_counts)) {
fprintf(stderr, "failed to setup the private ctx.\n");
return 0;
}
@@ -519,11 +519,10 @@ static int uadk_e_digest_init(EVP_MD_CTX *ctx)
return 0;
priv->data = malloc(DIGEST_BLOCK_SIZE);
- if (!priv->data) {
+ if (unlikely(!priv->data)) {
wd_digest_free_sess(priv->sess);
return 0;
}
- memset(priv->data, 0, DIGEST_BLOCK_SIZE);
priv->switch_threshold = sec_digest_get_sw_threshold(nid);
@@ -546,7 +545,8 @@ static int digest_update_inner(EVP_MD_CTX *ctx, const void *data, size_t data_le
while (priv->last_update_bufflen + left_len > DIGEST_BLOCK_SIZE) {
copy_to_bufflen = DIGEST_BLOCK_SIZE - priv->last_update_bufflen;
- memcpy(priv->data + priv->last_update_bufflen, tmpdata, copy_to_bufflen);
+ uadk_memcpy(priv->data + priv->last_update_bufflen, tmpdata,
+ copy_to_bufflen);
priv->last_update_bufflen = DIGEST_BLOCK_SIZE;
priv->req.in_bytes = DIGEST_BLOCK_SIZE;
@@ -567,10 +567,9 @@ static int digest_update_inner(EVP_MD_CTX *ctx, const void *data, size_t data_le
}
priv->last_update_bufflen = 0;
- memset(priv->data, 0, DIGEST_BLOCK_SIZE);
if (left_len <= DIGEST_BLOCK_SIZE) {
priv->last_update_bufflen = left_len;
- memcpy(priv->data, tmpdata, priv->last_update_bufflen);
+ uadk_memcpy(priv->data, tmpdata, priv->last_update_bufflen);
break;
}
}
@@ -604,7 +603,7 @@ static int uadk_e_digest_update(EVP_MD_CTX *ctx, const void *data, size_t data_l
goto soft_update;
if (priv->last_update_bufflen + data_len <= DIGEST_BLOCK_SIZE) {
- memcpy(priv->data + priv->last_update_bufflen, data, data_len);
+ uadk_memcpy(priv->data + priv->last_update_bufflen, data, data_len);
priv->last_update_bufflen += data_len;
return 1;
}
@@ -697,7 +696,7 @@ static int uadk_e_digest_final(EVP_MD_CTX *ctx, unsigned char *digest)
priv->e_nid = EVP_MD_nid(EVP_MD_CTX_md(ctx));
ret = async_setup_async_event_notification(&op);
- if (!ret) {
+ if (unlikely(!ret)) {
fprintf(stderr, "failed to setup async event notification.\n");
return 0;
}
diff --git a/src/uadk_utils.c b/src/uadk_utils.c
new file mode 100644
index 0000000..2b34b3a
--- /dev/null
+++ b/src/uadk_utils.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2020-2022 Huawei Technologies Co.,Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include "uadk_utils.h"
+
+#define UADK_MEM_IMPROVE_THRESHOLD 1024
+
+static void *memcpy_large(void *dstpp, const void *srcpp, size_t len)
+{
+	/* AArch64-only copy using 128-bit loads/stores. Needs len >= 80 (the
+	 * caller passes len >= UADK_MEM_IMPROVE_THRESHOLD). src and count are
+	 * rewritten by the asm, so they are in/out operands; subs sets flags
+	 * and memory is accessed, hence the "cc" and "memory" clobbers. */
+	__asm__ __volatile__(
+		"add x4, %[src], %[count]\n\t"		/* x4 = source end */
+		"add x5, %[res], %[count]\n\t"		/* x5 = destination end */
+		"ldr q0, [%[src]]\n\t"			/* first 16 bytes, unaligned */
+		"str q0, [%[res]]\n\t"
+		"sub %[count], %[count], 80\n\t"
+		"and x14, %[src], 15\n\t"		/* x14 = src misalignment */
+		"bic %[src], %[src], 15\n\t"		/* round src down to 16 */
+		"sub x3, %[res], x14\n\t"		/* x3 = dst shifted equally */
+		"add %[count], %[count], x14\n\t"	/* count = len - 80 + x14 */
+		"1:\n\t"				/* main loop: 64 bytes per pass */
+		"ldp q0, q1, [%[src], 16]\n\t"
+		"stp q0, q1, [x3, 16]\n\t"
+		"ldp q0, q1, [%[src], 48]\n\t"
+		"stp q0, q1, [x3, 48]\n\t"
+		"add %[src], %[src], 64\n\t"
+		"add x3, x3, 64\n\t"
+		"subs %[count], %[count], 64\n\t"
+		"b.hi 1b\n\t"				/* repeat while count was > 64 */
+		"ldp q0, q1, [x4, -64]\n\t"		/* tail: last 64 bytes, */
+		"stp q0, q1, [x5, -64]\n\t"		/* addressed from both ends */
+		"ldp q0, q1, [x4, -32]\n\t"
+		"stp q0, q1, [x5, -32]\n\t"
+		: [res] "+r"(dstpp), [src] "+r"(srcpp), [count] "+r"(len)
+		: /* no input-only operands */
+		: "x3", "x4", "x5", "x14", "q0", "q1", "cc", "memory"
+	);
+	return dstpp;
+}
+
+void *uadk_memcpy(void *dstpp, const void *srcpp, size_t len)
+{
+	/* Below the threshold use libc memcpy(); otherwise memcpy_large(). */
+	if (len < UADK_MEM_IMPROVE_THRESHOLD)
+		return memcpy(dstpp, srcpp, len);
+	return memcpy_large(dstpp, srcpp, len);
+}
diff --git a/src/uadk_utils.h b/src/uadk_utils.h
new file mode 100644
index 0000000..a16536b
--- /dev/null
+++ b/src/uadk_utils.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2020-2022 Huawei Technologies Co.,Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#ifndef UADK_UTILS
+#define UADK_UTILS
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* Copies len bytes from srcpp to dstpp and returns dstpp (uadk_utils.c). */
+void *uadk_memcpy(void *dstpp, const void *srcpp, size_t len);
+#endif
--
2.24.4