memset, strchr, strchrnul, strcmp, strncmp, strcpy, strlen, strnlen. (cherry picked from commit 4ba365320a633ecd4cb47d8f171aa81fcd1dd6ef)
191 lines
4.0 KiB
Diff
191 lines
4.0 KiB
Diff
From 603aa93569ec4034aa1d5a310f59504b5d6aad4d Mon Sep 17 00:00:00 2001
|
|
From: Xue Liu <liuxue@loongson.cn>
|
|
Date: Sun, 29 Jan 2023 10:23:06 +0800
|
|
Subject: [PATCH 3/6] LoongArch: Optimize string function memset.
|
|
|
|
Change-Id: I04906c31a2eabd380b19bb3a4cab603128526cd1
|
|
---
|
|
sysdeps/loongarch/lp64/memset.S | 170 ++++++++++++++++++++++++++++++++
|
|
1 file changed, 170 insertions(+)
|
|
create mode 100644 sysdeps/loongarch/lp64/memset.S
|
|
|
|
diff --git a/sysdeps/loongarch/lp64/memset.S b/sysdeps/loongarch/lp64/memset.S
|
|
new file mode 100644
|
|
index 00000000..261504b1
|
|
--- /dev/null
|
|
+++ b/sysdeps/loongarch/lp64/memset.S
|
|
@@ -0,0 +1,170 @@
|
|
+/* Optimized memset implementation for LoongArch.
|
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library. If not, see
|
|
+ <https://www.gnu.org/licenses/>. */
|
|
+
|
|
+#include <sysdep.h>
|
|
+
|
|
+#define ST_128(n) /* 16 st.d stores: write a1 to a0+n, a0+n+8, ..., a0+n+120 (128 bytes in total).  */ \
|
|
+ st.d a1, a0, n; \
|
|
+ st.d a1, a0, n+8 ; \
|
|
+ st.d a1, a0, n+16 ; \
|
|
+ st.d a1, a0, n+24 ; \
|
|
+ st.d a1, a0, n+32 ; \
|
|
+ st.d a1, a0, n+40 ; \
|
|
+ st.d a1, a0, n+48 ; \
|
|
+ st.d a1, a0, n+56 ; \
|
|
+ st.d a1, a0, n+64 ; \
|
|
+ st.d a1, a0, n+72 ; \
|
|
+ st.d a1, a0, n+80 ; \
|
|
+ st.d a1, a0, n+88 ; \
|
|
+ st.d a1, a0, n+96 ; \
|
|
+ st.d a1, a0, n+104; \
|
|
+ st.d a1, a0, n+112; \
|
|
+ st.d a1, a0, n+120; \
|
|
+
|
|
+/* void *memset(void *s, int c, size_t n);  Fills n bytes at s with the low byte of c and returns s.  On entry: a0 = s, a1 = c, a2 = n.  */
|
|
+LEAF(memset)
|
|
+ .align 6 # presumably 64-byte alignment (2^6) - NOTE(review): this follows LEAF, confirm padding cannot land after the entry label
|
|
+
|
|
+ bstrins.d a1, a1, 15, 8 # a1[15:8] = a1[7:0]
|
|
+ add.d t7, a0, a2 # t7 = s + n, one past the last byte
|
|
+ bstrins.d a1, a1, 31, 16 # a1[31:16] = a1[15:0]
|
|
+ move t0, a0 # t0 = original s (a0 is advanced on the large path)
|
|
+ bstrins.d a1, a1, 63, 32 # a1[63:32] = a1[31:0], fill byte now in all 8 byte lanes
|
|
+ srai.d t8, a2, 4 #num/16
|
|
+ beqz t8, less_16bytes #num<16
|
|
+ srai.d t8, a2, 6 #num/64
|
|
+ bnez t8, more_64bytes #num>=64
|
|
+ srai.d t8, a2, 5 #num/32
|
|
+ beqz t8, less_32bytes #16<=num<32
|
|
+ st.d a1, a0, 0 #32<=num<64: first 32 bytes from the start
|
|
+ st.d a1, a0, 8
|
|
+ st.d a1, a0, 16
|
|
+ st.d a1, a0, 24
|
|
+ st.d a1, t7, -32 # last 32 bytes, addressed back from the end (overlaps the head stores)
|
|
+ st.d a1, t7, -24
|
|
+ st.d a1, t7, -16
|
|
+ st.d a1, t7, -8
|
|
+
|
|
+ jr ra # a0 is unmodified on this path
|
|
+
|
|
+less_32bytes: # 16<=num<32
|
|
+ st.d a1, a0, 0 # first 16 bytes
|
|
+ st.d a1, a0, 8
|
|
+ st.d a1, t7, -16 # last 16 bytes (may overlap the first 16)
|
|
+ st.d a1, t7, -8
|
|
+
|
|
+ jr ra
|
|
+
|
|
+less_16bytes: # num<16
|
|
+ srai.d t8, a2, 3 #num/8
|
|
+ beqz t8, less_8bytes #num<8
|
|
+ st.d a1, a0, 0 #8<=num<16: first 8 bytes
|
|
+ st.d a1, t7, -8 # last 8 bytes (may overlap)
|
|
+
|
|
+ jr ra
|
|
+
|
|
+less_8bytes: # num<8
|
|
+ srai.d t8, a2, 2 #num/4
|
|
+ beqz t8, less_4bytes #num<4
|
|
+ st.w a1, a0, 0 #4<=num<8: first 4 bytes
|
|
+ st.w a1, t7, -4 # last 4 bytes (may overlap)
|
|
+
|
|
+ jr ra
|
|
+
|
|
+less_4bytes: # num<4
|
|
+ srai.d t8, a2, 1 #num/2
|
|
+ beqz t8, less_2bytes #num<2
|
|
+ st.h a1, a0, 0 #2<=num<4: first 2 bytes
|
|
+ st.h a1, t7, -2 # last 2 bytes (may overlap)
|
|
+
|
|
+ jr ra
|
|
+
|
|
+less_2bytes: # num is 0 or 1
|
|
+ beqz a2, less_1bytes #num==0
|
|
+ st.b a1, a0, 0 #num==1: the single byte
|
|
+
|
|
+ jr ra
|
|
+
|
|
+less_1bytes: # num==0: nothing to store
|
|
+ jr ra
|
|
+
|
|
+more_64bytes: # num>=64
|
|
+ srli.d a0, a0, 3 # shift right then left by 3 ...
|
|
+ slli.d a0, a0, 3 # ... a0 = s rounded down to a multiple of 8
|
|
+ addi.d a0, a0, 0x8 # a0 = first 8-byte boundary strictly above s
|
|
+ st.d a1, t0, 0 # first 8 bytes at s (possibly unaligned), covers s up to a0
|
|
+ sub.d t2, t0, a0 # t2 = s - a0, in the range -8..-1
|
|
+ add.d a2, t2, a2 # a2 = bytes remaining from a0 to the end (t7 - a0)
|
|
+
|
|
+ addi.d a2, a2, -0x80 # bias by -128 so that a2 >= 0 means a full 128-byte block remains
|
|
+ blt a2, zero, end_unalign_proc # fewer than 128 bytes remain, skip the loop
|
|
+
|
|
+loop_less:
|
|
+ ST_128(0) # store 128 bytes starting at a0
|
|
+ addi.d a0, a0, 0x80 # advance the store pointer by 128
|
|
+ addi.d a2, a2, -0x80 # 128 fewer bytes remain (value is still biased)
|
|
+ bge a2, zero, loop_less # repeat while at least 128 bytes remain
|
|
+
|
|
+end_unalign_proc:
|
|
+ addi.d a2, a2, 0x80 # undo the bias: a2 = bytes remaining, 0..127
|
|
+
|
|
+ pcaddi t1, 20 # t1 = PC + 20*4 = end_0_8_unalign, the 20 must match the instruction count from here to that label
|
|
+ andi t5, a2, 0x78 # t5 = 8 * (whole doublewords remaining), 0..120
|
|
+ srli.d t5, t5, 1 # t5 = 4 * doublewords = code bytes of that many st.d instructions
|
|
+ sub.d t1, t1, t5 # back up one st.d per remaining doubleword
|
|
+ jirl zero, t1, 0 # jump into the store ladder below (no link)
|
|
+
|
|
+end_120_128_unalign: # ladder: each entry stores one doubleword and falls through to the next
|
|
+ st.d a1, a0, 112
|
|
+end_112_120_unalign:
|
|
+ st.d a1, a0, 104
|
|
+end_104_112_unalign:
|
|
+ st.d a1, a0, 96
|
|
+end_96_104_unalign:
|
|
+ st.d a1, a0, 88
|
|
+end_88_96_unalign:
|
|
+ st.d a1, a0, 80
|
|
+end_80_88_unalign:
|
|
+ st.d a1, a0, 72
|
|
+end_72_80_unalign:
|
|
+ st.d a1, a0, 64
|
|
+end_64_72_unalign:
|
|
+ st.d a1, a0, 56
|
|
+end_56_64_unalign:
|
|
+ st.d a1, a0, 48
|
|
+end_48_56_unalign:
|
|
+ st.d a1, a0, 40
|
|
+end_40_48_unalign:
|
|
+ st.d a1, a0, 32
|
|
+end_32_40_unalign:
|
|
+ st.d a1, a0, 24
|
|
+end_24_32_unalign:
|
|
+ st.d a1, a0, 16
|
|
+end_16_24_unalign:
|
|
+ st.d a1, a0, 8
|
|
+end_8_16_unalign:
|
|
+ st.d a1, a0, 0
|
|
+end_0_8_unalign:
|
|
+ st.d a1, t7, -8 # last 8 bytes, addressed from the end: covers the 0..7 byte tail
|
|
+
|
|
+ move v0, t0 # return the original s (a0 was advanced above)
|
|
+ jr ra
|
|
+
|
|
+END(memset)
|
|
+
|
|
+libc_hidden_builtin_def (memset)
|
|
--
|
|
2.33.0
|
|
|