memset, strchr, strchrnul, strcmp, strncmp, strcpy, strlen, strnlen. (cherry picked from commit 4ba365320a633ecd4cb47d8f171aa81fcd1dd6ef)
256 lines
6.5 KiB
Diff
256 lines
6.5 KiB
Diff
From 86290dbec63a9688ab0e0085ab8ab686fa256f18 Mon Sep 17 00:00:00 2001
|
|
From: Xue Liu <liuxue@loongson.cn>
|
|
Date: Sun, 29 Jan 2023 10:25:47 +0800
|
|
Subject: [PATCH 6/6] LoongArch: Optimize string functions strlen, strnlen.
|
|
|
|
Change-Id: I5df3398f9dbd9ea72c3de14e1e5f7793f6dbd794
|
|
---
|
|
sysdeps/loongarch/lp64/strlen.S | 102 +++++++++++++++++++++++++
|
|
sysdeps/loongarch/lp64/strnlen.S | 125 +++++++++++++++++++++++++++++++
|
|
2 files changed, 227 insertions(+)
|
|
create mode 100644 sysdeps/loongarch/lp64/strlen.S
|
|
create mode 100644 sysdeps/loongarch/lp64/strnlen.S
|
|
|
|
diff --git a/sysdeps/loongarch/lp64/strlen.S b/sysdeps/loongarch/lp64/strlen.S
|
|
new file mode 100644
|
|
index 00000000..3569598c
|
|
--- /dev/null
|
|
+++ b/sysdeps/loongarch/lp64/strlen.S
|
|
@@ -0,0 +1,102 @@
|
|
+/* Optimized strlen implementation for LoongArch.
|
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library. If not, see
|
|
+ <https://www.gnu.org/licenses/>. */
|
|
+
|
|
+#include <sysdep.h>
|
|
+#include <sys/asm.h>
|
|
+
|
|
+/* size_t strlen (const char *s1); a0 = s1, result in v0; scans 8 aligned bytes per load, little-endian byte indexing. */
|
|
+LEAF(strlen)
|
|
+ .align 5
|
|
+
|
|
+ nor t4, zero, zero /* t4 = ~0 (all ones) */
|
|
+ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
|
|
+ andi t5, a0, 0x7 /* t5 = byte offset of s1 inside its 8-byte word */
|
|
+
|
|
+ li.w t7, 0x7
|
|
+ slli.d t6, t5, 0x3 /* t6 = that offset in bits */
|
|
+ andn t7, a0, t7 /* t7 = a0 & ~7, the aligned-down address */
|
|
+ ld.d a1, t7, 0 /* a1 = aligned word containing the first byte */
|
|
+ sub.d t7, zero, t6 /* t7 = -t6 */
|
|
+ sll.d t4, t4, t7 /* shift count is taken mod 64, so t4 = ~0 << (64 - t6) */
|
|
+ maskeqz t4, t4, t6 /* t4 = 0 when t6 == 0 (s1 already aligned) */
|
|
+ srl.d a1, a1, t6 /* drop the bytes that precede s1 */
|
|
+ or a1, a1, t4 /* fill the vacated high bytes with 0xff so they cannot match NUL */
|
|
+
|
|
+
|
|
+ ori a2, a2, 0x101 /* a2 = 0x01010101 */
|
|
+ nor t1, a1, zero /* t1 = ~a1; NOTE(review): overwritten below before it is read */
|
|
+ li.w a4, 8
|
|
+
|
|
+ bstrins.d a2, a2, 63, 32 /* copy low half into high half: a2 = 0x0101010101010101 */
|
|
+ sub.d a5, a4, t5 /* a5 = 8 - offset = bytes up to the next aligned word */
|
|
+ move t5, a0 /* t5 = original s1, kept for the final subtraction */
|
|
+
|
|
+ sub.d t0, a1, a2 /* t0 = word - 0x01..01 */
|
|
+ slli.d t4, a2, 7 /* t4 = 0x8080808080808080 */
|
|
+ nor a3, zero, t4 /* a3 = 0x7f7f7f7f7f7f7f7f */
|
|
+ nor t1, a1, a3 /* t1 = ~word & 0x80..80 */
|
|
+
|
|
+ and t0, t0, t1 /* t0 != 0 iff the word contains a zero byte */
|
|
+ bnez t0, strlen_count1 /* NUL in the first word; a0 still equals s1 here */
|
|
+ add.d a0, a0, a5 /* advance a0 to the next 8-byte boundary */
|
|
+strlen_loop: /* fast loop: two words per iteration, cheap test only */
|
|
+ ld.d a1, a0, 0
|
|
+ sub.d t0, a1, a2
|
|
+ and t1, t0, t4 /* cheap test; can also fire on bytes with the top bit set */
|
|
+ bnez t1, strlen_count_pre
|
|
+ ld.d a1, a0, 8
|
|
+ sub.d t0, a1, a2
|
|
+ and t1, t0, t4
|
|
+ addi.d a0, a0, 16
|
|
+ beqz t1, strlen_loop
|
|
+strlen_count:
|
|
+ addi.d a0, a0, -8 /* hit was in the second word: point a0 back at it */
|
|
+strlen_count_pre:
|
|
+ nor t1, a1, a3
|
|
+ and t0, t0, t1 /* exact zero-byte test on the candidate word */
|
|
+ beqz t0, strlen_noascii_start /* false positive: continue with the exact-test loop */
|
|
+strlen_count1: /* expects a0 = address of the word being counted, t0 = zero-byte flags */
|
|
+ ctz.d t1, t0 /* lowest flag bit belongs to the first zero byte */
|
|
+ sub.d v0, a0, t5 /* bytes from s1 up to the current word */
|
|
+ srli.w t1, t1, 3 /* bit index -> byte index */
|
|
+ add.d v0, v0, t1
|
|
+ jr ra
|
|
+strlen_noascii_start:
|
|
+ addi.d a0, a0, 8 /* skip the word that was just checked */
|
|
+strlen_loop_noascii: /* same two-word loop, but with the exact test on every word */
|
|
+ ld.d a1, a0, 0
|
|
+ sub.d t0, a1, a2
|
|
+ nor t1, a1, a3
|
|
+ and t0, t0, t1
|
|
+ bnez t0, strlen_count1
|
|
+ ld.d a1, a0, 8
|
|
+ sub.d t0, a1, a2
|
|
+ nor t1, a1, a3
|
|
+ and t0, t0, t1
|
|
+ addi.d a0, a0, 16
|
|
+ beqz t0, strlen_loop_noascii
|
|
+ addi.d a0, a0, -8 /* NUL is in the second word: point a0 back at it */
|
|
+ ctz.d t1, t0
|
|
+ sub.d v0, a0, t5
|
|
+ srli.w t1, t1, 3
|
|
+ add.d v0, v0, t1
|
|
+ jr ra
|
|
+END(strlen)
|
|
+
|
|
+libc_hidden_builtin_def (strlen)
|
|
+
|
|
diff --git a/sysdeps/loongarch/lp64/strnlen.S b/sysdeps/loongarch/lp64/strnlen.S
|
|
new file mode 100644
|
|
index 00000000..8eaa60e2
|
|
--- /dev/null
|
|
+++ b/sysdeps/loongarch/lp64/strnlen.S
|
|
@@ -0,0 +1,125 @@
|
|
+/* Optimized strnlen implementation for LoongArch.
|
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library. If not, see
|
|
+ <https://www.gnu.org/licenses/>. */
|
|
+
|
|
+#include <sys/asm.h>
|
|
+
|
|
+/* rd <- (rc != 0) ? ra : rb.
|
|
+ Scratch register a5 (alias tmp4) is destroyed; rd, ra and rc must not be a5. */
|
|
+#define CONDITIONSEL(rd, ra, rb, rc)\
|
|
+ masknez a5, rb, rc;\
|
|
+ maskeqz rd, ra, rc;\
|
|
+ or rd, rd, a5
|
|
+
|
|
+/* Parameters and Results */
|
|
+#define srcin a0 /* first argument: start of the string */
|
|
+#define limit a1 /* second argument: maxlen */
|
|
+#define len v0 /* return value */
|
|
+
|
|
+/* Internal variable */
|
|
+#define data1 t0 /* first 8 bytes of the current 16-byte block */
|
|
+#define data2 t1 /* second 8 bytes of the current 16-byte block */
|
|
+#define has_nul1 t2 /* zero-byte flags computed from data1 */
|
|
+#define has_nul2 t3 /* zero-byte flags computed from data2 */
|
|
+#define src t4 /* 16-byte-aligned read cursor */
|
|
+#define zeroones t5 /* 0x0101010101010101 */
|
|
+#define sevenf t6 /* 0x7f7f7f7f7f7f7f7f */
|
|
+#define data2a t7 /* data2 with leading bytes forced to 0xff (misaligned entry) */
|
|
+#define tmp6 t7 /* same register as data2a; not referenced in the visible code */
|
|
+#define pos t8 /* byte index of the NUL inside its word */
|
|
+#define tmp1 a2
|
|
+#define tmp2 a3
|
|
+#define tmp3 a4
|
|
+#define tmp4 a5 /* also the scratch register clobbered by CONDITIONSEL */
|
|
+#define tmp5 a6
|
|
+#define limit_wd a7 /* countdown of 16-byte blocks still allowed by limit */
|
|
+
|
|
+/* size_t strnlen (const char *s1,size_t maxlen); returns min (index of first NUL, maxlen), reading aligned 16-byte blocks. */
|
|
+LEAF(__strnlen)
|
|
+ .align 4
|
|
+ beqz limit, _hit_limit /* maxlen == 0: return 0 without touching memory */
|
|
+ lu12i.w zeroones, 0x01010
|
|
+ lu12i.w sevenf, 0x7f7f7
|
|
+ ori zeroones, zeroones, 0x101 /* low 32 bits = 0x01010101 */
|
|
+ ori sevenf, sevenf, 0xf7f /* low 32 bits = 0x7f7f7f7f */
|
|
+ bstrins.d zeroones, zeroones, 63, 32 /* replicate into the high half */
|
|
+ bstrins.d sevenf, sevenf, 63, 32 /* replicate into the high half */
|
|
+ andi tmp1, srcin, 15 /* tmp1 = offset of s1 inside its 16-byte block */
|
|
+ sub.d src, srcin, tmp1 /* src = s1 rounded down to 16 bytes */
|
|
+ bnez tmp1, misaligned
|
|
+ addi.d limit_wd, limit, -1
|
|
+ srli.d limit_wd, limit_wd, 4 /* limit_wd = (maxlen - 1) / 16 */
|
|
+_loop:
|
|
+ ld.d data1, src, 0
|
|
+ ld.d data2, src, 8
|
|
+ addi.d src, src, 16 /* src now points past the block just loaded */
|
|
+_realigned:
|
|
+ sub.d tmp1, data1, zeroones
|
|
+ or tmp2, data1, sevenf
|
|
+ sub.d tmp3, data2, zeroones
|
|
+ or tmp4, data2, sevenf
|
|
+ andn has_nul1, tmp1, tmp2 /* (x - 0x01..) & ~(x | 0x7f..): nonzero iff data1 has a zero byte */
|
|
+ andn has_nul2, tmp3, tmp4 /* same test for data2 */
|
|
+ addi.d limit_wd, limit_wd, -1
|
|
+ srli.d tmp1, limit_wd, 63 /* tmp1 = 1 once the block counter has gone negative */
|
|
+ or tmp2, has_nul1, has_nul2
|
|
+ or tmp3, tmp1, tmp2
|
|
+ beqz tmp3, _loop /* no NUL and blocks remain: keep scanning */
|
|
+ beqz tmp2, _hit_limit /* counter ran out and no NUL seen: return maxlen */
|
|
+ sub.d len, src, srcin /* len = bytes from s1 to the end of this block */
|
|
+ beqz has_nul1, _nul_in_data2
|
|
+ move has_nul2, has_nul1 /* NUL is in the first word: use its flags ... */
|
|
+ addi.d len, len, -8 /* ... and step back over the second word */
|
|
+_nul_in_data2:
|
|
+ ctz.d pos, has_nul2 /* lowest flag bit belongs to the first zero byte */
|
|
+ srli.d pos, pos, 3 /* bit index -> byte index */
|
|
+ addi.d len, len, -8 /* step back to the start of the word holding the NUL */
|
|
+ add.d len, len, pos
|
|
+ sltu tmp1, len, limit /* tmp1 = (len < maxlen), unsigned */
|
|
+ CONDITIONSEL(len, len, limit, tmp1) /* len = min (len, maxlen): the NUL may lie beyond maxlen */
|
|
+ jr ra
|
|
+
|
|
+misaligned: /* entered with tmp1 = s1 & 15 (nonzero) and src = s1 - tmp1 */
|
|
+ addi.d limit_wd, limit, -1
|
|
+ sub.d tmp4, zero, tmp1 /* tmp4 = -offset */
|
|
+ andi tmp3, limit_wd, 15 /* tmp3 = (maxlen - 1) % 16 */
|
|
+ srli.d limit_wd, limit_wd, 4 /* limit_wd = (maxlen - 1) / 16 */
|
|
+ li.d tmp5, -1 /* tmp5 = all ones */
|
|
+ ld.d data1, src, 0
|
|
+ ld.d data2, src, 8
|
|
+ addi.d src, src, 16
|
|
+ slli.d tmp4, tmp4, 3 /* tmp4 = -offset in bits */
|
|
+ add.d tmp3, tmp3, tmp1 /* remainder plus the bytes skipped before s1 */
|
|
+ srl.d tmp2, tmp5, tmp4 /* shift count mod 64: tmp2 = 0xff in the low (offset & 7) bytes, all ones if offset == 8 */
|
|
+ srli.d tmp3, tmp3, 4 /* 1 if that sum spills into one more block */
|
|
+ add.d limit_wd, limit_wd, tmp3
|
|
+ or data1, data1, tmp2 /* offset <= 8 case: hide data1 bytes that precede s1 */
|
|
+ or data2a, data2, tmp2 /* offset > 8 case: hide data2 bytes that precede s1 */
|
|
+ li.w tmp3, 9
|
|
+ sltu tmp1, tmp1, tmp3 /* tmp1 = (offset <= 8) */
|
|
+ CONDITIONSEL(data1, data1, tmp5, tmp1) /* offset > 8: data1 lies wholly before s1, make it all ones */
|
|
+ CONDITIONSEL(data2, data2, data2a, tmp1) /* offset > 8: use the masked copy of data2 */
|
|
+ b _realigned
|
|
+
|
|
+_hit_limit:
|
|
+ move len, limit /* return maxlen */
|
|
+ jr ra
|
|
+END(__strnlen)
|
|
+
|
|
+weak_alias (__strnlen, strnlen)
|
|
+libc_hidden_def (strnlen)
|
|
+libc_hidden_def (__strnlen)
|
|
--
|
|
2.33.0
|
|
|