memset, strchr, strchrnul, strcmp, strncmp, strcpy, strlen, strnlen. (cherry picked from commit 4ba365320a633ecd4cb47d8f171aa81fcd1dd6ef)
251 lines
5.5 KiB
Diff
251 lines
5.5 KiB
Diff
From dd99689b821162293506e0344f163b82349a9298 Mon Sep 17 00:00:00 2001
|
|
From: Xue Liu <liuxue@loongson.cn>
|
|
Date: Sun, 29 Jan 2023 10:22:01 +0800
|
|
Subject: [PATCH 2/6] LoongArch: Optimize string functions strchr, strchrnul.
|
|
|
|
Change-Id: I8b274972642b6a1926d8fc176404bfd83344bc51
|
|
---
|
|
sysdeps/loongarch/lp64/strchr.S | 107 +++++++++++++++++++++++++++
|
|
sysdeps/loongarch/lp64/strchrnul.S | 115 +++++++++++++++++++++++++++++
|
|
2 files changed, 222 insertions(+)
|
|
create mode 100644 sysdeps/loongarch/lp64/strchr.S
|
|
create mode 100644 sysdeps/loongarch/lp64/strchrnul.S
|
|
|
|
diff --git a/sysdeps/loongarch/lp64/strchr.S b/sysdeps/loongarch/lp64/strchr.S
|
|
new file mode 100644
|
|
index 00000000..3d64c684
|
|
--- /dev/null
|
|
+++ b/sysdeps/loongarch/lp64/strchr.S
|
|
@@ -0,0 +1,107 @@
|
|
+/* Optimized strchr implementation for LoongArch.
|
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library. If not, see
|
|
+ <https://www.gnu.org/licenses/>. */
|
|
+
|
|
+#include <sysdep.h>
|
|
+
|
|
+/* char * strchr (const char *s1, int c); Scans 8 bytes per load with a0 as the string pointer and a1 as the character; leaves in v0 the address of the first byte equal to the low byte of a1, or 0 if the NUL terminator comes first. */
|
|
+LEAF(strchr)
|
|
+ .align 6 /* NOTE(review): presumably 2^6 = 64-byte alignment; it follows LEAF(), so if LEAF emits the entry label any padding lands after it - confirm. */
|
|
+
|
|
+ li.w t4, 0x7 /* t4 = 7: mask of the low three address bits */
|
|
+ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
|
|
+ bstrins.d a1, a1, 15, 8 /* copy a1[7:0] into a1[15:8] */
|
|
+ andi t0, a0, 0x7 /* t0 = byte offset of a0 inside its 8-byte word */
|
|
+
|
|
+ ori a2, a2, 0x101 /* a2 = 0x01010101 */
|
|
+ andn t4, a0, t4 /* t4 = a0 with the low three bits cleared */
|
|
+ slli.w t1, t0, 3 /* t1 = that offset expressed in bits */
|
|
+
|
|
+ ld.d t4, t4, 0 /* aligned load of the word that contains the byte at a0 */
|
|
+
|
|
+ nor t8, zero, zero /* t8 = all ones */
|
|
+ bstrins.d a1, a1, 31, 16 /* low four bytes of a1 now all hold the character */
|
|
+ srl.d t4, t4, t1 /* shift out the bytes below a0; the low byte of t4 is now the byte at a0 */
|
|
+
|
|
+ bstrins.d a1, a1, 63, 32 /* a1 = character replicated into all eight bytes */
|
|
+ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */
|
|
+ srl.d a7, t8, t1 /* a7 = ones over the lanes that still hold loaded data */
|
|
+
|
|
+ li.w t1, 8
|
|
+ nor t8, a7, zero /* t8 = ones over the high lanes vacated by the shift */
|
|
+ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 */
|
|
+ or t5, t8, t4 /* t5 = data with vacated lanes forced to 0xff, so they cannot look like NUL */
|
|
+ and t3, a7, a1 /* t3 = replicated character, zeroed in the vacated lanes */
|
|
+
|
|
+ sub.w t1, t1, t0 /* t1 = 8 - offset: bytes covered by this first word */
|
|
+ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f */
|
|
+ xor t2, t5, t3 /* t2 lane is 0x00 where data equals the character; vacated lanes are 0xff */
|
|
+ sub.d a7, t5, a2 /* NUL test: data minus 0x01 per lane ... */
|
|
+ nor a6, t5, a3 /* ... and bit 7 of each lane of the inverted data */
|
|
+
|
|
+ sub.d a5, t2, a2 /* match test: same two steps applied to t2 */
|
|
+ nor a4, t2, a3
|
|
+
|
|
+ and a6, a7, a6 /* a6: lowest set bit flags the first NUL lane (only the lowest bit is used below) */
|
|
+ and a5, a5, a4 /* a5: lowest set bit flags the first lane equal to the character */
|
|
+ or a7, a6, a5
|
|
+ bnez a7, L(_mc8_a) /* NUL or match in the first word; a0 is still the original pointer */
|
|
+
|
|
+ add.d a0, a0, t1 /* step a0 up to the next 8-byte boundary */
|
|
+L(_aloop): /* main loop: examines two aligned words per iteration */
|
|
+ ld.d t4, a0, 0 /* first word of the pair */
|
|
+
|
|
+ xor t2, t4, a1 /* zero lanes in t2 mark bytes equal to the character */
|
|
+ sub.d a7, t4, a2
|
|
+ nor a6, t4, a3
|
|
+ sub.d a5, t2, a2
|
|
+
|
|
+ nor a4, t2, a3
|
|
+ and a6, a7, a6 /* a6 = NUL flags for this word */
|
|
+ and a5, a5, a4 /* a5 = match flags for this word */
|
|
+ or a7, a6, a5
|
|
+ bnez a7, L(_mc8_a) /* hit in the first word; a0 points at it */
|
|
+
|
|
+ ld.d t4, a0, 8 /* second word of the pair */
|
|
+ addi.d a0, a0, 16 /* advance past both words for the next iteration */
|
|
+ xor t2, t4, a1
|
|
+ sub.d a7, t4, a2
|
|
+ nor a6, t4, a3
|
|
+ sub.d a5, t2, a2
|
|
+
|
|
+ nor a4, t2, a3
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+ or a7, a6, a5
|
|
+ beqz a7, L(_aloop) /* nothing found in either word: keep scanning */
|
|
+
|
|
+ addi.d a0, a0, -8 /* hit was in the second word: move a0 back onto it */
|
|
+L(_mc8_a): /* here a0 addresses lane 0 of the flagged word */
|
|
+
|
|
+ ctz.d t0, a5 /* bit index of the first match flag; code relies on 64 when a5 is zero */
|
|
+ ctz.d t2, a6 /* bit index of the first NUL flag */
|
|
+
|
|
+ srli.w t0, t0, 3 /* t0 = byte offset of the first match (8 if none) */
|
|
+ srli.w t2, t2, 3 /* t2 = byte offset of the first NUL (8 if none) */
|
|
+ sltu t1, t2, t0 /* t1 = 1 when the NUL precedes any match; equal offsets (character is NUL) give 0 */
|
|
+ add.d v0, a0, t0 /* candidate result: address of the matching byte */
|
|
+ masknez v0, v0, t1 /* v0 = 0 when t1 is non-zero, otherwise kept */
|
|
+ jr ra
|
|
+END(strchr)
|
|
+
|
|
+libc_hidden_builtin_def (strchr)
|
|
+weak_alias (strchr, index)
|
|
diff --git a/sysdeps/loongarch/lp64/strchrnul.S b/sysdeps/loongarch/lp64/strchrnul.S
|
|
new file mode 100644
|
|
index 00000000..58b8b372
|
|
--- /dev/null
|
|
+++ b/sysdeps/loongarch/lp64/strchrnul.S
|
|
@@ -0,0 +1,115 @@
|
|
+/* Optimized strchrnul implementation for LoongArch.
|
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library. If not, see
|
|
+ <https://www.gnu.org/licenses/>. */
|
|
+
|
|
+#include <sysdep.h>
|
|
+
|
|
+#define MOVZ(rd,rs,rt) \
|
|
+ masknez t6, rs, rt;\
|
|
+ maskeqz rd, rd, rt;\
|
|
+ or rd, rd, t6 /* Net effect of the three steps: rd = rs when rt is zero, rd unchanged otherwise; t6 is overwritten as scratch. */
|
|
+
|
|
+/* char *strchrnul(const char *s, int c); Scans 8 bytes per load with a0 as the string pointer and a1 as the character; leaves in v0 the address of the first byte equal to the low byte of a1 or of the NUL terminator, whichever comes first. */
|
|
+LEAF(__strchrnul)
|
|
+ .align 6 /* NOTE(review): presumably 2^6 = 64-byte alignment; it follows LEAF(), so if LEAF emits the entry label any padding lands after it - confirm. */
|
|
+
|
|
+ li.w t4, 0x7 /* t4 = 7: mask of the low three address bits */
|
|
+ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
|
|
+ bstrins.d a1, a1, 15, 8 /* copy a1[7:0] into a1[15:8] */
|
|
+ andi t0, a0, 0x7 /* t0 = byte offset of a0 inside its 8-byte word */
|
|
+
|
|
+ ori a2, a2, 0x101 /* a2 = 0x01010101 */
|
|
+ andn t4, a0, t4 /* t4 = a0 with the low three bits cleared */
|
|
+ slli.w t1, t0, 3 /* t1 = that offset expressed in bits */
|
|
+ ld.d t4, t4, 0 /* aligned load of the word that contains the byte at a0 */
|
|
+
|
|
+ nor t8, zero, zero /* t8 = all ones */
|
|
+ bstrins.d a1, a1, 31, 16 /* low four bytes of a1 now all hold the character */
|
|
+ srl.d t4, t4, t1 /* shift out the bytes below a0; the low byte of t4 is now the byte at a0 */
|
|
+
|
|
+ preld 0, a0, 32 /* prefetch hint for the data at a0 + 32 (hint 0, presumably a load prefetch - confirm) */
|
|
+ bstrins.d a1, a1, 63, 32 /* a1 = character replicated into all eight bytes */
|
|
+ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */
|
|
+ srl.d a7, t8, t1 /* a7 = ones over the lanes that still hold loaded data */
|
|
+
|
|
+ nor t8, a7, zero /* t8 = ones over the high lanes vacated by the shift */
|
|
+ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 */
|
|
+ or t5, t8, t4 /* t5 = data with vacated lanes forced to 0xff, so they cannot look like NUL */
|
|
+ and t3, a7, a1 /* t3 = replicated character, zeroed in the vacated lanes */
|
|
+
|
|
+ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f */
|
|
+ xor t2, t5, t3 /* t2 lane is 0x00 where data equals the character; vacated lanes are 0xff */
|
|
+ sub.d a7, t5, a2 /* NUL test: data minus 0x01 per lane ... */
|
|
+ nor a6, t5, a3 /* ... and bit 7 of each lane of the inverted data */
|
|
+
|
|
+ li.w t1, 8
|
|
+ sub.d a5, t2, a2 /* match test: same two steps applied to t2 */
|
|
+ nor a4, t2, a3
|
|
+
|
|
+ and a6, a7, a6 /* a6: lowest set bit flags the first NUL lane (only the lowest bit is used below) */
|
|
+ and a5, a5, a4 /* a5: lowest set bit flags the first lane equal to the character */
|
|
+ or a7, a6, a5
|
|
+ bnez a7, L(_mc8_a) /* NUL or match in the first word; a0 is still the original pointer */
|
|
+
|
|
+ sub.w t1, t1, t0 /* t1 = 8 - offset: bytes covered by the first word */
|
|
+ add.d a0, a0, t1 /* step a0 up to the next 8-byte boundary */
|
|
+L(_aloop): /* main loop: examines two aligned words per iteration */
|
|
+ ld.d t4, a0, 0 /* first word of the pair */
|
|
+
|
|
+ xor t2, t4, a1 /* zero lanes in t2 mark bytes equal to the character */
|
|
+ sub.d a7, t4, a2
|
|
+ nor a6, t4, a3
|
|
+ sub.d a5, t2, a2
|
|
+
|
|
+ nor a4, t2, a3
|
|
+ and a6, a7, a6 /* a6 = NUL flags for this word */
|
|
+ and a5, a5, a4 /* a5 = match flags for this word */
|
|
+
|
|
+ or a7, a6, a5
|
|
+ bnez a7, L(_mc8_a) /* hit in the first word; a0 points at it */
|
|
+
|
|
+ ld.d t4, a0, 8 /* second word of the pair */
|
|
+ addi.d a0, a0, 16 /* advance past both words for the next iteration */
|
|
+
|
|
+ xor t2, t4, a1
|
|
+ sub.d a7, t4, a2
|
|
+ nor a6, t4, a3
|
|
+ sub.d a5, t2, a2
|
|
+
|
|
+ nor a4, t2, a3
|
|
+ and a6, a7, a6
|
|
+ and a5, a5, a4
|
|
+
|
|
+ or a7, a6, a5
|
|
+ beqz a7, L(_aloop) /* nothing found in either word: keep scanning */
|
|
+
|
|
+ addi.d a0, a0, -8 /* hit was in the second word: move a0 back onto it */
|
|
+L(_mc8_a): /* here a0 addresses lane 0 of the flagged word */
|
|
+ ctz.d t0, a5 /* bit index of the first match flag; code relies on 64 when a5 is zero */
|
|
+ ctz.d t2, a6 /* bit index of the first NUL flag */
|
|
+
|
|
+ srli.w t0, t0, 3 /* t0 = byte offset of the first match (8 if none) */
|
|
+ srli.w t2, t2, 3 /* t2 = byte offset of the first NUL (8 if none) */
|
|
+ slt t1, t0, t2 /* t1 = 1 when the match comes strictly before the NUL */
|
|
+
|
|
+ MOVZ(t0,t2,t1) /* t0 keeps the match offset when t1 is set, otherwise takes the NUL offset: the smaller of the two */
|
|
+
|
|
+ add.d v0, a0, t0 /* result: address of whichever byte came first */
|
|
+ jr ra
|
|
+END(__strchrnul)
|
|
+
|
|
+weak_alias(__strchrnul, strchrnul)
|
|
--
|
|
2.33.0
|
|
|