glibc/2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch
Xue Liu 4adfab66ba LoongArch: Optimize string functions including memcpy, memmove,
memset, strchr, strchrnul, strcmp, strncmp, strcpy, strlen, strnlen.

(cherry picked from commit 4ba365320a633ecd4cb47d8f171aa81fcd1dd6ef)
2023-01-29 14:28:25 +08:00

251 lines
5.5 KiB
Diff

From dd99689b821162293506e0344f163b82349a9298 Mon Sep 17 00:00:00 2001
From: Xue Liu <liuxue@loongson.cn>
Date: Sun, 29 Jan 2023 10:22:01 +0800
Subject: [PATCH 2/6] LoongArch: Optimize string functions strchr, strchrnul.
Change-Id: I8b274972642b6a1926d8fc176404bfd83344bc51
---
sysdeps/loongarch/lp64/strchr.S | 107 +++++++++++++++++++++++++++
sysdeps/loongarch/lp64/strchrnul.S | 115 +++++++++++++++++++++++++++++
2 files changed, 222 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/strchr.S
create mode 100644 sysdeps/loongarch/lp64/strchrnul.S
diff --git a/sysdeps/loongarch/lp64/strchr.S b/sysdeps/loongarch/lp64/strchr.S
new file mode 100644
index 00000000..3d64c684
--- /dev/null
+++ b/sysdeps/loongarch/lp64/strchr.S
@@ -0,0 +1,107 @@
+/* Optimized strchr implementation for LoongArch.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* char * strchr (const char *s1, int c);  Returns the address of the first byte equal to the low 8 bits of c, or 0 (NULL) when the terminating NUL is reached first; for c == 0 the terminator's own address is returned.  Scans 8 bytes at a time with aligned loads, so the first load may read bytes located before s1.  */
+LEAF(strchr) /* in: a0 = s1, a1 = c; out: v0 (presumably the same register as a0 -- confirm in the register definitions) */
+ .align 6 /* NOTE(review): sits after LEAF(); if LEAF emits the label first, any padding lands inside the function -- confirm against sysdep.h */
+
+ li.w t4, 0x7 /* t4 = low-bit mask used for 8-byte alignment */
+ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
+ bstrins.d a1, a1, 15, 8 /* copy the low byte of c into bits 15:8 */
+ andi t0, a0, 0x7 /* t0 = s1 & 7, byte offset of s1 inside its aligned word */
+
+ ori a2, a2, 0x101 /* a2 = 0x01010101 */
+ andn t4, a0, t4 /* t4 = s1 & ~7, address rounded down to 8 bytes */
+ slli.w t1, t0, 3 /* t1 = offset expressed in bits */
+
+ ld.d t4, t4, 0 /* aligned load of the word that contains s1[0] */
+
+ nor t8, zero, zero /* t8 = all ones */
+ bstrins.d a1, a1, 31, 16 /* c now replicated into the low 4 bytes */
+ srl.d t4, t4, t1 /* discard the bytes that precede s1; s1[0] becomes the lowest byte */
+
+ bstrins.d a1, a1, 63, 32 /* a1 = c replicated into all 8 bytes */
+ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */
+ srl.d a7, t8, t1 /* a7 = mask of the byte lanes that still hold string data */
+
+ li.w t1, 8
+ nor t8, a7, zero /* t8 = ~a7, the lanes that the shift filled with zero */
+ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 */
+ or t5, t8, t4 /* t5 = data with the filled lanes forced to 0xff so they cannot look like NUL */
+ and t3, a7, a1 /* t3 = c pattern kept only in the real data lanes */
+
+ sub.w t1, t1, t0 /* t1 = 8 - offset, bytes covered by this first word */
+ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f */
+ xor t2, t5, t3 /* t2: lanes equal to c become 0x00; filled lanes stay 0xff */
+ sub.d a7, t5, a2 /* zero-byte test on t5, part 1: t5 - 0x01.. */
+ nor a6, t5, a3 /* part 2: ~t5 & 0x80.. */
+
+ sub.d a5, t2, a2 /* same zero-byte test applied to t2 (matches of c) */
+ nor a4, t2, a3
+
+ and a6, a7, a6 /* a6 != 0 iff a NUL byte is present; lowest set bit marks the first one */
+ and a5, a5, a4 /* a5 != 0 iff a byte equals c; lowest set bit marks the first one */
+ or a7, a6, a5
+ bnez a7, L(_mc8_a) /* hit inside the first word; a0 is still s1, matching lane 0 */
+
+ add.d a0, a0, t1 /* advance a0 to the next 8-byte boundary */
+L(_aloop): /* main loop: two aligned 8-byte words per iteration */
+ ld.d t4, a0, 0
+
+ xor t2, t4, a1 /* lanes equal to c become 0x00 */
+ sub.d a7, t4, a2
+ nor a6, t4, a3
+ sub.d a5, t2, a2
+
+ nor a4, t2, a3
+ and a6, a7, a6 /* a6 = NUL flags for this word */
+ and a5, a5, a4 /* a5 = match flags for this word */
+ or a7, a6, a5
+ bnez a7, L(_mc8_a) /* hit in the word at a0 */
+
+ ld.d t4, a0, 8 /* second word of the pair */
+ addi.d a0, a0, 16 /* pre-advance for the next iteration */
+ xor t2, t4, a1
+ sub.d a7, t4, a2
+ nor a6, t4, a3
+ sub.d a5, t2, a2
+
+ nor a4, t2, a3
+ and a6, a7, a6
+ and a5, a5, a4
+ or a7, a6, a5
+ beqz a7, L(_aloop) /* neither NUL nor c in this pair: keep scanning */
+
+ addi.d a0, a0, -8 /* hit was in the second word: point a0 back at it */
+L(_mc8_a): /* here a0 = address of lane 0 of the hit word, a5 = match flags, a6 = NUL flags */
+
+ ctz.d t0, a5 /* bit position of the first match flag; code relies on ctz.d giving 64 when a5 == 0 */
+ ctz.d t2, a6 /* bit position of the first NUL flag (64 when a6 == 0) */
+
+ srli.w t0, t0, 3 /* byte index of the first match (8 = none) */
+ srli.w t2, t2, 3 /* byte index of the first NUL (8 = none) */
+ sltu t1, t2, t0 /* t1 = 1 when the NUL comes strictly before any match */
+ add.d v0, a0, t0 /* candidate result: address of the first match */
+ masknez v0, v0, t1 /* result = 0 when t1 != 0, i.e. c was not found before the terminator */
+ jr ra
+END(strchr)
+
+libc_hidden_builtin_def (strchr)
+weak_alias (strchr, index) /* also export the routine under the name index */
diff --git a/sysdeps/loongarch/lp64/strchrnul.S b/sysdeps/loongarch/lp64/strchrnul.S
new file mode 100644
index 00000000..58b8b372
--- /dev/null
+++ b/sysdeps/loongarch/lp64/strchrnul.S
@@ -0,0 +1,115 @@
+/* Optimized strchrnul implementation for LoongArch.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define MOVZ(rd,rs,rt) \
+ masknez t6, rs, rt;\
+ maskeqz rd, rd, rt;\
+ or rd, rd, t6 /* net effect of the three instructions: rd = (rt == 0) ? rs : rd; t6 is used as scratch and is clobbered */
+
+/* char *strchrnul(const char *s, int c);  Returns the address of the first byte equal to the low 8 bits of c, or the address of the terminating NUL when that comes first (never 0).  Scans 8 bytes at a time with aligned loads, so the first load may read bytes located before s.  */
+LEAF(__strchrnul) /* in: a0 = s, a1 = c; out: v0 (presumably the same register as a0 -- confirm in the register definitions) */
+ .align 6 /* NOTE(review): sits after LEAF(); if LEAF emits the label first, any padding lands inside the function -- confirm against sysdep.h */
+
+ li.w t4, 0x7 /* t4 = low-bit mask used for 8-byte alignment */
+ lu12i.w a2, 0x01010 /* a2 = 0x01010000 */
+ bstrins.d a1, a1, 15, 8 /* copy the low byte of c into bits 15:8 */
+ andi t0, a0, 0x7 /* t0 = s & 7, byte offset of s inside its aligned word */
+
+ ori a2, a2, 0x101 /* a2 = 0x01010101 */
+ andn t4, a0, t4 /* t4 = s & ~7, address rounded down to 8 bytes */
+ slli.w t1, t0, 3 /* t1 = offset expressed in bits */
+ ld.d t4, t4, 0 /* aligned load of the word that contains s[0] */
+
+ nor t8, zero, zero /* t8 = all ones */
+ bstrins.d a1, a1, 31, 16 /* c now replicated into the low 4 bytes */
+ srl.d t4, t4, t1 /* discard the bytes that precede s; s[0] becomes the lowest byte */
+
+ preld 0, a0, 32 /* prefetch hint 0 for the address s + 32 */
+ bstrins.d a1, a1, 63, 32 /* a1 = c replicated into all 8 bytes */
+ bstrins.d a2, a2, 63, 32 /* a2 = 0x0101010101010101 */
+ srl.d a7, t8, t1 /* a7 = mask of the byte lanes that still hold string data */
+
+ nor t8, a7, zero /* t8 = ~a7, the lanes that the shift filled with zero */
+ slli.d a3, a2, 7 /* a3 = 0x8080808080808080 */
+ or t5, t8, t4 /* t5 = data with the filled lanes forced to 0xff so they cannot look like NUL */
+ and t3, a7, a1 /* t3 = c pattern kept only in the real data lanes */
+
+ nor a3, a3, zero /* a3 = 0x7f7f7f7f7f7f7f7f */
+ xor t2, t5, t3 /* t2: lanes equal to c become 0x00; filled lanes stay 0xff */
+ sub.d a7, t5, a2 /* zero-byte test on t5, part 1: t5 - 0x01.. */
+ nor a6, t5, a3 /* part 2: ~t5 & 0x80.. */
+
+ li.w t1, 8
+ sub.d a5, t2, a2 /* same zero-byte test applied to t2 (matches of c) */
+ nor a4, t2, a3
+
+ and a6, a7, a6 /* a6 != 0 iff a NUL byte is present; lowest set bit marks the first one */
+ and a5, a5, a4 /* a5 != 0 iff a byte equals c; lowest set bit marks the first one */
+ or a7, a6, a5
+ bnez a7, L(_mc8_a) /* hit inside the first word; a0 is still s, matching lane 0 */
+
+ sub.w t1, t1, t0 /* t1 = 8 - offset, bytes covered by the first word */
+ add.d a0, a0, t1 /* advance a0 to the next 8-byte boundary */
+L(_aloop): /* main loop: two aligned 8-byte words per iteration */
+ ld.d t4, a0, 0
+
+ xor t2, t4, a1 /* lanes equal to c become 0x00 */
+ sub.d a7, t4, a2
+ nor a6, t4, a3
+ sub.d a5, t2, a2
+
+ nor a4, t2, a3
+ and a6, a7, a6 /* a6 = NUL flags for this word */
+ and a5, a5, a4 /* a5 = match flags for this word */
+
+ or a7, a6, a5
+ bnez a7, L(_mc8_a) /* hit in the word at a0 */
+
+ ld.d t4, a0, 8 /* second word of the pair */
+ addi.d a0, a0, 16 /* pre-advance for the next iteration */
+
+ xor t2, t4, a1
+ sub.d a7, t4, a2
+ nor a6, t4, a3
+ sub.d a5, t2, a2
+
+ nor a4, t2, a3
+ and a6, a7, a6
+ and a5, a5, a4
+
+ or a7, a6, a5
+ beqz a7, L(_aloop) /* neither NUL nor c in this pair: keep scanning */
+
+ addi.d a0, a0, -8 /* hit was in the second word: point a0 back at it */
+L(_mc8_a): /* here a0 = address of lane 0 of the hit word, a5 = match flags, a6 = NUL flags */
+ ctz.d t0, a5 /* bit position of the first match flag; code relies on ctz.d giving 64 when a5 == 0 */
+ ctz.d t2, a6 /* bit position of the first NUL flag (64 when a6 == 0) */
+
+ srli.w t0, t0, 3 /* byte index of the first match (8 = none) */
+ srli.w t2, t2, 3 /* byte index of the first NUL (8 = none) */
+ slt t1, t0, t2 /* t1 = 1 when the match comes strictly before the NUL */
+
+ MOVZ(t0,t2,t1) /* t0 = t1 ? t0 : t2, i.e. the smaller of the two byte indexes */
+
+ add.d v0, a0, t0 /* result = address of the first c or of the terminator */
+ jr ra
+END(__strchrnul)
+
+weak_alias(__strchrnul, strchrnul) /* export the routine under the public name strchrnul */
--
2.33.0