From 4ba7d34a059c3ce0738940f293fded7f3096d016 Mon Sep 17 00:00:00 2001
From: herengui
Date: Thu, 31 Aug 2023 09:46:51 +0800
Subject: [PATCH 1001/1001] add loongarch64 support

not upstream, new

Signed-off-by: herengui
---
 lib/builtins/loongarch/fp_mode.c              |  61 ++++++
 ...ommon_interceptors_vfork_loongarch64.inc.S |  57 ++++++
 .../sanitizer_syscall_linux_loongarch64.inc   | 168 +++++++++++++++++
 lib/tsan/rtl/tsan_rtl_loongarch64.S           | 142 ++++++++++++++
 lib/xray/xray_loongarch.cpp                   | 173 ++++++++++++++++++
 lib/xray/xray_trampoline_loongarch.S          | 129 +++++++++++++
 6 files changed, 730 insertions(+)
 create mode 100644 lib/builtins/loongarch/fp_mode.c
 create mode 100644 lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S
 create mode 100644 lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc
 create mode 100644 lib/tsan/rtl/tsan_rtl_loongarch64.S
 create mode 100644 lib/xray/xray_loongarch.cpp
 create mode 100644 lib/xray/xray_trampoline_loongarch.S

diff --git a/compiler-rt.orig/lib/builtins/loongarch/fp_mode.c b/compiler-rt.new/lib/builtins/loongarch/fp_mode.c
new file mode 100644
index 0000000..6471cc3
--- /dev/null
+++ b/compiler-rt.new/lib/builtins/loongarch/fp_mode.c
@@ -0,0 +1,61 @@
+//===-- lib/builtins/loongarch/fp_mode.c - Floating-point mode utilities -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "../fp_mode.h"
+
+#define LOONGARCH_TONEAREST 0x0000
+#define LOONGARCH_TOWARDZERO 0x0100
+#define LOONGARCH_UPWARD 0x0200
+#define LOONGARCH_DOWNWARD 0x0300
+
+#define LOONGARCH_RMODE_MASK \
+  (LOONGARCH_TONEAREST | LOONGARCH_TOWARDZERO | LOONGARCH_UPWARD | \
+   LOONGARCH_DOWNWARD)
+
+#define LOONGARCH_INEXACT 0x10000
+
+FE_ROUND_MODE __fe_getround(void) {
+#if __loongarch_frlen != 0
+  int fcsr;
+#if __clang__
+  __asm__ __volatile__("movfcsr2gr %0, $fcsr0" : "=r"(fcsr));
+#else
+  /* FIXME: gcc cannot recognise $fcsr0, use $r0 as a workaround. */
+  __asm__ __volatile__("movfcsr2gr %0, $r0" : "=r"(fcsr));
+#endif
+  fcsr &= LOONGARCH_RMODE_MASK;
+  switch (fcsr) {
+  case LOONGARCH_TOWARDZERO:
+    return FE_TOWARDZERO;
+  case LOONGARCH_DOWNWARD:
+    return FE_DOWNWARD;
+  case LOONGARCH_UPWARD:
+    return FE_UPWARD;
+  case LOONGARCH_TONEAREST:
+  default:
+    return FE_TONEAREST;
+  }
+#else
+  return FE_TONEAREST;
+#endif
+}
+
+int __fe_raise_inexact(void) {
+#if __loongarch_frlen != 0
+  int fcsr;
+#if __clang__
+  __asm__ __volatile__("movfcsr2gr %0, $fcsr0" : "=r"(fcsr));
+  __asm__ __volatile__("movgr2fcsr $fcsr0, %0" ::"r"(fcsr | LOONGARCH_INEXACT));
+#else
+  /* FIXME: gcc cannot recognise $fcsr0, use $r0 as a workaround. */
+  __asm__ __volatile__("movfcsr2gr %0, $r0" : "=r"(fcsr));
+  __asm__ __volatile__("movgr2fcsr $r0, %0" ::"r"(fcsr | LOONGARCH_INEXACT));
+#endif
+#endif
+  return 0;
+}
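Note: the rounding-mode field read by __fe_getround sits in bits 8-9 of $fcsr0,
which is exactly what the LOONGARCH_* defines encode. A minimal host-side
sketch of the same decoding, with a hard-coded sample FCSR value instead of a
real register read (illustrative only, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t fcsr = 0x0300;  // sample raw FCSR0 value (rounding bits 8-9)
      switch (fcsr & 0x0300) { // LOONGARCH_RMODE_MASK
      case 0x0000: std::puts("FE_TONEAREST"); break;
      case 0x0100: std::puts("FE_TOWARDZERO"); break;
      case 0x0200: std::puts("FE_UPWARD"); break;
      case 0x0300: std::puts("FE_DOWNWARD"); break;
      }
      return 0;
    }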
diff --git a/compiler-rt.orig/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S b/compiler-rt.new/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S
new file mode 100644
index 0000000..dae72b5
--- /dev/null
+++ b/compiler-rt.new/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S
@@ -0,0 +1,57 @@
+#if defined(__loongarch64) && defined(__linux__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA)
+ASM_HIDDEN(_ZN14__interception10real_vforkE)
+
+.text
+.globl ASM_WRAPPER_NAME(vfork)
+ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
+ASM_WRAPPER_NAME(vfork):
+  // Save $ra in the off-stack spill area.
+  // Allocate a temporary stack frame.
+  addi.d $sp, $sp, -16
+  // Save the $ra value.
+  st.d $ra, $sp, 8
+  bl COMMON_INTERCEPTOR_SPILL_AREA
+  // Restore $ra from the stack.
+  ld.d $ra, $sp, 8
+  // Release the stack frame.
+  addi.d $sp, $sp, 16
+  // Store $ra into the spill area returned in $a0.
+  st.d $ra, $a0, 0
+
+  // Call real vfork. This may return twice. User code that runs between the
+  // first and the second return may clobber the stack frame of the
+  // interceptor; that's why it does not have a frame.
+  la.local $a0, _ZN14__interception10real_vforkE
+  ld.d $a0, $a0, 0
+  jirl $ra, $a0, 0
+
+  // Allocate a stack frame.
+  addi.d $sp, $sp, -16
+  // Save vfork's return value.
+  st.d $a0, $sp, 8
+  // The child returns 0; skip the shadow cleanup in that case.
+  beqz $a0, .L_exit
+
+  // $a0 != 0 => parent process. Clear stack shadow.
+  // Pass the pre-call $sp to the handler.
+  addi.d $a0, $sp, 16
+  bl %plt(COMMON_INTERCEPTOR_HANDLE_VFORK)
+
+.L_exit:
+  // Restore $ra from the spill area.
+  bl COMMON_INTERCEPTOR_SPILL_AREA
+  ld.d $ra, $a0, 0
+  // Reload vfork's return value.
+  ld.d $a0, $sp, 8
+  // Release the stack frame.
+  addi.d $sp, $sp, 16
+  jr $ra
+ASM_SIZE(vfork)
+
+.weak vfork
+.set vfork, ASM_WRAPPER_NAME(vfork)
+
+#endif
diff --git a/compiler-rt.orig/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc b/compiler-rt.new/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc
new file mode 100644
index 0000000..0d8d530
--- /dev/null
+++ b/compiler-rt.new/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc
@@ -0,0 +1,168 @@
+//===-- sanitizer_syscall_linux_loongarch64.inc -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementations of internal_syscall and internal_iserror for
+// Linux/loongarch64.
+//
+//===----------------------------------------------------------------------===//
+
+// About local register variables:
+// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+//
+// Kernel ABI (see http://man7.org/linux/man-pages/man2/syscall.2.html and the
+// glibc sources): the syscall number is passed in $a7, arguments are passed
+// in $a0-$a6, and the result is returned in $a0.
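To make that calling convention concrete, here is a standalone sketch of a
zero-argument raw syscall in the same style as the __internal_syscall
overloads that follow. It only builds on a Linux/loongarch64 host, and uses
SYS_getpid purely as an example:

    #include <sys/syscall.h>

    #include <cstdint>
    #include <cstdio>

    // Number goes in $a7, the result comes back in $a0.
    static uint64_t raw_syscall0(uint64_t nr) {
      register uint64_t a7 asm("a7") = nr;
      register uint64_t a0 asm("a0");
      __asm__ volatile("syscall 0" : "=r"(a0) : "r"(a7) : "memory");
      return a0;
    }

    int main() {
      std::printf("pid = %llu\n", (unsigned long long)raw_syscall0(SYS_getpid));
      return 0;
    }

The overloads below are then selected by argument count through the
__SYSCALL_NARGS machinery, so a call such as internal_syscall(SYSCALL(close),
fd) dispatches to __internal_syscall1.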
+#define SYSCALL(name) __NR_##name + +#define INTERNAL_SYSCALL_CLOBBERS \ + "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" + +static uptr __internal_syscall(u64 nr) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0"); + __asm__ volatile("syscall 0\n\t" + : "=r"(a0) + : "r"(a7) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall0(n) (__internal_syscall)(n) + +static uptr __internal_syscall(u64 nr, u64 arg1) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall1(n, a1) (__internal_syscall)(n, (u64)(a1)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall2(n, a1, a2) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall3(n, a1, a2, a3) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, + u64 arg4) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + register u64 a3 asm("a3") = arg4; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2), "r"(a3) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall4(n, a1, a2, a3, a4) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, + long arg5) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + register u64 a3 asm("a3") = arg4; + register u64 a4 asm("a4") = arg5; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall5(n, a1, a2, a3, a4, a5) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \ + (u64)(a5)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, + long arg5, long arg6) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + register u64 a3 asm("a3") = arg4; + register u64 a4 asm("a4") = arg5; + register u64 a5 asm("a5") = arg6; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall6(n, a1, a2, a3, a4, a5, a6) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \ + (u64)(a5), (long)(a6)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, + long arg5, long arg6, long arg7) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 
asm("a1") = arg2;
  register u64 a2 asm("a2") = arg3;
  register u64 a3 asm("a3") = arg4;
  register u64 a4 asm("a4") = arg5;
  register u64 a5 asm("a5") = arg6;
  register u64 a6 asm("a6") = arg7;
  __asm__ volatile("syscall 0\n\t"
                   : "+r"(a0)
                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
                     "r"(a6)
                   : INTERNAL_SYSCALL_CLOBBERS);
  return a0;
}
#define __internal_syscall7(n, a1, a2, a3, a4, a5, a6, a7) \
  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \
                       (u64)(a5), (long)(a6), (long)(a7))

#define __SYSCALL_NARGS_X(a1, a2, a3, a4, a5, a6, a7, a8, n, ...) n
#define __SYSCALL_NARGS(...) \
  __SYSCALL_NARGS_X(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0, )
#define __SYSCALL_CONCAT_X(a, b) a##b
#define __SYSCALL_CONCAT(a, b) __SYSCALL_CONCAT_X(a, b)
#define __SYSCALL_DISP(b, ...) \
  __SYSCALL_CONCAT(b, __SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)

#define internal_syscall(...) __SYSCALL_DISP(__internal_syscall, __VA_ARGS__)

// Helper function used to avoid clobbering of errno.
bool internal_iserror(uptr retval, int *internal_errno) {
  if (retval >= (uptr)-4095) {
    if (internal_errno)
      *internal_errno = -retval;
    return true;
  }
  return false;
}
diff --git a/compiler-rt.orig/lib/tsan/rtl/tsan_rtl_loongarch64.S b/compiler-rt.new/lib/tsan/rtl/tsan_rtl_loongarch64.S
new file mode 100644
index 0000000..66b16e4
--- /dev/null
+++ b/compiler-rt.new/lib/tsan/rtl/tsan_rtl_loongarch64.S
@@ -0,0 +1,142 @@
+.section .text
+
+.hidden __tsan_setjmp
+.comm _ZN14__interception11real_setjmpE,8,8
+.globl setjmp
+.type setjmp, @function
+setjmp:
+
+  # Save env parameters.
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+
+  # Save jmp_buf.
+  st.d $a0, $sp, 0
+
+  # Obtain the caller's $sp and call the tsan interceptor.
+  addi.d $a0, $sp, 32
+  bl __tsan_setjmp
+
+  # Restore jmp_buf.
+  ld.d $a0, $sp, 0
+
+  # Load the address of libc's setjmp into $t0.
+  la.got $t0, _ZN14__interception11real_setjmpE
+
+  # Restore env parameters.
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+
+  # Tail jump to libc's setjmp.
+  ld.d $t0, $t0, 0
+  jr $t0
+
+.size setjmp, .-setjmp
+
+.globl _setjmp
+.comm _ZN14__interception12real__setjmpE,8,8
+.type _setjmp, @function
+_setjmp:
+
+  # Save env parameters.
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+
+  # Save jmp_buf.
+  st.d $a0, $sp, 0
+
+  # Obtain the caller's $sp and call the tsan interceptor.
+  addi.d $a0, $sp, 32
+  bl __tsan_setjmp
+
+  # Restore jmp_buf.
+  ld.d $a0, $sp, 0
+
+  # Load the address of libc's _setjmp into $t0.
+  la.got $t0, _ZN14__interception12real__setjmpE
+
+  # Restore env parameters.
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+
+  # Tail jump to libc's _setjmp.
+  ld.d $t0, $t0, 0
+  jr $t0
+
+.size _setjmp, .-_setjmp
+
+.globl sigsetjmp
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.type sigsetjmp, @function
+sigsetjmp:
+
+  # Save env parameters.
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+
+  # Save jmp_buf and savesigs.
+  st.d $a0, $sp, 0
+  st.d $a1, $sp, 8
+
+  # Obtain the caller's $sp and call the tsan interceptor.
+  addi.d $a0, $sp, 32
+  bl __tsan_setjmp
+
+  # Restore jmp_buf and savesigs.
+  ld.d $a0, $sp, 0
+  ld.d $a1, $sp, 8
+
+  # Load the address of libc's sigsetjmp into $t0.
+  la.got $t0, _ZN14__interception14real_sigsetjmpE
+
+  # Restore env parameters.
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+
+  # Tail jump to libc's sigsetjmp.
+  ld.d $t0, $t0, 0
+  jr $t0
+
+.size sigsetjmp, .-sigsetjmp
+
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl __sigsetjmp
+.type __sigsetjmp, @function
+__sigsetjmp:
+
+  # Save env parameters.
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+
+  # Save jmp_buf and savesigs.
+  st.d $a0, $sp, 0
+  st.d $a1, $sp, 8
+
+  # Obtain the caller's $sp and call the tsan interceptor.
+  addi.d $a0, $sp, 32
+  bl __tsan_setjmp
+
+  # Restore jmp_buf and savesigs.
+  ld.d $a0, $sp, 0
+  ld.d $a1, $sp, 8
+
+  # Load the address of libc's __sigsetjmp into $t0.
+  la.got $t0, _ZN14__interception16real___sigsetjmpE
+
+  # Restore env parameters.
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+
+  # Tail jump to libc's __sigsetjmp.
+  ld.d $t0, $t0, 0
+  jr $t0
+
+.size __sigsetjmp, .-__sigsetjmp
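All four wrappers above follow the same pattern. Expressed as a rough C-level
sketch (illustrative only: the real interceptor must stay in assembly so the
final call is a tail jump that reuses the caller's return address; real_setjmp
is a hypothetical stand-in for the _ZN14__interception10real_setjmpE-style
pointer, and __builtin_frame_address(0) only approximates the $sp + 32 value
the assembly passes):

    extern "C" void __tsan_setjmp(unsigned long sp); // TSan runtime entry point

    using setjmp_fn = int (*)(void *env);
    extern setjmp_fn real_setjmp; // resolved to libc's setjmp at startup

    extern "C" int setjmp_wrapper(void *env) {
      // Report the caller's stack pointer to TSan before entering setjmp.
      __tsan_setjmp(reinterpret_cast<unsigned long>(__builtin_frame_address(0)));
      return real_setjmp(env); // the assembly performs this as "jr $t0"
    }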
diff --git a/compiler-rt.orig/lib/xray/xray_loongarch.cpp b/compiler-rt.new/lib/xray/xray_loongarch.cpp
new file mode 100644
index 0000000..c72bb2d
--- /dev/null
+++ b/compiler-rt.new/lib/xray/xray_loongarch.cpp
@@ -0,0 +1,173 @@
+//===-- xray_loongarch.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of LoongArch-specific routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+  PO_ADDID = 0x02c00000,  // addi.d rd, rj, imm
+  PO_SD = 0x29c00000,     // st.d rd, base, offset
+  PO_LU12IW = 0x14000000, // lu12i.w rd, imm
+  PO_ORI = 0x03800000,    // ori rd, rj, imm
+  PO_LU32ID = 0x16000000, // lu32i.d rd, imm
+  PO_LU52ID = 0x03000000, // lu52i.d rd, rj, imm
+  PO_JIRL = 0x4c000000,   // jirl rd, rj, 0
+  PO_LD = 0x28c00000,     // ld.d rd, base, offset
+  PO_B44 = 0x50002c00,    // b #44
+};
+
+enum RegNum : uint32_t {
+  RN_T0 = 0xC,
+  RN_T1 = 0xD,
+  RN_RA = 0x1,
+  RN_SP = 0x3,
+};
+
+// Encodes the 2RI12 format: addi.d, lu52i.d, ori, ld.d, st.d.
+inline static uint32_t
+encodeInstruction_i12(uint32_t Opcode, uint32_t Rd, uint32_t Rj,
+                      uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rj << 5 | Rd | Imm << 10);
+}
+
+// Encodes the 1RI20 format: lu12i.w, lu32i.d.
+inline static uint32_t
+encodeInstruction_si20(uint32_t Opcode, uint32_t Rd,
+                       uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rd | Imm << 5);
+}
+
+// Encodes the 2RI16 format: jirl.
+inline static uint32_t
+encodeInstruction_si16(uint32_t Opcode, uint32_t Rd, uint32_t Rj,
+                       uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rj << 5 | Rd | Imm << 10);
+}
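As a sanity check on the 2RI12 layout used by encodeInstruction_i12
(opcode[31:22], imm12[21:10], rj[9:5], rd[4:0]), this standalone snippet
encodes addi.d $sp, $sp, -16 and compares it against the expected machine
word; the immediate must be the 12-bit two's-complement pattern 0xff0, since a
wider value such as 0xfff0 would spill out of the immediate field into the
opcode bits (illustrative only, using the constants defined in this file):

    #include <cassert>
    #include <cstdint>

    // Same bit layout as encodeInstruction_i12 above.
    static uint32_t encode_i12(uint32_t op, uint32_t rd, uint32_t rj,
                               uint32_t imm) {
      return op | rj << 5 | rd | imm << 10;
    }

    int main() {
      // addi.d $sp, $sp, -16 with PO_ADDID = 0x02c00000 and $sp = register 3.
      assert(encode_i12(0x02c00000, 3, 3, 0xff0) == 0x02ffc063);
      return 0;
    }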
inline static bool patchSled(const bool Enable, const uint32_t FuncId,
                             const XRaySledEntry &Sled,
                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
  // When |Enable| == true,
  // we replace the following compile-time stub (sled):
  //
  // xray_sled_n:
  //   B .tmpN
  //   11 NOPs (44 bytes)
  //   .tmpN
  //
  // with the following runtime patch:
  //
  // xray_sled_n (64-bit):
  //   addi.d sp, sp, -16                     ; create the stack frame
  //   nop                                    ; Address[1] keeps a sled NOP
  //   st.d   ra, sp, 8                       ; save the return address
  //   lu12i.w t0, %abs_hi20(__xray_FunctionEntry/Exit)
  //   ori    t0, t0, %abs_lo12(__xray_FunctionEntry/Exit)
  //   lu32i.d t0, %abs64_lo20(__xray_FunctionEntry/Exit)
  //   lu52i.d t0, t0, %abs64_hi12(__xray_FunctionEntry/Exit)
  //   lu12i.w t1, %abs_hi20(function_id)
  //   ori    t1, t1, %abs_lo12(function_id)  ; pass the function id
  //   jirl   ra, t0, 0                       ; call the tracing hook
  //   ld.d   ra, sp, 8                       ; restore the return address
  //   addi.d sp, sp, 16                      ; delete the stack frame
  //
  // Replacement of the first 4-byte instruction should be the last and atomic
  // operation, so that the user code which reaches the sled concurrently
  // either jumps over the whole sled, or executes the whole sled when the
  // latter is ready.
  //
  // When |Enable| == false, we set back the first instruction in the sled to
  // be
  //   B #44

  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
  if (Enable) {
    uint32_t LoTracingHookAddr =
        reinterpret_cast<uint64_t>(TracingHook) & 0xfff;
    uint32_t HiTracingHookAddr =
        (reinterpret_cast<uint64_t>(TracingHook) >> 12) & 0xfffff;
    uint32_t HigherTracingHookAddr =
        (reinterpret_cast<uint64_t>(TracingHook) >> 32) & 0xfffff;
    uint32_t HighestTracingHookAddr =
        (reinterpret_cast<uint64_t>(TracingHook) >> 52) & 0xfff;
    uint32_t LoFunctionID = FuncId & 0xfff;
    uint32_t HiFunctionID = (FuncId >> 12) & 0xfffff;
    Address[2] = encodeInstruction_i12(PatchOpcodes::PO_SD, RegNum::RN_RA,
                                       RegNum::RN_SP, 0x8);
    Address[3] = encodeInstruction_si20(PatchOpcodes::PO_LU12IW, RegNum::RN_T0,
                                        HiTracingHookAddr);
    Address[4] = encodeInstruction_i12(PatchOpcodes::PO_ORI, RegNum::RN_T0,
                                       RegNum::RN_T0, LoTracingHookAddr);
    Address[5] = encodeInstruction_si20(PatchOpcodes::PO_LU32ID, RegNum::RN_T0,
                                        HigherTracingHookAddr);
    Address[6] = encodeInstruction_i12(PatchOpcodes::PO_LU52ID, RegNum::RN_T0,
                                       RegNum::RN_T0, HighestTracingHookAddr);
    Address[7] = encodeInstruction_si20(PatchOpcodes::PO_LU12IW, RegNum::RN_T1,
                                        HiFunctionID);
    Address[8] = encodeInstruction_i12(PatchOpcodes::PO_ORI, RegNum::RN_T1,
                                       RegNum::RN_T1, LoFunctionID);
    Address[9] = encodeInstruction_si16(PatchOpcodes::PO_JIRL, RegNum::RN_RA,
                                        RegNum::RN_T0, 0);
    Address[10] = encodeInstruction_i12(PatchOpcodes::PO_LD, RegNum::RN_RA,
                                        RegNum::RN_SP, 0x8);
    Address[11] = encodeInstruction_i12(PatchOpcodes::PO_ADDID, RegNum::RN_SP,
                                        RegNum::RN_SP, 0x10);
    uint32_t CreateStackSpace = encodeInstruction_i12(
        PatchOpcodes::PO_ADDID, RegNum::RN_SP, RegNum::RN_SP, 0xff0);
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint32_t> *>(Address),
        uint32_t(PatchOpcodes::PO_B44), std::memory_order_release);
  }
  return true;
}

bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled,
                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
  return patchSled(Enable, FuncId, Sled, Trampoline);
}

bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
}

bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // FIXME: In the future we'd need to distinguish between non-tail exits and
  // tail exits for better information preservation.
  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
}

bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // FIXME: Implement on LoongArch?
  return false;
}

bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // FIXME: Implement on LoongArch?
  return false;
}
} // namespace __xray

extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
  // FIXME: this will have to be implemented in the trampoline assembly file.
}
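The Hi/Lo computations in patchSled mirror how lu12i.w/ori/lu32i.d/lu52i.d
assemble a 64-bit absolute address: bits [11:0] via ori, [31:12] via lu12i.w,
[51:32] via lu32i.d, and [63:52] via lu52i.d (lu12i.w's sign extension is
irrelevant here because the later lu32i.d/lu52i.d overwrite the upper bits). A
standalone illustration with an arbitrary sample address:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t addr = 0x123456789abcdef0ull;  // hypothetical hook address
      uint32_t lo12 = addr & 0xfff;           // -> ori
      uint32_t hi20 = (addr >> 12) & 0xfffff; // -> lu12i.w
      uint32_t lo20 = (addr >> 32) & 0xfffff; // -> lu32i.d
      uint32_t hi12 = (addr >> 52) & 0xfff;   // -> lu52i.d
      assert(lo12 == 0xef0 && hi20 == 0x9abcd && lo20 == 0x45678 &&
             hi12 == 0x123);
      return 0;
    }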
diff --git a/compiler-rt.orig/lib/xray/xray_trampoline_loongarch.S b/compiler-rt.new/lib/xray/xray_trampoline_loongarch.S
new file mode 100644
index 0000000..7ac019b
--- /dev/null
+++ b/compiler-rt.new/lib/xray/xray_trampoline_loongarch.S
@@ -0,0 +1,129 @@
+//===-- xray_trampoline_loongarch.S -----------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the LoongArch-specific assembly for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+  .text
+  .file "xray_trampoline_loongarch.S"
+  .globl __xray_FunctionEntry
+  .p2align 2
+  .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+  .cfi_startproc
+  // Save argument registers before doing any actual work.
+  addi.d $sp, $sp, -136
+  .cfi_def_cfa_offset 136
+  st.d $ra, $sp, 128
+  .cfi_offset 1, -8
+  st.d $a7, $sp, 120
+  st.d $a6, $sp, 112
+  st.d $a5, $sp, 104
+  st.d $a4, $sp, 96
+  st.d $a3, $sp, 88
+  st.d $a2, $sp, 80
+  st.d $a1, $sp, 72
+  st.d $a0, $sp, 64
+  fst.d $f7, $sp, 56
+  fst.d $f6, $sp, 48
+  fst.d $f5, $sp, 40
+  fst.d $f4, $sp, 32
+  fst.d $f3, $sp, 24
+  fst.d $f2, $sp, 16
+  fst.d $f1, $sp, 8
+  fst.d $f0, $sp, 0
+
+  lu12i.w $t2, %got_hi20(_ZN6__xray19XRayPatchedFunctionE)
+  ori $t2, $t2, %got_lo12(_ZN6__xray19XRayPatchedFunctionE)
+  lu32i.d $t2, %got64_lo20(_ZN6__xray19XRayPatchedFunctionE)
+  lu52i.d $t2, $t2, %got64_hi12(_ZN6__xray19XRayPatchedFunctionE)
+  ld.d $t2, $t2, 0
+
+  beqz $t2, FunctionEntry_restore
+
+  // a1=0 means that we are tracing an entry event.
+  move $a1, $zero
+  // Function ID is in t1 (the first parameter).
+  move $a0, $t1
+  jirl $ra, $t2, 0
+
+FunctionEntry_restore:
+  // Restore argument registers.
+  fld.d $f0, $sp, 0
+  fld.d $f1, $sp, 8
+  fld.d $f2, $sp, 16
+  fld.d $f3, $sp, 24
+  fld.d $f4, $sp, 32
+  fld.d $f5, $sp, 40
+  fld.d $f6, $sp, 48
+  fld.d $f7, $sp, 56
+  ld.d $a0, $sp, 64
+  ld.d $a1, $sp, 72
+  ld.d $a2, $sp, 80
+  ld.d $a3, $sp, 88
+  ld.d $a4, $sp, 96
+  ld.d $a5, $sp, 104
+  ld.d $a6, $sp, 112
+  ld.d $a7, $sp, 120
+  ld.d $ra, $sp, 128
+  addi.d $sp, $sp, 136
+  jr $ra
FunctionEntry_end:
+  .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry
+  .cfi_endproc
+
+  .text
+  .globl __xray_FunctionExit
+  .p2align 2
+  .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+  .cfi_startproc
+  // Save return registers before doing any actual work.
+  addi.d $sp, $sp, -48
+  .cfi_def_cfa_offset 48
+  st.d $ra, $sp, 40
+  .cfi_offset 1, -8
+  st.d $fp, $sp, 32
+  st.d $a1, $sp, 24
+  st.d $a0, $sp, 16
+  fst.d $f1, $sp, 8
+  fst.d $f0, $sp, 0
+
+  lu12i.w $t2, %got_hi20(_ZN6__xray19XRayPatchedFunctionE)
+  ori $t2, $t2, %got_lo12(_ZN6__xray19XRayPatchedFunctionE)
+  lu32i.d $t2, %got64_lo20(_ZN6__xray19XRayPatchedFunctionE)
+  lu52i.d $t2, $t2, %got64_hi12(_ZN6__xray19XRayPatchedFunctionE)
+  ld.d $t2, $t2, 0
+
+  beqz $t2, FunctionExit_restore
+
+  // a1=1 means that we are tracing an exit event.
+  ori $a1, $zero, 1
+  // Function ID is in t1 (the first parameter).
+ move $a0, $t1 + jirl $ra, $t2, 0 + +FunctionExit_restore: + // Restore return registers + fld.d $f0, $sp, 0 + fld.d $f1, $sp, 8 + ld.d $a1, $sp, 24 + ld.d $a0, $sp, 16 + ld.d $fp, $sp, 32 + ld.d $ra, $sp, 40 + addi.d $sp, $sp, 48 + jr $ra + +FunctionExit_end: + .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit + .cfi_endproc + -- 2.41.0
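For completeness, a minimal sketch of how these trampolines are exercised from
user code, assuming a program built with -fxray-instrument and linked against
the XRay runtime (the xray/xray_interface.h header ships with compiler-rt):
the patched sled passes the function id (via $t1, then $a0) and the entry type
(in $a1) to whatever handler is installed in XRayPatchedFunction.

    #include "xray/xray_interface.h"

    #include <cstdio>

    // Handler invoked by the patched sleds through the trampolines above.
    void MyHandler(int32_t FuncId, XRayEntryType Type) {
      std::printf("func %d: %s\n", FuncId,
                  Type == XRayEntryType::ENTRY ? "enter" : "exit");
    }

    int main() {
      __xray_set_handler(MyHandler);
      __xray_patch();   // rewrite the sleds as done by patchSled
      // ... instrumented code runs here ...
      __xray_unpatch(); // restore "b #44" at every sled
      return 0;
    }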