!38 Sync from openEuler-22.03-LTS-SP3

From: @rfwang07 
Reviewed-by: @li-yancheng 
Signed-off-by: @li-yancheng
This commit is contained in:
openeuler-ci-bot 2024-05-28 14:37:03 +00:00 committed by Gitee
commit 04c0a500f0
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 993 additions and 36 deletions

View File

@ -0,0 +1,135 @@
From c62ab1487115a74d72ad23fd89b42076d5726bde Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Mon, 24 Jul 2023 19:47:46 +0800
Subject: [PATCH] [AArch64] fix bug #55005 handle DW_CFA_GNU_NegateRAState.
backport: https://reviews.llvm.org/rG9921197920fc3e9ad9605bd8fe0e835ca2dd41a5
---
bolt/lib/Core/Exceptions.cpp | 19 ++++--
.../Inputs/dw_cfa_gnu_window_save.yaml | 62 +++++++++++++++++++
bolt/test/AArch64/dw_cfa_gnu_window_save.test | 8 +++
3 files changed, 83 insertions(+), 6 deletions(-)
create mode 100644 bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
create mode 100644 bolt/test/AArch64/dw_cfa_gnu_window_save.test
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 79404ca87..b0aa8b990 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -644,18 +644,25 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
return false;
case DW_CFA_GNU_window_save:
+ // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
+ // id but mean different things. The latter is used in AArch64.
+ if (Function.getBinaryContext().isAArch64()) {
+ Function.addCFIInstruction(
+ Offset, MCCFIInstruction::createNegateRAState(nullptr));
+ break;
+ }
+ if (opts::Verbosity >= 1)
+ errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
+ return false;
case DW_CFA_lo_user:
case DW_CFA_hi_user:
- if (opts::Verbosity >= 1) {
- errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
- "unimplemented\n";
- }
+ if (opts::Verbosity >= 1)
+ errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
return false;
default:
- if (opts::Verbosity >= 1) {
+ if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
<< '\n';
- }
return false;
}
diff --git a/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
new file mode 100644
index 000000000..faa32e089
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
@@ -0,0 +1,62 @@
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_AARCH64
+ Entry: 0x4100C0
+ProgramHeaders:
+ - Type: PT_LOAD
+ Flags: [ PF_X, PF_R ]
+ FirstSec: .init
+ LastSec: .fini
+ VAddr: 0x410000
+ Align: 0x10000
+Sections:
+ - Name: .init
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x410000
+ AddressAlign: 0x4
+ Offset: 0x10000
+ Content: 3F2303D5FD7BBFA9FD0300913F000094FD7BC1A8BF2303D5C0035FD6
+ - Name: .plt
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x410020
+ AddressAlign: 0x10
+ Content: F07BBFA9700100F011FE47F910E23F9120021FD61F2003D51F2003D51F2003D590010090110240F91002009120021FD690010090110640F91022009120021FD690010090110A40F91042009120021FD6
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x410080
+ AddressAlign: 0x40
+ Content: 00008052C0035FD61F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D55F2403D51D0080D21E0080D2E50300AAE10340F9E2230091E60300910000009000D00391030080D2040080D2D5FFFF97D8FFFF975F2403D5E2FFFF171F2003D55F2403D5C0035FD6600100F000F047F9400000B4D3FFFF17C0035FD61F2003D5800100908101009000800091218000913F0000EBC000005481000090210840F9610000B4F00301AA00021FD6C0035FD680010090810100900080009121800091210000CB22FC7FD3410C818B21FC4193C10000B482000090420C40F9620000B4F00302AA00021FD6C0035FD63F2303D5FD7BBEA9FD030091F30B00F9930100906082403980000035DEFFFF972000805260820039F30B40F9FD7BC2A8BF2303D5C0035FD65F2403D5E2FFFF17
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x4101CC
+ AddressAlign: 0x4
+ Content: 3F2303D5FD7BBFA9FD030091FD7BC1A8BF2303D5C0035FD6
+ - Name: .eh_frame
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x420068
+ AddressAlign: 0x8
+ Content: 1000000000000000017A520004781E011B0C1F0010000000180000003C00FFFF3C0000000041071E140000002C0000006800FFFF08000000000000000000000010000000440000007000FFFF300000000000000010000000580000008C00FFFF3C00000000000000240000006C000000B400FFFF3800000000412D410E209D049E0342930248DEDDD30E00412D0000001400000094000000C400FFFF08000000000000000000000010000000AC00000068FFFEFF080000000000000000000000
+ - Name: .rela.text
+ Type: SHT_RELA
+ Flags: [ SHF_INFO_LINK ]
+ Link: .symtab
+ AddressAlign: 0x8
+ Info: .text
+Symbols:
+ - Name: .text
+ Type: STT_SECTION
+ Section: .text
+ Value: 0x410080
+ - Name: _ITM_deregisterTMCloneTable
+ Binding: STB_WEAK
+ - Name: _ITM_registerTMCloneTable
+ Binding: STB_WEAK
+...
diff --git a/bolt/test/AArch64/dw_cfa_gnu_window_save.test b/bolt/test/AArch64/dw_cfa_gnu_window_save.test
new file mode 100644
index 000000000..2e044b399
--- /dev/null
+++ b/bolt/test/AArch64/dw_cfa_gnu_window_save.test
@@ -0,0 +1,8 @@
+# Check that llvm-bolt can handle DW_CFA_GNU_window_save on AArch64.
+
+RUN: yaml2obj %p/Inputs/dw_cfa_gnu_window_save.yaml &> %t.exe
+RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+
+CHECK-NOT: paciasp
+CHECK-NOT: autiasp
+CHECK-NOT: ERROR: unable to fill CFI.
--
2.33.0

View File

@ -0,0 +1,465 @@
From 81a80dbe9f47f728bc593d05cd5708a653a23f1c Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Mon, 11 Sep 2023 11:33:41 +0800
Subject: [PATCH] [AArch64] Add AArch64 support for hugify.
---
bolt/CMakeLists.txt | 4 +-
bolt/runtime/CMakeLists.txt | 28 ++-
bolt/runtime/common.h | 224 ++++++++++++++++++
bolt/runtime/hugify.cpp | 21 +-
.../AArch64/Inputs/user_func_order.txt | 2 +
bolt/test/runtime/AArch64/user-func-reorder.c | 44 ++++
6 files changed, 305 insertions(+), 18 deletions(-)
create mode 100644 bolt/test/runtime/AArch64/Inputs/user_func_order.txt
create mode 100644 bolt/test/runtime/AArch64/user-func-reorder.c
diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
index a97878cd3..3de930496 100644
--- a/bolt/CMakeLists.txt
+++ b/bolt/CMakeLists.txt
@@ -5,7 +5,7 @@ set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(CMAKE_CXX_STANDARD 14)
set(BOLT_ENABLE_RUNTIME OFF)
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|aarch64")
set(BOLT_ENABLE_RUNTIME ON)
endif()
@@ -45,7 +45,7 @@ if (LLVM_INCLUDE_TESTS)
endif()
if (BOLT_ENABLE_RUNTIME)
- message(STATUS "Building BOLT runtime libraries for X86")
+ message(STATUS "Building BOLT runtime libraries")
ExternalProject_Add(bolt_rt
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps
diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt
index 7c1b79af4..ee6ab7bd4 100644
--- a/bolt/runtime/CMakeLists.txt
+++ b/bolt/runtime/CMakeLists.txt
@@ -10,10 +10,12 @@ check_include_files(elf.h HAVE_ELF_H)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
${CMAKE_CURRENT_BINARY_DIR}/config.h)
-add_library(bolt_rt_instr STATIC
- instr.cpp
- ${CMAKE_CURRENT_BINARY_DIR}/config.h
- )
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ add_library(bolt_rt_instr STATIC
+ instr.cpp
+ ${CMAKE_CURRENT_BINARY_DIR}/config.h
+ )
+endif()
add_library(bolt_rt_hugify STATIC
hugify.cpp
${CMAKE_CURRENT_BINARY_DIR}/config.h
@@ -23,16 +25,24 @@ set(BOLT_RT_FLAGS
-ffreestanding
-fno-exceptions
-fno-rtti
- -fno-stack-protector
- -mno-sse)
+ -fno-stack-protector)
+
+# x86 exclusive option
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ list(APPEND BOLT_RT_FLAGS -mno-sse)
+endif()
# Don't let the compiler think it can create calls to standard libs
-target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
-target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
+ target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+endif()
target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
-install(TARGETS bolt_rt_instr DESTINATION lib)
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ install(TARGETS bolt_rt_instr DESTINATION lib)
+endif()
install(TARGETS bolt_rt_hugify DESTINATION lib)
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 008dbb6c3..6869742e7 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -39,6 +39,45 @@ typedef int int32_t;
#endif
// Save all registers while keeping 16B stack alignment
+#if defined (__aarch64__)
+#define SAVE_ALL /* pushes x0..x30 in 16-byte pairs; xzr fills the last slot */ \
+ "stp x0, x1, [sp, #-16]!\n" \
+ "stp x2, x3, [sp, #-16]!\n" \
+ "stp x4, x5, [sp, #-16]!\n" \
+ "stp x6, x7, [sp, #-16]!\n" \
+ "stp x8, x9, [sp, #-16]!\n" \
+ "stp x10, x11, [sp, #-16]!\n" \
+ "stp x12, x13, [sp, #-16]!\n" \
+ "stp x14, x15, [sp, #-16]!\n" \
+ "stp x16, x17, [sp, #-16]!\n" \
+ "stp x18, x19, [sp, #-16]!\n" \
+ "stp x20, x21, [sp, #-16]!\n" \
+ "stp x22, x23, [sp, #-16]!\n" \
+ "stp x24, x25, [sp, #-16]!\n" \
+ "stp x26, x27, [sp, #-16]!\n" \
+ "stp x28, x29, [sp, #-16]!\n" \
+ "stp x30, xzr, [sp, #-16]!\n"
+
+// Mirrors SAVE_ALL: pops the pairs in reverse order (x30 first, x0/x1 last).
+#define RESTORE_ALL \
+ "ldp x30, xzr, [sp], #16\n" \
+ "ldp x28, x29, [sp], #16\n" \
+ "ldp x26, x27, [sp], #16\n" \
+ "ldp x24, x25, [sp], #16\n" \
+ "ldp x22, x23, [sp], #16\n" \
+ "ldp x20, x21, [sp], #16\n" \
+ "ldp x18, x19, [sp], #16\n" \
+ "ldp x16, x17, [sp], #16\n" \
+ "ldp x14, x15, [sp], #16\n" \
+ "ldp x12, x13, [sp], #16\n" \
+ "ldp x10, x11, [sp], #16\n" \
+ "ldp x8, x9, [sp], #16\n" \
+ "ldp x6, x7, [sp], #16\n" \
+ "ldp x4, x5, [sp], #16\n" \
+ "ldp x2, x3, [sp], #16\n" \
+ "ldp x0, x1, [sp], #16\n"
+
+#else
#define SAVE_ALL \
"push %%rax\n" \
"push %%rbx\n" \
@@ -75,6 +114,7 @@ typedef int int32_t;
"pop %%rcx\n" \
"pop %%rbx\n" \
"pop %%rax\n"
+#endif
// Functions that are required by freestanding environment. Compiler may
// generate calls to these implicitly.
@@ -129,6 +169,189 @@ constexpr uint32_t BufSize = 10240;
#define _STRINGIFY(x) #x
#define STRINGIFY(x) _STRINGIFY(x)
+#if defined (__aarch64__)
+// Define the syscall wrappers used throughout this code so that it does not
+// have to link against the system libc.
+uint64_t __read(uint64_t fd, const void *buf, uint64_t count) { // Raw read system call; returns whatever the kernel leaves in x0.
+ uint64_t ret;
+ register uint64_t x0 __asm__("x0") = fd; // Arguments are pinned to x0..x2 in order.
+ register const void *x1 __asm__("x1") = buf;
+ register uint64_t x2 __asm__("x2") = count;
+ register uint32_t w8 __asm__("w8") = 63; // System call number (read in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %0, x0" // Copy the result out of x0 into ret.
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret; // Returned unmodified: no errno translation is done here.
+}
+
+uint64_t __write(uint64_t fd, const void *buf, uint64_t count) { // Raw write system call; returns whatever the kernel leaves in x0.
+ uint64_t ret;
+ register uint64_t x0 __asm__("x0") = fd; // Arguments are pinned to x0..x2 in order.
+ register const void *x1 __asm__("x1") = buf;
+ register uint64_t x2 __asm__("x2") = count;
+ register uint32_t w8 __asm__("w8") = 64; // System call number (write in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %0, x0" // Copy the result out of x0 into ret.
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret; // Returned unmodified: no errno translation is done here.
+}
+
+void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
+ uint64_t fd, uint64_t offset) { // Raw mmap system call; the six arguments travel in x0..x5.
+ void *ret;
+ register uint64_t x0 __asm__("x0") = addr;
+ register uint64_t x1 __asm__("x1") = size;
+ register uint64_t x2 __asm__("x2") = prot;
+ register uint64_t x3 __asm__("x3") = flags;
+ register uint64_t x4 __asm__("x4") = fd;
+ register uint64_t x5 __asm__("x5") = offset;
+ register uint32_t w8 __asm__("w8") = 222; // System call number (mmap in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %0, x0" // Copy the result out of x0 into ret.
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8)
+ : "cc", "memory");
+ return ret; // Raw kernel value reinterpreted as a pointer; callers must check it themselves.
+}
+
+uint64_t __munmap(void *addr, uint64_t size) { // Raw munmap system call; returns whatever the kernel leaves in x0.
+ uint64_t ret;
+ register void *x0 __asm__("x0") = addr;
+ register uint64_t x1 __asm__("x1") = size;
+ register uint32_t w8 __asm__("w8") = 215; // System call number (munmap in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %0, x0" // Copy the result out of x0 into ret.
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(w8)
+ : "cc", "memory");
+ return ret; // Returned unmodified: no errno translation is done here.
+}
+
+uint64_t __exit(uint64_t code) { // Terminates the process with status `code` through a raw system call.
+ uint64_t ret;
+ register uint64_t x0 __asm__("x0") = code;
+ register uint32_t w8 __asm__("w8") = 94; // System call number (exit_group, not exit, in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %0, x0" // Only reached if the system call returns.
+ : "=r"(ret), "+r"(x0)
+ : "r"(w8)
+ : "cc", "memory", "x1"); // x1 is listed as clobbered here, unlike in the other wrappers.
+ return ret;
+}
+
+uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) { // Opens pathname through the openat system call.
+ uint64_t ret;
+ register int x0 __asm__("x0") = -100; // Directory fd argument; -100 is AT_FDCWD on Linux, i.e. relative to the current directory.
+ register const char *x1 __asm__("x1") = pathname;
+ register uint64_t x2 __asm__("x2") = flags;
+ register uint64_t x3 __asm__("x3") = mode;
+ register uint32_t w8 __asm__("w8") = 56; // System call number (openat in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %0, x0" // Copy the result out of x0 into ret.
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(x3), "r"(w8)
+ : "cc", "memory");
+ return ret; // Returned unmodified: no errno translation is done here.
+}
+
+int __madvise(void *addr, size_t length, int advice) { // Raw madvise system call; returns the low 32 bits of the kernel result.
+ int ret;
+ register void *x0 __asm__("x0") = addr;
+ register size_t x1 __asm__("x1") = length;
+ register int x2 __asm__("x2") = advice;
+ register uint32_t w8 __asm__("w8") = 233; // System call number (madvise in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %w0, w0" // 32-bit move: ret is an int, so only w0 is copied.
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret; // Returned unmodified: no errno translation is done here.
+}
+
+int __mprotect(void *addr, size_t len, int prot) { // Raw mprotect system call; returns the low 32 bits of the kernel result.
+ int ret;
+ register void *x0 __asm__("x0") = addr;
+ register size_t x1 __asm__("x1") = len;
+ register int x2 __asm__("x2") = prot;
+ register uint32_t w8 __asm__("w8") = 226; // System call number (mprotect in the AArch64 Linux table).
+ __asm__ __volatile__("svc #0\n" // Trap into the kernel.
+ "mov %w0, w0" // 32-bit move: ret is an int, so only w0 is copied.
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret; // Returned unmodified: no errno translation is done here.
+}
+
+// Helper functions for writing strings to the .fdata file. We intentionally
+// avoid using libc names to make it clear it is our impl.
+
+/// Compare up to Num bytes of Str1 and Str2; 0 means equal, else byte diff.
+int strnCmp(const char *Str1, const char *Str2, size_t Num) {
+  for (; Num != 0; --Num, ++Str1, ++Str2) {
+    const unsigned char Lhs = static_cast<unsigned char>(*Str1);
+    const unsigned char Rhs = static_cast<unsigned char>(*Str2);
+    // Stop at the first mismatch or at the end of Str1.
+    if (Lhs == '\0' || Lhs != Rhs)
+      return Lhs - Rhs;
+  }
+  return 0;
+}
+
+uint32_t strLen(const char *Str) { // Length of Str, excluding the NUL.
+  const char *End = Str;
+  while (*End != '\0')
+    ++End; // Walk to the terminator; the distance covered is the length.
+  return static_cast<uint32_t>(End - Str);
+}
+
+/// Write number Num using Base (2..16) to the buffer in OutBuf, returns a
+/// pointer to the end of the string. No NUL terminator is written.
+char *intToStr(char *OutBuf, uint64_t Num, uint32_t Base) {
+  const char *Chars = "0123456789abcdef";
+  char Buf[64]; // 64 digits: enough for a 64-bit value even in base 2.
+  char *Ptr = Buf;
+  while (Num) {
+    *Ptr++ = *(Chars + (Num % Base)); // Digits come out least significant first.
+    Num /= Base;
+  }
+  if (Ptr == Buf) {
+    *OutBuf++ = '0'; // Num was zero: the loop above produced no digits.
+    return OutBuf;
+  }
+  while (Ptr != Buf)
+    *OutBuf++ = *--Ptr; // Reverse into most-significant-first order.
+
+  return OutBuf;
+}
+
+/// Append Str to OutBuf, stopping once Size bytes are written; returns end.
+char *strCopy(char *OutBuf, const char *Str, int32_t Size = BufSize) {
+  for (int32_t Left = Size; *Str != '\0';) {
+    *OutBuf++ = *Str++; // The byte is written before the budget is checked.
+    if (--Left <= 0)
+      break;
+  }
+  return OutBuf;
+}
+
+void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) {
+  char Buf[BufSize];
+  // The message is capped at BufSize - 23 bytes, which leaves 23 bytes of
+  // headroom in Buf for the digits and the trailing newline.
+  char *End = strCopy(Buf, Msg, BufSize - 23);
+  End = strCopy(intToStr(End, Num, Base), "\n");
+  __write(2, Buf, End - Buf); // File descriptor 2.
+}
+
+void reportError(const char *Msg, uint64_t Size) { // Emit Msg, then terminate the process.
+ __write(2, Msg, Size); // Size bytes of Msg go to file descriptor 2.
+ __exit(1); // Exit status 1.
+}
+#else
uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
#if defined(__APPLE__)
@@ -550,5 +773,6 @@ public:
inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
return (Value + Align - 1) / Align * Align;
}
+#endif
} // anonymous namespace
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index 69e1a7e06..385e4d147 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -6,26 +6,25 @@
//
//===----------------------------------------------------------------------===//
-#if defined (__x86_64__)
#if !defined(__APPLE__)
#include "common.h"
#include <sys/mman.h>
// Enables a very verbose logging to stderr useful when debugging
-//#define ENABLE_DEBUG
+// #define ENABLE_DEBUG
// Function pointers to init routines in the binary, so we can resume
// regular execution of the function that we hooked.
extern void (*__bolt_hugify_init_ptr)();
// The __hot_start and __hot_end symbols set by Bolt. We use them to figure
-// out the rage for marking huge pages.
+// out the range for marking huge pages.
extern uint64_t __hot_start;
extern uint64_t __hot_end;
#ifdef MADV_HUGEPAGE
-/// Check whether the kernel supports THP via corresponding sysfs entry.
+// Check whether the kernel supports THP via corresponding sysfs entry.
static bool has_pagecache_thp_support() {
char buf[256] = {0};
const char *madviseStr = "always [madvise] never";
@@ -116,14 +115,22 @@ extern "C" void __bolt_hugify_self_impl() {
#endif
}
-/// This is hooking ELF's entry, it needs to save all machine state.
+// This is hooking ELF's entry, it needs to save all machine state.
extern "C" __attribute((naked)) void __bolt_hugify_self() {
+#if defined (__x86_64__)
__asm__ __volatile__(SAVE_ALL
"call __bolt_hugify_self_impl\n"
RESTORE_ALL
"jmp *__bolt_hugify_init_ptr(%%rip)\n"
:::);
-}
-
+#elif defined (__aarch64__)
+ __asm__ __volatile__(SAVE_ALL
+ "bl __bolt_hugify_self_impl\n"
+ RESTORE_ALL
+ "ldr x16, =__bolt_hugify_init_ptr\n"
+ "ldr x16, [x16]\n"
+ "br x16\n"
+ :::);
#endif
+}
#endif
diff --git a/bolt/test/runtime/AArch64/Inputs/user_func_order.txt b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
new file mode 100644
index 000000000..48b76cd35
--- /dev/null
+++ b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
@@ -0,0 +1,2 @@
+main
+fib
diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c
new file mode 100644
index 000000000..fcb92bca1
--- /dev/null
+++ b/bolt/test/runtime/AArch64/user-func-reorder.c
@@ -0,0 +1,44 @@
+/* Checks that BOLT correctly processes a user-provided function list file,
+ * reorders functions according to this list, updates the __hot_start and
+ * __hot_end symbols, and inserts a function that performs hot text mapping
+ * during program startup.
+ */
+#include <stdio.h>
+
+int foo(int x) { // Never called, and not listed in Inputs/user_func_order.txt.
+ return x + 1;
+}
+
+int fib(int x) { // Recursive Fibonacci; second entry in Inputs/user_func_order.txt.
+ if (x < 2)
+ return x; // fib(0) = 0, fib(1) = 1.
+ return fib(x - 1) + fib(x - 2);
+}
+
+int bar(int x) { // Never called, and not listed in Inputs/user_func_order.txt.
+ return x - 1;
+}
+
+int main(int argc, char **argv) { // First entry in Inputs/user_func_order.txt.
+ printf("fib(%d) = %d\n", argc, fib(argc)); // The RUN line passes three arguments, so argc is 4 and CHECK-OUTPUT expects "fib(4) = 3".
+ return 0;
+}
+
+/*
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
+
+RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
+RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
+RUN: llvm-nm --numeric-sort --print-armap %t | \
+RUN: FileCheck %s -check-prefix=CHECK-NM
+RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT
+
+CHECK-NM: W __hot_start
+CHECK-NM: T main
+CHECK-NM-NEXT: T fib
+CHECK-NM-NEXT: W __hot_end
+
+CHECK-OUTPUT: fib(4) = 3
+*/
--
2.33.0

View File

@ -0,0 +1,259 @@
From b26ff1c328435d7b0ceccec1dcc25252821ad373 Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Mon, 11 Sep 2023 14:43:12 +0800
Subject: [PATCH] [AArch64] Add AArch64 support for inline.
---
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 12 +++++++
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
12 files changed, 137 insertions(+)
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
create mode 100644 bolt/test/AArch64/inline-debug-info.test
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
create mode 100644 bolt/test/AArch64/jmp-optimization.test
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 04232bd3e..d009d59dc 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -464,6 +464,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
<< ". Size change: " << SizeAfterInlining
<< " bytes.\n");
+// On AArch64, do not inline a callee that contains a branch which is either:
+// - indirect, e.g. BR; or
+// - aimed at a label that is not a basic block of the callee itself.
+ if (BC.isAArch64()) {
+ auto &MIB = *BC.MIB;
+ bool skip = false; // Set once a disqualifying branch is found in the callee.
+ for (const BinaryBasicBlock &BB : *TargetFunction) {
+ for (MCInst Inst : BB) { // Iterates by value, so the stripping below only affects this copy.
+ if (MIB.isPseudo(Inst))
+ continue;
+
+ MIB.stripAnnotations(Inst, false);
+
+ if (MIB.isBranch(Inst)) {
+ const BinaryBasicBlock *TargetBB =
+ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); // Null when the target is not a block of the callee.
+ if (MIB.isIndirectBranch(Inst) || !TargetBB) {
+ skip = true;
+ break; // Leaves the instruction loop; the check below leaves the block loop.
+ }
+ }
+ }
+ if (skip)
+ break;
+ }
+ if (skip) {
+ ++InstIt; // Step past this call without inlining it.
+ continue;
+ }
+ }
+
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
DidInlining = true;
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index c736196a8..03b1b536f 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -34,6 +34,16 @@ public:
const MCRegisterInfo *RegInfo)
: MCPlusBuilder(Analysis, Info, RegInfo) {}
+ MCPhysReg getStackPointer() const override { return AArch64::SP; } // Reports AArch64::SP as the stack pointer register.
+
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override { // Makes Inst a BL to Target; always returns true.
+ Inst.setOpcode(AArch64::BL);
+ Inst.addOperand(MCOperand::createExpr( // NOTE(review): appended to any operands Inst already holds; callers presumably pass a fresh MCInst.
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
+ return true;
+ }
+
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
@@ -1103,6 +1113,8 @@ public:
bool isLeave(const MCInst &Inst) const override { return false; }
+ bool isPush(const MCInst &Inst) const override { return false; } // Always false, like the neighbouring isLeave and isPop.
+
bool isPop(const MCInst &Inst) const override { return false; }
bool isPrefix(const MCInst &Inst) const override { return false; }
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
new file mode 100644
index 000000000..1307c13f2
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
@@ -0,0 +1,5 @@
+#include "stub.h"
+
+void foo() { // Force-inlined into main by inline-debug-info.test (--force-inline=foo).
+ puts("Hello world!\n"); // Line 4 of this file: the location the test's CHECK lines look for.
+}
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
new file mode 100644
index 000000000..7853d2b63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-main.c
@@ -0,0 +1,5 @@
+extern void foo();
+int main() { // The only function printed by inline-debug-info.test (--print-only=main).
+ foo(); // Defined in inline-foo.c.
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
new file mode 100644
index 000000000..a6ff9e262
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
@@ -0,0 +1,23 @@
+extern "C" int printf(const char*, ...);
+extern const char* question();
+
+inline int answer() __attribute__((always_inline)); // The attribute is attached on this separate declaration.
+inline int answer() { return 42; } // Constant result.
+
+int main(int argc, char *argv[]) { // Test input for inlined-function-mixed.test; calls question() from inlinee.cpp.
+ int ans;
+ if (argc == 1) {
+ ans = 0;
+ } else {
+ ans = argc;
+ }
+ printf("%s\n", question());
+ for (int i = 0; i < 10; ++i) {
+ int x = answer();
+ int y = answer();
+ ans += x - y; // Both values are 42, so this adds 0 on every iteration.
+ }
+ // padding to make sure question() is inlineable
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
+ return ans;
+}
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
new file mode 100644
index 000000000..edb7ab145
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
@@ -0,0 +1,3 @@
+const char* question() { // inlined-function-mixed.test compiles this file with -g and inlined.cpp without it.
+ return "What do you get if you multiply six by nine?";
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
new file mode 100644
index 000000000..cd6d53c35
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
@@ -0,0 +1,7 @@
+int g();
+
+int main() { // Test input for jmp-optimization.test.
+ int x = g(); // g() is defined in jmp_opt3.cpp.
+ int y = x*x;
+ return y;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
new file mode 100644
index 000000000..80b853d63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
@@ -0,0 +1,3 @@
+int f() { // End of the main -> g -> f call chain used by jmp-optimization.test.
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
new file mode 100644
index 000000000..7fb551163
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
@@ -0,0 +1,3 @@
+int f();
+
+int g() { return f(); } // Only forwards to f(); presumably becomes a tail call at -O2, which is what jmp-optimization.test targets.
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
new file mode 100644
index 000000000..e20e5e31e
--- /dev/null
+++ b/bolt/test/AArch64/inline-debug-info.test
@@ -0,0 +1,20 @@
+## Check that BOLT correctly prints and updates debug info for inlined
+## functions.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
+# RUN: -o %t.bolt \
+# RUN: | FileCheck %s
+
+## The call to puts() should come from inline-foo.c:
+# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3
+
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
+
+## Dump of main() should include debug info from inline-foo.c after inlining:
+# CHECK-OBJDUMP: inline-foo.c:4
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
new file mode 100644
index 000000000..5a87bdde9
--- /dev/null
+++ b/bolt/test/AArch64/inlined-function-mixed.test
@@ -0,0 +1,11 @@
+# Make sure inlining from a unit with debug info into a unit without
+# debug info does not cause a crash.
+
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
+
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
+
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
new file mode 100644
index 000000000..92f4b9a14
--- /dev/null
+++ b/bolt/test/AArch64/jmp-optimization.test
@@ -0,0 +1,14 @@
+# Tests the optimization of functions that just do a tail call in the beginning.
+
+# This test has commands that rely on shell capabilities that won't execute
+# correctly on Windows e.g. unsupported parameter expansion
+REQUIRES: shell
+
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
+
+CHECK: <main>:
+CHECK-NOT: call
+CHECK: xorl %eax, %eax
+CHECK: retq
--
2.33.0

View File

@ -1,59 +1,157 @@
%bcond_with check
%global maj_ver 15
%global min_ver 0
%global patch_ver 7
%global bolt_version %{maj_ver}.%{min_ver}.%{patch_ver}
%global bolt_srcdir llvm-project-%{bolt_version}.src
%global install_prefix %{_prefix}
%global install_bindir %{install_prefix}/bin
%global install_libdir %{install_prefix}/lib
%global install_docdir %{install_prefix}/share/doc
%global max_link_jobs 2
Name: llvm-bolt Name: llvm-bolt
Version: 0 Version: %{bolt_version}
Release: 1.20211016.gitb72f753 Release: 2
Summary: BOLT is a post-link optimizer developed to speed up large applications Summary: BOLT is a post-link optimizer developed to speed up large applications
License: Apache 2.0 License: Apache 2.0
URL: https://github.com/facebookincubator/BOLT URL: https://github.com/llvm/llvm-project/tree/main/bolt
Source0: %{name}-%{version}.tar.xz
BuildRequires: gcc gcc-c++ cmake ninja-build libstdc++-static chrpath ncurses-devel zlib-devel Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz
Requires: glibc zlib ncurses-libs libstdc++ libgcc
Patch1: 0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch
Patch2: 0002-AArch64-Add-AArch64-support-for-hugify.patch
Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch
BuildRequires: gcc
BuildRequires: gcc-c++
BuildRequires: cmake
BuildRequires: ninja-build
BuildRequires: zlib-devel
BuildRequires: python3-lit
BuildRequires: python3-psutil
BuildRequires: doxygen
%description %description
BOLT is a post-link optimizer developed to speed up large applications. BOLT is a post-link optimizer developed to speed up large applications.
It achieves the improvements by optimizing application's code layout based It achieves the improvements by optimizing application's code layout based
on execution profile gathered by sampling profiler, such as Linux perf tool. on execution profile gathered by sampling profiler, such as Linux perf tool.
%package doc
Summary: Documentation for BOLT
BuildArch: noarch
Requires: %{name} = %{version}-%{release}
%description doc
Documentation for the BOLT optimizer
%prep %prep
%setup -q %autosetup -n %{bolt_srcdir} -p1
mkdir -p _build
cd _build
%{__cmake} -G Ninja ../llvm -DCMAKE_BUILD_TYPE=RelWithDebInfo -DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_ENABLE_PROJECTS="clang;lld;bolt" -DCMAKE_INSTALL_PREFIX=%{_usr} \
-DLLVM_PARALLEL_LINK_JOBS=1 \
%ifarch %ix86 x86_64
-DLLVM_TARGETS_TO_BUILD="X86"
%endif
%ifarch aarch64
-DLLVM_TARGETS_TO_BUILD="AArch64"
%endif
%build %build
cd _build %{cmake} -G Ninja -S llvm \
%{ninja_build} -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_INSTALL_PREFIX=%{install_prefix} \
-DCMAKE_SKIP_RPATH=ON \
-DLLVM_DIR=%{install_libdir}/cmake/llvm \
-DLLVM_TABLEGEN_EXE=%{install_bindir}/llvm-tblgen \
-DLLVM_BUILD_UTILS:BOOL=ON \
-DBOLT_INCLUDE_DOCS:BOOL=ON \
-DLLVM_INCLUDE_TESTS:BOOL=ON \
-DBUILD_SHARED_LIBS:BOOL=OFF \
-DLLVM_LINK_LLVM_DYLIB:BOOL=OFF \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DBOLT_INCLUDE_TESTS:BOOL=ON \
-DLLVM_EXTERNAL_LIT=%{_bindir}/lit \
-DLLVM_ENABLE_PROJECTS="bolt" \
-DLLVM_PARALLEL_LINK_JOBS=%{max_link_jobs} \
%ifarch %ix86 x86_64
-DLLVM_TARGETS_TO_BUILD="X86"
%endif
%ifarch aarch64
-DLLVM_TARGETS_TO_BUILD="AArch64"
%endif
%ninja_build bolt
%install %install
%{ninja_install} -C _build %ninja_install bolt
%global _bolt_install_dir %{_buildrootdir}/%{name}-%{version}-%{release}.%{_arch}/usr
# remove extra llvm files. # Remove extra llvm files.
find %{_bolt_install_dir} ! -name "llvm-bolt" ! -name "merge-fdata" ! -name "perf2bolt" -type f,l -exec rm -f '{}' \; find %{buildroot}%{install_prefix} \
# remove static libs. ! -name "llvm-bolt" \
rm -rf %{_buildrootdir}/root ! -name "merge-fdata" \
# remove rpath ! -name "perf2bolt" \
chrpath -d %{_bolt_install_dir}/bin/llvm-bolt ! -name "llvm-boltdiff" \
chrpath -d %{_bolt_install_dir}/bin/merge-fdata ! -name "llvm-bolt-heatmap" \
! -name "libbolt_rt_hugify.a" \
! -name "libbolt_rt_instr.a" \
-type f,l -exec rm -f '{}' \;
#
rm -f %{_builddir}/%{bolt_srcdir}/lib/lib*.a
# There is currently no upstream support for building HTML docs from BOLT
install -d %{buildroot}%{install_docdir}
mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir}
%check
%if %{with check}
# Bolt makes incorrect assumptions on the location of libbolt_rt_*.a.
mkdir -p %{_builddir}/%{bolt_srcdir}/lib
ln -s %{buildroot}/%{install_libdir}/libbolt_rt_hugify.a %{_builddir}/%{bolt_srcdir}/lib
%ifarch x86_64
ln -s %{buildroot}/%{install_libdir}/libbolt_rt_instr.a %{_builddir}/%{bolt_srcdir}/lib
%endif
%ifarch aarch64
# Failing test cases on aarch64
rm bolt/test/cache+-deprecated.test bolt/test/bolt-icf.test bolt/test/R_ABS.pic.lld.cpp
%endif
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/lib
export DESTDIR=%{buildroot}
%ninja_build check
# Remove files installed during the check phase.
rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a
%endif
%files %files
%license bolt/LICENSE.TXT %license bolt/LICENSE.TXT
%doc bolt/docs/* %{install_bindir}/llvm-bolt
%exclude %{_includedir}/* %{install_bindir}/llvm-boltdiff
%exclude %{_datadir}/* %{install_bindir}/merge-fdata
%exclude %{_builddir}/%{name}-%{version}/_build/* %{install_bindir}/perf2bolt
%attr(0755,root,root) %{_bindir}/llvm-bolt %{install_bindir}/llvm-bolt-heatmap
%attr(0755,root,root) %{_bindir}/merge-fdata %{install_libdir}/libbolt_rt_hugify.a
%attr(-,root,root) %{_bindir}/perf2bolt
%ifarch x86_64
%{install_libdir}/libbolt_rt_instr.a
%endif
%exclude %{_builddir}/%{bolt_srcdir}/lib/*
%files doc
%doc %{install_docdir}
%changelog %changelog
* Thu Sep 7 2023 Xiong Zhou <xiongzhou4@huawei.com> 15.0.7-2
- Type:Update
- ID:NA
- SUG:NA
- DESC: Backport bugfix and add AArch64 support for hugify and inline.
* Thu Aug 31 2023 zhenyu zhao <zhaozhenyu17@huaiwei.com> 15.0.7-1
- Type:Update
- ID:NA
- SUG:NA
- DESC: Update llvm-bolt from llvm-bolt to llvm-bolt-15
* Mon Dec 19 2022 liyancheng <412998149@qq.com> 0-1.20211016.gitb72f753 * Mon Dec 19 2022 liyancheng <412998149@qq.com> 0-1.20211016.gitb72f753
- Type:fix - Type:fix
- ID:NA - ID:NA