!26 Backport bugfix and add AArch64 support for hugify and inline.

From: @xiongzhou4 
Reviewed-by: @li-yancheng 
Signed-off-by: @li-yancheng
This commit is contained in:
openeuler-ci-bot 2023-09-13 09:06:11 +00:00 committed by Gitee
commit a8621ebcce
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
4 changed files with 874 additions and 9 deletions

View File

@ -0,0 +1,135 @@
From c62ab1487115a74d72ad23fd89b42076d5726bde Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Mon, 24 Jul 2023 19:47:46 +0800
Subject: [PATCH] [AArch64] fix bug #55005 handle DW_CFA_GNU_NegateRAState.
backport: https://reviews.llvm.org/rG9921197920fc3e9ad9605bd8fe0e835ca2dd41a5
---
bolt/lib/Core/Exceptions.cpp | 19 ++++--
.../Inputs/dw_cfa_gnu_window_save.yaml | 62 +++++++++++++++++++
bolt/test/AArch64/dw_cfa_gnu_window_save.test | 8 +++
3 files changed, 83 insertions(+), 6 deletions(-)
create mode 100644 bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
create mode 100644 bolt/test/AArch64/dw_cfa_gnu_window_save.test
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 79404ca87..b0aa8b990 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -644,18 +644,25 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
return false;
case DW_CFA_GNU_window_save:
+ // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
+ // id but mean different things. The latter is used in AArch64.
+ if (Function.getBinaryContext().isAArch64()) {
+ Function.addCFIInstruction(
+ Offset, MCCFIInstruction::createNegateRAState(nullptr));
+ break;
+ }
+ if (opts::Verbosity >= 1)
+ errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
+ return false;
case DW_CFA_lo_user:
case DW_CFA_hi_user:
- if (opts::Verbosity >= 1) {
- errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
- "unimplemented\n";
- }
+ if (opts::Verbosity >= 1)
+ errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
return false;
default:
- if (opts::Verbosity >= 1) {
+ if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
<< '\n';
- }
return false;
}
diff --git a/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
new file mode 100644
index 000000000..faa32e089
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/dw_cfa_gnu_window_save.yaml
@@ -0,0 +1,62 @@
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_AARCH64
+ Entry: 0x4100C0
+ProgramHeaders:
+ - Type: PT_LOAD
+ Flags: [ PF_X, PF_R ]
+ FirstSec: .init
+ LastSec: .fini
+ VAddr: 0x410000
+ Align: 0x10000
+Sections:
+ - Name: .init
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x410000
+ AddressAlign: 0x4
+ Offset: 0x10000
+ Content: 3F2303D5FD7BBFA9FD0300913F000094FD7BC1A8BF2303D5C0035FD6
+ - Name: .plt
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x410020
+ AddressAlign: 0x10
+ Content: F07BBFA9700100F011FE47F910E23F9120021FD61F2003D51F2003D51F2003D590010090110240F91002009120021FD690010090110640F91022009120021FD690010090110A40F91042009120021FD6
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x410080
+ AddressAlign: 0x40
+ Content: 00008052C0035FD61F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D51F2003D55F2403D51D0080D21E0080D2E50300AAE10340F9E2230091E60300910000009000D00391030080D2040080D2D5FFFF97D8FFFF975F2403D5E2FFFF171F2003D55F2403D5C0035FD6600100F000F047F9400000B4D3FFFF17C0035FD61F2003D5800100908101009000800091218000913F0000EBC000005481000090210840F9610000B4F00301AA00021FD6C0035FD680010090810100900080009121800091210000CB22FC7FD3410C818B21FC4193C10000B482000090420C40F9620000B4F00302AA00021FD6C0035FD63F2303D5FD7BBEA9FD030091F30B00F9930100906082403980000035DEFFFF972000805260820039F30B40F9FD7BC2A8BF2303D5C0035FD65F2403D5E2FFFF17
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x4101CC
+ AddressAlign: 0x4
+ Content: 3F2303D5FD7BBFA9FD030091FD7BC1A8BF2303D5C0035FD6
+ - Name: .eh_frame
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x420068
+ AddressAlign: 0x8
+ Content: 1000000000000000017A520004781E011B0C1F0010000000180000003C00FFFF3C0000000041071E140000002C0000006800FFFF08000000000000000000000010000000440000007000FFFF300000000000000010000000580000008C00FFFF3C00000000000000240000006C000000B400FFFF3800000000412D410E209D049E0342930248DEDDD30E00412D0000001400000094000000C400FFFF08000000000000000000000010000000AC00000068FFFEFF080000000000000000000000
+ - Name: .rela.text
+ Type: SHT_RELA
+ Flags: [ SHF_INFO_LINK ]
+ Link: .symtab
+ AddressAlign: 0x8
+ Info: .text
+Symbols:
+ - Name: .text
+ Type: STT_SECTION
+ Section: .text
+ Value: 0x410080
+ - Name: _ITM_deregisterTMCloneTable
+ Binding: STB_WEAK
+ - Name: _ITM_registerTMCloneTable
+ Binding: STB_WEAK
+...
diff --git a/bolt/test/AArch64/dw_cfa_gnu_window_save.test b/bolt/test/AArch64/dw_cfa_gnu_window_save.test
new file mode 100644
index 000000000..2e044b399
--- /dev/null
+++ b/bolt/test/AArch64/dw_cfa_gnu_window_save.test
@@ -0,0 +1,8 @@
+# Check that llvm-bolt can handle DW_CFA_GNU_window_save on AArch64.
+
+RUN: yaml2obj %p/Inputs/dw_cfa_gnu_window_save.yaml &> %t.exe
+RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+
+CHECK-NOT: paciasp
+CHECK-NOT: autiasp
+CHECK-NOT: ERROR: unable to fill CFI.
--
2.33.0

View File

@ -0,0 +1,465 @@
From 81a80dbe9f47f728bc593d05cd5708a653a23f1c Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Mon, 11 Sep 2023 11:33:41 +0800
Subject: [PATCH] [AArch64] Add AArch64 support for hugify.
---
bolt/CMakeLists.txt | 4 +-
bolt/runtime/CMakeLists.txt | 28 ++-
bolt/runtime/common.h | 224 ++++++++++++++++++
bolt/runtime/hugify.cpp | 21 +-
.../AArch64/Inputs/user_func_order.txt | 2 +
bolt/test/runtime/AArch64/user-func-reorder.c | 44 ++++
6 files changed, 305 insertions(+), 18 deletions(-)
create mode 100644 bolt/test/runtime/AArch64/Inputs/user_func_order.txt
create mode 100644 bolt/test/runtime/AArch64/user-func-reorder.c
diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
index a97878cd3..3de930496 100644
--- a/bolt/CMakeLists.txt
+++ b/bolt/CMakeLists.txt
@@ -5,7 +5,7 @@ set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(CMAKE_CXX_STANDARD 14)
set(BOLT_ENABLE_RUNTIME OFF)
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|aarch64")
set(BOLT_ENABLE_RUNTIME ON)
endif()
@@ -45,7 +45,7 @@ if (LLVM_INCLUDE_TESTS)
endif()
if (BOLT_ENABLE_RUNTIME)
- message(STATUS "Building BOLT runtime libraries for X86")
+ message(STATUS "Building BOLT runtime libraries")
ExternalProject_Add(bolt_rt
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps
diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt
index 7c1b79af4..ee6ab7bd4 100644
--- a/bolt/runtime/CMakeLists.txt
+++ b/bolt/runtime/CMakeLists.txt
@@ -10,10 +10,12 @@ check_include_files(elf.h HAVE_ELF_H)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
${CMAKE_CURRENT_BINARY_DIR}/config.h)
-add_library(bolt_rt_instr STATIC
- instr.cpp
- ${CMAKE_CURRENT_BINARY_DIR}/config.h
- )
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ add_library(bolt_rt_instr STATIC
+ instr.cpp
+ ${CMAKE_CURRENT_BINARY_DIR}/config.h
+ )
+endif()
add_library(bolt_rt_hugify STATIC
hugify.cpp
${CMAKE_CURRENT_BINARY_DIR}/config.h
@@ -23,16 +25,24 @@ set(BOLT_RT_FLAGS
-ffreestanding
-fno-exceptions
-fno-rtti
- -fno-stack-protector
- -mno-sse)
+ -fno-stack-protector)
+
+# x86 exclusive option
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ list(APPEND BOLT_RT_FLAGS -mno-sse)
+endif()
# Don't let the compiler think it can create calls to standard libs
-target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
-target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
+ target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+endif()
target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
-install(TARGETS bolt_rt_instr DESTINATION lib)
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ install(TARGETS bolt_rt_instr DESTINATION lib)
+endif()
install(TARGETS bolt_rt_hugify DESTINATION lib)
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 008dbb6c3..6869742e7 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -39,6 +39,45 @@ typedef int int32_t;
#endif
// Save all registers while keeping 16B stack alignment
+#if defined (__aarch64__)
+#define SAVE_ALL \
+ "stp x0, x1, [sp, #-16]!\n" \
+ "stp x2, x3, [sp, #-16]!\n" \
+ "stp x4, x5, [sp, #-16]!\n" \
+ "stp x6, x7, [sp, #-16]!\n" \
+ "stp x8, x9, [sp, #-16]!\n" \
+ "stp x10, x11, [sp, #-16]!\n" \
+ "stp x12, x13, [sp, #-16]!\n" \
+ "stp x14, x15, [sp, #-16]!\n" \
+ "stp x16, x17, [sp, #-16]!\n" \
+ "stp x18, x19, [sp, #-16]!\n" \
+ "stp x20, x21, [sp, #-16]!\n" \
+ "stp x22, x23, [sp, #-16]!\n" \
+ "stp x24, x25, [sp, #-16]!\n" \
+ "stp x26, x27, [sp, #-16]!\n" \
+ "stp x28, x29, [sp, #-16]!\n" \
+ "stp x30, xzr, [sp, #-16]!\n" // x30 is paired with xzr as filler so every push moves sp by 16 bytes
+
+// Mirrors SAVE_ALL: reloads the pairs in reverse order; the load into xzr just discards the filler slot
+#define RESTORE_ALL \
+ "ldp x30, xzr, [sp], #16\n" \
+ "ldp x28, x29, [sp], #16\n" \
+ "ldp x26, x27, [sp], #16\n" \
+ "ldp x24, x25, [sp], #16\n" \
+ "ldp x22, x23, [sp], #16\n" \
+ "ldp x20, x21, [sp], #16\n" \
+ "ldp x18, x19, [sp], #16\n" \
+ "ldp x16, x17, [sp], #16\n" \
+ "ldp x14, x15, [sp], #16\n" \
+ "ldp x12, x13, [sp], #16\n" \
+ "ldp x10, x11, [sp], #16\n" \
+ "ldp x8, x9, [sp], #16\n" \
+ "ldp x6, x7, [sp], #16\n" \
+ "ldp x4, x5, [sp], #16\n" \
+ "ldp x2, x3, [sp], #16\n" \
+ "ldp x0, x1, [sp], #16\n"
+
+#else
#define SAVE_ALL \
"push %%rax\n" \
"push %%rbx\n" \
@@ -75,6 +114,7 @@ typedef int int32_t;
"pop %%rcx\n" \
"pop %%rbx\n" \
"pop %%rax\n"
+#endif
// Functions that are required by freestanding environment. Compiler may
// generate calls to these implicitly.
@@ -129,6 +169,189 @@ constexpr uint32_t BufSize = 10240;
#define _STRINGIFY(x) #x
#define STRINGIFY(x) _STRINGIFY(x)
+#if defined (__aarch64__)
+// Declare some syscall wrappers we use throughout this code to avoid linking
+// against system libc.
+uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
+ uint64_t ret;
+ register uint64_t x0 __asm__("x0") = fd; // arguments are pinned to x0..x2
+ register const void *x1 __asm__("x1") = buf;
+ register uint64_t x2 __asm__("x2") = count;
+ register uint32_t w8 __asm__("w8") = 63; // syscall number goes in w8; 63 = read in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %0, x0" // the kernel's result comes back in x0
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret;
+}
+
+uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
+ uint64_t ret;
+ register uint64_t x0 __asm__("x0") = fd; // arguments are pinned to x0..x2
+ register const void *x1 __asm__("x1") = buf;
+ register uint64_t x2 __asm__("x2") = count;
+ register uint32_t w8 __asm__("w8") = 64; // syscall number goes in w8; 64 = write in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %0, x0" // the kernel's result comes back in x0
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret;
+}
+
+void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
+ uint64_t fd, uint64_t offset) {
+ void *ret;
+ register uint64_t x0 __asm__("x0") = addr; // the six arguments are pinned to x0..x5
+ register uint64_t x1 __asm__("x1") = size;
+ register uint64_t x2 __asm__("x2") = prot;
+ register uint64_t x3 __asm__("x3") = flags;
+ register uint64_t x4 __asm__("x4") = fd;
+ register uint64_t x5 __asm__("x5") = offset;
+ register uint32_t w8 __asm__("w8") = 222; // syscall number goes in w8; 222 = mmap in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %0, x0" // the kernel's result comes back in x0
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8)
+ : "cc", "memory");
+ return ret;
+}
+
+uint64_t __munmap(void *addr, uint64_t size) {
+ uint64_t ret;
+ register void *x0 __asm__("x0") = addr; // arguments are pinned to x0..x1
+ register uint64_t x1 __asm__("x1") = size;
+ register uint32_t w8 __asm__("w8") = 215; // syscall number goes in w8; 215 = munmap in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %0, x0" // the kernel's result comes back in x0
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(w8)
+ : "cc", "memory");
+ return ret;
+}
+
+uint64_t __exit(uint64_t code) {
+ uint64_t ret;
+ register uint64_t x0 __asm__("x0") = code; // exit status is passed in x0
+ register uint32_t w8 __asm__("w8") = 94; // syscall number goes in w8; 94 = exit_group in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %0, x0" // only reached if the syscall returns
+ : "=r"(ret), "+r"(x0)
+ : "r"(w8)
+ : "cc", "memory", "x1");
+ return ret;
+}
+
+uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
+ uint64_t ret;
+ register int x0 __asm__("x0") = -100; // -100 is AT_FDCWD: resolve pathname relative to the current directory
+ register const char *x1 __asm__("x1") = pathname;
+ register uint64_t x2 __asm__("x2") = flags;
+ register uint64_t x3 __asm__("x3") = mode;
+ register uint32_t w8 __asm__("w8") = 56; // syscall number goes in w8; 56 = openat in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %0, x0" // the kernel's result comes back in x0
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(x3), "r"(w8)
+ : "cc", "memory");
+ return ret;
+}
+
+int __madvise(void *addr, size_t length, int advice) {
+ int ret;
+ register void *x0 __asm__("x0") = addr; // arguments are pinned to x0..x2
+ register size_t x1 __asm__("x1") = length;
+ register int x2 __asm__("x2") = advice;
+ register uint32_t w8 __asm__("w8") = 233; // syscall number goes in w8; 233 = madvise in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %w0, w0" // int result: copy only the low 32 bits of x0
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret;
+}
+
+int __mprotect(void *addr, size_t len, int prot) {
+ int ret;
+ register void *x0 __asm__("x0") = addr; // arguments are pinned to x0..x2
+ register size_t x1 __asm__("x1") = len;
+ register int x2 __asm__("x2") = prot;
+ register uint32_t w8 __asm__("w8") = 226; // syscall number goes in w8; 226 = mprotect in the Linux generic (arm64) table
+ __asm__ __volatile__("svc #0\n" // trap into the kernel
+ "mov %w0, w0" // int result: copy only the low 32 bits of x0
+ : "=r"(ret), "+r"(x0), "+r"(x1)
+ : "r"(x2), "r"(w8)
+ : "cc", "memory");
+ return ret;
+}
+
+// Helper functions for writing strings to the .fdata file. We intentionally
+// avoid using libc names to make it clear it is our impl.
+
+/// Compare at most Num bytes of two strings: 0 if equal, else the difference of the first mismatching bytes.
+int strnCmp(const char *Str1, const char *Str2, size_t Num) {
+ while (Num && *Str1 && (*Str1 == *Str2)) { // stop at Num bytes, at the end of Str1, or at a mismatch
+ Num--;
+ Str1++;
+ Str2++;
+ }
+ if (Num == 0) // all Num bytes matched
+ return 0;
+ return *(unsigned char *)Str1 - *(unsigned char *)Str2; // bytes are compared as unsigned values
+}
+
+uint32_t strLen(const char *Str) { // number of bytes before the terminating NUL
+ uint32_t Size = 0;
+ while (*Str++)
+ ++Size;
+ return Size;
+}
+
+/// Write number Num using Base (2..16) to the buffer in OutBuf, returns a
+/// pointer to the end of the string. No NUL terminator is written.
+char *intToStr(char *OutBuf, uint64_t Num, uint32_t Base) {
+ const char *Chars = "0123456789abcdef";
+ char Buf[64]; // worst case is Base 2: a 64-bit value has up to 64 digits
+ char *Ptr = Buf;
+ while (Num) { // digits are produced least-significant first
+ *Ptr++ = *(Chars + (Num % Base));
+ Num /= Base;
+ }
+ if (Ptr == Buf) { // Num was 0: emit a single '0'
+ *OutBuf++ = '0';
+ return OutBuf;
+ }
+ while (Ptr != Buf) // copy out in reverse so the most-significant digit comes first
+ *OutBuf++ = *--Ptr;
+
+ return OutBuf;
+}
+
+/// Copy Str to OutBuf without a NUL terminator, stopping once Size bytes are copied; returns a pointer past the last byte written
+char *strCopy(char *OutBuf, const char *Str, int32_t Size = BufSize) {
+ while (*Str) {
+ *OutBuf++ = *Str++;
+ if (--Size <= 0) // the limit is checked after the copy, so a non-empty Str always yields at least one byte
+ return OutBuf;
+ }
+ return OutBuf;
+}
+
+void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) {
+ char Buf[BufSize];
+ char *Ptr = Buf;
+ Ptr = strCopy(Ptr, Msg, BufSize - 23); // cap Msg so 23 bytes stay free for the number and the newline
+ Ptr = intToStr(Ptr, Num, Base);
+ Ptr = strCopy(Ptr, "\n");
+ __write(2, Buf, Ptr - Buf); // fd 2 (stderr); Buf is never NUL-terminated, the length is explicit
+}
+
+void reportError(const char *Msg, uint64_t Size) {
+ __write(2, Msg, Size); // emit the first Size bytes of Msg on fd 2 (stderr)
+ __exit(1); // then terminate with status 1
+}
+#else
uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
#if defined(__APPLE__)
@@ -550,5 +773,6 @@ public:
inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
return (Value + Align - 1) / Align * Align;
}
+#endif
} // anonymous namespace
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index 69e1a7e06..385e4d147 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -6,26 +6,25 @@
//
//===----------------------------------------------------------------------===//
-#if defined (__x86_64__)
#if !defined(__APPLE__)
#include "common.h"
#include <sys/mman.h>
// Enables a very verbose logging to stderr useful when debugging
-//#define ENABLE_DEBUG
+// #define ENABLE_DEBUG
// Function pointers to init routines in the binary, so we can resume
// regular execution of the function that we hooked.
extern void (*__bolt_hugify_init_ptr)();
// The __hot_start and __hot_end symbols set by Bolt. We use them to figure
-// out the rage for marking huge pages.
+// out the range for marking huge pages.
extern uint64_t __hot_start;
extern uint64_t __hot_end;
#ifdef MADV_HUGEPAGE
-/// Check whether the kernel supports THP via corresponding sysfs entry.
+// Check whether the kernel supports THP via corresponding sysfs entry.
static bool has_pagecache_thp_support() {
char buf[256] = {0};
const char *madviseStr = "always [madvise] never";
@@ -116,14 +115,22 @@ extern "C" void __bolt_hugify_self_impl() {
#endif
}
-/// This is hooking ELF's entry, it needs to save all machine state.
+// This is hooking ELF's entry, it needs to save all machine state.
extern "C" __attribute((naked)) void __bolt_hugify_self() {
+#if defined (__x86_64__)
__asm__ __volatile__(SAVE_ALL
"call __bolt_hugify_self_impl\n"
RESTORE_ALL
"jmp *__bolt_hugify_init_ptr(%%rip)\n"
:::);
-}
-
+#elif defined (__aarch64__)
+ __asm__ __volatile__(SAVE_ALL
+ "bl __bolt_hugify_self_impl\n" // run the C++ implementation with all registers saved
+ RESTORE_ALL
+ "ldr x16, =__bolt_hugify_init_ptr\n" // x16 = address of the pointer variable (literal-pool load)
+ "ldr x16, [x16]\n" // x16 = the init routine stored in it
+ "br x16\n" // jump (not call) to that routine, resuming the hooked entry
+ :::);
#endif
+}
#endif
diff --git a/bolt/test/runtime/AArch64/Inputs/user_func_order.txt b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
new file mode 100644
index 000000000..48b76cd35
--- /dev/null
+++ b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
@@ -0,0 +1,2 @@
+main
+fib
diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c
new file mode 100644
index 000000000..fcb92bca1
--- /dev/null
+++ b/bolt/test/runtime/AArch64/user-func-reorder.c
@@ -0,0 +1,44 @@
+/* Checks that BOLT correctly processes a user-provided function list file,
+ * reorders functions according to this list, updates the hot_start and
+ * hot_end symbols, and inserts a function to perform hot text mapping during
+ * program startup.
+ */
+#include <stdio.h>
+
+int foo(int x) {
+ return x + 1;
+}
+
+int fib(int x) {
+ if (x < 2)
+ return x;
+ return fib(x - 1) + fib(x - 2);
+}
+
+int bar(int x) {
+ return x - 1;
+}
+
+int main(int argc, char **argv) {
+ printf("fib(%d) = %d\n", argc, fib(argc));
+ return 0;
+}
+
+/*
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
+
+RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
+RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
+RUN: llvm-nm --numeric-sort --print-armap %t | \
+RUN: FileCheck %s -check-prefix=CHECK-NM
+RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT
+
+CHECK-NM: W __hot_start
+CHECK-NM: T main
+CHECK-NM-NEXT: T fib
+CHECK-NM-NEXT: W __hot_end
+
+CHECK-OUTPUT: fib(4) = 3
+*/
--
2.33.0

View File

@ -0,0 +1,259 @@
From b26ff1c328435d7b0ceccec1dcc25252821ad373 Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Mon, 11 Sep 2023 14:43:12 +0800
Subject: [PATCH] [AArch64] Add AArch64 support for inline.
---
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 12 +++++++
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
12 files changed, 137 insertions(+)
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
create mode 100644 bolt/test/AArch64/inline-debug-info.test
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
create mode 100644 bolt/test/AArch64/jmp-optimization.test
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 04232bd3e..d009d59dc 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -464,6 +464,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
<< ". Size change: " << SizeAfterInlining
<< " bytes.\n");
+// Skip callees that contain A64 instructions which can't be inlined:
+// # Indirect branches, e.g., BR.
+// # Branches that leave the callee, i.e., are used to make a function call.
+ if (BC.isAArch64()) {
+ auto &MIB = *BC.MIB;
+ bool skip = false;
+ for (const BinaryBasicBlock &BB : *TargetFunction) {
+ for (MCInst Inst : BB) {
+ if (MIB.isPseudo(Inst))
+ continue;
+
+ MIB.stripAnnotations(Inst, false);
+
+ if (MIB.isBranch(Inst)) {
+ const BinaryBasicBlock *TargetBB =
+ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
+ if (MIB.isIndirectBranch(Inst) || !TargetBB) {
+ skip = true;
+ break;
+ }
+ }
+ }
+ if (skip)
+ break;
+ }
+ if (skip) {
+ ++InstIt;
+ continue;
+ }
+ }
+
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
DidInlining = true;
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index c736196a8..03b1b536f 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -34,6 +34,16 @@ public:
const MCRegisterInfo *RegInfo)
: MCPlusBuilder(Analysis, Info, RegInfo) {}
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ Inst.setOpcode(AArch64::BL); // BL = branch with link, the A64 direct call
+ Inst.addOperand(MCOperand::createExpr( // single operand: a symbol reference to Target
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
+ return true;
+ }
+
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
@@ -1103,6 +1113,8 @@ public:
bool isLeave(const MCInst &Inst) const override { return false; }
+ bool isPush(const MCInst &Inst) const override { return false; }
+
bool isPop(const MCInst &Inst) const override { return false; }
bool isPrefix(const MCInst &Inst) const override { return false; }
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
new file mode 100644
index 000000000..1307c13f2
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
@@ -0,0 +1,5 @@
+#include "stub.h"
+
+void foo() {
+ puts("Hello world!\n");
+}
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
new file mode 100644
index 000000000..7853d2b63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-main.c
@@ -0,0 +1,5 @@
+extern void foo();
+int main() {
+ foo();
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
new file mode 100644
index 000000000..a6ff9e262
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
@@ -0,0 +1,23 @@
+extern "C" int printf(const char*, ...);
+extern const char* question();
+
+inline int answer() __attribute__((always_inline));
+inline int answer() { return 42; }
+
+int main(int argc, char *argv[]) {
+ int ans;
+ if (argc == 1) {
+ ans = 0;
+ } else {
+ ans = argc;
+ }
+ printf("%s\n", question());
+ for (int i = 0; i < 10; ++i) {
+ int x = answer();
+ int y = answer();
+ ans += x - y;
+ }
+ // padding to make sure question() is inlineable
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
+ return ans;
+}
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
new file mode 100644
index 000000000..edb7ab145
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
@@ -0,0 +1,3 @@
+const char* question() {
+ return "What do you get if you multiply six by nine?";
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
new file mode 100644
index 000000000..cd6d53c35
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
@@ -0,0 +1,7 @@
+int g();
+
+int main() {
+ int x = g();
+ int y = x*x;
+ return y;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
new file mode 100644
index 000000000..80b853d63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
@@ -0,0 +1,3 @@
+int f() {
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
new file mode 100644
index 000000000..7fb551163
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
@@ -0,0 +1,3 @@
+int f();
+
+int g() { return f(); }
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
new file mode 100644
index 000000000..e20e5e31e
--- /dev/null
+++ b/bolt/test/AArch64/inline-debug-info.test
@@ -0,0 +1,20 @@
+## Check that BOLT correctly prints and updates debug info for inlined
+## functions.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
+# RUN: -o %t.bolt \
+# RUN: | FileCheck %s
+
+## The call to puts() (a bl on AArch64) should come from inline-foo.c:
+# CHECK: bl {{.*}} # debug line {{.*}}inline-foo.c:4:3
+
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
+
+## Dump of main() should include debug info from inline-foo.c after inlining:
+# CHECK-OBJDUMP: inline-foo.c:4
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
new file mode 100644
index 000000000..5a87bdde9
--- /dev/null
+++ b/bolt/test/AArch64/inlined-function-mixed.test
@@ -0,0 +1,11 @@
+# Make sure inlining from a unit with debug info into a unit without
+# debug info does not cause a crash.
+
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
+
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
+
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
new file mode 100644
index 000000000..92f4b9a14
--- /dev/null
+++ b/bolt/test/AArch64/jmp-optimization.test
@@ -0,0 +1,14 @@
+# Tests the optimization of functions that just do a tail call in the beginning.
+
+# This test has commands that rely on shell capabilities that won't execute
+# correctly on Windows e.g. unsupported parameter expansion
+REQUIRES: shell
+
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
+
+CHECK: <main>:
+CHECK-NOT: bl
+CHECK: mov w0, wzr
+CHECK: ret
--
2.33.0

View File

@ -15,13 +15,17 @@
Name: llvm-bolt
Version: %{bolt_version}
Release: 1
Release: 2
Summary: BOLT is a post-link optimizer developed to speed up large applications
License: Apache 2.0
URL: https://github.com/llvm/llvm-project/tree/main/bolt
Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz
Patch1: 0001-AArch64-fix-bug-55005-handle-DW_CFA_GNU_NegateRAState.patch
Patch2: 0002-AArch64-Add-AArch64-support-for-hugify.patch
Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch
BuildRequires: gcc
BuildRequires: gcc-c++
BuildRequires: cmake
@ -97,13 +101,11 @@ mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir}
%check
%if %{with check}
%ifarch x86_64
# Bolt makes incorrect assumptions on the location of libbolt_rt_*.a.
mkdir -p %{_builddir}/%{bolt_srcdir}/lib
for rt in libbolt_rt_instr libbolt_rt_hugify
do
ln -s %{buildroot}/%{install_libdir}/${rt}.a %{_builddir}/%{bolt_srcdir}/lib
done
ln -s %{buildroot}/%{install_libdir}/libbolt_rt_hugify.a %{_builddir}/%{bolt_srcdir}/lib
%ifarch x86_64
ln -s %{buildroot}/%{install_libdir}/libbolt_rt_instr.a %{_builddir}/%{bolt_srcdir}/lib
%endif
%ifarch aarch64
@ -126,9 +128,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a
%{install_bindir}/merge-fdata
%{install_bindir}/perf2bolt
%{install_bindir}/llvm-bolt-heatmap
%{install_libdir}/libbolt_rt_hugify.a
%ifarch x86_64
%{install_libdir}/libbolt_rt_hugify.a
%{install_libdir}/libbolt_rt_instr.a
%endif
@ -137,8 +139,13 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a
%files doc
%doc %{install_docdir}
%changelog
* Thu Sep 7 2023 Xiong Zhou <xiongzhou4@huawei.com> 15.0.7-2
- Type:Update
- ID:NA
- SUG:NA
- DESC: Backport bugfix and add AArch64 support for hugify and inline.
* Thu Aug 31 2023 zhenyu zhao <zhaozhenyu17@huaiwei.com> 15.0.7-1
- Type:Update
- ID:NA
@ -156,4 +163,3 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/lib/lib*.a
- ID:NA
- SUG:NA
- DESC:Init llvm-bolt repository