llvm-bolt/0003-AArch64-Add-AArch64-support-for-inline.patch

260 lines
9.1 KiB
Diff

From b26ff1c328435d7b0ceccec1dcc25252821ad373 Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Mon, 11 Sep 2023 14:43:12 +0800
Subject: [PATCH] [AArch64] Add AArch64 support for inline.
---
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 12 +++++++
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
12 files changed, 137 insertions(+)
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
create mode 100644 bolt/test/AArch64/inline-debug-info.test
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
create mode 100644 bolt/test/AArch64/jmp-optimization.test
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 04232bd3e..d009d59dc 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -464,6 +464,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
<< ". Size change: " << SizeAfterInlining
<< " bytes.\n");
+      // AArch64: skip this call site when the callee contains a branch that
+      // cannot be inlined: an indirect branch (e.g. BR), or a branch used to
+      // make a function call (its target is not a basic block of the callee).
+      if (BC.isAArch64()) {
+        auto &MIB = *BC.MIB;
+        bool HasUninlinableBranch = false;
+        for (const BinaryBasicBlock &CalleeBB : *TargetFunction) {
+          for (MCInst Inst : CalleeBB) { // By value: the copy is mutated below.
+            if (MIB.isPseudo(Inst))
+              continue;
+
+            MIB.stripAnnotations(Inst, false);
+
+            // Test for an indirect branch before asking for a target symbol.
+            if (MIB.isBranch(Inst) &&
+                (MIB.isIndirectBranch(Inst) ||
+                 !TargetFunction->getBasicBlockForLabel(
+                     MIB.getTargetSymbol(Inst)))) {
+              HasUninlinableBranch = true;
+              break;
+            }
+          }
+          if (HasUninlinableBranch)
+            break;
+        }
+        if (HasUninlinableBranch) {
+          ++InstIt;
+          continue;
+        }
+      }
+
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
DidInlining = true;
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index c736196a8..03b1b536f 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -34,6 +34,16 @@ public:
const MCRegisterInfo *RegInfo)
: MCPlusBuilder(Analysis, Info, RegInfo) {}
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ Inst.setOpcode(AArch64::BL);
+ Inst.addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
+ return true;
+ }
+
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
@@ -1103,6 +1113,8 @@ public:
bool isLeave(const MCInst &Inst) const override { return false; }
+ bool isPush(const MCInst &Inst) const override { return false; }
+
bool isPop(const MCInst &Inst) const override { return false; }
bool isPrefix(const MCInst &Inst) const override { return false; }
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
new file mode 100644
index 000000000..1307c13f2
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
@@ -0,0 +1,5 @@
+#include "stub.h"
+
+void foo() {
+ puts("Hello world!\n");
+}
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
new file mode 100644
index 000000000..7853d2b63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-main.c
@@ -0,0 +1,5 @@
+extern void foo();
+int main() {
+ foo();
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
new file mode 100644
index 000000000..a6ff9e262
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
@@ -0,0 +1,23 @@
+extern "C" int printf(const char*, ...);
+extern const char* question();
+
+inline int answer() __attribute__((always_inline));
+inline int answer() { return 42; }
+
+int main(int argc, char *argv[]) {
+ int ans;
+ if (argc == 1) {
+ ans = 0;
+ } else {
+ ans = argc;
+ }
+ printf("%s\n", question());
+ for (int i = 0; i < 10; ++i) {
+ int x = answer();
+ int y = answer();
+ ans += x - y;
+ }
+ // padding to make sure question() is inlineable
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
+ return ans;
+}
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
new file mode 100644
index 000000000..edb7ab145
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
@@ -0,0 +1,3 @@
+const char* question() {
+ return "What do you get if you multiply six by nine?";
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
new file mode 100644
index 000000000..cd6d53c35
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
@@ -0,0 +1,7 @@
+int g();
+
+int main() {
+ int x = g();
+ int y = x*x;
+ return y;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
new file mode 100644
index 000000000..80b853d63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
@@ -0,0 +1,3 @@
+int f() {
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
new file mode 100644
index 000000000..7fb551163
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
@@ -0,0 +1,3 @@
+int f();
+
+int g() { return f(); }
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
new file mode 100644
index 000000000..e20e5e31e
--- /dev/null
+++ b/bolt/test/AArch64/inline-debug-info.test
@@ -0,0 +1,20 @@
+## Check that BOLT correctly prints and updates debug info for inlined
+## functions.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
+# RUN: -o %t.bolt \
+# RUN: | FileCheck %s
+
+## The call to puts() (an AArch64 BL) should come from inline-foo.c:
+# CHECK: bl {{.*}} # debug line {{.*}}inline-foo.c:4:3
+
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
+
+## Dump of main() should include debug info from inline-foo.c after inlining:
+# CHECK-OBJDUMP: inline-foo.c:4
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
new file mode 100644
index 000000000..5a87bdde9
--- /dev/null
+++ b/bolt/test/AArch64/inlined-function-mixed.test
@@ -0,0 +1,11 @@
+# Make sure inlining from a unit with debug info into a unit without
+# debug info does not cause a crash.
+
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
+
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
+
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
new file mode 100644
index 000000000..92f4b9a14
--- /dev/null
+++ b/bolt/test/AArch64/jmp-optimization.test
@@ -0,0 +1,14 @@
+# Tests the optimization of functions that just do a tail call in the beginning.
+
+# This test has commands that rely on shell capabilities that won't execute
+# correctly on Windows, e.g. unsupported parameter expansion.
+REQUIRES: shell
+
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
+
+CHECK: <main>:
+CHECK-NOT: bl
+CHECK: mov w0, wzr
+CHECK: ret
--
2.33.0