From 96eff0ec88e75a49cc186476efd84370e6137b42 Mon Sep 17 00:00:00 2001 From: h00502206 Date: Tue, 4 Jun 2024 20:18:05 +0800 Subject: [PATCH] Added open-source code related to feature extracting from 'angelica-moreira: https://github.com/angelica-moreira/BOLT' on the basis of llvm-bolt, and modified some code to pass the compilation. --- .../bolt/include/bolt/Passes/FeatureMiner.h | 178 +++ .../include/bolt/Passes/StaticBranchInfo.h | 116 ++ bolt/lib/Passes/CMakeLists.txt | 2 + bolt/lib/Passes/FeatureMiner.cpp | 1067 +++++++++++++++++ .../bolt/lib/Passes/StaticBranchInfo.cpp | 162 +++ 5 files changed, 1525 insertions(+) create mode 100644 bolt/include/bolt/Passes/FeatureMiner.h create mode 100644 bolt/include/bolt/Passes/StaticBranchInfo.h create mode 100644 bolt/lib/Passes/FeatureMiner.cpp create mode 100644 bolt/lib/Passes/StaticBranchInfo.cpp diff --git a/bolt/include/bolt/Passes/FeatureMiner.h b/bolt/include/bolt/Passes/FeatureMiner.h new file mode 100644 index 000000000..916e5515d --- /dev/null +++ b/bolt/include/bolt/Passes/FeatureMiner.h @@ -0,0 +1,178 @@ +//===--- Passes/FeatureMiner.h ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// A very simple feature extractor based on Calder's paper +// Evidence-based static branch prediction using machine learning +// https://dl.acm.org/doi/10.1145/239912.239923 +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ +#define LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ + +// #include "BinaryContext.h" +// #include "BinaryFunction.h" +// #include "BinaryLoop.h" +// #include "DominatorAnalysis.h" +// #include "Passes/BinaryPasses.h" +// #include "Passes/StaticBranchInfo.h" +#include "bolt/Core/BinaryData.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryLoop.h" +#include "bolt/Passes/DominatorAnalysis.h" +#include "bolt/Passes/BinaryPasses.h" +#include "bolt/Passes/StaticBranchInfo.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +namespace llvm { +namespace bolt { + +class FeatureMiner : public BinaryFunctionPass { +private: + std::unique_ptr SBI; + + /// BasicBlockInfo - This structure holds feature information about the target + /// BasicBlock of either the taken or the fallthrough paths of a given branch. 
+ struct BasicBlockInfo { + Optional BranchDominates; // 1 - dominates, 0 - does not dominate + Optional BranchPostdominates; // 1 - postdominates, 0 - does not PD + Optional LoopHeader; // 1 - loop header, 0 - not a loop header + Optional Backedge; // 1 - loop back, 0 - not a loop back + Optional Exit; // 1 - loop exit, 0 - not a loop exit + Optional Call; // 1 - program call, 0 - not a program call + Optional NumCalls; + Optional NumLoads; + Optional NumStores; + Optional EndOpcode; // 0 = NOTHING + StringRef EndOpcodeStr = "UNDEF"; + Optional BasicBlockSize; + std::string FromFunName = "UNDEF"; + uint32_t FromBb; // input offset of the branch's basic block; no default, assigned in addSuccessorInfo + std::string ToFunName = "UNDEF"; + uint32_t ToBb; // input offset of the successor basic block; no default, assigned in addSuccessorInfo + + + Optional NumCallsExit; + Optional NumCallsInvoke; + Optional NumIndirectCalls; + Optional NumTailCalls; + }; + + typedef std::unique_ptr BBIPtr; + + /// BranchFeaturesInfo - This structure holds feature information about each + /// two-way branch from the program. + struct BranchFeaturesInfo { + StringRef OpcodeStr = "UNDEF"; + StringRef CmpOpcodeStr = "UNDEF"; + bool Simple = 0; + + Optional Opcode; + Optional CmpOpcode; + Optional Count; + Optional MissPredicted; + Optional FallthroughCount; + Optional FallthroughMissPredicted; + BBIPtr TrueSuccessor = std::make_unique(); + BBIPtr FalseSuccessor = std::make_unique(); + Optional ProcedureType; // 1 - Leaf, 0 - NonLeaf, 2 - CallSelf + Optional LoopHeader; // 1 - loop header, 0 - not a loop header + Optional Direction; // 1 - Forward Branch, 0 - Backward Branch + + Optional NumOuterLoops; + Optional TotalLoops; + Optional MaximumLoopDepth; + Optional LoopDepth; + Optional LoopNumExitEdges; + Optional LoopNumExitBlocks; + Optional LoopNumExitingBlocks; + Optional LoopNumLatches; + Optional LoopNumBlocks; + Optional LoopNumBackEdges; + Optional NumLoads; + Optional NumStores; + + Optional LocalExitingBlock; + Optional LocalLatchBlock; + Optional LocalLoopHeader; + Optional Call; + + Optional NumCalls; + Optional NumCallsExit; + Optional 
NumCallsInvoke; + Optional NumIndirectCalls; + Optional NumTailCalls; + Optional NumSelfCalls; + + Optional NumBasicBlocks; + + Optional DeltaTaken; + + Optional OperandRAType; + Optional OperandRBType; + + Optional BasicBlockSize; + + Optional BranchOffset; + }; + + typedef std::unique_ptr BFIPtr; + std::vector BranchesInfoSet; + + /// getProcedureType - Determines which category the function falls into: + /// Leaf (returns 1), Non-leaf (returns 0) or Calls-self (returns 2). + int8_t getProcedureType(BinaryFunction &Function, BinaryContext &BC); + + /// addSuccessorInfo - Discovers feature information for the target successor + /// basic block, and stores it in BFI as the true or the false successor (selected by Succ). + void addSuccessorInfo(DominatorAnalysis &DA, + DominatorAnalysis &PDA, BFIPtr const &BFI, + BinaryFunction &Function, BinaryContext &BC, + MCInst &Inst, BinaryBasicBlock &BB, bool Succ); + + /// extractFeatures - Extracts the feature information for each conditional branch + /// of Function and writes one comma-separated record per branch to Printer. + void extractFeatures(BinaryFunction &Function, + BinaryContext &BC, + raw_ostream &Printer); + + /// dumpSuccessorFeatures - Dumps the feature information about the target + /// BasicBlock of either the taken or the fallthrough paths of a given branch. + void dumpSuccessorFeatures(raw_ostream &Printer, BBIPtr &Successor); + + /// dumpFeatures - Dumps the feature information about each two-way branch + /// from the program. + void dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress, + uint64_t FunctionFrequency); + + /// dumpProfileData - Dumps a limited version of the input profile data + /// that contains only profile for conditional branches, unconditional + /// branches and terminators that aren't branches. 
+ void dumpProfileData(BinaryFunction &Function, raw_ostream &Printer); + +public: + explicit FeatureMiner(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + const char *getName() const override { return "feature-miner"; } + + void runOnFunctions(BinaryContext &BC) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ */ \ No newline at end of file diff --git a/bolt/include/bolt/Passes/StaticBranchInfo.h b/bolt/include/bolt/Passes/StaticBranchInfo.h new file mode 100644 index 000000000..1713d3367 --- /dev/null +++ b/bolt/include/bolt/Passes/StaticBranchInfo.h @@ -0,0 +1,116 @@ +//===------ Passes/StaticBranchInfo.h -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an auxiliary class to the feature miner, static branch probability +// and frequency passes. This class is responsible for finding loop info (loop +// back edges, loop exit edges and loop headers) of a function. It also finds +// basic block info (if a block contains store and call instructions) and if a +// basic block contains a call to the exit. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ +#define LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ + +// #include "BinaryContext.h" +// #include "BinaryFunction.h" +// #include "BinaryLoop.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryLoop.h" + +#include "llvm/MC/MCSymbol.h" +// add new include +#include + +namespace llvm { +namespace bolt { + +class StaticBranchInfo { + +public: + /// An edge indicates that a control flow may go from a basic block (source) + /// to another one (destination), and this pair of basic blocks will be used + /// to index maps and retrieve content of sets. + typedef std::pair Edge; + +private: + /// Holds the loop headers of a given function. + DenseSet LoopHeaders; + + /// Holds the loop backedges of a given function. + DenseSet BackEdges; + + /// Holds the loop exit edges of a given function. + DenseSet ExitEdges; + + /// Holds the basic blocks of a given function + /// that contain at least one call instruction. + DenseSet CallSet; + + /// Holds the basic blocks of a given function + /// that contain at least one store instruction. + DenseSet StoreSet; + + unsigned NumLoads; // NOTE(review): no in-class initializer; presumably set by findBasicBlockInfo()/clear() - confirm + unsigned NumStores; // NOTE(review): same as NumLoads - confirm it is initialized before getNumStores() is used + +public: + unsigned getNumLoads() { return NumLoads; } + + unsigned getNumStores() { return NumStores; } + + /// findLoopEdgesInfo - Finds all loop back edges, loop exit edges + /// and loop headers within the function. + void findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo); + + /// findBasicBlockInfo - Finds all call and store instructions within + /// the basic blocks of a given function. + void findBasicBlockInfo(const BinaryFunction &Function, BinaryContext &BC); + + /// isBackEdge - Checks if the edge is a loop back edge. + bool isBackEdge(const Edge &CFGEdge) const; + + /// isBackEdge - Checks if the edge from SrcBB to DstBB is a loop back edge. 
+ bool isBackEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const; + + /// isExitEdge - Checks if the edge is a loop exit edge. + bool isExitEdge(const BinaryLoop::Edge &CFGEdge) const; + + /// isExitEdge - Checks if the edge from SrcBB to DstBB is a loop exit edge. + bool isExitEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const; + + /// isLoopHeader - Checks if the basic block is a loop header. + bool isLoopHeader(const BinaryBasicBlock *BB) const; + + /// hasCallInst - Checks if the basic block has a call instruction. + bool hasCallInst(const BinaryBasicBlock *BB) const; + + /// hasStoreInst - Checks if the basic block has a store instruction. + bool hasStoreInst(const BinaryBasicBlock *BB) const; + + /// callToExit - Checks if a basic block invokes the exit function. + bool callToExit(BinaryBasicBlock *BB, BinaryContext &BC) const; + + /// countBackEdges - Computes the number of BB's successor edges that are back edges. + unsigned countBackEdges(BinaryBasicBlock *BB) const; + + /// countExitEdges - Computes the number of BB's successor edges that are exit edges. + unsigned countExitEdges(BinaryBasicBlock *BB) const; + + /// clear - Cleans up all the content from the data structs used. 
+ void clear(); +}; + +} // namespace bolt +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ */ \ No newline at end of file diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index bb296263b..901ff614c 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMBOLTPasses DataflowAnalysis.cpp DataflowInfoManager.cpp ExtTSPReorderAlgorithm.cpp + FeatureMiner.cpp FrameAnalysis.cpp FrameOptimizer.cpp HFSort.cpp @@ -39,6 +40,7 @@ add_llvm_library(LLVMBOLTPasses StackAvailableExpressions.cpp StackPointerTracking.cpp StackReachingUses.cpp + StaticBranchInfo.cpp StokeInfo.cpp TailDuplication.cpp ThreeWayBranch.cpp diff --git a/bolt/lib/Passes/FeatureMiner.cpp b/bolt/lib/Passes/FeatureMiner.cpp new file mode 100644 index 000000000..680222906 --- /dev/null +++ b/bolt/lib/Passes/FeatureMiner.cpp @@ -0,0 +1,1067 @@ +//===--- Passes/FeatureMiner.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// A very simple feature extractor based on Calder's paper +// Evidence-based static branch prediction using machine learning +// https://dl.acm.org/doi/10.1145/239912.239923 +//===----------------------------------------------------------------------===// + +// #include "Passes/FeatureMiner.h" +// #include "Passes/DataflowInfoManager.h" +// #include "llvm/Support/CommandLine.h" +// #include "llvm/Support/Options.h" +#include "bolt/Passes/FeatureMiner.h" +#include "bolt/Passes/DataflowInfoManager.h" +#include "llvm/Support/CommandLine.h" + +// add new include +#include "llvm/Support/FileSystem.h" + +#undef DEBUG_TYPE +#define DEBUG_TYPE "bolt-feature-miner" + +using namespace llvm; +using namespace bolt; + +namespace opts { + +extern cl::OptionCategory InferenceCategory; + +cl::opt VespaUseDFS( + "vespa-dfs", + cl::desc("use DFS ordering when using -gen-features option"), + cl::init(false), + cl::ReallyHidden, + cl::ZeroOrMore, + cl::cat(InferenceCategory)); + +cl::opt IncludeValidProfile( + "beetle-valid-profile-info", + cl::desc("include valid profile information."), + cl::init(false), + cl::ReallyHidden, + cl::ZeroOrMore, + cl::cat(InferenceCategory)); + +} // namespace opts + +namespace llvm { +namespace bolt { + +class BinaryFunction; + +int8_t FeatureMiner::getProcedureType(BinaryFunction &Function, + BinaryContext &BC) { + int8_t ProcedureType = 1; + for (auto &BB : Function) { + for (auto &Inst : BB) { + if (BC.MIB->isCall(Inst)) { + ProcedureType = 0; // non-leaf type + if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { + const auto *Callee = BC.getFunctionForSymbol(CalleeSymbol); + if (Callee && + Callee->getFunctionNumber() == Function.getFunctionNumber()) { + return 2; // call self type + } + } + } + } + } + return ProcedureType; // 1 (leaf) if no call was seen, otherwise 0 (non-leaf) +} + +void FeatureMiner::addSuccessorInfo(DominatorAnalysis &DA, + DominatorAnalysis &PDA, + BFIPtr const &BFI, 
BinaryFunction &Function, + BinaryContext &BC, MCInst &Inst, + BinaryBasicBlock &BB, bool SuccType) { + + BinaryBasicBlock *Successor = BB.getConditionalSuccessor(SuccType); + + if (!Successor) + return; + + unsigned NumLoads{0}; + unsigned NumStores{0}; + unsigned NumCallsExit{0}; + unsigned NumCalls{0}; + unsigned NumCallsInvoke{0}; + unsigned NumTailCalls{0}; + unsigned NumIndirectCalls{0}; + + for (auto &SuccBBInst : *Successor) { // count over the successor block; iterating BB here gave both successors the branch block's own counts + if (BC.MIB->isLoad(SuccBBInst)) { + ++NumLoads; + } else if (BC.MIB->isStore(SuccBBInst)) { + ++NumStores; + } else if (BC.MIB->isCall(SuccBBInst)) { + ++NumCalls; + + if (BC.MIB->isIndirectCall(SuccBBInst)) + ++NumIndirectCalls; + + if (BC.MIB->isInvoke(SuccBBInst)) + ++NumCallsInvoke; + + if (BC.MIB->isTailCall(SuccBBInst)) + ++NumTailCalls; + + if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(SuccBBInst)) { + StringRef CalleeName = CalleeSymbol->getName(); + if (CalleeName == "__cxa_throw@PLT" || + CalleeName == "_Unwind_Resume@PLT" || + CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || + CalleeName == "abort@PLT") + ++NumCallsExit; + } + } + } + + BBIPtr SuccBBInfo = std::make_unique(); + + // Check if the successor basic block is a loop header and store it. + SuccBBInfo->LoopHeader = SBI->isLoopHeader(Successor); + + SuccBBInfo->BasicBlockSize = Successor->size(); + + // Check if the edge getting to the successor basic block is a loop + // exit edge and store it. + SuccBBInfo->Exit = SBI->isExitEdge(&BB, Successor); + + // Check if the edge getting to the successor basic block is a loop + // back edge and store it. + SuccBBInfo->Backedge = SBI->isBackEdge(&BB, Successor); + + MCInst *SuccInst = Successor->getTerminatorBefore(nullptr); + // Store the opcode of the branch that ends the successor basic block (0 if it does not end in a branch) + SuccBBInfo->EndOpcode = (SuccInst && BC.MIA->isBranch(*SuccInst)) + ? 
SuccInst->getOpcode() + : 0; // 0 = NOTHING + if (SuccBBInfo->EndOpcode != 0) + SuccBBInfo->EndOpcodeStr = BC.MII->getName(SuccInst->getOpcode()); + else + SuccBBInfo->EndOpcodeStr = "NOTHING"; + + // Check if the successor basic block contains + // a procedure call and store it. + SuccBBInfo->Call = (NumCalls > 0) ? 1 // Contains a call instruction + : 0; // Does not contain a call instruction + + SuccBBInfo->NumStores = NumStores; + SuccBBInfo->NumLoads = NumLoads; + SuccBBInfo->NumCallsExit = NumCallsExit; + SuccBBInfo->NumCalls = NumCalls; + + SuccBBInfo->NumCallsInvoke = NumCallsInvoke; + SuccBBInfo->NumIndirectCalls = NumIndirectCalls; + SuccBBInfo->NumTailCalls = NumTailCalls; + + auto InstSucc = Successor->getLastNonPseudoInstr(); + if (InstSucc) { + // Check if the source basic block dominates its + // target basic block and store it. + SuccBBInfo->BranchDominates = (DA.doesADominateB(Inst, *InstSucc) == true) + ? 1 // Dominates + : 0; // Does not dominate + + // Check if the target basic block postdominates + // the source basic block and store it. + SuccBBInfo->BranchPostdominates = + (PDA.doesADominateB(*InstSucc, Inst) == true) + ? 1 // Postdominates + : 0; // Does not postdominate + } + + /// The following information is used as an identifier only for + /// the purpose of matching the inferred probabilities with the branches + /// in the binary. 
+ SuccBBInfo->FromFunName = Function.getPrintName(); + SuccBBInfo->FromBb = BB.getInputOffset(); + BinaryFunction *ToFun = Successor->getFunction(); + SuccBBInfo->ToFunName = ToFun->getPrintName(); + SuccBBInfo->ToBb = Successor->getInputOffset(); + + auto Offset = BC.MIB->tryGetAnnotationAs(Inst, "Offset"); + if (Offset) { + uint32_t TargetOffset = Successor->getInputOffset(); + uint32_t BranchOffset = Offset.get(); + BFI->BranchOffset = BranchOffset; + if (SuccType && BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { // DeltaTaken describes the taken edge only; the later fall-through call must not overwrite it + int64_t Delta = static_cast<int64_t>(TargetOffset) - static_cast<int64_t>(BranchOffset); // widen before subtracting: a uint32_t difference wraps for backward branches + BFI->DeltaTaken = std::abs(Delta); + } + } + + if (SuccType) { + BFI->TrueSuccessor = std::move(SuccBBInfo); + + // Check if the taken branch is a forward + // or a backwards branch and store it. + BFI->Direction = (Function.isForwardBranch(&BB, Successor) == true) + ? 1 // Forward branch + : 0; // Backwards branch + + auto TakenBranchInfo = BB.getTakenBranchInfo(); + BFI->Count = TakenBranchInfo.Count; + BFI->MissPredicted = TakenBranchInfo.MispredictedCount; + } else { + BFI->FalseSuccessor = std::move(SuccBBInfo); + + auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); + BFI->FallthroughCount = FallthroughBranchInfo.Count; + BFI->FallthroughMissPredicted = FallthroughBranchInfo.MispredictedCount; + } +} + +void FeatureMiner::extractFeatures(BinaryFunction &Function, BinaryContext &BC, + raw_ostream &Printer) { + int8_t ProcedureType = getProcedureType(Function, BC); +// auto Info = DataflowInfoManager(BC, Function, nullptr, nullptr); + auto Info = DataflowInfoManager(Function, nullptr, nullptr); + auto &DA = Info.getDominatorAnalysis(); + auto &PDA = Info.getPostDominatorAnalysis(); + const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); + bool Simple = Function.isSimple(); + +// const auto &Order = opts::VespaUseDFS ? 
Function.dfs() : Function.getLayout(); + const auto &Order = Function.dfs(); + + for (auto *BBA : Order) { + + auto &BB = *BBA; + unsigned NumOuterLoops{0}; + unsigned TotalLoops{0}; + unsigned MaximumLoopDepth{0}; + unsigned LoopDepth{0}; + unsigned LoopNumExitEdges{0}; + unsigned LoopNumExitBlocks{0}; + unsigned LoopNumExitingBlocks{0}; + unsigned LoopNumLatches{0}; + unsigned LoopNumBlocks{0}; + unsigned LoopNumBackEdges{0}; + + bool LocalExitingBlock{false}; + bool LocalLatchBlock{false}; + bool LocalLoopHeader{false}; + + BinaryLoop *Loop = LoopsInfo.getLoopFor(&BB); + if (Loop) { + SmallVector ExitingBlocks; + Loop->getExitingBlocks(ExitingBlocks); + + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + + SmallVector ExitEdges; + Loop->getExitEdges(ExitEdges); + + SmallVector Latches; + Loop->getLoopLatches(Latches); + + NumOuterLoops = LoopsInfo.OuterLoops; + TotalLoops = LoopsInfo.TotalLoops; + MaximumLoopDepth = LoopsInfo.MaximumDepth; + LoopDepth = Loop->getLoopDepth(); + LoopNumExitEdges = ExitEdges.size(); + LoopNumExitBlocks = ExitBlocks.size(); + LoopNumExitingBlocks = ExitingBlocks.size(); + LoopNumLatches = Latches.size(); + LoopNumBlocks = Loop->getNumBlocks(); + LoopNumBackEdges = Loop->getNumBackEdges(); + + LocalExitingBlock = Loop->isLoopExiting(&BB); + LocalLatchBlock = Loop->isLoopLatch(&BB); + LocalLoopHeader = ((Loop->getHeader() == (&BB)) ? 
1 : 0); + } + + unsigned NumLoads{0}; + unsigned NumStores{0}; + unsigned NumCallsExit{0}; + unsigned NumCalls{0}; + unsigned NumCallsInvoke{0}; + unsigned NumTailCalls{0}; + unsigned NumIndirectCalls{0}; + unsigned NumSelfCalls{0}; + + for (auto &Inst : BB) { + if (BC.MIB->isLoad(Inst)) { + ++NumLoads; + } else if (BC.MIB->isStore(Inst)) { + ++NumStores; + } else if (BC.MIB->isCall(Inst)) { + ++NumCalls; + + if (BC.MIB->isIndirectCall(Inst)) + ++NumIndirectCalls; + + if (BC.MIB->isInvoke(Inst)) + ++NumCallsInvoke; + + if (BC.MIB->isTailCall(Inst)) + ++NumTailCalls; + + if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { + StringRef CalleeName = CalleeSymbol->getName(); + if (CalleeName == "__cxa_throw@PLT" || + CalleeName == "_Unwind_Resume@PLT" || + CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || + CalleeName == "abort@PLT") + ++NumCallsExit; + else if (CalleeName == Function.getPrintName()) { + ++NumSelfCalls; + } + } + } + } + + int Index = -2; + bool LoopHeader = SBI->isLoopHeader(&BB); + for (auto &Inst : BB) { + ++Index; + + if (!BC.MIA->isConditionalBranch(Inst)) + continue; + + BFIPtr BFI = std::make_unique(); + + BFI->Simple = Simple; + BFI->NumOuterLoops = NumOuterLoops; + BFI->TotalLoops = TotalLoops; + BFI->MaximumLoopDepth = MaximumLoopDepth; + BFI->LoopDepth = LoopDepth; + BFI->LoopNumExitEdges = LoopNumExitEdges; + BFI->LoopNumExitBlocks = LoopNumExitBlocks; + BFI->LoopNumExitingBlocks = LoopNumExitingBlocks; + BFI->LoopNumLatches = LoopNumLatches; + BFI->LoopNumBlocks = LoopNumBlocks; + BFI->LoopNumBackEdges = LoopNumBackEdges; + + BFI->LocalExitingBlock = LocalExitingBlock; + BFI->LocalLatchBlock = LocalLatchBlock; + BFI->LocalLoopHeader = LocalLoopHeader; + + BFI->Call = ((NumCalls > 0) ? 
1 : 0); + BFI->NumCalls = NumCalls; + + BFI->BasicBlockSize = BB.size(); + BFI->NumBasicBlocks = Function.size(); + BFI->NumSelfCalls = NumSelfCalls; + + BFI->NumLoads = NumLoads; + BFI->NumStores = NumStores; + BFI->NumCallsExit = NumCallsExit; + + BFI->NumCallsInvoke = NumCallsInvoke; + BFI->NumIndirectCalls = NumIndirectCalls; + BFI->NumTailCalls = NumTailCalls; + + // Check if branch's basic block is a loop header and store it. + BFI->LoopHeader = LoopHeader; + + // Adding taken successor info. + addSuccessorInfo(DA, PDA, BFI, Function, BC, Inst, BB, true); + // Adding fall through successor info. + addSuccessorInfo(DA, PDA, BFI, Function, BC, Inst, BB, false); + + // Holds the branch opcode info. + BFI->Opcode = Inst.getOpcode(); + BFI->OpcodeStr = BC.MII->getName(Inst.getOpcode()); + + // Holds the branch's procedure type. + BFI->ProcedureType = ProcedureType; + + BFI->CmpOpcode = 0; + if (Index > -1) { + auto Cmp = BB.begin() + Index; + + if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { + // Holding the branch comparison opcode info. 
+ BFI->CmpOpcode = (*Cmp).getOpcode(); + + BFI->CmpOpcodeStr = BC.MII->getName((*Cmp).getOpcode()); + + auto getOperandType = [&](const MCOperand &Operand) -> int32_t { + if (Operand.isReg()) + return 0; + else if (Operand.isImm()) + return 1; + // else if (Operand.isFPImm()) + else if (Operand.isSFPImm()) + return 2; + else if (Operand.isExpr()) + return 3; + else + return -1; + }; + + const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); + unsigned NumDefs = InstInfo.getNumDefs(); + int32_t NumPrimeOperands = + MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; + switch (NumPrimeOperands) { + case 6: { + int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); + int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); + + if (RBType == 0 && RAType == 0) { + BFI->OperandRBType = RBType; + BFI->OperandRAType = RAType; + } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { + RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); + + if (RAType != 1 && RAType != 2) { + RAType = -1; + } + + BFI->OperandRBType = RBType; + BFI->OperandRAType = RAType; + } else { + BFI->OperandRAType = -1; + BFI->OperandRBType = -1; + } + break; + } + case 2: + BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); + break; + case 3: + BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); + break; + case 1: + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); + break; + default: + BFI->OperandRAType = -1; + BFI->OperandRBType = -1; + break; + } + + } else { + Index -= 1; + for (int Idx = Index; Idx > -1; Idx--) { + auto Cmp = BB.begin() + Idx; + if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { + // Holding the branch comparison opcode info. 
+ BFI->CmpOpcode = (*Cmp).getOpcode(); + BFI->CmpOpcodeStr = BC.MII->getName((*Cmp).getOpcode()); + break; + } + } + } + } + + //======================================================================== + + auto &FalseSuccessor = BFI->FalseSuccessor; + auto &TrueSuccessor = BFI->TrueSuccessor; + + if (!FalseSuccessor && !TrueSuccessor) + continue; + + int64_t BranchOffset = + (BFI->BranchOffset.hasValue()) + ? static_cast(*(BFI->BranchOffset)) + : -1; + if(BranchOffset == -1) + continue; + + int16_t ProcedureType = (BFI->ProcedureType.hasValue()) + ? static_cast(*(BFI->ProcedureType)) + : -1; + + int16_t Direction = (BFI->Direction.hasValue()) + ? static_cast(*(BFI->Direction)) + : -1; + + int16_t LoopHeader = (BFI->LoopHeader.hasValue()) + ? static_cast(*(BFI->LoopHeader)) + : -1; + + int32_t Opcode = + (BFI->Opcode.hasValue()) ? static_cast(*(BFI->Opcode)) : -1; + + int32_t CmpOpcode = (BFI->CmpOpcode.hasValue()) + ? static_cast(*(BFI->CmpOpcode)) + : -1; + + int64_t Count = + (BFI->Count.hasValue()) ? static_cast(*(BFI->Count)) : -1; + + int64_t MissPredicted = (BFI->MissPredicted.hasValue()) + ? static_cast(*(BFI->MissPredicted)) + : -1; + + int64_t FallthroughCount = + (BFI->FallthroughCount.hasValue()) + ? static_cast(*(BFI->FallthroughCount)) + : -1; + + int64_t FallthroughMissPredicted = + (BFI->FallthroughMissPredicted.hasValue()) + ? static_cast(*(BFI->FallthroughMissPredicted)) + : -1; + + int64_t NumOuterLoops = (BFI->NumOuterLoops.hasValue()) + ? static_cast(*(BFI->NumOuterLoops)) + : -1; + int64_t TotalLoops = (BFI->TotalLoops.hasValue()) + ? static_cast(*(BFI->TotalLoops)) + : -1; + int64_t MaximumLoopDepth = + (BFI->MaximumLoopDepth.hasValue()) + ? static_cast(*(BFI->MaximumLoopDepth)) + : -1; + int64_t LoopDepth = (BFI->LoopDepth.hasValue()) + ? static_cast(*(BFI->LoopDepth)) + : -1; + int64_t LoopNumExitEdges = + (BFI->LoopNumExitEdges.hasValue()) + ? 
static_cast(*(BFI->LoopNumExitEdges)) + : -1; + int64_t LoopNumExitBlocks = + (BFI->LoopNumExitBlocks.hasValue()) + ? static_cast(*(BFI->LoopNumExitBlocks)) + : -1; + int64_t LoopNumExitingBlocks = + (BFI->LoopNumExitingBlocks.hasValue()) + ? static_cast(*(BFI->LoopNumExitingBlocks)) + : -1; + int64_t LoopNumLatches = + (BFI->LoopNumLatches.hasValue()) + ? static_cast(*(BFI->LoopNumLatches)) + : -1; + int64_t LoopNumBlocks = (BFI->LoopNumBlocks.hasValue()) + ? static_cast(*(BFI->LoopNumBlocks)) + : -1; + int64_t LoopNumBackEdges = + (BFI->LoopNumBackEdges.hasValue()) + ? static_cast(*(BFI->LoopNumBackEdges)) + : -1; + + int64_t LocalExitingBlock = + (BFI->LocalExitingBlock.hasValue()) + ? static_cast(*(BFI->LocalExitingBlock)) + : -1; + + int64_t LocalLatchBlock = (BFI->LocalLatchBlock.hasValue()) + ? static_cast(*(BFI->LocalLatchBlock)) + : -1; + + int64_t LocalLoopHeader = (BFI->LocalLoopHeader.hasValue()) + ? static_cast(*(BFI->LocalLoopHeader)) + : -1; + + int64_t Call = + (BFI->Call.hasValue()) ? static_cast(*(BFI->Call)) : -1; + + int64_t DeltaTaken = (BFI->DeltaTaken.hasValue()) + ? static_cast(*(BFI->DeltaTaken)) + : -1; + + int64_t NumLoads = (BFI->NumLoads.hasValue()) + ? static_cast(*(BFI->NumLoads)) + : -1; + + int64_t NumStores = (BFI->NumStores.hasValue()) + ? static_cast(*(BFI->NumStores)) + : -1; + + int64_t BasicBlockSize = + (BFI->BasicBlockSize.hasValue()) + ? static_cast(*(BFI->BasicBlockSize)) + : -1; + + int64_t NumBasicBlocks = + (BFI->NumBasicBlocks.hasValue()) + ? static_cast(*(BFI->NumBasicBlocks)) + : -1; + + int64_t NumCalls = (BFI->NumCalls.hasValue()) + ? static_cast(*(BFI->NumCalls)) + : -1; + + int64_t NumSelfCalls = (BFI->NumSelfCalls.hasValue()) + ? static_cast(*(BFI->NumSelfCalls)) + : -1; + + int64_t NumCallsExit = (BFI->NumCallsExit.hasValue()) + ? static_cast(*(BFI->NumCallsExit)) + : -1; + + int64_t OperandRAType = (BFI->OperandRAType.hasValue()) + ? 
static_cast(*(BFI->OperandRAType)) + : -1; + + int64_t OperandRBType = (BFI->OperandRBType.hasValue()) + ? static_cast(*(BFI->OperandRBType)) + : -1; + + int64_t NumCallsInvoke = + (BFI->NumCallsInvoke.hasValue()) + ? static_cast(*(BFI->NumCallsInvoke)) + : -1; + + int64_t NumIndirectCalls = + (BFI->NumIndirectCalls.hasValue()) + ? static_cast(*(BFI->NumIndirectCalls)) + : -1; + + int64_t NumTailCalls = (BFI->NumTailCalls.hasValue()) + ? static_cast(*(BFI->NumTailCalls)) + : -1; + + Printer << BFI->Simple << "," << Opcode << "," << BFI->OpcodeStr << "," + << Direction << "," << CmpOpcode << "," << BFI->CmpOpcodeStr + << "," << LoopHeader << "," << ProcedureType << "," << Count + << "," << MissPredicted << "," << FallthroughCount << "," + << FallthroughMissPredicted << "," << NumOuterLoops << "," + << NumCallsExit << "," << TotalLoops << "," << MaximumLoopDepth + << "," << LoopDepth << "," << LoopNumExitEdges << "," + << LoopNumExitBlocks << "," << LoopNumExitingBlocks << "," + << LoopNumLatches << "," << LoopNumBlocks << "," + << LoopNumBackEdges << "," << LocalExitingBlock << "," + << LocalLatchBlock << "," << LocalLoopHeader << "," << Call << "," + << DeltaTaken << "," << NumLoads << "," << NumStores << "," + << NumCalls << "," << OperandRAType << "," << OperandRBType << "," + << BasicBlockSize << "," << NumBasicBlocks << "," + << NumCallsInvoke << "," << NumIndirectCalls << "," + << NumTailCalls << "," << NumSelfCalls; + + if (FalseSuccessor && TrueSuccessor) { + dumpSuccessorFeatures(Printer, TrueSuccessor); + dumpSuccessorFeatures(Printer, FalseSuccessor); + + FalseSuccessor.reset(); + TrueSuccessor.reset(); + } + BFI.reset(); + + std::string BranchOffsetStr = (BranchOffset == -1) ? "None" : Twine::utohexstr(BranchOffset).str(); + + uint64_t fun_exec = Function.getExecutionCount(); + fun_exec = (fun_exec != UINT64_MAX) ? 
fun_exec : 0; + Printer << "," << Twine::utohexstr(Function.getAddress()) << "," + << fun_exec << "," << Function.getFunctionNumber() << "," + << Function.getOneName() << "," << Function.getPrintName() + << "," << BranchOffsetStr + << "\n"; + + //======================================================================== + + // this->BranchesInfoSet.push_back(std::move(BFI)); + } + } +}
+
+/// Flattens an optional feature into the signed integer type \p IntT.
+///
+/// \returns the stored value converted to \p IntT, or -1 when \p Feature
+/// holds no value, so that missing features are written as -1.
+template <typename IntT, typename OptT>
+static IntT featureOrMinusOne(const OptT &Feature) {
+  return Feature.hasValue() ? static_cast<IntT>(*Feature)
+                            : static_cast<IntT>(-1);
+}
+
+/// Appends the feature columns of one branch successor to \p Printer.
+///
+/// Every column is written with a leading comma, so the output continues a
+/// CSV row that the caller has already started; no line terminator is
+/// emitted. Optional features that are unset are written as -1.
+///
+/// \param Printer   stream receiving the comma-separated columns.
+/// \param Successor successor description to dump; it is dereferenced
+///                  unconditionally, so it must not be null.
+void FeatureMiner::dumpSuccessorFeatures(raw_ostream &Printer,
+                                         BBIPtr &Successor) {
+  const int16_t BranchDominates =
+      featureOrMinusOne<int16_t>(Successor->BranchDominates);
+  const int16_t BranchPostdominates =
+      featureOrMinusOne<int16_t>(Successor->BranchPostdominates);
+  const int16_t LoopHeader = featureOrMinusOne<int16_t>(Successor->LoopHeader);
+  const int16_t Backedge = featureOrMinusOne<int16_t>(Successor->Backedge);
+  const int16_t Exit = featureOrMinusOne<int16_t>(Successor->Exit);
+  const int16_t Call = featureOrMinusOne<int16_t>(Successor->Call);
+
+  const int32_t EndOpcode = featureOrMinusOne<int32_t>(Successor->EndOpcode);
+
+  const int64_t NumLoads = featureOrMinusOne<int64_t>(Successor->NumLoads);
+  const int64_t NumStores = featureOrMinusOne<int64_t>(Successor->NumStores);
+  const int64_t BasicBlockSize =
+      featureOrMinusOne<int64_t>(Successor->BasicBlockSize);
+  const int64_t NumCalls = featureOrMinusOne<int64_t>(Successor->NumCalls);
+  const int64_t NumCallsExit =
+      featureOrMinusOne<int64_t>(Successor->NumCallsExit);
+  const int64_t NumCallsInvoke =
+      featureOrMinusOne<int64_t>(Successor->NumCallsInvoke);
+  const int64_t NumIndirectCalls =
+      featureOrMinusOne<int64_t>(Successor->NumIndirectCalls);
+  const int64_t NumTailCalls =
+      featureOrMinusOne<int64_t>(Successor->NumTailCalls);
+
+  // Column order mirrors the TS_*/FS_* names of the CSV header written by
+  // runOnFunctions; note that the indirect-call count is written before the
+  // invoke count there as well.
+  Printer << "," << BranchDominates << "," << BranchPostdominates << ","
+          << EndOpcode << "," << Successor->EndOpcodeStr << "," << LoopHeader
+          << "," << Backedge << "," << Exit << "," << Call << ","
+          << Successor->FromFunName << ","
+          << Twine::utohexstr(Successor->FromBb) << "," << Successor->ToFunName
+          << "," << Twine::utohexstr(Successor->ToBb) << "," << NumLoads << ","
+          << NumStores << "," << BasicBlockSize << "," << NumCalls << ","
+          << NumCallsExit << "," << NumIndirectCalls << "," << NumCallsInvoke
+          << "," << NumTailCalls;
+}
+
+void FeatureMiner::dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress, + uint64_t FunctionFrequency) { + + for (auto const &BFI : BranchesInfoSet) { + auto &FalseSuccessor = BFI->FalseSuccessor; + auto &TrueSuccessor = BFI->TrueSuccessor; + + if (!FalseSuccessor && !TrueSuccessor) + continue; + + int16_t ProcedureType = (BFI->ProcedureType.hasValue()) + ? static_cast(*(BFI->ProcedureType)) + : -1; + + int16_t Direction = + (BFI->Direction.hasValue()) ? static_cast(*(BFI->Direction)) : -1; + + int16_t LoopHeader = (BFI->LoopHeader.hasValue()) + ? static_cast(*(BFI->LoopHeader)) + : -1; + + int32_t Opcode = + (BFI->Opcode.hasValue()) ? static_cast(*(BFI->Opcode)) : -1; + + int32_t CmpOpcode = (BFI->CmpOpcode.hasValue()) + ? static_cast(*(BFI->CmpOpcode)) + : -1; + + int64_t Count = + (BFI->Count.hasValue()) ? static_cast(*(BFI->Count)) : -1; + + int64_t MissPredicted = (BFI->MissPredicted.hasValue()) + ? static_cast(*(BFI->MissPredicted)) + : -1; + + int64_t FallthroughCount = + (BFI->FallthroughCount.hasValue()) + ? static_cast(*(BFI->FallthroughCount)) + : -1; + + int64_t FallthroughMissPredicted = + (BFI->FallthroughMissPredicted.hasValue()) + ? 
static_cast(*(BFI->FallthroughMissPredicted)) + : -1; + + int64_t NumOuterLoops = (BFI->NumOuterLoops.hasValue()) + ? static_cast(*(BFI->NumOuterLoops)) + : -1; + int64_t TotalLoops = (BFI->TotalLoops.hasValue()) + ? static_cast(*(BFI->TotalLoops)) + : -1; + int64_t MaximumLoopDepth = + (BFI->MaximumLoopDepth.hasValue()) + ? static_cast(*(BFI->MaximumLoopDepth)) + : -1; + int64_t LoopDepth = (BFI->LoopDepth.hasValue()) + ? static_cast(*(BFI->LoopDepth)) + : -1; + int64_t LoopNumExitEdges = + (BFI->LoopNumExitEdges.hasValue()) + ? static_cast(*(BFI->LoopNumExitEdges)) + : -1; + int64_t LoopNumExitBlocks = + (BFI->LoopNumExitBlocks.hasValue()) + ? static_cast(*(BFI->LoopNumExitBlocks)) + : -1; + int64_t LoopNumExitingBlocks = + (BFI->LoopNumExitingBlocks.hasValue()) + ? static_cast(*(BFI->LoopNumExitingBlocks)) + : -1; + int64_t LoopNumLatches = (BFI->LoopNumLatches.hasValue()) + ? static_cast(*(BFI->LoopNumLatches)) + : -1; + int64_t LoopNumBlocks = (BFI->LoopNumBlocks.hasValue()) + ? static_cast(*(BFI->LoopNumBlocks)) + : -1; + int64_t LoopNumBackEdges = + (BFI->LoopNumBackEdges.hasValue()) + ? static_cast(*(BFI->LoopNumBackEdges)) + : -1; + + int64_t LocalExitingBlock = + (BFI->LocalExitingBlock.hasValue()) + ? static_cast(*(BFI->LocalExitingBlock)) + : -1; + + int64_t LocalLatchBlock = (BFI->LocalLatchBlock.hasValue()) + ? static_cast(*(BFI->LocalLatchBlock)) + : -1; + + int64_t LocalLoopHeader = (BFI->LocalLoopHeader.hasValue()) + ? static_cast(*(BFI->LocalLoopHeader)) + : -1; + + int64_t Call = + (BFI->Call.hasValue()) ? static_cast(*(BFI->Call)) : -1; + + int64_t DeltaTaken = (BFI->DeltaTaken.hasValue()) + ? static_cast(*(BFI->DeltaTaken)) + : -1; + + int64_t NumLoads = (BFI->NumLoads.hasValue()) + ? static_cast(*(BFI->NumLoads)) + : -1; + + int64_t NumStores = (BFI->NumStores.hasValue()) + ? static_cast(*(BFI->NumStores)) + : -1; + + int64_t BasicBlockSize = (BFI->BasicBlockSize.hasValue()) + ? 
static_cast(*(BFI->BasicBlockSize)) + : -1; + + int64_t BranchOffset = (BFI->BranchOffset.hasValue()) + ? static_cast(*(BFI->BranchOffset)): -1; + + int64_t NumBasicBlocks = (BFI->NumBasicBlocks.hasValue()) + ? static_cast(*(BFI->NumBasicBlocks)) + : -1; + + int64_t NumCalls = (BFI->NumCalls.hasValue()) + ? static_cast(*(BFI->NumCalls)) + : -1; + + int64_t NumSelfCalls = (BFI->NumSelfCalls.hasValue()) + ? static_cast(*(BFI->NumSelfCalls)) + : -1; + + int64_t NumCallsExit = (BFI->NumCallsExit.hasValue()) + ? static_cast(*(BFI->NumCallsExit)) + : -1; + + int64_t OperandRAType = (BFI->OperandRAType.hasValue()) + ? static_cast(*(BFI->OperandRAType)) + : -1; + + int64_t OperandRBType = (BFI->OperandRBType.hasValue()) + ? static_cast(*(BFI->OperandRBType)) + : -1; + + int64_t NumCallsInvoke = (BFI->NumCallsInvoke.hasValue()) + ? static_cast(*(BFI->NumCallsInvoke)) + : -1; + + int64_t NumIndirectCalls = + (BFI->NumIndirectCalls.hasValue()) + ? static_cast(*(BFI->NumIndirectCalls)) + : -1; + + int64_t NumTailCalls = (BFI->NumTailCalls.hasValue()) + ? 
static_cast(*(BFI->NumTailCalls)) + : -1; + + Printer << BFI->Simple << "," << Opcode << "," << BFI->OpcodeStr << "," + << Direction << "," << CmpOpcode << "," << BFI->CmpOpcodeStr << "," + << LoopHeader << "," << ProcedureType << "," << Count << "," + << MissPredicted << "," << FallthroughCount << "," + << FallthroughMissPredicted << "," << NumOuterLoops << "," + << NumCallsExit << "," << TotalLoops << "," << MaximumLoopDepth + << "," << LoopDepth << "," << LoopNumExitEdges << "," + << LoopNumExitBlocks << "," << LoopNumExitingBlocks << "," + << LoopNumLatches << "," << LoopNumBlocks << "," << LoopNumBackEdges + << "," << LocalExitingBlock << "," << LocalLatchBlock << "," + << LocalLoopHeader << "," << Call << "," << DeltaTaken << "," + << NumLoads << "," << NumStores << "," << NumCalls << "," + << OperandRAType << "," << OperandRBType << "," << BasicBlockSize + << "," << NumBasicBlocks << "," << NumCallsInvoke << "," + << NumIndirectCalls << "," << NumTailCalls << "," << NumSelfCalls; + + if (FalseSuccessor && TrueSuccessor) { + dumpSuccessorFeatures(Printer, TrueSuccessor); + dumpSuccessorFeatures(Printer, FalseSuccessor); + } + + Printer << "," << Twine::utohexstr(FunctionAddress) << "," + << FunctionFrequency << "\n"; + } + BranchesInfoSet.clear(); +}
+
+/// Pass entry point: writes the CSV header to "features_new.csv" and then
+/// lets extractFeatures append the rows for each function of \p BC.
+///
+/// The output path is relative, so the file is created wherever the tool is
+/// run from. If it cannot be opened a warning is printed to errs() and the
+/// pass returns without processing any function.
+///
+/// \param BC binary context whose functions are mined.
+void FeatureMiner::runOnFunctions(BinaryContext &BC) {
+  auto FileName = "features_new.csv";
+  outs() << "BOLT-INFO: Starting feature miner pass\n";
+
+  std::error_code EC;
+  raw_fd_ostream Printer(FileName, EC, sys::fs::OF_None);
+  if (EC) {
+    errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
+           << FileName << " for output.\n";
+    return;
+  }
+
+  // "profile_data_regular.fdata" is deliberately not opened here any more.
+  // Its only consumer, dumpProfileData, is commented out, so opening it
+  // merely truncated an existing file of that name and aborted the whole
+  // pass when the open failed. Re-create the stream together with the
+  // dumpProfileData call if that output is ever revived.
+
+  // CSV file header. The column names (including the historical
+  // "LOOP_NUM_BAKCEDGES" spelling) are kept byte-for-byte because they are
+  // part of the emitted file.
+  Printer << "FUN_TYPE,OPCODE,OPCODE_STR,DIRECTION,CMP_OPCODE,CMP_OPCODE_STR,"
+             "LOOP_HEADER,PROCEDURE_TYPE,"
+             "COUNT_TAKEN,MISS_TAKEN,COUNT_NOT_TAKEN,MISS_NOT_TAKEN,"
+             "NUM_OUTER_LOOPS,NUM_CALLS_EXIT,TOTAL_LOOPS,MAXIMUM_LOOP_DEPTH,"
+             "LOOP_DEPTH,LOOP_NUM_EXIT_EDGES,LOOP_NUM_EXIT_BLOCKS,"
+             "LOOP_NUM_EXITING_BLOCKS,LOOP_NUM_LATCHES,LOOP_NUM_BLOCKS,"
+             "LOOP_NUM_BAKCEDGES,LOCAL_EXITING_BLOCK,LOCAL_LATCH_BLOCK,"
+             "LOCAL_LOOP_HEADER,CALL,DELTA_TAKEN,NUM_LOADS,NUM_STORES,"
+             "NUM_CALLS,OPERAND_RA_TYPE,OPERAND_RB_TYPE,BASIC_BLOCK_SIZE,"
+             "NUM_BASIC_BLOCKS,NUM_CALLS_INVOKE,NUM_INDIRECT_CALLS,"
+             "NUM_TAIL_CALLS,NUM_SELF_CALLS,TS_DOMINATES,TS_POSTDOMINATES,"
+             "TS_END_OPCODE,TS_END_OPCODE_STR,TS_LOOP_HEADER,TS_BACKEDGE,TS_"
+             "EXIT,TS_CALL,"
+             "TS_FROM_FUN_NAME,TS_FROM_BB,TS_TO_FUN_NAME,TS_TO_BB,TS_NUM_LOADS,"
+             "TS_NUM_STORES,TS_BASIC_BLOCK_SIZE,TS_NUM_CALLS,TS_NUM_CALLS_EXIT,"
+             "TS_NUM_INDIRECT_CALL,TS_NUM_CALLS_INVOKE,TS_NUM_TAIL_CALLS,"
+             "FS_DOMINATES,FS_POSTDOMINATES,FS_END_OPCODE,FS_END_OPCODE_STR,FS_"
+             "LOOP_HEADER,"
+             "FS_BACKEDGE,FS_EXIT,FS_CALL,FS_FROM_FUN_NAME,FS_FROM_BB,"
+             "FS_TO_FUN_NAME,FS_TO_BB,FS_NUM_LOADS,FS_NUM_STORES,"
+             "FS_BASIC_BLOCK_SIZE,FS_NUM_CALLS,FS_NUM_CALLS_EXIT,"
+             "FS_NUM_INDIRECT_CALL,FS_NUM_CALLS_INVOKE,FS_NUM_TAIL_CALLS,"
+             "FUN_ENTRY_ADDRESS,FUN_ENTRY_FREQUENCY"
+             ",FUN_UNIQUE_NUMBER,FUN_ONE_NAME,FUN_PRINT_NAME,"
+             "BRANCH_ADDRESS\n";
+
+  auto &BFs = BC.getBinaryFunctions();
+  SBI = std::make_unique<StaticBranchInfo>();
+  for (auto &BFI : BFs) {
+    BinaryFunction &Function = BFI.second;
+
+    // NOTE(review): a function with a valid profile is skipped when
+    // opts::IncludeValidProfile is set, which reads inverted relative to the
+    // option's name -- confirm the intended meaning of the flag.
+    if (Function.empty() ||
+        (Function.hasValidProfile() && opts::IncludeValidProfile))
+      continue;
+
+    // Loop edge information is only gathered for functions that have loops.
+    if (!Function.isLoopFree()) {
+      const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo();
+      SBI->findLoopEdgesInfo(LoopsInfo);
+    }
+    extractFeatures(Function, BC, Printer);
+
+    // Drop the per-function state before moving on to the next function.
+    SBI->clear();
+  }
+
+  outs() << "BOLT-INFO: Dumping two-way conditional branches' features"
+         << " at " << FileName << "\n";
+}
+
+/*void FeatureMiner::dumpProfileData(BinaryFunction &Function, + raw_ostream &Printer) { + + BinaryContext &BC = Function.getBinaryContext(); + + std::string FromFunName = Function.getPrintName(); + for (auto &BB : Function) { + auto LastInst = BB.getLastNonPseudoInstr(); + + for (auto &Inst : BB) { + if (!BC.MIB->isCall(Inst) && !BC.MIB->isBranch(Inst) && + LastInst != (&Inst)) + continue; + + auto Offset = BC.MIB->tryGetAnnotationAs(Inst, "Offset"); + + if (!Offset) + continue; + + uint64_t TakenFreqEdge = 0; + auto FromBb = Offset.get(); + std::string ToFunName; + uint32_t ToBb; + + if (BC.MIB->isCall(Inst)) { + auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst); + if (!CalleeSymbol) + continue; + + ToFunName = CalleeSymbol->getName(); + ToBb = 0; + + if (BC.MIB->getConditionalTailCall(Inst)) { + + if (BC.MIB->hasAnnotation(Inst, "CTCTakenCount")) { + auto CountAnnt = + BC.MIB->tryGetAnnotationAs(Inst, "CTCTakenCount"); + if (CountAnnt) { + TakenFreqEdge = (*CountAnnt); + } + } + } else { + if (BC.MIB->hasAnnotation(Inst, "Count")) { + auto CountAnnt = + BC.MIB->tryGetAnnotationAs(Inst, "Count"); + if (CountAnnt) { + TakenFreqEdge = (*CountAnnt); + } + } + } + + if (TakenFreqEdge > 0) + Printer << "1 " << FromFunName << " " << Twine::utohexstr(FromBb) + << " 1 " << ToFunName << " " << Twine::utohexstr(ToBb) << " " + << 0 << " " << TakenFreqEdge << "\n"; + } else { + for (BinaryBasicBlock *SuccBB : BB.successors()) { + TakenFreqEdge = BB.getBranchInfo(*SuccBB).Count; + BinaryFunction *ToFun = SuccBB->getFunction(); + ToFunName = ToFun->getPrintName(); + ToBb = SuccBB->getInputOffset(); + + if (TakenFreqEdge > 0) + Printer << "1 " << FromFunName << " " << Twine::utohexstr(FromBb) + << " 1 " << ToFunName << " " << Twine::utohexstr(ToBb) + << " " << 0 << " " << TakenFreqEdge << "\n"; + } + } + } + } +} +*/ + +} // namespace bolt +} // namespace llvm \ No newline at end of file diff --git a/bolt/lib/Passes/StaticBranchInfo.cpp 
b/bolt/lib/Passes/StaticBranchInfo.cpp new file mode 100644 index 000000000..13426b397 --- /dev/null +++ b/bolt/lib/Passes/StaticBranchInfo.cpp @@ -0,0 +1,162 @@ +//===------ Passes/StaticBranchInfo.cpp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an auxiliary class to the feature miner, static branch probability +// and frequency passes. This class is responsible for finding loop info (loop +// back edges, loop exit edges and loop headers) of a function. It also finds +// basic block info (if a block contains store and call instructions) and if a +// basic block contains a call to the exit. +// +//===----------------------------------------------------------------------===// + +// #include "Passes/StaticBranchInfo.h" +// #include "BinaryBasicBlock.h" +#include "bolt/Passes/StaticBranchInfo.h" +#include "bolt/Core/BinaryBasicBlock.h" + +namespace llvm { +namespace bolt { + +void StaticBranchInfo::findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo) { + // Traverse discovered loops + std::stack Loops; + for (BinaryLoop *BL : LoopsInfo) + Loops.push(BL); + + while (!Loops.empty()) { + BinaryLoop *Loop = Loops.top(); + Loops.pop(); + BinaryBasicBlock *LoopHeader = Loop->getHeader(); + LoopHeaders.insert(LoopHeader); + + // Add nested loops in the stack. + for (BinaryLoop::iterator I = Loop->begin(), E = Loop->end(); I != E; ++I) { + Loops.push(*I); + } + + SmallVector Latches; + Loop->getLoopLatches(Latches); + + // Find back edges. + for (BinaryBasicBlock *Latch : Latches) { + for (BinaryBasicBlock *Succ : Latch->successors()) { + if (Succ == LoopHeader) { + Edge CFGEdge = std::make_pair(Latch->getLabel(), Succ->getLabel()); + BackEdges.insert(CFGEdge); + } + } + } + + // Find exit edges. 
+ SmallVector AuxExitEdges; + Loop->getExitEdges(AuxExitEdges); + for (BinaryLoop::Edge &Exit : AuxExitEdges) { + ExitEdges.insert(Exit); + } + } +} + +void StaticBranchInfo::findBasicBlockInfo(const BinaryFunction &Function, + BinaryContext &BC) { + for (auto &BB : Function) { + for (auto &Inst : BB) { + if (BC.MIB->isCall(Inst)) + CallSet.insert(&BB); + else if (BC.MIB->isStore(Inst)) + StoreSet.insert(&BB); + } + } +} + +bool StaticBranchInfo::isBackEdge(const Edge &CFGEdge) const { + return BackEdges.count(CFGEdge); +} + +bool StaticBranchInfo::isBackEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const { + const Edge CFGEdge = std::make_pair(SrcBB->getLabel(), DstBB->getLabel()); + return isBackEdge(CFGEdge); +} + +bool StaticBranchInfo::isExitEdge(const BinaryLoop::Edge &CFGEdge) const { + return ExitEdges.count(CFGEdge); +} + +bool StaticBranchInfo::isExitEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const { +// const BinaryLoop::Edge CFGEdge = std::make_pair(SrcBB, DstBB); + const BinaryLoop::Edge CFGEdge = std::make_pair(const_cast(SrcBB), const_cast(DstBB)); + return isExitEdge(CFGEdge); +} + +bool StaticBranchInfo::isLoopHeader(const BinaryBasicBlock *BB) const { + return LoopHeaders.count(BB); +} + +bool StaticBranchInfo::hasCallInst(const BinaryBasicBlock *BB) const { + return CallSet.count(BB); +} + +bool StaticBranchInfo::hasStoreInst(const BinaryBasicBlock *BB) const { + return StoreSet.count(BB); +} + +bool StaticBranchInfo::callToExit(BinaryBasicBlock *BB, + BinaryContext &BC) const { + auto &currBB = *BB; + for (auto &Inst : currBB) { + if (BC.MIB->isCall(Inst)) { + if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { + StringRef CalleeName = CalleeSymbol->getName(); + if (CalleeName == "__cxa_throw@PLT" || + CalleeName == "_Unwind_Resume@PLT" || + CalleeName == "__cxa_rethrow@PLT" || CalleeName == "exit@PLT" || + CalleeName == "abort@PLT") + return true; + } + } + } + + return false; +} + 
+/// \returns how many successor edges of \p BB are recorded loop back edges.
+/// \p BB is dereferenced and must not be null.
+unsigned StaticBranchInfo::countBackEdges(BinaryBasicBlock *BB) const {
+  unsigned NumBackEdges = 0;
+  for (BinaryBasicBlock *Succ : BB->successors())
+    if (isBackEdge(BB, Succ)) // builds the same label-pair key as before
+      ++NumBackEdges;
+  return NumBackEdges;
+}
+
+/// \returns how many successor edges of \p BB are recorded loop exit edges.
+/// \p BB is dereferenced and must not be null.
+unsigned StaticBranchInfo::countExitEdges(BinaryBasicBlock *BB) const {
+  unsigned NumExitEdges = 0;
+  for (BinaryBasicBlock *Succ : BB->successors())
+    if (isExitEdge(BB, Succ)) // builds the same block-pair key as before
+      ++NumExitEdges;
+  return NumExitEdges;
+}
+
+/// Forgets everything gathered so far: loop headers, back edges, exit edges
+/// and the call/store block sets.
+void StaticBranchInfo::clear() {
+  LoopHeaders.clear();
+  BackEdges.clear();
+  ExitEdges.clear();
+  CallSet.clear();
+  StoreSet.clear();
+}
+
+} // namespace bolt +} // namespace llvm \ No newline at end of file -- 2.33.0