From 1515efea7da79148f7456378ffab754b04715d1d Mon Sep 17 00:00:00 2001 From: liyancheng <412998149@qq.com> Date: Sun, 13 Feb 2022 15:08:16 +0800 Subject: [PATCH] [Arm SPE] Arm SPE event processing support Support parser Arm SPE events, each event will be saved in different profile file. (cherry picked from commit e0d48adbcaefe8186fa776d62c9818716b71c5f1) --- 0002-Arm-spe-parser-support.patch | 1049 +++++++++++++++++++++++++++++ autofdo.spec | 9 +- 2 files changed, 1057 insertions(+), 1 deletion(-) create mode 100644 0002-Arm-spe-parser-support.patch diff --git a/0002-Arm-spe-parser-support.patch b/0002-Arm-spe-parser-support.patch new file mode 100644 index 0000000..336e7fd --- /dev/null +++ b/0002-Arm-spe-parser-support.patch @@ -0,0 +1,1049 @@ +diff --git a/Makefile.am b/Makefile.am +index a6b8e0f..cf37121 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -88,6 +88,8 @@ $(am_create_llvm_prof_OBJECTS): $(protoc_outputs) + noinst_LIBRARIES = libquipper.a + libquipper_a_SOURCES = \ + third_party/perf_data_converter/src/quipper/address_mapper.cc \ ++ third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc \ ++ third_party/perf_data_converter/src/quipper/arm_spe_parser.cc \ + third_party/perf_data_converter/src/quipper/binary_data_utils.cc \ + third_party/perf_data_converter/src/quipper/buffer_reader.cc \ + third_party/perf_data_converter/src/quipper/buffer_writer.cc \ +diff --git a/sample_reader.cc b/sample_reader.cc +index 43a03cb..ec21849 100644 +--- a/sample_reader.cc ++++ b/sample_reader.cc +@@ -216,18 +216,25 @@ bool PerfDataSampleReader::Append(const string &profile_file) { + // in the profile, then we use focus_binary to match samples. Otherwise, + // focus_binary_re_ is used to match the binary name with the samples. 
+ for (const auto &event : parser.parsed_events()) { +- if (!event.event_ptr || +- event.event_ptr->header().type() != PERF_RECORD_SAMPLE) { ++ if (!event.arm_spe_event.ArmSpeEventExist() && (!event.event_ptr || ++ event.event_ptr->header().type() != PERF_RECORD_SAMPLE)) { + continue; + } + if (MatchBinary(event.dso_and_offset.dso_name(), focus_binary)) { + address_count_map_[event.dso_and_offset.offset()]++; +- // pmu event should be processed here, if event_name is not empty, it means there is at least +- // two perf event in this perf.data, so we should record it into event_address_count_map_ for +- // multiply event support. +- auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); +- if (!event_name.empty()) { +- event_address_count_map_[event_name][event.dso_and_offset.offset()]++; ++ // If this sample is an Arm SPE event, each target event will be decoded as a name string. ++ if (event.arm_spe_event.ArmSpeEventExist()) { ++ for (const auto &type_name : parser.GetArmSpeEventNameString(event.arm_spe_event.arm_spe_type_)) { ++ event_address_count_map_[type_name][event.dso_and_offset.offset()]++; ++ } ++ } else { ++ // pmu event should be processed here, if event_name is not empty, it means there is at least ++ // two perf event in this perf.data, so we should record it into event_address_count_map_ for ++ // multiply event support. ++ auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); ++ if (!event_name.empty()) { ++ event_address_count_map_[event_name][event.dso_and_offset.offset()]++; ++ } + } + } + if (event.branch_stack.size() > 0 && +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc +new file mode 100644 +index 0000000..88f7c81 +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc +@@ -0,0 +1,233 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. 
++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++#include ++#include "base/logging.h" ++ ++#include "arm_spe_decoder.h" ++ ++namespace quipper ++{ ++ ++std::string ArmSpePacketDecoder::GetArmParserStateString(ArmParserState state) ++{ ++ if (state == ArmParserState::START) { ++ return "START"; ++ } else if (state == ArmParserState::PC) { ++ return "PC"; ++ } else if (state == ArmParserState::LAT) { ++ return "LAT"; ++ } else if (state == ArmParserState::TYPE) { ++ return "TYPE"; ++ } else if (state == ArmParserState::LD_ST) { ++ return "LD_ST"; ++ } else if (state == ArmParserState::BRANCH) { ++ return "BRANCH"; ++ } else if (state == ArmParserState::TGT) { ++ return "TGT"; ++ } else if (state == ArmParserState::PAD) { ++ return "PAD"; ++ } else if (state == ArmParserState::INST_OTHER) { ++ return "INST_OTHER"; ++ } else if (state == ArmParserState::END) { ++ return "END"; ++ } else if (state == ArmParserState::EXIT) { ++ return "EXIT"; ++ } else if (state == ArmParserState::UNKNOWN) { ++ return "UNKNOWN"; ++ } ++ return "UNKNOWN"; ++} ++ ++static std::string Dec2hex(int num) ++{ ++ std::stringstream ioss; ++ std::string tmpStr; ++ ioss << std::hex << std::setw(2) << std::setfill('0') << num; ++ ioss >> tmpStr; ++ return tmpStr; ++} ++ ++void ArmSpePacketDecoder::HandleError() ++{ ++ LOG(ERROR) << "ARM SPE: only the following Arm SPE flags are supported: jitter, branch_filter, load_filter, store_filter, event_filter, min_latency."; ++ LOG(ERROR) << "ARM SPE: recommend usage: perf record -e arm_spe_0/jitter=1/ -c COUNT -- COMMAND"; ++ LOG(ERROR) << "ARM SPE: unsupport arm_spe raw string! 
error_pos/total_size: " << m_curPos << "/" << m_totalSize; ++ LOG(ERROR) << "ARM SPE: current processing status code: " << GetArmParserStateString(m_curParserState); ++ std::string errorByteStr; ++ for (size_t i = m_curPos; i < m_curPos + 10 && i < m_totalSize; i++) { ++ errorByteStr.append(Dec2hex(int(m_rawTraceData[i]))); ++ errorByteStr.append(" "); ++ } ++ if (!errorByteStr.empty()) { ++ LOG(FATAL) << "ARM SPE: recent error 10 byte: " << errorByteStr; ++ } else { ++ LOG(FATAL) << "ARM SPE: there is no valid byte."; ++ } ++} ++ ++bool ArmSpePacketDecoder::Init(const std::string* traceData) ++{ ++ if (traceData == nullptr || traceData->empty()) { ++ LOG(ERROR) << "TraceData is invalid!"; ++ return false; ++ } ++ m_rawTraceData = traceData->c_str(); ++ m_totalSize = traceData->size(); ++ m_curParserState = ArmParserState::START; ++ m_curPos = 0; ++ return true; ++} ++ ++void ArmSpePacketDecoder::InitProcessArmSpePacketMap() ++{ ++ m_decodeArmSpePacketMap.insert({ArmParserState::START, &ArmSpePacketDecoder::ProcessInitSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::PC, &ArmSpePacketDecoder::ProcessPCSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::LAT, &ArmSpePacketDecoder::ProcessLATSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::TYPE, &ArmSpePacketDecoder::ProcessTypeSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::LD_ST, &ArmSpePacketDecoder::ProcessLDSTSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::BRANCH, &ArmSpePacketDecoder::ProcessBranchTGTSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::INST_OTHER, &ArmSpePacketDecoder::ProcessInstOtherSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::PAD, &ArmSpePacketDecoder::ProcessPadSection}); ++ m_decodeArmSpePacketMap.insert({ArmParserState::END, &ArmSpePacketDecoder::ProcessEndSection}); ++} ++ ++bool ArmSpePacketDecoder::IsUnsolvedPacketExist() const ++{ ++ if (m_totalSize > 0 && m_curPos < m_totalSize) { ++ return true; 
++ } ++ return false; ++} ++ ++bool ArmSpePacketDecoder::ProcessInitSection(ArmSpeParsedPacket& packet) ++{ ++ if (m_rawTraceData[m_curPos] == ArmSpeInstTag::PC) { ++ m_curPos++; ++ m_curParserState = ArmParserState::PC; ++ return true; ++ } else if (m_rawTraceData[m_curPos] == ArmSpeInstTag::PAD) { ++ m_curParserState = ArmParserState::PAD; ++ return true; ++ } ++ return false; ++} ++ ++bool ArmSpePacketDecoder::ProcessPCSection(ArmSpeParsedPacket& packet) ++{ ++ constexpr size_t pcLen = 7; ++ memcpy(&packet.sampleAddr, &(m_rawTraceData[m_curPos]), pcLen); ++ m_curPos += pcLen; ++ m_curPos++; // 1 = el_ns(1) ++ m_curParserState = ArmParserState::LAT; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessLATSection(ArmSpeParsedPacket& packet) ++{ ++ /* LAT is useless for AutoFDO, ignore it. */ ++ m_curPos += 6; // 6 = LAT(6) ++ m_curParserState = ArmParserState::TYPE; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessLDSTSection(ArmSpeParsedPacket& packet) ++{ ++ /* Virtual Address and LAT are useless for AutoFDO, ignore it. 
*/ ++ m_curPos += 15; // 15 = VA(9) + LAT(6) ++ m_curParserState = ArmParserState::PAD; ++ m_curInstType = ArmSpeInstType::ARM_SPE_INST_LD_ST; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessBranchTGTSection(ArmSpeParsedPacket& packet) ++{ ++ if (m_rawTraceData[m_curPos] == ArmSpeBrTypeTag::B_COND) { ++ m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR_COND; ++ } else if (m_rawTraceData[m_curPos] == ArmSpeBrTypeTag::B_IND) { ++ m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR_IND; ++ } else if (m_rawTraceData[m_curPos] == ArmSpeBrTypeTag::B) { ++ m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR; ++ } else { ++ LOG(ERROR) << "ARM SPE: Unsupport branch tag!"; ++ return false; ++ } ++ m_curPos += 2; // 2 = BRANCH_TYPE(1) + TGT_TAG(1) ++ constexpr size_t pcLen = 7; // 7 = PC(7) ++ memcpy(&packet.targetAddr, &(m_rawTraceData[m_curPos]), pcLen); ++ m_curPos += pcLen + 1; // 1 = el_ns(1) ++ m_curParserState = ArmParserState::PAD; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessInstOtherSection(ArmSpeParsedPacket& packet) ++{ ++ m_curInstType = ArmSpeInstType::ARM_SPE_INST_OTHER; ++ m_curParserState = ArmParserState::PAD; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessPadSection(ArmSpeParsedPacket& packet) ++{ ++ while(m_rawTraceData[m_curPos] == ArmSpeInstTag::PAD && m_curPos < m_totalSize - 1) { ++ m_curPos++; ++ } ++ m_curParserState = ArmParserState::END; ++ return true; ++} ++ ++bool ArmSpePacketDecoder::ProcessEndSection(ArmSpeParsedPacket& packet) ++{ ++ m_curParserState = ArmParserState::EXIT; ++ if (m_rawTraceData[m_curPos] == ArmSpeInstTag::PAD || m_rawTraceData[m_curPos] == ArmSpeInstTag::END) { ++ /* PAD tag means this is the end of Arm SPE section, there is no END tag. ++ END tag means that there are still packets to be processed next. 
*/ ++ m_curPos++; ++ return true; ++ } else if (m_rawTraceData[m_curPos] == ArmSpeInstTag::PC) { ++ /* sometimes packet not end with ArmSpeTag::END but with ++ ArmSpeTag::PC, so it should be processed next. */ ++ return true; ++ } ++ LOG(ERROR) << "ARM SPE: Invalid end section!"; ++ return false; ++} ++ ++bool ArmSpePacketDecoder::ProcessTypeSection(ArmSpeParsedPacket& packet) ++{ ++ if (m_rawTraceData[m_curPos++] != ArmSpeInstTag::TYPE) { ++ LOG(ERROR) << "ARM SPE: Unsupport sample type section tag!"; ++ return false; ++ } ++ ++ memcpy(&packet.packetType, &(m_rawTraceData[m_curPos]), sizeof(packet.packetType)); ++ m_curPos += sizeof(packet.packetType); ++ ++ if (m_rawTraceData[m_curPos] == ArmSpeInstTag::LD_ST) { ++ m_curParserState = ArmParserState::LD_ST; ++ m_curPos += 2; // 2 = FLAG(2) ++ } else if (m_rawTraceData[m_curPos] == ArmSpeInstTag::BRANCH) { ++ m_curParserState = ArmParserState::BRANCH; ++ m_curPos++; // 1 = BR_TAG(1) ++ } else if (m_rawTraceData[m_curPos] == ArmSpeInstTag::INST_OTHER) { ++ m_curParserState = ArmParserState::INST_OTHER; ++ m_curPos += 2; // 2 = FLAG(2) ++ } else { ++ LOG(ERROR) << "ARM SPE: Unsupport instruction type tag!"; ++ return false; ++ } ++ return true; ++} ++ ++bool ArmSpePacketDecoder::SolveOnePacket(ArmSpeParsedPacket& packet) ++{ ++ m_curParserState = ArmParserState::START; ++ while (m_curParserState != ArmParserState::EXIT) { ++ if (!(this->*m_decodeArmSpePacketMap[m_curParserState])(packet)) { ++ return false; ++ } ++ } ++ return true; ++} ++ ++} // namespace quipper +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h +new file mode 100644 +index 0000000..7600bd0 +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h +@@ -0,0 +1,171 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. 
++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++#ifndef PERF_DATA_CONVENTER_ARM_SPE_DECODER ++#define PERF_DATA_CONVENTER_ARM_SPE_DECODER ++ ++#include ++#include ++#include ++#include ++ ++#ifndef BIT ++#define BIT(n) (1UL << (n)) ++#endif ++ ++/* ++ Example of supporting Arm SPE profile packet: ++ ++ 1. load/str/other instruction ++ b0 68 ad d0 0b fc ff 00 80 PC 0xfffc0bd0ad68 el0 ns=1 ++ 99 0a 00 LAT 10 ISSUE ++ 98 c1 00 LAT 193 TOT ++ 52 1e 06 EV RETIRED L1D-ACCESS L1D-REFILL TLB-ACCESS LLC-REFILL REMOTE-ACCESS ++ 49 00 LD ++ b2 d8 d6 b6 0b fc ff 00 00 VA 0xfffc0bb6d6d8 ++ 9a 01 00 LAT 1 XLAT ++ 9e b6 00 LAT 182 ++ 00 00 00 00 PAD ++ 01 END ++ ++ 2. branch instruction ++ b0 20 a7 d0 0b fc ff 00 80 PC 0xfffc0bd0a720 el0 ns=1 ++ 99 66 00 LAT 102 ISSUE ++ 98 67 00 LAT 103 TOT ++ 52 82 00 EV RETIRED MISPRED ++ 4a 01 B COND ++ b1 a8 ad d0 0b fc ff 00 80 TGT 0xfffc0bd0ada8 el0 ns=1 ++ 00 00 00 00 00 00 PAD ++ 01 END ++*/ ++ ++namespace quipper { ++/* Use bit state to represent Arm SPE types. 
++ The bit status definition is the same as ArmSpeRawType */ ++using ArmSpeEventType = uint16_t; ++ ++/* DO NOT CHANGE IT, it is defined at kernel/tool/perf/.../arm_spe_decoder.h */ ++enum ArmSpeRawType { ++ EV_EXCEPTION_GEN = 0, // not used ++ EV_RETIRED = 1, ++ EV_L1D_ACCESS = 2, // not used ++ EV_L1D_REFILL = 3, ++ EV_TLB_ACCESS = 4, // not used ++ EV_TLB_REFILL = 5, // not used ++ EV_NOT_TAKEN = 6, // not used ++ EV_MISPRED = 7, // not used ++ EV_LLC_ACCESS = 8, // not used ++ EV_LLC_REFILL = 9, ++ EV_REMOTE_ACCESS = 10,// not used ++}; ++ ++/* Define instruction type of Arm SPE packet */ ++enum class ArmSpeInstType { ++ ARM_SPE_INST_OTHER, // other instruction ++ ARM_SPE_INST_LD_ST, // ld/str instruction ++ ARM_SPE_INST_BR, // branch instruction ++ ARM_SPE_INST_BR_COND, // branch condition ++ ARM_SPE_INST_BR_IND, // branch indirect call ++}; ++ ++inline bool IsArmSpeBranchInst(const ArmSpeInstType& type) ++{ ++ if (type == ArmSpeInstType::ARM_SPE_INST_BR || type == ArmSpeInstType::ARM_SPE_INST_BR_COND ++ || type == ArmSpeInstType::ARM_SPE_INST_BR_IND) { ++ return true; ++ } ++ return false; ++} ++ ++struct ArmSpeParsedPacket { ++ uint64_t sampleAddr = 0; // Address of sample instruction ++ uint64_t targetAddr = 0; // Branch instruction jump target address ++ ArmSpeEventType packetType = 0; // One packet may have multiple types, represented as bits ++ ArmSpeInstType instType; // Instruction Type ++}; ++ ++class ArmSpePacketDecoder { ++public: ++ explicit ArmSpePacketDecoder() { ++ InitProcessArmSpePacketMap(); ++ } ++ ~ArmSpePacketDecoder() {} ++ ++ bool Init(const std::string* traceData); ++ ++ bool IsUnsolvedPacketExist() const; ++ ++ bool SolveOnePacket(ArmSpeParsedPacket& packet); ++ ++ void HandleError(); ++ ++private: ++ enum class ArmParserState { ++ START, ++ PC, ++ LAT, ++ TYPE, ++ LD_ST, ++ BRANCH, ++ TGT, ++ INST_OTHER, ++ PAD, ++ END, ++ EXIT, ++ UNKNOWN, ++ }; ++ ++ enum ArmSpeInstTag { ++ PC = 0xb0, ++ END = 0x01, ++ PAD = 0x00, ++ TYPE = 0x52, ++ 
LD_ST = 0x49, ++ BRANCH = 0x4a, ++ TGT = 0xb1, ++ INST_OTHER = 0x48, ++ }; ++ ++ enum ArmSpeBrTypeTag { ++ B = 0x00, // branch instruction ++ B_COND = 0x01, // condition instruction ++ B_IND = 0x02, // indirect branch instruction ++ }; ++ ++ /* Init Table-driven map to parser a SPE packet. */ ++ void InitProcessArmSpePacketMap(); ++ ++ bool ProcessInitSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessPCSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessLATSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessTypeSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessLDSTSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessBranchTGTSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessInstOtherSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessPadSection(ArmSpeParsedPacket& packet); ++ ++ bool ProcessEndSection(ArmSpeParsedPacket& packet); ++ ++ std::string GetArmParserStateString(ArmParserState); ++ ++ const char* m_rawTraceData = nullptr; ++ size_t m_totalSize = 0; ++ size_t m_curPos = 0; ++ enum ArmParserState m_curParserState; ++ ArmSpeInstType m_curInstType; ++ ++ /* Table-Driven approach to parser a SPE packet. */ ++ std::map m_decodeArmSpePacketMap; ++ ++}; // class ArmSpePacketDecoder ++ ++} // namespace quipper ++ ++#endif //PERF_DATA_CONVENTER_ARM_SPE_DECODER +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc b/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc +new file mode 100644 +index 0000000..123502d +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc +@@ -0,0 +1,72 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. 
++#include "base/logging.h" ++ ++#include "arm_spe_parser.h" ++ ++namespace quipper { ++inline bool ArmSpeParser::IsTargetEvent(const ArmSpeParsedPacket& rawPacket) const ++{ ++ if (rawPacket.packetType & BIT(ArmSpeRawType::EV_RETIRED) ++ || rawPacket.packetType & BIT(ArmSpeRawType::EV_LLC_REFILL) ++ || rawPacket.packetType & BIT(ArmSpeRawType::EV_L1D_REFILL)) { ++ return true; ++ } ++ return false; ++} ++ ++bool ArmSpeParser::ParserRawSpeData() ++{ ++ if (!m_armSpePacketDecoder.Init(m_rawTraceData)) { ++ m_armSpePacketDecoder.HandleError(); ++ return false; ++ } ++ while (m_armSpePacketDecoder.IsUnsolvedPacketExist()) { ++ if (ArmSpeParsedPacket packet; m_armSpePacketDecoder.SolveOnePacket(packet)) { ++ if (IsTargetEvent(packet)) { ++ SavePacket(packet); ++ } ++ } else { ++ m_armSpePacketDecoder.HandleError(); ++ return false; ++ } ++ } ++ return true; ++} ++ ++const std::vector ArmSpeParser::GetArmSpeEventNameString(ArmSpeEventType type) const ++{ ++ std::vector ret; ++ for (size_t i = 0; i < sizeof(ArmSpeEventType) * 8; i++) { ++ if (type & BIT(i)) { ++ switch (i) { ++ case ArmSpeRawType::EV_RETIRED: ++ ret.push_back("ARM_SPE_INST_RETIRED"); ++ break; ++ case ArmSpeRawType::EV_LLC_REFILL: ++ ret.push_back("ARM_SPE_LLC_MISS"); ++ break; ++ case ArmSpeRawType::EV_L1D_REFILL: ++ ret.push_back("ARM_SPE_L1D_MISS"); ++ break; ++ case ArmSpeRawType::EV_TLB_REFILL: ++ case ArmSpeRawType::EV_MISPRED: ++ case ArmSpeRawType::EV_REMOTE_ACCESS: ++ case ArmSpeRawType::EV_NOT_TAKEN: ++ case ArmSpeRawType::EV_EXCEPTION_GEN: ++ case ArmSpeRawType::EV_L1D_ACCESS: ++ case ArmSpeRawType::EV_LLC_ACCESS: ++ case ArmSpeRawType::EV_TLB_ACCESS: ++ /* Useless for AutoFDO, just skip it. 
*/ ++ break; ++ default: ++ LOG(FATAL) << "ARM SPE: ArmSpeEventType not support this type: " ++ << static_cast(type); ++ } ++ } ++ } ++ return ret; ++} ++ ++} // namespace quipper +\ No newline at end of file +diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_parser.h b/third_party/perf_data_converter/src/quipper/arm_spe_parser.h +new file mode 100644 +index 0000000..c515122 +--- /dev/null ++++ b/third_party/perf_data_converter/src/quipper/arm_spe_parser.h +@@ -0,0 +1,90 @@ ++// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++#ifndef PERF_DATA_CONVENTER_ARM_SPE_PARSER ++#define PERF_DATA_CONVENTER_ARM_SPE_PARSER ++ ++#include ++#include ++ ++#include "arm_spe_decoder.h" ++ ++namespace quipper { ++/* ++ This class is used to parse Arm SPE events, ++ usage as follows: SetTraceDataPtr() -> ParserRawSpeData() -> GetParsedSpeEvent() ++*/ ++ ++class ArmSpeParser { ++public: ++ explicit ArmSpeParser() {} ++ ~ArmSpeParser() {} ++ ++ /* Parsing raw Arm SPE byte stream, return the result. */ ++ bool ParsingArmSpeData(const std::string* traceStrPtr) { ++ if (!SetTraceDatePtr(traceStrPtr)) { ++ return false; ++ } ++ return ParserRawSpeData(); ++ } ++ ++ /* Return a ref of parsed SPE packets. */ ++ const std::list& GetParsedSpeEvent() const { ++ return m_parsedSpeEvents; ++ } ++ ++ /* ArmSpeEventType is a highly compressed format, we need decode it into strings when use. */ ++ const std::vector GetArmSpeEventNameString(ArmSpeEventType) const; ++ ++ /* Free saving parsedSpeEvents. */ ++ void Clear() { ++ m_parsedSpeEvents.clear(); ++ } ++ ++ bool ArmSpeTraceDataExist() const { ++ return m_armSpeTraceDataExist; ++ } ++ ++ void SetArmSpeTraceDataExist(bool exist) { ++ m_armSpeTraceDataExist = exist; ++ } ++ ++private: ++ /* traceDataPtr is a pointer to Arm SPE data string, it must be called firstly. 
++ DO NOT FREE traceStrPtr here, it belongs to other modules. */ ++ bool SetTraceDatePtr(const std::string* traceStrPtr) { ++ if (traceStrPtr == nullptr || traceStrPtr->empty()) { ++ return false; ++ } ++ m_rawTraceData = traceStrPtr; ++ ++ return true; ++ } ++ ++ /* Parsing raw Arm SPE data, the target event will be saved in m_parsedSpeEvents. */ ++ bool ParserRawSpeData(); ++ ++ /* Return true if there is any target event we want. */ ++ bool IsTargetEvent(const ArmSpeParsedPacket&) const; ++ ++ inline void SavePacket(const ArmSpeParsedPacket& packet) { ++ m_parsedSpeEvents.push_back(packet); ++ } ++ ++ /* Record whether Arm SPE sample exists from PERF_RECORD_AUXTRACE_INFO. */ ++ bool m_armSpeTraceDataExist = false; ++ ++ /* Pointer to raw Arm SPE data, DO NOT FREE in this class. */ ++ const std::string* m_rawTraceData = nullptr; ++ ++ /* Save parsed Arm Spe event. */ ++ std::list m_parsedSpeEvents; ++ ++ /* Decoder is used to parse raw Arm SPE packets. */ ++ ArmSpePacketDecoder m_armSpePacketDecoder; ++ ++}; // class ArmSpeParser ++ ++} // namespace quipper ++ ++#endif // define PERF_DATA_CONVENTER_ARM_SPE_PARSER +\ No newline at end of file +diff --git a/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h b/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h +index 2225696..6bf6f15 100644 +--- a/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h ++++ b/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h +@@ -317,6 +317,16 @@ enum perf_user_event_type { + PERF_RECORD_HEADER_MAX = 81, + }; + ++// Taken from tools/perf/util/auxtrace.h ++enum auxtrace_type { ++ PERF_AUXTRACE_UNKNOWN, ++ PERF_AUXTRACE_INTEL_PT, ++ PERF_AUXTRACE_INTEL_BTS, ++ PERF_AUXTRACE_CS_ETM, ++ PERF_AUXTRACE_ARM_SPE, ++ PERF_AUXTRACE_S390_CPUMSF, ++}; ++ + struct attr_event { + struct perf_event_header header; + struct perf_event_attr attr; +@@ -342,6 +352,13 @@ struct tracing_data_event { + u32 size; + }; + ++struct 
auxtrace_info_event { ++ struct perf_event_header header; ++ u32 type; ++ u32 reserved__; /* For alignment */ ++ u64 priv[]; ++}; ++ + struct auxtrace_event { + struct perf_event_header header; + u64 size; +@@ -386,6 +403,7 @@ union perf_event { + struct event_type_event event_type; + struct tracing_data_event tracing_data; + struct build_id_event build_id; ++ struct auxtrace_info_event auxtrace_info; + struct auxtrace_event auxtrace; + struct aux_event aux; + struct itrace_start_event itrace_start; +diff --git a/third_party/perf_data_converter/src/quipper/perf_data.proto b/third_party/perf_data_converter/src/quipper/perf_data.proto +index faf0148..ebe72f1 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_data.proto ++++ b/third_party/perf_data_converter/src/quipper/perf_data.proto +@@ -463,6 +463,33 @@ message PerfDataProto { + optional SampleInfo sample_info = 6; + } + ++ // Next tag: 3 ++ message AuxtraceInfoEvent { ++ // Auxtrace type from the auxtrace_type enum in tools/perf/util/auxtrace.h. ++ optional uint32 type = 1; ++ ++ // Private data. ++ // WARNING: unparsed_binary_blob_priv_data contains unparsed private data ++ // specific to the type stored in the above field. This data is included to ++ // support serialization of a perf.data to perf_data.proto and ++ // deserialization of a perf_data.proto to perf.data. If this data is used ++ // for something other than the aforementioned usecase, this data has to be ++ // parsed based on the type. ++ // For example: ++ // If type == PERF_AUXTRACE_INTEL_PT, unparsed_binary_blob_priv_data ++ // contains fields filled by intel_pt_info_fill() function in the file ++ // tools/perf/arch/x86/util/intel-pt.c. ++ // If type == PERF_AUXTRACE_INTEL_BTS, unparsed_binary_blob_priv_data ++ // contains fields filled by intel_bts_info_fill() function in the file ++ // tools/perf/arch/x86/util/intel-bts.c. ++ // ++ // NOTE: Do not read this unparsed data directly. 
Quipper should be ++ // modified to parse the data into a new field before reading. Please ++ // contact developers of quipper to add support for parsing this data. ++ // ++ repeated uint64 unparsed_binary_blob_priv_data = 2; ++ } ++ + // Next tag: 8 + message AuxtraceEvent { + // Size of AUX area tracing buffer. +@@ -537,6 +564,7 @@ message PerfDataProto { + AuxEvent aux_event = 11; + ItraceStartEvent itrace_start_event = 13; + LostSamplesEvent lost_samples_event = 14; ++ AuxtraceInfoEvent auxtrace_info_event = 18; + AuxtraceEvent auxtrace_event = 12; + } + // Time after boot in nanoseconds corresponding to the event. +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.cc b/third_party/perf_data_converter/src/quipper/perf_parser.cc +index 6755e5c..8f9479c 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.cc +@@ -125,10 +125,77 @@ bool PerfParser::ParseRawEvents() { + return true; + } + ++const std::vector PerfParser::GetArmSpeEventNameString(ArmSpeEventType type) { ++ return spe_parser_.GetArmSpeEventNameString(type); ++} ++ ++bool PerfParser::MapArmSpeEvent(ParsedEvent& event, const ArmSpeParsedPacket& packet, const uint64_t pid) { ++ // Currently, We only use PID from process_map. 
++ // Actually TID is not used by MapIPAndPidAndGetNameAndOffset ++ PidTid pidtid = std::make_pair(pid, pid); ++ uint64_t remapped_event_ip = 0; ++ if (!MapIPAndPidAndGetNameAndOffset(packet.sampleAddr, pidtid, ++ &remapped_event_ip, &event.dso_and_offset)) { ++ return false; ++ } ++ if (IsArmSpeBranchInst(packet.instType)) { ++ if (!MapIPAndPidAndGetNameAndOffset(packet.targetAddr, pidtid, ++ &remapped_event_ip, &event.arm_spe_event.arm_spe_br_tgt_)) { ++ return false; ++ } ++ event.arm_spe_event.arm_spe_inst_type_ = packet.instType; ++ } ++ event.arm_spe_event.arm_spe_type_ = packet.packetType; ++ return true; ++} ++ ++void PerfParser::ProcessArmSpeEvent() { ++ stats_.num_sample_events += spe_parser_.GetParsedSpeEvent().size(); ++ for (const auto& spe_event : spe_parser_.GetParsedSpeEvent()) { ++ for (const auto& process_map : process_mappers_) { ++ if (ParsedEvent event; MapArmSpeEvent(event, spe_event, process_map.first)) { ++ parsed_events_.push_back(event); ++ stats_.num_arm_spe_event_mapped++; ++ break; ++ } ++ } ++ } ++ stats_.num_sample_events_mapped += stats_.num_arm_spe_event_mapped; ++ spe_parser_.Clear(); ++} ++ + bool PerfParser::ProcessUserEvents(PerfEvent& event) { + // New user events from PERF-4.13 is not yet supported + switch (event.header().type()) { + case PERF_RECORD_AUXTRACE: ++ // This part may be called many times during processing, every parsed event ++ // will store in spe_parser_.GetParsedSpeEvent() ++ if (spe_parser_.ArmSpeTraceDataExist() && ++ spe_parser_.ParsingArmSpeData(&event.auxtrace_event().trace_data())) { ++ stats_.num_arm_spe_events = spe_parser_.GetParsedSpeEvent().size(); ++ } ++ break; ++ case PERF_RECORD_AUXTRACE_INFO: ++ switch (event.auxtrace_info_event().type()) ++ { ++ case PERF_AUXTRACE_ARM_SPE: ++ spe_parser_.SetArmSpeTraceDataExist(true); ++ break; ++ case PERF_AUXTRACE_UNKNOWN: ++ case PERF_AUXTRACE_INTEL_PT: ++ case PERF_AUXTRACE_INTEL_BTS: ++ case PERF_AUXTRACE_CS_ETM: ++ case PERF_AUXTRACE_S390_CPUMSF: ++ 
default: ++ VLOG(1) << "Unsupported PERF_RECORD_AUXTRACE_INFO: " << event.auxtrace_info_event().type(); ++ break; ++ } ++ case PERF_RECORD_AUXTRACE_ERROR: ++ case PERF_RECORD_THREAD_MAP: ++ case PERF_RECORD_STAT_CONFIG: ++ case PERF_RECORD_STAT: ++ case PERF_RECORD_STAT_ROUND: ++ case PERF_RECORD_TIME_CONV: + VLOG(1) << "Parsed event type: " << event.header().type() + << ". Doing nothing."; + break; +@@ -251,6 +318,10 @@ bool PerfParser::ProcessEvents() { + } + if (!FillInDsoBuildIds()) return false; + ++ if (!spe_parser_.GetParsedSpeEvent().empty()) { ++ ProcessArmSpeEvent(); ++ } ++ + // Print stats collected from parsing. + // clang-format off + LOG(INFO) << "Parser processed: " +@@ -259,7 +330,9 @@ bool PerfParser::ProcessEvents() { + << stats_.num_fork_events << " FORK events, " + << stats_.num_exit_events << " EXIT events, " + << stats_.num_sample_events << " SAMPLE events, " +- << stats_.num_sample_events_mapped << " of these were mapped"; ++ << stats_.num_sample_events_mapped << " of these were mapped, " ++ << stats_.num_arm_spe_events << " ARM_SPE events, " ++ << stats_.num_arm_spe_event_mapped << " of these ARM_SPE events were mapped."; + // clang-format on + + float sample_mapping_percentage = +@@ -269,7 +342,8 @@ bool PerfParser::ProcessEvents() { + if (sample_mapping_percentage < threshold) { + LOG(ERROR) << "Mapped " << static_cast(sample_mapping_percentage) + << "% of samples, expected at least " +- << static_cast(threshold) << "%"; ++ << static_cast(threshold) << "%," ++ << " adjust with option --sample_mapping_percentage_threshold"; + return false; + } + stats_.did_remap = options_.do_remap; +diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.h b/third_party/perf_data_converter/src/quipper/perf_parser.h +index 2ec734e..37f7e34 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_parser.h ++++ b/third_party/perf_data_converter/src/quipper/perf_parser.h +@@ -22,6 +22,7 @@ + #include "compat/string.h" + #include "dso.h" + 
#include "perf_reader.h" ++#include "arm_spe_parser.h" + + namespace quipper { + +@@ -108,6 +109,43 @@ struct ParsedEvent { + std::equal(branch_stack.begin(), branch_stack.end(), + other.branch_stack.begin()); + } ++ ++ // A struct that contains Arm Spe event info, ++ // including event type, instruction type and branch ++ // target address. ++ struct ArmSpeEvent { ++ ArmSpeEventType arm_spe_type_ = 0; ++ ArmSpeInstType arm_spe_inst_type_{}; // MapArmSpeEvent assigns this only for branch instructions, so value-initialize it. ++ DSOAndOffset arm_spe_br_tgt_; ++ ++ bool ArmSpeEventExist() const { // True if any set bit is an event AutoFDO uses (retired, LLC refill, L1D refill). ++ for (size_t i = 0; i < sizeof(ArmSpeEventType) * 8; i++) { ++ if (arm_spe_type_ & BIT(i)) { // NOTE(review): assumes BIT(i) is as wide as ArmSpeEventType; confirm. ++ switch (i) { ++ case ArmSpeRawType::EV_RETIRED: ++ case ArmSpeRawType::EV_LLC_REFILL: ++ case ArmSpeRawType::EV_L1D_REFILL: ++ /* Useful for AutoFDO */ ++ return true; ++ case ArmSpeRawType::EV_MISPRED: ++ case ArmSpeRawType::EV_REMOTE_ACCESS: ++ case ArmSpeRawType::EV_TLB_REFILL: ++ case ArmSpeRawType::EV_NOT_TAKEN: ++ case ArmSpeRawType::EV_EXCEPTION_GEN: ++ case ArmSpeRawType::EV_L1D_ACCESS: ++ case ArmSpeRawType::EV_LLC_ACCESS: ++ case ArmSpeRawType::EV_TLB_ACCESS: ++ /* Useless for AutoFDO, just skip it. */ ++ break; ++ default: ++ LOG(FATAL) << "ARM SPE: ArmSpeEventType not support this type: " ++ << static_cast(arm_spe_type_); ++ } ++ } ++ } ++ return false; ++ } ++ } arm_spe_event; + }; + + struct PerfEventStats { +@@ -117,12 +155,14 @@ struct PerfEventStats { + uint32_t num_comm_events; + uint32_t num_fork_events; + uint32_t num_exit_events; ++ uint32_t num_arm_spe_events; + + // Number of sample events that were successfully mapped using the address + // mapper. The mapping is recorded regardless of whether the address in the + // perf sample event itself was assigned the remapped address. The latter is + // indicated by |did_remap|. + uint32_t num_sample_events_mapped; ++ uint32_t num_arm_spe_event_mapped; + + // Whether address remapping was enabled during event parsing. + bool did_remap; +@@ -191,6 +231,9 @@ class PerfParser { + // Use with caution. 
Deserialization uses this to restore stats from proto. + PerfEventStats* mutable_stats() { return &stats_; } + ++ // ArmSpeEventType is a packed bit mask; decode it into event-name strings before use. ++ const std::vector GetArmSpeEventNameString(ArmSpeEventType type); ++ + private: + // Used for processing events. e.g. remapping with synthetic addresses. + bool ProcessEvents(); +@@ -252,6 +295,12 @@ class PerfParser { + std::pair GetOrCreateProcessMapper( + uint32_t pid, uint32_t ppid = kKernelPid); + ++ // Maps every raw ARM SPE event and saves the mapped ones into parsed_events_. ++ void ProcessArmSpeEvent(); ++ ++ // Maps one ARM SPE packet using its sample PC and |pid|; returns false if an address cannot be mapped. ++ bool MapArmSpeEvent(ParsedEvent& event, const ArmSpeParsedPacket& packet, const uint64_t pid); ++ + // Points to a PerfReader that contains the input perf data to parse. + PerfReader* const reader_; + +@@ -277,6 +326,9 @@ class PerfParser { + // Maps process ID to an address mapper for that process. + std::unordered_map> process_mappers_; + ++ // Parses Arm SPE trace data and holds the decoded SPE events until they are mapped. ++ ArmSpeParser spe_parser_; ++ + DISALLOW_COPY_AND_ASSIGN(PerfParser); + }; + +diff --git a/third_party/perf_data_converter/src/quipper/perf_reader.cc b/third_party/perf_data_converter/src/quipper/perf_reader.cc +index 3012cd9..2815b45 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_reader.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_reader.cc +@@ -1916,6 +1916,19 @@ void PerfReader::MaybeSwapEventFields(event_t* event, bool is_cross_endian) { + case PERF_RECORD_LOST_SAMPLES: + ByteSwap(&event->lost_samples.lost); + break; ++ case PERF_RECORD_AUXTRACE_INFO: { ++ ByteSwap(&event->auxtrace_info.type); ++ u64 priv_size = ++ (event->header.size - ++ (sizeof(event->header) + sizeof(event->auxtrace_info.type) + ++ sizeof(u32) // size of auxtrace_info_event.reserved__ ++ )) / ++ sizeof(u64); // Count of u64 priv words that follow header, type and reserved__. ++ for (u64 i = 0; i < priv_size; ++i) { ++ ByteSwap(&event->auxtrace_info.priv[i]); ++ } ++ break; ++ } + case 
PERF_RECORD_AUXTRACE: + ByteSwap(&event->auxtrace.size); + ByteSwap(&event->auxtrace.offset); +diff --git a/third_party/perf_data_converter/src/quipper/perf_serializer.cc b/third_party/perf_data_converter/src/quipper/perf_serializer.cc +index 613189d..2386f4d 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_serializer.cc ++++ b/third_party/perf_data_converter/src/quipper/perf_serializer.cc +@@ -244,6 +244,9 @@ bool PerfSerializer::SerializeKernelEvent( + bool PerfSerializer::SerializeUserEvent( + const event_t& event, PerfDataProto_PerfEvent* event_proto) const { + switch (event.header.type) { ++ case PERF_RECORD_AUXTRACE_INFO: ++ return SerializeAuxtraceInfoEvent( ++ event, event_proto->mutable_auxtrace_info_event()); + case PERF_RECORD_AUXTRACE: + return SerializeAuxtraceEvent(event, + event_proto->mutable_auxtrace_event()); +@@ -329,6 +332,9 @@ bool PerfSerializer::DeserializeKernelEvent( + bool PerfSerializer::DeserializeUserEvent( + const PerfDataProto_PerfEvent& event_proto, event_t* event) const { + switch (event_proto.header().type()) { ++ case PERF_RECORD_AUXTRACE_INFO: ++ return DeserializeAuxtraceInfoEvent(event_proto.auxtrace_info_event(), ++ event); + case PERF_RECORD_AUXTRACE: + return DeserializeAuxtraceEvent(event_proto.auxtrace_event(), event); + default: +@@ -882,6 +888,35 @@ bool PerfSerializer::DeserializeBuildIDEvent( + return true; + } + ++bool PerfSerializer::SerializeAuxtraceInfoEvent( ++ const event_t& event, PerfDataProto_AuxtraceInfoEvent* sample) const { // Copies the type and the trailing u64 priv words of an AUXTRACE_INFO record into |sample|; always returns true. ++ const struct auxtrace_info_event& auxtrace_info = event.auxtrace_info; ++ u64 priv_size = ++ (event.header.size - sizeof(struct auxtrace_info_event)) / sizeof(u64); // Count of u64 priv words after the fixed part. NOTE(review): would wrap if header.size is smaller than the struct; confirm the reader rejects such records. ++ sample->set_type(auxtrace_info.type); ++ if (auxtrace_info.reserved__ != 0) { ++ LOG(WARNING) << "PERF_RECORD_AUXTRACE_INFO's auxtrace_info_event.reserved__" ++ " contains a non-zero value: " ++ << auxtrace_info.reserved__ ++ << ". 
This" ++ " record's format has changed."; ++ } ++ for (u64 i = 0; i < priv_size; ++i) { ++ sample->add_unparsed_binary_blob_priv_data(auxtrace_info.priv[i]); ++ } ++ return true; ++} ++ ++bool PerfSerializer::DeserializeAuxtraceInfoEvent( ++ const PerfDataProto_AuxtraceInfoEvent& sample, event_t* event) const { // Restores the type and priv words from |sample| into |event|; always returns true. ++ struct auxtrace_info_event& auxtrace_info = event->auxtrace_info; ++ auxtrace_info.type = sample.type(); ++ for (u64 i = 0; i < sample.unparsed_binary_blob_priv_data_size(); ++i) { ++ auxtrace_info.priv[i] = sample.unparsed_binary_blob_priv_data(i); // NOTE(review): assumes |event| has room for every priv word; confirm the caller sizes it. ++ } ++ return true; ++} ++ + bool PerfSerializer::SerializeAuxtraceEvent( + const event_t& event, PerfDataProto_AuxtraceEvent* sample) const { + const struct auxtrace_event& auxtrace = event.auxtrace; +diff --git a/third_party/perf_data_converter/src/quipper/perf_serializer.h b/third_party/perf_data_converter/src/quipper/perf_serializer.h +index 606335d..dbdeb23 100644 +--- a/third_party/perf_data_converter/src/quipper/perf_serializer.h ++++ b/third_party/perf_data_converter/src/quipper/perf_serializer.h +@@ -143,6 +143,10 @@ class PerfSerializer { + bool DeserializeBuildIDEvent(const PerfDataProto_PerfBuildID& from, + malloced_unique_ptr* to) const; + ++ bool SerializeAuxtraceInfoEvent( ++ const event_t& event, PerfDataProto_AuxtraceInfoEvent* sample) const; ++ bool DeserializeAuxtraceInfoEvent( ++ const PerfDataProto_AuxtraceInfoEvent& sample, event_t* event) const; + bool SerializeAuxtraceEvent(const event_t& event, + PerfDataProto_AuxtraceEvent* sample) const; + bool SerializeAuxtraceEventTraceData(const std::vector& from, diff --git a/autofdo.spec b/autofdo.spec index 1b8ef0c..648e813 100644 --- a/autofdo.spec +++ b/autofdo.spec @@ -1,6 +1,6 @@ Name: autofdo Version: 0.19 -Release: 1 +Release: 2 Summary: A tool to convert perf.data profile to AutoFDO profile License: Apache-2.0 URL: https://github.com/google/autofdo @@ -9,6 +9,7 @@ URL: https://github.com/google/autofdo # Source: 
https://github.com/google/%{name}/releases/download/%{version}/{version}.tar.gz Source: %{name}-%{version}.tar.xz Patch0: 0001-Multi-event-processing-support.patch +Patch1: 0002-Arm-spe-parser-support.patch BuildRequires: gcc gcc-c++ libtool autoconf automake git elfutils-libelf-devel openssl-devel pkg-config Requires: glibc openssl-libs elfutils libgcc libstdc++ zlib @@ -41,6 +42,12 @@ make -j 1 %exclude %{_bindir}/create_llvm_prof %changelog +* Sun Feb 13 2022 liyancheng <412998149@qq.com> - 0.19-2 +- Type:enhancement +- ID:NA +- SUG:NA +- DESC:Support processing ARM SPE event + * Sun Feb 13 2022 liyancheng <412998149@qq.com> - 0.19-1 - Type:enhancement - ID:NA