diff --git a/Makefile.am b/Makefile.am index a6b8e0f..cf37121 100644 --- a/Makefile.am +++ b/Makefile.am @@ -88,6 +88,8 @@ $(am_create_llvm_prof_OBJECTS): $(protoc_outputs) noinst_LIBRARIES = libquipper.a libquipper_a_SOURCES = \ third_party/perf_data_converter/src/quipper/address_mapper.cc \ + third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc \ + third_party/perf_data_converter/src/quipper/arm_spe_parser.cc \ third_party/perf_data_converter/src/quipper/binary_data_utils.cc \ third_party/perf_data_converter/src/quipper/buffer_reader.cc \ third_party/perf_data_converter/src/quipper/buffer_writer.cc \ diff --git a/sample_reader.cc b/sample_reader.cc index 43a03cb..ec21849 100644 --- a/sample_reader.cc +++ b/sample_reader.cc @@ -216,18 +216,25 @@ bool PerfDataSampleReader::Append(const string &profile_file) { // in the profile, then we use focus_binary to match samples. Otherwise, // focus_binary_re_ is used to match the binary name with the samples. for (const auto &event : parser.parsed_events()) { - if (!event.event_ptr || - event.event_ptr->header().type() != PERF_RECORD_SAMPLE) { + if (!event.arm_spe_event.ArmSpeEventExist() && (!event.event_ptr || + event.event_ptr->header().type() != PERF_RECORD_SAMPLE)) { continue; } if (MatchBinary(event.dso_and_offset.dso_name(), focus_binary)) { address_count_map_[event.dso_and_offset.offset()]++; - // pmu event should be processed here, if event_name is not empty, it means there is at least - // two perf event in this perf.data, so we should record it into event_address_count_map_ for - // multiply event support. - auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); - if (!event_name.empty()) { - event_address_count_map_[event_name][event.dso_and_offset.offset()]++; + // If this sample is an Arm SPE event, each target event will be decoded as a name string. 
+ if (event.arm_spe_event.ArmSpeEventExist()) { + for (const auto &type_name : parser.GetArmSpeEventNameString(event.arm_spe_event.arm_spe_type_)) { + event_address_count_map_[type_name][event.dso_and_offset.offset()]++; + } + } else { + // pmu event should be processed here, if event_name is not empty, it means there is at least + // two perf event in this perf.data, so we should record it into event_address_count_map_ for + // multiply event support. + auto &event_name = reader.GetEventNameFromId(event.event_ptr->sample_event().id()); + if (!event_name.empty()) { + event_address_count_map_[event_name][event.dso_and_offset.offset()]++; + } } } if (event.branch_stack.size() > 0 && diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc new file mode 100644 index 0000000..88f7c81 --- /dev/null +++ b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.cc @@ -0,0 +1,233 @@ +// Copyright (c) 2021 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+
+#include "base/logging.h"
+
+#include "arm_spe_decoder.h"
+
+namespace quipper
+{
+
+std::string ArmSpePacketDecoder::GetArmParserStateString(ArmParserState state)
+{
+    switch (state) {
+        case ArmParserState::START: return "START";
+        case ArmParserState::PC: return "PC";
+        case ArmParserState::LAT: return "LAT";
+        case ArmParserState::TYPE: return "TYPE";
+        case ArmParserState::LD_ST: return "LD_ST";
+        case ArmParserState::BRANCH: return "BRANCH";
+        case ArmParserState::TGT: return "TGT";
+        case ArmParserState::PAD: return "PAD";
+        case ArmParserState::INST_OTHER: return "INST_OTHER";
+        case ArmParserState::END: return "END";
+        case ArmParserState::EXIT: return "EXIT";
+        case ArmParserState::UNKNOWN: return "UNKNOWN";
+    }
+    return "UNKNOWN";
+}
+
+static std::string Dec2hex(int num)
+{
+    std::stringstream ioss;
+    ioss << std::hex << std::setw(2) << std::setfill('0') << num;
+    return ioss.str();
+}
+
+void ArmSpePacketDecoder::HandleError()
+{
+    LOG(ERROR) << "ARM SPE: only the following Arm SPE flags are supported: jitter, branch_filter, load_filter, store_filter, event_filter, min_latency.";
+    LOG(ERROR) << "ARM SPE: recommend usage: perf record -e arm_spe_0/jitter=1/ -c COUNT -- COMMAND";
+    LOG(ERROR) << "ARM SPE: unsupport arm_spe raw string! error_pos/total_size: " << m_curPos << "/" << m_totalSize;
+    LOG(ERROR) << "ARM SPE: current processing status code: " << GetArmParserStateString(m_curParserState);
+    std::string errorByteStr;
+    for (size_t i = m_curPos; i < m_curPos + 10 && i < m_totalSize; i++) {
+        /* Widen through uint8_t so bytes >= 0x80 print as two digits where plain char is signed. */
+        errorByteStr.append(Dec2hex(static_cast<uint8_t>(m_rawTraceData[i])));
+        errorByteStr.append(" ");
+    }
+    if (!errorByteStr.empty()) {
+        LOG(FATAL) << "ARM SPE: recent error 10 byte: " << errorByteStr;
+    } else {
+        LOG(FATAL) << "ARM SPE: there is no valid byte.";
+    }
+}
+
+bool ArmSpePacketDecoder::Init(const std::string* traceData)
+{
+    if (traceData == nullptr || traceData->empty()) {
+        LOG(ERROR) << "TraceData is invalid!";
+        return false;
+    }
+    m_rawTraceData = traceData->c_str();
+    m_totalSize = traceData->size();
+    m_curParserState = ArmParserState::START;
+    m_curPos = 0;
+    return true;
+}
+
+void ArmSpePacketDecoder::InitProcessArmSpePacketMap()
+{
+    /* {fewest unread bytes the section touches, handler}; SolveOnePacket checks the size before dispatching. */
+    m_decodeArmSpePacketMap[ArmParserState::START] = {1, &ArmSpePacketDecoder::ProcessInitSection};
+    m_decodeArmSpePacketMap[ArmParserState::PC] = {8, &ArmSpePacketDecoder::ProcessPCSection}; // PC(7) + el_ns(1)
+    m_decodeArmSpePacketMap[ArmParserState::LAT] = {6, &ArmSpePacketDecoder::ProcessLATSection}; // LAT(6)
+    m_decodeArmSpePacketMap[ArmParserState::TYPE] = {4, &ArmSpePacketDecoder::ProcessTypeSection}; // TAG(1) + EV(2) + next TAG(1)
+    m_decodeArmSpePacketMap[ArmParserState::LD_ST] = {15, &ArmSpePacketDecoder::ProcessLDSTSection}; // VA(9) + LAT(6)
+    m_decodeArmSpePacketMap[ArmParserState::BRANCH] = {10, &ArmSpePacketDecoder::ProcessBranchTGTSection}; // TYPE(1) + TGT_TAG(1) + PC(7) + el_ns(1)
+    m_decodeArmSpePacketMap[ArmParserState::INST_OTHER] = {0, &ArmSpePacketDecoder::ProcessInstOtherSection};
+    m_decodeArmSpePacketMap[ArmParserState::PAD] = {0, &ArmSpePacketDecoder::ProcessPadSection};
+    m_decodeArmSpePacketMap[ArmParserState::END] = {0, &ArmSpePacketDecoder::ProcessEndSection};
+}
+
+bool ArmSpePacketDecoder::IsUnsolvedPacketExist() const
+{
+    return m_totalSize > 0 && m_curPos < m_totalSize;
+}
+
+bool ArmSpePacketDecoder::ProcessInitSection(ArmSpeParsedPacket& packet)
+{
+    if (CurByte() == ArmSpeInstTag::PC) {
+        m_curPos++;
+        m_curParserState = ArmParserState::PC;
+        return true;
+    } else if (CurByte() == ArmSpeInstTag::PAD) {
+        m_curParserState = ArmParserState::PAD;
+        return true;
+    }
+    return false;
+}
+
+bool ArmSpePacketDecoder::ProcessPCSection(ArmSpeParsedPacket& packet)
+{
+    constexpr size_t pcLen = 7; // 7 = PC(7)
+    memcpy(&packet.sampleAddr, &(m_rawTraceData[m_curPos]), pcLen);
+    m_curPos += pcLen + 1; // 1 = el_ns(1)
+    m_curParserState = ArmParserState::LAT;
+    return true;
+}
+
+bool ArmSpePacketDecoder::ProcessLATSection(ArmSpeParsedPacket& packet)
+{
+    /* LAT is useless for AutoFDO, ignore it. */
+    m_curPos += 6; // 6 = LAT(6)
+    m_curParserState = ArmParserState::TYPE;
+    return true;
+}
+
+bool ArmSpePacketDecoder::ProcessLDSTSection(ArmSpeParsedPacket& packet)
+{
+    /* Virtual Address and LAT is useless for AutoFDO, ignore it. */
+    m_curPos += 15; // 15 = VA(9) + LAT(6)
+    m_curParserState = ArmParserState::PAD;
+    m_curInstType = ArmSpeInstType::ARM_SPE_INST_LD_ST;
+    return true;
+}
+
+bool ArmSpePacketDecoder::ProcessBranchTGTSection(ArmSpeParsedPacket& packet)
+{
+    if (CurByte() == ArmSpeBrTypeTag::B_COND) {
+        m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR_COND;
+    } else if (CurByte() == ArmSpeBrTypeTag::B_IND) {
+        m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR_IND;
+    } else if (CurByte() == ArmSpeBrTypeTag::B) {
+        m_curInstType = ArmSpeInstType::ARM_SPE_INST_BR;
+    } else {
+        LOG(ERROR) << "ARM SPE: Unsupport branch tag!";
+        return false;
+    }
+    m_curPos += 2; // 2 = BRANCH_TYPE(1) + TGT_TAG(1)
+    constexpr size_t pcLen = 7; // 7 = PC(7)
+    memcpy(&packet.targetAddr, &(m_rawTraceData[m_curPos]), pcLen);
+    m_curPos += pcLen + 1; // 1 = el_ns(1)
+    m_curParserState = ArmParserState::PAD;
+    return true;
+}
+
+bool ArmSpePacketDecoder::ProcessInstOtherSection(ArmSpeParsedPacket& packet)
+{
+    m_curInstType = ArmSpeInstType::ARM_SPE_INST_OTHER;
+    m_curParserState = ArmParserState::PAD;
+    return true;
+}
+
+bool ArmSpePacketDecoder::ProcessPadSection(ArmSpeParsedPacket& packet)
+{
+    /* Test the bound before reading; the last byte is left for the END section to consume. */
+    while (m_curPos + 1 < m_totalSize && CurByte() == ArmSpeInstTag::PAD) {
+        m_curPos++;
+    }
+    m_curParserState = ArmParserState::END;
+    return true;
+}
+
+bool ArmSpePacketDecoder::ProcessEndSection(ArmSpeParsedPacket& packet)
+{
+    m_curParserState = ArmParserState::EXIT;
+    if (!HasBytes(1)) {
+        /* The buffer stops right after a fully decoded packet; nothing is left to consume. */
+        return true;
+    }
+    if (CurByte() == ArmSpeInstTag::PAD || CurByte() == ArmSpeInstTag::END) {
+        /* PAD tag means this is the end of Arm SPE section, there is no END tag.
+           END tag means that there are still packets to be processed next. */
+        m_curPos++;
+        return true;
+    } else if (CurByte() == ArmSpeInstTag::PC) {
+        /* sometimes packet not end with ArmSpeTag::END but with
+           ArmSpeTag::PC, so it should be processed next. */
+        return true;
+    }
+    LOG(ERROR) << "ARM SPE: Invalid end section!";
+    return false;
+}
+
+bool ArmSpePacketDecoder::ProcessTypeSection(ArmSpeParsedPacket& packet)
+{
+    if (CurByte() != ArmSpeInstTag::TYPE) {
+        LOG(ERROR) << "ARM SPE: Unsupport sample type section tag!";
+        return false;
+    }
+    m_curPos++; // 1 = TYPE_TAG(1)
+    memcpy(&packet.packetType, &(m_rawTraceData[m_curPos]), sizeof(packet.packetType));
+    m_curPos += sizeof(packet.packetType);
+
+    if (CurByte() == ArmSpeInstTag::LD_ST) {
+        m_curParserState = ArmParserState::LD_ST;
+        m_curPos += 2; // 2 = FLAG(2)
+    } else if (CurByte() == ArmSpeInstTag::BRANCH) {
+        m_curParserState = ArmParserState::BRANCH;
+        m_curPos++; // 1 = BR_TAG(1)
+    } else if (CurByte() == ArmSpeInstTag::INST_OTHER) {
+        m_curParserState = ArmParserState::INST_OTHER;
+        m_curPos += 2; // 2 = FLAG(2)
+    } else {
+        LOG(ERROR) << "ARM SPE: Unsupport instruction type tag!";
+        return false;
+    }
+    return true;
+}
+
+bool ArmSpePacketDecoder::SolveOnePacket(ArmSpeParsedPacket& packet)
+{
+    m_curParserState = ArmParserState::START;
+    m_curInstType = ArmSpeInstType::ARM_SPE_INST_OTHER;
+    while (m_curParserState != ArmParserState::EXIT) {
+        /* Refuse to run a section that is unknown or would read past the end of the buffer. */
+        const auto entry = m_decodeArmSpePacketMap.find(m_curParserState);
+        if (entry == m_decodeArmSpePacketMap.end() || !HasBytes(entry->second.minBytes)) {
+            return false;
+        }
+        if (!(this->*(entry->second.handler))(packet)) {
+            return false;
+        }
+    }
+    packet.instType = m_curInstType; // hand the decoded instruction type to the caller
+    return true;
+}
+
+} // namespace quipper
diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h
new file mode 100644
index 0000000..7600bd0
--- /dev/null
+++ b/third_party/perf_data_converter/src/quipper/arm_spe_decoder.h
@@ -0,0 +1,195 @@
+// Copyright (c) 2021 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#ifndef PERF_DATA_CONVENTER_ARM_SPE_DECODER
+#define PERF_DATA_CONVENTER_ARM_SPE_DECODER
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <string>
+
+#ifndef BIT
+#define BIT(n) (1UL << (n))
+#endif
+
+/*
+   Example of supporting Arm SPE profile packet:
+
+   1. load/str/other instruction
+   b0 68 ad d0 0b fc ff 00 80    PC 0xfffc0bd0ad68 el0 ns=1
+   99 0a 00                      LAT 10 ISSUE
+   98 c1 00                      LAT 193 TOT
+   52 1e 06                      EV RETIRED L1D-ACCESS L1D-REFILL TLB-ACCESS LLC-REFILL REMOTE-ACCESS
+   49 00                         LD
+   b2 d8 d6 b6 0b fc ff 00 00    VA 0xfffc0bb6d6d8
+   9a 01 00                      LAT 1 XLAT
+   9e b6 00                      LAT 182
+   00 00 00 00                   PAD
+   01                            END
+
+   2. branch instruction
+   b0 20 a7 d0 0b fc ff 00 80    PC 0xfffc0bd0a720 el0 ns=1
+   99 66 00                      LAT 102 ISSUE
+   98 67 00                      LAT 103 TOT
+   52 82 00                      EV RETIRED MISPRED
+   4a 01                         B COND
+   b1 a8 ad d0 0b fc ff 00 80    TGT 0xfffc0bd0ada8 el0 ns=1
+   00 00 00 00 00 00             PAD
+   01                            END
+*/
+
+namespace quipper {
+/* Use bit state to represent Arm SPE types.
+   The bit status definition is the same as ArmSpeRawType */
+using ArmSpeEventType = uint16_t;
+
+/* DO NOT CHANGE IT, it's defined in the kernel at tools/perf/.../arm_spe_decoder.h */
+enum ArmSpeRawType {
+    EV_EXCEPTION_GEN = 0, // not used
+    EV_RETIRED = 1,
+    EV_L1D_ACCESS = 2,    // not used
+    EV_L1D_REFILL = 3,
+    EV_TLB_ACCESS = 4,    // not used
+    EV_TLB_REFILL = 5,    // not used
+    EV_NOT_TAKEN = 6,     // not used
+    EV_MISPRED = 7,       // not used
+    EV_LLC_ACCESS = 8,    // not used
+    EV_LLC_REFILL = 9,
+    EV_REMOTE_ACCESS = 10, // not used
+};
+
+/* Define instruction type of Arm SPE packet */
+enum class ArmSpeInstType {
+    ARM_SPE_INST_OTHER,   // other instruction
+    ARM_SPE_INST_LD_ST,   // ld/str instruction
+    ARM_SPE_INST_BR,      // branch instruction
+    ARM_SPE_INST_BR_COND, // branch condition
+    ARM_SPE_INST_BR_IND,  // branch indirect call
+};
+
+/* True for every branch flavour (plain, conditional, indirect). */
+inline bool IsArmSpeBranchInst(const ArmSpeInstType& type)
+{
+    return type == ArmSpeInstType::ARM_SPE_INST_BR || type == ArmSpeInstType::ARM_SPE_INST_BR_COND
+        || type == ArmSpeInstType::ARM_SPE_INST_BR_IND;
+}
+
+/* Result of decoding one SPE record. */
+struct ArmSpeParsedPacket {
+    uint64_t sampleAddr = 0;        // Address of sample instruction
+    uint64_t targetAddr = 0;        // Branch instruction jump target address
+    ArmSpeEventType packetType = 0; // One packet may have multiple types, each represented as a bit
+    ArmSpeInstType instType = ArmSpeInstType::ARM_SPE_INST_OTHER; // Instruction Type
+};
+
+/* Decodes a raw Arm SPE byte stream one record at a time: Init() once, then SolveOnePacket()
+   while IsUnsolvedPacketExist() is true. */
+class ArmSpePacketDecoder {
+public:
+    explicit ArmSpePacketDecoder() {
+        InitProcessArmSpePacketMap();
+    }
+    ~ArmSpePacketDecoder() {}
+
+    /* Bind the decoder to traceData (borrowed, must outlive decoding); false if null or empty. */
+    bool Init(const std::string* traceData);
+
+    /* True while unread bytes remain. */
+    bool IsUnsolvedPacketExist() const;
+
+    /* Decode the record at the current position into packet; false on an unsupported or truncated record. */
+    bool SolveOnePacket(ArmSpeParsedPacket& packet);
+
+    /* Log the failure position and the bytes around it; the last message is a LOG(FATAL). */
+    void HandleError();
+
+private:
+    enum class ArmParserState {
+        START,
+        PC,
+        LAT,
+        TYPE,
+        LD_ST,
+        BRANCH,
+        TGT,
+        INST_OTHER,
+        PAD,
+        END,
+        EXIT,
+        UNKNOWN,
+    };
+
+    enum ArmSpeInstTag {
+        PC = 0xb0,
+        END = 0x01,
+        PAD = 0x00,
+        TYPE = 0x52,
+        LD_ST = 0x49,
+        BRANCH = 0x4a,
+        TGT = 0xb1,
+        INST_OTHER = 0x48,
+    };
+
+    enum ArmSpeBrTypeTag {
+        B = 0x00,      // branch instruction
+        B_COND = 0x01, // condition instruction
+        B_IND = 0x02,  // indirect branch instruction
+    };
+
+    using SectionHandler = bool (ArmSpePacketDecoder::*)(ArmSpeParsedPacket&);
+
+    /* One row of the decode table: the handler of a section and the fewest unread bytes it touches. */
+    struct SectionEntry {
+        size_t minBytes;
+        SectionHandler handler;
+    };
+
+    /* Init Table-driven map to parse a SPE packet. */
+    void InitProcessArmSpePacketMap();
+
+    /* True when at least count unread bytes remain at m_curPos. */
+    bool HasBytes(size_t count) const {
+        return m_curPos <= m_totalSize && count <= m_totalSize - m_curPos;
+    }
+
+    /* Byte at m_curPos as an unsigned value, so tags >= 0x80 (PC, TGT) also match where plain char
+       is signed. The caller must know that HasBytes(1) holds. */
+    uint8_t CurByte() const {
+        return static_cast<uint8_t>(m_rawTraceData[m_curPos]);
+    }
+
+    bool ProcessInitSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessPCSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessLATSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessTypeSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessLDSTSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessBranchTGTSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessInstOtherSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessPadSection(ArmSpeParsedPacket& packet);
+
+    bool ProcessEndSection(ArmSpeParsedPacket& packet);
+
+    std::string GetArmParserStateString(ArmParserState);
+
+    const char* m_rawTraceData = nullptr;
+    size_t m_totalSize = 0;
+    size_t m_curPos = 0;
+    ArmParserState m_curParserState = ArmParserState::START;
+    ArmSpeInstType m_curInstType = ArmSpeInstType::ARM_SPE_INST_OTHER;
+
+    /* Table-Driven approach to parse a SPE packet. */
+    std::map<ArmParserState, SectionEntry> m_decodeArmSpePacketMap;
+
+}; // class ArmSpePacketDecoder
+
+} // namespace quipper
+
+#endif //PERF_DATA_CONVENTER_ARM_SPE_DECODER
diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc b/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc
new file mode 100644
index 0000000..123502d
--- /dev/null
+++ b/third_party/perf_data_converter/src/quipper/arm_spe_parser.cc
@@ -0,0 +1,72 @@
+// Copyright (c) 2021 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include "base/logging.h"
+
+#include "arm_spe_parser.h"
+
+namespace quipper {
+/* A packet is kept only when it reports retirement, an LLC refill or an L1D refill. */
+bool ArmSpeParser::IsTargetEvent(const ArmSpeParsedPacket& rawPacket) const
+{
+    constexpr unsigned long targetBits = BIT(ArmSpeRawType::EV_RETIRED)
+        | BIT(ArmSpeRawType::EV_LLC_REFILL) | BIT(ArmSpeRawType::EV_L1D_REFILL);
+    return (rawPacket.packetType & targetBits) != 0;
+}
+
+/* Decode every record of m_rawTraceData and keep the target ones. Both failure paths call
+   HandleError() on the decoder before returning false. */
+bool ArmSpeParser::ParserRawSpeData()
+{
+    if (!m_armSpePacketDecoder.Init(m_rawTraceData)) {
+        m_armSpePacketDecoder.HandleError();
+        return false;
+    }
+    while (m_armSpePacketDecoder.IsUnsolvedPacketExist()) {
+        ArmSpeParsedPacket packet;
+        if (!m_armSpePacketDecoder.SolveOnePacket(packet)) {
+            m_armSpePacketDecoder.HandleError();
+            return false;
+        }
+        if (IsTargetEvent(packet)) {
+            SavePacket(packet);
+        }
+    }
+    return true;
+}
+
+const std::vector<std::string> ArmSpeParser::GetArmSpeEventNameString(ArmSpeEventType type) const
+{
+    std::vector<std::string> ret;
+    for (size_t i = 0; i < sizeof(ArmSpeEventType) * 8; i++) {
+        if (type & BIT(i)) {
+            switch (i) {
+                case ArmSpeRawType::EV_RETIRED:
+                    ret.push_back("ARM_SPE_INST_RETIRED");
+                    break;
+                case ArmSpeRawType::EV_LLC_REFILL:
+                    ret.push_back("ARM_SPE_LLC_MISS");
+                    break;
+                case ArmSpeRawType::EV_L1D_REFILL:
+                    ret.push_back("ARM_SPE_L1D_MISS");
+                    break;
+                case ArmSpeRawType::EV_TLB_REFILL:
+                case ArmSpeRawType::EV_MISPRED:
+                case ArmSpeRawType::EV_REMOTE_ACCESS:
+                case ArmSpeRawType::EV_NOT_TAKEN:
+                case ArmSpeRawType::EV_EXCEPTION_GEN:
+                case ArmSpeRawType::EV_L1D_ACCESS:
+                case ArmSpeRawType::EV_LLC_ACCESS:
+                case ArmSpeRawType::EV_TLB_ACCESS:
+                    /* Useless for AutoFDO, just skip it. */
+                    break;
+                default:
+                    /* Bits above EV_REMOTE_ACCESS are not named here; a packet carrying one must not abort the run. */
+                    break;
+            }
+        }
+    }
+    return ret;
+}
+
+} // namespace quipper
\ No newline at end of file
diff --git a/third_party/perf_data_converter/src/quipper/arm_spe_parser.h b/third_party/perf_data_converter/src/quipper/arm_spe_parser.h
new file mode 100644
index 0000000..c515122
--- /dev/null
+++ b/third_party/perf_data_converter/src/quipper/arm_spe_parser.h
@@ -0,0 +1,91 @@
+// Copyright (c) 2021 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#ifndef PERF_DATA_CONVENTER_ARM_SPE_PARSER
+#define PERF_DATA_CONVENTER_ARM_SPE_PARSER
+
+#include <list>
+#include <string>
+#include <vector>
+
+#include "arm_spe_decoder.h"
+
+namespace quipper {
+/*
+   This class is used to parse Arm SPE events.
+   Usage: ParsingArmSpeData() for each raw buffer -> GetParsedSpeEvent() -> Clear()
+*/
+
+class ArmSpeParser {
+public:
+    explicit ArmSpeParser() {}
+    ~ArmSpeParser() {}
+
+    /* Parse a raw Arm SPE byte stream; the kept packets are appended to the parsed list. */
+    bool ParsingArmSpeData(const std::string* traceStrPtr) {
+        if (!SetTraceDataPtr(traceStrPtr)) {
+            return false;
+        }
+        return ParserRawSpeData();
+    }
+
+    /* Return a ref of parsed SPE packets. */
+    const std::list<ArmSpeParsedPacket>& GetParsedSpeEvent() const {
+        return m_parsedSpeEvents;
+    }
+
+    /* ArmSpeEventType is a highly compressed format, it is decoded into name strings on use. */
+    const std::vector<std::string> GetArmSpeEventNameString(ArmSpeEventType) const;
+
+    /* Drop the saved parsedSpeEvents. */
+    void Clear() {
+        m_parsedSpeEvents.clear();
+    }
+
+    bool ArmSpeTraceDataExist() const {
+        return m_armSpeTraceDataExist;
+    }
+
+    void SetArmSpeTraceDataExist(bool exist) {
+        m_armSpeTraceDataExist = exist;
+    }
+
+private:
+    /* traceStrPtr is a pointer to the Arm SPE data string, it must be set before parsing.
+       DO NOT FREE traceStrPtr here, it belongs to other modules. */
+    bool SetTraceDataPtr(const std::string* traceStrPtr) {
+        if (traceStrPtr == nullptr || traceStrPtr->empty()) {
+            return false;
+        }
+        m_rawTraceData = traceStrPtr;
+
+        return true;
+    }
+
+    /* Parsing raw Arm SPE data, the target event will be saved in m_parsedSpeEvents. */
+    bool ParserRawSpeData();
+
+    /* Return true if there is any target event we want. */
+    bool IsTargetEvent(const ArmSpeParsedPacket&) const;
+
+    inline void SavePacket(const ArmSpeParsedPacket& packet) {
+        m_parsedSpeEvents.push_back(packet);
+    }
+
+    /* Record whether Arm SPE sample exists from PERF_RECORD_AUXTRACE_INFO. */
+    bool m_armSpeTraceDataExist = false;
+
+    /* Pointer to raw Arm SPE data, DO NOT FREE in this class. */
+    const std::string* m_rawTraceData = nullptr;
+
+    /* Save parsed Arm Spe event. */
+    std::list<ArmSpeParsedPacket> m_parsedSpeEvents;
+
+    /* Decoder is used to parse raw Arm SPE packets. */
+    ArmSpePacketDecoder m_armSpePacketDecoder;
+
+}; // class ArmSpeParser
+
+} // namespace quipper
+
+#endif // define PERF_DATA_CONVENTER_ARM_SPE_PARSER
\ No newline at end of file
diff --git a/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h b/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h
index 2225696..6bf6f15 100644
--- a/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h
+++ b/third_party/perf_data_converter/src/quipper/kernel/perf_internals.h
@@ -317,6 +317,16 @@ enum perf_user_event_type {
   PERF_RECORD_HEADER_MAX = 81,
 };
 
+// Taken from tools/perf/util/auxtrace.h
+enum auxtrace_type {
+  PERF_AUXTRACE_UNKNOWN,
+  PERF_AUXTRACE_INTEL_PT,
+  PERF_AUXTRACE_INTEL_BTS,
+  PERF_AUXTRACE_CS_ETM,
+  PERF_AUXTRACE_ARM_SPE,
+  PERF_AUXTRACE_S390_CPUMSF,
+};
+
 struct attr_event {
   struct perf_event_header header;
   struct perf_event_attr attr;
@@ -342,6 +352,13 @@ struct tracing_data_event {
   u32 size;
 };
 
+struct auxtrace_info_event {
+  struct perf_event_header header;
+  u32 type;
+  u32 reserved__; /* For alignment */
+  u64 priv[];
+};
+
 struct
auxtrace_event {
   struct perf_event_header header;
   u64 size;
@@ -386,6 +403,7 @@ union perf_event {
   struct event_type_event event_type;
   struct tracing_data_event tracing_data;
   struct build_id_event build_id;
+  struct auxtrace_info_event auxtrace_info;
   struct auxtrace_event auxtrace;
   struct aux_event aux;
   struct itrace_start_event itrace_start;
diff --git a/third_party/perf_data_converter/src/quipper/perf_data.proto b/third_party/perf_data_converter/src/quipper/perf_data.proto
index faf0148..ebe72f1 100644
--- a/third_party/perf_data_converter/src/quipper/perf_data.proto
+++ b/third_party/perf_data_converter/src/quipper/perf_data.proto
@@ -463,6 +463,33 @@ message PerfDataProto {
     optional SampleInfo sample_info = 6;
   }
 
+  // Next tag: 3
+  message AuxtraceInfoEvent {
+    // Auxtrace type from the auxtrace_type enum in tools/perf/util/auxtrace.h.
+    optional uint32 type = 1;
+
+    // Private data.
+    // WARNING: unparsed_binary_blob_priv_data contains unparsed private data
+    // specific to the type stored in the above field. This data is included to
+    // support serialization of a perf.data to perf_data.proto and
+    // deserialization of a perf_data.proto to perf.data. If this data is used
+    // for something other than the aforementioned usecase, this data has to be
+    // parsed based on the type.
+    // For example:
+    // If type == PERF_AUXTRACE_INTEL_PT, unparsed_binary_blob_priv_data
+    // contains fields filled by intel_pt_info_fill() function in the file
+    // tools/perf/arch/x86/util/intel-pt.c.
+    // If type == PERF_AUXTRACE_INTEL_BTS, unparsed_binary_blob_priv_data
+    // contains fields filled by intel_bts_info_fill() function in the file
+    // tools/perf/arch/x86/util/intel-bts.c.
+    //
+    // NOTE: Do not read this unparsed data directly. Quipper should be
+    // modified to parse the data into a new field before reading. Please
+    // contact developers of quipper to add support for parsing this data.
+    //
+    repeated uint64 unparsed_binary_blob_priv_data = 2;
+  }
+
   // Next tag: 8
   message AuxtraceEvent {
     // Size of AUX area tracing buffer.
@@ -537,6 +564,7 @@ message PerfDataProto {
       AuxEvent aux_event = 11;
       ItraceStartEvent itrace_start_event = 13;
       LostSamplesEvent lost_samples_event = 14;
+      AuxtraceInfoEvent auxtrace_info_event = 18;
       AuxtraceEvent auxtrace_event = 12;
     }
     // Time after boot in nanoseconds corresponding to the event.
diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.cc b/third_party/perf_data_converter/src/quipper/perf_parser.cc
index 6755e5c..8f9479c 100644
--- a/third_party/perf_data_converter/src/quipper/perf_parser.cc
+++ b/third_party/perf_data_converter/src/quipper/perf_parser.cc
@@ -125,10 +125,78 @@ bool PerfParser::ParseRawEvents() {
   return true;
 }
 
+const std::vector<std::string> PerfParser::GetArmSpeEventNameString(ArmSpeEventType type) {
+  return spe_parser_.GetArmSpeEventNameString(type);
+}
+
+bool PerfParser::MapArmSpeEvent(ParsedEvent& event, const ArmSpeParsedPacket& packet, const uint64_t pid) {
+  // Currently, We only use PID from process_map.
+  // Actually TID is not used by MapIPAndPidAndGetNameAndOffset
+  PidTid pidtid = std::make_pair(pid, pid);
+  uint64_t remapped_event_ip = 0;
+  if (!MapIPAndPidAndGetNameAndOffset(packet.sampleAddr, pidtid,
+      &remapped_event_ip, &event.dso_and_offset)) {
+    return false;
+  }
+  if (IsArmSpeBranchInst(packet.instType)) {
+    if (!MapIPAndPidAndGetNameAndOffset(packet.targetAddr, pidtid,
+        &remapped_event_ip, &event.arm_spe_event.arm_spe_br_tgt_)) {
+      return false;
+    }
+    event.arm_spe_event.arm_spe_inst_type_ = packet.instType;
+  }
+  event.arm_spe_event.arm_spe_type_ = packet.packetType;
+  return true;
+}
+
+void PerfParser::ProcessArmSpeEvent() {
+  stats_.num_sample_events += spe_parser_.GetParsedSpeEvent().size();
+  for (const auto& spe_event : spe_parser_.GetParsedSpeEvent()) {
+    for (const auto& process_map : process_mappers_) {
+      if (ParsedEvent event; MapArmSpeEvent(event, spe_event, process_map.first)) {
+        parsed_events_.push_back(event);
+        stats_.num_arm_spe_event_mapped++;
+        break;
+      }
+    }
+  }
+  stats_.num_sample_events_mapped += stats_.num_arm_spe_event_mapped;
+  spe_parser_.Clear();
+}
+
 bool PerfParser::ProcessUserEvents(PerfEvent& event) {
   // New user events from PERF-4.13 is not yet supported
   switch (event.header().type()) {
     case PERF_RECORD_AUXTRACE:
+      // This part may be called many times during processing, every parsed event
+      // will store in spe_parser_.GetParsedSpeEvent()
+      if (spe_parser_.ArmSpeTraceDataExist() &&
+          spe_parser_.ParsingArmSpeData(&event.auxtrace_event().trace_data())) {
+        stats_.num_arm_spe_events = spe_parser_.GetParsedSpeEvent().size();
+      }
+      break;
+    case PERF_RECORD_AUXTRACE_INFO:
+      switch (event.auxtrace_info_event().type())
+      {
+        case PERF_AUXTRACE_ARM_SPE:
+          spe_parser_.SetArmSpeTraceDataExist(true);
+          break;
+        case PERF_AUXTRACE_UNKNOWN:
+        case PERF_AUXTRACE_INTEL_PT:
+        case PERF_AUXTRACE_INTEL_BTS:
+        case PERF_AUXTRACE_CS_ETM:
+        case PERF_AUXTRACE_S390_CPUMSF:
+        default:
+          VLOG(1) << "Unsupported PERF_RECORD_AUXTRACE_INFO: " << event.auxtrace_info_event().type();
+          break;
+      }
+      break;  // AUXTRACE_INFO is fully handled above; keep it out of the "Doing nothing" log below.
+    case PERF_RECORD_AUXTRACE_ERROR:
+    case PERF_RECORD_THREAD_MAP:
+    case PERF_RECORD_STAT_CONFIG:
+    case PERF_RECORD_STAT:
+    case PERF_RECORD_STAT_ROUND:
+    case PERF_RECORD_TIME_CONV:
       VLOG(1) << "Parsed event type: " << event.header().type()
               << ". Doing nothing.";
       break;
@@ -251,6 +319,10 @@ bool PerfParser::ProcessEvents() {
   }
   if (!FillInDsoBuildIds()) return false;
 
+  if (!spe_parser_.GetParsedSpeEvent().empty()) {
+    ProcessArmSpeEvent();
+  }
+
   // Print stats collected from parsing.
   // clang-format off
   LOG(INFO) << "Parser processed: "
@@ -259,7 +331,9 @@ bool PerfParser::ProcessEvents() {
             << stats_.num_fork_events << " FORK events, "
             << stats_.num_exit_events << " EXIT events, "
             << stats_.num_sample_events << " SAMPLE events, "
-            << stats_.num_sample_events_mapped << " of these were mapped";
+            << stats_.num_sample_events_mapped << " of these were mapped, "
+            << stats_.num_arm_spe_events << " ARM_SPE events, "
+            << stats_.num_arm_spe_event_mapped << " of these ARM_SPE events were mapped.";
   // clang-format on
 
   float sample_mapping_percentage =
@@ -269,7 +343,8 @@ bool PerfParser::ProcessEvents() {
   if (sample_mapping_percentage < threshold) {
     LOG(ERROR) << "Mapped " << static_cast<int>(sample_mapping_percentage)
                << "% of samples, expected at least "
-               << static_cast<int>(threshold) << "%";
+               << static_cast<int>(threshold) << "%,"
+               << " adjust with option --sample_mapping_percentage_threshold";
     return false;
   }
   stats_.did_remap = options_.do_remap;
diff --git a/third_party/perf_data_converter/src/quipper/perf_parser.h b/third_party/perf_data_converter/src/quipper/perf_parser.h
index 2ec734e..37f7e34 100644
--- a/third_party/perf_data_converter/src/quipper/perf_parser.h
+++ b/third_party/perf_data_converter/src/quipper/perf_parser.h
@@ -22,6 +22,7 @@
 #include "compat/string.h"
 #include "dso.h"
 #include "perf_reader.h"
+#include "arm_spe_parser.h"
 
 namespace quipper {
 
@@ -108,6 +109,43 @@ struct ParsedEvent {
            std::equal(branch_stack.begin(), branch_stack.end(),
                       other.branch_stack.begin());
   }
+
+  // A struct that contains Arm Spe event info,
+  // including event type, instruction type and branch
+  // target address.
+  struct ArmSpeEvent {
+    ArmSpeEventType arm_spe_type_ = 0;
+    ArmSpeInstType arm_spe_inst_type_ = ArmSpeInstType::ARM_SPE_INST_OTHER;  // only overwritten for branch samples
+    DSOAndOffset arm_spe_br_tgt_;
+
+    bool ArmSpeEventExist() const {
+      for (size_t i = 0; i < sizeof(ArmSpeEventType) * 8; i++) {
+        if (arm_spe_type_ & BIT(i)) {
+          switch (i) {
+            case ArmSpeRawType::EV_RETIRED:
+            case ArmSpeRawType::EV_LLC_REFILL:
+            case ArmSpeRawType::EV_L1D_REFILL:
+              /* Useful for AutoFDO */
+              return true;
+            case ArmSpeRawType::EV_MISPRED:
+            case ArmSpeRawType::EV_REMOTE_ACCESS:
+            case ArmSpeRawType::EV_TLB_REFILL:
+            case ArmSpeRawType::EV_NOT_TAKEN:
+            case ArmSpeRawType::EV_EXCEPTION_GEN:
+            case ArmSpeRawType::EV_L1D_ACCESS:
+            case ArmSpeRawType::EV_LLC_ACCESS:
+            case ArmSpeRawType::EV_TLB_ACCESS:
+              /* Useless for AutoFDO, just skip it. */
+              break;
+            default:
+              LOG(FATAL) << "ARM SPE: ArmSpeEventType not support this type: "
+                         << static_cast<int>(arm_spe_type_);
+          }
+        }
+      }
+      return false;
+    }
+  } arm_spe_event;
 };
 
 struct PerfEventStats {
@@ -117,12 +155,14 @@ struct PerfEventStats {
   uint32_t num_comm_events;
   uint32_t num_fork_events;
   uint32_t num_exit_events;
+  uint32_t num_arm_spe_events;
 
   // Number of sample events that were successfully mapped using the address
   // mapper. The mapping is recorded regardless of whether the address in the
   // perf sample event itself was assigned the remapped address. The latter is
   // indicated by |did_remap|.
   uint32_t num_sample_events_mapped;
+  uint32_t num_arm_spe_event_mapped;
 
   // Whether address remapping was enabled during event parsing.
   bool did_remap;
@@ -191,6 +231,9 @@ class PerfParser {
   // Use with caution. Deserialization uses this to restore stats from proto.
   PerfEventStats* mutable_stats() { return &stats_; }
 
+  // ArmSpeEventType is a highly compressed format, we need decode it into strings when use.
+  const std::vector<std::string> GetArmSpeEventNameString(ArmSpeEventType type);
+
  private:
   // Used for processing events. e.g. remapping with synthetic addresses.
   bool ProcessEvents();
@@ -252,6 +295,12 @@ class PerfParser {
   std::pair<AddressMapper*, bool> GetOrCreateProcessMapper(
       uint32_t pid, uint32_t ppid = kKernelPid);
 
+  // Map every raw ARM SPE event, save them into parsed_events_.
+  void ProcessArmSpeEvent();
+
+  // Map an ARM SPE event by PC(from sample) and pid.
+  bool MapArmSpeEvent(ParsedEvent& event, const ArmSpeParsedPacket& packet, const uint64_t pid);
+
   // Points to a PerfReader that contains the input perf data to parse.
   PerfReader* const reader_;
 
@@ -277,6 +326,9 @@ class PerfParser {
   // Maps process ID to an address mapper for that process.
   std::unordered_map<uint32_t, std::unique_ptr<AddressMapper>> process_mappers_;
 
+  // A member to process Arm SPE event
+  ArmSpeParser spe_parser_;
+
   DISALLOW_COPY_AND_ASSIGN(PerfParser);
 };
 
diff --git a/third_party/perf_data_converter/src/quipper/perf_reader.cc b/third_party/perf_data_converter/src/quipper/perf_reader.cc
index 3012cd9..2815b45 100644
--- a/third_party/perf_data_converter/src/quipper/perf_reader.cc
+++ b/third_party/perf_data_converter/src/quipper/perf_reader.cc
@@ -1916,6 +1916,19 @@ void PerfReader::MaybeSwapEventFields(event_t* event, bool is_cross_endian) {
     case PERF_RECORD_LOST_SAMPLES:
       ByteSwap(&event->lost_samples.lost);
       break;
+    case PERF_RECORD_AUXTRACE_INFO: {
+      ByteSwap(&event->auxtrace_info.type);
+      u64 priv_size =
+          (event->header.size -
+           (sizeof(event->header) + sizeof(event->auxtrace_info.type) +
+            sizeof(u32)  // size of auxtrace_info_event.reserved__
+            )) /
+          sizeof(u64);
+      for (u64 i = 0; i < priv_size; ++i) {
+        ByteSwap(&event->auxtrace_info.priv[i]);
+      }
+      break;
+    }
     case PERF_RECORD_AUXTRACE:
       ByteSwap(&event->auxtrace.size);
       ByteSwap(&event->auxtrace.offset);
diff --git a/third_party/perf_data_converter/src/quipper/perf_serializer.cc b/third_party/perf_data_converter/src/quipper/perf_serializer.cc
index 613189d..2386f4d 100644
--- a/third_party/perf_data_converter/src/quipper/perf_serializer.cc
+++ b/third_party/perf_data_converter/src/quipper/perf_serializer.cc
@@ -244,6 +244,9 @@ bool PerfSerializer::SerializeKernelEvent(
 bool PerfSerializer::SerializeUserEvent(
     const event_t& event, PerfDataProto_PerfEvent* event_proto) const {
   switch (event.header.type) {
+    case PERF_RECORD_AUXTRACE_INFO:
+      return SerializeAuxtraceInfoEvent(
+          event, event_proto->mutable_auxtrace_info_event());
     case PERF_RECORD_AUXTRACE:
       return SerializeAuxtraceEvent(event,
                                     event_proto->mutable_auxtrace_event());
@@ -329,6 +332,9 @@ bool PerfSerializer::DeserializeKernelEvent(
 bool PerfSerializer::DeserializeUserEvent(
     const PerfDataProto_PerfEvent& event_proto, event_t* event) const {
   switch (event_proto.header().type()) {
+    case PERF_RECORD_AUXTRACE_INFO:
+      return DeserializeAuxtraceInfoEvent(event_proto.auxtrace_info_event(),
+                                          event);
     case PERF_RECORD_AUXTRACE:
       return DeserializeAuxtraceEvent(event_proto.auxtrace_event(), event);
     default:
@@ -882,6 +888,35 @@ bool PerfSerializer::DeserializeBuildIDEvent(
   return true;
 }
 
+bool PerfSerializer::SerializeAuxtraceInfoEvent(
+    const event_t& event, PerfDataProto_AuxtraceInfoEvent* sample) const {
+  const struct auxtrace_info_event& auxtrace_info = event.auxtrace_info;
+  u64 priv_size =
+      (event.header.size - sizeof(struct auxtrace_info_event)) / sizeof(u64);
+  sample->set_type(auxtrace_info.type);
+  if (auxtrace_info.reserved__ != 0) {
+    LOG(WARNING) << "PERF_RECORD_AUXTRACE_INFO's auxtrace_info_event.reserved__"
+                    " contains a non-zero value: "
+                 << auxtrace_info.reserved__
+                 << ". This"
+                    " record's format has changed.";
+  }
+  for (u64 i = 0; i < priv_size; ++i) {
+    sample->add_unparsed_binary_blob_priv_data(auxtrace_info.priv[i]);
+  }
+  return true;
+}
+
+bool PerfSerializer::DeserializeAuxtraceInfoEvent(
+    const PerfDataProto_AuxtraceInfoEvent& sample, event_t* event) const {
+  struct auxtrace_info_event& auxtrace_info = event->auxtrace_info;
+  auxtrace_info.type = sample.type();
+  for (u64 i = 0; i < sample.unparsed_binary_blob_priv_data_size(); ++i) {
+    auxtrace_info.priv[i] = sample.unparsed_binary_blob_priv_data(i);
+  }
+  return true;
+}
+
 bool PerfSerializer::SerializeAuxtraceEvent(
     const event_t& event, PerfDataProto_AuxtraceEvent* sample) const {
   const struct auxtrace_event& auxtrace = event.auxtrace;
diff --git a/third_party/perf_data_converter/src/quipper/perf_serializer.h b/third_party/perf_data_converter/src/quipper/perf_serializer.h
index 606335d..dbdeb23 100644
--- a/third_party/perf_data_converter/src/quipper/perf_serializer.h
+++ b/third_party/perf_data_converter/src/quipper/perf_serializer.h
@@ -143,6 +143,10 @@ class PerfSerializer {
   bool DeserializeBuildIDEvent(const PerfDataProto_PerfBuildID& from,
                                malloced_unique_ptr<build_id_event>* to) const;
 
+  bool SerializeAuxtraceInfoEvent(
+      const event_t& event, PerfDataProto_AuxtraceInfoEvent* sample) const;
+  bool DeserializeAuxtraceInfoEvent(
+      const PerfDataProto_AuxtraceInfoEvent& sample, event_t* event) const;
   bool SerializeAuxtraceEvent(const event_t& event,
                               PerfDataProto_AuxtraceEvent* sample) const;
   bool SerializeAuxtraceEventTraceData(const std::vector<char>& from,