From 6309460981655bd0899bc5ca65af123c2e42b638 Mon Sep 17 00:00:00 2001 From: glx Date: Tue, 29 Oct 2024 15:19:06 +0800 Subject: [PATCH 14/20] Update Details.md --- docs/Details.md | 80 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/docs/Details.md b/docs/Details.md index db0b407..14ca694 100644 --- a/docs/Details.md +++ b/docs/Details.md @@ -261,4 +261,82 @@ libkperf提供了采集子线程的能力。如果想要在上面场景中获取 attr.includeNewFork = 1; ``` 然后,通过PmuRead获取到的PmuData,便能包含子线程计数信息了。 -注意,该功能是针对Counting模式,因为Sampling和SPE Sampling本身就会采集子线程的数据。 \ No newline at end of file +注意,该功能是针对Counting模式,因为Sampling和SPE Sampling本身就会采集子线程的数据。 + +### 采集DDRC带宽 +基于uncore事件可以计算DDRC的访存带宽,不同硬件平台有不同的计算方式。 +鲲鹏芯片上的访存带宽公式可以参考openeuler kernel的tools/perf/pmu-events/arch/arm64/hisilicon/hip09/sys/uncore-ddrc.json: +```json + { + "MetricExpr": "flux_wr * 32 / duration_time", + "BriefDescription": "Average bandwidth of DDRC memory write(Byte/s)", + "Compat": "0x00000030", + "MetricGroup": "DDRC", + "MetricName": "ddrc_bw_write", + "Unit": "hisi_sccl,ddrc" + }, + { + "MetricExpr": "flux_rd * 32 / duration_time", + "BriefDescription": "Average bandwidth of DDRC memory read(Byte/s)", + "Compat": "0x00000030", + "MetricGroup": "DDRC", + "MetricName": "ddrc_bw_read", + "Unit": "hisi_sccl,ddrc" + }, +``` + +根据公式,采集flux_wr和flux_rd事件,用于计算带宽: +```c++ + // 采集hisi_scclX_ddrc设备下的flux_rd和flux_wr, + // 具体设备名称因硬件而异,可以在/sys/devices/下查询。 + vector<char*> evts = { + "hisi_sccl1_ddrc/flux_rd/", + "hisi_sccl3_ddrc/flux_rd/", + "hisi_sccl5_ddrc/flux_rd/", + "hisi_sccl7_ddrc/flux_rd/", + "hisi_sccl1_ddrc/flux_wr/", + "hisi_sccl3_ddrc/flux_wr/", + "hisi_sccl5_ddrc/flux_wr/", + "hisi_sccl7_ddrc/flux_wr/" + }; + + PmuAttr attr = {0}; + attr.evtList = evts.data(); + attr.numEvt = evts.size(); + + int pd = PmuOpen(COUNTING, &attr); + if (pd == -1) { + cout << Perror() << "\n"; + return; + } + + PmuEnable(pd); + for (int i=0;i<60;++i) { + sleep(1); + PmuData *data = nullptr; + int len = 
PmuRead(pd, &data); + // 有8个uncore事件,所以data的长度等于8. + // 前4个是4个numa的read带宽,后4个是4个numa的write带宽。 + for (int j=0;j<4;++j) { + printf("read bandwidth: %f M/s\n", (float)data[j].count*32/1024/1024); + } + for (int j=4;j<8;++j) { + printf("write bandwidth: %f M/s\n", (float)data[j].count*32/1024/1024); + } + PmuDataFree(data); + } + PmuDisable(pd); + PmuClose(pd); +``` + +执行上述代码,输出的结果类似如下: +``` +read bandwidth: 17.32 M/s +read bandwidth: 5.43 M/s +read bandwidth: 2.83 M/s +read bandwidth: 4.09 M/s +write bandwidth: 4.35 M/s +write bandwidth: 2.29 M/s +write bandwidth: 0.84 M/s +write bandwidth: 0.97 M/s +``` -- 2.43.0