925 lines
33 KiB
Diff
925 lines
33 KiB
Diff
From 1722afc51311a6bb0b892df50602f660c706162f Mon Sep 17 00:00:00 2001
|
|
From: liuf9 <liufeiyang6@huawei.com>
|
|
Date: Fri, 15 Dec 2023 11:25:48 +0800
|
|
Subject: [PATCH 2/2] [LLC Allocation] Fix some bugs and remove variable
|
|
prefetch tool. After outer loop analysis, kernel candidates may contain nested
|
|
loops. This conflicts with the early-exit criterion of the kernel filtering
|
|
process, so we now apply that criterion to innermost loops only. We also fix
|
|
some pass configuration bugs in common.opt and params.opt. We remove the
|
|
variable prefetch tool because its user-supplied inputs may be unsafe.
|
|
|
|
---
|
|
gcc/common.opt | 2 +-
|
|
gcc/params.opt | 24 +-
|
|
gcc/testsuite/gcc.dg/llc-allocate/llc-2.c | 2 +-
|
|
.../llc-allocate/llc-issue-builtin-prefetch.c | 48 ----
|
|
.../llc-allocate/llc-tool-insertion-1.c | 48 ----
|
|
.../llc-allocate/llc-tool-insertion-2.c | 48 ----
|
|
.../llc-allocate/llc-tool-insertion-3.c | 48 ----
|
|
.../llc-allocate/llc-tool-insertion-4.c | 47 ---
|
|
.../llc-allocate/llc-tool-insertion-5.c | 48 ----
|
|
.../llc-allocate/llc-tool-insertion-6.c | 47 ---
|
|
.../llc-tool-insertion-7-null-var-name.c | 52 ----
|
|
.../llc-tool-insertion-8-tmp-var-name.c | 54 ----
|
|
gcc/tree-ssa-llc-allocate.c | 267 +-----------------
|
|
13 files changed, 11 insertions(+), 724 deletions(-)
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
|
|
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
|
|
|
|
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
index 56ad9a378..a8a2264ee 100644
|
|
--- a/gcc/common.opt
|
|
+++ b/gcc/common.opt
|
|
@@ -2255,7 +2255,7 @@ Generate prefetch instructions, if available, for arrays in loops. The prefetch
|
|
level can control the optimize level to array prefetch.
|
|
|
|
fllc-allocate
|
|
-Common Report Var(flag_llc_allocate) Init(-1) Optimization
|
|
+Common Report Var(flag_llc_allocate) Optimization
|
|
Generate LLC hint instructions.
|
|
|
|
fprofile
|
|
diff --git a/gcc/params.opt b/gcc/params.opt
|
|
index 792ca5c35..ef7bea311 100644
|
|
--- a/gcc/params.opt
|
|
+++ b/gcc/params.opt
|
|
@@ -1030,13 +1030,11 @@ Common Joined UInteger Var(param_mem_access_num) Init(3) Param Optimization
|
|
Memory access num.
|
|
|
|
-param=prefetch-offset=
|
|
-Common Joined UInteger Var(param_prefetch_offset) Init(1024)
|
|
-IntegerRange(1, 999999) Param Optimization
|
|
+Common Joined UInteger Var(param_prefetch_offset) Init(1024) IntegerRange(1, 999999) Param Optimization
|
|
Prefetch Offset, which is usually a power of two due to cache line size.
|
|
|
|
-param=branch-prob-threshold=
|
|
-Common Joined UInteger Var(param_branch_prob_threshold) Init(80) IntegerRange(50, 100)
|
|
-Param Optimization
|
|
+Common Joined UInteger Var(param_branch_prob_threshold) Init(80) IntegerRange(50, 100) Param Optimization
|
|
High Execution Rate Branch Threshold.
|
|
|
|
-param=issue-topn=
|
|
@@ -1051,24 +1049,6 @@ Force issue the topn LLC mem_ref hint, without generating dynamic multi-branches
|
|
Common Joined UInteger Var(param_llc_capacity_per_core) Init(114) IntegerRange(0, 999999) Param
|
|
LLC capacity per core.
|
|
|
|
--param=target-variables=
|
|
-Common Joined Var(param_target_variables) Init("") Param Optimization
|
|
---param=target-variables=<var>[,<var>,...] Target variables for prefetching, separated by comma,
|
|
-without space. The representation of a variable can be complex and containing space, please surround
|
|
-it by quotation marks and escape special characters in Linux. The input length should be no more
|
|
-than 512 characters.
|
|
-
|
|
--param=use-ref-group-index=
|
|
-Common Joined UInteger Var(param_use_ref_group_index) Init(0) IntegerRange(0, 1) Param Optimization
|
|
-Prefetch the target variables by their indices in sorted ref_groups, use together with parameter
|
|
-target-variables.
|
|
-
|
|
--param=mem-ref-index=
|
|
-Common Joined Var(param_mem_ref_index) Init("") Param Optimization
|
|
---param=mem-ref-index=<idx>[,<idx>,...] Prefetch the target variable at the memory reference
|
|
-location with the index of customized order, separated by comma, without space. The input length
|
|
-should be no more than 512 characters.
|
|
-
|
|
-param=filter-kernels=
|
|
Common Joined UInteger Var(param_filter_kernels) Init(1) IntegerRange(0, 1) Param
|
|
Allow LLC allocate pass to greedily filter kernels by traversing the corresponding basic blocks
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
|
index 9f8a5c307..f8b1cc5c1 100644
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
|
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
|
@@ -45,7 +45,7 @@ main (int argc, char *argv[])
|
|
/* { dg-final { scan-tree-dump-not "static_data_size:" "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-times "\{ (?:\\d+\\(\\d+\\) ){1}\}" 2 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-not ", size: (?!(0\.000000))" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 8 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 6 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-times "\\d x_data \\(0.000000, 1, 0\\) : 3" 2 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-times "\\d A_j \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-times "\\d A_data \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
|
|
deleted file mode 100644
|
|
index 2a58c501f..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
|
|
+++ /dev/null
|
|
@@ -1,48 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=uPtr" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-#define N 131590
|
|
-#define F 384477
|
|
-
|
|
-double diagPtr[N];
|
|
-double psiPtr[N];
|
|
-double ApsiPtr[N];
|
|
-int lPtr[F];
|
|
-int uPtr[F];
|
|
-double lowerPtr[F];
|
|
-double upperPtr[F];
|
|
-
|
|
-void
|
|
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
|
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
|
-{
|
|
- for (int cell=0; cell<nCells; cell++)
|
|
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
|
-
|
|
- for (int face=0; face<nFaces; face++)
|
|
- {
|
|
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
|
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
|
- }
|
|
-}
|
|
-
|
|
-int
|
|
-main (int argc, char *argv[])
|
|
-{
|
|
- int nCells = N;
|
|
- int nFaces = F;
|
|
- int testIter = 2;
|
|
-
|
|
- for (int i=0; i<testIter; i++)
|
|
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "\[&\]?uPtr(?:_\\d+\\(D\\))? \\+ \\d{4};" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "\[&\]?uPtr(?:_\\d+\\(D\\))? \\+ \[_\]\\d{1,4};" 2 "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
|
|
deleted file mode 100644
|
|
index 276781c4f..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
|
|
+++ /dev/null
|
|
@@ -1,48 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-#define N 131590
|
|
-#define F 384477
|
|
-
|
|
-double diagPtr[N];
|
|
-double psiPtr[N];
|
|
-double ApsiPtr[N];
|
|
-int lPtr[F];
|
|
-int uPtr[F];
|
|
-double lowerPtr[F];
|
|
-double upperPtr[F];
|
|
-
|
|
-void
|
|
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
|
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
|
-{
|
|
- for (int cell=0; cell<nCells; cell++)
|
|
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
|
-
|
|
- for (int face=0; face<nFaces; face++)
|
|
- {
|
|
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
|
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
|
- }
|
|
-}
|
|
-
|
|
-int
|
|
-main (int argc, char *argv[])
|
|
-{
|
|
- int nCells = N;
|
|
- int nFaces = F;
|
|
- int testIter = 2;
|
|
-
|
|
- for (int i=0; i<testIter; i++)
|
|
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
|
|
- " lPtr \"" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
|
|
deleted file mode 100644
|
|
index 57c76f4a6..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
|
|
+++ /dev/null
|
|
@@ -1,48 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=,lPtr, --param mem-ref-index=5" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-#define N 131590
|
|
-#define F 384477
|
|
-
|
|
-double diagPtr[N];
|
|
-double psiPtr[N];
|
|
-double ApsiPtr[N];
|
|
-int lPtr[F];
|
|
-int uPtr[F];
|
|
-double lowerPtr[F];
|
|
-double upperPtr[F];
|
|
-
|
|
-void
|
|
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
|
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
|
-{
|
|
- for (int cell=0; cell<nCells; cell++)
|
|
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
|
-
|
|
- for (int face=0; face<nFaces; face++)
|
|
- {
|
|
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
|
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
|
- }
|
|
-}
|
|
-
|
|
-int
|
|
-main (int argc, char *argv[])
|
|
-{
|
|
- int nCells = N;
|
|
- int nFaces = F;
|
|
- int testIter = 2;
|
|
-
|
|
- for (int i=0; i<testIter; i++)
|
|
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-times "WARNING: The target data_ref index is "
|
|
- "out of range." 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
|
|
deleted file mode 100644
|
|
index d9c053566..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
|
|
+++ /dev/null
|
|
@@ -1,48 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr,uPtr,, --param mem-ref-index=5" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-#define N 131590
|
|
-#define F 384477
|
|
-
|
|
-double diagPtr[N];
|
|
-double psiPtr[N];
|
|
-double ApsiPtr[N];
|
|
-int lPtr[F];
|
|
-int uPtr[F];
|
|
-double lowerPtr[F];
|
|
-double upperPtr[F];
|
|
-
|
|
-void
|
|
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
|
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
|
-{
|
|
- for (int cell=0; cell<nCells; cell++)
|
|
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
|
-
|
|
- for (int face=0; face<nFaces; face++)
|
|
- {
|
|
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
|
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
|
- }
|
|
-}
|
|
-
|
|
-int
|
|
-main (int argc, char *argv[])
|
|
-{
|
|
- int nCells = N;
|
|
- int nFaces = F;
|
|
- int testIter = 2;
|
|
-
|
|
- for (int i=0; i<testIter; i++)
|
|
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-not "WARNING: The number of provided memory "
|
|
- "reference indices is less" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "insert prfm" 4 "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
|
|
deleted file mode 100644
|
|
index b87f9903d..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
|
|
+++ /dev/null
|
|
@@ -1,47 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr --param use-ref-group-index=1" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-#define N 131590
|
|
-#define F 384477
|
|
-
|
|
-double diagPtr[N];
|
|
-double psiPtr[N];
|
|
-double ApsiPtr[N];
|
|
-int lPtr[F];
|
|
-int uPtr[F];
|
|
-double lowerPtr[F];
|
|
-double upperPtr[F];
|
|
-
|
|
-void
|
|
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
|
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
|
-{
|
|
- for (int cell=0; cell<nCells; cell++)
|
|
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
|
-
|
|
- for (int face=0; face<nFaces; face++)
|
|
- {
|
|
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
|
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
|
- }
|
|
-}
|
|
-
|
|
-int
|
|
-main (int argc, char *argv[])
|
|
-{
|
|
- int nCells = N;
|
|
- int nFaces = F;
|
|
- int testIter = 2;
|
|
-
|
|
- for (int i=0; i<testIter; i++)
|
|
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-times "ERROR: not an unsigned integer" 1
|
|
- "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
|
|
deleted file mode 100644
|
|
index d07836765..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
|
|
+++ /dev/null
|
|
@@ -1,48 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=1 --param use-ref-group-index=1" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-#define N 131590
|
|
-#define F 384477
|
|
-
|
|
-double diagPtr[N];
|
|
-double psiPtr[N];
|
|
-double ApsiPtr[N];
|
|
-int lPtr[F];
|
|
-int uPtr[F];
|
|
-double lowerPtr[F];
|
|
-double upperPtr[F];
|
|
-
|
|
-void
|
|
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
|
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
|
-{
|
|
- for (int cell=0; cell<nCells; cell++)
|
|
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
|
-
|
|
- for (int face=0; face<nFaces; face++)
|
|
- {
|
|
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
|
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
|
- }
|
|
-}
|
|
-
|
|
-int
|
|
-main (int argc, char *argv[])
|
|
-{
|
|
- int nCells = N;
|
|
- int nFaces = F;
|
|
- int testIter = 2;
|
|
-
|
|
- for (int i=0; i<testIter; i++)
|
|
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
|
|
- " psiPtr \"" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "insert svprfd" 2 "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
|
|
deleted file mode 100644
|
|
index c0a6afe5b..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
|
|
+++ /dev/null
|
|
@@ -1,47 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=3,a --param use-ref-group-index=1" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-#define N 131590
|
|
-#define F 384477
|
|
-
|
|
-double diagPtr[N];
|
|
-double psiPtr[N];
|
|
-double ApsiPtr[N];
|
|
-int lPtr[F];
|
|
-int uPtr[F];
|
|
-double lowerPtr[F];
|
|
-double upperPtr[F];
|
|
-
|
|
-void
|
|
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
|
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
|
-{
|
|
- for (int cell=0; cell<nCells; cell++)
|
|
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
|
-
|
|
- for (int face=0; face<nFaces; face++)
|
|
- {
|
|
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
|
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
|
- }
|
|
-}
|
|
-
|
|
-int
|
|
-main (int argc, char *argv[])
|
|
-{
|
|
- int nCells = N;
|
|
- int nFaces = F;
|
|
- int testIter = 2;
|
|
-
|
|
- for (int i=0; i<testIter; i++)
|
|
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-times "ERROR: not an unsigned integer" 1
|
|
- "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
|
|
deleted file mode 100644
|
|
index 4ad331626..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
|
|
+++ /dev/null
|
|
@@ -1,52 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -c -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=0 --param target-variables=\"bb_16(D)->aux\"" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-typedef struct stack_def
|
|
-{
|
|
- int top; /* index to top stack element */
|
|
- unsigned long reg_set; /* set of live registers */
|
|
- unsigned char reg[128]; /* register - stack mapping */
|
|
-} *stack;
|
|
-
|
|
-typedef struct block_info_def
|
|
-{
|
|
- struct stack_def stack_in; /* Input stack configuration. */
|
|
- struct stack_def stack_out; /* Output stack configuration. */
|
|
- unsigned long out_reg_set; /* Stack regs live on output. */
|
|
- int done; /* True if block already converted. */
|
|
- int predecessors; /* Number of predecessors that need
|
|
- to be visited. */
|
|
-} *block_info;
|
|
-
|
|
-typedef struct basic_block_def
|
|
-{
|
|
- void *aux;
|
|
-} *basic_block;
|
|
-
|
|
-unsigned char
|
|
-convert_regs_exit (basic_block bb, int value_reg_low, int value_reg_high)
|
|
-{
|
|
- stack output_stack;
|
|
-
|
|
- output_stack = &(((block_info) bb->aux)->stack_in);
|
|
- if (value_reg_low == -1)
|
|
- output_stack->top = -1;
|
|
- else
|
|
- {
|
|
- int reg;
|
|
- output_stack->top = value_reg_high - value_reg_low;
|
|
- for (reg = value_reg_low; reg <= value_reg_high; ++reg)
|
|
- {
|
|
- (output_stack->reg + 16)[value_reg_high - reg] = reg;
|
|
- output_stack->reg_set |= (unsigned long) 1 << reg;
|
|
- }
|
|
- }
|
|
- return output_stack->reg[0];
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-not "Unrecognizable variable name"
|
|
- "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
|
|
deleted file mode 100644
|
|
index 09a525ce1..000000000
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
|
|
+++ /dev/null
|
|
@@ -1,54 +0,0 @@
|
|
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
-/* { dg-options "-O3 -c -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=0 --param target-variables=tmp_var_0" } */
|
|
-
|
|
-#include <stdio.h>
|
|
-
|
|
-typedef struct stack_def
|
|
-{
|
|
- int top; /* index to top stack element */
|
|
- unsigned long reg_set; /* set of live registers */
|
|
- unsigned char reg[128]; /* register - stack mapping */
|
|
-} *stack;
|
|
-
|
|
-typedef struct block_info_def
|
|
-{
|
|
- struct stack_def stack_in; /* Input stack configuration. */
|
|
- struct stack_def stack_out; /* Output stack configuration. */
|
|
- unsigned long out_reg_set; /* Stack regs live on output. */
|
|
- int done; /* True if block already converted. */
|
|
- int predecessors; /* Number of predecessors that need
|
|
- to be visited. */
|
|
-} *block_info;
|
|
-
|
|
-typedef struct basic_block_def
|
|
-{
|
|
- void *aux;
|
|
-} *basic_block;
|
|
-
|
|
-unsigned char
|
|
-convert_regs_exit (basic_block bb, int value_reg_low, int value_reg_high)
|
|
-{
|
|
- stack output_stack;
|
|
-
|
|
- output_stack = &(((block_info) bb->aux)->stack_in);
|
|
- if (value_reg_low == -1)
|
|
- output_stack->top = -1;
|
|
- else
|
|
- {
|
|
- int reg;
|
|
- output_stack->top = value_reg_high - value_reg_low;
|
|
- for (reg = value_reg_low; reg <= value_reg_high; ++reg)
|
|
- {
|
|
- (output_stack->reg + 16)[value_reg_high - reg] = reg;
|
|
- output_stack->reg_set |= (unsigned long) 1 << reg;
|
|
- }
|
|
- }
|
|
- return output_stack->reg[0];
|
|
-}
|
|
-
|
|
-/* { dg-final { scan-tree-dump-not "Unrecognizable variable name"
|
|
- "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
|
|
- " bb_16(D)->aux \"" 1 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "static issue" 1 "llc_allocate" } } */
|
|
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
|
|
index 890f66e54..fa8979401 100644
|
|
--- a/gcc/tree-ssa-llc-allocate.c
|
|
+++ b/gcc/tree-ssa-llc-allocate.c
|
|
@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see
|
|
#define INCLUDE_VECTOR
|
|
#define INCLUDE_LIST
|
|
#define INCLUDE_ALGORITHM
|
|
-#define INCLUDE_STRING
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "backend.h"
|
|
@@ -1866,7 +1865,10 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
|
|
list<basic_block> walked_header_bb; /* Used to record nested loops. */
|
|
|
|
for (unsigned i = 0; i < kernels.size (); ++i)
|
|
- end_bb.insert (kernels[i]->header);
|
|
+ {
|
|
+ if (kernels[i]->inner == NULL)
|
|
+ end_bb.insert (kernels[i]->header);
|
|
+ }
|
|
|
|
dump_loop_headers ("kernels", kernels);
|
|
|
|
@@ -2380,30 +2382,6 @@ issue_builtin_prefetch (data_ref &mem_ref)
|
|
update_ssa (TODO_update_ssa_only_virtuals);
|
|
}
|
|
|
|
-/* Retrieve memory reference at the specific index. */
|
|
-
|
|
-data_ref
|
|
-get_data_ref_at_idx (ref_group &var_ref_group)
|
|
-{
|
|
- unsigned int mem_ref_size = static_cast<unsigned int>(
|
|
- var_ref_group.ref_scores.size ());
|
|
- if (strlen (param_mem_ref_index) == 0)
|
|
- return var_ref_group.first_use;
|
|
- else
|
|
- {
|
|
- /* Insert prefetch hint at highly-likely-used location with the given
|
|
- index. */
|
|
- if (var_ref_group.mem_ref_index >= mem_ref_size)
|
|
- {
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "WARNING: The target data_ref index is out "
|
|
- "of range. Use top index instead!\n");
|
|
- return var_ref_group.ref_scores[0].d_ref;
|
|
- }
|
|
- return var_ref_group.ref_scores[var_ref_group.mem_ref_index].d_ref;
|
|
- }
|
|
-}
|
|
-
|
|
/* Static form insertion and issue instruction. We may check the
|
|
determination of the ARM SVE architecture before SVE hint insertion. */
|
|
|
|
@@ -2415,7 +2393,7 @@ static_issue (vector<ref_group> &ref_groups, int num_issue_var)
|
|
|
|
for (int i = 0; i < num_issue_var; ++i)
|
|
{
|
|
- data_ref mem_ref = get_data_ref_at_idx (ref_groups[i]);
|
|
+ data_ref mem_ref = ref_groups[i].first_use;
|
|
if (mem_ref.vectorize_p)
|
|
{
|
|
enum internal_fn ifn_code = gimple_call_internal_fn
|
|
@@ -2591,10 +2569,7 @@ issue_llc_hint (vector<ref_group> &ref_groups)
|
|
}
|
|
if (param_force_issue)
|
|
{
|
|
- if (strlen (param_target_variables) > 0)
|
|
- static_issue (ref_groups, static_cast<int>(ref_groups.size ()));
|
|
- else
|
|
- static_issue (ref_groups, num_issue_var);
|
|
+ static_issue (ref_groups, num_issue_var);
|
|
return;
|
|
}
|
|
calc_type topn_calc_type = STATIC_CALC;
|
|
@@ -2626,224 +2601,6 @@ issue_llc_hint (vector<ref_group> &ref_groups)
|
|
}
|
|
|
|
/* ==================== phase entry ==================== */
|
|
-/* Check whether a string can be converted to an unsigned integer. */
|
|
-
|
|
-bool is_unsigned_int (const string &s)
|
|
-{
|
|
- if (s.empty () || s.size () > PREFETCH_TOOL_NUM_MAX_LEN)
|
|
- return false;
|
|
-
|
|
- for (unsigned int i = 0; i < s.size (); ++i)
|
|
- {
|
|
- if (s[i] < '0' || s[i] > '9')
|
|
- return false;
|
|
- }
|
|
- return true;
|
|
-}
|
|
-
|
|
-/* Parse a substring separated by comma. If the substring is valid and
|
|
- non-empty, store it as a parsed element. */
|
|
-
|
|
-bool
|
|
-parse_string_helper (const string &substr, vector<string>& str_elts,
|
|
- bool check_unsigned, size_t start, size_t end)
|
|
-{
|
|
- if (substr == "" && dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "WARNING: The input string from %lu to %lu is "
|
|
- "empty.\n", start, end);
|
|
- else if (check_unsigned && !is_unsigned_int (substr))
|
|
- {
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "ERROR: not an unsigned integer: %s\n",
|
|
- substr.c_str ());
|
|
- str_elts.clear ();
|
|
- return false;
|
|
- }
|
|
- else
|
|
- str_elts.push_back (substr);
|
|
- return true;
|
|
-}
|
|
-
|
|
-/* Parse a user input string, separated by comma. */
|
|
-
|
|
-void
|
|
-parse_string (const string &s, vector<string>& str_elts,
|
|
- bool check_unsigned = false)
|
|
-{
|
|
- string delim = ",";
|
|
- size_t start = 0;
|
|
- size_t end = s.find (delim);
|
|
- string substr = s.substr (start, end - start);
|
|
- while (end != string::npos)
|
|
- {
|
|
- if (!parse_string_helper (substr, str_elts, check_unsigned, start, end))
|
|
- return;
|
|
- start = end + delim.size ();
|
|
- end = s.find (delim, start);
|
|
- substr = s.substr (start, end - start);
|
|
- }
|
|
- parse_string_helper (substr, str_elts, check_unsigned, start, end);
|
|
-}
|
|
-
|
|
-/* Parse user input of target variables and memory indices and create a map
|
|
- that assigns a target variable to a memory index. */
|
|
-
|
|
-void
|
|
-parse_param_inputs (map<string, unsigned int> &var2mem_idx)
|
|
-{
|
|
- /* The user input length should have an input length limit. */
|
|
- if ((strlen (param_target_variables) >= PREFETCH_TOOL_INPUT_MAX_LEN
|
|
- || strlen (param_mem_ref_index) >= PREFETCH_TOOL_INPUT_MAX_LEN)
|
|
- && dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "INVALID INPUT: The user inputs for target variables "
|
|
- "and/or memory reference indices are too long for parsing.\n");
|
|
-
|
|
- vector<string> var_names;
|
|
- string target_variables = param_target_variables;
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "Start parsing target variables:\n");
|
|
- if (param_use_ref_group_index)
|
|
- parse_string (target_variables, var_names, true);
|
|
- else
|
|
- parse_string (target_variables, var_names, false);
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "Finish parsing target variables.\n\n");
|
|
-
|
|
- vector<string> var_mem_indices;
|
|
- string mem_indices = param_mem_ref_index;
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "Start parsing memory reference indices:\n");
|
|
- parse_string (mem_indices, var_mem_indices, true);
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "Finish parsing memory reference indices.\n\n");
|
|
-
|
|
- /* Construct a map of var_name: var_mem_index. */
|
|
- if (var_names.size () > 0)
|
|
- {
|
|
- if (var_mem_indices.size () < var_names.size ())
|
|
- {
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "WARNING: The number of provided memory "
|
|
- "reference indices is less than that of target "
|
|
- "variables.\nUse the top index for all variables "
|
|
- "instead.\n");
|
|
- for (string& var_name : var_names)
|
|
- var2mem_idx[var_name] = 0;
|
|
- }
|
|
- else
|
|
- {
|
|
- if (var_mem_indices.size () > var_names.size ()
|
|
- && dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "WARNING: The number of target variables is "
|
|
- "less than that of memory reference indices.\n");
|
|
- for (unsigned int i = 0; i < var_names.size (); ++i)
|
|
- {
|
|
- var2mem_idx[var_names[i]] = static_cast<unsigned int>(
|
|
- atoi (var_mem_indices[i].c_str ()));
|
|
- }
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-/* Filter reference groups by only selecting target variables from the user
|
|
- input. There are two options for prefetching target variables:
|
|
- 1. Specify variable name parsed by the pass, which you can double-check at
|
|
- "sorted ref_groups" section in the dump file.
|
|
- 2. Specify variable rank exhibited at "sorted ref_groups" section in the
|
|
- dump file.
|
|
-*/
|
|
-
|
|
-void
|
|
-prefetch_variables (const vector<ref_group>& ref_groups,
|
|
- vector<ref_group>& reduced_ref_groups)
|
|
-{
|
|
- map<unsigned int, unsigned int> ref_group2mem_idx;
|
|
-
|
|
- map<string, unsigned int> var2mem_idx; /* externally defined. */
|
|
- parse_param_inputs (var2mem_idx);
|
|
-
|
|
- if (param_use_ref_group_index)
|
|
- {
|
|
- /* Use ref_group index at "sorted ref_groups" section to specify
|
|
- variable. */
|
|
- /* Collect the variables in "reduced_ref_group" only if their indices
|
|
- show up at "sorted ref_groups" section. */
|
|
- for (const pair<string, unsigned int> &var_mem_idx : var2mem_idx)
|
|
- {
|
|
- unsigned int var_idx = static_cast<unsigned int>(atoi (
|
|
- var_mem_idx.first.c_str ()));
|
|
- if (var_idx < ref_groups.size ())
|
|
- ref_group2mem_idx[var_idx] = var_mem_idx.second;
|
|
- else if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "WARNING: The index \"%u\" does not show "
|
|
- "up in the ref_groups.\n", var_idx);
|
|
- }
|
|
- }
|
|
- else
|
|
- {
|
|
- /* Use variable name shown up at "sorted ref_groups" section to specify
|
|
- variable:
|
|
- var2ref_group_idx + var2mem_idx -> ref_group2mem_idx. */
|
|
- /* Create a map that assigns the variable name to its corresponding
|
|
- ref_group index. */
|
|
- map<string, unsigned int> var2ref_group_idx; /* internally detected. */
|
|
- for (unsigned int i = 0; i < ref_groups.size (); ++i)
|
|
- {
|
|
- const ref_group &curr_ref_group = ref_groups[i];
|
|
- const int UINT_MAX_DIGIT = 10;
|
|
- /* Unrecognizable variable name related to ref_group. */
|
|
- if (!get_name (curr_ref_group.var))
|
|
- {
|
|
- /* If the variable name does not have a string representation,
|
|
- we can rename it by "tmp_var_" + <sorted_ref_group_index>. */
|
|
- char group_idx[UINT_MAX_DIGIT];
|
|
- sprintf (group_idx, "%u", i);
|
|
- string tmp_var_name = "tmp_var_" + std::string (group_idx);
|
|
- fprintf (dump_file, "Unrecognizable variable name at ref_group "
|
|
- "index %u.\nThe tree expression for variable is: ", i);
|
|
- print_generic_expr (dump_file, curr_ref_group.var, TDF_SLIM);
|
|
- fprintf (dump_file, "\n");
|
|
- var2ref_group_idx[tmp_var_name] = i;
|
|
- }
|
|
- else
|
|
- var2ref_group_idx[std::string (get_name (curr_ref_group.var))] = i;
|
|
- }
|
|
- /* Collect the variables in "reduced_ref_group" only if they show up in
|
|
- the ref_groups. */
|
|
- for (const pair<string, unsigned int> &var_mem_idx : var2mem_idx)
|
|
- {
|
|
- if (var2ref_group_idx.count (var_mem_idx.first))
|
|
- {
|
|
- unsigned int ref_group_idx = var2ref_group_idx[var_mem_idx.first];
|
|
- ref_group2mem_idx[ref_group_idx] = var_mem_idx.second;
|
|
- }
|
|
- else if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "WARNING: Target variable \" %s \" does "
|
|
- "not show up in the ref_groups. Check whether it needs "
|
|
- "temporary variable name.\n",
|
|
- var_mem_idx.first.c_str ());
|
|
- }
|
|
- }
|
|
-
|
|
- for (const pair<unsigned int, unsigned int> &ref_group_mem_idx :
|
|
- ref_group2mem_idx)
|
|
- {
|
|
- ref_group curr_ref_group = ref_groups[ref_group_mem_idx.first];
|
|
- curr_ref_group.mem_ref_index = ref_group_mem_idx.second;
|
|
- reduced_ref_groups.push_back (curr_ref_group);
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- {
|
|
- fprintf (dump_file, "\nNOTICE: Prefetching target variable \" ");
|
|
- print_generic_expr (dump_file, curr_ref_group.var, TDF_SLIM);
|
|
- fprintf (dump_file, " \" at ref_group index %u and memory location "
|
|
- "index %u.\n", ref_group_mem_idx.first,
|
|
- ref_group_mem_idx.second);
|
|
- }
|
|
- }
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "\n\n");
|
|
-}
|
|
-
|
|
|
|
/* The LLC intelligent allocation consists of 6 steps. */
|
|
|
|
@@ -2869,17 +2626,7 @@ llc_allocate (void)
|
|
if (!record_and_sort_ref_groups (ref_groups, sorted_kernels, kernels_refs))
|
|
return;
|
|
|
|
- if (strlen (param_target_variables) > 0)
|
|
- {
|
|
- /* If "param_target_variables" is not empty, we will issue parsed target
|
|
- variables compulsorily. */
|
|
- param_force_issue = true;
|
|
- vector<ref_group> reduced_ref_groups;
|
|
- prefetch_variables (ref_groups, reduced_ref_groups);
|
|
- issue_llc_hint (reduced_ref_groups);
|
|
- }
|
|
- else
|
|
- issue_llc_hint (ref_groups);
|
|
+ issue_llc_hint (ref_groups);
|
|
}
|
|
|
|
/* Check whether the function is an operator reloading function. */
|
|
--
|
|
2.33.0
|
|
|