1286 lines
41 KiB
Diff
1286 lines
41 KiB
Diff
From 4a365290cd9563385d32a22f7b1532c50b69e063 Mon Sep 17 00:00:00 2001
|
|
From: zhaoshujian <zhaoshujian@huawei.com>
|
|
Date: Mon, 11 Dec 2023 15:06:28 +0800
|
|
Subject: [PATCH] LLC add extending outer loop
|
|
|
|
|
|
diff --git a/gcc/params.opt b/gcc/params.opt
|
|
index c429359e3..227175eef 100644
|
|
--- a/gcc/params.opt
|
|
+++ b/gcc/params.opt
|
|
@@ -1058,4 +1058,10 @@ Common Joined UInteger Var(param_filter_kernels) Init(1) IntegerRange(0, 1) Para
|
|
Allow LLC allocate pass to greedily filter kernels by traversing the corresponding basic blocks
|
|
through edges with branch probability no less than param_branch_prob_threshold.
|
|
|
|
+-param=outer-loop-nums=
|
|
+Common Joined UInteger Var(param_outer_loop_num) Init(1) IntegerRange(1, 10) Param
|
|
+Maximum number of outer loops by which the analysis may be extended for
|
|
+loops whose inner loop boundaries cannot be recognized.
|
|
+
|
|
+
|
|
; This comment is to ensure we retain the blank line above.
|
|
diff --git a/gcc/testsuite/g++.dg/llc-allocate/llc-allocate.exp b/gcc/testsuite/g++.dg/llc-allocate/llc-allocate.exp
|
|
new file mode 100644
|
|
index 000000000..9e98191ed
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/g++.dg/llc-allocate/llc-allocate.exp
|
|
@@ -0,0 +1,27 @@
|
|
+# Copyright (C) 1997-2022 Free Software Foundation, Inc.
|
|
+
|
|
+# This program is free software; you can redistribute it and/or modify
|
|
+# it under the terms of the GNU General Public License as published by
|
|
+# the Free Software Foundation; either version 3 of the License, or
|
|
+# (at your option) any later version.
|
|
+#
|
|
+# This program is distributed in the hope that it will be useful,
|
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+# GNU General Public License for more details.
|
|
+#
|
|
+# You should have received a copy of the GNU General Public License
|
|
+# along with GCC; see the file COPYING3. If not see
|
|
+# <http://www.gnu.org/licenses/>.
|
|
+
|
|
+load_lib g++-dg.exp
|
|
+load_lib target-supports.exp
|
|
+
|
|
+# Initialize `dg'.
|
|
+dg-init
|
|
+
|
|
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C]] \
|
|
+ "" "-fllc-allocate"
|
|
+
|
|
+# All done.
|
|
+dg-finish
|
|
\ No newline at end of file
|
|
diff --git a/gcc/testsuite/g++.dg/llc-allocate/llc-relion-expand-kernels.C b/gcc/testsuite/g++.dg/llc-allocate/llc-relion-expand-kernels.C
|
|
new file mode 100644
|
|
index 000000000..44a9d7c66
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/g++.dg/llc-allocate/llc-relion-expand-kernels.C
|
|
@@ -0,0 +1,53 @@
|
|
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param branch-prob-threshold=50 --param filter-kernels=0 --param mem-access-num=2 --param issue-topn=1 --param force-issue=1" } */
|
|
+#include "multidim_array.h"
|
|
+
|
|
+class Input // Test fixture: state read by convertAllSquaredDifferencesToWeights().
|
|
+{
|
|
+ public:
|
|
+ int metadata_offset = 13; // added to img_id inside the kernel; the sum is not used afterwards
|
|
+ int exp_nr_images = 1; // upper bound of the kernel's outer img_id loop
|
|
+ MultidimArray<double> exp_Mweight; // source array; one row per image is extracted with getRow()
|
|
+ void convertAllSquaredDifferencesToWeights(); // kernel under test, defined after main()
|
|
+};
|
|
+
|
|
+int main() // Driver: calls the kernel testIter times on a default-constructed Input.
|
|
+{
|
|
+ clock_t start = clock(); // NOTE(review): start is never read; clock()/clock_t have no direct <ctime> include here - presumably reached via <iostream>, confirm
|
|
+ Input input; // NOTE(review): MultidimArray declares no constructor, so exp_Mweight's fields are not set before use; the test is compile-only (dg-do compile)
|
|
+ int testIter = 2;
|
|
+
|
|
+ for (int i = 0; i < testIter; ++i)
|
|
+ {
|
|
+ input.convertAllSquaredDifferencesToWeights();
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void Input::convertAllSquaredDifferencesToWeights() // For each image: copy its row of exp_Mweight, then move the positive entries to the front of the copy.
|
|
+{
|
|
+ for (int img_id = 0; img_id < exp_nr_images; img_id++)
|
|
+ {
|
|
+ int my_metadata_offset = metadata_offset + img_id; // computed but not used below
|
|
+ MultidimArray<double> sorted_weight; // per-image scratch array, filled by getRow()
|
|
+
|
|
+ exp_Mweight.getRow(img_id, sorted_weight);
|
|
+ long int np = 0; // number of positive elements kept so far; also the next write index
|
|
+ FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sorted_weight) // expands to: for (long int n=0; n<sorted_weight.nzyxdim; ++n)
|
|
+ {
|
|
+ if (DIRECT_MULTIDIM_ELEM(sorted_weight, n) > 0.) // keep strictly positive entries only
|
|
+ {
|
|
+ DIRECT_MULTIDIM_ELEM(sorted_weight, np) = DIRECT_MULTIDIM_ELEM( \
|
|
+ sorted_weight, n);
|
|
+ np++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/* { dg-final { scan-tree-dump-times "dense memory access" 1 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "__builtin_prefetch" 1 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "static issue" 1 "llc_allocate" } } */
|
|
+
|
|
diff --git a/gcc/testsuite/g++.dg/llc-allocate/multidim_array.h b/gcc/testsuite/g++.dg/llc-allocate/multidim_array.h
|
|
new file mode 100644
|
|
index 000000000..d65066ebf
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/g++.dg/llc-allocate/multidim_array.h
|
|
@@ -0,0 +1,186 @@
|
|
+#ifndef MULTIDIM_ARRAY_H
|
|
+#define MULTIDIM_ARRAY_H
|
|
+
|
|
+#include <iostream>
|
|
+
|
|
+#define RELION_ALIGNED_MALLOC malloc
|
|
+#define RELION_ALIGNED_FREE free
|
|
+
|
|
+#define STARTINGX(v) ((v).xinit)
|
|
+#define STARTINGY(v) ((v).yinit)
|
|
+#define NZYXSIZE(v) ((v).nzyxdim)
|
|
+
|
|
+#define DIRECT_MULTIDIM_ELEM(v,n) ((v).data[(n)])
|
|
+#define FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(v) \
|
|
+ for (long int n=0; n<NZYXSIZE(v); ++n)
|
|
+
|
|
+#define FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY_ptr(v,n,ptr) \
|
|
+ for ((n)=0, (ptr)=(v).data; (n)<NZYXSIZE(v); ++(n), ++(ptr))
|
|
+
|
|
+#define DIRECT_A2D_ELEM(v,i,j) ((v).data[(i)*(v).xdim+(j)])
|
|
+#define A2D_ELEM(v, i, j) \
|
|
+ DIRECT_A2D_ELEM(v, (i) - STARTINGY(v), (j) - STARTINGX(v))
|
|
+
|
|
+#define DIRECT_A1D_ELEM(v, i) ((v).data[(i)])
|
|
+#define A1D_ELEM(v, i) DIRECT_A1D_ELEM(v, (i) - ((v).xinit))
|
|
+
|
|
+template<typename T> // Minimal array fixture: one malloc'ed buffer of T plus its dimensions and start indices.
|
|
+class MultidimArray // NOTE(review): no constructor is declared, so all fields are unset until coreInit() (or an allocate/resize path) runs.
|
|
+{
|
|
+public:
|
|
+ T* data; // element buffer, obtained through RELION_ALIGNED_MALLOC
|
|
+ bool destroyData; // when true, coreDeallocate() frees data
|
|
+ long int ndim;
|
|
+ long int zdim;
|
|
+ long int ydim;
|
|
+ long int xdim;
|
|
+ long int yxdim; // ydim*xdim
|
|
+ long int zyxdim; // zdim*yxdim
|
|
+ long int nzyxdim; // ndim*zyxdim, i.e. the total element count
|
|
+ long int zinit;
|
|
+ long int yinit; // subtracted from the row index by A2D_ELEM
|
|
+ long int xinit; // subtracted from the column index by A2D_ELEM / A1D_ELEM
|
|
+ long int nzyxdimAlloc; // element count of the current allocation
|
|
+
|
|
+public:
|
|
+ void clear() // Release the buffer, then reset every field to its empty state.
|
|
+ {
|
|
+ coreDeallocate();
|
|
+ coreInit();
|
|
+ }
|
|
+
|
|
+ void coreInit() // Reset to the empty state: no buffer, xdim 0, other dimensions 1, start indices 0.
|
|
+ {
|
|
+ xdim=0;
|
|
+ yxdim=0;
|
|
+ zyxdim=0;
|
|
+ nzyxdim=0;
|
|
+ ydim=1;
|
|
+ zdim=1;
|
|
+ ndim=1;
|
|
+ zinit=0;
|
|
+ yinit=0;
|
|
+ xinit=0;
|
|
+ data=NULL;
|
|
+ nzyxdimAlloc = 0;
|
|
+ destroyData=true;
|
|
+ }
|
|
+
|
|
+ void coreAllocate(long int _ndim, long int _zdim, long int _ydim, long int _xdim) // Store the given shape and allocate a buffer for it.
|
|
+ {
|
|
+ if (_ndim <= 0 || _zdim <= 0 || _ydim<=0 || _xdim<=0) // any non-positive dimension empties the array instead
|
|
+ {
|
|
+ clear();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ ndim=_ndim;
|
|
+ zdim=_zdim;
|
|
+ ydim=_ydim;
|
|
+ xdim=_xdim;
|
|
+ yxdim=ydim*xdim;
|
|
+ zyxdim=zdim*yxdim;
|
|
+ nzyxdim=ndim*zyxdim;
|
|
+
|
|
+ coreAllocate();
|
|
+ }
|
|
+
|
|
+ void coreAllocate() // Allocate nzyxdim elements; any buffer already held in data is not freed here.
|
|
+ {
|
|
+ data = (T*)RELION_ALIGNED_MALLOC(sizeof(T) * nzyxdim); // NOTE(review): the allocation result is not checked
|
|
+ nzyxdimAlloc = nzyxdim;
|
|
+ }
|
|
+
|
|
+ void coreDeallocate() // Free the buffer if this object owns it (destroyData) and mark it as gone.
|
|
+ {
|
|
+ if (data != NULL && destroyData)
|
|
+ {
|
|
+ RELION_ALIGNED_FREE(data);
|
|
+ }
|
|
+ data=NULL;
|
|
+ nzyxdimAlloc = 0;
|
|
+ }
|
|
+
|
|
+ void resize(long int Ndim, long int Zdim, long int Ydim, long int Xdim) // Change the shape to Ndim x Zdim x Ydim x Xdim, reallocating when the element count differs.
|
|
+ {
|
|
+ if (Ndim*Zdim*Ydim*Xdim == nzyxdimAlloc && data != NULL) // same element count as the current allocation: keep the buffer, update the shape only
|
|
+ {
|
|
+ ndim = Ndim;
|
|
+ xdim = Xdim;
|
|
+ ydim = Ydim;
|
|
+ zdim = Zdim;
|
|
+ yxdim = Ydim * Xdim;
|
|
+ zyxdim = Zdim * yxdim;
|
|
+ nzyxdim = Ndim * zyxdim;
|
|
+ nzyxdimAlloc = nzyxdim;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (Xdim <= 0 || Ydim <= 0 || Zdim <= 0 || Ndim <= 0) // any non-positive dimension empties the array
|
|
+ {
|
|
+ clear();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (NZYXSIZE(*this) > 0 && data == NULL) // NOTE(review): allocates for the current nzyxdim; the requested dimensions are not stored on this path
|
|
+ {
|
|
+ coreAllocate();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ size_t YXdim=Ydim*Xdim;
|
|
+ size_t ZYXdim=Zdim*YXdim;
|
|
+ size_t NZYXdim=Ndim*ZYXdim;
|
|
+
|
|
+ T * new_data = (T*)RELION_ALIGNED_MALLOC(sizeof(T) * NZYXdim); // NOTE(review): the allocation result is not checked
|
|
+ for (long int l = 0; l < Ndim; l++)
|
|
+ for (long int k = 0; k < Zdim; k++)
|
|
+ for (long int i = 0; i < Ydim; i++)
|
|
+ for (long int j = 0; j < Xdim; j++)
|
|
+ {
|
|
+ T val; // NOTE(review): for built-in T this is left uninitialized before being stored below
|
|
+ new_data[l*ZYXdim + k*YXdim+i*Xdim+j] = val;
|
|
+ }
|
|
+ coreDeallocate(); // the old buffer is released; its contents are not copied into new_data
|
|
+
|
|
+ data = new_data;
|
|
+ ndim = Ndim;
|
|
+ xdim = Xdim;
|
|
+ ydim = Ydim;
|
|
+ zdim = Zdim;
|
|
+ yxdim = Ydim * Xdim;
|
|
+ zyxdim = Zdim * yxdim;
|
|
+ nzyxdim = Ndim * zyxdim;
|
|
+ nzyxdimAlloc = nzyxdim;
|
|
+ }
|
|
+
|
|
+ void resize(long int Xdim) // One-dimensional convenience overload: shape 1 x 1 x 1 x Xdim.
|
|
+ {
|
|
+ resize(1, 1, 1, Xdim);
|
|
+ }
|
|
+
|
|
+ inline T& operator()(long int i, long int j) const // 2-D access; no bounds checking
|
|
+ {
|
|
+ return A2D_ELEM(*this, i, j); // expands to data[(i - yinit)*xdim + (j - xinit)]
|
|
+ }
|
|
+
|
|
+ inline T& operator()(long int i) const // 1-D access; no bounds checking
|
|
+ {
|
|
+ return A1D_ELEM(*this, i); // expands to data[i - xinit]
|
|
+ }
|
|
+
|
|
+ void getRow(long int i, MultidimArray<T>& v) const // Copy row i of this array into v, which is resized to xdim elements.
|
|
+ {
|
|
+ if (xdim == 0 || ydim == 0) // empty source: v is cleared and nothing is copied
|
|
+ {
|
|
+ v.clear();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ v.resize(xdim);
|
|
+ for (long int j = 0; j < xdim; j++)
|
|
+ v(j) = (*this)(i, j);
|
|
+ }
|
|
+};
|
|
+
|
|
+#endif /* MULTIDIM_ARRAY_H */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
|
index 9bc6cc32b..9f8a5c307 100644
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
|
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
|
@@ -39,13 +39,13 @@ main (int argc, char *argv[])
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "ref_count = (?:\[3-9\]|\[1-9\]\\d{1,}), ninsns = \[1-9\]\\d*, mem_to_insn_ratio = 0.\[2-9\]\\d*" 2 "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "Tracing succeeded" 6 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "ref_count = (?:\[3-9\]|\[1-9\]\\d{1,}), ninsns = \[1-9\]\\d*, mem_to_insn_ratio = 0.\[2-9\]\\d*" 4 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "Tracing succeeded" 14 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-not "Tracing failed" "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-not "static_data_size:" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times "\{ (?:\\d+\\(\\d+\\) ){1}\}" 4 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "\{ (?:\\d+\\(\\d+\\) ){1}\}" 2 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-not ", size: (?!(0\.000000))" "llc_allocate" } } */
|
|
-/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 6 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 8 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-times "\\d x_data \\(0.000000, 1, 0\\) : 3" 2 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-times "\\d A_j \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
|
|
/* { dg-final { scan-tree-dump-times "\\d A_data \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-allocate.exp b/gcc/testsuite/gcc.dg/llc-allocate/llc-allocate.exp
|
|
index 4f34e722f..05a3bf842 100644
|
|
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-allocate.exp
|
|
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-allocate.exp
|
|
@@ -24,4 +24,4 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
|
"" "-fllc-allocate"
|
|
|
|
# All done.
|
|
-dg-finish
|
|
+dg-finish
|
|
\ No newline at end of file
|
|
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-extend-outer-loop.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-extend-outer-loop.c
|
|
new file mode 100644
|
|
index 000000000..9b2b656fd
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-extend-outer-loop.c
|
|
@@ -0,0 +1,61 @@
|
|
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
|
+#include <stdio.h>
|
|
+#define N 131590
|
|
+#define F 384477
|
|
+
|
|
+int ownStartPtr[F];
|
|
+double bPrimePtr[N];
|
|
+double diagPtr[N];
|
|
+double psiPtr[N];
|
|
+double upperPtr[F];
|
|
+double lowerPtr[F];
|
|
+int uPtr[F];
|
|
+
|
|
+void SMOOTH(int *ownStartPtr, double *bPrimePtr, double *diagPtr, double *psiPtr, int *uPtr, double *lowerPtr, double *upperPtr, int nCells);
|
|
+
|
|
+int main(int argc, char *argv[]) /* Driver: calls SMOOTH testIter times on the file-scope arrays; argc/argv are not used. */
|
|
+{
|
|
+ int nCells = N;
|
|
+ int nFaces = F; /* not used below */
|
|
+ int testIter = 2;
|
|
+ for (int i = 0; i < testIter; i++)
|
|
+ {
|
|
+ SMOOTH(ownStartPtr, bPrimePtr, diagPtr, psiPtr, uPtr, lowerPtr, upperPtr, nCells);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+void SMOOTH(int *ownStartPtr, double *bPrimePtr, double *diagPtr, double *psiPtr, int *uPtr, double *lowerPtr, double *upperPtr, int nCells) /* Kernel under test: per-cell sweep whose inner face loops run over [fStart, fEnd) read from ownStartPtr. */
|
|
+{
|
|
+ double psii;
|
|
+ int fStart;
|
|
+ int fEnd = ownStartPtr[0]; /* becomes fStart of the first cell */
|
|
+
|
|
+ for (int celli = 0; celli < nCells; celli++)
|
|
+ {
|
|
+ fStart = fEnd; /* a cell's face range starts where the previous one ended */
|
|
+ fEnd = ownStartPtr[celli + 1]; /* reads index celli + 1, so ownStartPtr needs at least nCells + 1 entries */
|
|
+ psii = bPrimePtr[celli];
|
|
+
|
|
+ for (int facei = fStart; facei<fEnd; facei++) /* inner bounds are values loaded in the outer loop body */
|
|
+ {
|
|
+ psii -= upperPtr[facei] * psiPtr[uPtr[facei]]; /* psiPtr is read indirectly through uPtr */
|
|
+ }
|
|
+
|
|
+ psii /= diagPtr[celli];
|
|
+ for (int facei = fStart; facei < fEnd; facei++) /* second pass over the same face range */
|
|
+ {
|
|
+ bPrimePtr[uPtr[facei]] -= lowerPtr[facei] * psii; /* bPrimePtr is updated indirectly through uPtr */
|
|
+ }
|
|
+ psiPtr[celli] = psii;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump-times "bPrimePtr : 3" 2 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "diagPtr : 1" 2 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "upperPtr : 1" 2 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "psiPtr : 2" 2 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-times "insert" 8 "llc_allocate" } } */
|
|
+/* { dg-final { scan-tree-dump-not "Processing loop 0" "llc_allocate" } } */
|
|
diff --git a/gcc/testsuite/gfortran.dg/llc-allocate/llc-wrf-4-outer-loop-num.f90 b/gcc/testsuite/gfortran.dg/llc-allocate/llc-wrf-4-outer-loop-num.f90
|
|
new file mode 100644
|
|
index 000000000..728b61ea3
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gfortran.dg/llc-allocate/llc-wrf-4-outer-loop-num.f90
|
|
@@ -0,0 +1,320 @@
|
|
+! { dg-do compile { target { aarch64*-*-linux* } } }
|
|
+! { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=branch-prob-threshold=50 --param=filter-kernels=0 --param=mem-access-num=2 --param=issue-topn=2 --param=force-issue=1 --param=outer-loop-nums=3" }
|
|
+!include "module_small_step_em.F90"
|
|
+
|
|
+Module add_type
|
|
+ IMPLICIT NONE
|
|
+
|
|
+ TYPE :: grid_config_rec_type
|
|
+ LOGICAL :: open_xs
|
|
+ LOGICAL :: open_ys
|
|
+ LOGICAL :: open_xe
|
|
+ LOGICAL :: open_ye
|
|
+ LOGICAL :: symmetric_xs
|
|
+ LOGICAL :: symmetric_xe
|
|
+ LOGICAL :: symmetric_ys
|
|
+ LOGICAL :: symmetric_ye
|
|
+ LOGICAL :: polar
|
|
+ LOGICAL :: nested
|
|
+ LOGICAL :: periodic_x
|
|
+ LOGICAL :: specified
|
|
+ END TYPE
|
|
+END Module
|
|
+
|
|
+program main
|
|
+
|
|
+
|
|
+! include "module_small_step_em_modify.F90"
|
|
+
|
|
+! use module_small_step_em
|
|
+! use module_small_step_em_modify
|
|
+
|
|
+ use add_type
|
|
+
|
|
+ IMPLICIT NONE
|
|
+ INTEGER :: ids,ide, jds,jde, kds,kde
|
|
+ INTEGER,parameter :: ims=-4,kms=1,jms=-4
|
|
+ INTEGER,parameter :: ime=210,kme=36,jme=192
|
|
+ INTEGER :: its,ite, jts,jte, kts,kte
|
|
+ INTEGER :: number_of_small_timesteps,rk_step, rk_order, step, spec_zone
|
|
+
|
|
+ REAL, DIMENSION(ims:ime, kms:kme, jms:jme, 1:8) :: llcRefresh
|
|
+ REAL, DIMENSION(ims:ime, kms:kme, jms:jme) :: u, v, u_1, v_1, t_1, ww_1, ft!u, v, u_1, v_1, w_1, t_1, ww1, ww_1,ph_1, ft
|
|
+ REAL, DIMENSION(ims:ime, kms:kme, jms:jme) :: u_save, v_save, w_save, t_save, ph_save,h_diabatic
|
|
+ ! REAL, DIMENSION(ims:ime, kms:kme, jms:jme) :: u_2, v_2, w_2, t_2, ph_2
|
|
+ ! REAL, DIMENSION(ims:ime, kms:kme, jms:jme) :: c2a, ww_save, cqw, cqu, cqv, alpha, gamma, a
|
|
+ REAL, DIMENSION(ims:ime, kms:kme, jms:jme) :: ww!pb, p, ph, php, pm1, al, alt, ww, random_array
|
|
+ ! REAL, DIMENSION(ims:ime, kms:kme, jms:jme) :: ru_tend, rv_tend
|
|
+ REAL, DIMENSION(ims:ime, kms:kme, jms:jme) :: t, t_ave, uam, vam, wwam
|
|
+
|
|
+ REAL, DIMENSION(ims:ime, jms:jme) :: mu_1,mu_2, mu
|
|
+ REAL, DIMENSION(ims:ime, jms:jme) :: mub, muu, muv, mut, &
|
|
+ msfux, msfuy, &
|
|
+ msfvx, msfvx_inv, msfvy, &
|
|
+ msftx, msfty
|
|
+
|
|
+ REAL, DIMENSION(ims:ime, jms:jme) :: muus, muvs, muts, mudf, muave
|
|
+ REAL, DIMENSION(ims:ime, jms:jme) :: mu_save, mu_tend
|
|
+
|
|
+ REAL, DIMENSION(kms:kme) :: rdn, rdnw,dnw, fnm, fnp, znu
|
|
+
|
|
+ REAL :: rdx,rdy
|
|
+ REAL :: dts, cf1, cf2, cf3, t0, emdiv, smdiv, epssm, g
|
|
+ REAL :: random1,time_begin,time_end,total_time
|
|
+
|
|
+ INTEGER :: i, j, k
|
|
+ INTEGER :: i_start, i_end, j_start, j_end, k_start, k_end
|
|
+ INTEGER :: i_endu, j_endv
|
|
+ INTEGER :: interval=1
|
|
+ INTEGER :: epoch
|
|
+
|
|
+ LOGICAL :: non_hydrostatic, top_lid
|
|
+
|
|
+
|
|
+ TYPE (grid_config_rec_type) :: config_flags
|
|
+ config_flags%open_xs = .true.
|
|
+ config_flags%open_ys = .true.
|
|
+ config_flags%open_xe = .true.
|
|
+ config_flags%open_ye = .true.
|
|
+ config_flags%symmetric_xs = .true.
|
|
+ config_flags%symmetric_xe = .true.
|
|
+ config_flags%symmetric_ys = .true.
|
|
+ config_flags%symmetric_ye = .true.
|
|
+ config_flags%polar = .true.
|
|
+ config_flags%nested = .true.
|
|
+ config_flags%periodic_x = .true.
|
|
+ config_flags%specified = .true.
|
|
+
|
|
+ data ids, jds, kds, its, jts, kts /6*1/
|
|
+ data ide, ite /2*205/
|
|
+ data jde, jte /2*187/
|
|
+ data kde, kte /2*98/
|
|
+
|
|
+ number_of_small_timesteps = 1
|
|
+ rk_step = 1
|
|
+ rk_order = 1
|
|
+ dts = 1.
|
|
+ epssm = 1.
|
|
+ g = 1.
|
|
+
|
|
+ rdx = 1.
|
|
+ rdy = 1.
|
|
+ dts = 1.
|
|
+ cf1 = 1.
|
|
+ cf2 = 1.
|
|
+ cf3 = 1.
|
|
+
|
|
+ t0 = 0.
|
|
+ smdiv = 1.
|
|
+ emdiv = 1.
|
|
+ step = 1
|
|
+ spec_zone = 1
|
|
+
|
|
+ non_hydrostatic = .true.
|
|
+ top_lid = .true.
|
|
+
|
|
+ interval=1
|
|
+
|
|
+
|
|
+ total_time=0
|
|
+
|
|
+ call random_seed(put=(/(i,i=1,10000,interval)/))
|
|
+
|
|
+ call random_number(u)
|
|
+ call random_number(v)
|
|
+ call random_number(u_1)
|
|
+ call random_number(v_1)
|
|
+ call random_number(t_1)
|
|
+ call random_number(ft)
|
|
+
|
|
+ call random_number(ww)
|
|
+ call random_number(ww_1)
|
|
+ call random_number(t)
|
|
+ call random_number(t_ave)
|
|
+ call random_number(uam)
|
|
+ call random_number(vam)
|
|
+ call random_number(wwam)
|
|
+
|
|
+ call random_number(muu)
|
|
+ call random_number(muv)
|
|
+ call random_number(mut)
|
|
+ call random_number(msfux)
|
|
+ call random_number(msfuy)
|
|
+ call random_number(msfvx)
|
|
+ call random_number(msfvx_inv)
|
|
+ call random_number(msfvy)
|
|
+ call random_number(msftx)
|
|
+ call random_number(msfty)
|
|
+ call random_number(mu_tend)
|
|
+
|
|
+ call random_number(muave)
|
|
+ call random_number(muts)
|
|
+ call random_number(mudf)
|
|
+ call random_number(mu)
|
|
+
|
|
+ call random_number(fnm)
|
|
+ call random_number(fnp)
|
|
+ call random_number(dnw)
|
|
+ call random_number(rdnw)
|
|
+
|
|
+ DO j=jms, jme
|
|
+ DO k=kms, kme
|
|
+ DO i=ims, ime
|
|
+
|
|
+ llcRefresh(i,k,j,1)=i+k+j+7
|
|
+
|
|
+ ENDDO
|
|
+ ENDDO
|
|
+ ENDDO
|
|
+
|
|
+ do epoch = 1,2
|
|
+ call advance_mu_t_fortran_plu( ww, ww_1, u, u_1, v, v_1, &
|
|
+ mu, mut, muave, muts, muu, muv, &
|
|
+ mudf, uam, vam, wwam, t, t_1, &
|
|
+ t_ave, ft, mu_tend, &
|
|
+ rdx, rdy, dts, epssm, &
|
|
+ dnw, fnm, fnp, rdnw, &
|
|
+ msfux, msfuy, msfvx, msfvx_inv, &
|
|
+ msfvy, msftx, msfty, &
|
|
+ step, config_flags, &
|
|
+ ids, ide, jds, jde, kds, kde, &
|
|
+ ims, ime, jms, jme, kms, kme, &
|
|
+ its, ite, jts, jte, kts, kte )
|
|
+ enddo
|
|
+end program
|
|
+
|
|
+
|
|
+
|
|
+SUBROUTINE advance_mu_t_fortran_plu( ww, ww_1, u, u_1, v, v_1, &
|
|
+ mu, mut, muave, muts, muu, muv, &
|
|
+ mudf, uam, vam, wwam, t, t_1, &
|
|
+ t_ave, ft, mu_tend, &
|
|
+ rdx, rdy, dts, epssm, &
|
|
+ dnw, fnm, fnp, rdnw, &
|
|
+ msfux, msfuy, msfvx, msfvx_inv, &
|
|
+ msfvy, msftx, msfty, &
|
|
+ step, config_flags, &
|
|
+ ids, ide, jds, jde, kds, kde, &
|
|
+ ims, ime, jms, jme, kms, kme, &
|
|
+ its, ite, jts, jte, kts, kte )
|
|
+ use add_type
|
|
+
|
|
+ IMPLICIT NONE ! religion first
|
|
+
|
|
+ ! stuff coming in
|
|
+
|
|
+ TYPE(grid_config_rec_type), INTENT(IN ) :: config_flags
|
|
+ INTEGER, INTENT(IN ) :: ids,ide, jds,jde, kds,kde
|
|
+ INTEGER, INTENT(IN ) :: ims,ime, jms,jme, kms,kme
|
|
+ INTEGER, INTENT(IN ) :: its,ite, jts,jte, kts,kte
|
|
+
|
|
+ INTEGER, INTENT(IN ) :: step
|
|
+
|
|
+ REAL, DIMENSION( ims:ime , kms:kme, jms:jme ), &
|
|
+ INTENT(IN ) :: &
|
|
+ u, &
|
|
+ v, &
|
|
+ u_1, &
|
|
+ v_1, &
|
|
+ t_1, &
|
|
+ ft
|
|
+
|
|
+ REAL, DIMENSION( ims:ime , kms:kme, jms:jme ), &
|
|
+ INTENT(INOUT) :: &
|
|
+ ww, &
|
|
+ ww_1, &
|
|
+ t, &
|
|
+ t_ave, &
|
|
+ uam, &
|
|
+ vam, &
|
|
+ wwam
|
|
+
|
|
+ REAL, DIMENSION( ims:ime , jms:jme ), INTENT(IN ) :: muu, &
|
|
+ muv, &
|
|
+ mut, &
|
|
+ msfux,&
|
|
+ msfuy,&
|
|
+ msfvx,&
|
|
+ msfvx_inv,&
|
|
+ msfvy,&
|
|
+ msftx,&
|
|
+ msfty,&
|
|
+ mu_tend
|
|
+
|
|
+ REAL, DIMENSION( ims:ime , jms:jme ), INTENT( INOUT) :: muave, &
|
|
+ muts, &
|
|
+ mudf
|
|
+
|
|
+ REAL, DIMENSION( ims:ime , jms:jme ), INTENT(INOUT) :: mu
|
|
+
|
|
+ REAL, DIMENSION( kms:kme ), INTENT(IN ) :: fnm, &
|
|
+ fnp, &
|
|
+ dnw, &
|
|
+ rdnw
|
|
+
|
|
+
|
|
+ REAL, INTENT(IN ) :: rdx, &
|
|
+ rdy, &
|
|
+ dts, &
|
|
+ epssm
|
|
+
|
|
+ REAL, DIMENSION (its:ite, kts:kte) :: wdtn, dvdxi
|
|
+ REAL, DIMENSION (its:ite) :: dmdt
|
|
+
|
|
+ INTEGER :: i,j,k, i_start, i_end, j_start, j_end, k_start, k_end
|
|
+ INTEGER :: i_endu, j_endv
|
|
+ REAL :: acc
|
|
+
|
|
+ INTEGER :: ubv, lbv, t1, t2, t3, t4, ceild, floord
|
|
+
|
|
+ ceild(t1, t2) = ceiling(REAL(t1)/REAL(t2))
|
|
+ floord(t1, t2) = floor(REAL(t1)/REAL(t2))
|
|
+ i_start = its
|
|
+ i_end = min(ite,ide-1)
|
|
+ j_start = jts
|
|
+ j_end = min(jte,jde-1)
|
|
+ k_start = kts
|
|
+ k_end = kte-1
|
|
+ IF ( .NOT. config_flags%periodic_x )THEN
|
|
+ IF ( config_flags%specified .or. config_flags%nested ) then
|
|
+ i_start = max(its,ids+1)
|
|
+ i_end = min(ite,ide-2)
|
|
+ ENDIF
|
|
+ ENDIF
|
|
+ IF ( config_flags%specified .or. config_flags%nested ) then
|
|
+ j_start = max(jts,jds+1)
|
|
+ j_end = min(jte,jde-2)
|
|
+ ENDIF
|
|
+
|
|
+ i_endu = ite
|
|
+ j_endv = jte
|
|
+
|
|
+ DO j = j_start, j_end
|
|
+
|
|
+ DO i=i_start, i_end
|
|
+ dmdt(i) = 0.
|
|
+ ENDDO
|
|
+
|
|
+ DO k=k_start, k_end
|
|
+ DO i=i_start, i_end
|
|
+ dvdxi(i,k) = msftx(i,j)*msfty(i,j)*( &
|
|
+ rdy*((v(i,k,j+1)+muv(i,j+1)*v_1(i,k,j+1)*msfvx_inv(i,j+1)) &
|
|
+ -(v(i,k,j )+muv(i,j )*v_1(i,k,j)*msfvx_inv(i,j ))) &
|
|
+ +rdx*((u(i+1,k,j)+muu(i+1,j)*u_1(i+1,k,j)/msfuy(i+1,j)) &
|
|
+ -(u(i,k,j )+muu(i ,j)*u_1(i,k,j )/msfuy(i,j)) ))
|
|
+ dmdt(i) = dmdt(i) + dnw(k)*dvdxi(i,k)
|
|
+ ENDDO
|
|
+ ENDDO
|
|
+ DO i=i_start, i_end
|
|
+ muave(i,j) = mu(i,j)
|
|
+ mu(i,j) = mu(i,j)+dts*(dmdt(i)+mu_tend(i,j))
|
|
+ mudf(i,j) = (dmdt(i)+mu_tend(i,j)) ! save tendency for div dampfilter
|
|
+ muts(i,j) = mut(i,j)+mu(i,j)
|
|
+ muave(i,j) =.5*((1.+epssm)*mu(i,j)+(1.-epssm)*muave(i,j))
|
|
+ ENDDO
|
|
+ ENDDO
|
|
+END SUBROUTINE advance_mu_t_fortran_plu
|
|
+
|
|
+! { dg-final { scan-tree-dump "issue_llc_hint" "llc_allocate" } }
|
|
+! { dg-final { scan-tree-dump-times "analyze_nested_kernels" 2 "llc_allocate" } }
|
|
+! { dg-final { scan-tree-dump "Stop tracing the outer loop depth" "llc_allocate" } }
|
|
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
|
|
index 746a1cf95..9a14188d8 100644
|
|
--- a/gcc/tree-ssa-llc-allocate.c
|
|
+++ b/gcc/tree-ssa-llc-allocate.c
|
|
@@ -312,9 +312,6 @@ get_references_in_stmt (gimple *stmt, vector<data_ref> &references)
|
|
|
|
struct loop_filter_out_flag
|
|
{
|
|
- /* Use external gimple. */
|
|
- bool use_ext_gimple;
|
|
-
|
|
/* Use external call. */
|
|
bool use_ext_call;
|
|
|
|
@@ -358,21 +355,7 @@ bool
|
|
filter_out_loop_by_stmt_p (loop_filter_out_flag &loop_filter, gimple *stmt,
|
|
const vector<data_ref> &references, unsigned int &start)
|
|
{
|
|
- /* check use_ext_gimple. */
|
|
- expanded_location cfun_xloc
|
|
- = expand_location (DECL_SOURCE_LOCATION (current_function_decl));
|
|
expanded_location xloc = expand_location (stmt->location);
|
|
- if (xloc.file && filename_cmp (cfun_xloc.file, xloc.file))
|
|
- {
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- {
|
|
- fprintf (dump_file, "use_ext_gimple: ");
|
|
- print_gimple_stmt (dump_file, stmt, 0, TDF_LINENO);
|
|
- }
|
|
- loop_filter.use_ext_gimple = true;
|
|
- return true;
|
|
- }
|
|
-
|
|
/* check use_ext_call. */
|
|
if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt))
|
|
{
|
|
@@ -421,11 +404,6 @@ filter_out_loop_by_stmt_p (loop_filter_out_flag &loop_filter, gimple *stmt,
|
|
void
|
|
dump_loop_filter_out_flag (loop_filter_out_flag &loop_filter)
|
|
{
|
|
- if (loop_filter.use_ext_gimple)
|
|
- {
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "non-dense mem access: use_ext_gimple\n");
|
|
- }
|
|
if (loop_filter.use_ext_call)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
@@ -493,45 +471,6 @@ get_references_in_loop (vector<data_ref> &references,
|
|
return !filter_out_loop;
|
|
}
|
|
|
|
-/* Determine whether the loop is a single path. */
|
|
-
|
|
-bool
|
|
-single_path_p (class loop *loop, basic_block bb)
|
|
-{
|
|
- if (bb == NULL)
|
|
- return false;
|
|
- if (bb == loop->latch)
|
|
- return true;
|
|
-
|
|
- gimple *stmt = last_stmt (bb);
|
|
- bool res = false;
|
|
-
|
|
- if (stmt && gimple_code (stmt) == GIMPLE_COND)
|
|
- {
|
|
- gcc_assert (EDGE_COUNT (bb->succs) == 2);
|
|
- edge true_edge = NULL;
|
|
- edge false_edge = NULL;
|
|
- extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
|
|
-
|
|
- /* Returns false, if a branch occurs. */
|
|
- if (true_edge->dest->loop_father == loop
|
|
- && false_edge->dest->loop_father == loop)
|
|
- return false;
|
|
-
|
|
- if (true_edge->dest->loop_father == loop)
|
|
- res = single_path_p (loop, true_edge->dest);
|
|
- else
|
|
- res = single_path_p (loop, false_edge->dest);
|
|
- }
|
|
- else
|
|
- {
|
|
- edge e = find_fallthru_edge (bb->succs);
|
|
- if (e)
|
|
- res = single_path_p (loop, e->dest);
|
|
- }
|
|
- return res;
|
|
-}
|
|
-
|
|
/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
|
|
Assume that the HPC data reading and calculation process does not involve
|
|
adding branches in loops. Therefore, all bbs of loops are directly used for
|
|
@@ -611,6 +550,45 @@ dense_memory_p (const vector<data_ref> &references, class loop *loop)
|
|
|
|
/* Analyze the inner loop and get the loop with dense memory access. */
|
|
|
|
+void /* Analyze LOOP: if it has a single exit, passes the reference filters and satisfies dense_memory_p, append it to KERNELS and store its references in KERNELS_REFS.  */
|
|
+analyze_loop_dense_memory (vector<class loop *> &kernels,
|
|
+ map<class loop *, vector<data_ref> > &kernels_refs,
|
|
+ class loop *loop)
|
|
+{
|
|
+ vector<data_ref> references;
|
|
+ number_of_latch_executions (loop); /* Result is ignored here; NOTE(review): presumably called for a caching side effect - confirm.  */
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ {
|
|
+ fprintf (dump_file, "\n========== Processing loop %d: ==========\n",
|
|
+ loop->num);
|
|
+ loop_dump (dump_file, loop);
|
|
+ flow_loop_dump (loop, dump_file, NULL, 1);
|
|
+ fprintf (dump_file, "loop unroll: %d\n", loop->unroll);
|
|
+ }
|
|
+
|
|
+ if (get_loop_exit_edges (loop).length () != 1) /* Only loops with exactly one exit edge are considered.  */
|
|
+ {
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file, "non-dense mem access: loop_branching\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ loop_filter_out_flag loop_filter = {false, false, true, false}; /* Positional initializer; NOTE(review): must follow the field order of loop_filter_out_flag, which is only partly visible in this patch.  */
|
|
+
|
|
+ if (!get_references_in_loop (references, loop_filter, loop)) /* Loop was filtered out: dump the reason and give up.  */
|
|
+ {
|
|
+ dump_loop_filter_out_flag (loop_filter);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (dense_memory_p (references, loop))
|
|
+ {
|
|
+ kernels_refs[loop] = references;
|
|
+ kernels.push_back (loop);
|
|
+ }
|
|
+}
|
|
+/* Analyze the inner loop and get the loop with dense memory access. */
|
|
+
|
|
bool
|
|
get_dense_memory_kernels (vector<class loop *> &kernels,
|
|
map<class loop *, vector<data_ref> > &kernels_refs)
|
|
@@ -619,40 +597,7 @@ get_dense_memory_kernels (vector<class loop *> &kernels,
|
|
fprintf (dump_file, "\nPhase 1: get_dense_memory_kernels\n\n");
|
|
class loop *loop = NULL;
|
|
FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
|
|
- {
|
|
- number_of_latch_executions (loop);
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- {
|
|
- fprintf (dump_file, "\n========== Processing loop %d: ==========\n",
|
|
- loop->num);
|
|
- loop_dump (dump_file, loop);
|
|
- flow_loop_dump (loop, dump_file, NULL, 1);
|
|
- fprintf (dump_file, "loop unroll: %d\n", loop->unroll);
|
|
- }
|
|
-
|
|
- if (get_loop_exit_edges (loop).length () != 1
|
|
- || !single_path_p (loop, loop->header))
|
|
- {
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "non-dense mem access: loop_branching\n");
|
|
- continue;
|
|
- }
|
|
-
|
|
- vector<data_ref> references;
|
|
- loop_filter_out_flag loop_filter = {false, false, false, true, false};
|
|
-
|
|
- if (!get_references_in_loop (references, loop_filter, loop))
|
|
- {
|
|
- dump_loop_filter_out_flag (loop_filter);
|
|
- continue;
|
|
- }
|
|
-
|
|
- if (dense_memory_p (references, loop))
|
|
- {
|
|
- kernels_refs[loop] = references;
|
|
- kernels.push_back (loop);
|
|
- }
|
|
- }
|
|
+ analyze_loop_dense_memory (kernels, kernels_refs, loop);
|
|
return kernels.size () > 0;
|
|
}
|
|
|
|
@@ -1094,33 +1039,41 @@ trace_ref_info (data_ref &mem_ref, set <gimple *> &traced_ref_stmt)
|
|
mem_ref.trace_status_p = true;
|
|
}
|
|
|
|
+/* Run trace_ref_info on every reference in REFS, passing TRACED_REF_STMT to each call; under TDF_DETAILS, dump each reference first.  */
|
|
+
|
|
+void
|
|
+trace_loop_refs_info (vector<data_ref> &refs, set <gimple *> &traced_ref_stmt)
|
|
+{
|
|
+  for (unsigned i = 0; i < refs.size (); ++i)
|
|
+    {
|
|
+      if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+	{
|
|
+	  fprintf (dump_file, "trace_references_base_info %u:\n", i); /* I is unsigned, so print it with %u.  */
|
|
+	  print_generic_expr (dump_file, refs[i].ref, TDF_SLIM);
|
|
+	  fprintf (dump_file, "\n");
|
|
+	}
|
|
+      trace_ref_info (refs[i], traced_ref_stmt);
|
|
+    }
|
|
+}
|
|
+
|
|
/* Tracing and sorting reference groups. */
|
|
|
|
void
|
|
trace_data_refs_info (vector<class loop *> &kernels,
|
|
- map<class loop*, vector<data_ref> > &loop_refs)
|
|
+ map<class loop*, vector<data_ref> > &loop_refs,
|
|
+ set <gimple *> &traced_ref_stmt)
|
|
{
|
|
if (dump_file)
|
|
fprintf (dump_file, "\nPhase 2: trace_all_references_info\n\n");
|
|
|
|
- set <gimple *> traced_ref_stmt;
|
|
-
|
|
for (unsigned i = 0; i < kernels.size (); ++i)
|
|
{
|
|
- class loop* loop = kernels[i];
|
|
-
|
|
+ class loop *loop = kernels[i];
|
|
+ if (loop_refs.count (loop) == 0)
|
|
+ continue;
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "loop header %d:\n", loop->header->index);
|
|
- for (unsigned j = 0; j < loop_refs[loop].size (); ++j)
|
|
- {
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- {
|
|
- fprintf (dump_file, "trace_references_base_info %d:\n", j);
|
|
- print_generic_expr (dump_file, loop_refs[loop][j].ref, TDF_SLIM);
|
|
- fprintf (dump_file, "\n");
|
|
- }
|
|
- trace_ref_info (loop_refs[loop][j], traced_ref_stmt);
|
|
- }
|
|
+ trace_loop_refs_info (loop_refs[loop], traced_ref_stmt);
|
|
}
|
|
}
|
|
|
|
@@ -1205,7 +1158,7 @@ void
|
|
check_bound_iv_and_add_worklist (vector<tree> &worklist, set<tree> &walked,
|
|
tree t, data_ref &mem_ref)
|
|
{
|
|
- if (TREE_CODE (t) != SSA_NAME)
|
|
+ if (t == NULL_TREE || TREE_CODE (t) != SSA_NAME)
|
|
return;
|
|
|
|
gimple *def_stmt = SSA_NAME_DEF_STMT (t);
|
|
@@ -1278,8 +1231,13 @@ trace_loop_bound_iv (data_ref &mem_ref)
|
|
}
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "\nmem_ref access dimension: %ld\n",
|
|
- mem_ref.loop_bounds.size ());
|
|
+ {
|
|
+ fprintf (dump_file, "\nmem_ref access dimension: %ld\n",
|
|
+ mem_ref.loop_bounds.size ());
|
|
+ fprintf (dump_file, "Traced variables: ");
|
|
+ print_generic_expr (dump_file, mem_ref.base, TDF_SLIM);
|
|
+ fprintf (dump_file, "\n");
|
|
+ }
|
|
|
|
return mem_ref.loop_bounds.size () > 0;
|
|
}
|
|
@@ -1487,7 +1445,7 @@ trace_and_create_dominate_loop_bounds (data_ref &mem_ref)
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
print_generic_expr (dump_file, mem_ref.ref, TDF_SLIM);
|
|
- fprintf (dump_file, "Tracing loop bound failed at dimension %d",
|
|
+ fprintf (dump_file, "Tracing loop bound failed at dimension %d\n",
|
|
i);
|
|
}
|
|
mem_ref.calc_by = UNHANDLE_CALC;
|
|
@@ -1565,42 +1523,246 @@ trace_ref_dimension_and_loop_bounds (data_ref &mem_ref)
|
|
static_calculate_data_size (mem_ref);
|
|
}
|
|
|
|
-/* analyze nested kernels.
|
|
- 1. multidimension loop analyze.
|
|
- 2. extended outer loop analyze.
|
|
- Later we will extend outer loop analysis.
|
|
+/* Return the niters tree recorded for LOOP: the niters of the first loop
+   bound of the first data reference stored for LOOP in LOOP_REFS.
+   Return NULL_TREE if LOOP has no entry in LOOP_REFS, if its reference
+   list is empty, or if that first reference has no loop bounds.  */
+
+tree
+get_cur_loop_niters (map<class loop*, vector<data_ref> > &loop_refs,
+                     class loop* loop)
+{
+  map<class loop*, vector<data_ref> >::iterator it = loop_refs.find (loop);
+  /* An entry whose reference list is empty must not be indexed with [0].  */
+  if (it == loop_refs.end () || it->second.empty ())
+    return NULL_TREE;
+
+  /* Bind by reference: copying the whole bounds vector just to read its
+     first element is unnecessary.  */
+  const vector<loop_bound> &bounds = it->second[0].loop_bounds;
+  return bounds.empty () ? NULL_TREE : bounds[0].niters;
+}
|
|
+
|
|
+/* Trace the values NITERS is computed from and return the outermost
+   (smallest) loop depth found among the blocks that define them.
+   Return START_DEPTH if NITERS is NULL_TREE or an INTEGER_CST, or if the
+   trace ends in a form that is not handled yet.
+
+   The trace is a depth-first search:
+   - an expression tree (NOP_EXPR, PLUS_EXPR, MINUS_EXPR, ...) is split
+     into its operands and the minimum over START_DEPTH and the operand
+     results is returned;
+   - an SSA_NAME defined by a GIMPLE_ASSIGN is followed into the operands
+     of the assignment (only the index operand when the right-hand side
+     is a TARGET_MEM_REF);
+   - an SSA_NAME defined by a GIMPLE_PHI or GIMPLE_NOP ends the search;
+     the depth of the loop containing the defining block is returned, or
+     START_DEPTH when the statement has no block or loop.
+
+   Example:
+     niters: (long) (((int) i_end_417 - (int) i_start_452) + 1)
+       -> ((int) i_end_417 - (int) i_start_452) + 1
+       -> (int) i_end_417 - (int) i_start_452
+       -> (int) i_end_417
+       -> SSA_NAME i_end_417
+       -> i_end_417 = PHI <i_end_446(9), i_end_410(100)>
+     The depth of the loop containing that PHI is returned.  */
+
+unsigned
+trace_outer_loop_depth (tree niters, unsigned start_depth)
+{
+  /* A missing or constant iteration count leaves nothing to trace.  */
+  if (niters == NULL_TREE || TREE_CODE (niters) == INTEGER_CST)
+    return start_depth;
+
+  const bool dump_details_p = dump_file && (dump_flags & TDF_DETAILS);
+  const enum tree_code niter_code = TREE_CODE (niters);
+
+  /* niters examples: i_start_452, fEnd_35, fEnd_100.  */
+  if (niter_code == SSA_NAME)
+    {
+      /* Look at the statement that defines this SSA name.  */
+      gimple *def_stmt = SSA_NAME_DEF_STMT (niters);
+      const enum gimple_code stmt_code = gimple_code (def_stmt);
+      if (dump_details_p)
+        {
+          fprintf (dump_file, "ssa_name of niters: ");
+          print_generic_expr (dump_file, niters);
+          fprintf (dump_file, "\ngimple of ssa: \n");
+          print_gimple_stmt (dump_file, def_stmt, 0, TDF_LINENO);
+          fprintf (dump_file, "\n");
+        }
+
+      /* Termination condition of the dfs: report the depth of the loop
+         that contains the defining basic block.  */
+      if (stmt_code == GIMPLE_PHI || stmt_code == GIMPLE_NOP)
+        {
+          basic_block def_bb = gimple_bb (def_stmt);
+          if (def_bb == NULL || def_bb->loop_father == NULL)
+            return start_depth;
+          unsigned ret_depth = loop_depth (def_bb->loop_father);
+          if (dump_details_p)
+            fprintf (dump_file, "Stop tracing the outer loop depth, "
+                     "current depth: %u, current bb: %d\n",
+                     ret_depth, def_bb->index);
+          return ret_depth;
+        }
+
+      if (stmt_code != GIMPLE_ASSIGN)
+        {
+          /* Termination conditions still to be added:
+             1. niters is a MEM variable;
+             2. niters is a runtime value (smooth_uPtr); consider finding
+                the footprint in another mem_ref;
+             3. niters is a loop variable (i_start/i_end) whose boundary
+                in the outer loop depends on j_start/j_end.  */
+          if (dump_details_p)
+            fprintf (dump_file, "The loop termination condition "
+                     "is to be extended.\n");
+          return start_depth;
+        }
+
+      /* GIMPLE_ASSIGN: continue the dfs on the right-hand side.  */
+      tree rhs = gimple_assign_rhs1 (def_stmt);
+      if (TREE_CODE (rhs) == TARGET_MEM_REF)
+        /* fEnd_35 = MEM[base: _19, index: ivtmp.96, step: 4, offset: 0B]
+           Only the index operand is traced.  */
+        return trace_outer_loop_depth (TMR_INDEX (rhs), start_depth);
+
+      /* M.218_658 = MIN_EXPR <_631, _657>
+         Start from operand 1 because operand 0 is the lhs.  */
+      unsigned min_depth = start_depth;
+      const unsigned operand_num = gimple_num_ops (def_stmt);
+      for (unsigned i = 1; i < operand_num; i++)
+        {
+          tree subtree = gimple_op (def_stmt, i);
+          if (subtree == NULL_TREE)
+            continue;
+          /* Keep the call in its own statement: MIN is a macro and
+             would evaluate a call argument twice.  */
+          unsigned depth = trace_outer_loop_depth (subtree, start_depth);
+          min_depth = MIN (min_depth, depth);
+        }
+      return min_depth;
+    }
+
+  /* Tree codes whose operands can be walked with TREE_OPERAND.  */
+  if (niter_code == NOP_EXPR || niter_code == MEM_REF
+      || niter_code == ARRAY_REF || niter_code == COND_EXPR
+      || niter_code == PLUS_EXPR || niter_code == MINUS_EXPR
+      || niter_code == TARGET_MEM_REF || niter_code == POINTER_PLUS_EXPR)
+    {
+      /* operand_num is the number of operands of the niters expression,
+         e.g. in
+           (unsigned int) fEnd_35 - (unsigned int) fEnd_100 + 4294967295
+         the outermost PLUS_EXPR has two operands, the MINUS_EXPR and
+         the constant.  */
+      const unsigned operand_num = TREE_OPERAND_LENGTH (niters);
+      unsigned min_depth = start_depth;
+      for (unsigned i = 0; i < operand_num; i++)
+        {
+          tree subtree = TREE_OPERAND (niters, i);
+          if (subtree == NULL_TREE)
+            continue;
+          unsigned depth = trace_outer_loop_depth (subtree, start_depth);
+          min_depth = MIN (min_depth, depth);
+        }
+      return min_depth;
+    }
+
+  /* Any other tree code is not traced.  */
+  if (dump_details_p)
+    {
+      fprintf (dump_file, "niters is another tree code: %s\n",
+               get_tree_code_name (niter_code));
+      print_generic_expr (dump_file, niters, TDF_SLIM);
+      fprintf (dump_file, "\n");
+    }
+  return start_depth;
+}
|
|
+
|
|
+/* For every reference in REFS whose trace_status_p is set, trace its
+   dimension and loop bounds.  References whose trace_status_p is false
+   are left untouched.  */
+
+void
+analyze_loop_refs_dimension (vector<data_ref> &refs)
+{
+  for (unsigned idx = 0; idx < refs.size (); ++idx)
+    {
+      data_ref &mem_ref = refs[idx];
+      if (mem_ref.trace_status_p)
+        {
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            {
+              fprintf (dump_file, "trace_reference_dimension %d:\n", idx);
+              print_generic_expr (dump_file, mem_ref.ref, TDF_SLIM);
+              fprintf (dump_file, "\n");
+            }
+          trace_ref_dimension_and_loop_bounds (mem_ref);
+        }
+    }
+}
|
|
+/* Analyze nested kernels:
|
|
+ 1. trace the dimension and loop bounds of the references of each kernel;
|
|
+ 2. extend the analysis to outer loops when the traced niters depth differs from the kernel's own depth.
|
|
*/
|
|
|
|
bool
|
|
analyze_nested_kernels (vector<class loop *> &kernels,
|
|
- map<class loop*, vector<data_ref> > &loop_refs)
|
|
+ map<class loop*, vector<data_ref> > &loop_refs,
|
|
+ set <gimple *> &traced_ref_stmt)
|
|
{
|
|
if (dump_file)
|
|
fprintf (dump_file, "\nPhase 3: analyze_nested_kernels\n\n");
|
|
|
|
- for (unsigned i = 0; i < kernels.size (); ++i)
|
|
+ /* KERNELS may grow during the outer loop extension phase below, so
|
|
+ iterate only over its initial size to avoid analyzing added loops again. */
|
|
+ unsigned init_kernels_size = kernels.size ();
|
|
+ for (unsigned i = 0; i < init_kernels_size; ++i)
|
|
{
|
|
class loop* loop = kernels[i];
|
|
if (loop_refs.count (loop) == 0)
|
|
continue;
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
- fprintf (dump_file, "\n\nloop header %d:\n", loop->header->index);
|
|
- for (unsigned j = 0; j < loop_refs[loop].size (); ++j)
|
|
+ fprintf (dump_file, "loop header %d:\n", loop->header->index);
|
|
+ analyze_loop_refs_dimension (loop_refs[loop]);
|
|
+
|
|
+ unsigned depth = loop_depth (loop);
|
|
+ unsigned outer_depth = trace_outer_loop_depth (get_cur_loop_niters \
|
|
+ (loop_refs, loop), depth);
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file, "cur_depth: %d, outer_depth: %d\n", \
|
|
+ depth, outer_depth);
|
|
+ /* Do not extend the outer loop when the traced depth is 0 (outermost level),
|
|
+ when `outer_depth == depth` (the boundary of the current loop is
|
|
+ already known at its own depth), or when the traced depth lies more than
|
|
+ param_outer_loop_num levels outside the current loop. */
|
|
+ if (outer_depth == 0 || outer_depth == depth
|
|
+ || depth > outer_depth + param_outer_loop_num)
|
|
+ continue;
|
|
+ /* Extend outer loop. */
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file, "\nStart extending outer loop\n");
|
|
+ /* Walk the superloops of LOOP, starting with the one closest to LOOP,
|
|
+ for at most param_outer_loop_num levels; `--depth` stops the walk before index 0. */
|
|
+ for (unsigned j = 0; j < param_outer_loop_num && --depth; ++j)
|
|
{
|
|
- if (loop_refs[loop][j].trace_status_p == false)
|
|
+ class loop* outer_loop = (*loop->superloops)[depth];
|
|
+ /* OUTER_LOOP may already have an entry in LOOP_REFS from an earlier inner loop,
|
|
+ i.e. when it contains two or more inner loops; skip it in that case. */
|
|
+ if (loop_refs.count (outer_loop))
|
|
continue;
|
|
-
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ /* Run the phase 1 to phase 3 analyses on the extended outer loop. */
|
|
+ analyze_loop_dense_memory (kernels, loop_refs, outer_loop);
|
|
+ if (loop_refs.count (outer_loop) == 0)
|
|
+ continue;
|
|
+ for (unsigned k = 0; k < loop_refs[outer_loop].size (); ++k)
|
|
{
|
|
- fprintf (dump_file, "\ntrace_reference_dimension at mem_ref "
|
|
- "index %d in loop %d:\n", j, loop->num);
|
|
- print_generic_expr (dump_file, loop_refs[loop][j].ref, TDF_SLIM);
|
|
- fprintf (dump_file, "\n");
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ {
|
|
+ fprintf (dump_file, "outer_analyze_nested_kernels %d: ", k);
|
|
+ print_generic_expr (dump_file, loop_refs[outer_loop][k].ref,\
|
|
+ TDF_SLIM);
|
|
+ fprintf (dump_file, "\n");
|
|
+ }
|
|
}
|
|
- trace_ref_dimension_and_loop_bounds (loop_refs[loop][j]);
|
|
+ trace_loop_refs_info (loop_refs[outer_loop], traced_ref_stmt);
|
|
+ analyze_loop_refs_dimension (loop_refs[outer_loop]);
|
|
+ outer_depth = trace_outer_loop_depth (get_cur_loop_niters \
|
|
+ (loop_refs, outer_loop), depth);
|
|
+ /* `outer_depth == depth`: the niters of OUTER_LOOP trace back to its own
|
|
+ depth (DEPTH was decremented above), so stop extending and keep its record. */
|
|
+ if (outer_depth == depth)
|
|
+ break;
|
|
+ else
|
|
+ /* OUTER_LOOP does not determine the boundary either:
|
|
+ remove its record from LOOP_REFS and go on with the next outer loop. */
|
|
+ loop_refs.erase (outer_loop);
|
|
}
|
|
-
|
|
}
|
|
return true;
|
|
}
|
|
@@ -2694,9 +2856,10 @@ llc_allocate (void)
|
|
if (!get_dense_memory_kernels (kernels, kernels_refs))
|
|
return;
|
|
|
|
- trace_data_refs_info (kernels, kernels_refs);
|
|
+ set <gimple *> traced_ref_stmt;
|
|
+ trace_data_refs_info (kernels, kernels_refs, traced_ref_stmt);
|
|
|
|
- if (!analyze_nested_kernels (kernels, kernels_refs))
|
|
+ if (!analyze_nested_kernels (kernels, kernels_refs, traced_ref_stmt))
|
|
return;
|
|
|
|
vector<class loop *> sorted_kernels;
|
|
--
|
|
2.33.0
|
|
|