From 385da1fe36ca231fc0ad3634b1aa3be4ef679959 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 25 May 2021 14:41:26 -0700 Subject: [PATCH] intel/fs: Track single accumulator in scoreboard lowering pass. This change reduces the precision of the scoreboard data structure for accumulator registers, because the rules determining the aliasing of accumulator registers are non-trivial and poorly documented (e.g. acc0 overlaps the storage of acc1 when the former is accessed with an integer type). We could implement those rules but it wouldn't have any practical benefit since we currently only use acc0-1, and for the most part we can rely on the hardware's accumulator dependency tracking. Instead make our lives easier by representing it as a single register. Reviewed-by: Jason Ekstrand Part-of: --- src/intel/compiler/brw_fs_scoreboard.cpp | 25 +++++++++--------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp index 56027077895..5bfec9126e4 100644 --- a/src/intel/compiler/brw_fs_scoreboard.cpp +++ b/src/intel/compiler/brw_fs_scoreboard.cpp @@ -43,6 +43,8 @@ * - ip instruction pointer * - tm0 timestamp register * - dbg0 debug register + * - acc2-9 special accumulator registers on TGL + * - mme0-7 math macro extended accumulator registers * * The following ARF registers don't need to be tracked here because data * coherency is still provided transparently by the hardware: @@ -615,9 +617,7 @@ namespace { sb.grf_deps[i] = merge(eq, sb0.grf_deps[i], sb1.grf_deps[i]); sb.addr_dep = merge(eq, sb0.addr_dep, sb1.addr_dep); - - for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++) - sb.accum_deps[i] = merge(eq, sb0.accum_deps[i], sb1.accum_deps[i]); + sb.accum_dep = merge(eq, sb0.accum_dep, sb1.accum_dep); return sb; } @@ -635,9 +635,7 @@ namespace { sb.grf_deps[i] = shadow(sb0.grf_deps[i], sb1.grf_deps[i]); sb.addr_dep = shadow(sb0.addr_dep, sb1.addr_dep); - - for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++) - sb.accum_deps[i] = shadow(sb0.accum_deps[i], sb1.accum_deps[i]); + sb.accum_dep = shadow(sb0.accum_dep, sb1.accum_dep); return sb; } @@ -655,9 +653,7 @@ namespace { sb.grf_deps[i] = transport(sb0.grf_deps[i], delta); sb.addr_dep = transport(sb0.addr_dep, delta); - - for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++) - sb.accum_deps[i] = transport(sb0.accum_deps[i], delta); + sb.accum_dep = transport(sb0.accum_dep, delta); return sb; } @@ -673,10 +669,8 @@ namespace { if (sb0.addr_dep != sb1.addr_dep) return false; - for (unsigned i = 0; i < ARRAY_SIZE(sb0.accum_deps); i++) { - if (sb0.accum_deps[i] != sb1.accum_deps[i]) - return false; - } + if (sb0.accum_dep != sb1.accum_dep) + return false; return true; } @@ -690,7 +684,7 @@ namespace { private: dependency grf_deps[BRW_MAX_GRF]; dependency addr_dep; - dependency accum_deps[10]; + dependency accum_dep; dependency * dep(const fs_reg &r) @@ -703,8 +697,7 @@ namespace { r.file == ARF && reg >= BRW_ARF_ADDRESS && reg < BRW_ARF_ACCUMULATOR ? &addr_dep : r.file == ARF && reg >= BRW_ARF_ACCUMULATOR && - reg < BRW_ARF_FLAG ? &accum_deps[ - reg - BRW_ARF_ACCUMULATOR] : + reg < BRW_ARF_FLAG ? &accum_dep : NULL); } };