From c9739e8912286a212359f3a5a4f958c6165ce2cc Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 8 Aug 2023 09:13:16 +0300 Subject: [PATCH] intel/fs: limit register flag interaction of FIND_*LIVE_CHANNEL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those instructions do not access the flag registers on Gfx8+. Removing the interaction enables CSE to remove more of those instructions. Results are a bit mixed (DG2 vulkan fossils): ACO: Totals from 127 (5.97% of 2128) affected shaders: Instrs: 139966 -> 138972 (-0.71%); split: -0.85%, +0.14% Cycles: 1685747 -> 1667480 (-1.08%); split: -2.35%, +1.26% Max live registers: 10582 -> 10544 (-0.36%) Max dispatch width: 1048 -> 1040 (-0.76%) Cyberpunk 2077: Totals from 2879 (27.95% of 10301) affected shaders: Instrs: 4264789 -> 4225666 (-0.92%); split: -1.01%, +0.09% Cycles: 72380209 -> 71619521 (-1.05%); split: -1.63%, +0.58% Subgroup size: 30624 -> 30632 (+0.03%) Spill count: 98 -> 101 (+3.06%) Fill count: 90 -> 93 (+3.33%) Scratch Memory Size: 8192 -> 9216 (+12.50%) Max live registers: 217807 -> 217098 (-0.33%); split: -0.59%, +0.26% Max dispatch width: 23792 -> 24112 (+1.34%) Gaining 40 SIMD16 shaders Rise Of The Tomb Raider: Totals from 622 (5.06% of 12289) affected shaders: Instrs: 437380 -> 434760 (-0.60%); split: -0.72%, +0.12% Cycles: 261843085 -> 261580703 (-0.10%); split: -0.73%, +0.63% Max live registers: 27731 -> 27766 (+0.13%); split: -1.01%, +1.14% Max dispatch width: 5832 -> 5432 (-6.86%); split: +0.27%, -7.13% Loosing 26 SIMD32 shaders Strange Brigade: Totals from 1298 (31.48% of 4123) affected shaders: Instrs: 1504408 -> 1487968 (-1.09%); split: -1.17%, +0.08% Cycles: 20735976 -> 20443216 (-1.41%); split: -1.60%, +0.19% Max live registers: 89911 -> 89957 (+0.05%) DG2 shader-db run: total instructions in shared programs: 23130895 -> 23130036 (<.01%) instructions in affected programs: 260956 -> 260097 (-0.33%) helped: 234 HURT: 101 helped stats (abs) min: 1 max: 54 x̄: 6.36 x̃: 4 helped stats (rel) min: 0.05% max: 8.16% x̄: 2.01% x̃: 1.90% HURT stats (abs) min: 1 max: 37 x̄: 6.23 x̃: 3 HURT stats (rel) min: 0.02% max: 5.67% x̄: 0.89% x̃: 0.55% 95% mean confidence interval for instructions value: -3.62 -1.51 95% mean confidence interval for instructions %-change: -1.33% -0.94% Instructions are helped. total loops in shared programs: 6071 -> 6071 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total cycles in shared programs: 898610645 -> 898557166 (<.01%) cycles in affected programs: 18308201 -> 18254722 (-0.29%) helped: 315 HURT: 48 helped stats (abs) min: 1 max: 19312 x̄: 404.23 x̃: 128 helped stats (rel) min: 0.02% max: 28.98% x̄: 3.92% x̃: 2.65% HURT stats (abs) min: 2 max: 14478 x̄: 1538.60 x̃: 409 HURT stats (rel) min: <.01% max: 23.24% x̄: 3.34% x̃: 0.41% 95% mean confidence interval for cycles value: -333.68 39.03 95% mean confidence interval for cycles %-change: -3.51% -2.41% Inconclusive result (value mean confidence interval includes 0). total spills in shared programs: 5964 -> 5964 (0.00%) spills in affected programs: 0 -> 0 helped: 0 HURT: 0 total fills in shared programs: 6909 -> 6909 (0.00%) fills in affected programs: 0 -> 0 helped: 0 HURT: 0 total sends in shared programs: 1040266 -> 1040266 (0.00%) sends in affected programs: 0 -> 0 helped: 0 HURT: 0 LOST: 3 GAINED: 1 Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_fs.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 862904f1449..a886be7625b 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1012,6 +1012,9 @@ fs_inst::flags_written(const intel_device_info *devinfo) const /* On Gfx4 and Gfx5, sel.l (for min) and sel.ge (for max) are implemented * using a separate cmpn and sel instruction. This lowering occurs in * fs_vistor::lower_minmax which is called very, very late. + * + * FIND_LIVE_CHANNEL & FIND_LAST_LIVE_CHANNEL are lowered in + * lower_find_live_channel() on Gfx8+ and do not use the flag registers. */ if ((conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) && opcode != BRW_OPCODE_CSEL && @@ -1019,8 +1022,9 @@ fs_inst::flags_written(const intel_device_info *devinfo) const opcode != BRW_OPCODE_WHILE)) || opcode == FS_OPCODE_FB_WRITE) { return flag_mask(this, 1); - } else if (opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL || - opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL || + } else if ((devinfo->ver <= 7 && + (opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL || + opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL)) || opcode == FS_OPCODE_LOAD_LIVE_CHANNELS) { return flag_mask(this, 32); } else {