From 7eab2cb67e8a2bc237b0fb4d0feac34133ad8fdd Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 30 Jan 2024 18:14:02 -0800 Subject: [PATCH] brw/nir: Treat load_workgroup_id as convergent v2: Fix for Xe2. shader-db: Lunar Lake, Meteor Lake, DG2, and Tiger Lake had similar results. (Lunar Lake shown) total instructions in shared programs: 18096526 -> 18096500 (<.01%) instructions in affected programs: 6759 -> 6733 (-0.38%) helped: 9 / HURT: 3 total cycles in shared programs: 921727804 -> 921841300 (0.01%) cycles in affected programs: 110049730 -> 110163226 (0.10%) helped: 90 / HURT: 372 Ice Lake and Skylake had similar results. (Ice Lake shown) total instructions in shared programs: 20496591 -> 20496402 (<.01%) instructions in affected programs: 48757 -> 48568 (-0.39%) helped: 25 / HURT: 8 total cycles in shared programs: 875253948 -> 875237902 (<.01%) cycles in affected programs: 56760140 -> 56744094 (-0.03%) helped: 363 / HURT: 34 total spills in shared programs: 4555 -> 4546 (-0.20%) spills in affected programs: 174 -> 165 (-5.17%) helped: 2 / HURT: 0 total fills in shared programs: 5243 -> 5224 (-0.36%) fills in affected programs: 382 -> 363 (-4.97%) helped: 2 / HURT: 0 fossil-db: All Intel platforms had similar results. 
(Lunar Lake shown) Totals: Instrs: 141811577 -> 141811551 (-0.00%); split: -0.00%, +0.00% Cycle count: 22173792370 -> 22183128332 (+0.04%); split: -0.00%, +0.04% Max live registers: 48053498 -> 48053415 (-0.00%) Totals from 3911 (0.71% of 551443) affected shaders: Instrs: 2164804 -> 2164778 (-0.00%); split: -0.00%, +0.00% Cycle count: 2404062476 -> 2413398438 (+0.39%); split: -0.02%, +0.41% Max live registers: 413583 -> 413500 (-0.02%) Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index d6bfc64fe84..257dfd12011 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -207,12 +207,14 @@ static brw_reg emit_work_group_id_setup(nir_to_brw_state &ntb) { fs_visitor &s = ntb.s; - const fs_builder &bld = ntb.bld; + const fs_builder &bld = ntb.bld.scalar_group(); assert(gl_shader_stage_is_compute(s.stage)); brw_reg id = bld.vgrf(BRW_TYPE_UD, 3); + id.is_scalar = true; + struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_TYPE_UD)); bld.MOV(id, r0_1); @@ -1947,6 +1949,10 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def) nir_instr_as_intrinsic(def.parent_instr); switch (instr->intrinsic) { + case nir_intrinsic_load_workgroup_id: + is_scalar = true; + break; + case nir_intrinsic_load_uniform: is_scalar = get_nir_src(ntb, instr->src[0]).is_scalar; break; @@ -4598,10 +4604,14 @@ fs_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_load_workgroup_id: { brw_reg val = ntb.system_values[SYSTEM_VALUE_WORKGROUP_ID]; + const fs_builder ubld = bld.scalar_group(); + assert(val.file != BAD_FILE); + assert(val.is_scalar); + dest.type = val.type; for (unsigned i = 0; i < 3; i++) - bld.MOV(offset(dest, bld, i), offset(val, bld, i)); + ubld.MOV(offset(dest, ubld, i), offset(val, ubld, i)); break; }