brw/nir: Treat some load_ubo as convergent

v2: Fix for Xe2.

No changes in shader-db or fossil-db on Lunar Lake, Meteor Lake, or DG2.

shader-db:

Tiger Lake, Ice Lake, and Skylake had similar results. (Tiger Lake shown)
total instructions in shared programs: 19626547 -> 19634353 (0.04%)
instructions in affected programs: 1591181 -> 1598987 (0.49%)
helped: 925 / HURT: 3595

total cycles in shared programs: 865236718 -> 866682659 (0.17%)
cycles in affected programs: 151284264 -> 152730205 (0.96%)
helped: 3430 / HURT: 5510

total sends in shared programs: 1032237 -> 1032233 (<.01%)
sends in affected programs: 20 -> 16 (-20.00%)
helped: 4 / HURT: 0

LOST:   48
GAINED: 141

fossil-db:

Tiger Lake, Ice Lake, and Skylake had similar results. (Tiger Lake shown)
Totals:
Instrs: 150662952 -> 150641175 (-0.01%); split: -0.03%, +0.02%
Subgroup size: 7768880 -> 7768888 (+0.00%)
Send messages: 7502265 -> 7502044 (-0.00%)
Cycle count: 15621785298 -> 15618640525 (-0.02%); split: -0.06%, +0.04%
Spill count: 58818 -> 58816 (-0.00%)
Fill count: 101063 -> 101054 (-0.01%)
Max live registers: 31795403 -> 31792179 (-0.01%); split: -0.01%, +0.00%
Max dispatch width: 5572160 -> 5571488 (-0.01%); split: +0.00%, -0.01%

Totals from 10278 (1.62% of 632539) affected shaders:
Instrs: 5276493 -> 5254716 (-0.41%); split: -0.89%, +0.48%
Subgroup size: 156432 -> 156440 (+0.01%)
Send messages: 279259 -> 279038 (-0.08%)
Cycle count: 6483576378 -> 6480431605 (-0.05%); split: -0.16%, +0.11%
Spill count: 27133 -> 27131 (-0.01%)
Fill count: 49384 -> 49375 (-0.02%)
Max live registers: 675781 -> 672557 (-0.48%); split: -0.49%, +0.01%
Max dispatch width: 97256 -> 96584 (-0.69%); split: +0.08%, -0.77%

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29884>
This commit is contained in:
Ian Romanick 2024-02-14 16:22:45 -08:00
parent c48570d2b2
commit 3e63920ca5

View file

@@ -1981,6 +1981,10 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
is_scalar = true;
break;
case nir_intrinsic_load_ubo:
is_scalar = get_nir_src(ntb, instr->src[1]).is_scalar;
break;
case nir_intrinsic_load_uniform:
is_scalar = get_nir_src(ntb, instr->src[0]).is_scalar;
break;
@@ -6492,7 +6496,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
s.prog_data->has_ubo_pull = true;
if (instr->intrinsic == nir_intrinsic_load_ubo) {
/* load_ubo with non-uniform offset */
/* load_ubo with non-constant offset. The offset might still be
* uniform on non-LSC platforms when loading fewer than 4
* components.
*/
brw_reg base_offset = retype(get_nir_src(ntb, instr->src[1]),
BRW_TYPE_UD);
@@ -6500,12 +6507,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
for (unsigned i = 0; i < num_components; i += comps_per_load) {
const unsigned remaining = num_components - i;
bld.VARYING_PULL_CONSTANT_LOAD(offset(dest, bld, i),
surface, surface_handle,
base_offset,
i * brw_type_size_bytes(dest.type),
instr->def.bit_size / 8,
MIN2(remaining, comps_per_load));
xbld.VARYING_PULL_CONSTANT_LOAD(offset(dest, xbld, i),
surface, surface_handle,
base_offset,
i * brw_type_size_bytes(dest.type),
instr->def.bit_size / 8,
MIN2(remaining, comps_per_load));
}
} else {
/* load_ubo_uniform_block_intel with non-constant offset */
@@ -6544,8 +6551,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
if (push_reg.file != BAD_FILE) {
for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(dest, bld, i),
byte_offset(push_reg, i * type_size));
xbld.MOV(offset(dest, xbld, i),
byte_offset(push_reg, i * type_size));
}
break;
}
@@ -6576,7 +6583,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
dest.type);
for (unsigned d = 0; d < count; d++)
bld.MOV(offset(dest, bld, c + d), component(consts, d));
xbld.MOV(offset(dest, xbld, c + d), component(consts, d));
c += count;
}