brw: Have brw_nir_apply_key call brw_nir_lower_simd for all stages

brw_nir_apply_key typically knows the dispatch width (it's fixed for
geometry stages, and we clone the NIR for compute and mesh shaders).
For compute/mesh, SIMD lowering was the very next pass called anyway.
For the other stages, since we already know the width, there's no reason
not to lower it here as well.

Scratch lowering will start using load_simd_width_intel soon, so we
need it to work in all stages.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40843>
This commit is contained in:
Kenneth Graunke 2026-04-08 01:47:32 -07:00 committed by Marge Bot
parent 765d74eebe
commit e5598166b0
4 changed files with 10 additions and 13 deletions

View file

@ -190,8 +190,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
BRW_NIR_SNAPSHOT("first");
brw_nir_apply_key(pt, &key->base, dispatch_width);
BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
brw_nir_optimize(pt);
/* brw_nir_optimize undoes late lowerings. */
BRW_NIR_PASS(nir_opt_algebraic_late);

View file

@ -362,8 +362,6 @@ brw_compile_task(const struct brw_compiler *compiler,
BRW_NIR_SNAPSHOT("first");
brw_nir_apply_key(pt, &key->base, dispatch_width);
BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
brw_nir_optimize(pt);
/* brw_nir_optimize undoes late lowerings. */
BRW_NIR_PASS(nir_opt_algebraic_late);
@ -1100,8 +1098,6 @@ brw_compile_mesh(const struct brw_compiler *compiler,
/* Load uniforms can do a better job for constants, so fold before it. */
BRW_NIR_PASS(nir_opt_constant_folding);
BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
brw_nir_optimize(pt);
/* brw_nir_optimize undoes late lowerings. */
BRW_NIR_PASS(nir_opt_algebraic_late);

View file

@ -3038,8 +3038,6 @@ brw_nir_apply_key(brw_pass_tracker *pt,
pt->progress = false;
unsigned subgroup_size = get_subgroup_size(&nir->info, max_subgroup_size);
/* VS/TCS/TES/GS always run at a fixed SIMD width, which is what our
* max_subgroup_size parameter represents. Compute/Mesh can run at
* different sizes, but we clone the NIR for each SIMD width, and pass
@ -3052,6 +3050,8 @@ brw_nir_apply_key(brw_pass_tracker *pt,
if (nir->info.stage != MESA_SHADER_FRAGMENT) {
nir->info.min_subgroup_size = max_subgroup_size;
nir->info.max_subgroup_size = max_subgroup_size;
OPT(brw_nir_lower_simd);
}
const nir_lower_subgroups_options subgroups_options = {
@ -3442,13 +3442,16 @@ filter_simd(const nir_instr *instr, UNUSED const void *options)
static nir_def *
lower_simd(nir_builder *b, nir_instr *instr, void *options)
{
uintptr_t simd_width = (uintptr_t)options;
unsigned simd_width = b->shader->info.max_subgroup_size;
assert(b->shader->info.min_subgroup_size == simd_width);
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
case nir_intrinsic_load_simd_width_intel:
return nir_imm_int(b, simd_width);
case nir_intrinsic_load_subgroup_id:
assert(mesa_shader_stage_uses_workgroup(b->shader->info.stage));
/* If the whole workgroup fits in one thread, we can lower subgroup_id
* to a constant zero.
*/
@ -3464,10 +3467,10 @@ lower_simd(nir_builder *b, nir_instr *instr, void *options)
}
bool
brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width)
brw_nir_lower_simd(nir_shader *nir)
{
return nir_shader_lower_instructions(nir, filter_simd, lower_simd,
(void *)(uintptr_t)dispatch_width);
return nir->info.min_subgroup_size == nir->info.max_subgroup_size &&
nir_shader_lower_instructions(nir, filter_simd, lower_simd, NULL);
}
nir_variable *

View file

@ -275,7 +275,7 @@ bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader,
const struct
intel_device_info *devinfo);
bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
bool brw_nir_lower_simd(nir_shader *nir);
void brw_postprocess_nir_opts(struct brw_pass_tracker *pt);