From e5598166b01a727753286d8948d1d42f0303b50f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 8 Apr 2026 01:47:32 -0700 Subject: [PATCH] brw: Have brw_nir_apply_key call brw_nir_lower_simd for all stages brw_nir_apply_key typically knows the dispatch width (it's fixed for geometry stages, and we clone the NIR for compute and mesh shaders). For compute/mesh, this was the very next thing called. For the others, if we know the width, there's no reason not to lower it. Scratch lowering will start using load_simd_width_intel soon, so we need it to work in all stages. Reviewed-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/brw/brw_compile_cs.cpp | 2 -- src/intel/compiler/brw/brw_compile_mesh.cpp | 4 ---- src/intel/compiler/brw/brw_nir.c | 15 +++++++++------ src/intel/compiler/brw/brw_nir.h | 2 +- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/intel/compiler/brw/brw_compile_cs.cpp b/src/intel/compiler/brw/brw_compile_cs.cpp index 7e295d5372c..612262bda97 100644 --- a/src/intel/compiler/brw/brw_compile_cs.cpp +++ b/src/intel/compiler/brw/brw_compile_cs.cpp @@ -190,8 +190,6 @@ brw_compile_cs(const struct brw_compiler *compiler, BRW_NIR_SNAPSHOT("first"); brw_nir_apply_key(pt, &key->base, dispatch_width); - BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); - brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ BRW_NIR_PASS(nir_opt_algebraic_late); diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp index 4e502d848e7..715a8ef6849 100644 --- a/src/intel/compiler/brw/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw/brw_compile_mesh.cpp @@ -362,8 +362,6 @@ brw_compile_task(const struct brw_compiler *compiler, BRW_NIR_SNAPSHOT("first"); brw_nir_apply_key(pt, &key->base, dispatch_width); - BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); - brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ BRW_NIR_PASS(nir_opt_algebraic_late); @@ -1100,8 +1098,6 @@ brw_compile_mesh(const struct brw_compiler *compiler, /* Load uniforms can do a better job for constants, so fold before it. */ BRW_NIR_PASS(nir_opt_constant_folding); - BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width); - brw_nir_optimize(pt); /* brw_nir_optimize undoes late lowerings. */ BRW_NIR_PASS(nir_opt_algebraic_late); diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index 544235f1ba4..df7d31c98c0 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -3038,8 +3038,6 @@ brw_nir_apply_key(brw_pass_tracker *pt, pt->progress = false; - unsigned subgroup_size = get_subgroup_size(&nir->info, max_subgroup_size); - /* VS/TCS/TES/GS always run at a fixed SIMD width, which is what our * max_subgroup_size parameter represents. Compute/Mesh can run at * different sizes, but we clone the NIR for each SIMD width, and pass @@ -3052,6 +3050,8 @@ brw_nir_apply_key(brw_pass_tracker *pt, if (nir->info.stage != MESA_SHADER_FRAGMENT) { nir->info.min_subgroup_size = max_subgroup_size; nir->info.max_subgroup_size = max_subgroup_size; + + OPT(brw_nir_lower_simd); } const nir_lower_subgroups_options subgroups_options = { @@ -3442,13 +3442,16 @@ filter_simd(const nir_instr *instr, UNUSED const void *options) static nir_def * lower_simd(nir_builder *b, nir_instr *instr, void *options) { - uintptr_t simd_width = (uintptr_t)options; + unsigned simd_width = b->shader->info.max_subgroup_size; + assert(b->shader->info.min_subgroup_size == simd_width); switch (nir_instr_as_intrinsic(instr)->intrinsic) { case nir_intrinsic_load_simd_width_intel: return nir_imm_int(b, simd_width); case nir_intrinsic_load_subgroup_id: + assert(mesa_shader_stage_uses_workgroup(b->shader->info.stage)); + /* If the whole workgroup fits in one thread, we can lower subgroup_id * to a constant zero. */ @@ -3464,10 +3467,10 @@ lower_simd(nir_builder *b, nir_instr *instr, void *options) } bool -brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width) +brw_nir_lower_simd(nir_shader *nir) { - return nir_shader_lower_instructions(nir, filter_simd, lower_simd, - (void *)(uintptr_t)dispatch_width); + return nir->info.min_subgroup_size == nir->info.max_subgroup_size && + nir_shader_lower_instructions(nir, filter_simd, lower_simd, NULL); } nir_variable * diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h index d5fb38cab7d..456a8a0a857 100644 --- a/src/intel/compiler/brw/brw_nir.h +++ b/src/intel/compiler/brw/brw_nir.h @@ -275,7 +275,7 @@ bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader, const struct intel_device_info *devinfo); -bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width); +bool brw_nir_lower_simd(nir_shader *nir); void brw_postprocess_nir_opts(struct brw_pass_tracker *pt);