From 255d1e883d76d80e2ad5a796814ca0a9483c2ca6 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 17 Oct 2025 10:49:21 +0300 Subject: [PATCH] nir/divergence: fix handling of intel uniform block load These loads are normally always uniform, but for the purpose of fused-thread handling we need to check whether their sources are divergent. Signed-off-by: Lionel Landwerlin Fixes: ca1533cd03 ("nir/divergence: add a new mode to cover fused threads on Intel HW") Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_divergence_analysis.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 295ed1c1d8a..c9cd8fa1cf9 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -319,14 +319,10 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_base_global_invocation_id: case nir_intrinsic_load_base_workgroup_id: case nir_intrinsic_load_alpha_reference_amd: - case nir_intrinsic_load_ubo_uniform_block_intel: - case nir_intrinsic_load_ssbo_uniform_block_intel: - case nir_intrinsic_load_shared_uniform_block_intel: case nir_intrinsic_load_barycentric_optimize_amd: case nir_intrinsic_load_poly_line_smooth_enabled: case nir_intrinsic_load_rasterization_primitive_amd: case nir_intrinsic_unit_test_uniform_amd: - case nir_intrinsic_load_global_constant_uniform_block_intel: case nir_intrinsic_load_debug_log_desc_amd: case nir_intrinsic_load_xfb_state_address_gfx12_amd: case nir_intrinsic_cmat_length: @@ -364,6 +360,24 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) is_divergent = false; break; + case nir_intrinsic_load_ubo_uniform_block_intel: + case nir_intrinsic_load_ssbo_uniform_block_intel: + case nir_intrinsic_load_shared_uniform_block_intel: + case nir_intrinsic_load_global_constant_uniform_block_intel: + if (options & 
(nir_divergence_across_subgroups | + nir_divergence_multiple_workgroup_per_compute_subgroup)) { + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + for (unsigned i = 0; i < num_srcs; i++) { + if (src_divergent(instr->src[i], state)) { + is_divergent = true; + break; + } + } + } else { + is_divergent = false; + } + break; + /* This is divergent because it specifically loads sequential values into * successive SIMD lanes. */