From db2163188f1683a6f585c8255712e25e5d21c0e5 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 16 Apr 2025 08:54:14 +0300 Subject: [PATCH] anv/brw: stop turning load_push_constants into load_uniform Those intrinsics have different semantics in particular with regards to divergence. Turning one into the other without invalidating the divergence information breaks NIR validation. But also the conversion means we get artificially less convergent values in the shaders. So just handle load_push_constants in the backend and stop changing things in Anv. Fixes a bunch of tests in dEQP-VK.descriptor_indexing.* dEQP-VK.pipeline.*.push_constant.graphics_pipeline.dynamic_index_* Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Ian Romanick Part-of: (cherry picked from commit df15968813ce46e40582e4904517fb89813046f3) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs_nir.cpp | 4 +++- src/intel/compiler/brw_nir.c | 21 ++++++++++--------- .../compiler/brw_nir_lower_rt_intrinsics.c | 4 ++-- src/intel/vulkan/anv_nir.h | 16 ++++++++------ .../vulkan/anv_nir_compute_push_layout.c | 3 ++- 6 files changed, 29 insertions(+), 21 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index e11622756f6..9d64de902fb 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2454,7 +2454,7 @@ "description": "anv/brw: stop turning load_push_constants into load_uniform", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 632bdec8da5..18885a12ed9 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2007,6 +2007,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform) break; case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: is_scalar = get_nir_src(ntb, instr->src[0]).is_scalar; break; @@ -6112,7 +6113,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, break; } - case nir_intrinsic_load_uniform: { + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: { /* Offsets are in bytes but they should always aligned to * the type size */ diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 483162a21e1..addb4511e9f 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -2276,20 +2276,21 @@ brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compile } nir_def * -brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load_uniform, +brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load, nir_def *base_addr, unsigned off) { - assert(load_uniform->intrinsic == nir_intrinsic_load_uniform); + assert(load->intrinsic == nir_intrinsic_load_push_constant || + load->intrinsic == nir_intrinsic_load_uniform); - unsigned bit_size = load_uniform->def.bit_size; + unsigned bit_size = load->def.bit_size; assert(bit_size >= 8 && bit_size % 8 == 0); unsigned byte_size = bit_size / 8; nir_def *sysval; - if (nir_src_is_const(load_uniform->src[0])) { + if (nir_src_is_const(load->src[0])) { uint64_t offset = off + - nir_intrinsic_base(load_uniform) + - nir_src_as_uint(load_uniform->src[0]); + nir_intrinsic_base(load) + + nir_src_as_uint(load->src[0]); /* Things should be component-aligned. */ assert(offset % byte_size == 0); @@ -2309,14 +2310,14 @@ brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load_uniform, } sysval = nir_extract_bits(b, data, 2, suboffset * 8, - load_uniform->num_components, bit_size); + load->num_components, bit_size); } else { nir_def *offset32 = - nir_iadd_imm(b, load_uniform->src[0].ssa, - off + nir_intrinsic_base(load_uniform)); + nir_iadd_imm(b, load->src[0].ssa, + off + nir_intrinsic_base(load)); nir_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset32)); sysval = nir_load_global_constant(b, addr, byte_size, - load_uniform->num_components, bit_size); + load->num_components, bit_size); } return sysval; diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c index 9d4e222db84..858a2c57065 100644 --- a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c @@ -152,7 +152,8 @@ lower_rt_intrinsics_impl(nir_function_impl *impl, nir_instr_remove(instr); break; - case nir_intrinsic_load_uniform: { + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: /* We don't want to lower this in the launch trampoline. */ if (stage == MESA_SHADER_COMPUTE) break; @@ -162,7 +163,6 @@ lower_rt_intrinsics_impl(nir_function_impl *impl, BRW_RT_PUSH_CONST_OFFSET); break; - } case nir_intrinsic_load_ray_launch_id: sysval = nir_channels(b, hotzone, 0xe); diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index deb587b3aa6..78c176a316b 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -42,13 +42,17 @@ extern "C" { nir_imm_int(b, 0), \ .base = anv_drv_const_offset(field), \ .range = components * anv_drv_const_size(field)) +/* Use load_uniform for indexed values since load_push_constant requires that + * the offset source is dynamically uniform in the subgroup which we cannot + * guarantee. + */ #define anv_load_driver_uniform_indexed(b, components, field, idx) \ - nir_load_push_constant(b, components, \ - anv_drv_const_size(field[0]) * 8, \ - nir_imul_imm(b, idx, \ - anv_drv_const_size(field[0])), \ - .base = anv_drv_const_offset(field), \ - .range = anv_drv_const_size(field)) + nir_load_uniform(b, components, \ + anv_drv_const_size(field[0]) * 8, \ + nir_imul_imm(b, idx, \ + anv_drv_const_size(field[0])), \ + .base = anv_drv_const_offset(field), \ + .range = anv_drv_const_size(field)) diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index 00c72b4885e..b8b34bc999a 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -57,6 +57,7 @@ anv_nir_compute_push_layout(nir_shader *nir, has_const_ubo = true; break; + case nir_intrinsic_load_uniform: case nir_intrinsic_load_push_constant: { unsigned base = nir_intrinsic_base(intrin); unsigned range = nir_intrinsic_range(intrin); @@ -151,6 +152,7 @@ anv_nir_compute_push_layout(nir_shader *nir, nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { + case nir_intrinsic_load_uniform: case nir_intrinsic_load_push_constant: { /* With bindless shaders we load uniforms with SEND * messages. All the push constants are located after the @@ -160,7 +162,6 @@ anv_nir_compute_push_layout(nir_shader *nir, */ unsigned base_offset = brw_shader_stage_requires_bindless_resources(nir->info.stage) ? 0 : push_start; - intrin->intrinsic = nir_intrinsic_load_uniform; nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) - base_offset);