From a3293eb26ccf524d4dfe3fbdfe825522ff76767f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 16 Apr 2025 08:54:14 +0300 Subject: [PATCH] anv/brw: stop turning load_push_constants into load_uniform Those intrinsics have different semantics in particular with regards to divergence. Turning one into the other without invalidating the divergence information breaks NIR validation. But also the conversion means we get artificially less convergent values in the shaders. So just handle load_push_constants in the backend and stop changing things in Anv. Fixes a bunch of tests in dEQP-VK.descriptor_indexing.* dEQP-VK.pipeline.*.push_constant.graphics_pipeline.dynamic_index_* Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Ian Romanick Part-of: (cherry picked from commit df15968813ce46e40582e4904517fb89813046f3) --- .pick_status.json | 2 +- src/intel/compiler/brw_from_nir.cpp | 4 +++- src/intel/compiler/brw_nir.c | 21 ++++++++++--------- .../compiler/brw_nir_lower_rt_intrinsics.c | 4 ++-- src/intel/vulkan/anv_nir.h | 16 ++++++++------ .../vulkan/anv_nir_compute_push_layout.c | 3 ++- 6 files changed, 29 insertions(+), 21 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 8450cd78640..f30147e47d0 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -5274,7 +5274,7 @@ "description": "anv/brw: stop turning load_push_constants into load_uniform", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 4a9fd0b3289..b71430b3ca2 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -1987,6 +1987,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform) break; case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: is_scalar = get_nir_src(ntb, instr->src[0], 0).is_scalar; break; @@ -6154,7 +6155,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, break; } - case nir_intrinsic_load_uniform: { + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: { /* Offsets are in bytes but they should always aligned to * the type size */ diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index f9a6dd43402..9daa0ffe419 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -2326,20 +2326,21 @@ brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compile } nir_def * -brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load_uniform, +brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load, nir_def *base_addr, unsigned off) { - assert(load_uniform->intrinsic == nir_intrinsic_load_uniform); + assert(load->intrinsic == nir_intrinsic_load_push_constant || + load->intrinsic == nir_intrinsic_load_uniform); - unsigned bit_size = load_uniform->def.bit_size; + unsigned bit_size = load->def.bit_size; assert(bit_size >= 8 && bit_size % 8 == 0); unsigned byte_size = bit_size / 8; nir_def *sysval; - if (nir_src_is_const(load_uniform->src[0])) { + if (nir_src_is_const(load->src[0])) { uint64_t offset = off + - nir_intrinsic_base(load_uniform) + - nir_src_as_uint(load_uniform->src[0]); + nir_intrinsic_base(load) + + nir_src_as_uint(load->src[0]); /* Things should be component-aligned. */ assert(offset % byte_size == 0); @@ -2359,14 +2360,14 @@ brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load_uniform, } sysval = nir_extract_bits(b, data, 2, suboffset * 8, - load_uniform->num_components, bit_size); + load->num_components, bit_size); } else { nir_def *offset32 = - nir_iadd_imm(b, load_uniform->src[0].ssa, - off + nir_intrinsic_base(load_uniform)); + nir_iadd_imm(b, load->src[0].ssa, + off + nir_intrinsic_base(load)); nir_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset32)); sysval = nir_load_global_constant(b, addr, byte_size, - load_uniform->num_components, bit_size); + load->num_components, bit_size); } return sysval; diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c index 5194862ef71..2d8c8dee1c0 100644 --- a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c @@ -153,7 +153,8 @@ lower_rt_intrinsics_impl(nir_function_impl *impl, nir_instr_remove(instr); break; - case nir_intrinsic_load_uniform: { + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: /* We don't want to lower this in the launch trampoline. * * Also if the driver chooses to use an inline push address, we @@ -169,7 +170,6 @@ lower_rt_intrinsics_impl(nir_function_impl *impl, BRW_RT_PUSH_CONST_OFFSET); break; - } case nir_intrinsic_load_ray_launch_id: sysval = nir_channels(b, hotzone, 0xe); diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index deb587b3aa6..78c176a316b 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -42,13 +42,17 @@ extern "C" { nir_imm_int(b, 0), \ .base = anv_drv_const_offset(field), \ .range = components * anv_drv_const_size(field)) +/* Use load_uniform for indexed values since load_push_constant requires that + * the offset source is dynamically uniform in the subgroup which we cannot + * guarantee. + */ #define anv_load_driver_uniform_indexed(b, components, field, idx) \ - nir_load_push_constant(b, components, \ - anv_drv_const_size(field[0]) * 8, \ - nir_imul_imm(b, idx, \ - anv_drv_const_size(field[0])), \ - .base = anv_drv_const_offset(field), \ - .range = anv_drv_const_size(field)) + nir_load_uniform(b, components, \ + anv_drv_const_size(field[0]) * 8, \ + nir_imul_imm(b, idx, \ + anv_drv_const_size(field[0])), \ + .base = anv_drv_const_offset(field), \ + .range = anv_drv_const_size(field)) diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index 8509e194fdd..02a27931912 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -57,6 +57,7 @@ anv_nir_compute_push_layout(nir_shader *nir, has_const_ubo = true; break; + case nir_intrinsic_load_uniform: case nir_intrinsic_load_push_constant: { unsigned base = nir_intrinsic_base(intrin); unsigned range = nir_intrinsic_range(intrin); @@ -175,6 +176,7 @@ anv_nir_compute_push_layout(nir_shader *nir, nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { + case nir_intrinsic_load_uniform: case nir_intrinsic_load_push_constant: { /* With bindless shaders we load uniforms with SEND * messages. All the push constants are located after the @@ -184,7 +186,6 @@ anv_nir_compute_push_layout(nir_shader *nir, */ unsigned base_offset = brw_shader_stage_is_bindless(nir->info.stage) ? 0 : push_start; - intrin->intrinsic = nir_intrinsic_load_uniform; nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) - base_offset);