From 8ce98fedc40545670711560c24e8b9b3f915ebb7 Mon Sep 17 00:00:00 2001 From: Calder Young Date: Mon, 2 Mar 2026 22:57:57 -0800 Subject: [PATCH] anv: Make sure robust UBO access does not fault We can just conditionally replace the address with the address of a zero-initialized cacheline if the read is going to go out of bounds. Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/intel_shader_enums.h | 2 + src/intel/vulkan/anv_device.c | 4 ++ src/intel/vulkan/anv_nir_lower_ubo_loads.c | 63 +++++++++++----------- src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/anv_shader.c | 8 +++ 5 files changed, 48 insertions(+), 30 deletions(-) diff --git a/src/intel/compiler/intel_shader_enums.h b/src/intel/compiler/intel_shader_enums.h index 85490f03c32..5e1bd0f12ba 100644 --- a/src/intel/compiler/intel_shader_enums.h +++ b/src/intel/compiler/intel_shader_enums.h @@ -609,6 +609,8 @@ enum intel_shader_reloc_id { BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_LOW, BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_HIGH, BRW_SHADER_RELOC_PRINTF_BUFFER_SIZE, + BRW_SHADER_RELOC_NULL_CACHELINE_ADDR_LOW, + BRW_SHADER_RELOC_NULL_CACHELINE_ADDR_HIGH, /* Leave this entry last: */ BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE, BRW_SHADER_RELOC_LAST_EMBEDDED_SAMPLER_HANDLE = diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 55c9a3221bd..3069656efb6 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -961,6 +961,10 @@ VkResult anv_CreateDevice( physical_device->rt_uuid, sizeof(physical_device->rt_uuid)); + /* A null cache line for bounded UBO loads. */ + wa_addr = anv_address_add_aligned(wa_addr, 64, 64); + device->null_cacheline_addr = wa_addr; + /* Make sure the workaround address is the last one in the workaround BO, * so that writes never overwrite other bits of data stored in the * workaround BO. 
diff --git a/src/intel/vulkan/anv_nir_lower_ubo_loads.c b/src/intel/vulkan/anv_nir_lower_ubo_loads.c index 0ff37f61d05..fca19a920e3 100644 --- a/src/intel/vulkan/anv_nir_lower_ubo_loads.c +++ b/src/intel/vulkan/anv_nir_lower_ubo_loads.c @@ -24,6 +24,27 @@ #include "anv_nir.h" #include "nir_builder.h" +static nir_def * +lower_ubo_load_addr(nir_builder *b, nir_def *base_addr, + nir_def *offset, nir_def *bound, + unsigned load_size) +{ + nir_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset)); + + if (bound) { + addr = + nir_bcsel(b, + nir_ult(b, nir_iadd_imm(b, offset, load_size - 1), bound), + addr, + nir_pack_64_2x32_split( + b, + nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_NULL_CACHELINE_ADDR_LOW), + nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_NULL_CACHELINE_ADDR_HIGH))); + } + + return addr; +} + static bool lower_ubo_load_instr(nir_builder *b, nir_intrinsic_instr *load, UNUSED void *_data) @@ -58,7 +79,10 @@ lower_ubo_load_instr(nir_builder *b, nir_intrinsic_instr *load, /* Load two just in case we go over a 64B boundary */ nir_def *data[2]; for (unsigned i = 0; i < 2; i++) { - nir_def *addr = nir_iadd_imm(b, base_addr, aligned_offset + i * 64); + nir_def *addr = + lower_ubo_load_addr(b, base_addr, + nir_imm_int(b, aligned_offset + i * 64), + bound, 1); data[i] = nir_load_global_constant_uniform_block_intel( b, 16, 32, addr, @@ -95,34 +119,14 @@ lower_ubo_load_instr(nir_builder *b, nir_intrinsic_instr *load, load->num_components, bit_size); } else { nir_def *offset = load->src[1].ssa; - nir_def *addr = nir_iadd(b, base_addr, nir_u2u64(b, offset)); + unsigned load_size = byte_size * load->num_components; - if (bound) { - nir_def *zero = nir_imm_zero(b, load->num_components, bit_size); - - unsigned load_size = byte_size * load->num_components; - nir_def *in_bounds = - nir_ult(b, nir_iadd_imm(b, offset, load_size - 1), bound); - - nir_push_if(b, in_bounds); - - nir_def *load_val = - nir_load_global_constant(b, load->def.num_components, - 
load->def.bit_size, addr, - .access = nir_intrinsic_access(load), - .align_mul = nir_intrinsic_align_mul(load), - .align_offset = nir_intrinsic_align_offset(load)); - - nir_pop_if(b, NULL); - - val = nir_if_phi(b, load_val, zero); - } else { - val = nir_load_global_constant(b, load->def.num_components, - load->def.bit_size, addr, - .access = nir_intrinsic_access(load), - .align_mul = nir_intrinsic_align_mul(load), - .align_offset = nir_intrinsic_align_offset(load)); - } + nir_def *addr = lower_ubo_load_addr(b, base_addr, offset, bound, load_size); + val = nir_load_global_constant(b, load->def.num_components, + load->def.bit_size, addr, + .access = nir_intrinsic_access(load), + .align_mul = nir_intrinsic_align_mul(load), + .align_offset = nir_intrinsic_align_offset(load)); } nir_def_replace(&load->def, val); @@ -134,6 +138,5 @@ bool anv_nir_lower_ubo_loads(nir_shader *shader) { return nir_shader_intrinsics_pass(shader, lower_ubo_load_instr, - nir_metadata_none, - NULL); + nir_metadata_none, NULL); } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index dae4cab16a4..983a9c48145 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2666,6 +2666,7 @@ struct anv_device { */ struct anv_bo * workaround_bo; struct anv_address workaround_address; + struct anv_address null_cacheline_addr; struct anv_bo * dummy_aux_bo; struct anv_bo * mem_fence_bo; diff --git a/src/intel/vulkan/anv_shader.c b/src/intel/vulkan/anv_shader.c index bf43d3456af..7fa6506419b 100644 --- a/src/intel/vulkan/anv_shader.c +++ b/src/intel/vulkan/anv_shader.c @@ -587,6 +587,14 @@ anv_shader_set_relocs(struct anv_device *device, .id = INTEL_SHADER_RELOC_SHADER_START_OFFSET, .value = shader->kernel.offset, }; + reloc_values[rv_count++] = (struct intel_shader_reloc_value) { + .id = BRW_SHADER_RELOC_NULL_CACHELINE_ADDR_HIGH, + .value = anv_address_physical(device->null_cacheline_addr) >> 32, + }; + reloc_values[rv_count++] = (struct 
intel_shader_reloc_value) { + .id = BRW_SHADER_RELOC_NULL_CACHELINE_ADDR_LOW, + .value = anv_address_physical(device->null_cacheline_addr) & 0xffffffff, + }; if (brw_shader_stage_is_bindless(shader->vk.stage)) { const struct brw_bs_prog_data *bs_prog_data = brw_bs_prog_data_const(shader->prog_data);