From caa0854da87545d9e607b97aee0e9e66cc4b52a0 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Tue, 24 Jun 2025 11:47:51 -0400
Subject: [PATCH] nir: plumb load_global_bounded

this lets the backend implement bounded loads (i.e. robust SSBOs) in a
way that's more clever than a full branch. similar idea to
load_global_constant_bounded, which should eventually be merged into
this.

Signed-off-by: Alyssa Rosenzweig
Reviewed-by: Job Noorman
Part-of:
---
 src/compiler/nir/nir_divergence_analysis.c     |  1 +
 src/compiler/nir/nir_intrinsics.py             |  3 +++
 src/compiler/nir/nir_lower_io.c                | 15 ++++++++++-----
 src/compiler/nir/nir_opt_preamble.c            |  1 +
 src/compiler/nir/nir_opt_sink.c                |  2 ++
 src/compiler/nir/nir_shader_compiler_options.h |  3 +++
 6 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index d22322dc5ce..2b56c86370f 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -716,6 +716,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_image_load_raw_intel:
    case nir_intrinsic_get_ubo_size:
    case nir_intrinsic_load_ssbo_address:
+   case nir_intrinsic_load_global_bounded:
    case nir_intrinsic_load_global_constant_bounded:
    case nir_intrinsic_load_global_constant_offset:
    case nir_intrinsic_load_reg:
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index e88f6390f9c..e34a4fe7bc1 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1239,6 +1239,9 @@ load("constant", [1], [BASE, RANGE, ACCESS, ALIGN_MUL, ALIGN_OFFSET],
      [CAN_ELIMINATE, CAN_REORDER])
 # src[] = { address }.
 load("global", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
+# src[] = { base_address, offset, bound }.
+load("global_bounded", [1, 1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET],
+     [CAN_ELIMINATE])
 # src[] = { address }.
 load("global_2x32", [2], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 # src[] = { address }.
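Aside (not part of the patch): the semantics a backend has to provide.
Assuming load_global_bounded follows the existing
load_global_constant_bounded -- load from base_address + offset if the
whole access fits under bound, otherwise return zero -- the scalar 32-bit
case behaves roughly like this hypothetical helper:

    #include <stdint.h>
    #include <string.h>

    /* Zero is returned for out-of-bounds reads, which is one of the
     * behaviors Vulkan's robustBufferAccess rules allow; hardware with a
     * bounds-checked addressing mode can implement this without a branch.
     * (Overflow of offset + size is ignored for brevity.) */
    static uint32_t
    load_global_bounded_1x32(const char *base_address, uint32_t offset,
                             uint32_t bound)
    {
       if (offset + sizeof(uint32_t) > bound)
          return 0;

       uint32_t value;
       memcpy(&value, base_address + offset, sizeof(value));
       return value;
    }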
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 646c5d4ccb2..03dab9bce44 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -1525,7 +1525,10 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       op = nir_intrinsic_load_ubo;
       break;
    case nir_var_mem_ssbo:
-      if (addr_format_is_global(addr_format, mode))
+      if (addr_format == nir_address_format_64bit_bounded_global &&
+          b->shader->options->has_load_global_bounded)
+         op = nir_intrinsic_load_global_bounded;
+      else if (addr_format_is_global(addr_format, mode))
          op = nir_intrinsic_load_global;
       else
          op = nir_intrinsic_load_ssbo;
@@ -1608,7 +1611,8 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       load->src[0] = nir_src_for_ssa(
          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
-   } else if (op == nir_intrinsic_load_global_constant_bounded) {
+   } else if (op == nir_intrinsic_load_global_bounded ||
+              op == nir_intrinsic_load_global_constant_bounded) {
       assert(addr_format == nir_address_format_64bit_bounded_global);
       load->src[0] = nir_src_for_ssa(
          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
@@ -1665,8 +1669,9 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
 
    nir_def *result;
    if (addr_format_needs_bounds_check(addr_format) &&
-       op != nir_intrinsic_load_global_constant_bounded) {
-      /* We don't need to bounds-check global_constant_bounded because bounds
+       op != nir_intrinsic_load_global_constant_bounded &&
+       op != nir_intrinsic_load_global_bounded) {
+      /* We don't need to bounds-check global_(constant_)bounded because bounds
        * checking is handled by the intrinsic itself.
        *
        * The Vulkan spec for robustBufferAccess gives us quite a few options
@@ -2079,7 +2084,7 @@ nir_lower_explicit_io_instr(nir_builder *b,
     * is to just split any loads and stores into individual components here.
     *
     * TODO: At some point in the future we may want to add more ops similar to
-    * nir_intrinsic_load_global_constant_bounded and make bouds checking the
+    * nir_intrinsic_load_global_(constant_)bounded and make bounds checking the
     * back-end's problem. Another option would be to somehow plumb more of
     * that information through to nir_lower_explicit_io. For now, however,
     * scalarizing is at least correct.
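Aside (not part of the patch): with the nir_intrinsics.py entry above,
nir_builder grows an auto-generated nir_load_global_bounded helper taking
the three sources in the documented order { base_address, offset, bound }.
A sketch of how a driver-side pass might emit it; the wrapper name is
hypothetical and the argument order assumes the usual generated-builder
pattern of (builder, num_components, bit_size, srcs..., indices):

    /* Emit a robust vec4 32-bit SSBO load. No explicit bounds-check branch
     * is needed because the intrinsic itself returns zero out of bounds. */
    static nir_def *
    emit_robust_ssbo_load(nir_builder *b, nir_def *base_address /* 1x64 */,
                          nir_def *offset, nir_def *bound /* 1x32 each */)
    {
       return nir_load_global_bounded(b, 4, 32, base_address, offset, bound,
                                      .align_mul = 4);
    }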
diff --git a/src/compiler/nir/nir_opt_preamble.c b/src/compiler/nir/nir_opt_preamble.c
index c712dc39733..20ef7aca06d 100644
--- a/src/compiler/nir/nir_opt_preamble.c
+++ b/src/compiler/nir/nir_opt_preamble.c
@@ -246,6 +246,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
    case nir_intrinsic_image_load:
    case nir_intrinsic_image_samples_identical:
    case nir_intrinsic_bindless_image_load:
+   case nir_intrinsic_load_global_bounded:
    case nir_intrinsic_load_ssbo:
    case nir_intrinsic_load_ssbo_intel:
    case nir_intrinsic_load_ssbo_ir3:
diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c
index 5715c1cd118..66aa44c5f2b 100644
--- a/src/compiler/nir/nir_opt_sink.c
+++ b/src/compiler/nir/nir_opt_sink.c
@@ -119,6 +119,8 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_
    case nir_intrinsic_load_ssbo_intel:
       *can_mov_out_of_loop = false;
       return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin);
+   case nir_intrinsic_load_global_bounded:
+      return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin);
    case nir_intrinsic_load_input:
    case nir_intrinsic_load_per_primitive_input:
    case nir_intrinsic_load_interpolated_input:
diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h
index e2815072bef..426ec62805b 100644
--- a/src/compiler/nir/nir_shader_compiler_options.h
+++ b/src/compiler/nir/nir_shader_compiler_options.h
@@ -641,6 +641,9 @@ typedef struct nir_shader_compiler_options {
    /** Backend supports f2e4m3fn_satfn */
    bool has_f2e4m3fn_satfn;
 
+   /** Backend supports load_global_bounded intrinsics. */
+   bool has_load_global_bounded;
+
    /**
     * Is this the Intel vec4 backend?
     *
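Aside (not part of the patch): the new flag is zero-initialized, so
existing backends keep getting load_global plus the bounds check lowered
in nir_lower_io. A backend with a native bounded-load instruction opts in
through its compiler options, roughly like this (the struct name below is
a hypothetical example):

    /* Hypothetical backend's options; other capability bits elided. */
    static const nir_shader_compiler_options example_backend_options = {
       .has_load_global_bounded = true,
    };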