mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
nir: plumb load_global_bounded
This lets the backend implement bounded loads (i.e. robust SSBOs) in a way that's more clever than a full branch. Similar idea to load_global_constant_bounded, which should eventually be merged into this. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Job Noorman <job@noorman.info> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35720>
This commit is contained in:
parent
37e71a5cb2
commit
caa0854da8
6 changed files with 20 additions and 5 deletions
|
|
@ -716,6 +716,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
||||||
case nir_intrinsic_image_load_raw_intel:
|
case nir_intrinsic_image_load_raw_intel:
|
||||||
case nir_intrinsic_get_ubo_size:
|
case nir_intrinsic_get_ubo_size:
|
||||||
case nir_intrinsic_load_ssbo_address:
|
case nir_intrinsic_load_ssbo_address:
|
||||||
|
case nir_intrinsic_load_global_bounded:
|
||||||
case nir_intrinsic_load_global_constant_bounded:
|
case nir_intrinsic_load_global_constant_bounded:
|
||||||
case nir_intrinsic_load_global_constant_offset:
|
case nir_intrinsic_load_global_constant_offset:
|
||||||
case nir_intrinsic_load_reg:
|
case nir_intrinsic_load_reg:
|
||||||
|
|
|
||||||
|
|
@ -1239,6 +1239,9 @@ load("constant", [1], [BASE, RANGE, ACCESS, ALIGN_MUL, ALIGN_OFFSET],
|
||||||
[CAN_ELIMINATE, CAN_REORDER])
|
[CAN_ELIMINATE, CAN_REORDER])
|
||||||
# src[] = { address }.
|
# src[] = { address }.
|
||||||
load("global", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
load("global", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||||
|
# src[] = { base_address, offset, bound }.
|
||||||
|
load("global_bounded", [1, 1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET],
|
||||||
|
[CAN_ELIMINATE])
|
||||||
# src[] = { address }.
|
# src[] = { address }.
|
||||||
load("global_2x32", [2], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
load("global_2x32", [2], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||||
# src[] = { address }.
|
# src[] = { address }.
|
||||||
|
|
|
||||||
|
|
@ -1525,7 +1525,10 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
op = nir_intrinsic_load_ubo;
|
op = nir_intrinsic_load_ubo;
|
||||||
break;
|
break;
|
||||||
case nir_var_mem_ssbo:
|
case nir_var_mem_ssbo:
|
||||||
if (addr_format_is_global(addr_format, mode))
|
if (addr_format == nir_address_format_64bit_bounded_global &&
|
||||||
|
b->shader->options->has_load_global_bounded)
|
||||||
|
op = nir_intrinsic_load_global_bounded;
|
||||||
|
else if (addr_format_is_global(addr_format, mode))
|
||||||
op = nir_intrinsic_load_global;
|
op = nir_intrinsic_load_global;
|
||||||
else
|
else
|
||||||
op = nir_intrinsic_load_ssbo;
|
op = nir_intrinsic_load_ssbo;
|
||||||
|
|
@ -1608,7 +1611,8 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
load->src[0] = nir_src_for_ssa(
|
load->src[0] = nir_src_for_ssa(
|
||||||
nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
|
nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
|
||||||
load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
|
load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
|
||||||
} else if (op == nir_intrinsic_load_global_constant_bounded) {
|
} else if (op == nir_intrinsic_load_global_bounded ||
|
||||||
|
op == nir_intrinsic_load_global_constant_bounded) {
|
||||||
assert(addr_format == nir_address_format_64bit_bounded_global);
|
assert(addr_format == nir_address_format_64bit_bounded_global);
|
||||||
load->src[0] = nir_src_for_ssa(
|
load->src[0] = nir_src_for_ssa(
|
||||||
nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
|
nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
|
||||||
|
|
@ -1665,8 +1669,9 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
|
|
||||||
nir_def *result;
|
nir_def *result;
|
||||||
if (addr_format_needs_bounds_check(addr_format) &&
|
if (addr_format_needs_bounds_check(addr_format) &&
|
||||||
op != nir_intrinsic_load_global_constant_bounded) {
|
op != nir_intrinsic_load_global_constant_bounded &&
|
||||||
/* We don't need to bounds-check global_constant_bounded because bounds
|
op != nir_intrinsic_load_global_bounded) {
|
||||||
|
/* We don't need to bounds-check global_(constant_)bounded because bounds
|
||||||
* checking is handled by the intrinsic itself.
|
* checking is handled by the intrinsic itself.
|
||||||
*
|
*
|
||||||
* The Vulkan spec for robustBufferAccess gives us quite a few options
|
* The Vulkan spec for robustBufferAccess gives us quite a few options
|
||||||
|
|
@ -2079,7 +2084,7 @@ nir_lower_explicit_io_instr(nir_builder *b,
|
||||||
* is to just split any loads and stores into individual components here.
|
* is to just split any loads and stores into individual components here.
|
||||||
*
|
*
|
||||||
* TODO: At some point in the future we may want to add more ops similar to
|
* TODO: At some point in the future we may want to add more ops similar to
|
||||||
* nir_intrinsic_load_global_constant_bounded and make bouds checking the
|
* nir_intrinsic_load_global_(constant_)bounded and make bouds checking the
|
||||||
* back-end's problem. Another option would be to somehow plumb more of
|
* back-end's problem. Another option would be to somehow plumb more of
|
||||||
* that information through to nir_lower_explicit_io. For now, however,
|
* that information through to nir_lower_explicit_io. For now, however,
|
||||||
* scalarizing is at least correct.
|
* scalarizing is at least correct.
|
||||||
|
|
|
||||||
|
|
@ -246,6 +246,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
|
||||||
case nir_intrinsic_image_load:
|
case nir_intrinsic_image_load:
|
||||||
case nir_intrinsic_image_samples_identical:
|
case nir_intrinsic_image_samples_identical:
|
||||||
case nir_intrinsic_bindless_image_load:
|
case nir_intrinsic_bindless_image_load:
|
||||||
|
case nir_intrinsic_load_global_bounded:
|
||||||
case nir_intrinsic_load_ssbo:
|
case nir_intrinsic_load_ssbo:
|
||||||
case nir_intrinsic_load_ssbo_intel:
|
case nir_intrinsic_load_ssbo_intel:
|
||||||
case nir_intrinsic_load_ssbo_ir3:
|
case nir_intrinsic_load_ssbo_ir3:
|
||||||
|
|
|
||||||
|
|
@ -119,6 +119,8 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_
|
||||||
case nir_intrinsic_load_ssbo_intel:
|
case nir_intrinsic_load_ssbo_intel:
|
||||||
*can_mov_out_of_loop = false;
|
*can_mov_out_of_loop = false;
|
||||||
return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin);
|
return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin);
|
||||||
|
case nir_intrinsic_load_global_bounded:
|
||||||
|
return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin);
|
||||||
case nir_intrinsic_load_input:
|
case nir_intrinsic_load_input:
|
||||||
case nir_intrinsic_load_per_primitive_input:
|
case nir_intrinsic_load_per_primitive_input:
|
||||||
case nir_intrinsic_load_interpolated_input:
|
case nir_intrinsic_load_interpolated_input:
|
||||||
|
|
|
||||||
|
|
@ -641,6 +641,9 @@ typedef struct nir_shader_compiler_options {
|
||||||
/** Backend supports f2e4m3fn_satfn */
|
/** Backend supports f2e4m3fn_satfn */
|
||||||
bool has_f2e4m3fn_satfn;
|
bool has_f2e4m3fn_satfn;
|
||||||
|
|
||||||
|
/** Backend supports load_global_bounded intrinsics. */
|
||||||
|
bool has_load_global_bounded;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this the Intel vec4 backend?
|
* Is this the Intel vec4 backend?
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue