diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index e90888cc8d6..97c4e7b839e 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -1756,6 +1756,71 @@ get_mem_access_size_align(nir_intrinsic_op intrin, uint8_t bytes,
    }
 }
 
+/* Per-intrinsic callback: swap a generic SSBO load/store for the matching
+ * *_ssbo_intel intrinsic, keeping the original sources, access flags and
+ * alignment, with the base index initialized to 0. Returns true when the
+ * instruction was rewritten.
+ */
+static bool
+brw_nir_ssbo_intel_instr(nir_builder *b,
+                         nir_intrinsic_instr *intrin,
+                         void *cb_data)
+{
+   if (intrin->intrinsic == nir_intrinsic_load_ssbo) {
+      nir_def *old_def = &intrin->def;
+
+      b->cursor = nir_before_instr(&intrin->instr);
+      nir_def *new_def =
+         nir_load_ssbo_intel(b,
+                             old_def->num_components,
+                             old_def->bit_size,
+                             intrin->src[0].ssa,
+                             intrin->src[1].ssa,
+                             .access = nir_intrinsic_access(intrin),
+                             .align_mul = nir_intrinsic_align_mul(intrin),
+                             .align_offset = nir_intrinsic_align_offset(intrin),
+                             .base = 0);
+
+      /* Copy the divergent/loop_invariant flags of the replaced value. */
+      new_def->divergent = old_def->divergent;
+      new_def->loop_invariant = old_def->loop_invariant;
+
+      nir_def_replace(old_def, new_def);
+      return true;
+   }
+
+   if (intrin->intrinsic == nir_intrinsic_store_ssbo) {
+      /* The generic store is removed first; the builder cursor is whatever
+       * nir_instr_remove() hands back.
+       */
+      b->cursor = nir_instr_remove(&intrin->instr);
+      nir_store_ssbo_intel(b,
+                           intrin->src[0].ssa,
+                           intrin->src[1].ssa,
+                           intrin->src[2].ssa,
+                           .access = nir_intrinsic_access(intrin),
+                           .align_mul = nir_intrinsic_align_mul(intrin),
+                           .align_offset = nir_intrinsic_align_offset(intrin),
+                           .base = 0);
+      return true;
+   }
+
+   return false;
+}
+
+/* Runs brw_nir_ssbo_intel_instr through nir_shader_intrinsics_pass(),
+ * reporting nir_metadata_control_flow as preserved.
+ */
+static bool
+brw_nir_ssbo_intel(nir_shader *shader)
+{
+   const bool progress =
+      nir_shader_intrinsics_pass(shader, brw_nir_ssbo_intel_instr,
+                                 nir_metadata_control_flow, NULL);
+
+   return progress;
+}
+
 static void
 brw_vectorize_lower_mem_access(nir_shader *nir,
                                const struct brw_compiler *compiler,
@@ -1808,7 +1873,6 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
       }
    }
 
-
    struct brw_mem_access_cb_data cb_data = {
       .devinfo = compiler->devinfo,
    };
@@ -1836,6 +1900,23 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
       OPT(nir_opt_algebraic);
       OPT(nir_opt_constant_folding);
    }
+
+   /* Run this only after vectorization and
+    * brw_nir_rebase_const_offset_ubo_loads, so that the offset put into the
+    * messages is as large as possible.
+    */
+   if (compiler->devinfo->ver >= 20) {
+      const nir_opt_offsets_options offset_options = {
+         .buffer_max = UINT32_MAX,
+         .shared_max = UINT32_MAX,
+         .shared_atomic_max = UINT32_MAX,
+         .uniform_max = UINT32_MAX,
+      };
+
+      OPT(brw_nir_ssbo_intel);
+      OPT(nir_opt_offsets, &offset_options);
+      OPT(brw_nir_lower_immediate_offsets);
+   }
 }
 
 static bool
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 878074f4b81..2a2f7548655 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -215,6 +215,8 @@ bool brw_nir_lower_texture(nir_shader *nir,
 
 bool brw_nir_lower_sample_index_in_coord(nir_shader *nir);
 
+bool brw_nir_lower_immediate_offsets(nir_shader *shader);
+
 bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader,
                                         const struct intel_device_info *devinfo);
 
diff --git a/src/intel/compiler/brw_nir_lower_immediate_offsets.c b/src/intel/compiler/brw_nir_lower_immediate_offsets.c
new file mode 100644
index 00000000000..6b0ea8b9a4a
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_immediate_offsets.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "brw_eu.h"
+#include "brw_nir.h"
+
+/* Tells whether the binding source is produced by a resource_intel intrinsic
+ * whose access flags include nir_resource_intel_bindless.
+ */
+static bool
+binding_is_bindless(nir_src *binding)
+{
+   nir_instr *parent = binding->ssa->parent_instr;
+   if (parent->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *resource = nir_instr_as_intrinsic(parent);
+   if (resource->intrinsic != nir_intrinsic_resource_intel)
+      return false;
+
+   return (nir_intrinsic_resource_access_intel(resource) &
+           nir_resource_intel_bindless) != 0;
+}
+
+/* Per-intrinsic callback making the base index of the handled shared, SSBO
+ * and UBO-block intrinsics fit the [u_intN_min, u_intN_max] range of the bit
+ * count selected for the access and be a multiple of 4. Any part of the base
+ * that does not fit is added to the offset source instead. Returns true when
+ * the intrinsic was modified.
+ */
+static bool
+lower_immediate_offsets(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
+   unsigned offset_bits = 0;
+
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_load_shared:
+   case nir_intrinsic_store_shared:
+   case nir_intrinsic_shared_atomic:
+   case nir_intrinsic_shared_atomic_swap:
+   case nir_intrinsic_load_shared_block_intel:
+   case nir_intrinsic_store_shared_block_intel:
+   case nir_intrinsic_load_shared_uniform_block_intel:
+      offset_bits = LSC_ADDRESS_OFFSET_FLAT_BITS;
+      break;
+
+   case nir_intrinsic_load_ssbo_intel:
+   case nir_intrinsic_load_ubo_uniform_block_intel:
+   case nir_intrinsic_load_ssbo_uniform_block_intel:
+   case nir_intrinsic_store_ssbo_intel:
+   case nir_intrinsic_store_ssbo_block_intel:
+      /* Bindless resources use the SS limit, everything else the BTI one. */
+      if (binding_is_bindless(nir_get_io_index_src(intrin)))
+         offset_bits = LSC_ADDRESS_OFFSET_SS_BITS;
+      else
+         offset_bits = LSC_ADDRESS_OFFSET_BTI_BITS;
+      break;
+
+   default:
+      return false;
+   }
+
+   assert(nir_intrinsic_has_base(intrin));
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   const int32_t lowest = u_intN_min(offset_bits);
+   const int32_t highest = u_intN_max(offset_bits);
+   const int32_t old_base = nir_intrinsic_base(intrin);
+
+   /* Nothing to do when the base is already in range and a multiple of 4. */
+   const bool in_range = old_base >= lowest && old_base <= highest;
+   if (in_range && (old_base % 4) == 0)
+      return false;
+
+   /* Clamp into range, then drop the remainder of the division by 4. */
+   const int32_t clamped = CLAMP(old_base, lowest, highest);
+   const int32_t new_base = clamped - (clamped % 4);
+
+   assert(new_base >= lowest && new_base <= highest);
+
+   /* Whatever was taken out of the base goes into the offset source. */
+   nir_src *offset_src = nir_get_io_offset_src(intrin);
+   nir_def *new_offset =
+      nir_iadd_imm(b, offset_src->ssa, old_base - new_base);
+   nir_src_rewrite(offset_src, new_offset);
+   nir_intrinsic_set_base(intrin, new_base);
+
+   return true;
+}
+
+/* Runs lower_immediate_offsets through nir_shader_intrinsics_pass(),
+ * reporting nir_metadata_control_flow as preserved.
+ */
+bool
+brw_nir_lower_immediate_offsets(nir_shader *shader)
+{
+   return nir_shader_intrinsics_pass(shader,
+                                     lower_immediate_offsets,
+                                     nir_metadata_control_flow,
+                                     NULL);
+}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index ca99c5b7b50..03c69c98bbd 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -73,6 +73,7 @@ libintel_compiler_brw_files = files(
   'brw_nir_lower_alpha_to_coverage.c',
   'brw_nir_lower_fs_barycentrics.c',
   'brw_nir_lower_fs_msaa.c',
+  'brw_nir_lower_immediate_offsets.c',
   'brw_nir_lower_intersection_shader.c',
   'brw_nir_lower_ray_queries.c',
   'brw_nir_lower_rt_intrinsics.c',