From a4668733e54aef935d61449c1b69aee607ceba13 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 21 Apr 2026 14:09:49 +0200 Subject: [PATCH] ac/nir: add a pass to fixup SMEM loads with NULL PRT pages Only global/SSBO SMEM loads are considered because for UBOs the "LOW" VA will be set in descriptors. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/common/meson.build | 1 + src/amd/common/nir/ac_nir.h | 3 + .../nir/ac_nir_fixup_smem_loads_null_prt.c | 69 +++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 src/amd/common/nir/ac_nir_fixup_smem_loads_null_prt.c diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index 1579abd8d61..c7dfde77805 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -156,6 +156,7 @@ amd_common_files = files( 'nir/ac_nir_opt_outputs.c', 'nir/ac_nir_cull.c', 'nir/ac_nir_create_gs_copy_shader.c', + 'nir/ac_nir_fixup_smem_loads_null_prt.c', 'nir/ac_nir_lower_esgs_io_to_mem.c', 'nir/ac_nir_lower_global_access.c', 'nir/ac_nir_lower_image_opcodes_cdna.c', diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index 2ffe9e83838..0aa5b7ded65 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -478,6 +478,9 @@ ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_ bool ac_nir_assign_fs_input_locations(nir_shader *nir); +bool +ac_nir_fixup_smem_loads_null_prt(nir_shader *shader, uint8_t address_prt_wa_control_bit); + #ifdef __cplusplus } #endif diff --git a/src/amd/common/nir/ac_nir_fixup_smem_loads_null_prt.c b/src/amd/common/nir/ac_nir_fixup_smem_loads_null_prt.c new file mode 100644 index 00000000000..1d8fef8083e --- /dev/null +++ b/src/amd/common/nir/ac_nir_fixup_smem_loads_null_prt.c @@ -0,0 +1,69 @@ +/* + * Copyright © 2026 Valve Corporation + * + * SPDX-License-Identifier: MIT + */ + +#include "ac_nir.h" +#include "nir/nir_builder.h" + +/* SMEM reads on a NULL PRT page fail or hang depending on the hw gen. 
+ *
+ * To work around that, the driver splits the total VA space in half, so that a single bit controls
+ * whether it's the "HIGH" or the "LOW" address space. Every sparse residency buffer allocation
+ * that might be used with SMEM gets two allocations:
+ *
+ * - the "HIGH" address space is mapped normally and its VA is returned to the application.
+ * - the "LOW" address space is explicitly mapped to a zero-initialized buffer when it's allocated
+ *   or when it's unmapped.
+ *
+ * Other buffer allocations are always allocated in the "LOW" address space, so that control bit is
+ * always 0.
+ *
+ * Mask out the bit that controls whether it's the "HIGH" or the "LOW" address space to implement
+ * the workaround for sparse residency buffer allocations.
+ */
+typedef struct {
+   uint8_t control_bit; /* index of the VA bit that selects the "HIGH"/"LOW" address space */
+} fixup_smem_loads_null_prt_state;
+/* Intrinsics-pass callback: masks the control bit out of one SMEM load's address. */
+static bool
+fixup_smem_loads_null_prt(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
+   fixup_smem_loads_null_prt_state *state = (fixup_smem_loads_null_prt_state *)data;
+   /* Only global/SSBO loads are fixed up; for UBOs the "LOW" VA is already set in descriptors. */
+   if (intrin->intrinsic != nir_intrinsic_load_global_amd && intrin->intrinsic != nir_intrinsic_load_ssbo)
+      return false;
+
+   const unsigned access = nir_intrinsic_access(intrin);
+
+   if (!(access & ACCESS_SMEM_AMD)) /* only scalar (SMEM) loads hit the NULL PRT page issue */
+      return false;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_def *src = intrin->src[0].ssa;
+   nir_def *new_src;
+
+   if (intrin->intrinsic == nir_intrinsic_load_global_amd) {
+      new_src = nir_iand_imm(b, src, ~(1ull << state->control_bit)); /* clear the bit in the 64-bit address */
+   } else {
+      assert(state->control_bit >= 32); /* the control bit must live in the high dword of the base VA */
+      nir_def *new_addr_hi = nir_iand_imm(b, nir_channel(b, src, 1), ~(1u << (state->control_bit - 32)));
+      new_src = nir_vec4(b, nir_channel(b, src, 0), new_addr_hi, nir_channel(b, src, 2), nir_channel(b, src, 3));
+   }
+
+   nir_src_rewrite(&intrin->src[0], new_src);
+
+   return true;
+}
+/* Entry point; see the comment at the top of this file for how the workaround operates. */
+bool
+ac_nir_fixup_smem_loads_null_prt(nir_shader *nir, uint8_t address_prt_wa_control_bit)
+{
+   fixup_smem_loads_null_prt_state s = {
+      .control_bit = address_prt_wa_control_bit,
+   };
+
+   return nir_shader_intrinsics_pass(nir, fixup_smem_loads_null_prt, nir_metadata_all, &s); /* only rewrites load sources in place, so all metadata is preserved */
+}