tu: Add option to raise the maximum SSBO size

Emulates SSBOs via global memory. The real SSBO size and global base
address are stored in the descriptor. The size can be accessed using
resbase; the base address is read manually from the descriptor by passing
the bindless base address into the shader via a driver UBO or const file.

nir_lower_ssbo is used to lower SSBO accesses to global memory when the
buffer size exceeds the limit. We also use it to insert bounds checks on
global memory. The final code for SSBO accesses looks like this:

if (@get_ssbo_size >= max_storage_buffer_range_bytes) {
    if (offset < @get_ssbo_size) {
        // global memory access using base (from resbase) + offset
    } else {
        // do nothing (stores) or return 0 (loads)
    }
} else {
    // original SSBO access
}

A new pass is added to lower @load_ssbo_address generated by
nir_lower_ssbo. We set native_offset=true for nir_lower_ssbo to make
sure it doesn't generate 64-bit address math. The new pass then
transforms @load/store_global into @load/store_global_ir3, passing the
32-bit offset from @load_ssbo_address.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Co-authored-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41477>
This commit is contained in:
Job Noorman 2026-06-10 08:16:12 +02:00 committed by Marge Bot
parent dc1bb7bbf4
commit 9b32234726
13 changed files with 354 additions and 11 deletions

View file

@ -574,7 +574,8 @@ template <chip CHIP>
void
fdl6_buffer_view_init(uint32_t *descriptor, enum pipe_format format,
const uint8_t (&swiz)[4], uint64_t iova, uint32_t size,
uint32_t struct_size_texels)
uint32_t struct_size_texels,
enum fdl_ssbo_emulation_mode ssbo_emulation)
{
unsigned elem_size = util_format_get_blocksize(format);
unsigned elements = size / elem_size;
@ -609,6 +610,19 @@ fdl6_buffer_view_init(uint32_t *descriptor, enum pipe_format format,
A6XX_TEX_MEMOBJ_2_TYPE(A6XX_TEX_BUFFER);
descriptor[4] = base_iova;
descriptor[5] = base_iova >> 32;
if (ssbo_emulation == FDL_SSBO_EMULATION_ENABLED) {
/* resbase returns 0 if size is 0 */
if (descriptor[1] == 0) {
descriptor[1] = A6XX_TEX_MEMOBJ_1_WIDTH(1);
}
uint64_t encoded_size = (uint64_t) size << 6ull;
descriptor[7] = A6XX_TEX_MEMOBJ_7_FLAG_LO(encoded_size & 0x7FFFFFF);
descriptor[8] = A6XX_TEX_MEMOBJ_8_FLAG_HI(encoded_size >> 26);
descriptor[11] = iova;
descriptor[12] = iova >> 32;
}
} else if (CHIP >= A8XX) {
descriptor[0] = A8XX_TEX_MEMOBJ_0_INSTANCE_DESC_BASE_LO(iova);
descriptor[1] = A8XX_TEX_MEMOBJ_1_BASE_HI(iova >> 32) |

View file

@ -481,11 +481,18 @@ template <chip CHIP>
void
fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
const struct fdl_view_args *args, bool has_z24uint_s8uint);
/* Tells fdl6_buffer_view_init() whether to additionally write the
 * descriptor fields that SSBO emulation relies on (the encoded buffer size
 * and the unaligned buffer address).
 */
enum fdl_ssbo_emulation_mode {
   /* Plain buffer descriptor, no extra fields. */
   FDL_SSBO_EMULATION_DISABLED = 0,
   /* Also store the size and address used by the emulation path. */
   FDL_SSBO_EMULATION_ENABLED = 1,
};
template <chip CHIP>
void
fdl6_buffer_view_init(uint32_t *descriptor, enum pipe_format format,
const uint8_t (&swiz)[4], uint64_t iova, uint32_t size,
uint32_t struct_size_texels = 1);
uint32_t struct_size_texels = 1,
enum fdl_ssbo_emulation_mode ssbo_emulation = FDL_SSBO_EMULATION_DISABLED);
#endif
#endif /* FREEDRENO_LAYOUT_H_ */

View file

@ -1014,6 +1014,8 @@ ir3_const_alloc_type_to_string(enum ir3_const_alloc_type type)
return "dyn_descriptor_offset";
case IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS:
return "inline_uniform_addresses";
case IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS:
return "bindless_base_addresses";
case IR3_CONST_ALLOC_DRIVER_PARAMS:
return "driver_params";
case IR3_CONST_ALLOC_UBO_RANGES:

View file

@ -226,7 +226,9 @@ enum ir3_const_alloc_type {
IR3_CONST_ALLOC_PRIMITIVE_PARAM = 10,
/* Common, mapping from varying location to offset. */
IR3_CONST_ALLOC_PRIMITIVE_MAP = 11,
IR3_CONST_ALLOC_MAX = 12,
/* For SSBO emulation */
IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS = 12,
IR3_CONST_ALLOC_MAX = 13,
};
struct ir3_const_allocation {

View file

@ -1312,7 +1312,7 @@ TODO rename UAV src to "UAV" so disasm_field_cb can find it easily?
<bitset name="resbase" extends="#instruction-cat6-a6xx-ibo-1src">
<doc>
RESourceBASE - returns the value encoded into TEX_CONST_7_FLAG_LO/HI
of the given descriptor.
of the given descriptor. Returns 0 if NUM_ELEMENTS == 0.
</doc>
<pattern low="14" high="19">001100</pattern> <!-- OPC -->
</bitset>

View file

@ -4740,6 +4740,9 @@ tu_bind_descriptor_sets(struct tu_cmd_buffer *cmd,
descriptors_state->set_iova[idx] = set ?
(set->va | BINDLESS_DESCRIPTOR_64B) : 0;
if (cmd->device->physical_device->enable_ssbo_emulation)
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
if (!set)
continue;
@ -4805,6 +4808,12 @@ tu_bind_descriptor_sets(struct tu_cmd_buffer *cmd,
va += desc_offset << offset_shift;
va += offset;
if (cmd->device->physical_device->enable_ssbo_emulation) {
dst_desc[11] = va;
dst_desc[12] = va >> 32;
}
unsigned new_offset = (va & 0x3f) >> offset_shift;
va &= ~0x3full;
dst_desc[2] =
@ -4907,7 +4916,8 @@ tu_set_descriptor_buffer_offsets(
info->pOffsets[i]) |
BINDLESS_DESCRIPTOR_64B;
if (set_layout->has_inline_uniforms)
if (cmd->device->physical_device->enable_ssbo_emulation ||
set_layout->has_inline_uniforms)
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
}
@ -7420,6 +7430,7 @@ TU_GENX(tu_CmdBeginCustomResolveEXT);
static uint32_t
tu6_user_consts_size(const struct tu_const_state *const_state,
bool ldgk,
bool load_shader_consts_via_preamble,
mesa_shader_stage type)
{
uint32_t dwords = 0;
@ -7436,6 +7447,15 @@ tu6_user_consts_size(const struct tu_const_state *const_state,
dwords += 8 * const_state->num_inline_ubos;
}
if (const_state->num_bindless_base_addresses > 0) {
if (load_shader_consts_via_preamble) {
if (const_state->bindless_base_addrs_ubo.idx != -1)
dwords += 6 + (2 * const_state->num_bindless_base_addresses + 4);
} else {
dwords += 4 + align(const_state->num_bindless_base_addresses * 2, 4);
}
}
return dwords;
}
@ -7616,17 +7636,116 @@ tu6_const_size(struct tu_cmd_buffer *cmd,
}
bool ldgk = cmd->device->physical_device->info->props.load_inline_uniforms_via_preamble_ldgk;
bool load_shader_consts_via_preamble =
cmd->device->physical_device->info->props.load_shader_consts_via_preamble;
if (compute) {
dwords +=
tu6_user_consts_size(&cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state, ldgk, MESA_SHADER_COMPUTE);
tu6_user_consts_size(&cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state,
ldgk,
load_shader_consts_via_preamble,
MESA_SHADER_COMPUTE);
} else {
for (uint32_t type = MESA_SHADER_VERTEX; type <= MESA_SHADER_FRAGMENT; type++)
dwords += tu6_user_consts_size(&cmd->state.shaders[type]->const_state, ldgk, (mesa_shader_stage) type);
dwords += tu6_user_consts_size(&cmd->state.shaders[type]->const_state,
ldgk,
load_shader_consts_via_preamble,
(mesa_shader_stage) type);
}
return dwords;
}
/* Fills `addresses` with one base address per descriptor set.
 *
 * Entry i is descriptors->set_iova[i] with its low 6 bits cleared, for
 * i in [0, const_state->num_bindless_base_addresses). Entries past that
 * count are left untouched.
 *
 * `addresses` must have room for num_bindless_base_addresses entries; the
 * callers in this file pass an array of MAX_SETS elements.
 */
static void
fill_bindless_base_addresses(const struct tu_const_state *const_state,
                             struct tu_descriptor_state *descriptors,
                             uint64_t *addresses)
{
   const unsigned set_count = const_state->num_bindless_base_addresses;
   assert(set_count <= MAX_SETS);

   /* set_iova has BINDLESS_DESCRIPTOR_64B OR'd in when a set is bound;
    * presumably the mask below is what strips that flag again.
    */
   const uint64_t addr_mask = ~0x3full;

   for (unsigned set = 0; set < set_count; set++) {
      addresses[set] = descriptors->set_iova[set] & addr_mask;
   }
}
static void
tu6_emit_bindless_base_addresses(struct tu_cs *cs,
const struct tu_const_state *const_state,
const struct ir3_const_state *ir_const_state,
mesa_shader_stage type,
struct tu_descriptor_state *descriptors)
{
uint64_t addresses[MAX_SETS] = {0};
fill_bindless_base_addresses(const_state, descriptors, addresses);
uint32_t offset =
ir_const_state->allocs.consts[IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS]
.offset_vec4;
unsigned num_dwords = align(const_state->num_bindless_base_addresses * 2, 4);
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_dwords);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
CP_LOAD_STATE6_0_NUM_UNIT(num_dwords / 4));
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
for (unsigned i = 0; i < const_state->num_bindless_base_addresses; i++)
tu_cs_emit_qw(cs, addresses[i]);
for (unsigned i = const_state->num_bindless_base_addresses * 2;
i < num_dwords; i++)
tu_cs_emit(cs, 0);
}
static void
tu7_emit_bindless_base_addresses(struct tu_cs *cs,
const struct tu_const_state *const_state,
mesa_shader_stage type,
struct tu_descriptor_state *descriptors)
{
uint64_t addresses[MAX_SETS] = {0};
int ubo_offset = const_state->bindless_base_addrs_ubo.idx;
if (ubo_offset < 0)
return;
fill_bindless_base_addresses(const_state, descriptors, addresses);
/* A7XX TODO: Emit data via sub_cs instead of NOP */
uint64_t iova = tu_cs_emit_data_nop(
cs, (uint32_t *) addresses, const_state->num_bindless_base_addresses * 2, 4);
unsigned size_vec4s =
DIV_ROUND_UP(const_state->num_bindless_base_addresses * 2, 4);
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 5);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(ubo_offset) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
CP_LOAD_STATE6_0_NUM_UNIT(1));
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
tu_cs_emit_qw(cs, iova | ((uint64_t) A6XX_UBO_1_SIZE(size_vec4s) << 32));
}
/* Emits the bindless base addresses for one shader stage, if that stage
 * uses any.
 *
 * Devices with load_shader_consts_via_preamble take the driver-UBO path
 * (tu7_*); all others upload straight into the const file (tu6_*).
 */
static void
tu_emit_bindless_base_addresses(struct tu_cs *cs,
                                const struct tu_const_state *const_state,
                                const struct ir3_const_state *ir_const_state,
                                mesa_shader_stage type,
                                struct tu_descriptor_state *descriptors)
{
   if (const_state->num_bindless_base_addresses == 0)
      return;

   const bool via_preamble =
      cs->device->physical_device->info->props.load_shader_consts_via_preamble;

   if (!via_preamble) {
      tu6_emit_bindless_base_addresses(cs, const_state, ir_const_state, type,
                                       descriptors);
      return;
   }

   tu7_emit_bindless_base_addresses(cs, const_state, type, descriptors);
}
template <chip CHIP>
static struct tu_draw_state
tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute)
@ -7661,6 +7780,11 @@ tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute)
cmd->state.shaders[MESA_SHADER_COMPUTE]->variant->constlen,
MESA_SHADER_COMPUTE,
tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE));
tu_emit_bindless_base_addresses(
&cs, &cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state,
cmd->state.shaders[MESA_SHADER_COMPUTE]->variant->const_state,
MESA_SHADER_COMPUTE,
tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE));
} else {
struct tu_descriptor_state *descriptors =
tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
@ -7674,6 +7798,10 @@ tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute)
tu_emit_inline_ubo(&cs, &link->tu_const_state,
&link->const_state, link->constlen,
(mesa_shader_stage) type, descriptors);
tu_emit_bindless_base_addresses(&cs, &link->tu_const_state,
&link->const_state,
(mesa_shader_stage) type,
descriptors);
}
}

View file

@ -110,6 +110,7 @@
#define TU_D3D12_MAX_TEXEL_BUFFER_ELEMENTS ((1u << 29u) - 1u)
#define TU_TEXEL_BUFFER_MAX_WIDTH (1u << 14)
#define TU_TEXEL_BUFFER_MAX_HEIGHT (1u << 14)
#define TU_D3D12_MAX_STORAGE_BUFFER_RANGE_BYTES ((1u << 31u) - 1u)
/* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
* expose the same maximum range.
* TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual

View file

@ -1131,11 +1131,14 @@ write_buffer_descriptor_addr(const struct tu_device *device,
if (!buffer_info || buffer_info->address == 0)
return;
enum fdl_ssbo_emulation_mode ssbo_emulation_mode =
device->physical_device->enable_ssbo_emulation ? FDL_SSBO_EMULATION_ENABLED : FDL_SSBO_EMULATION_DISABLED;
uint64_t va = buffer_info->address;
uint32_t range = buffer_info->range;
if (info->props.storage_16bit) {
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R16_UINT, tu_swiz(X, Y, Z, W), va, range);
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R16_UINT, tu_swiz(X, Y, Z, W), va, range, 1, ssbo_emulation_mode);
dst += FDL6_TEX_CONST_DWORDS;
}
@ -1143,12 +1146,12 @@ write_buffer_descriptor_addr(const struct tu_device *device,
* 16-bit descriptor cannot be used for 32-bit loads through isam.v.
*/
if (!info->props.storage_16bit || !info->props.has_isam_v) {
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R32_UINT, tu_swiz(X, Y, Z, W), va, range);
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R32_UINT, tu_swiz(X, Y, Z, W), va, range, 1, ssbo_emulation_mode);
dst += FDL6_TEX_CONST_DWORDS;
}
if (info->props.storage_8bit) {
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R8_UINT, tu_swiz(X, Y, Z, W), va, range);
fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R8_UINT, tu_swiz(X, Y, Z, W), va, range, 1, ssbo_emulation_mode);
dst += FDL6_TEX_CONST_DWORDS;
}
}

View file

@ -105,6 +105,8 @@ tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
sizeof(device->instance->drirc.misc.allow_oob_indirect_ubo_loads));
_mesa_blake3_update(&ctx, &device->enable_texel_buffer_emulation,
sizeof(device->enable_texel_buffer_emulation));
_mesa_blake3_update(&ctx, &device->enable_ssbo_emulation,
sizeof(device->enable_ssbo_emulation));
_mesa_blake3_final(&ctx, blake3);
memcpy(uuid, blake3, VK_UUID_SIZE);
@ -1155,7 +1157,10 @@ tu_get_properties(struct tu_physical_device *pdevice,
? TU_D3D12_MAX_TEXEL_BUFFER_ELEMENTS
: pdevice->info->props.max_texel_buffer_range_elements;
props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
props->maxStorageBufferRange = pdevice->info->props.max_storage_buffer_range_bytes;
props->maxStorageBufferRange =
pdevice->enable_ssbo_emulation
? TU_D3D12_MAX_STORAGE_BUFFER_RANGE_BYTES
: pdevice->info->props.max_storage_buffer_range_bytes;
props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
props->maxMemoryAllocationCount = UINT32_MAX;
props->maxSamplerAllocationCount = 64 * 1024;
@ -1732,6 +1737,10 @@ tu_physical_device_init(struct tu_physical_device *device,
assert(fd_dev_gen(&device->dev_id) == 7);
device->enable_texel_buffer_emulation = instance->drirc.misc.enable_texel_buffer_emulation;
}
if (device->info->props.max_storage_buffer_range_bytes < TU_D3D12_MAX_STORAGE_BUFFER_RANGE_BYTES) {
assert(fd_dev_gen(&device->dev_id) == 7);
device->enable_ssbo_emulation = instance->drirc.misc.enable_ssbo_emulation;
}
}
if (tu_device_get_cache_uuid(device, device->cache_uuid)) {

View file

@ -153,6 +153,7 @@ struct tu_physical_device
bool is_perf_cntr_selectable;
bool enable_texel_buffer_emulation;
bool enable_ssbo_emulation;
struct {
uint32_t non_lazy_type_count;

View file

@ -112,6 +112,9 @@ def declare_options():
B("tu_enable_texel_buffer_emulation", False,
"Emulate texel buffers to allow a higher limit for elements that is in line with what some D3D12 games expect",
c_name="enable_texel_buffer_emulation"),
B("tu_enable_ssbo_emulation", False,
"Emulate SSBOs to allow a higher limit for elements that is in line with what some D3D12 games expect",
c_name="enable_ssbo_emulation"),
]
features_options = []

View file

@ -9,6 +9,7 @@
#include "nir/nir_xfb_info.h"
#include "spirv/nir_spirv.h"
#include "util/macros.h"
#include "util/mesa-blake3.h"
#include "vk_nir.h"
#include "vk_nir_convert_ycbcr.h"
@ -1367,6 +1368,21 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
ir3_const_alloc(const_allocs, IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS, ldgk_consts, 1);
if (dev->physical_device->enable_ssbo_emulation) {
const_state->num_bindless_base_addresses = layout->num_sets;
const_state->bindless_base_const_offset_vec4 = const_allocs->max_const_offset_vec4;
if (dev->physical_device->reserved_set_idx >= 0) {
const_state->num_bindless_base_addresses =
MAX2(layout->num_sets, (unsigned) dev->physical_device->reserved_set_idx + 1);
}
if (!dev->compiler->info->props.load_shader_consts_via_preamble) {
ir3_const_alloc(const_allocs, IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS,
DIV_ROUND_UP(const_state->num_bindless_base_addresses * 2, 4), 1);
}
}
struct lower_instr_params params = {
.dev = dev,
.shader = tu_shader,
@ -1557,6 +1573,141 @@ tu_nir_lower_ssbo_descriptor(nir_shader *shader,
(void *)dev);
}
/* Emits NIR that recovers the SSBO size from the two-component value
 * returned by resbase for descriptor `desc`.
 *
 * The result is (lo >> 6) | (hi << 20), where lo and hi are components 0
 * and 1 of the resbase result. This mirrors fdl6_buffer_view_init(), which
 * stores (size << 6) split across the descriptor's FLAG_LO / FLAG_HI
 * fields.
 */
static nir_def *
build_ssbo_size_from_resbase(nir_builder *b, nir_def *desc)
{
   assert(nir_def_is_intrinsic(desc));

   nir_def *resbase = nir_resbase_ir3(b, 32, desc);
   nir_def *lo_word = nir_channel(b, resbase, 0);
   nir_def *hi_word = nir_channel(b, resbase, 1);

   /* Drop the 6 low bits the encoding shifted in, then splice in the
    * high part.
    */
   nir_def *low_bits = nir_ishr_imm(b, lo_word, 6);
   nir_def *high_bits = nir_ishl_imm(b, hi_word, 20);

   return nir_ior(b, low_bits, high_bits);
}
/* Returns the bindless_resource_ir3 intrinsic that produces the buffer
 * index source of `intr`.
 *
 * Asserts that the index source is defined by an intrinsic and that this
 * intrinsic is nir_intrinsic_bindless_resource_ir3.
 */
static nir_intrinsic_instr *
get_ssbo_bindless(nir_intrinsic_instr *intr)
{
   nir_def *index = nir_get_io_index_src(intr)->ssa;
   assert(nir_def_is_intrinsic(index));

   nir_intrinsic_instr *resource = nir_def_as_intrinsic(index);
   assert(resource->intrinsic == nir_intrinsic_bindless_resource_ir3);

   return resource;
}
/* Emits NIR that loads the 64-bit buffer address stored in the SSBO's
 * descriptor and returns it as a packed 64-bit value.
 *
 * The descriptor set's base address comes either from the
 * bindless_base_addrs_ubo driver UBO (load_shader_consts_via_preamble) or
 * from the IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS const-file allocation; in
 * both cases the entry for a set is two dwords at index desc_set * 2.
 *
 * The address itself is read from dwords 11 and 12 of the descriptor,
 * which is where the descriptor-writing code stores it when SSBO
 * emulation is enabled.
 */
static nir_def *
build_ssbo_global_addr(nir_builder *b,
                       nir_intrinsic_instr *bindless,
                       struct tu_shader *shader,
                       bool load_shader_consts_via_preamble,
                       const struct ir3_const_allocations *const_allocs)
{
   /* First descriptor dword holding the emulated SSBO's address. */
   const unsigned addr_dword = 11;

   nir_def *set_base;
   if (!load_shader_consts_via_preamble) {
      const unsigned dword_base =
         const_allocs->consts[IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS].offset_vec4 *
            4 +
         nir_intrinsic_desc_set(bindless) * 2;
      set_base =
         nir_load_const_ir3(b, 2, 32, nir_imm_int(b, 0), .base = dword_base);
   } else {
      set_base =
         ir3_load_driver_ubo(b, 2, &shader->const_state.bindless_base_addrs_ubo,
                             nir_intrinsic_desc_set(bindless) * 2);
   }

   /* Byte offset of the address within the set: src[0] (presumably the
    * descriptor index within the set) times the descriptor size, plus the
    * offset of the address dwords.
    */
   nir_def *desc_start =
      nir_imul_imm(b, bindless->src[0].ssa, FDL6_TEX_CONST_DWORDS * 4);
   nir_def *addr_offset = nir_iadd_imm(b, desc_start, addr_dword * 4);

   nir_def *set_base_64 = nir_pack_64_2x32(b, set_base);
   nir_def *addr_words = nir_load_global_ir3(
      b, 2, 32, set_base_64, addr_offset,
      .access = (enum gl_access_qualifier)(
         ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER | ACCESS_CAN_SPECULATE),
      .align_mul = 4, .align_offset = 0);

   return nir_pack_64_2x32(b, addr_words);
}
/* Callback data for lower_ssbo_address_size(). */
struct lower_ssbo_address_size_state {
/* Shader being compiled; its const_state.bindless_base_addrs_ubo is used
 * when the set base address is loaded from a driver UBO.
 */
struct tu_shader *shader;
/* Const allocations; supplies the IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS
 * offset when the set base address is loaded from the const file.
 */
const struct ir3_const_allocations *const_allocs;
/* Selects the driver-UBO path (true) or the const-file path (false). */
bool load_shader_consts_via_preamble;
};
/* nir_shader_intrinsics_pass callback lowering two intrinsics:
 *
 * - get_ssbo_size is replaced by the size decoded from resbase.
 * - load_ssbo_address is resolved by rewriting each of its users:
 *   global atomics get a full 64-bit address (base + zero-extended
 *   offset), while load_global / store_global are replaced by their _ir3
 *   variants taking the 64-bit base and the 32-bit offset separately.
 *
 * `data` is a const lower_ssbo_address_size_state *. Returns true when
 * `intr` was one of the two handled intrinsics.
 */
static bool
lower_ssbo_address_size(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   const bool is_address = intr->intrinsic == nir_intrinsic_load_ssbo_address;
   const bool is_size = intr->intrinsic == nir_intrinsic_get_ssbo_size;
   if (!is_address && !is_size)
      return false;

   auto state = static_cast<const lower_ssbo_address_size_state *>(data);

   b->cursor = nir_before_instr(&intr->instr);

   if (is_size) {
      nir_def *size =
         build_ssbo_size_from_resbase(b, nir_get_io_index_src(intr)->ssa);
      nir_def_replace(&intr->def, size);
      return true;
   }

   /* The base address is built once, right before @load_ssbo_address, and
    * shared by all rewritten users.
    */
   nir_def *base = build_ssbo_global_addr(
      b, get_ssbo_bindless(intr), state->shader,
      state->load_shader_consts_via_preamble, state->const_allocs);
   nir_def *offset = intr->src[1].ssa;

   /* NOTE(review): users may be removed here while the pass driver is
    * still walking instructions; presumably they never directly follow
    * @load_ssbo_address in the same block -- confirm. The
    * @load_ssbo_address itself is left in place without uses.
    */
   nir_foreach_use_safe (use, &intr->def) {
      nir_instr *user = nir_src_use_instr(use);
      b->cursor = nir_before_instr(user);

      nir_intrinsic_instr *user_intr = nir_instr_as_intrinsic(user);
      const nir_intrinsic_op op = user_intr->intrinsic;

      if (op == nir_intrinsic_global_atomic ||
          op == nir_intrinsic_global_atomic_swap) {
         /* Atomics keep their intrinsic; only the address is swapped. */
         nir_def *addr = nir_iadd(b, base, nir_u2u64(b, offset));
         nir_src_rewrite(nir_get_io_offset_src(user_intr), addr);
      } else if (op == nir_intrinsic_load_global) {
         nir_def *loaded = nir_load_global_ir3(
            b, user_intr->def.num_components, user_intr->def.bit_size, base,
            offset, .access = nir_intrinsic_access(user_intr));
         nir_def_replace(&user_intr->def, loaded);
      } else if (op == nir_intrinsic_store_global) {
         nir_store_global_ir3(b, nir_get_io_data_src(user_intr)->ssa, base,
                              offset,
                              .access = nir_intrinsic_access(user_intr));
         nir_instr_remove(user);
      } else {
         UNREACHABLE("unexpected use of @load_ssbo_address");
      }
   }

   return true;
}
/* Runs lower_ssbo_address_size() over every intrinsic in `shader`,
 * declaring control-flow metadata as preserved. Returns the progress
 * value reported by nir_shader_intrinsics_pass().
 */
static bool
tu_nir_lower_ssbo_address_size(
   nir_shader *shader, const struct lower_ssbo_address_size_state *state)
{
   /* The pass API takes a mutable void *; the callback casts it back to
    * a const pointer.
    */
   void *cb_data = (void *) state;

   return nir_shader_intrinsics_pass(shader, lower_ssbo_address_size,
                                     nir_metadata_control_flow, cb_data);
}
struct lower_fdm_state {
nir_variable *layer_var;
nir_variable *viewport_var;
@ -2958,6 +3109,7 @@ tu_shader_init(struct tu_device *dev, const void *key_data, size_t key_size)
shader->const_state.fdm_ubo.idx = -1;
shader->const_state.dynamic_offsets_ubo.idx = -1;
shader->const_state.inline_uniforms_ubo.idx = -1;
shader->const_state.bindless_base_addrs_ubo.idx = -1;
return shader;
}
@ -3251,6 +3403,24 @@ tu_shader_create(struct tu_device *dev,
*/
NIR_PASS(_, nir, tu_nir_lower_ssbo_descriptor, dev);
if (dev->physical_device->enable_ssbo_emulation) {
nir_lower_ssbo_options options = {
.native_offset = true,
.min_ssbo_size = dev->compiler->info->props.max_storage_buffer_range_bytes,
.bounds_check = true,
};
NIR_PASS(_, nir, nir_lower_ssbo, &options);
struct lower_ssbo_address_size_state lower_ssbo_state = {
.shader = shader,
.const_allocs = &const_allocs,
.load_shader_consts_via_preamble =
dev->compiler->info->props.load_shader_consts_via_preamble,
};
NIR_PASS(_, nir, tu_nir_lower_ssbo_address_size, &lower_ssbo_state);
}
const struct ir3_shader_options options = {
.api_wavesize = key->api_wavesize,
.real_wavesize = key->real_wavesize,

View file

@ -50,10 +50,13 @@ struct tu_const_state
uint32_t dynamic_offset_loc;
unsigned num_inline_ubos;
struct tu_inline_ubo ubos[MAX_INLINE_UBOS];
uint32_t num_bindless_base_addresses;
uint32_t bindless_base_const_offset_vec4;
struct ir3_driver_ubo fdm_ubo;
struct ir3_driver_ubo dynamic_offsets_ubo;
struct ir3_driver_ubo inline_uniforms_ubo;
struct ir3_driver_ubo bindless_base_addrs_ubo;
};
struct tu_shader