diff --git a/src/freedreno/fdl/fd6_view.cc b/src/freedreno/fdl/fd6_view.cc
index a4a89336371..67f2095f8da 100644
--- a/src/freedreno/fdl/fd6_view.cc
+++ b/src/freedreno/fdl/fd6_view.cc
@@ -574,7 +574,8 @@ template <chip CHIP>
 void
 fdl6_buffer_view_init(uint32_t *descriptor, enum pipe_format format,
                       const uint8_t (&swiz)[4], uint64_t iova, uint32_t size,
-                      uint32_t struct_size_texels)
+                      uint32_t struct_size_texels,
+                      enum fdl_ssbo_emulation_mode ssbo_emulation)
 {
    unsigned elem_size = util_format_get_blocksize(format);
    unsigned elements = size / elem_size;
@@ -609,6 +610,24 @@ fdl6_buffer_view_init(uint32_t *descriptor, enum pipe_format format,
                       A6XX_TEX_MEMOBJ_2_TYPE(A6XX_TEX_BUFFER);
       descriptor[4] = base_iova;
       descriptor[5] = base_iova >> 32;
+
+      if (ssbo_emulation == FDL_SSBO_EMULATION_ENABLED) {
+         /* resbase returns 0 if size is 0 */
+         if (descriptor[1] == 0) {
+            descriptor[1] = A6XX_TEX_MEMOBJ_1_WIDTH(1);
+         }
+
+         /* Stash the byte size in FLAG_LO/HI so shaders can read it back
+          * with resbase.  The value is pre-shifted by 6; the decoder shifts
+          * the low word right by 6 and the high word left by 20.
+          */
+         uint64_t encoded_size = (uint64_t) size << 6ull;
+         descriptor[7] = A6XX_TEX_MEMOBJ_7_FLAG_LO(encoded_size & 0x7FFFFFF);
+         descriptor[8] = A6XX_TEX_MEMOBJ_8_FLAG_HI(encoded_size >> 26);
+         /* Full buffer address (iova, not base_iova), read by shaders. */
+         descriptor[11] = iova;
+         descriptor[12] = iova >> 32;
+      }
    } else if (CHIP >= A8XX) {
       descriptor[0] = A8XX_TEX_MEMOBJ_0_INSTANCE_DESC_BASE_LO(iova);
       descriptor[1] = A8XX_TEX_MEMOBJ_1_BASE_HI(iova >> 32) |
diff --git a/src/freedreno/fdl/freedreno_layout.h b/src/freedreno/fdl/freedreno_layout.h
index 10b24f1227e..d0e482d6e92 100644
--- a/src/freedreno/fdl/freedreno_layout.h
+++ b/src/freedreno/fdl/freedreno_layout.h
@@ -481,11 +481,21 @@ template <chip CHIP>
 void
 fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
                const struct fdl_view_args *args, bool has_z24uint_s8uint);
+
+/* Whether fdl6_buffer_view_init() also fills in the extra descriptor fields
+ * (encoded size and full buffer address) that SSBO emulation reads back.
+ */
+enum fdl_ssbo_emulation_mode {
+   FDL_SSBO_EMULATION_DISABLED,
+   FDL_SSBO_EMULATION_ENABLED,
+};
+
 template <chip CHIP>
 void
 fdl6_buffer_view_init(uint32_t *descriptor, enum pipe_format format,
                       const uint8_t (&swiz)[4], uint64_t iova, uint32_t size,
-                      uint32_t struct_size_texels = 1);
+                      uint32_t struct_size_texels = 1,
+                      enum fdl_ssbo_emulation_mode ssbo_emulation = FDL_SSBO_EMULATION_DISABLED);
 #endif
 
 #endif /* FREEDRENO_LAYOUT_H_ */
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 5652cd75b37..6b8419bb278 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -1014,6 +1014,8 @@ ir3_const_alloc_type_to_string(enum ir3_const_alloc_type type)
       return "dyn_descriptor_offset";
    case IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS:
       return "inline_uniform_addresses";
+   case IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS:
+      return "bindless_base_addresses";
    case IR3_CONST_ALLOC_DRIVER_PARAMS:
       return "driver_params";
    case IR3_CONST_ALLOC_UBO_RANGES:
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index ad1390292fc..89fc8f84a6a 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -226,7 +226,9 @@ enum ir3_const_alloc_type {
    IR3_CONST_ALLOC_PRIMITIVE_PARAM = 10,
    /* Common, mapping from varying location to offset. */
    IR3_CONST_ALLOC_PRIMITIVE_MAP = 11,
-   IR3_CONST_ALLOC_MAX = 12,
+   /* For SSBO emulation */
+   IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS = 12,
+   IR3_CONST_ALLOC_MAX = 13,
 };
 
 struct ir3_const_allocation {
NOTE(review): the XML element tags of the hunk below were lost in extraction;
only their text content is shown, so the context lines are incomplete and the
hunk no longer matches its 7-line counts.  Regenerate it against
src/freedreno/isa/ir3-cat6.xml before applying.
diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml
index 4736c645ab6..46b01a065e4 100644
--- a/src/freedreno/isa/ir3-cat6.xml
+++ b/src/freedreno/isa/ir3-cat6.xml
@@ -1312,7 +1312,7 @@ TODO rename UAV src to "UAV" so disasm_field_cb can find it easily?
 		RESourceBASE - returns the value encoded into TEX_CONST_7_FLAG_LO/HI
-		of the given descriptor.
+		of the given descriptor. Returns 0 if NUM_ELEMENTS == 0.
 001100
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 15e4562a416..60ad3a8ef90 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -4740,6 +4740,9 @@ tu_bind_descriptor_sets(struct tu_cmd_buffer *cmd,
       descriptors_state->set_iova[idx] = set ?
          (set->va | BINDLESS_DESCRIPTOR_64B) : 0;
 
+      if (cmd->device->physical_device->enable_ssbo_emulation)
+         cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
+
       if (!set)
          continue;
 
@@ -4805,6 +4808,12 @@ tu_bind_descriptor_sets(struct tu_cmd_buffer *cmd,
 
                va += desc_offset << offset_shift;
                va += offset;
+
+               if (cmd->device->physical_device->enable_ssbo_emulation) {
+                  dst_desc[11] = va;
+                  dst_desc[12] = va >> 32;
+               }
+
                unsigned new_offset = (va & 0x3f) >> offset_shift;
                va &= ~0x3full;
                dst_desc[2] =
@@ -4907,7 +4916,8 @@ tu_set_descriptor_buffer_offsets(
                                        info->pOffsets[i]) |
                                       BINDLESS_DESCRIPTOR_64B;
 
-      if (set_layout->has_inline_uniforms)
+      if (cmd->device->physical_device->enable_ssbo_emulation ||
+          set_layout->has_inline_uniforms)
          cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
    }
 
@@ -7420,6 +7430,7 @@ TU_GENX(tu_CmdBeginCustomResolveEXT);
 static uint32_t
 tu6_user_consts_size(const struct tu_const_state *const_state,
                      bool ldgk,
+                     bool load_shader_consts_via_preamble,
                      mesa_shader_stage type)
 {
    uint32_t dwords = 0;
@@ -7436,6 +7447,20 @@ tu6_user_consts_size(const struct tu_const_state *const_state,
       dwords += 8 * const_state->num_inline_ubos;
    }
 
+   /* Keep in sync with tu{6,7}_emit_bindless_base_addresses().  Preamble
+    * path: 1 + 5 dwords of CP_LOAD_STATE6 plus the addresses in a NOP (the
+    * extra 4 dwords presumably cover its header and alignment padding).
+    * Direct path: 1 + 3 dwords of CP_LOAD_STATE6 plus vec4-padded addresses.
+    */
+   if (const_state->num_bindless_base_addresses > 0) {
+      if (load_shader_consts_via_preamble) {
+         if (const_state->bindless_base_addrs_ubo.idx != -1)
+            dwords += 6 + (2 * const_state->num_bindless_base_addresses + 4);
+      } else {
+         dwords += 4 + align(const_state->num_bindless_base_addresses * 2, 4);
+      }
+   }
+
    return dwords;
 }
 
@@ -7616,17 +7641,128 @@ tu6_const_size(struct tu_cmd_buffer *cmd,
    }
 
    bool ldgk = cmd->device->physical_device->info->props.load_inline_uniforms_via_preamble_ldgk;
+   bool load_shader_consts_via_preamble =
+      cmd->device->physical_device->info->props.load_shader_consts_via_preamble;
    if (compute) {
       dwords +=
-         tu6_user_consts_size(&cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state, ldgk, MESA_SHADER_COMPUTE);
+         tu6_user_consts_size(&cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state,
+                              ldgk,
+                              load_shader_consts_via_preamble,
+                              MESA_SHADER_COMPUTE);
    } else {
       for (uint32_t type = MESA_SHADER_VERTEX; type <= MESA_SHADER_FRAGMENT; type++)
-         dwords += tu6_user_consts_size(&cmd->state.shaders[type]->const_state, ldgk, (mesa_shader_stage) type);
+         dwords += tu6_user_consts_size(&cmd->state.shaders[type]->const_state,
+                                        ldgk,
+                                        load_shader_consts_via_preamble,
+                                        (mesa_shader_stage) type);
    }
 
    return dwords;
 }
 
+/* Collect the base address of each descriptor set the shader may index, with
+ * the low six bits (which carry flags like BINDLESS_DESCRIPTOR_64B) cleared.
+ */
+static void
+fill_bindless_base_addresses(const struct tu_const_state *const_state,
+                             struct tu_descriptor_state *descriptors,
+                             uint64_t *addresses)
+{
+   assert(const_state->num_bindless_base_addresses <= MAX_SETS);
+
+   for (unsigned i = 0; i < const_state->num_bindless_base_addresses; i++)
+      addresses[i] = descriptors->set_iova[i] & ~0x3full;
+}
+
+/* Upload the descriptor-set base addresses directly into the constant file,
+ * at the IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS allocation, padded to vec4s.
+ */
+static void
+tu6_emit_bindless_base_addresses(struct tu_cs *cs,
+                                 const struct tu_const_state *const_state,
+                                 const struct ir3_const_state *ir_const_state,
+                                 mesa_shader_stage type,
+                                 struct tu_descriptor_state *descriptors)
+{
+   uint64_t addresses[MAX_SETS] = {0};
+   fill_bindless_base_addresses(const_state, descriptors, addresses);
+
+   uint32_t offset =
+      ir_const_state->allocs.consts[IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS]
+         .offset_vec4;
+   unsigned num_dwords = align(const_state->num_bindless_base_addresses * 2, 4);
+
+   tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_dwords);
+   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
+                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+                  CP_LOAD_STATE6_0_NUM_UNIT(num_dwords / 4));
+   tu_cs_emit(cs, 0);
+   tu_cs_emit(cs, 0);
+
+   for (unsigned i = 0; i < const_state->num_bindless_base_addresses; i++)
+      tu_cs_emit_qw(cs, addresses[i]);
+
+   for (unsigned i = const_state->num_bindless_base_addresses * 2;
+        i < num_dwords; i++)
+      tu_cs_emit(cs, 0);
+}
+
+/* Emit the descriptor-set base addresses into the command stream and bind
+ * them as the driver UBO slot recorded in bindless_base_addrs_ubo.
+ */
+static void
+tu7_emit_bindless_base_addresses(struct tu_cs *cs,
+                                 const struct tu_const_state *const_state,
+                                 mesa_shader_stage type,
+                                 struct tu_descriptor_state *descriptors)
+{
+   uint64_t addresses[MAX_SETS] = {0};
+   int ubo_offset = const_state->bindless_base_addrs_ubo.idx;
+   if (ubo_offset < 0)
+      return;
+
+   fill_bindless_base_addresses(const_state, descriptors, addresses);
+
+   /* A7XX TODO: Emit data via sub_cs instead of NOP */
+   uint64_t iova = tu_cs_emit_data_nop(
+      cs, (uint32_t *) addresses, const_state->num_bindless_base_addresses * 2, 4);
+   unsigned size_vec4s =
+      DIV_ROUND_UP(const_state->num_bindless_base_addresses * 2, 4);
+
+   tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 5);
+   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(ubo_offset) |
+                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
+                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+                  CP_LOAD_STATE6_0_NUM_UNIT(1));
+   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+   tu_cs_emit_qw(cs, iova | ((uint64_t) A6XX_UBO_1_SIZE(size_vec4s) << 32));
+}
+
+/* Emit the per-set base addresses used by SSBO emulation when the shader
+ * needs any, picking the path from load_shader_consts_via_preamble.
+ */
+static void
+tu_emit_bindless_base_addresses(struct tu_cs *cs,
+                                const struct tu_const_state *const_state,
+                                const struct ir3_const_state *ir_const_state,
+                                mesa_shader_stage type,
+                                struct tu_descriptor_state *descriptors)
+{
+   if (!const_state->num_bindless_base_addresses)
+      return;
+
+   if (cs->device->physical_device->info->props.load_shader_consts_via_preamble) {
+      tu7_emit_bindless_base_addresses(cs, const_state, type, descriptors);
+   } else {
+      tu6_emit_bindless_base_addresses(cs, const_state, ir_const_state, type,
+                                       descriptors);
+   }
+}
+
 template <chip CHIP>
 static struct tu_draw_state
 tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute)
@@ -7661,6 +7797,11 @@ tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute)
          cmd->state.shaders[MESA_SHADER_COMPUTE]->variant->constlen,
          MESA_SHADER_COMPUTE,
          tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE));
+      tu_emit_bindless_base_addresses(
+         &cs, &cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state,
+         cmd->state.shaders[MESA_SHADER_COMPUTE]->variant->const_state,
+         MESA_SHADER_COMPUTE,
+         tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE));
    } else {
       struct tu_descriptor_state *descriptors =
          tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
@@ -7674,6 +7815,10 @@ tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute)
          tu_emit_inline_ubo(&cs, &link->tu_const_state,
                             &link->const_state, link->constlen,
                             (mesa_shader_stage) type, descriptors);
+         tu_emit_bindless_base_addresses(&cs, &link->tu_const_state,
+                                         &link->const_state,
+                                         (mesa_shader_stage) type,
+                                         descriptors);
       }
    }
 
diff --git a/src/freedreno/vulkan/tu_common.h b/src/freedreno/vulkan/tu_common.h
index 141685b1a27..bf5c503dc55 100644
--- a/src/freedreno/vulkan/tu_common.h
+++ b/src/freedreno/vulkan/tu_common.h
@@ -110,6 +110,7 @@
 #define TU_D3D12_MAX_TEXEL_BUFFER_ELEMENTS ((1u << 29u) - 1u)
 #define TU_TEXEL_BUFFER_MAX_WIDTH (1u << 14)
 #define TU_TEXEL_BUFFER_MAX_HEIGHT (1u << 14)
+#define TU_D3D12_MAX_STORAGE_BUFFER_RANGE_BYTES ((1u << 31u) - 1u)
 /* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
  * expose the same maximum range.
  * TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual
diff --git a/src/freedreno/vulkan/tu_descriptor_set.cc b/src/freedreno/vulkan/tu_descriptor_set.cc
index cb7b2e4e4d7..f4bee33428c 100644
--- a/src/freedreno/vulkan/tu_descriptor_set.cc
+++ b/src/freedreno/vulkan/tu_descriptor_set.cc
@@ -1131,11 +1131,14 @@ write_buffer_descriptor_addr(const struct tu_device *device,
    if (!buffer_info || buffer_info->address == 0)
       return;
 
+   enum fdl_ssbo_emulation_mode ssbo_emulation_mode =
+      device->physical_device->enable_ssbo_emulation ? FDL_SSBO_EMULATION_ENABLED : FDL_SSBO_EMULATION_DISABLED;
+
    uint64_t va = buffer_info->address;
    uint32_t range = buffer_info->range;
 
    if (info->props.storage_16bit) {
-      fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R16_UINT, tu_swiz(X, Y, Z, W), va, range);
+      fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R16_UINT, tu_swiz(X, Y, Z, W), va, range, 1, ssbo_emulation_mode);
       dst += FDL6_TEX_CONST_DWORDS;
    }
 
@@ -1143,12 +1146,12 @@ write_buffer_descriptor_addr(const struct tu_device *device,
     * 16-bit descriptor cannot be used for 32-bit loads through isam.v.
     */
    if (!info->props.storage_16bit || !info->props.has_isam_v) {
-      fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R32_UINT, tu_swiz(X, Y, Z, W), va, range);
+      fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R32_UINT, tu_swiz(X, Y, Z, W), va, range, 1, ssbo_emulation_mode);
       dst += FDL6_TEX_CONST_DWORDS;
    }
 
    if (info->props.storage_8bit) {
-      fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R8_UINT, tu_swiz(X, Y, Z, W), va, range);
+      fdl6_buffer_view_init<CHIP>(dst, PIPE_FORMAT_R8_UINT, tu_swiz(X, Y, Z, W), va, range, 1, ssbo_emulation_mode);
       dst += FDL6_TEX_CONST_DWORDS;
    }
 }
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index f718dac1613..82efb07af90 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -105,6 +105,8 @@ tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
                        sizeof(device->instance->drirc.misc.allow_oob_indirect_ubo_loads));
    _mesa_blake3_update(&ctx, &device->enable_texel_buffer_emulation,
                        sizeof(device->enable_texel_buffer_emulation));
+   _mesa_blake3_update(&ctx, &device->enable_ssbo_emulation,
+                       sizeof(device->enable_ssbo_emulation));
    _mesa_blake3_final(&ctx, blake3);
 
    memcpy(uuid, blake3, VK_UUID_SIZE);
@@ -1155,7 +1157,10 @@ tu_get_properties(struct tu_physical_device *pdevice,
          ? TU_D3D12_MAX_TEXEL_BUFFER_ELEMENTS
          : pdevice->info->props.max_texel_buffer_range_elements;
    props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
-   props->maxStorageBufferRange = pdevice->info->props.max_storage_buffer_range_bytes;
+   props->maxStorageBufferRange =
+      pdevice->enable_ssbo_emulation
+         ? TU_D3D12_MAX_STORAGE_BUFFER_RANGE_BYTES
+         : pdevice->info->props.max_storage_buffer_range_bytes;
    props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
    props->maxMemoryAllocationCount = UINT32_MAX;
    props->maxSamplerAllocationCount = 64 * 1024;
@@ -1732,6 +1737,10 @@ tu_physical_device_init(struct tu_physical_device *device,
          assert(fd_dev_gen(&device->dev_id) == 7);
          device->enable_texel_buffer_emulation = instance->drirc.misc.enable_texel_buffer_emulation;
       }
+      if (device->info->props.max_storage_buffer_range_bytes < TU_D3D12_MAX_STORAGE_BUFFER_RANGE_BYTES) {
+         assert(fd_dev_gen(&device->dev_id) == 7);
+         device->enable_ssbo_emulation = instance->drirc.misc.enable_ssbo_emulation;
+      }
    }
 
    if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 3bce030edd8..d32a7d80f6d 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -153,6 +153,7 @@ struct tu_physical_device
    bool is_perf_cntr_selectable;
 
    bool enable_texel_buffer_emulation;
+   bool enable_ssbo_emulation;
 
    struct {
       uint32_t non_lazy_type_count;
diff --git a/src/freedreno/vulkan/tu_drirc_gen.py b/src/freedreno/vulkan/tu_drirc_gen.py
index dfe322254fa..95164eee1b2 100644
--- a/src/freedreno/vulkan/tu_drirc_gen.py
+++ b/src/freedreno/vulkan/tu_drirc_gen.py
@@ -112,6 +112,9 @@ def declare_options():
         B("tu_enable_texel_buffer_emulation", False,
           "Emulate texel buffers to allow a higher limit for elements that is in line with what some D3D12 games expect",
           c_name="enable_texel_buffer_emulation"),
+        B("tu_enable_ssbo_emulation", False,
+          "Emulate SSBOs to allow a higher storage buffer range limit that is in line with what some D3D12 games expect",
+          c_name="enable_ssbo_emulation"),
     ]
 
     features_options = []
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 5f8d49092c0..8c228e9ff66 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -9,6 +9,7 @@
 
 #include "nir/nir_xfb_info.h"
 #include "spirv/nir_spirv.h"
+#include "util/macros.h"
 #include "util/mesa-blake3.h"
 #include "vk_nir.h"
 #include "vk_nir_convert_ycbcr.h"
@@ -1367,6 +1368,25 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
       ir3_const_alloc(const_allocs, IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS,
                       ldgk_consts, 1);
 
+   if (dev->physical_device->enable_ssbo_emulation) {
+      const_state->num_bindless_base_addresses = layout->num_sets;
+      const_state->bindless_base_const_offset_vec4 = const_allocs->max_const_offset_vec4;
+
+      /* The table is indexed by descriptor set number, so it must also
+       * cover the reserved set when there is one.
+       */
+      if (dev->physical_device->reserved_set_idx >= 0) {
+         const_state->num_bindless_base_addresses =
+            MAX2(layout->num_sets, (unsigned) dev->physical_device->reserved_set_idx + 1);
+      }
+
+      if (!dev->compiler->info->props.load_shader_consts_via_preamble) {
+         /* Two dwords per 64-bit address, rounded up to whole vec4s. */
+         ir3_const_alloc(const_allocs, IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS,
+                         DIV_ROUND_UP(const_state->num_bindless_base_addresses * 2, 4), 1);
+      }
+   }
+
    struct lower_instr_params params = {
       .dev = dev,
       .shader = tu_shader,
@@ -1557,6 +1577,158 @@ tu_nir_lower_ssbo_descriptor(nir_shader *shader,
                                      (void *)dev);
 }
 
+/* Recover the buffer size in bytes from the two words returned by resbase:
+ * the low word carries the size shifted left by 6, the high word carries
+ * the size bits from 20 upwards (see fdl6_buffer_view_init()).
+ */
+static nir_def *
+build_ssbo_size_from_resbase(nir_builder *b, nir_def *desc)
+{
+   assert(nir_def_is_intrinsic(desc));
+   nir_def *encoded_data = nir_resbase_ir3(b, 32, desc);
+   nir_def *encoded_data_lo = nir_channel(b, encoded_data, 0);
+   nir_def *encoded_data_hi = nir_channel(b, encoded_data, 1);
+
+   nir_def *size_lo = nir_ushr_imm(b, encoded_data_lo, 6);
+   nir_def *size_hi = nir_ishl_imm(b, encoded_data_hi, 20);
+
+   return nir_ior(b, size_lo, size_hi);
+}
+
+/* Return the bindless_resource_ir3 intrinsic that produces the buffer index
+ * source of the given SSBO intrinsic.
+ */
+static nir_intrinsic_instr *
+get_ssbo_bindless(nir_intrinsic_instr *intr)
+{
+   nir_def *buffer = nir_get_io_index_src(intr)->ssa;
+   assert(nir_def_is_intrinsic(buffer));
+
+   nir_intrinsic_instr *bindless = nir_def_as_intrinsic(buffer);
+   assert(bindless->intrinsic == nir_intrinsic_bindless_resource_ir3);
+
+   return bindless;
+}
+
+/* Build the 64-bit address of the buffer behind a bindless SSBO descriptor:
+ * look up the base address of the descriptor's set, then load the buffer
+ * address stored in dwords 11 and 12 of the descriptor itself.
+ */
+static nir_def *
+build_ssbo_global_addr(nir_builder *b,
+                       nir_intrinsic_instr *bindless,
+                       struct tu_shader *shader,
+                       bool load_shader_consts_via_preamble,
+                       const struct ir3_const_allocations *const_allocs)
+{
+   nir_def *set_base;
+
+   if (load_shader_consts_via_preamble) {
+      set_base =
+         ir3_load_driver_ubo(b, 2, &shader->const_state.bindless_base_addrs_ubo,
+                             nir_intrinsic_desc_set(bindless) * 2);
+   } else {
+      const unsigned dword_base =
+         const_allocs->consts[IR3_CONST_ALLOC_BINDLESS_BASE_ADDRS].offset_vec4 *
+            4 +
+         nir_intrinsic_desc_set(bindless) * 2;
+
+      set_base =
+         nir_load_const_ir3(b, 2, 32, nir_imm_int(b, 0), .base = dword_base);
+   }
+
+   nir_def *descriptor_offset = nir_iadd_imm(
+      b, nir_imul_imm(b, bindless->src[0].ssa, FDL6_TEX_CONST_DWORDS * 4),
+      11 * 4);
+   nir_def *descriptor_words = nir_load_global_ir3(
+      b, 2, 32, nir_pack_64_2x32(b, set_base), descriptor_offset,
+      .access = (enum gl_access_qualifier)(
+         ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER | ACCESS_CAN_SPECULATE),
+      .align_mul = 4, .align_offset = 0);
+
+   return nir_pack_64_2x32(b, descriptor_words);
+}
+
+/* Parameters handed to lower_ssbo_address_size() through its data pointer. */
+struct lower_ssbo_address_size_state {
+   struct tu_shader *shader;
+   const struct ir3_const_allocations *const_allocs;
+   bool load_shader_consts_via_preamble;
+};
+
+/* Lower load_ssbo_address by folding the descriptor's buffer address into
+ * each global load/store/atomic that consumes it, and get_ssbo_size by
+ * decoding the size stored in the descriptor.
+ */
+static bool
+lower_ssbo_address_size(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_ssbo_address:
+   case nir_intrinsic_get_ssbo_size:
+      break;
+   default:
+      return false;
+   }
+
+   auto state = static_cast<const struct lower_ssbo_address_size_state *>(data);
+   b->cursor = nir_before_instr(&intr->instr);
+
+   if (intr->intrinsic == nir_intrinsic_load_ssbo_address) {
+      nir_def *base = build_ssbo_global_addr(
+         b, get_ssbo_bindless(intr), state->shader,
+         state->load_shader_consts_via_preamble, state->const_allocs);
+      nir_def *offset = intr->src[1].ssa;
+
+      nir_foreach_use_safe (use, &intr->def) {
+         nir_instr *use_instr = nir_src_use_instr(use);
+         b->cursor = nir_before_instr(use_instr);
+
+         nir_intrinsic_instr *use_intr = nir_instr_as_intrinsic(use_instr);
+
+         switch (use_intr->intrinsic) {
+         case nir_intrinsic_global_atomic:
+         case nir_intrinsic_global_atomic_swap: {
+            nir_def *addr = nir_iadd(b, base, nir_u2u64(b, offset));
+            nir_src_rewrite(nir_get_io_offset_src(use_intr), addr);
+            break;
+         }
+         case nir_intrinsic_load_global: {
+            nir_def *load = nir_load_global_ir3(
+               b, use_intr->def.num_components, use_intr->def.bit_size, base,
+               offset, .access = nir_intrinsic_access(use_intr));
+            nir_def_replace(&use_intr->def, load);
+            break;
+         }
+         case nir_intrinsic_store_global: {
+            nir_store_global_ir3(b, nir_get_io_data_src(use_intr)->ssa, base,
+                                 offset,
+                                 .access = nir_intrinsic_access(use_intr));
+            nir_instr_remove(use_instr);
+            break;
+         }
+         default:
+            UNREACHABLE("unexpected use of @load_ssbo_address");
+         }
+      }
+   } else {
+      nir_def *ssbo_size =
+         build_ssbo_size_from_resbase(b, nir_get_io_index_src(intr)->ssa);
+      nir_def_replace(&intr->def, ssbo_size);
+   }
+
+   return true;
+}
+
+/* Run lower_ssbo_address_size() over every intrinsic in the shader. */
+static bool
+tu_nir_lower_ssbo_address_size(
+   nir_shader *shader, const struct lower_ssbo_address_size_state *state)
+{
+   return nir_shader_intrinsics_pass(shader, lower_ssbo_address_size,
+                                     nir_metadata_control_flow, (void *) state);
+}
+
 struct lower_fdm_state {
    nir_variable *layer_var;
    nir_variable *viewport_var;
@@ -2958,6 +3130,7 @@ tu_shader_init(struct tu_device *dev, const void *key_data, size_t key_size)
    shader->const_state.fdm_ubo.idx = -1;
    shader->const_state.dynamic_offsets_ubo.idx = -1;
    shader->const_state.inline_uniforms_ubo.idx = -1;
+   shader->const_state.bindless_base_addrs_ubo.idx = -1;
 
    return shader;
 }
@@ -3251,6 +3424,24 @@ tu_shader_create(struct tu_device *dev,
     */
    NIR_PASS(_, nir, tu_nir_lower_ssbo_descriptor, dev);
 
+   if (dev->physical_device->enable_ssbo_emulation) {
+      nir_lower_ssbo_options ssbo_options = {
+         .native_offset = true,
+         .min_ssbo_size = dev->compiler->info->props.max_storage_buffer_range_bytes,
+         .bounds_check = true,
+      };
+
+      NIR_PASS(_, nir, nir_lower_ssbo, &ssbo_options);
+
+      struct lower_ssbo_address_size_state lower_ssbo_state = {
+         .shader = shader,
+         .const_allocs = &const_allocs,
+         .load_shader_consts_via_preamble =
+            dev->compiler->info->props.load_shader_consts_via_preamble,
+      };
+      NIR_PASS(_, nir, tu_nir_lower_ssbo_address_size, &lower_ssbo_state);
+   }
+
    const struct ir3_shader_options options = {
       .api_wavesize = key->api_wavesize,
       .real_wavesize = key->real_wavesize,
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 5817546a1c8..1cd0e890272 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -50,10 +50,13 @@ struct tu_const_state
    uint32_t dynamic_offset_loc;
    unsigned num_inline_ubos;
    struct tu_inline_ubo ubos[MAX_INLINE_UBOS];
+   uint32_t num_bindless_base_addresses;
+   uint32_t bindless_base_const_offset_vec4;
 
    struct ir3_driver_ubo fdm_ubo;
    struct ir3_driver_ubo dynamic_offsets_ubo;
    struct ir3_driver_ubo inline_uniforms_ubo;
+   struct ir3_driver_ubo bindless_base_addrs_ubo;
 };
 
 struct tu_shader