diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c
index 3d41d36973f..87523322a59 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.c
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.c
@@ -596,11 +596,13 @@ nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
 }
 
 static void
-nvk_bind_descriptor_sets(UNUSED struct nvk_cmd_buffer *cmd,
+nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                          struct nvk_descriptor_state *desc,
                          const VkBindDescriptorSetsInfoKHR *info)
 {
    VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   struct nvk_physical_device *pdev = nvk_device_physical(dev);
 
    /* Fro the Vulkan 1.3.275 spec:
     *
@@ -644,9 +646,16 @@ nvk_bind_descriptor_sets(UNUSED struct nvk_cmd_buffer *cmd,
 
       if (set != NULL && set_layout->dynamic_buffer_count > 0) {
          for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
-            struct nvk_buffer_address addr = set->dynamic_buffers[j];
-            addr.base_addr += info->pDynamicOffsets[next_dyn_offset + j];
-            desc->root.dynamic_buffers[dyn_buffer_start + j] = addr;
+            union nvk_buffer_descriptor db = set->dynamic_buffers[j];
+            uint32_t offset = info->pDynamicOffsets[next_dyn_offset + j];
+            if (BITSET_TEST(set_layout->dynamic_ubos, j) &&
+                nvk_use_bindless_cbuf(&pdev->info)) {
+               assert((offset & 0xf) == 0);
+               db.cbuf.base_addr_shift_4 += offset >> 4;
+            } else {
+               db.addr.base_addr += offset;
+            }
+            desc->root.dynamic_buffers[dyn_buffer_start + j] = db;
          }
          next_dyn_offset += set->layout->dynamic_buffer_count;
       }
@@ -804,6 +813,9 @@ nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                              const struct nvk_cbuf *cbuf,
                              struct nvk_buffer_address *addr_out)
 {
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
    switch (cbuf->type) {
    case NVK_CBUF_TYPE_INVALID:
       *addr_out = (struct nvk_buffer_address) { .size = 0 };
@@ -827,7 +839,8 @@ nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
    case NVK_CBUF_TYPE_DYNAMIC_UBO: {
       const uint32_t dyn_start =
          desc->root.set_dynamic_buffer_start[cbuf->desc_set];
-      *addr_out = desc->root.dynamic_buffers[dyn_start + cbuf->dynamic_idx];
+      *addr_out = nvk_ubo_descriptor_addr(pdev,
+         desc->root.dynamic_buffers[dyn_start + cbuf->dynamic_idx]);
       return true;
    }
 
@@ -840,8 +853,9 @@ nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
         return false;
 
      assert(cbuf->desc_offset < NVK_PUSH_DESCRIPTOR_SET_SIZE);
-      void *desc = &push->data[cbuf->desc_offset];
-      *addr_out = *(struct nvk_buffer_address *)desc;
+      union nvk_buffer_descriptor desc;
+      memcpy(&desc, &push->data[cbuf->desc_offset], sizeof(desc));
+      *addr_out = nvk_ubo_descriptor_addr(pdev, desc);
       return true;
    }
 
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h
index aee44cd0875..0357056dffa 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.h
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.h
@@ -65,7 +65,7 @@ struct nvk_root_descriptor_table {
    uint8_t set_dynamic_buffer_start[NVK_MAX_SETS];
 
    /* Dynamic buffer bindings */
-   struct nvk_buffer_address dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
+   union nvk_buffer_descriptor dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
 
    /* enfore alignment to 0x100 as needed pre pascal */
    uint8_t __padding[0x40];
diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c
index 9cb9997ed6c..ff0fcb66271 100644
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@@ -2255,15 +2255,38 @@ nvk_mme_bind_cbuf_desc(struct mme_builder *b)
    /* First 4 bits are group, later bits are slot */
    struct mme_value group_slot = mme_load(b);
 
-   if (b->devinfo->cls_eng3d >= TURING_A) {
-      struct mme_value64 addr = mme_load_addr64(b);
-      mme_tu104_read_fifoed(b, addr, mme_imm(3));
-   }
+   struct mme_value addr_lo, addr_hi, size;
+   if (nvk_use_bindless_cbuf(b->devinfo)) {
+      if (b->devinfo->cls_eng3d >= TURING_A) {
+         struct mme_value64 addr = mme_load_addr64(b);
+         mme_tu104_read_fifoed(b, addr, mme_imm(2));
+      }
 
-   /* Load the descriptor */
-   struct mme_value addr_lo = mme_load(b);
-   struct mme_value addr_hi = mme_load(b);
-   struct mme_value size = mme_load(b);
+      /* Load the descriptor */
+      struct mme_value desc_lo = mme_load(b);
+      struct mme_value desc_hi = mme_load(b);
+
+      /* The bottom 45 bits are addr >> 4 */
+      addr_lo = mme_merge(b, mme_zero(), desc_lo, 4, 28, 0);
+      addr_hi = mme_merge(b, mme_zero(), desc_lo, 0, 4, 28);
+      mme_merge_to(b, addr_hi, addr_hi, desc_hi, 4, 13, 0);
+
+      /* The top 19 bits are size >> 4 */
+      size = mme_merge(b, mme_zero(), desc_hi, 4, 19, 13);
+
+      mme_free_reg(b, desc_hi);
+      mme_free_reg(b, desc_lo);
+   } else {
+      if (b->devinfo->cls_eng3d >= TURING_A) {
+         struct mme_value64 addr = mme_load_addr64(b);
+         mme_tu104_read_fifoed(b, addr, mme_imm(3));
+      }
+
+      /* Load the descriptor */
+      addr_lo = mme_load(b);
+      addr_hi = mme_load(b);
+      size = mme_load(b);
+   }
 
    struct mme_value cb = mme_alloc_reg(b);
    mme_if(b, ieq, size, mme_zero()) {
@@ -2285,7 +2308,7 @@ nvk_mme_bind_cbuf_desc(struct mme_builder *b)
       mme_emit(b, addr_hi);
       mme_emit(b, addr_lo);
 
-      /* Bottim bit is the valid bit, 8:4 are shader slot */
+      /* Bottom bit is the valid bit, 8:4 are shader slot */
       mme_merge_to(b, cb, mme_imm(1), group_slot, 4, 5, 4);
    }
 
diff --git a/src/nouveau/vulkan/nvk_descriptor_set.c b/src/nouveau/vulkan/nvk_descriptor_set.c
index 2d14ceedbf8..7f3de145448 100644
--- a/src/nouveau/vulkan/nvk_descriptor_set.c
+++ b/src/nouveau/vulkan/nvk_descriptor_set.c
@@ -124,7 +124,7 @@ write_storage_image_view_desc(struct nvk_descriptor_set *set,
    write_desc(set, binding, elem, &desc, sizeof(desc));
 }
 
-static struct nvk_buffer_address
+static union nvk_buffer_descriptor
 ubo_desc(struct nvk_physical_device *pdev,
          const VkDescriptorBufferInfo *const info,
         uint32_t binding, uint32_t elem)
@@ -140,10 +140,17 @@ ubo_desc(struct nvk_physical_device *pdev,
    addr_range.addr = align64(addr_range.addr, min_cbuf_alignment);
    addr_range.range = align(addr_range.range, min_cbuf_alignment);
 
-   return (struct nvk_buffer_address) {
-      .base_addr = align64(addr_range.addr, min_cbuf_alignment),
-      .size = align(addr_range.range, min_cbuf_alignment),
-   };
+   if (nvk_use_bindless_cbuf(&pdev->info)) {
+      return (union nvk_buffer_descriptor) { .cbuf = {
+         .base_addr_shift_4 = addr_range.addr >> 4,
+         .size_shift_4 = addr_range.range >> 4,
+      }};
+   } else {
+      return (union nvk_buffer_descriptor) { .addr = {
+         .base_addr = addr_range.addr,
+         .size = addr_range.range,
+      }};
+   }
 }
 
 static void
@@ -152,7 +159,7 @@ write_ubo_desc(struct nvk_physical_device *pdev,
                const VkDescriptorBufferInfo *const info,
                uint32_t binding, uint32_t elem)
 {
-   const struct nvk_buffer_address desc = ubo_desc(pdev, info, binding, elem);
+   const union nvk_buffer_descriptor desc = ubo_desc(pdev, info, binding, elem);
 
    write_desc(set, binding, elem, &desc, sizeof(desc));
 }
@@ -168,7 +175,7 @@ write_dynamic_ubo_desc(struct nvk_physical_device *pdev,
       ubo_desc(pdev, info, binding, elem);
 }
 
-static struct nvk_buffer_address
+static union nvk_buffer_descriptor
 ssbo_desc(const VkDescriptorBufferInfo *const info,
           uint32_t binding, uint32_t elem)
 {
@@ -182,10 +189,10 @@ ssbo_desc(const VkDescriptorBufferInfo *const info,
    addr_range.addr = align64(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT);
    addr_range.range = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT);
 
-   return (struct nvk_buffer_address) {
-      .base_addr = align64(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT),
-      .size = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT),
-   };
+   return (union nvk_buffer_descriptor) { .addr = {
+      .base_addr = addr_range.addr,
+      .size = addr_range.range,
+   }};
 }
 
@@ -194,7 +201,7 @@ write_ssbo_desc(struct nvk_descriptor_set *set,
                 const VkDescriptorBufferInfo *const info,
                 uint32_t binding, uint32_t elem)
 {
-   const struct nvk_buffer_address desc = ssbo_desc(info, binding, elem);
+   const union nvk_buffer_descriptor desc = ssbo_desc(info, binding, elem);
 
    write_desc(set, binding, elem, &desc, sizeof(desc));
 }
diff --git a/src/nouveau/vulkan/nvk_descriptor_set.h b/src/nouveau/vulkan/nvk_descriptor_set.h
index c0af30a912d..99de187d948 100644
--- a/src/nouveau/vulkan/nvk_descriptor_set.h
+++ b/src/nouveau/vulkan/nvk_descriptor_set.h
@@ -9,6 +9,7 @@
 
 #include "nouveau_bo.h"
 #include "nvk_device.h"
+#include "nvk_physical_device.h"
 #include "vk_object.h"
 #include "vk_descriptor_update_template.h"
 
@@ -52,6 +53,16 @@ PRAGMA_DIAGNOSTIC_POP
 static_assert(sizeof(struct nvk_buffer_view_descriptor) == 4,
               "nvk_buffer_view_descriptor has no holes");
 
+PRAGMA_DIAGNOSTIC_PUSH
+PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
+struct nvk_bindless_cbuf {
+   uint64_t base_addr_shift_4:45;
+   uint64_t size_shift_4:19;
+};
+PRAGMA_DIAGNOSTIC_POP
+static_assert(sizeof(struct nvk_bindless_cbuf) == 8,
+              "nvk_bindless_cbuf has no holes");
+
 /* This has to match nir_address_format_64bit_bounded_global */
 PRAGMA_DIAGNOSTIC_PUSH
 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
@@ -64,6 +75,31 @@ PRAGMA_DIAGNOSTIC_POP
 static_assert(sizeof(struct nvk_buffer_address) == 16,
               "nvk_buffer_address has no holes");
 
+union nvk_buffer_descriptor {
+   struct nvk_buffer_address addr;
+   struct nvk_bindless_cbuf cbuf;
+};
+
+static inline bool
+nvk_use_bindless_cbuf(const struct nv_device_info *info)
+{
+   return info->cls_eng3d >= 0xC597 /* TURING_A */;
+}
+
+static inline struct nvk_buffer_address
+nvk_ubo_descriptor_addr(const struct nvk_physical_device *pdev,
+                        union nvk_buffer_descriptor desc)
+{
+   if (nvk_use_bindless_cbuf(&pdev->info)) {
+      return (struct nvk_buffer_address) {
+         .base_addr = desc.cbuf.base_addr_shift_4 << 4,
+         .size = desc.cbuf.size_shift_4 << 4,
+      };
+   } else {
+      return desc.addr;
+   }
+}
+
 #define NVK_BUFFER_ADDRESS_NULL ((struct nvk_buffer_address) { .size = 0 })
 
 struct nvk_descriptor_pool {
@@ -90,7 +126,7 @@ struct nvk_descriptor_set {
    uint64_t addr;
    uint32_t size;
 
-   struct nvk_buffer_address dynamic_buffers[];
+   union nvk_buffer_descriptor dynamic_buffers[];
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_descriptor_set, base, VkDescriptorSet,
diff --git a/src/nouveau/vulkan/nvk_descriptor_set_layout.c b/src/nouveau/vulkan/nvk_descriptor_set_layout.c
index fd5b824fda5..29d0bfb6348 100644
--- a/src/nouveau/vulkan/nvk_descriptor_set_layout.c
+++ b/src/nouveau/vulkan/nvk_descriptor_set_layout.c
@@ -51,7 +51,7 @@ nvk_descriptor_stride_align_for_type(const struct nvk_physical_device *pdev,
 
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      *stride = *alignment = sizeof(struct nvk_buffer_address);
+      *stride = *alignment = sizeof(union nvk_buffer_descriptor);
       break;
 
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
diff --git a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
index 1258367b1bb..94b91727851 100644
--- a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
+++ b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
@@ -50,6 +50,7 @@ compar_cbufs(const void *_a, const void *_b)
 struct lower_descriptors_ctx {
    const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];
 
+   bool use_bindless_cbuf;
    bool clamp_desc_array_bounds;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
@@ -610,13 +611,14 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
       index = nir_iadd(b, index,
                        nir_iadd_imm(b, dynamic_buffer_start,
                                     binding_layout->dynamic_buffer_index));
-
+      uint32_t desc_size = sizeof(union nvk_buffer_descriptor);
       nir_def *root_desc_offset =
-         nir_iadd_imm(b, nir_imul_imm(b, index, sizeof(struct nvk_buffer_address)),
+         nir_iadd_imm(b, nir_imul_imm(b, index, desc_size),
                       nvk_root_descriptor_offset(dynamic_buffers));
 
-      assert(num_components == 4 && bit_size == 32);
-      return nir_ldc_nv(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
+      assert(num_components * bit_size <= desc_size * 8);
+      return nir_ldc_nv(b, num_components, bit_size,
+                        nir_imm_int(b, 0), root_desc_offset,
                         .align_mul = 16, .align_offset = 0);
    }
 
@@ -628,12 +630,19 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
       assert(binding_layout->stride == 1);
       const uint32_t binding_size = binding_layout->array_size;
 
-      /* Convert it to nir_address_format_64bit_bounded_global */
-      assert(num_components == 4 && bit_size == 32);
-      return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
-                         nir_unpack_64_2x32_split_y(b, base_addr),
-                         nir_imm_int(b, binding_size),
-                         nir_imm_int(b, 0));
+      if (ctx->use_bindless_cbuf) {
+         assert(num_components == 1 && bit_size == 64);
+         const uint32_t size = align(binding_size, 16);
+         return nir_ior_imm(b, nir_ishr_imm(b, base_addr, 4),
+                            ((uint64_t)size >> 4) << 45);
+      } else {
+         /* Convert it to nir_address_format_64bit_bounded_global */
+         assert(num_components == 4 && bit_size == 32);
+         return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
+                            nir_unpack_64_2x32_split_y(b, base_addr),
+                            nir_imm_int(b, binding_size),
+                            nir_imm_int(b, 0));
+      }
    }
 
    default: {
@@ -685,6 +694,29 @@ is_idx_intrin(nir_intrinsic_instr *intrin)
    return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
 }
 
+static nir_def *
+buffer_address_to_ldcx_handle(nir_builder *b, nir_def *addr)
+{
+   nir_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, addr, 0x3));
+   nir_def *size = nir_channel(b, addr, 2);
+   nir_def *offset = nir_channel(b, addr, 3);
+
+   nir_def *addr16 = nir_ushr_imm(b, base_addr, 4);
+   nir_def *addr16_lo = nir_unpack_64_2x32_split_x(b, addr16);
+   nir_def *addr16_hi = nir_unpack_64_2x32_split_y(b, addr16);
+
+   /* If we assume the top bits of the address are 0 as well as the bottom
+    * two bits of the size (we can trust this since it's a descriptor), then
+    *
+    *    ((size >> 4) << 13) | addr
+    *
+    * is just an imad.
+    */
+   nir_def *handle_hi = nir_imad(b, size, nir_imm_int(b, 1 << 9), addr16_hi);
+
+   return nir_vec3(b, addr16_lo, handle_hi, offset);
+}
+
 static nir_def *
 load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                                const struct lower_descriptors_ctx *ctx)
@@ -701,13 +733,23 @@ load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
    uint32_t binding = nir_intrinsic_binding(intrin);
 
    index = nir_iadd(b, index, intrin->src[0].ssa);
-   nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+   if (descriptor_type_is_ubo(desc_type) && ctx->use_bindless_cbuf) {
+      nir_def *desc = load_descriptor(b, 1, 64, set, binding, index, 0, ctx);
 
-   /* We know a priori that the the .w compnent (offset) is zero */
-   return nir_vec4(b, nir_channel(b, desc, 0),
-                      nir_channel(b, desc, 1),
-                      nir_channel(b, desc, 2),
-                      nir_imm_int(b, 0));
+      /* The descriptor is just the handle. NIR also needs an offset. */
+      return nir_vec3(b, nir_unpack_64_2x32_split_x(b, desc),
+                         nir_unpack_64_2x32_split_y(b, desc),
+                         nir_imm_int(b, 0));
+   } else {
+      nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
+
+      /* We know a priori that the .w component (offset) is zero */
+      return nir_vec4(b, nir_channel(b, desc, 0),
+                         nir_channel(b, desc, 1),
+                         nir_channel(b, desc, 2),
+                         nir_imm_int(b, 0));
+   }
 }
 
 static bool
@@ -1253,6 +1295,7 @@ nvk_nir_lower_descriptors(nir_shader *nir,
                           struct nvk_cbuf_map *cbuf_map_out)
 {
    struct lower_descriptors_ctx ctx = {
+      .use_bindless_cbuf = nvk_use_bindless_cbuf(&pdev->info),
       .clamp_desc_array_bounds =
         rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
         rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index d6e6f6e9f92..310a90d523d 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -116,14 +116,18 @@ nir_address_format
 nvk_ubo_addr_format(const struct nvk_physical_device *pdev,
                     VkPipelineRobustnessBufferBehaviorEXT robustness)
 {
-   switch (robustness) {
-   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
-      return nir_address_format_64bit_global_32bit_offset;
-   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
-   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
-      return nir_address_format_64bit_bounded_global;
-   default:
-      unreachable("Invalid robust buffer access behavior");
+   if (nvk_use_bindless_cbuf(&pdev->info)) {
+      return nir_address_format_vec2_index_32bit_offset;
+   } else {
+      switch (robustness) {
+      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
+         return nir_address_format_64bit_global_32bit_offset;
+      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
+      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
+         return nir_address_format_64bit_bounded_global;
+      default:
+         unreachable("Invalid robust buffer access behavior");
+      }
    }
 }
 
@@ -258,10 +262,21 @@ lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
       const uint32_t align_mul = nir_intrinsic_align_mul(load);
       const uint32_t align_offset = nir_intrinsic_align_offset(load);
 
-      nir_def *val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
-                                index, offset, .access = access,
-                                .align_mul = align_mul,
-                                .align_offset = align_offset);
+      nir_def *val;
+      if (load->src[0].ssa->num_components == 1) {
+         val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
+                          index, offset, .access = access,
+                          .align_mul = align_mul,
+                          .align_offset = align_offset);
+      } else if (load->src[0].ssa->num_components == 2) {
+         nir_def *handle = nir_pack_64_2x32(b, load->src[0].ssa);
+         val = nir_ldcx_nv(b, load->num_components, load->def.bit_size,
+                           handle, offset, .access = access,
+                           .align_mul = align_mul,
+                           .align_offset = align_offset);
+      } else {
+         unreachable("Invalid UBO index");
+      }
       nir_def_rewrite_uses(&load->def, val);
       return true;
    }
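
The bindless cbuf descriptor introduced by this patch is a single 64-bit word: bits [44:0] hold base_addr >> 4 and bits [63:45] hold size >> 4, which is why the dynamic-offset path asserts 16-byte alignment and ubo_desc() aligns both fields to the minimum cbuf alignment first. The standalone sketch below is illustrative only and not part of the patch (the driver uses the struct nvk_bindless_cbuf bitfield directly, plus nir_ior_imm/nir_imad on the NIR side); it just round-trips the packing to make the layout concrete.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helpers mirroring struct nvk_bindless_cbuf:
 * bits [44:0] = base_addr >> 4, bits [63:45] = size >> 4.
 */
static uint64_t
pack_bindless_cbuf(uint64_t base_addr, uint32_t size)
{
   /* Both values must be 16-byte aligned, as the patch asserts for dynamic
    * offsets and guarantees via the minimum cbuf alignment in ubo_desc().
    */
   assert((base_addr & 0xf) == 0 && (size & 0xf) == 0);
   assert((base_addr >> 4) < (1ull << 45));
   assert((size >> 4) < (1u << 19));
   return (base_addr >> 4) | ((uint64_t)(size >> 4) << 45);
}

static void
unpack_bindless_cbuf(uint64_t desc, uint64_t *base_addr, uint32_t *size)
{
   /* The reverse of the packing above, like nvk_ubo_descriptor_addr(). */
   *base_addr = (desc & ((1ull << 45) - 1)) << 4;
   *size = (uint32_t)(desc >> 45) << 4;
}

int
main(void)
{
   uint64_t addr = 0x123456789a0ull;   /* 16-byte aligned GPU VA */
   uint32_t size = 0x200;              /* 16-byte aligned range */
   uint64_t desc = pack_bindless_cbuf(addr, size);

   uint64_t addr_out;
   uint32_t size_out;
   unpack_bindless_cbuf(desc, &addr_out, &size_out);
   assert(addr_out == addr && size_out == size);

   printf("desc = 0x%016" PRIx64 "\n", desc);
   return 0;
}

In the MME path the same 64-bit descriptor arrives from the FIFO as desc_lo/desc_hi, and the mme_merge() calls in nvk_mme_bind_cbuf_desc() pull these two fields back apart 32 bits at a time.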