diff --git a/docs/features.txt b/docs/features.txt
index 15ae417a632..132c85022f4 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -444,7 +444,7 @@ Vulkan 1.1 -- all DONE: anv, lvp, radv, tu, vn
   VK_KHR_sampler_ycbcr_conversion                       DONE (anv, hasvk, nvk, radv, tu, v3dv, vn)
   VK_KHR_shader_draw_parameters                         DONE (anv, dzn, hasvk, lvp, nvk, radv, tu, vn)
   VK_KHR_storage_buffer_storage_class                   DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
-  VK_KHR_variable_pointers                              DONE (anv, hasvk, lvp, panvk, radv, tu, v3dv, vn)
+  VK_KHR_variable_pointers                              DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
 
 
 Vulkan 1.2 -- all DONE: anv, tu, vn
diff --git a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
index 4af4d12e6ce..3155b262758 100644
--- a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
+++ b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
@@ -10,7 +10,6 @@ struct lower_descriptors_ctx {
    const struct vk_pipeline_layout *layout;
 
    bool clamp_desc_array_bounds;
-   nir_address_format desc_addr_format;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
 };
@@ -27,18 +26,27 @@ load_descriptor_set_addr(nir_builder *b, uint32_t set,
                        .align_mul = 8, .align_offset = 0, .range = ~0);
 }
 
+static const struct nvk_descriptor_set_binding_layout *
+get_binding_layout(uint32_t set, uint32_t binding,
+                   const struct lower_descriptors_ctx *ctx)
+{
+   const struct vk_pipeline_layout *layout = ctx->layout;
+
+   assert(set < layout->set_count);
+   const struct nvk_descriptor_set_layout *set_layout =
+      vk_to_nvk_descriptor_set_layout(layout->set_layouts[set]);
+
+   assert(binding < set_layout->binding_count);
+   return &set_layout->binding[binding];
+}
+
 static nir_ssa_def *
 load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
                 uint32_t set, uint32_t binding, nir_ssa_def *index,
                 unsigned offset_B, const struct lower_descriptors_ctx *ctx)
 {
-   assert(set < NVK_MAX_SETS);
-
-   const struct vk_pipeline_layout *layout = ctx->layout;
-   const struct nvk_descriptor_set_layout *set_layout =
-      vk_to_nvk_descriptor_set_layout(layout->set_layouts[set]);
    const struct nvk_descriptor_set_binding_layout *binding_layout =
-      &set_layout->binding[binding];
+      get_binding_layout(set, binding, ctx);
 
    if (ctx->clamp_desc_array_bounds)
       index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
@@ -48,7 +56,7 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
       /* Get the index in the root descriptor table dynamic_buffers array. */
       uint8_t dynamic_buffer_start =
-         nvk_descriptor_set_layout_dynbuf_start(layout, set);
+         nvk_descriptor_set_layout_dynbuf_start(ctx->layout, set);
 
       index = nir_iadd_imm(b, index,
                            dynamic_buffer_start +
@@ -97,6 +105,18 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
    }
 }
 
+static bool
+is_idx_intrin(nir_intrinsic_instr *intrin)
+{
+   while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
+      intrin = nir_src_as_intrinsic(intrin->src[0]);
+      if (intrin == NULL)
+         return false;
+   }
+
+   return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
+}
+
 static nir_ssa_def *
 load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                                const struct lower_descriptors_ctx *ctx)
@@ -117,12 +137,19 @@ load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
 }
 
 static bool
-lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
-                             const struct lower_descriptors_ctx *ctx)
+try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
+                                 const struct lower_descriptors_ctx *ctx)
 {
+   ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
    b->cursor = nir_before_instr(&intrin->instr);
 
    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
+   if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
+      assert(desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+             desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
+      return false;
+   }
+
    nir_ssa_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
 
    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
@@ -275,12 +302,12 @@ lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
 }
 
 static bool
-lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
-             const struct lower_descriptors_ctx *ctx)
+try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
+                 const struct lower_descriptors_ctx *ctx)
 {
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_vulkan_descriptor:
-      return lower_load_vulkan_descriptor(b, intrin, ctx);
+      return try_lower_load_vulkan_descriptor(b, intrin, ctx);
 
    case nir_intrinsic_load_workgroup_size:
       unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
@@ -384,8 +411,8 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
 }
 
 static bool
-lower_descriptors_instr(nir_builder *b, nir_instr *instr,
-                        void *_data)
+try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
+                            void *_data)
 {
    const struct lower_descriptors_ctx *ctx = _data;
 
@@ -393,7 +420,207 @@ lower_descriptors_instr(nir_builder *b, nir_instr *instr,
    case nir_instr_type_tex:
       return lower_tex(b, nir_instr_as_tex(instr), ctx);
    case nir_instr_type_intrinsic:
-      return lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
+      return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
+   default:
+      return false;
+   }
+}
+
+static bool
+lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
+                          const struct lower_descriptors_ctx *ctx)
+{
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+   if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
+       desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+      return false;
+
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   uint32_t set = nir_intrinsic_desc_set(intrin);
+   uint32_t binding = nir_intrinsic_binding(intrin);
+   nir_ssa_def *index = intrin->src[0].ssa;
+
+   const struct nvk_descriptor_set_binding_layout *binding_layout =
+      get_binding_layout(set, binding, ctx);
+
+   nir_ssa_def *binding_addr;
+   uint8_t binding_stride;
+   switch (binding_layout->type) {
+   case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
+      nir_ssa_def *set_addr = load_descriptor_set_addr(b, set, ctx);
+      binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
+      binding_stride = binding_layout->stride;
+      break;
+   }
+
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+      const uint32_t root_desc_addr_offset =
+         nvk_root_descriptor_offset(root_desc_addr);
+
+      nir_ssa_def *root_desc_addr =
+         nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
+                      nir_imm_int(b, root_desc_addr_offset),
+                      .align_mul = 8, .align_offset = 0, .range = ~0);
+
+      const uint8_t dynamic_buffer_start =
+         nvk_descriptor_set_layout_dynbuf_start(ctx->layout, set) +
+         binding_layout->dynamic_buffer_index;
+
+      const uint32_t dynamic_binding_offset =
+         nvk_root_descriptor_offset(dynamic_buffers) +
+         dynamic_buffer_start * sizeof(struct nvk_buffer_address);
+
+      binding_addr = nir_iadd_imm(b, root_desc_addr, dynamic_binding_offset);
+      binding_stride = sizeof(struct nvk_buffer_address);
+      break;
+   }
+
+   default:
+      unreachable("Not an SSBO descriptor");
+   }
+
+   /* Tuck the stride in the top 8 bits of the binding address */
+   binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
+
+   const uint32_t binding_size = binding_layout->array_size * binding_stride;
+   nir_ssa_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
+
+   nir_ssa_def *addr;
+   switch (ctx->ssbo_addr_format) {
+   case nir_address_format_64bit_global:
+      addr = nir_iadd(b, binding_addr, nir_u2u64(b, offset_in_binding));
+      break;
+
+   case nir_address_format_64bit_global_32bit_offset:
+   case nir_address_format_64bit_bounded_global:
+      addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
+                         nir_unpack_64_2x32_split_y(b, binding_addr),
+                         nir_imm_int(b, binding_size),
+                         offset_in_binding);
+      break;
+
+   default:
+      unreachable("Unknown address mode");
+   }
+
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, addr);
+
+   return true;
+}
+
+static bool
+lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
+                            const struct lower_descriptors_ctx *ctx)
+{
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+   if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
+       desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+      return false;
+
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def *addr = intrin->src[0].ssa;
+   nir_ssa_def *index = intrin->src[1].ssa;
+
+   nir_ssa_def *addr_high32;
+   switch (ctx->ssbo_addr_format) {
+   case nir_address_format_64bit_global:
+      addr_high32 = nir_unpack_64_2x32_split_y(b, addr);
+      break;
+
+   case nir_address_format_64bit_global_32bit_offset:
+   case nir_address_format_64bit_bounded_global:
+      addr_high32 = nir_channel(b, addr, 1);
+      break;
+
+   default:
+      unreachable("Unknown address mode");
+   }
+
+   nir_ssa_def *stride = nir_ushr_imm(b, addr_high32, 24);
+   nir_ssa_def *offset = nir_imul(b, index, stride);
+
+   addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
+                              nir_var_mem_ssbo, offset);
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, addr);
+
+   return true;
+}
+
+static bool
+lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
+                           const struct lower_descriptors_ctx *ctx)
+{
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+   if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
+       desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+      return false;
+
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def *addr = intrin->src[0].ssa;
+
+   nir_ssa_def *desc;
+   switch (ctx->ssbo_addr_format) {
+   case nir_address_format_64bit_global:
+      /* Mask off the binding stride */
+      addr = nir_iand_imm(b, addr, BITFIELD64_MASK(56));
+      desc = nir_build_load_global(b, 1, 64, addr,
+                                   .access = ACCESS_NON_WRITEABLE,
+                                   .align_mul = 16, .align_offset = 0);
+      break;
+
+   case nir_address_format_64bit_global_32bit_offset: {
+      nir_ssa_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
+      nir_ssa_def *offset = nir_channel(b, addr, 3);
+      /* Mask off the binding stride */
+      base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
+      desc = nir_load_global_constant_offset(b, 4, 32, base, offset,
+                                             .align_mul = 16,
+                                             .align_offset = 0);
+      break;
+   }
+
+   case nir_address_format_64bit_bounded_global: {
+      nir_ssa_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
+      nir_ssa_def *size = nir_channel(b, addr, 2);
+      nir_ssa_def *offset = nir_channel(b, addr, 3);
+      /* Mask off the binding stride */
+      base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
+      desc = nir_load_global_constant_bounded(b, 4, 32, base, offset, size,
+                                              .align_mul = 16,
+                                              .align_offset = 0);
+      break;
+   }
+
+   default:
+      unreachable("Unknown address mode");
+   }
+
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
+
+   return true;
+}
+
+static bool
+lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
+                            void *_data)
+{
+   const struct lower_descriptors_ctx *ctx = _data;
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_vulkan_resource_index:
+      return lower_ssbo_resource_index(b, intrin, ctx);
+   case nir_intrinsic_vulkan_resource_reindex:
+      return lower_ssbo_resource_reindex(b, intrin, ctx);
+   case nir_intrinsic_load_vulkan_descriptor:
+      return lower_load_ssbo_descriptor(b, intrin, ctx);
    default:
       return false;
    }
 }
@@ -410,11 +637,21 @@ nvk_nir_lower_descriptors(nir_shader *nir,
          rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
          rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
          rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
-      .desc_addr_format = nir_address_format_32bit_index_offset,
       .ssbo_addr_format = nvk_buffer_addr_format(rs->storage_buffers),
       .ubo_addr_format = nvk_buffer_addr_format(rs->uniform_buffers),
    };
-   return nir_shader_instructions_pass(nir, lower_descriptors_instr,
+
+   /* We run in two passes.  The first attempts to lower everything it can.
+    * In the variable pointers case, some SSBO intrinsics may fail to lower
+    * but that's okay.  The second pass cleans up any SSBO intrinsics which
+    * are left and lowers them to slightly less efficient but variable-
+    * pointers-correct versions.
+    */
+   return nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       (void *)&ctx) |
+          nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
                                        nir_metadata_block_index |
                                        nir_metadata_dominance,
                                        (void *)&ctx);
diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c
index 6aaab6f3fd1..bf1bbeab6ff 100644
--- a/src/nouveau/vulkan/nvk_physical_device.c
+++ b/src/nouveau/vulkan/nvk_physical_device.c
@@ -464,6 +464,8 @@ nvk_get_device_features(const struct nv_device_info *info,
       .multiview = true,
       .multiviewGeometryShader = true,
       .multiviewTessellationShader = true,
+      .variablePointersStorageBuffer = true,
+      .variablePointers = true,
       .shaderDrawParameters = true,
       .samplerYcbcrConversion = true,
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index 3c0d854c664..97f8d560968 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -87,6 +87,7 @@ nvk_physical_device_spirv_options(const struct nvk_physical_device *pdev,
          .shader_viewport_index_layer = true,
          .tessellation = true,
          .transform_feedback = true,
+         .variable_pointers = true,
       },
      .ssbo_addr_format = nvk_buffer_addr_format(rs->storage_buffers),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
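
A note on the stride-tagged binding addresses used by the new SSBO lowering (illustration only, not part of the patch): lower_ssbo_resource_index() tucks the binding stride into the top 8 bits of the 64-bit binding address with nir_ior_imm(), lower_ssbo_resource_reindex() recovers it from the high 32 bits with nir_ushr_imm(addr_high32, 24), and lower_load_ssbo_descriptor() masks it off with BITFIELD64_MASK(56) before loading the descriptor. The standalone C sketch below, using made-up example values, walks through the same bit arithmetic on plain integers:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       /* Hypothetical values: a descriptor-table address and a binding stride. */
       uint64_t binding_addr = 0x0000123456789000ull;
       uint8_t binding_stride = 16;

       /* Tuck the stride into bits [63:56], like nir_ior_imm(b, addr, stride << 56). */
       uint64_t tagged = binding_addr | ((uint64_t)binding_stride << 56);

       /* Recover the stride from the high 32 bits, like nir_ushr_imm(b, addr_high32, 24). */
       uint32_t addr_high32 = (uint32_t)(tagged >> 32);
       uint8_t stride = (uint8_t)(addr_high32 >> 24);
       assert(stride == binding_stride);

       /* Drop the tag before using the address, like nir_iand_imm(b, addr, BITFIELD64_MASK(56)). */
       uint64_t untagged = tagged & ((UINT64_C(1) << 56) - 1);
       assert(untagged == binding_addr);

       return 0;
    }

This assumes the descriptor addresses fit in the low 56 bits, leaving the top byte free to carry the per-binding stride. The tag exists so that vulkan_resource_reindex can be lowered even when the vulkan_resource_index that produced the pointer is no longer visible, which is exactly the case variable pointers introduce; the fast path in try_lower_load_vulkan_descriptor() never needs it.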