From 1de44b1951df3c25c897a4e66440409a8621ba80 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 27 Mar 2023 19:42:31 +0300 Subject: [PATCH] anv: add pipeline/shader support for descriptor buffers Lowering/layout is pretty much the same as direct descriptors. The caveat is that since the descriptor buffers are not visible from the binding tables we can't promote anything to the binding table (except push descriptors). The reason for this is that there is nothing that prevents an application from using both types of descriptors and because descriptor buffers have visible address + capture replay, we can't merge the 2 types in the same virtual address space location (limited to 4Gb max, limited to 2Gb with binding tables). If we had the guarantee that both are not going to be used at the same time, we could consider a 2Gb VA for descriptor buffers. Signed-off-by: Lionel Landwerlin Reviewed-by: Ivan Briano Part-of: --- src/intel/compiler/brw_compiler.h | 1 + src/intel/vulkan/anv_nir.h | 1 + .../vulkan/anv_nir_apply_pipeline_layout.c | 35 ++++++++++++++++--- .../vulkan/anv_nir_compute_push_layout.c | 34 +++++++++++++++--- .../vulkan/anv_nir_lower_resource_intel.c | 16 ++------- .../vulkan/anv_nir_push_descriptor_analysis.c | 3 +- src/intel/vulkan/anv_pipeline.c | 9 ++++- src/intel/vulkan/anv_pipeline_cache.c | 7 +++- src/intel/vulkan/anv_private.h | 21 ++++++++--- 9 files changed, 97 insertions(+), 30 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index c0864c64950..e485b580bbe 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -476,6 +476,7 @@ enum brw_shader_reloc_id { BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH, + BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH, BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE, BRW_SHADER_RELOC_LAST_EMBEDDED_SAMPLER_HANDLE = BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + 
BRW_MAX_EMBEDDED_SAMPLERS - 1, diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index 162577651e4..435b9065979 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -94,6 +94,7 @@ void anv_nir_compute_push_layout(nir_shader *nir, struct brw_stage_prog_data *prog_data, struct anv_pipeline_bind_map *map, const struct anv_pipeline_push_map *push_map, + enum anv_descriptor_set_layout_type desc_type, void *mem_ctx); void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data, diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index d05638d765b..fa315732da8 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -154,6 +154,10 @@ add_binding(struct apply_pipeline_layout_state *state, state->set[set].binding[binding].properties |= BINDING_PROPERTY_EMBEDDED_SAMPLER; } +const VkDescriptorSetLayoutCreateFlags non_pushable_set_flags = + VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT; + const VkDescriptorBindingFlags non_pushable_binding_flags = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | @@ -165,8 +169,19 @@ add_binding_type(struct apply_pipeline_layout_state *state, { add_binding(state, set, binding); - if ((state->layout->set[set].layout->binding[binding].flags & - non_pushable_binding_flags) == 0 && + const struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + const struct anv_descriptor_set_binding_layout *bind_layout = + &set_layout->binding[binding]; + + /* We can't push descriptor buffers but we can for push descriptors */ + const bool is_set_pushable = + (set_layout->flags & non_pushable_set_flags) == 0 || + set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; + const bool is_binding_pushable = + 
(bind_layout->flags & non_pushable_binding_flags) == 0; + + if (is_set_pushable && is_binding_pushable && (state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK || @@ -1912,8 +1927,16 @@ anv_validate_pipeline_layout(const struct anv_pipeline_sets_layout *layout, #endif static bool -binding_is_promotable_to_push(const struct anv_descriptor_set_binding_layout *bind_layout) +binding_is_promotable_to_push(const struct anv_descriptor_set_layout *set_layout, + const struct anv_descriptor_set_binding_layout *bind_layout) { + if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) + return true; + + if (set_layout->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT)) + return false; + return (bind_layout->flags & non_pushable_binding_flags) == 0; } @@ -2124,7 +2147,9 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, } else if (state.set[s].desc_buffer_used) { map->surface_to_descriptor[map->surface_count] = (struct anv_pipeline_binding) { - .set = ANV_DESCRIPTOR_SET_DESCRIPTORS, + .set = (layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) ? 
+ ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER : + ANV_DESCRIPTOR_SET_DESCRIPTORS, .binding = UINT32_MAX, .index = s, }; @@ -2163,7 +2188,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader, if (state.set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER) add_embedded_sampler_entry(&state, map, set, b); - if (binding_is_promotable_to_push(bind_layout)) { + if (binding_is_promotable_to_push(set_layout, bind_layout)) { if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { state.set[set].binding[b].push_block = push_map->block_count; for (unsigned i = 0; i < bind_layout->array_size; i++) diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index fc766e927e7..74e59e4cb28 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -36,6 +36,7 @@ anv_nir_compute_push_layout(nir_shader *nir, struct brw_stage_prog_data *prog_data, struct anv_pipeline_bind_map *map, const struct anv_pipeline_push_map *push_map, + enum anv_descriptor_set_layout_type desc_type, void *mem_ctx) { const struct brw_compiler *compiler = pdevice->compiler; @@ -74,6 +75,16 @@ anv_nir_compute_push_layout(nir_shader *nir, push_end = MAX2(push_end, base + sizeof_field(struct anv_push_constants, desc_surface_offsets)); + + if (desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER && + !pdevice->uses_ex_bso) { + base = offsetof(struct anv_push_constants, + surfaces_base_offset); + push_start = MIN2(push_start, base); + push_end = MAX2(push_end, base + + sizeof_field(struct anv_push_constants, + surfaces_base_offset)); + } break; } @@ -173,19 +184,34 @@ anv_nir_compute_push_layout(nir_shader *nir, case nir_intrinsic_load_desc_set_address_intel: { assert(brw_shader_stage_requires_bindless_resources(nir->info.stage)); b->cursor = nir_before_instr(&intrin->instr); - nir_def *pc_load = nir_load_uniform(b, 1, 32, + nir_def *desc_offset = nir_load_uniform(b, 1, 32, 
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)), .base = offsetof(struct anv_push_constants, desc_surface_offsets), .range = sizeof_field(struct anv_push_constants, desc_surface_offsets), .dest_type = nir_type_uint32); - pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK); + desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK); + if (desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER && + !pdevice->uses_ex_bso) { + nir_def *bindless_base_offset = nir_load_uniform( + b, 1, 32, + nir_imm_int(b, 0), + .base = offsetof(struct anv_push_constants, + surfaces_base_offset), + .range = sizeof_field(struct anv_push_constants, + surfaces_base_offset), + .dest_type = nir_type_uint32); + desc_offset = nir_iadd(b, bindless_base_offset, desc_offset); + } nir_def *desc_addr = nir_pack_64_2x32_split( - b, pc_load, + b, desc_offset, nir_load_reloc_const_intel( - b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH)); + b, + desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ? + BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH : + BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH)); nir_def_rewrite_uses(&intrin->def, desc_addr); break; } diff --git a/src/intel/vulkan/anv_nir_lower_resource_intel.c b/src/intel/vulkan/anv_nir_lower_resource_intel.c index d0bca9862d9..92b18bf51b9 100644 --- a/src/intel/vulkan/anv_nir_lower_resource_intel.c +++ b/src/intel/vulkan/anv_nir_lower_resource_intel.c @@ -113,12 +113,12 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data) nir_def *set_offset = intrin->src[0].ssa; nir_def *binding_offset = intrin->src[1].ssa; - nir_def *sampler_base_offset = intrin->src[3].ssa; /* When using indirect descriptor, the surface handles are loaded from the * descriptor buffer and do not need any offset. 
*/ - if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) { + if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT || + state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) { if (!state->device->uses_ex_bso) { /* We're trying to reduce the number of instructions in the shaders * to compute surface handles. The assumption is that we're using @@ -138,19 +138,9 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data) * done for the push constant value provided in * resource_intel::src[0]. That way the shader can just do a single * ADD and get the surface handle. - * - * Samplers have a 4Gb heap and in the message they're in bits 31:6 - * of the component 3 of the sampler message header. But since we - * push only a single offset for the base offset of the descriptor - * set, resource_intel::src[0] has to be shifted right by 6 (bringing - * it back in bytes). */ - if (is_sampler) { - set_offset = nir_ushr_imm(b, set_offset, 6); - set_offset = nir_iadd(b, set_offset, sampler_base_offset); - } else { + if (!is_sampler) binding_offset = nir_ishl_imm(b, binding_offset, 6); - } } nir_src_rewrite(&intrin->src[1], diff --git a/src/intel/vulkan/anv_nir_push_descriptor_analysis.c b/src/intel/vulkan/anv_nir_push_descriptor_analysis.c index 62eb6088381..c6dcb03769d 100644 --- a/src/intel/vulkan/anv_nir_push_descriptor_analysis.c +++ b/src/intel/vulkan/anv_nir_push_descriptor_analysis.c @@ -133,7 +133,8 @@ anv_nir_loads_push_desc_buffer(nir_shader *nir, const struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[bt_idx]; - if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS && + if ((binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS || + binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER) && binding->index == push_set) { return true; } diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 291269a4bad..306616f6f51 100644 --- a/src/intel/vulkan/anv_pipeline.c 
+++ b/src/intel/vulkan/anv_pipeline.c @@ -1131,7 +1131,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, NIR_PASS_V(nir, anv_nir_compute_push_layout, pdevice, stage->key.base.robust_flags, anv_graphics_pipeline_stage_fragment_dynamic(stage), - prog_data, &stage->bind_map, &push_map, mem_ctx); + prog_data, &stage->bind_map, &push_map, + pipeline->layout.type, mem_ctx); NIR_PASS_V(nir, anv_nir_lower_resource_intel, pdevice, pipeline->layout.type); @@ -1633,6 +1634,12 @@ anv_pipeline_add_executable(struct anv_pipeline *pipeline, fprintf(stream, "Vulkan push constants and API params"); break; + case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER: + fprintf(stream, "Descriptor buffer (desc buffer) for set %d (start=%dB)", + stage->bind_map.push_ranges[i].index, + stage->bind_map.push_ranges[i].start * 32); + break; + case ANV_DESCRIPTOR_SET_DESCRIPTORS: fprintf(stream, "Descriptor buffer for set %d (start=%dB)", stage->bind_map.push_ranges[i].index, diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 73b9577050e..7b466a425dc 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -162,7 +162,12 @@ anv_shader_bin_create(struct anv_device *device, prog_data_in->const_data_offset; int rv_count = 0; - struct brw_shader_reloc_value reloc_values[6]; + struct brw_shader_reloc_value reloc_values[7]; + assert((device->physical->va.descriptor_buffer_pool.addr & 0xffffffff) == 0); + reloc_values[rv_count++] = (struct brw_shader_reloc_value) { + .id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH, + .value = device->physical->va.descriptor_buffer_pool.addr >> 32, + }; assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0); assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0); reloc_values[rv_count++] = (struct brw_shader_reloc_value) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index cc6ff768e0f..9e1fdb4bdfb 
100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2838,11 +2838,12 @@ anv_descriptor_set_write_template(struct anv_device *device, const struct vk_descriptor_update_template *template, const void *data); -#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4) -#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3) -#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2) -#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1) -#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX +#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 5) +#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4) +#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3) +#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2) +#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1) +#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX struct anv_pipeline_binding { /** Index in the descriptor set @@ -3295,6 +3296,16 @@ struct anv_push_constants { /** Dynamic offsets for dynamic UBOs and SSBOs */ uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; + /** Surface buffer base offset + * + * Only used prior to DG2 with descriptor buffers. + * + * (surfaces_base_offset + desc_offsets[set_index]) is relative to + * device->va.descriptor_buffer_pool and can be used to compute a 64bit + * address to the descriptor buffer (using load_desc_set_address_intel). + */ + uint32_t surfaces_base_offset; + union { struct { /** Dynamic MSAA value */