From ab7641b8dcbfee419b761abb27fe6fed476b4e05 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 25 Oct 2023 10:21:22 +0300 Subject: [PATCH] anv: implement descriptor buffer binding And barriers for them. Signed-off-by: Lionel Landwerlin Reviewed-by: Ivan Briano Part-of: --- src/intel/vulkan/anv_batch_chain.c | 7 +- src/intel/vulkan/anv_cmd_buffer.c | 286 ++++++++++++++----- src/intel/vulkan/anv_descriptor_set.c | 27 +- src/intel/vulkan/anv_genX.h | 3 + src/intel/vulkan/anv_private.h | 61 +++- src/intel/vulkan/genX_cmd_buffer.c | 389 ++++++++++++++++++++++---- src/intel/vulkan/genX_cmd_compute.c | 5 + src/intel/vulkan/genX_cmd_draw.c | 14 + 8 files changed, 658 insertions(+), 134 deletions(-) diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index fee5e6d8335..49c812125b3 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -747,8 +747,13 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, { if (size == 0) return ANV_STATE_NULL; + assert(cmd_buffer->state.current_db_mode != + ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN); struct anv_state state = - anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + anv_state_stream_alloc(cmd_buffer->state.current_db_mode == + ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ? + &cmd_buffer->dynamic_state_db_stream : + &cmd_buffer->dynamic_state_stream, size, alignment); if (state.map == NULL) anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY); diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 5db014ef63c..00b8ecd84a5 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -741,6 +741,52 @@ void anv_CmdBindPipeline( anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages); } +static struct anv_cmd_pipeline_state * +anv_cmd_buffer_get_pipeline_layout_state(struct anv_cmd_buffer *cmd_buffer, + VkPipelineBindPoint bind_point, + const struct anv_descriptor_set_layout *set_layout, + VkShaderStageFlags *out_stages) +{ + *out_stages = set_layout->shader_stages; + + switch (bind_point) { + case VK_PIPELINE_BIND_POINT_GRAPHICS: + *out_stages &= VK_SHADER_STAGE_ALL_GRAPHICS | + (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ? 
+ (VK_SHADER_STAGE_TASK_BIT_EXT | + VK_SHADER_STAGE_MESH_BIT_EXT) : 0); + return &cmd_buffer->state.gfx.base; + + case VK_PIPELINE_BIND_POINT_COMPUTE: + *out_stages &= VK_SHADER_STAGE_COMPUTE_BIT; + return &cmd_buffer->state.compute.base; + + case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: + *out_stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR | + VK_SHADER_STAGE_ANY_HIT_BIT_KHR | + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | + VK_SHADER_STAGE_MISS_BIT_KHR | + VK_SHADER_STAGE_INTERSECTION_BIT_KHR | + VK_SHADER_STAGE_CALLABLE_BIT_KHR; + return &cmd_buffer->state.rt.base; + + default: + unreachable("invalid bind point"); + } +} + +static void +anv_cmd_buffer_maybe_dirty_descriptor_mode(struct anv_cmd_buffer *cmd_buffer, + enum anv_cmd_descriptor_buffer_mode new_mode) +{ + if (cmd_buffer->state.current_db_mode == new_mode) + return; + + /* Ensure we program the STATE_BASE_ADDRESS properly at least once */ + cmd_buffer->state.descriptor_buffers.dirty = true; + cmd_buffer->state.pending_db_mode = new_mode; +} + static void anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, @@ -761,37 +807,20 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, */ assert(!set->pool || !set->pool->host_only); - struct anv_descriptor_set_layout *set_layout = set->layout; - VkShaderStageFlags stages = set_layout->shader_stages; - struct anv_cmd_pipeline_state *pipe_state; + struct anv_descriptor_set_layout *set_layout = + layout->set[set_index].layout; - switch (bind_point) { - case VK_PIPELINE_BIND_POINT_GRAPHICS: - stages &= VK_SHADER_STAGE_ALL_GRAPHICS | - (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ? - (VK_SHADER_STAGE_TASK_BIT_EXT | - VK_SHADER_STAGE_MESH_BIT_EXT) : 0); - pipe_state = &cmd_buffer->state.gfx.base; - break; + anv_cmd_buffer_maybe_dirty_descriptor_mode( + cmd_buffer, + (set->layout->flags & + VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) != 0 ? + ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER : + ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY); - case VK_PIPELINE_BIND_POINT_COMPUTE: - stages &= VK_SHADER_STAGE_COMPUTE_BIT; - pipe_state = &cmd_buffer->state.compute.base; - break; - - case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: - stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR; - pipe_state = &cmd_buffer->state.rt.base; - break; - - default: - unreachable("invalid bind point"); - } + VkShaderStageFlags stages; + struct anv_cmd_pipeline_state *pipe_state = + anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point, + set_layout, &stages); VkShaderStageFlags dirty_stages = 0; /* If it's a push descriptor set, we have to flag things as dirty @@ -799,50 +828,59 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, * may have edited in-place. */ if (pipe_state->descriptors[set_index] != set || - anv_descriptor_set_is_push(set)) { + anv_descriptor_set_is_push(set)) { pipe_state->descriptors[set_index] = set; - /* When using indirect descriptors, stages that have access to the HW - * binding tables, never need to access the - * anv_push_constants::desc_surface_offsets fields, because any data - * they need from the descriptor buffer is accessible through a binding - * table entry. 
For stages that are "bindless" (Mesh/Task/RT), we need
- * to provide anv_push_constants::desc_surface_offsets matching the bound
- * descriptor so that shaders can access the descriptor buffer through
- * A64 messages.
- *
- * With direct descriptors, the shaders can use the
- * anv_push_constants::desc_surface_offsets to build bindless offsets.
- * So it's we always need to update the push constant data.
- */
- bool update_desc_sets =
- !cmd_buffer->device->physical->indirect_descriptors ||
- (stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
- VK_SHADER_STAGE_MESH_BIT_EXT |
- VK_SHADER_STAGE_RAYGEN_BIT_KHR |
- VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
- VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
- VK_SHADER_STAGE_MISS_BIT_KHR |
- VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
- VK_SHADER_STAGE_CALLABLE_BIT_KHR));
+ if (set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
+ assert(set->is_push);
- if (update_desc_sets) {
- struct anv_push_constants *push = &pipe_state->push_constants;
+ pipe_state->descriptor_buffers[set_index].buffer_index = -1;
+ pipe_state->descriptor_buffers[set_index].buffer_offset = set->desc_offset;
+ pipe_state->descriptor_buffers[set_index].bound = true;
+ cmd_buffer->state.descriptors_dirty |= stages;
+ cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
+ } else {
+ /* When using indirect descriptors, stages that have access to the HW
+ * binding tables never need to access the
+ * anv_push_constants::desc_surface_offsets fields, because any data
+ * they need from the descriptor buffer is accessible through a binding
+ * table entry. For stages that are "bindless" (Mesh/Task/RT), we
+ * need to provide anv_push_constants::desc_surface_offsets matching
+ * the bound descriptor so that shaders can access the descriptor
+ * buffer through A64 messages.
+ *
+ * With direct descriptors, the shaders can use
+ * anv_push_constants::desc_surface_offsets to build bindless offsets,
+ * so we always need to update the push constant data.
+ */ + bool update_desc_sets = + !cmd_buffer->device->physical->indirect_descriptors || + (stages & (VK_SHADER_STAGE_TASK_BIT_EXT | + VK_SHADER_STAGE_MESH_BIT_EXT | + VK_SHADER_STAGE_RAYGEN_BIT_KHR | + VK_SHADER_STAGE_ANY_HIT_BIT_KHR | + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | + VK_SHADER_STAGE_MISS_BIT_KHR | + VK_SHADER_STAGE_INTERSECTION_BIT_KHR | + VK_SHADER_STAGE_CALLABLE_BIT_KHR)); - uint64_t offset = - anv_address_physical(set->desc_surface_addr) - - cmd_buffer->device->physical->va.internal_surface_state_pool.addr; - assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0); - push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK; - push->desc_surface_offsets[set_index] |= offset; - push->desc_sampler_offsets[set_index] |= - anv_address_physical(set->desc_sampler_addr) - - cmd_buffer->device->physical->va.dynamic_state_pool.addr; + if (update_desc_sets) { + struct anv_push_constants *push = &pipe_state->push_constants; + uint64_t offset = + anv_address_physical(set->desc_surface_addr) - + cmd_buffer->device->physical->va.internal_surface_state_pool.addr; + assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0); + push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK; + push->desc_surface_offsets[set_index] |= offset; + push->desc_sampler_offsets[set_index] |= + anv_address_physical(set->desc_sampler_addr) - + cmd_buffer->device->physical->va.dynamic_state_pool.addr; - anv_reloc_list_add_bo(cmd_buffer->batch.relocs, - set->desc_surface_addr.bo); - anv_reloc_list_add_bo(cmd_buffer->batch.relocs, - set->desc_sampler_addr.bo); + anv_reloc_list_add_bo(cmd_buffer->batch.relocs, + set->desc_surface_addr.bo); + anv_reloc_list_add_bo(cmd_buffer->batch.relocs, + set->desc_sampler_addr.bo); + } } dirty_stages |= stages; @@ -960,6 +998,108 @@ void anv_CmdBindDescriptorSets2KHR( } } +void anv_CmdBindDescriptorBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t bufferCount, + const VkDescriptorBufferBindingInfoEXT* pBindingInfos) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_cmd_state *state = &cmd_buffer->state; + + for (uint32_t i = 0; i < bufferCount; i++) { + assert(pBindingInfos[i].address >= cmd_buffer->device->physical->va.descriptor_buffer_pool.addr && + pBindingInfos[i].address < (cmd_buffer->device->physical->va.descriptor_buffer_pool.addr + + cmd_buffer->device->physical->va.descriptor_buffer_pool.size)); + + if (state->descriptor_buffers.address[i] != pBindingInfos[i].address) { + state->descriptor_buffers.address[i] = pBindingInfos[i].address; + if (pBindingInfos[i].usage & VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT) + state->descriptor_buffers.surfaces_address = pBindingInfos[i].address; + if (pBindingInfos[i].usage & VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT) + state->descriptor_buffers.samplers_address = pBindingInfos[i].address; + state->descriptor_buffers.dirty = true; + state->descriptor_buffers.offsets_dirty = ~0; + } + } + + anv_cmd_buffer_maybe_dirty_descriptor_mode(cmd_buffer, + ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER); +} + +static void +anv_cmd_buffer_set_descriptor_buffer_offsets(struct anv_cmd_buffer *cmd_buffer, + VkPipelineBindPoint bind_point, + struct anv_pipeline_layout *layout, + uint32_t first_set, + uint32_t set_count, + const VkDeviceSize *buffer_offsets, + const uint32_t *buffer_indices) +{ + for (uint32_t i = 0; i < set_count; i++) { + const uint32_t set_index = first_set + i; + + const struct anv_descriptor_set_layout *set_layout = + 
layout->sets_layout.set[set_index].layout; + VkShaderStageFlags stages; + struct anv_cmd_pipeline_state *pipe_state = + anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point, + set_layout, &stages); + + if (buffer_offsets[i] != pipe_state->descriptor_buffers[set_index].buffer_offset || + buffer_indices[i] != pipe_state->descriptor_buffers[set_index].buffer_index || + !pipe_state->descriptor_buffers[set_index].bound) { + pipe_state->descriptor_buffers[set_index].buffer_index = buffer_indices[i]; + pipe_state->descriptor_buffers[set_index].buffer_offset = buffer_offsets[i]; + cmd_buffer->state.descriptors_dirty |= stages; + cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages; + } + pipe_state->descriptor_buffers[set_index].bound = true; + } +} + +void anv_CmdSetDescriptorBufferOffsets2EXT( + VkCommandBuffer commandBuffer, + const VkSetDescriptorBufferOffsetsInfoEXT* pSetDescriptorBufferOffsetsInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, pSetDescriptorBufferOffsetsInfo->layout); + + if (pSetDescriptorBufferOffsetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) { + anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, + layout, + pSetDescriptorBufferOffsetsInfo->firstSet, + pSetDescriptorBufferOffsetsInfo->setCount, + pSetDescriptorBufferOffsetsInfo->pOffsets, + pSetDescriptorBufferOffsetsInfo->pBufferIndices); + } + if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) { + anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, + layout, + pSetDescriptorBufferOffsetsInfo->firstSet, + pSetDescriptorBufferOffsetsInfo->setCount, + pSetDescriptorBufferOffsetsInfo->pOffsets, + pSetDescriptorBufferOffsetsInfo->pBufferIndices); + } + if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_RT_STAGE_BITS) { + anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer, + VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, + layout, + pSetDescriptorBufferOffsetsInfo->firstSet, + pSetDescriptorBufferOffsetsInfo->setCount, + pSetDescriptorBufferOffsetsInfo->pOffsets, + pSetDescriptorBufferOffsetsInfo->pBufferIndices); + } +} + +void anv_CmdBindDescriptorBufferEmbeddedSamplers2EXT( + VkCommandBuffer commandBuffer, + const VkBindDescriptorBufferEmbeddedSamplersInfoEXT* pBindDescriptorBufferEmbeddedSamplersInfo) +{ + /* no-op */ +} + void anv_CmdBindVertexBuffers2( VkCommandBuffer commandBuffer, uint32_t firstBinding, @@ -1214,7 +1354,6 @@ anv_cmd_buffer_push_descriptor_sets(struct anv_cmd_buffer *cmd_buffer, assert(pInfo->set < MAX_SETS); struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout; - struct anv_push_descriptor_set *push_set = &anv_cmd_buffer_get_pipe_state(cmd_buffer, bind_point)->push_descriptor; @@ -1263,10 +1402,11 @@ void anv_CmdPushDescriptorSetWithTemplate2KHR( assert(pInfo->set < MAX_PUSH_DESCRIPTORS); struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout; - - struct anv_push_descriptor_set *push_set = - &anv_cmd_buffer_get_pipe_state(cmd_buffer, - template->bind_point)->push_descriptor; + UNUSED VkShaderStageFlags stages; + struct anv_cmd_pipeline_state *pipe_state = + anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, template->bind_point, + set_layout, &stages); + struct anv_push_descriptor_set *push_set = &pipe_state->push_descriptor; if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout)) return; diff --git a/src/intel/vulkan/anv_descriptor_set.c 
b/src/intel/vulkan/anv_descriptor_set.c
index edb05a8cf47..fcd0e383f1a 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -1942,13 +1942,24 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
(push_set->set_used_on_gpu ||
set->desc_surface_mem.alloc_size < layout->descriptor_buffer_surface_size)) {
struct anv_physical_device *pdevice = cmd_buffer->device->physical;
- struct anv_state_stream *push_stream =
- pdevice->indirect_descriptors ?
- &cmd_buffer->indirect_push_descriptor_stream :
- &cmd_buffer->surface_state_stream;
- uint64_t push_base_address = pdevice->indirect_descriptors ?
- pdevice->va.indirect_push_descriptor_pool.addr :
- pdevice->va.internal_surface_state_pool.addr;
+ struct anv_state_stream *push_stream;
+ uint64_t push_base_address;
+
+ if (layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
+ push_stream = pdevice->uses_ex_bso ?
+ &cmd_buffer->push_descriptor_buffer_stream :
+ &cmd_buffer->surface_state_stream;
+ push_base_address = pdevice->uses_ex_bso ?
+ pdevice->va.push_descriptor_buffer_pool.addr :
+ pdevice->va.internal_surface_state_pool.addr;
+ } else {
+ push_stream = pdevice->indirect_descriptors ?
+ &cmd_buffer->indirect_push_descriptor_stream :
+ &cmd_buffer->surface_state_stream;
+ push_base_address = pdevice->indirect_descriptors ?
+ pdevice->va.indirect_push_descriptor_pool.addr :
+ pdevice->va.internal_surface_state_pool.addr;
+ }
uint32_t surface_size, sampler_size;
anv_descriptor_set_layout_descriptor_buffer_size(layout, 0,
@@ -2868,7 +2879,7 @@ void anv_GetDescriptorEXT(
(sampler = anv_sampler_from_handle(
pDescriptorInfo->data.pCombinedImageSampler->sampler))) {
memcpy(pDescriptor + desc_offset + ANV_SURFACE_STATE_SIZE,
- sampler->bindless_state.map + i * ANV_SAMPLER_STATE_SIZE,
+ sampler->db_state[i],
ANV_SAMPLER_STATE_SIZE);
} else {
memset(pDescriptor + desc_offset + ANV_SURFACE_STATE_SIZE,
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index e45216c4099..9802656c3e7 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -149,6 +149,9 @@ void genX(emit_l3_config)(struct anv_batch *batch,
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
const struct intel_l3_config *cfg);
+void genX(flush_descriptor_buffers)(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_cmd_pipeline_state *pipe_state);
+
uint32_t
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 9e1fdb4bdfb..4c347986b1d 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -3280,11 +3280,17 @@ struct anv_push_constants {
/**
* Base offsets for descriptor sets from
- * INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
- *
- * In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
+ * The offset has a different meaning depending on a number of factors:
*
- * In bits [6:63] : descriptor set address
+ * - with descriptor sets (direct or indirect), this is relative to
+ * pdevice->va.descriptor_pool
+ *
+ * - with descriptor buffers on DG2+, relative to
+ * device->va.descriptor_buffer_pool
+ *
+ * - with descriptor buffers prior to DG2, relative to the programmed
+ * value in STATE_BASE_ADDRESS::BindlessSurfaceStateBaseAddress
*/
uint32_t desc_surface_offsets[MAX_SETS];
@@ -3478,6 +3484,26 @@ struct anv_simple_shader {
*/
struct anv_cmd_pipeline_state {
struct anv_descriptor_set 
*descriptors[MAX_SETS];
+ struct {
+ bool bound;
+ /**
+ * Buffer index used by this descriptor set.
+ */
+ int32_t buffer_index; /* -1 means push descriptor */
+ /**
+ * Offset of the descriptor set in the descriptor buffer.
+ */
+ uint32_t buffer_offset;
+ /**
+ * Final computed address to be emitted in the descriptor set surface
+ * state.
+ */
+ uint64_t address;
+ /**
+ * The descriptor set surface state.
+ */
+ struct anv_state state;
+ } descriptor_buffers[MAX_SETS];
struct anv_push_descriptor_set push_descriptor;
struct anv_push_constants push_constants;
@@ -3645,6 +3671,12 @@ struct anv_cmd_state {
*/
enum anv_cmd_descriptor_buffer_mode current_db_mode;
+ /**
+ * Whether the command buffer has pending descriptor buffers bound to it.
+ * This variable changes before anv_cmd_state::current_db_mode.
+ */
+ enum anv_cmd_descriptor_buffer_mode pending_db_mode;
+
struct {
/**
* Tracks operations susceptible to interfere with queries in the
@@ -3668,6 +3700,14 @@ struct anv_cmd_state {
VkShaderStageFlags push_descriptors_dirty;
VkShaderStageFlags push_constants_dirty;
+ struct {
+ uint64_t surfaces_address;
+ uint64_t samplers_address;
+ bool dirty;
+ VkShaderStageFlags offsets_dirty;
+ uint64_t address[MAX_SETS];
+ } descriptor_buffers;
+
struct anv_vertex_binding vertex_bindings[MAX_VBS];
bool xfb_enabled;
struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
@@ -3954,10 +3994,25 @@ static inline struct anv_address
anv_cmd_buffer_dynamic_state_address(struct anv_cmd_buffer *cmd_buffer,
struct anv_state state)
{
+ if (cmd_buffer->state.current_db_mode ==
+ ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER) {
+ return anv_state_pool_state_address(
+ &cmd_buffer->device->dynamic_state_db_pool, state);
+ }
return anv_state_pool_state_address(
&cmd_buffer->device->dynamic_state_pool, state);
}
+static inline uint64_t
+anv_cmd_buffer_descriptor_buffer_address(struct anv_cmd_buffer *cmd_buffer,
+ int32_t buffer_index)
+{
+ if (buffer_index == -1)
+ return cmd_buffer->device->physical->va.push_descriptor_buffer_pool.addr;
+
+ return cmd_buffer->state.descriptor_buffers.address[buffer_index];
+}
+
VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index b756e0e4444..85118c914d6 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -132,10 +132,24 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
genX(flush_pipeline_select_3d)(cmd_buffer);
#endif
+ /* If no API entry point selected the current mode (this can happen if the
+ * first operation in the command buffer binds no descriptors), select
+ * BUFFER if EXT_descriptor_buffer is enabled, otherwise LEGACY.
+ */
+ if (cmd_buffer->state.pending_db_mode ==
+ ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN) {
+ cmd_buffer->state.pending_db_mode =
+ cmd_buffer->device->vk.enabled_extensions.EXT_descriptor_buffer ?
+ ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER : + ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY; + } + anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) { sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 }; sba.GeneralStateMOCS = mocs; + sba.GeneralStateBufferSize = 0xfffff; sba.GeneralStateBaseAddressModifyEnable = true; + sba.GeneralStateBufferSizeModifyEnable = true; sba.StatelessDataPortAccessMOCS = mocs; @@ -151,29 +165,19 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.SurfaceStateMOCS = mocs; sba.SurfaceStateBaseAddressModifyEnable = true; - sba.DynamicStateBaseAddress = - (struct anv_address) { device->dynamic_state_pool.block_pool.bo, 0 }; - sba.DynamicStateMOCS = mocs; - sba.DynamicStateBaseAddressModifyEnable = true; - sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 }; sba.IndirectObjectMOCS = mocs; + sba.IndirectObjectBufferSize = 0xfffff; sba.IndirectObjectBaseAddressModifyEnable = true; + sba.IndirectObjectBufferSizeModifyEnable = true; sba.InstructionBaseAddress = (struct anv_address) { device->instruction_state_pool.block_pool.bo, 0 }; sba.InstructionMOCS = mocs; + sba.InstructionBufferSize = + device->physical->va.instruction_state_pool.size / 4096; sba.InstructionBaseAddressModifyEnable = true; - - sba.GeneralStateBufferSize = 0xfffff; - sba.IndirectObjectBufferSize = 0xfffff; - sba.DynamicStateBufferSize = (device->physical->va.dynamic_state_pool.size + - device->physical->va.sampler_state_pool.size) / 4096; - sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096; - sba.GeneralStateBufferSizeModifyEnable = true; - sba.IndirectObjectBufferSizeModifyEnable = true; - sba.DynamicStateBufferSizeModifyEnable = true; - sba.InstructionBuffersizeModifyEnable = true; + sba.InstructionBuffersizeModifyEnable = true; #if GFX_VER >= 11 sba.BindlessSamplerStateBaseAddress = ANV_NULL_ADDRESS; @@ -182,14 +186,61 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.BindlessSamplerStateBaseAddressModifyEnable = true; #endif - if (!device->physical->indirect_descriptors) { + if (cmd_buffer->state.pending_db_mode == ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER) { + sba.DynamicStateBaseAddress = (struct anv_address) { + .offset = device->physical->va.dynamic_state_db_pool.addr, + }; + sba.DynamicStateBufferSize = + (device->physical->va.dynamic_state_db_pool.size + + device->physical->va.descriptor_buffer_pool.size + + device->physical->va.push_descriptor_buffer_pool.size) / 4096; + sba.DynamicStateMOCS = mocs; + sba.DynamicStateBaseAddressModifyEnable = true; + sba.DynamicStateBufferSizeModifyEnable = true; + #if GFX_VERx10 >= 125 - /* Bindless Surface State & Bindless Sampler State are aligned to the - * same heap - */ - sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { .offset = - device->physical->va.binding_table_pool.addr, }; + sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { + .offset = device->physical->va.descriptor_buffer_pool.addr, + }; + sba.BindlessSurfaceStateSize = + (device->physical->va.descriptor_buffer_pool.size + + device->physical->va.push_descriptor_buffer_pool.size) - 1; + sba.BindlessSurfaceStateMOCS = mocs; + sba.BindlessSurfaceStateBaseAddressModifyEnable = true; +#else + const uint64_t surfaces_addr = + cmd_buffer->state.descriptor_buffers.surfaces_address != 0 ? 
+ cmd_buffer->state.descriptor_buffers.surfaces_address : + anv_address_physical(device->workaround_address); + const uint64_t surfaces_size = + cmd_buffer->state.descriptor_buffers.surfaces_address != 0 ? + MIN2(device->physical->va.descriptor_buffer_pool.size - + (cmd_buffer->state.descriptor_buffers.surfaces_address - + device->physical->va.descriptor_buffer_pool.addr), + anv_physical_device_bindless_heap_size(device->physical)) : + (device->workaround_bo->size - device->workaround_address.offset); + sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { + .offset = surfaces_addr, + }; + sba.BindlessSurfaceStateSize = surfaces_size / ANV_SURFACE_STATE_SIZE - 1; + sba.BindlessSurfaceStateMOCS = mocs; + sba.BindlessSurfaceStateBaseAddressModifyEnable = true; +#endif /* GFX_VERx10 < 125 */ + } else if (!device->physical->indirect_descriptors) { +#if GFX_VERx10 >= 125 + sba.DynamicStateBaseAddress = (struct anv_address) { + .offset = device->physical->va.dynamic_state_pool.addr, + }; + sba.DynamicStateBufferSize = + (device->physical->va.dynamic_state_pool.size + + device->physical->va.sampler_state_pool.size) / 4096; + sba.DynamicStateMOCS = mocs; + sba.DynamicStateBaseAddressModifyEnable = true; + sba.DynamicStateBufferSizeModifyEnable = true; + + sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { + .offset = device->physical->va.internal_surface_state_pool.addr, + }; sba.BindlessSurfaceStateSize = (device->physical->va.internal_surface_state_pool.size + device->physical->va.bindless_surface_state_pool.size) - 1; @@ -199,12 +250,23 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) unreachable("Direct descriptor not supported"); #endif } else { + sba.DynamicStateBaseAddress = (struct anv_address) { + .offset = device->physical->va.dynamic_state_pool.addr, + }; + sba.DynamicStateBufferSize = + (device->physical->va.dynamic_state_pool.size + + device->physical->va.sampler_state_pool.size) / 4096; + sba.DynamicStateMOCS = mocs; + sba.DynamicStateBaseAddressModifyEnable = true; + sba.DynamicStateBufferSizeModifyEnable = true; + sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { .offset = device->physical->va.bindless_surface_state_pool.addr, }; sba.BindlessSurfaceStateSize = - anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; + anv_physical_device_bindless_heap_size(device->physical) / + ANV_SURFACE_STATE_SIZE - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true; } @@ -214,6 +276,12 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) #endif } + bool db_mode_changed = false; + if (cmd_buffer->state.current_db_mode != cmd_buffer->state.pending_db_mode) { + cmd_buffer->state.current_db_mode = cmd_buffer->state.pending_db_mode; + db_mode_changed = true; + } + #if INTEL_NEEDS_WA_1607854226 /* Wa_1607854226: * @@ -293,6 +361,50 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, cmd_buffer->state.current_pipeline, bits); + + assert(cmd_buffer->state.current_db_mode != + ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN); + if (db_mode_changed) { +#if GFX_VER == 11 + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) { + ptr.SliceHashStatePointerValid = true; + ptr.SliceHashTableStatePointer = cmd_buffer->state.current_db_mode == + ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ? 
+ device->slice_hash_db.offset :
+ device->slice_hash.offset;
+ }
+#elif GFX_VERx10 == 125
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
+ ptr.SliceHashStatePointerValid = true;
+ ptr.SliceHashTableStatePointer = cmd_buffer->state.current_db_mode ==
+ ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ?
+ device->slice_hash_db.offset :
+ device->slice_hash.offset;
+ }
+#endif
+
+ /* Changing the dynamic state location affects all the states with
+ * offsets relative to that pointer.
+ */
+ struct anv_gfx_dynamic_state *hw_state = &cmd_buffer->state.gfx.dyn_state;
+ BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
+ BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
+ BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
+ BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE);
+ BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE);
+ if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate) {
+ struct vk_dynamic_graphics_state *dyn =
+ &cmd_buffer->vk.dynamic_graphics_state;
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_FSR);
+ }
+
+#if GFX_VERx10 < 125
+ /* The push constant data for compute shaders is an offset in the dynamic
+ * state heap. If we change it, we need to reemit the push constants.
+ */
+ cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+#endif
+ }
}

void
@@ -2108,6 +2220,13 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
break;
}
+ case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER: {
+ assert(pipe_state->descriptor_buffers[binding->index].state.alloc_size);
+ bt_map[s] = pipe_state->descriptor_buffers[binding->index].state.offset +
+ state_offset;
+ break;
+ }
+
default: {
assert(binding->set < MAX_SETS);
const struct anv_descriptor_set *set =
@@ -2160,6 +2279,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
pipe_state, binding, desc);
} else {
+ assert(pipeline->layout.type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
+ pipeline->layout.type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER);
surface_state_offset =
emit_direct_descriptor_binding_table_entry(cmd_buffer, pipe_state,
set, binding, desc);
@@ -2210,7 +2331,11 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer,
continue;
memcpy(state->map + (s * 16),
- sampler->state[binding->plane], sizeof(sampler->state[0]));
+ cmd_buffer->state.current_db_mode ==
+ ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ?
+ sampler->db_state[binding->plane] :
+ sampler->state[binding->plane],
+ sizeof(sampler->state[0]));
}
return VK_SUCCESS;
@@ -2484,6 +2609,140 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value)
#endif
}
+ALWAYS_INLINE static void
+update_descriptor_set_surface_state(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_cmd_pipeline_state *pipe_state,
+ uint32_t set_idx)
+{
+ if (!pipe_state->descriptor_buffers[set_idx].bound)
+ return;
+
+ const struct anv_physical_device *device = cmd_buffer->device->physical;
+ const int32_t buffer_index =
+ pipe_state->descriptor_buffers[set_idx].buffer_index;
+ const struct anv_va_range *push_va_range =
+ GFX_VERx10 >= 125 ?
+ &device->va.push_descriptor_buffer_pool :
+ &device->va.internal_surface_state_pool;
+ const struct anv_va_range *va_range =
+ buffer_index == -1 ? push_va_range : &device->va.descriptor_buffer_pool;
+ const uint64_t descriptor_set_addr =
+ (buffer_index == -1 ?
va_range->addr : + cmd_buffer->state.descriptor_buffers.address[buffer_index]) + + pipe_state->descriptor_buffers[set_idx].buffer_offset; + const uint64_t set_size = + MIN2(va_range->size - (descriptor_set_addr - va_range->addr), + anv_physical_device_bindless_heap_size(device)); + + if (descriptor_set_addr != pipe_state->descriptor_buffers[set_idx].address) { + pipe_state->descriptor_buffers[set_idx].address = descriptor_set_addr; + + struct anv_state surface_state = + anv_cmd_buffer_alloc_surface_states(cmd_buffer, 1); + const enum isl_format format = + anv_isl_format_for_descriptor_type(cmd_buffer->device, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + anv_fill_buffer_surface_state( + cmd_buffer->device, surface_state.map, + format, ISL_SWIZZLE_IDENTITY, + ISL_SURF_USAGE_CONSTANT_BUFFER_BIT, + anv_address_from_u64(pipe_state->descriptor_buffers[set_idx].address), + set_size, 1); + + pipe_state->descriptor_buffers[set_idx].state = surface_state; + } +} + +ALWAYS_INLINE static uint32_t +compute_descriptor_set_surface_offset(const struct anv_cmd_buffer *cmd_buffer, + const struct anv_cmd_pipeline_state *pipe_state, + const uint32_t set_idx) +{ + const struct anv_physical_device *device = cmd_buffer->device->physical; + + if (device->uses_ex_bso) { + int32_t buffer_index = + pipe_state->descriptor_buffers[set_idx].buffer_index; + uint64_t buffer_address = + buffer_index == -1 ? + device->va.push_descriptor_buffer_pool.addr : + cmd_buffer->state.descriptor_buffers.address[buffer_index]; + + return (buffer_address - device->va.descriptor_buffer_pool.addr) + + pipe_state->descriptor_buffers[set_idx].buffer_offset; + } + + return pipe_state->descriptor_buffers[set_idx].buffer_offset << 6; +} + +ALWAYS_INLINE static uint32_t +compute_descriptor_set_sampler_offset(const struct anv_cmd_buffer *cmd_buffer, + const struct anv_cmd_pipeline_state *pipe_state, + const uint32_t set_idx) +{ + const struct anv_physical_device *device = cmd_buffer->device->physical; + int32_t buffer_index = + pipe_state->descriptor_buffers[set_idx].buffer_index; + uint64_t buffer_address = + buffer_index == -1 ? + device->va.push_descriptor_buffer_pool.addr : + cmd_buffer->state.descriptor_buffers.address[buffer_index]; + + return (buffer_address - device->va.dynamic_state_db_pool.addr) + + pipe_state->descriptor_buffers[set_idx].buffer_offset; +} + +void +genX(flush_descriptor_buffers)(struct anv_cmd_buffer *cmd_buffer, + struct anv_cmd_pipeline_state *pipe_state) +{ + /* On Gfx12.5+ the STATE_BASE_ADDRESS BindlessSurfaceStateBaseAddress & + * DynamicStateBaseAddress are fixed. So as long as we stay in one + * descriptor buffer mode, there is no need to switch. 
+ */ +#if GFX_VERx10 >= 125 + if (cmd_buffer->state.current_db_mode != + cmd_buffer->state.pending_db_mode) + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); +#else + if (cmd_buffer->state.descriptor_buffers.dirty) + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); +#endif + + assert(cmd_buffer->state.current_db_mode != + ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN); + if (cmd_buffer->state.current_db_mode == ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER && + (cmd_buffer->state.descriptor_buffers.dirty || + (pipe_state->pipeline->active_stages & + cmd_buffer->state.descriptor_buffers.offsets_dirty) != 0)) { + struct anv_push_constants *push_constants = + &pipe_state->push_constants; + for (uint32_t i = 0; i < ARRAY_SIZE(push_constants->desc_surface_offsets); i++) { + update_descriptor_set_surface_state(cmd_buffer, pipe_state, i); + + push_constants->desc_surface_offsets[i] = + compute_descriptor_set_surface_offset(cmd_buffer, pipe_state, i); + push_constants->desc_sampler_offsets[i] = + compute_descriptor_set_sampler_offset(cmd_buffer, pipe_state, i); + } + +#if GFX_VERx10 < 125 + struct anv_device *device = cmd_buffer->device; + push_constants->surfaces_base_offset = + (cmd_buffer->state.descriptor_buffers.surfaces_address - + device->physical->va.descriptor_buffer_pool.addr); +#endif + + cmd_buffer->state.push_constants_dirty |= + (cmd_buffer->state.descriptor_buffers.offsets_dirty & + pipe_state->pipeline->active_stages); + cmd_buffer->state.descriptor_buffers.offsets_dirty &= + ~pipe_state->pipeline->active_stages; + } + + cmd_buffer->state.descriptor_buffers.dirty = false; +} + VkResult genX(BeginCommandBuffer)( VkCommandBuffer commandBuffer, @@ -2511,8 +2770,6 @@ genX(BeginCommandBuffer)( cmd_buffer->usage_flags = pBeginInfo->flags; - cmd_buffer->state.current_db_mode = ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY; - /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for * primary level command buffers. 
* @@ -2575,7 +2832,12 @@ genX(BeginCommandBuffer)( } #endif - genX(cmd_buffer_emit_state_base_address)(cmd_buffer); + if (cmd_buffer->device->vk.enabled_extensions.EXT_descriptor_buffer) { + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); + } else { + cmd_buffer->state.current_db_mode = ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY; + genX(cmd_buffer_emit_bt_pool_base_address)(cmd_buffer); + } /* We sometimes store vertex data in the dynamic state buffer for blorp * operations and our dynamic state stream may re-use data from previous @@ -2888,6 +3150,8 @@ genX(CmdExecuteCommands)( genX(cmd_buffer_flush_generated_draws)(container); + UNUSED enum anv_cmd_descriptor_buffer_mode db_mode = + container->state.current_db_mode; for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); @@ -2953,6 +3217,8 @@ genX(CmdExecuteCommands)( #endif container->state.gfx.viewport_set |= secondary->state.gfx.viewport_set; + + db_mode = secondary->state.current_db_mode; } /* The secondary isn't counted in our VF cache tracking so we need to @@ -2981,19 +3247,41 @@ genX(CmdExecuteCommands)( container->state.current_hash_scale = 0; container->state.gfx.push_constant_stages = 0; container->state.gfx.ds_write_state = false; + memset(&container->state.gfx.urb_cfg, 0, sizeof(struct intel_urb_config)); + + /* Reemit all GFX instructions in container */ memcpy(container->state.gfx.dyn_state.dirty, device->gfx_dirty_state, sizeof(container->state.gfx.dyn_state.dirty)); + if (container->device->vk.enabled_extensions.KHR_fragment_shading_rate) { + /* Also recompute the CPS_STATE offset */ + struct vk_dynamic_graphics_state *dyn = + &container->vk.dynamic_graphics_state; + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_FSR); + } /* Each of the secondary command buffers will use its own state base * address. We need to re-emit state base address for the container after * all of the secondaries are done. - * - * TODO: Maybe we want to make this a dirty bit to avoid extra state base - * address calls? */ - genX(cmd_buffer_emit_state_base_address)(container); + if (container->device->vk.enabled_extensions.EXT_descriptor_buffer) { +#if GFX_VERx10 >= 125 + /* If the last secondary had a different mode, reemit the last pending + * mode. Otherwise, we can do a lighter binding table pool update. + */ + if (db_mode != container->state.current_db_mode) { + container->state.current_db_mode = db_mode; + genX(cmd_buffer_emit_state_base_address)(container); + } else { + genX(cmd_buffer_emit_bt_pool_base_address)(container); + } +#else + genX(cmd_buffer_emit_state_base_address)(container); +#endif + } else { + genX(cmd_buffer_emit_bt_pool_base_address)(container); + } /* Copy of utrace timestamp buffers from secondary into container */ if (u_trace_enabled(&device->ds.trace_context)) { @@ -3221,24 +3509,27 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer, pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; break; case VK_ACCESS_2_SHADER_STORAGE_READ_BIT: - /* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a - * storage buffer, physical storage buffer, storage texel buffer, or - * storage image in any shader pipeline stage. - * - * Any storage buffers or images written to must be invalidated and - * flushed before the shader can access them. - * - * Both HDC & Untyped flushes also do invalidation. This is why we use - * this here on Gfx12+. - * - * Gfx11 and prior don't have HDC. 
Only Data cache flush is available - * and it only operates on the written cache lines. - */ - if (device->info->ver >= 12) { - pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; - } - break; + /* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a + * storage buffer, physical storage buffer, storage texel buffer, or + * storage image in any shader pipeline stage. + * + * Any storage buffers or images written to must be invalidated and + * flushed before the shader can access them. + * + * Both HDC & Untyped flushes also do invalidation. This is why we + * use this here on Gfx12+. + * + * Gfx11 and prior don't have HDC. Only Data cache flush is available + * and it only operates on the written cache lines. + */ + if (device->info->ver >= 12) { + pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + } + break; + case VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT: + pipe_bits |= ANV_PIPE_STATE_CACHE_INVALIDATE_BIT; + break; default: break; /* Nothing to do */ } diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index fe979fd828c..9a3a09e78ce 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -101,6 +101,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); + genX(flush_descriptor_buffers)(cmd_buffer, &comp_state->base); + genX(flush_pipeline_select_gpgpu)(cmd_buffer); /* Apply any pending pipeline flushes we may have. We want to apply them @@ -873,6 +875,9 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, trace_intel_begin_rays(&cmd_buffer->trace); genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); + + genX(flush_descriptor_buffers)(cmd_buffer, &rt->base); + genX(flush_pipeline_select_gpgpu)(cmd_buffer); cmd_buffer->state.rt.pipeline_dirty = false; diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index 94e17fa3e1b..d81ae0be164 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -190,6 +190,14 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, return anv_descriptor_set_address(set); } + case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER: { + return anv_address_from_u64( + anv_cmd_buffer_descriptor_buffer_address( + cmd_buffer, + gfx_state->base.descriptor_buffers[range->index].buffer_index) + + gfx_state->base.descriptor_buffers[range->index].buffer_offset); + } + case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: { if (gfx_state->base.push_constants_state.alloc_size == 0) { gfx_state->base.push_constants_state = @@ -261,6 +269,10 @@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer, return state.alloc_size; } + case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER: + return gfx_state->base.pipeline->layout.set[ + range->index].layout->descriptor_buffer_surface_size; + case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: return (range->start + range->length) * 32; @@ -660,6 +672,8 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1); + genX(flush_descriptor_buffers)(cmd_buffer, &cmd_buffer->state.gfx.base); + genX(flush_pipeline_select_3d)(cmd_buffer); /* Wa_14015814527
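
--
Editor's note (commentary, not part of the patch): for readers unfamiliar with
VK_EXT_descriptor_buffer, below is a minimal sketch of the application-side
flow that the entry points above service. Only the vkCmd*/vkGet* names come
from the Vulkan spec; device, cmd, layout and buffer are placeholder handles,
with buffer assumed to be created with
VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT and already filled with descriptors
via vkGetDescriptorEXT().

   /* Bind the descriptor buffer by GPU address. In anv this lands in
    * anv_CmdBindDescriptorBuffersEXT() above, which records the address and
    * flags state.descriptor_buffers.dirty so that STATE_BASE_ADDRESS can be
    * re-emitted as needed.
    */
   const VkBufferDeviceAddressInfo addr_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      .buffer = buffer,
   };
   const VkDescriptorBufferBindingInfoEXT binding_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
      .address = vkGetBufferDeviceAddress(device, &addr_info),
      .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT,
   };
   vkCmdBindDescriptorBuffersEXT(cmd, 1, &binding_info);

   /* Point set 0 of the pipeline layout at an offset inside descriptor
    * buffer 0. This reaches anv_cmd_buffer_set_descriptor_buffer_offsets();
    * the patch implements the "2EXT" form taking a struct, which carries the
    * same parameters as this classic entry point.
    */
   const uint32_t buffer_index = 0;
   const VkDeviceSize offset = 0;
   vkCmdSetDescriptorBufferOffsetsEXT(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                      layout, 0 /* firstSet */, 1,
                                      &buffer_index, &offset);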