anv: implement descriptor buffer binding

And the barriers for them (VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22151>
Lionel Landwerlin 2023-10-25 10:21:22 +03:00 committed by Marge Bot
parent 349c46c553
commit ab7641b8dc
8 changed files with 658 additions and 134 deletions


@ -747,8 +747,13 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
{
if (size == 0)
return ANV_STATE_NULL;
assert(cmd_buffer->state.current_db_mode !=
ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN);
struct anv_state state =
anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
anv_state_stream_alloc(cmd_buffer->state.current_db_mode ==
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ?
&cmd_buffer->dynamic_state_db_stream :
&cmd_buffer->dynamic_state_stream,
size, alignment);
if (state.map == NULL)
anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);


@ -741,6 +741,52 @@ void anv_CmdBindPipeline(
anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
}
static struct anv_cmd_pipeline_state *
anv_cmd_buffer_get_pipeline_layout_state(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
const struct anv_descriptor_set_layout *set_layout,
VkShaderStageFlags *out_stages)
{
*out_stages = set_layout->shader_stages;
switch (bind_point) {
case VK_PIPELINE_BIND_POINT_GRAPHICS:
*out_stages &= VK_SHADER_STAGE_ALL_GRAPHICS |
(cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ?
(VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT) : 0);
return &cmd_buffer->state.gfx.base;
case VK_PIPELINE_BIND_POINT_COMPUTE:
*out_stages &= VK_SHADER_STAGE_COMPUTE_BIT;
return &cmd_buffer->state.compute.base;
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
*out_stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
return &cmd_buffer->state.rt.base;
default:
unreachable("invalid bind point");
}
}
static void
anv_cmd_buffer_maybe_dirty_descriptor_mode(struct anv_cmd_buffer *cmd_buffer,
enum anv_cmd_descriptor_buffer_mode new_mode)
{
if (cmd_buffer->state.current_db_mode == new_mode)
return;
/* Ensure we program the STATE_BASE_ADDRESS properly at least once */
cmd_buffer->state.descriptor_buffers.dirty = true;
cmd_buffer->state.pending_db_mode = new_mode;
}
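The two helpers above implement a two-phase switch: bind-time entry points only record a pending mode and flag the descriptor-buffer state dirty, and the mode only becomes current once STATE_BASE_ADDRESS is re-emitted (see genX(cmd_buffer_emit_state_base_address) further down). A minimal standalone sketch of that flow, using simplified stand-in names rather than the real ANV structures:

#include <stdbool.h>
#include <stdio.h>

enum db_mode { DB_MODE_UNKNOWN, DB_MODE_LEGACY, DB_MODE_BUFFER };

struct cmd_state {
   enum db_mode current_db_mode;  /* mode programmed in STATE_BASE_ADDRESS */
   enum db_mode pending_db_mode;  /* mode requested by the last bind call */
   bool         db_dirty;         /* STATE_BASE_ADDRESS needs a re-emit */
};

static void maybe_dirty_mode(struct cmd_state *s, enum db_mode new_mode)
{
   if (s->current_db_mode == new_mode)
      return;
   s->db_dirty = true;            /* force a STATE_BASE_ADDRESS re-emit */
   s->pending_db_mode = new_mode;
}

static void emit_state_base_address(struct cmd_state *s)
{
   /* ... program the base addresses according to pending_db_mode ... */
   s->current_db_mode = s->pending_db_mode;
   s->db_dirty = false;
}

int main(void)
{
   struct cmd_state s = { DB_MODE_UNKNOWN, DB_MODE_UNKNOWN, false };
   maybe_dirty_mode(&s, DB_MODE_BUFFER);  /* e.g. vkCmdBindDescriptorBuffersEXT */
   if (s.db_dirty)
      emit_state_base_address(&s);        /* done at flush/draw time */
   printf("mode = %d\n", s.current_db_mode);
   return 0;
}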
static void
anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
@ -761,37 +807,20 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
*/
assert(!set->pool || !set->pool->host_only);
struct anv_descriptor_set_layout *set_layout = set->layout;
VkShaderStageFlags stages = set_layout->shader_stages;
struct anv_cmd_pipeline_state *pipe_state;
struct anv_descriptor_set_layout *set_layout =
layout->set[set_index].layout;
switch (bind_point) {
case VK_PIPELINE_BIND_POINT_GRAPHICS:
stages &= VK_SHADER_STAGE_ALL_GRAPHICS |
(cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ?
(VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT) : 0);
pipe_state = &cmd_buffer->state.gfx.base;
break;
anv_cmd_buffer_maybe_dirty_descriptor_mode(
cmd_buffer,
(set->layout->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) != 0 ?
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER :
ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY);
case VK_PIPELINE_BIND_POINT_COMPUTE:
stages &= VK_SHADER_STAGE_COMPUTE_BIT;
pipe_state = &cmd_buffer->state.compute.base;
break;
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
pipe_state = &cmd_buffer->state.rt.base;
break;
default:
unreachable("invalid bind point");
}
VkShaderStageFlags stages;
struct anv_cmd_pipeline_state *pipe_state =
anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
set_layout, &stages);
VkShaderStageFlags dirty_stages = 0;
/* If it's a push descriptor set, we have to flag things as dirty
@ -799,50 +828,59 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
* may have edited in-place.
*/
if (pipe_state->descriptors[set_index] != set ||
anv_descriptor_set_is_push(set)) {
anv_descriptor_set_is_push(set)) {
pipe_state->descriptors[set_index] = set;
/* When using indirect descriptors, stages that have access to the HW
* binding tables never need to access the
* anv_push_constants::desc_surface_offsets fields, because any data
* they need from the descriptor buffer is accessible through a binding
* table entry. For stages that are "bindless" (Mesh/Task/RT), we need
* to provide anv_push_constants::desc_surface_offsets matching the bound
* descriptor so that shaders can access the descriptor buffer through
* A64 messages.
*
* With direct descriptors, the shaders can use the
* anv_push_constants::desc_surface_offsets to build bindless offsets.
* So we always need to update the push constant data.
*/
bool update_desc_sets =
!cmd_buffer->device->physical->indirect_descriptors ||
(stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT |
VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR));
if (set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
assert(set->is_push);
if (update_desc_sets) {
struct anv_push_constants *push = &pipe_state->push_constants;
pipe_state->descriptor_buffers[set_index].buffer_index = -1;
pipe_state->descriptor_buffers[set_index].buffer_offset = set->desc_offset;
pipe_state->descriptor_buffers[set_index].bound = true;
cmd_buffer->state.descriptors_dirty |= stages;
cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
} else {
/* When using indirect descriptors, stages that have access to the HW
* binding tables never need to access the
* anv_push_constants::desc_offsets fields, because any data they
* need from the descriptor buffer is accessible through a binding
* table entry. For stages that are "bindless" (Mesh/Task/RT), we
* need to provide anv_push_constants::desc_offsets matching the
* bound descriptor so that shaders can access the descriptor buffer
* through A64 messages.
*
* With direct descriptors, the shaders can use the
* anv_push_constants::desc_offsets to build bindless offsets. So
* we always need to update the push constant data.
*/
bool update_desc_sets =
!cmd_buffer->device->physical->indirect_descriptors ||
(stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT |
VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR));
uint64_t offset =
anv_address_physical(set->desc_surface_addr) -
cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
push->desc_surface_offsets[set_index] |= offset;
push->desc_sampler_offsets[set_index] |=
anv_address_physical(set->desc_sampler_addr) -
cmd_buffer->device->physical->va.dynamic_state_pool.addr;
if (update_desc_sets) {
struct anv_push_constants *push = &pipe_state->push_constants;
uint64_t offset =
anv_address_physical(set->desc_surface_addr) -
cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
push->desc_surface_offsets[set_index] |= offset;
push->desc_sampler_offsets[set_index] |=
anv_address_physical(set->desc_sampler_addr) -
cmd_buffer->device->physical->va.dynamic_state_pool.addr;
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
set->desc_surface_addr.bo);
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
set->desc_sampler_addr.bo);
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
set->desc_surface_addr.bo);
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
set->desc_sampler_addr.bo);
}
}
dirty_stages |= stages;
@ -960,6 +998,108 @@ void anv_CmdBindDescriptorSets2KHR(
}
}
void anv_CmdBindDescriptorBuffersEXT(
VkCommandBuffer commandBuffer,
uint32_t bufferCount,
const VkDescriptorBufferBindingInfoEXT* pBindingInfos)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_state *state = &cmd_buffer->state;
for (uint32_t i = 0; i < bufferCount; i++) {
assert(pBindingInfos[i].address >= cmd_buffer->device->physical->va.descriptor_buffer_pool.addr &&
pBindingInfos[i].address < (cmd_buffer->device->physical->va.descriptor_buffer_pool.addr +
cmd_buffer->device->physical->va.descriptor_buffer_pool.size));
if (state->descriptor_buffers.address[i] != pBindingInfos[i].address) {
state->descriptor_buffers.address[i] = pBindingInfos[i].address;
if (pBindingInfos[i].usage & VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT)
state->descriptor_buffers.surfaces_address = pBindingInfos[i].address;
if (pBindingInfos[i].usage & VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)
state->descriptor_buffers.samplers_address = pBindingInfos[i].address;
state->descriptor_buffers.dirty = true;
state->descriptor_buffers.offsets_dirty = ~0;
}
}
anv_cmd_buffer_maybe_dirty_descriptor_mode(cmd_buffer,
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER);
}
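For reference, a sketch (not part of this patch) of how an application reaches this entry point: the descriptor buffer is an ordinary VkBuffer created with the descriptor-buffer usage flags and a device address, and the extension entry point is fetched through vkGetDeviceProcAddr. The 'device', 'cmd' and 'desc_buf' handles below are assumed to already exist.

#include <vulkan/vulkan.h>

/* Illustrative only: binds one resource descriptor buffer. 'desc_buf' is
 * assumed to have been created with
 * VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
 * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. */
static void
bind_descriptor_buffer(VkDevice device, VkCommandBuffer cmd, VkBuffer desc_buf)
{
   PFN_vkCmdBindDescriptorBuffersEXT pfn_bind =
      (PFN_vkCmdBindDescriptorBuffersEXT)
      vkGetDeviceProcAddr(device, "vkCmdBindDescriptorBuffersEXT");

   const VkBufferDeviceAddressInfo addr_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      .buffer = desc_buf,
   };

   const VkDescriptorBufferBindingInfoEXT binding = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
      .address = vkGetBufferDeviceAddress(device, &addr_info),
      .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT,
   };

   /* This lands in anv_CmdBindDescriptorBuffersEXT above, which records the
    * address and flags the descriptor-buffer state dirty. */
   pfn_bind(cmd, 1, &binding);
}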
static void
anv_cmd_buffer_set_descriptor_buffer_offsets(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
struct anv_pipeline_layout *layout,
uint32_t first_set,
uint32_t set_count,
const VkDeviceSize *buffer_offsets,
const uint32_t *buffer_indices)
{
for (uint32_t i = 0; i < set_count; i++) {
const uint32_t set_index = first_set + i;
const struct anv_descriptor_set_layout *set_layout =
layout->sets_layout.set[set_index].layout;
VkShaderStageFlags stages;
struct anv_cmd_pipeline_state *pipe_state =
anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
set_layout, &stages);
if (buffer_offsets[i] != pipe_state->descriptor_buffers[set_index].buffer_offset ||
buffer_indices[i] != pipe_state->descriptor_buffers[set_index].buffer_index ||
!pipe_state->descriptor_buffers[set_index].bound) {
pipe_state->descriptor_buffers[set_index].buffer_index = buffer_indices[i];
pipe_state->descriptor_buffers[set_index].buffer_offset = buffer_offsets[i];
cmd_buffer->state.descriptors_dirty |= stages;
cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
}
pipe_state->descriptor_buffers[set_index].bound = true;
}
}
void anv_CmdSetDescriptorBufferOffsets2EXT(
VkCommandBuffer commandBuffer,
const VkSetDescriptorBufferOffsetsInfoEXT* pSetDescriptorBufferOffsetsInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline_layout, layout, pSetDescriptorBufferOffsetsInfo->layout);
if (pSetDescriptorBufferOffsetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
layout,
pSetDescriptorBufferOffsetsInfo->firstSet,
pSetDescriptorBufferOffsetsInfo->setCount,
pSetDescriptorBufferOffsetsInfo->pOffsets,
pSetDescriptorBufferOffsetsInfo->pBufferIndices);
}
if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
layout,
pSetDescriptorBufferOffsetsInfo->firstSet,
pSetDescriptorBufferOffsetsInfo->setCount,
pSetDescriptorBufferOffsetsInfo->pOffsets,
pSetDescriptorBufferOffsetsInfo->pBufferIndices);
}
if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_RT_STAGE_BITS) {
anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
layout,
pSetDescriptorBufferOffsetsInfo->firstSet,
pSetDescriptorBufferOffsetsInfo->setCount,
pSetDescriptorBufferOffsetsInfo->pOffsets,
pSetDescriptorBufferOffsetsInfo->pBufferIndices);
}
}
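The application-side counterpart, again only a sketch with assumed handles: after binding the buffers, the per-set offsets are programmed with vkCmdSetDescriptorBufferOffsetsEXT (the 2EXT variant implemented above takes the same parameters bundled into VkSetDescriptorBufferOffsetsInfoEXT).

#include <vulkan/vulkan.h>

/* Illustrative only: points set 0 of 'layout' at byte offset 'offset' inside
 * descriptor buffer index 0 (e.g. the buffer bound in the previous sketch).
 * 'device', 'cmd' and 'layout' are assumed to be valid handles. */
static void
set_descriptor_buffer_offset(VkDevice device, VkCommandBuffer cmd,
                             VkPipelineLayout layout, VkDeviceSize offset)
{
   PFN_vkCmdSetDescriptorBufferOffsetsEXT pfn_set_offsets =
      (PFN_vkCmdSetDescriptorBufferOffsetsEXT)
      vkGetDeviceProcAddr(device, "vkCmdSetDescriptorBufferOffsetsEXT");

   const uint32_t buffer_index = 0;

   /* Ends up in anv_cmd_buffer_set_descriptor_buffer_offsets above, which
    * only dirties descriptor state when the index or offset actually change. */
   pfn_set_offsets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layout,
                   0 /* firstSet */, 1 /* setCount */,
                   &buffer_index, &offset);
}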
void anv_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
VkCommandBuffer commandBuffer,
const VkBindDescriptorBufferEmbeddedSamplersInfoEXT* pBindDescriptorBufferEmbeddedSamplersInfo)
{
/* no-op */
}
void anv_CmdBindVertexBuffers2(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
@ -1214,7 +1354,6 @@ anv_cmd_buffer_push_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
assert(pInfo->set < MAX_SETS);
struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
struct anv_push_descriptor_set *push_set =
&anv_cmd_buffer_get_pipe_state(cmd_buffer,
bind_point)->push_descriptor;
@ -1263,10 +1402,11 @@ void anv_CmdPushDescriptorSetWithTemplate2KHR(
assert(pInfo->set < MAX_PUSH_DESCRIPTORS);
struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
struct anv_push_descriptor_set *push_set =
&anv_cmd_buffer_get_pipe_state(cmd_buffer,
template->bind_point)->push_descriptor;
UNUSED VkShaderStageFlags stages;
struct anv_cmd_pipeline_state *pipe_state =
anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, template->bind_point,
set_layout, &stages);
struct anv_push_descriptor_set *push_set = &pipe_state->push_descriptor;
if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
return;


@ -1942,13 +1942,24 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
(push_set->set_used_on_gpu ||
set->desc_surface_mem.alloc_size < layout->descriptor_buffer_surface_size)) {
struct anv_physical_device *pdevice = cmd_buffer->device->physical;
struct anv_state_stream *push_stream =
pdevice->indirect_descriptors ?
&cmd_buffer->indirect_push_descriptor_stream :
&cmd_buffer->surface_state_stream;
uint64_t push_base_address = pdevice->indirect_descriptors ?
pdevice->va.indirect_push_descriptor_pool.addr :
pdevice->va.internal_surface_state_pool.addr;
struct anv_state_stream *push_stream;
uint64_t push_base_address;
if (layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
push_stream = pdevice->uses_ex_bso ?
&cmd_buffer->push_descriptor_buffer_stream :
&cmd_buffer->surface_state_stream;
push_base_address = pdevice->uses_ex_bso ?
pdevice->va.push_descriptor_buffer_pool.addr :
pdevice->va.internal_surface_state_pool.addr;
} else {
push_stream = pdevice->indirect_descriptors ?
&cmd_buffer->indirect_push_descriptor_stream :
&cmd_buffer->surface_state_stream;
push_base_address = pdevice->indirect_descriptors ?
pdevice->va.indirect_push_descriptor_pool.addr :
pdevice->va.internal_surface_state_pool.addr;
}
uint32_t surface_size, sampler_size;
anv_descriptor_set_layout_descriptor_buffer_size(layout, 0,
@ -2868,7 +2879,7 @@ void anv_GetDescriptorEXT(
(sampler = anv_sampler_from_handle(
pDescriptorInfo->data.pCombinedImageSampler->sampler))) {
memcpy(pDescriptor + desc_offset + ANV_SURFACE_STATE_SIZE,
sampler->bindless_state.map + i * ANV_SAMPLER_STATE_SIZE,
sampler->db_state[i],
ANV_SAMPLER_STATE_SIZE);
} else {
memset(pDescriptor + desc_offset + ANV_SURFACE_STATE_SIZE,

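For context around the anv_GetDescriptorEXT change above (sampler data now comes from sampler->db_state), here is a sketch of the application side of this path: descriptors are written into the host-mapped descriptor buffer with vkGetDescriptorEXT, using the sizes and binding offsets reported by the implementation. All handles, the queried properties and the mapped pointer are assumed to exist; none of this is part of the patch.

#include <string.h>
#include <vulkan/vulkan.h>

/* Illustrative only: writes one combined image sampler descriptor at binding 0
 * of a host-mapped descriptor buffer. 'props' is the previously queried
 * VkPhysicalDeviceDescriptorBufferPropertiesEXT, 'set_layout' the layout the
 * buffer was sized for, 'mapped' the mapped buffer memory. */
static void
write_combined_image_sampler(VkDevice device,
                             const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props,
                             VkDescriptorSetLayout set_layout,
                             VkSampler sampler, VkImageView view,
                             char *mapped)
{
   PFN_vkGetDescriptorSetLayoutBindingOffsetEXT pfn_binding_offset =
      (PFN_vkGetDescriptorSetLayoutBindingOffsetEXT)
      vkGetDeviceProcAddr(device, "vkGetDescriptorSetLayoutBindingOffsetEXT");
   PFN_vkGetDescriptorEXT pfn_get_descriptor =
      (PFN_vkGetDescriptorEXT)
      vkGetDeviceProcAddr(device, "vkGetDescriptorEXT");

   VkDeviceSize binding_offset = 0;
   pfn_binding_offset(device, set_layout, 0 /* binding */, &binding_offset);

   const VkDescriptorImageInfo image_info = {
      .sampler = sampler,
      .imageView = view,
      .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
   };
   const VkDescriptorGetInfoEXT get_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
      .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
      .data.pCombinedImageSampler = &image_info,
   };

   /* anv_GetDescriptorEXT above packs surface state followed by sampler
    * state into this memory. */
   pfn_get_descriptor(device, &get_info,
                      props->combinedImageSamplerDescriptorSize,
                      mapped + binding_offset);
}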

@ -149,6 +149,9 @@ void genX(emit_l3_config)(struct anv_batch *batch,
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
const struct intel_l3_config *cfg);
void genX(flush_descriptor_buffers)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state);
uint32_t
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,


@ -3280,11 +3280,17 @@ struct anv_push_constants {
/**
* Base offsets for descriptor sets from
* INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
*
* In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
* The offset has different meaning depending on a number of factors :
*
* In bits [6:63] : descriptor set address
* - with descriptor sets (direct or indirect), this is relative to
* pdevice->va.descriptor_pool
*
* - with descriptor buffers on DG2+, relative to
* device->va.descriptor_buffer_pool
*
* - with descriptor buffers prior to DG2, relative to the programmed value
* in STATE_BASE_ADDRESS::BindlessSurfaceStateBaseAddress
*/
uint32_t desc_surface_offsets[MAX_SETS];
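A small sketch (editorial, not driver code) of what the comment above describes: the 32-bit value stored per set is added to a mode-dependent 64-bit base to recover the descriptor set's address. The enum and parameter names below are hypothetical stand-ins; the actual bases come from the driver's VA layout.

#include <stdint.h>

enum desc_mode { DESC_SETS, DESC_BUFFER_DG2, DESC_BUFFER_PRE_DG2 };

static uint64_t
descriptor_set_address(enum desc_mode mode,
                       uint64_t descriptor_pool_base,
                       uint64_t descriptor_buffer_pool_base,
                       uint64_t bindless_surface_state_base,
                       uint32_t desc_surface_offset)
{
   switch (mode) {
   case DESC_SETS:           return descriptor_pool_base + desc_surface_offset;
   case DESC_BUFFER_DG2:     return descriptor_buffer_pool_base + desc_surface_offset;
   case DESC_BUFFER_PRE_DG2: return bindless_surface_state_base + desc_surface_offset;
   }
   return 0;
}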
@ -3478,6 +3484,26 @@ struct anv_simple_shader {
*/
struct anv_cmd_pipeline_state {
struct anv_descriptor_set *descriptors[MAX_SETS];
struct {
bool bound;
/**
* Buffer index used by this descriptor set.
*/
int32_t buffer_index; /* -1 means push descriptor */
/**
* Offset of the descriptor set in the descriptor buffer.
*/
uint32_t buffer_offset;
/**
* Final computed address to be emitted in the descriptor set surface
* state.
*/
uint64_t address;
/**
* The descriptor set surface state.
*/
struct anv_state state;
} descriptor_buffers[MAX_SETS];
struct anv_push_descriptor_set push_descriptor;
struct anv_push_constants push_constants;
@ -3645,6 +3671,12 @@ struct anv_cmd_state {
*/
enum anv_cmd_descriptor_buffer_mode current_db_mode;
/**
* Whether the command buffer has pending descriptor buffers bound to it.
* This variable changes before anv_cmd_state::current_db_mode.
*/
enum anv_cmd_descriptor_buffer_mode pending_db_mode;
struct {
/**
* Tracks operations susceptible to interfere with queries in the
@ -3668,6 +3700,14 @@ struct anv_cmd_state {
VkShaderStageFlags push_descriptors_dirty;
VkShaderStageFlags push_constants_dirty;
struct {
uint64_t surfaces_address;
uint64_t samplers_address;
bool dirty;
VkShaderStageFlags offsets_dirty;
uint64_t address[MAX_SETS];
} descriptor_buffers;
struct anv_vertex_binding vertex_bindings[MAX_VBS];
bool xfb_enabled;
struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
@ -3954,10 +3994,25 @@ static inline struct anv_address
anv_cmd_buffer_dynamic_state_address(struct anv_cmd_buffer *cmd_buffer,
struct anv_state state)
{
if (cmd_buffer->state.current_db_mode ==
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER) {
return anv_state_pool_state_address(
&cmd_buffer->device->dynamic_state_db_pool, state);
}
return anv_state_pool_state_address(
&cmd_buffer->device->dynamic_state_pool, state);
}
static inline uint64_t
anv_cmd_buffer_descriptor_buffer_address(struct anv_cmd_buffer *cmd_buffer,
int32_t buffer_index)
{
if (buffer_index == -1)
return cmd_buffer->device->physical->va.push_descriptor_buffer_pool.addr;
return cmd_buffer->state.descriptor_buffers.address[buffer_index];
}
VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);


@ -132,10 +132,24 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
genX(flush_pipeline_select_3d)(cmd_buffer);
#endif
/* If no API entry point selected the current mode (this can happen if the
* first operation in the command buffer doesn't bind any descriptor set or
* descriptor buffer), select BUFFER if EXT_descriptor_buffer is enabled,
* otherwise LEGACY.
*/
if (cmd_buffer->state.pending_db_mode ==
ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN) {
cmd_buffer->state.pending_db_mode =
cmd_buffer->device->vk.enabled_extensions.EXT_descriptor_buffer ?
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER :
ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY;
}
anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
sba.GeneralStateMOCS = mocs;
sba.GeneralStateBufferSize = 0xfffff;
sba.GeneralStateBaseAddressModifyEnable = true;
sba.GeneralStateBufferSizeModifyEnable = true;
sba.StatelessDataPortAccessMOCS = mocs;
@ -151,29 +165,19 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
sba.SurfaceStateMOCS = mocs;
sba.SurfaceStateBaseAddressModifyEnable = true;
sba.DynamicStateBaseAddress =
(struct anv_address) { device->dynamic_state_pool.block_pool.bo, 0 };
sba.DynamicStateMOCS = mocs;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 };
sba.IndirectObjectMOCS = mocs;
sba.IndirectObjectBufferSize = 0xfffff;
sba.IndirectObjectBaseAddressModifyEnable = true;
sba.IndirectObjectBufferSizeModifyEnable = true;
sba.InstructionBaseAddress =
(struct anv_address) { device->instruction_state_pool.block_pool.bo, 0 };
sba.InstructionMOCS = mocs;
sba.InstructionBufferSize =
device->physical->va.instruction_state_pool.size / 4096;
sba.InstructionBaseAddressModifyEnable = true;
sba.GeneralStateBufferSize = 0xfffff;
sba.IndirectObjectBufferSize = 0xfffff;
sba.DynamicStateBufferSize = (device->physical->va.dynamic_state_pool.size +
device->physical->va.sampler_state_pool.size) / 4096;
sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096;
sba.GeneralStateBufferSizeModifyEnable = true;
sba.IndirectObjectBufferSizeModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
sba.InstructionBuffersizeModifyEnable = true;
sba.InstructionBuffersizeModifyEnable = true;
#if GFX_VER >= 11
sba.BindlessSamplerStateBaseAddress = ANV_NULL_ADDRESS;
@ -182,14 +186,61 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
sba.BindlessSamplerStateBaseAddressModifyEnable = true;
#endif
if (!device->physical->indirect_descriptors) {
if (cmd_buffer->state.pending_db_mode == ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER) {
sba.DynamicStateBaseAddress = (struct anv_address) {
.offset = device->physical->va.dynamic_state_db_pool.addr,
};
sba.DynamicStateBufferSize =
(device->physical->va.dynamic_state_db_pool.size +
device->physical->va.descriptor_buffer_pool.size +
device->physical->va.push_descriptor_buffer_pool.size) / 4096;
sba.DynamicStateMOCS = mocs;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
#if GFX_VERx10 >= 125
/* Bindless Surface State & Bindless Sampler State are aligned to the
* same heap
*/
sba.BindlessSurfaceStateBaseAddress =
(struct anv_address) { .offset =
device->physical->va.binding_table_pool.addr, };
sba.BindlessSurfaceStateBaseAddress = (struct anv_address) {
.offset = device->physical->va.descriptor_buffer_pool.addr,
};
sba.BindlessSurfaceStateSize =
(device->physical->va.descriptor_buffer_pool.size +
device->physical->va.push_descriptor_buffer_pool.size) - 1;
sba.BindlessSurfaceStateMOCS = mocs;
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
#else
const uint64_t surfaces_addr =
cmd_buffer->state.descriptor_buffers.surfaces_address != 0 ?
cmd_buffer->state.descriptor_buffers.surfaces_address :
anv_address_physical(device->workaround_address);
const uint64_t surfaces_size =
cmd_buffer->state.descriptor_buffers.surfaces_address != 0 ?
MIN2(device->physical->va.descriptor_buffer_pool.size -
(cmd_buffer->state.descriptor_buffers.surfaces_address -
device->physical->va.descriptor_buffer_pool.addr),
anv_physical_device_bindless_heap_size(device->physical)) :
(device->workaround_bo->size - device->workaround_address.offset);
sba.BindlessSurfaceStateBaseAddress = (struct anv_address) {
.offset = surfaces_addr,
};
sba.BindlessSurfaceStateSize = surfaces_size / ANV_SURFACE_STATE_SIZE - 1;
sba.BindlessSurfaceStateMOCS = mocs;
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
#endif /* GFX_VERx10 < 125 */
} else if (!device->physical->indirect_descriptors) {
#if GFX_VERx10 >= 125
sba.DynamicStateBaseAddress = (struct anv_address) {
.offset = device->physical->va.dynamic_state_pool.addr,
};
sba.DynamicStateBufferSize =
(device->physical->va.dynamic_state_pool.size +
device->physical->va.sampler_state_pool.size) / 4096;
sba.DynamicStateMOCS = mocs;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
sba.BindlessSurfaceStateBaseAddress = (struct anv_address) {
.offset = device->physical->va.internal_surface_state_pool.addr,
};
sba.BindlessSurfaceStateSize =
(device->physical->va.internal_surface_state_pool.size +
device->physical->va.bindless_surface_state_pool.size) - 1;
@ -199,12 +250,23 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
unreachable("Direct descriptor not supported");
#endif
} else {
sba.DynamicStateBaseAddress = (struct anv_address) {
.offset = device->physical->va.dynamic_state_pool.addr,
};
sba.DynamicStateBufferSize =
(device->physical->va.dynamic_state_pool.size +
device->physical->va.sampler_state_pool.size) / 4096;
sba.DynamicStateMOCS = mocs;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
sba.BindlessSurfaceStateBaseAddress =
(struct anv_address) { .offset =
device->physical->va.bindless_surface_state_pool.addr,
};
sba.BindlessSurfaceStateSize =
anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1;
anv_physical_device_bindless_heap_size(device->physical) /
ANV_SURFACE_STATE_SIZE - 1;
sba.BindlessSurfaceStateMOCS = mocs;
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
}
@ -214,6 +276,12 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
#endif
}
bool db_mode_changed = false;
if (cmd_buffer->state.current_db_mode != cmd_buffer->state.pending_db_mode) {
cmd_buffer->state.current_db_mode = cmd_buffer->state.pending_db_mode;
db_mode_changed = true;
}
#if INTEL_NEEDS_WA_1607854226
/* Wa_1607854226:
*
@ -293,6 +361,50 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
bits);
assert(cmd_buffer->state.current_db_mode !=
ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN);
if (db_mode_changed) {
#if GFX_VER == 11
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
ptr.SliceHashStatePointerValid = true;
ptr.SliceHashTableStatePointer = cmd_buffer->state.current_db_mode ==
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ?
device->slice_hash_db.offset :
device->slice_hash.offset;
}
#elif GFX_VERx10 == 125
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
ptr.SliceHashStatePointerValid = true;
ptr.SliceHashTableStatePointer = cmd_buffer->state.current_db_mode ==
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ?
device->slice_hash_db.offset :
device->slice_hash.offset;
}
#endif
/* Changing the dynamic state location affects all the states that have
* offsets relative to that pointer.
*/
struct anv_gfx_dynamic_state *hw_state = &cmd_buffer->state.gfx.dyn_state;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE);
if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate) {
struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_FSR);
}
#if GFX_VERx10 < 125
/* The push constant data for compute shaders is an offset in the dynamic
* state heap. If we change it, we need to reemit the push constants.
*/
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
#endif
}
}
void
@ -2108,6 +2220,13 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
break;
}
case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER: {
assert(pipe_state->descriptor_buffers[binding->index].state.alloc_size);
bt_map[s] = pipe_state->descriptor_buffers[binding->index].state.offset +
state_offset;
break;
}
default: {
assert(binding->set < MAX_SETS);
const struct anv_descriptor_set *set =
@ -2160,6 +2279,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
pipe_state,
binding, desc);
} else {
assert(pipeline->layout.type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
pipeline->layout.type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER);
surface_state_offset =
emit_direct_descriptor_binding_table_entry(cmd_buffer, pipe_state,
set, binding, desc);
@ -2210,7 +2331,11 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer,
continue;
memcpy(state->map + (s * 16),
sampler->state[binding->plane], sizeof(sampler->state[0]));
cmd_buffer->state.current_db_mode ==
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER ?
sampler->db_state[binding->plane] :
sampler->state[binding->plane],
sizeof(sampler->state[0]));
}
return VK_SUCCESS;
@ -2484,6 +2609,140 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value)
#endif
}
ALWAYS_INLINE static void
update_descriptor_set_surface_state(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
uint32_t set_idx)
{
if (!pipe_state->descriptor_buffers[set_idx].bound)
return;
const struct anv_physical_device *device = cmd_buffer->device->physical;
const int32_t buffer_index =
pipe_state->descriptor_buffers[set_idx].buffer_index;
const struct anv_va_range *push_va_range =
GFX_VERx10 >= 125 ?
&device->va.push_descriptor_buffer_pool :
&device->va.internal_surface_state_pool;
const struct anv_va_range *va_range =
buffer_index == -1 ? push_va_range : &device->va.descriptor_buffer_pool;
const uint64_t descriptor_set_addr =
(buffer_index == -1 ? va_range->addr :
cmd_buffer->state.descriptor_buffers.address[buffer_index]) +
pipe_state->descriptor_buffers[set_idx].buffer_offset;
const uint64_t set_size =
MIN2(va_range->size - (descriptor_set_addr - va_range->addr),
anv_physical_device_bindless_heap_size(device));
if (descriptor_set_addr != pipe_state->descriptor_buffers[set_idx].address) {
pipe_state->descriptor_buffers[set_idx].address = descriptor_set_addr;
struct anv_state surface_state =
anv_cmd_buffer_alloc_surface_states(cmd_buffer, 1);
const enum isl_format format =
anv_isl_format_for_descriptor_type(cmd_buffer->device,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
anv_fill_buffer_surface_state(
cmd_buffer->device, surface_state.map,
format, ISL_SWIZZLE_IDENTITY,
ISL_SURF_USAGE_CONSTANT_BUFFER_BIT,
anv_address_from_u64(pipe_state->descriptor_buffers[set_idx].address),
set_size, 1);
pipe_state->descriptor_buffers[set_idx].state = surface_state;
}
}
ALWAYS_INLINE static uint32_t
compute_descriptor_set_surface_offset(const struct anv_cmd_buffer *cmd_buffer,
const struct anv_cmd_pipeline_state *pipe_state,
const uint32_t set_idx)
{
const struct anv_physical_device *device = cmd_buffer->device->physical;
if (device->uses_ex_bso) {
int32_t buffer_index =
pipe_state->descriptor_buffers[set_idx].buffer_index;
uint64_t buffer_address =
buffer_index == -1 ?
device->va.push_descriptor_buffer_pool.addr :
cmd_buffer->state.descriptor_buffers.address[buffer_index];
return (buffer_address - device->va.descriptor_buffer_pool.addr) +
pipe_state->descriptor_buffers[set_idx].buffer_offset;
}
return pipe_state->descriptor_buffers[set_idx].buffer_offset << 6;
}
ALWAYS_INLINE static uint32_t
compute_descriptor_set_sampler_offset(const struct anv_cmd_buffer *cmd_buffer,
const struct anv_cmd_pipeline_state *pipe_state,
const uint32_t set_idx)
{
const struct anv_physical_device *device = cmd_buffer->device->physical;
int32_t buffer_index =
pipe_state->descriptor_buffers[set_idx].buffer_index;
uint64_t buffer_address =
buffer_index == -1 ?
device->va.push_descriptor_buffer_pool.addr :
cmd_buffer->state.descriptor_buffers.address[buffer_index];
return (buffer_address - device->va.dynamic_state_db_pool.addr) +
pipe_state->descriptor_buffers[set_idx].buffer_offset;
}
void
genX(flush_descriptor_buffers)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state)
{
/* On Gfx12.5+ the STATE_BASE_ADDRESS BindlessSurfaceStateBaseAddress &
* DynamicStateBaseAddress are fixed. So as long as we stay in one
* descriptor buffer mode, there is no need to switch.
*/
#if GFX_VERx10 >= 125
if (cmd_buffer->state.current_db_mode !=
cmd_buffer->state.pending_db_mode)
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
#else
if (cmd_buffer->state.descriptor_buffers.dirty)
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
#endif
assert(cmd_buffer->state.current_db_mode !=
ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN);
if (cmd_buffer->state.current_db_mode == ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER &&
(cmd_buffer->state.descriptor_buffers.dirty ||
(pipe_state->pipeline->active_stages &
cmd_buffer->state.descriptor_buffers.offsets_dirty) != 0)) {
struct anv_push_constants *push_constants =
&pipe_state->push_constants;
for (uint32_t i = 0; i < ARRAY_SIZE(push_constants->desc_surface_offsets); i++) {
update_descriptor_set_surface_state(cmd_buffer, pipe_state, i);
push_constants->desc_surface_offsets[i] =
compute_descriptor_set_surface_offset(cmd_buffer, pipe_state, i);
push_constants->desc_sampler_offsets[i] =
compute_descriptor_set_sampler_offset(cmd_buffer, pipe_state, i);
}
#if GFX_VERx10 < 125
struct anv_device *device = cmd_buffer->device;
push_constants->surfaces_base_offset =
(cmd_buffer->state.descriptor_buffers.surfaces_address -
device->physical->va.descriptor_buffer_pool.addr);
#endif
cmd_buffer->state.push_constants_dirty |=
(cmd_buffer->state.descriptor_buffers.offsets_dirty &
pipe_state->pipeline->active_stages);
cmd_buffer->state.descriptor_buffers.offsets_dirty &=
~pipe_state->pipeline->active_stages;
}
cmd_buffer->state.descriptor_buffers.dirty = false;
}
VkResult
genX(BeginCommandBuffer)(
VkCommandBuffer commandBuffer,
@ -2511,8 +2770,6 @@ genX(BeginCommandBuffer)(
cmd_buffer->usage_flags = pBeginInfo->flags;
cmd_buffer->state.current_db_mode = ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY;
/* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for
* primary level command buffers.
*
@ -2575,7 +2832,12 @@ genX(BeginCommandBuffer)(
}
#endif
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
if (cmd_buffer->device->vk.enabled_extensions.EXT_descriptor_buffer) {
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
} else {
cmd_buffer->state.current_db_mode = ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY;
genX(cmd_buffer_emit_bt_pool_base_address)(cmd_buffer);
}
/* We sometimes store vertex data in the dynamic state buffer for blorp
* operations and our dynamic state stream may re-use data from previous
@ -2888,6 +3150,8 @@ genX(CmdExecuteCommands)(
genX(cmd_buffer_flush_generated_draws)(container);
UNUSED enum anv_cmd_descriptor_buffer_mode db_mode =
container->state.current_db_mode;
for (uint32_t i = 0; i < commandBufferCount; i++) {
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
@ -2953,6 +3217,8 @@ genX(CmdExecuteCommands)(
#endif
container->state.gfx.viewport_set |= secondary->state.gfx.viewport_set;
db_mode = secondary->state.current_db_mode;
}
/* The secondary isn't counted in our VF cache tracking so we need to
@ -2981,19 +3247,41 @@ genX(CmdExecuteCommands)(
container->state.current_hash_scale = 0;
container->state.gfx.push_constant_stages = 0;
container->state.gfx.ds_write_state = false;
memset(&container->state.gfx.urb_cfg, 0, sizeof(struct intel_urb_config));
/* Reemit all GFX instructions in container */
memcpy(container->state.gfx.dyn_state.dirty,
device->gfx_dirty_state,
sizeof(container->state.gfx.dyn_state.dirty));
if (container->device->vk.enabled_extensions.KHR_fragment_shading_rate) {
/* Also recompute the CPS_STATE offset */
struct vk_dynamic_graphics_state *dyn =
&container->vk.dynamic_graphics_state;
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_FSR);
}
/* Each of the secondary command buffers will use its own state base
* address. We need to re-emit state base address for the container after
* all of the secondaries are done.
*
* TODO: Maybe we want to make this a dirty bit to avoid extra state base
* address calls?
*/
genX(cmd_buffer_emit_state_base_address)(container);
if (container->device->vk.enabled_extensions.EXT_descriptor_buffer) {
#if GFX_VERx10 >= 125
/* If the last secondary had a different mode, reemit the last pending
* mode. Otherwise, we can do a lighter binding table pool update.
*/
if (db_mode != container->state.current_db_mode) {
container->state.current_db_mode = db_mode;
genX(cmd_buffer_emit_state_base_address)(container);
} else {
genX(cmd_buffer_emit_bt_pool_base_address)(container);
}
#else
genX(cmd_buffer_emit_state_base_address)(container);
#endif
} else {
genX(cmd_buffer_emit_bt_pool_base_address)(container);
}
/* Copy of utrace timestamp buffers from secondary into container */
if (u_trace_enabled(&device->ds.trace_context)) {
@ -3221,24 +3509,27 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_SHADER_STORAGE_READ_BIT:
/* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a
* storage buffer, physical storage buffer, storage texel buffer, or
* storage image in any shader pipeline stage.
*
* Any storage buffers or images written to must be invalidated and
* flushed before the shader can access them.
*
* Both HDC & Untyped flushes also do invalidation. This is why we use
* this here on Gfx12+.
*
* Gfx11 and prior don't have HDC. Only Data cache flush is available
* and it only operates on the written cache lines.
*/
if (device->info->ver >= 12) {
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
}
break;
/* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a
* storage buffer, physical storage buffer, storage texel buffer, or
* storage image in any shader pipeline stage.
*
* Any storage buffers or images written to must be invalidated and
* flushed before the shader can access them.
*
* Both HDC & Untyped flushes also do invalidation. This is why we
* use this here on Gfx12+.
*
* Gfx11 and prior don't have HDC. Only Data cache flush is available
* and it only operates on the written cache lines.
*/
if (device->info->ver >= 12) {
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
}
break;
case VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT:
pipe_bits |= ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
break;
default:
break; /* Nothing to do */
}
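The new VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT case above maps descriptor-buffer reads to a state-cache invalidation. A sketch of the application barrier that exercises it, assuming the descriptor buffer was just written with a copy command and that synchronization2 is available; 'cmd' is an assumed valid VkCommandBuffer.

#include <vulkan/vulkan.h>

/* Illustrative only: make a transfer write to a descriptor buffer visible
 * to subsequent descriptor fetches by graphics shaders. */
static void
barrier_after_descriptor_buffer_copy(VkCommandBuffer cmd)
{
   const VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT,
      .dstAccessMask = VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
   };
   const VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };

   /* In ANV this reaches anv_pipe_invalidate_bits_for_access_flags and adds
    * ANV_PIPE_STATE_CACHE_INVALIDATE_BIT. */
   vkCmdPipelineBarrier2(cmd, &dep);
}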


@ -101,6 +101,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
genX(flush_descriptor_buffers)(cmd_buffer, &comp_state->base);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
/* Apply any pending pipeline flushes we may have. We want to apply them
@ -873,6 +875,9 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
trace_intel_begin_rays(&cmd_buffer->trace);
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
genX(flush_descriptor_buffers)(cmd_buffer, &rt->base);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
cmd_buffer->state.rt.pipeline_dirty = false;


@ -190,6 +190,14 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
return anv_descriptor_set_address(set);
}
case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER: {
return anv_address_from_u64(
anv_cmd_buffer_descriptor_buffer_address(
cmd_buffer,
gfx_state->base.descriptor_buffers[range->index].buffer_index) +
gfx_state->base.descriptor_buffers[range->index].buffer_offset);
}
case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: {
if (gfx_state->base.push_constants_state.alloc_size == 0) {
gfx_state->base.push_constants_state =
@ -261,6 +269,10 @@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
return state.alloc_size;
}
case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER:
return gfx_state->base.pipeline->layout.set[
range->index].layout->descriptor_buffer_surface_size;
case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
return (range->start + range->length) * 32;
@ -660,6 +672,8 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1);
genX(flush_descriptor_buffers)(cmd_buffer, &cmd_buffer->state.gfx.base);
genX(flush_pipeline_select_3d)(cmd_buffer);
/* Wa_14015814527