anv: add indirect command layout support

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
Lionel Landwerlin 2024-04-17 16:45:17 +03:00 committed by Marge Bot
parent 68885511d2
commit fb26ed6bf7
4 changed files with 847 additions and 0 deletions

View file

@ -40,6 +40,69 @@ genX_xml_h = custom_target(
genX_bits_included_symbols = [
# instructions
'COMPUTE_WALKER',
'GPGPU_WALKER',
'MEDIA_CURBE_LOAD',
'MEDIA_INTERFACE_DESCRIPTOR_LOAD',
'MEDIA_STATE_FLUSH',
'MEDIA_VFE_STATE',
'MI_ARB_CHECK',
'3DMESH_3D',
'3DPRIMITIVE',
'3DPRIMITIVE_EXTENDED',
'3DSTATE_CLEAR_PARAMS',
'3DSTATE_CLIP',
'3DSTATE_CLIP_MESH',
'3DSTATE_CONSTANT_ALL',
'3DSTATE_CONSTANT_VS',
'3DSTATE_DS',
'3DSTATE_GS',
'3DSTATE_HS',
'3DSTATE_INDEX_BUFFER',
'3DSTATE_MESH_CONTROL',
'3DSTATE_MESH_DISTRIB',
'3DSTATE_MESH_SHADER',
'3DSTATE_MESH_SHADER_DATA',
'3DSTATE_PRIMITIVE_REPLICATION',
'3DSTATE_PS',
'3DSTATE_PS_BLEND',
'3DSTATE_PS_EXTRA',
'3DSTATE_PUSH_CONSTANT_ALLOC_VS',
'3DSTATE_RASTER',
'3DSTATE_SBE',
'3DSTATE_SBE_MESH',
'3DSTATE_SBE_SWIZ',
'3DSTATE_SF',
'3DSTATE_STREAMOUT',
'3DSTATE_TASK_CONTROL',
'3DSTATE_TASK_REDISTRIB',
'3DSTATE_TASK_SHADER',
'3DSTATE_TASK_SHADER_DATA',
'3DSTATE_TE',
'3DSTATE_URB_ALLOC_MESH',
'3DSTATE_URB_ALLOC_TASK',
'3DSTATE_URB_ALLOC_VS',
'3DSTATE_URB_VS',
'3DSTATE_VF_INSTANCING',
'3DSTATE_VF_SGVS',
'3DSTATE_VF_SGVS_2',
'3DSTATE_VF_TOPOLOGY',
'3DSTATE_VFG',
'3DSTATE_WM',
'3DSTATE_VS',
'PIPE_CONTROL',
'RESOURCE_BARRIER',
# instruction fields
'3DSTATE_CPSIZE_CONTROL_BUFFER::Surface Base Address',
'3DSTATE_CPSIZE_CONTROL_BUFFER::Surface Pitch',
'3DSTATE_DEPTH_BUFFER::Surface Base Address',
'3DSTATE_DEPTH_BUFFER::Surface Pitch',
'3DSTATE_HIER_DEPTH_BUFFER::Surface Base Address',
'3DSTATE_HIER_DEPTH_BUFFER::Surface Pitch',
'3DSTATE_STENCIL_BUFFER::Surface Base Address',
'3DSTATE_STENCIL_BUFFER::Surface Pitch',
'3DSTATE_SO_BUFFER::Surface Base Address',
'3DSTATE_SO_BUFFER::Stream Offset',
'MI_BATCH_BUFFER_START::Batch Buffer Start Address',
'MI_REPORT_PERF_COUNT::Memory Address',
'MI_STORE_DATA_IMM::Address',
@ -61,6 +124,7 @@ genX_bits_included_symbols = [
'COMPUTE_WALKER::body',
'EXECUTE_INDIRECT_DISPATCH::body',
# structures
'3DSTATE_CONSTANT_ALL_DATA',
'RENDER_SURFACE_STATE::Surface Base Address',
'RENDER_SURFACE_STATE::Surface Pitch',
'RENDER_SURFACE_STATE::Surface QPitch',
@ -81,6 +145,7 @@ genX_bits_included_symbols = [
'CLEAR_COLOR',
'VERTEX_BUFFER_STATE::Buffer Starting Address',
'CPS_STATE',
'INTERFACE_DESCRIPTOR_DATA',
'RT_DISPATCH_GLOBALS::Hit Group Table',
'RT_DISPATCH_GLOBALS::Miss Group Table',
'RT_DISPATCH_GLOBALS::Callable Group Table',
@ -91,6 +156,7 @@ genX_bits_included_symbols = [
'RT_DISPATCH_GLOBALS::Launch Height',
'RT_DISPATCH_GLOBALS::Launch Depth',
'SAMPLER_STATE',
'VERTEX_ELEMENT_STATE',
]
genX_bits_h = custom_target(

View file

@ -0,0 +1,707 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include <assert.h>
#include <stdbool.h>
#include "genxml/genX_bits.h"
#include "shaders/libintel_shaders.h"
#include "anv_private.h"
/* The DGC preprocess command buffer layout is separated in 4 parts:
*
* +--------+----------+--------+------+
* | prolog | commands | epilog | data |
* +--------+----------+--------+------+
*
* The prolog consists of a few commands that deal with the command buffer
* prefetch and edit the return address in the epilog part.
*
* The commands is where the generated commands are located.
*
* The epilog is where the jump back to the calling command buffer happens.
*
* Data is where things like INTERFACE_DESCRIPTOR_DATA are located (on pre
* Gfx12.5), along with the push constant data used by the commands.
*/
/* Returns 4 times the length reported by genxml for the draw instruction
 * used by this layout: 3DMESH_3D when the layout has the DRAW_MESH bit,
 * otherwise 3DPRIMITIVE_EXTENDED on ver >= 11 and 3DPRIMITIVE before that.
 */
static uint32_t
draw_cmd_size(const struct intel_device_info *devinfo,
              const struct vk_indirect_command_layout *vk_layout)
{
   if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH))
      return 4 * _3DMESH_3D_length(devinfo);

   if (devinfo->ver >= 11)
      return 4 * _3DPRIMITIVE_EXTENDED_length(devinfo);

   return 4 * _3DPRIMITIVE_length(devinfo);
}
/* Reserves `size` bytes of per-sequence command space and records a named
 * item describing the reservation (printed by anv_dgc_print_layout()).
 *
 * The running command size is first rounded up to a multiple of 4 so that
 * every item starts on a 4-byte boundary.
 *
 * `name` is stored by pointer, not copied, so it must outlive the layout
 * (all callers in this file pass string literals).
 */
static void
layout_add_command(struct anv_indirect_command_layout *layout, uint32_t size,
                   const char *name)
{
   /* items[] is a fixed-size array; catch a layout that needs more entries
    * than it can hold instead of silently writing past the end.
    */
   assert(layout->n_items < ARRAY_SIZE(layout->items));

   layout->cmd_size = align(layout->cmd_size, 4);
   layout->cmd_size += size;

   layout->items[layout->n_items++] = (struct anv_indirect_command_layout_item) {
      .name = name,
      .size = size,
   };
}
/* Reserves `size` bytes of per-sequence data space, placed at the next
 * offset that satisfies `alignment`.
 *
 * When `out_data_offset` is non-NULL it receives the (aligned) offset at
 * which the reservation starts. Note the offset is stored as a uint16_t.
 */
static void
layout_add_data(struct anv_indirect_command_layout *layout,
                uint32_t size, uint32_t alignment,
                uint16_t *out_data_offset)
{
   const uint32_t start = align(layout->data_size, alignment);

   layout->data_size = start;
   if (out_data_offset != NULL)
      *out_data_offset = layout->data_size;

   layout->data_size = start + size;
}
static void
push_layout_add_range(struct anv_dgc_push_layout *pc_layout,
const struct vk_indirect_command_push_constant_layout *vk_pc_layout)
{
pc_layout->entries[pc_layout->num_entries++] = (struct anv_dgc_push_entry) {
.seq_offset = vk_pc_layout->src_offset_B,
.push_offset = vk_pc_layout->dst_offset_B,
.size = vk_pc_layout->size_B,
};
}
/* Returns the number of bytes of push constant commands needed for `stage`.
 *
 * For the vertex/tessellation/geometry/fragment stages on ver >= 12 this is
 * one 3DSTATE_CONSTANT_ALL plus `n_slots` 3DSTATE_CONSTANT_ALL_DATA entries;
 * on earlier versions it is a single 3DSTATE_CONSTANT_VS and `n_slots` is
 * not used. Mesh and task stages use their respective *_SHADER_DATA
 * instruction. Any other stage is invalid.
 */
static uint32_t
push_constant_command_size(const struct intel_device_info *devinfo,
                           mesa_shader_stage stage,
                           uint32_t n_slots)
{
   uint32_t n_dwords = 0;

   switch (stage) {
   case MESA_SHADER_TASK:
      n_dwords += _3DSTATE_TASK_SHADER_DATA_length(devinfo);
      break;

   case MESA_SHADER_MESH:
      n_dwords += _3DSTATE_MESH_SHADER_DATA_length(devinfo);
      break;

   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_FRAGMENT:
      if (devinfo->ver < 12) {
         n_dwords += _3DSTATE_CONSTANT_VS_length(devinfo);
      } else {
         n_dwords += (_3DSTATE_CONSTANT_ALL_length(devinfo) +
                      n_slots * _3DSTATE_CONSTANT_ALL_DATA_length(devinfo));
      }
      break;

   default:
      UNREACHABLE("Invalid stage");
   }

   return 4 * n_dwords;
}
/**
 * Creates an indirect commands layout object.
 *
 * The object is allocated through vk_indirect_command_layout_create() and
 * then filled with the sizes needed to build a preprocess buffer:
 *   - cmd_prolog_size / cmd_epilog_size: fixed commands around the sequences
 *   - cmd_size: command bytes reserved for each sequence
 *   - data_size: data bytes reserved for each sequence
 * Each command reservation is also recorded in items[] so that
 * anv_dgc_print_layout() can list them.
 *
 * Which commands are reserved depends on the bits set in vk_layout->dgc_info
 * (graphics, compute or ray-tracing sections below).
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the allocation fails, VK_SUCCESS
 * otherwise, in which case the handle is written to pIndirectCommandsLayout.
 */
VkResult anv_CreateIndirectCommandsLayoutEXT(
VkDevice _device,
const VkIndirectCommandsLayoutCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout)
{
ANV_FROM_HANDLE(anv_device, device, _device);
const struct intel_device_info *devinfo = device->info;
struct anv_indirect_command_layout *layout_obj;
layout_obj = vk_indirect_command_layout_create(
&device->vk, pCreateInfo, pAllocator,
sizeof(struct anv_indirect_command_layout));
if (!layout_obj)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
struct vk_indirect_command_layout *vk_layout = &layout_obj->vk;
/* Any of the draw bits makes this a graphics layout. */
const bool is_gfx =
(vk_layout->dgc_info &
(BITFIELD_BIT(MESA_VK_DGC_DRAW) |
BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED) |
BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH))) != 0;
struct anv_dgc_gfx_layout *gfx_layout = &layout_obj->gfx_layout;
struct anv_dgc_cs_layout *cs_layout = &layout_obj->cs_layout;
/* Push constant description lives in the gfx or cs layout depending on the
 * kind of layout being created.
 */
struct anv_dgc_push_layout *pc_layout =
is_gfx ? &gfx_layout->push_constants : &cs_layout->push_constants;
/* Some checks that the OpenCL code stays in sync. */
STATIC_ASSERT(ANV_DGC_RT_GLOBAL_DISPATCH_SIZE == BRW_RT_PUSH_CONST_OFFSET);
/* Keep this in sync with generate_commands.cl:write_prolog_epilog() */
/* NOTE(review): the extra "+ 1" dword is not explained by anything visible
 * in this file; confirm against write_prolog_epilog().
 */
layout_obj->cmd_prolog_size = 4 *
(MI_STORE_DATA_IMM_length(devinfo) + 1 +
MI_BATCH_BUFFER_START_length(devinfo) +
(devinfo->ver >= 12 ? MI_ARB_CHECK_length(devinfo) : 0));
layout_obj->cmd_epilog_size = 4 * MI_BATCH_BUFFER_START_length(devinfo);
/* On <= Gfx12.0 the gl_NumWorkGroups is located in the push constants so
* we need push constant data per sequence.
*/
/* Currently hard-coded to true, so the block below always runs. */
const bool has_per_sequence_constants = true;
if (has_per_sequence_constants) {
/* RT & compute need a combined push constants and also Mesh. */
uint32_t pc_size = sizeof(struct anv_push_constants);
/* Prior to Gfx12.5+, there is no HW mechanism in the HW thread
* generation to provide a workgroup local id. The way the workgroup
* local id is provided is through a per-thread push constant mechanism
* that read a per thread 32B (one GRF) piece of data in which the
* driver writes the thread id.
*
* The maximum workgroup size is 1024. With a worse case dispatch size
* of SIMD8, that means at max 128 HW threads, each needing a 32B for
* its subgroup_id value within the workgroup. 32B * 128 = 4096B.
*/
if (devinfo->verx10 < 125 &&
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DISPATCH)))
pc_size += 4096;
/* RT_DISPATCH_GLOBALS is located just before the push constant data. */
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_RT))
pc_size += ANV_DGC_RT_GLOBAL_DISPATCH_SIZE;
/* Reserve the per-sequence push constant data and remember its offset. */
layout_add_data(layout_obj, pc_size, ANV_UBO_ALIGNMENT,
&pc_layout->data_offset);
/* Copy every push constant token of the Vulkan layout. */
for (uint32_t i = 0; i < vk_layout->n_pc_layouts; i++) {
const struct vk_indirect_command_push_constant_layout *vk_pc_layout =
&vk_layout->pc_layouts[i];
push_layout_add_range(pc_layout, vk_pc_layout);
}
/* Sequence index token: record where it is written in the push data. */
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_SI)) {
pc_layout->seq_id_active = true;
pc_layout->seq_id_offset = vk_layout->si_layout.dst_offset_B;
}
pc_layout->mocs =
isl_mocs(&device->isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT, false);
}
/* Graphics */
if (vk_layout->dgc_info & (BITFIELD_BIT(MESA_VK_DGC_DRAW) |
BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED) |
BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH))) {
layout_obj->bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
/* 3DSTATE_INDEX_BUFFER */
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_IB)) {
layout_add_command(layout_obj,
_3DSTATE_INDEX_BUFFER_length(devinfo) * 4,
"index");
}
/* 3DSTATE_VERTEX_BUFFERS */
/* One header dword plus one VERTEX_BUFFER_STATE per bound vertex binding. */
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB)) {
layout_add_command(layout_obj,
(1 /* TODO: _3DSTATE_VERTEX_BUFFERS_length(devinfo) */ +
util_bitcount(vk_layout->vertex_bindings) *
VERTEX_BUFFER_STATE_length(devinfo)) * 4,
"vertex");
}
/* Non-mesh draws reserve room for 3DSTATE_HS / 3DSTATE_DS when the
 * corresponding workaround applies to this device.
 */
if ((vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH)) == 0) {
if (intel_needs_workaround(device->info, 16011107343))
layout_add_command(layout_obj, _3DSTATE_HS_length(devinfo) * 4, "hs");
if (intel_needs_workaround(device->info, 22018402687))
layout_add_command(layout_obj, _3DSTATE_DS_length(devinfo) * 4, "ds");
}
/* Stages that may need push constant commands for this kind of draw. */
const VkShaderStageFlags draw_stages =
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH)) ?
(VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT |
VK_SHADER_STAGE_FRAGMENT_BIT) :
(VK_SHADER_STAGE_VERTEX_BIT |
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
VK_SHADER_STAGE_GEOMETRY_BIT |
VK_SHADER_STAGE_FRAGMENT_BIT);
const bool need_push_constants =
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_IES)) != 0 ||
(vk_layout->dgc_info & (BITFIELD_BIT(MESA_VK_DGC_PC) |
BITFIELD_BIT(MESA_VK_DGC_SI))) != 0;
/* 3DSTATE_CONSTANT_* */
/* Space is reserved for every candidate stage with 4 slots each.
 * anv_dgc_fill_gfx_layout() later computes the size actually used from the
 * bound shaders and asserts that it fits within cmd_size.
 */
if (need_push_constants) {
uint32_t cmd_size = 0;
anv_foreach_vk_stage(vk_stage, draw_stages) {
cmd_size += push_constant_command_size(
devinfo, vk_to_mesa_shader_stage(vk_stage), 4);
}
layout_add_command(layout_obj, cmd_size, "push-constants");
}
/* 3DPRIMITIVE / 3DMESH_3D */
layout_add_command(layout_obj, draw_cmd_size(devinfo, vk_layout), "draw");
gfx_layout->draw.seq_offset = vk_layout->draw_src_offset_B;
}
/* Compute */
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DISPATCH)) {
layout_obj->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_IES)) {
cs_layout->indirect_set.active = true;
cs_layout->indirect_set.seq_offset = vk_layout->ies_src_offset_B;
}
cs_layout->dispatch.seq_offset = vk_layout->dispatch_src_offset_B;
if (devinfo->verx10 >= 125) {
/* On Gfx12.5+ everything is in a single instruction */
uint32_t cmd_size = COMPUTE_WALKER_length(devinfo) * 4;
layout_add_command(layout_obj, cmd_size, "compute-walker");
} else {
/* Prior generations */
uint32_t cmd_size = 4 * (MEDIA_CURBE_LOAD_length(devinfo) +
GPGPU_WALKER_length(devinfo) +
MEDIA_STATE_FLUSH_length(devinfo));
/* With an indirect execution set, extra state commands are reserved
 * and a 64-byte aligned INTERFACE_DESCRIPTOR_DATA is added to the
 * per-sequence data.
 */
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_IES)) {
cmd_size += 4 * (MEDIA_VFE_STATE_length(devinfo) +
MEDIA_INTERFACE_DESCRIPTOR_LOAD_length(devinfo));
layout_add_data(layout_obj,
4 * INTERFACE_DESCRIPTOR_DATA_length(devinfo), 64,
&cs_layout->indirect_set.data_offset);
}
layout_add_command(layout_obj, cmd_size,
"media-curbe,gpgpu-walker,media-state");
}
}
/* Ray-tracing */
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_RT)) {
layout_obj->bind_point = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR;
uint32_t cmd_size = COMPUTE_WALKER_length(devinfo) * 4;
layout_add_command(layout_obj, cmd_size, "compute-walker");
cs_layout->dispatch.seq_offset = vk_layout->dispatch_src_offset_B;
}
/* Round the data sizes up so consecutive regions stay aligned. */
layout_obj->data_prolog_size = align(layout_obj->data_prolog_size, 64);
layout_obj->data_size = align(layout_obj->data_size, ANV_UBO_ALIGNMENT);
/* Push constants are emitted when the layout has a push constant or
 * sequence index token, or when a graphics layout uses an indirect
 * execution set.
 */
layout_obj->emits_push_constants =
(vk_layout->dgc_info & (BITFIELD_BIT(MESA_VK_DGC_PC) |
BITFIELD_BIT(MESA_VK_DGC_SI))) ||
((vk_layout->dgc_info & (BITFIELD_BIT(MESA_VK_DGC_DRAW) |
BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED) |
BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH))) &&
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_IES)));
*pIndirectCommandsLayout = anv_indirect_command_layout_to_handle(layout_obj);
return VK_SUCCESS;
}
/**
 * Destroys an indirect commands layout.
 *
 * A null handle is accepted and ignored.
 */
void
anv_DestroyIndirectCommandsLayoutEXT(
    VkDevice                                    _device,
    VkIndirectCommandsLayoutEXT                 indirectCommandsLayout,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_indirect_command_layout, layout, indirectCommandsLayout);

   if (layout != NULL)
      vk_indirect_command_layout_destroy(&device->vk, pAllocator, &layout->vk);
}
/**
 * Reports the memory requirements of the preprocess buffer for a layout.
 *
 * The size is the sum of three regions, each rounded up to 64 bytes:
 *   - prolog + epilog + maxSequenceCount * per-sequence command size
 *   - maxSequenceCount * per-sequence data size
 *   - prolog data
 * When the device has no scratch page, the larger of the render/compute
 * engine prefetch sizes is added on top.
 *
 * All per-sequence products are computed in 64 bits: both operands are
 * 32-bit values, so a 32-bit multiply could wrap for large sequence counts
 * and under-report the required size.
 */
void anv_GetGeneratedCommandsMemoryRequirementsEXT(
    VkDevice                                    _device,
    const VkGeneratedCommandsMemoryRequirementsInfoEXT* pInfo,
    VkMemoryRequirements2*                      pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_indirect_command_layout, layout_obj,
                   pInfo->indirectCommandsLayout);
   const struct intel_device_info *devinfo = device->info;

   /* Widen before multiplying to avoid 32-bit wrap-around. */
   const uint64_t max_sequences = pInfo->maxSequenceCount;
   const uint64_t cmd_bytes =
      (uint64_t)layout_obj->cmd_prolog_size +
      (uint64_t)layout_obj->cmd_epilog_size +
      max_sequences * layout_obj->cmd_size;
   const uint64_t data_bytes = max_sequences * layout_obj->data_size;

   pMemoryRequirements->memoryRequirements.alignment = 64;
   pMemoryRequirements->memoryRequirements.size =
      align64(cmd_bytes, 64) +
      align64(data_bytes, 64) +
      align64(layout_obj->data_prolog_size, 64);

   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      devinfo->verx10 <= 120 ?
      device->physical->memory.dynamic_visible_mem_types :
      device->physical->memory.default_buffer_mem_types;

   /* Without a scratch page, pad the allocation by the largest engine
    * prefetch size.
    */
   if (!device->physical->has_scratch_page) {
      pMemoryRequirements->memoryRequirements.size +=
         MAX2(devinfo->engine_class_prefetch[INTEL_ENGINE_CLASS_RENDER],
              devinfo->engine_class_prefetch[INTEL_ENGINE_CLASS_COMPUTE]);
   }
}
void
anv_dgc_fill_gfx_state(struct anv_dgc_gfx_state *state,
struct anv_cmd_buffer *cmd_buffer,
const struct anv_indirect_command_layout *layout,
struct anv_shader ** const shaders)
{
struct anv_device *device = cmd_buffer->device;
const struct vk_indirect_command_layout *vk_layout = &layout->vk;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
if (vk_layout->dgc_info & (BITFIELD_BIT(MESA_VK_DGC_PC) |
BITFIELD_BIT(MESA_VK_DGC_SI))) {
for (uint32_t i = 0; i < ANV_GRAPHICS_SHADER_STAGE_COUNT; i++) {
if (shaders[i] == NULL)
continue;
const struct anv_pipeline_bind_map *bind_map = &shaders[i]->bind_map;
for (uint32_t i = 0; i < ARRAY_SIZE(bind_map->push_ranges); i++) {
const struct anv_push_range *range = &bind_map->push_ranges[i];
if (range->length == 0)
break;
switch (range->set) {
case ANV_DESCRIPTOR_SET_DESCRIPTORS:
if (bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
state->push_constants.addresses[i] =
anv_cmd_buffer_descriptor_buffer_address(
cmd_buffer,
gfx->base.descriptor_buffers[range->index].buffer_index) +
gfx->base.descriptor_buffers[range->index].buffer_offset;
} else {
struct anv_descriptor_set *set = gfx->base.descriptors[range->index];
state->push_constants.addresses[i] = anv_address_physical(
anv_descriptor_set_address(set));
}
break;
case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
break;
case ANV_DESCRIPTOR_SET_NULL:
case ANV_DESCRIPTOR_SET_PER_PRIM_PADDING:
state->push_constants.addresses[i] =
anv_address_physical(device->workaround_address);
break;
default: {
struct anv_descriptor_set *set = gfx->base.descriptors[range->set];
const struct anv_descriptor *desc =
&set->descriptors[range->index];
if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
if (desc->buffer) {
state->push_constants.addresses[i] = anv_address_physical(
anv_address_add(desc->buffer->address,
desc->offset));
}
} else {
assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
if (desc->buffer) {
const struct anv_cmd_pipeline_state *pipe_state = &gfx->base;
uint32_t dynamic_offset =
pipe_state->dynamic_offsets[
range->set].offsets[range->dynamic_offset_index];
state->push_constants.addresses[i] = anv_address_physical(
anv_address_add(desc->buffer->address,
desc->offset + dynamic_offset));
}
}
if (state->push_constants.addresses[i] == 0) {
/* For NULL UBOs, we just return an address in the
* workaround BO. We do writes to it for workarounds but
* always at the bottom. The higher bytes should be all
* zeros.
*/
assert(range->length * 32 <= 2048);
state->push_constants.addresses[i] =
anv_address_physical((struct anv_address) {
.bo = device->workaround_bo,
.offset = 1024,
});
}
}
}
}
}
}
state->draw.instance_multiplier = gfx->instance_multiplier;
}
/* This function determines the final layout of GFX generated commands. A lot
* of things make the amount of space vary (number of stages, number of push
* constant slots, etc...) such that we can only determine this just before
* executing the generation.
*
* Commands are laid out back to back in this order: index buffer, vertex
* buffers, "final" commands (3DSTATE_HS / 3DSTATE_DS workarounds), push
* constants, draw. For each section the starting offset is always written;
* the size is only written/accumulated when the section is present.
*
* Returns the total number of command bytes used per sequence, which is
* asserted to be no larger than layout_obj->cmd_size.
*
* NOTE(review): several fields are accumulated with "+=" or appended to
* (final_cmds_size, push_constants.cmd_size, push constant entries) and some
* sizes are left untouched when a section is absent, so this presumably
* expects `layout` to be zero-initialized by the caller - confirm.
*/
uint32_t
anv_dgc_fill_gfx_layout(struct anv_dgc_gfx_layout *layout,
const struct anv_device *device,
const struct anv_indirect_command_layout *layout_obj,
struct anv_shader ** const shaders)
{
const struct vk_indirect_command_layout *vk_layout = &layout_obj->vk;
const struct intel_device_info *devinfo = device->info;
/* Running offset of the next command within one sequence. */
uint32_t cmd_offset = 0;
layout->draw.draw_type =
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH)) ? ANV_DGC_DRAW_TYPE_MESH :
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED)) ? ANV_DGC_DRAW_TYPE_INDEXED :
ANV_DGC_DRAW_TYPE_SEQUENTIAL;
/* Index buffer section. */
layout->index_buffer.cmd_offset = cmd_offset;
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_IB)) {
layout->index_buffer.cmd_size = _3DSTATE_INDEX_BUFFER_length(devinfo) * 4;
layout->index_buffer.seq_offset = vk_layout->index_src_offset_B;
layout->index_buffer.mocs =
isl_mocs(&device->isl_dev, ISL_SURF_USAGE_INDEX_BUFFER_BIT, false);
/* Values identifying each index size in the application's stream. */
if (vk_layout->index_mode_is_dx) {
/* DXGI_FORMAT values */
layout->index_buffer.u32_value = 42;
layout->index_buffer.u16_value = 57;
layout->index_buffer.u8_value = 62;
} else {
layout->index_buffer.u32_value = VK_INDEX_TYPE_UINT32;
layout->index_buffer.u16_value = VK_INDEX_TYPE_UINT16;
layout->index_buffer.u8_value = VK_INDEX_TYPE_UINT8_EXT;
}
cmd_offset += layout->index_buffer.cmd_size;
}
/* Vertex buffers section. */
layout->vertex_buffers.cmd_offset = cmd_offset;
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB)) {
layout->vertex_buffers.cmd_size =
(1 /* TODO: _3DSTATE_VERTEX_BUFFERS_length(devinfo) */ +
util_bitcount(vk_layout->vertex_bindings) *
VERTEX_BUFFER_STATE_length(devinfo)) * 4;
layout->vertex_buffers.mocs =
isl_mocs(&device->isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false);
layout->vertex_buffers.n_buffers = vk_layout->n_vb_layouts;
for (uint32_t i = 0; i < vk_layout->n_vb_layouts; i++) {
layout->vertex_buffers.buffers[i].seq_offset =
vk_layout->vb_layouts[i].src_offset_B;
layout->vertex_buffers.buffers[i].binding =
vk_layout->vb_layouts[i].binding;
}
cmd_offset += layout->vertex_buffers.cmd_size;
}
/* "Final" commands: workaround 3DSTATE_HS / 3DSTATE_DS, only when the
 * workaround applies and the matching tessellation shader is bound.
 */
layout->indirect_set.final_cmds_offset = cmd_offset;
if (intel_needs_workaround(devinfo, 16011107343) &&
shaders[MESA_SHADER_TESS_CTRL] != NULL) {
layout->indirect_set.final_cmds_size +=
_3DSTATE_HS_length(devinfo) * 4;
}
if (intel_needs_workaround(devinfo, 22018402687) &&
shaders[MESA_SHADER_TESS_EVAL] != NULL) {
layout->indirect_set.final_cmds_size +=
_3DSTATE_DS_length(devinfo) * 4;
}
cmd_offset += layout->indirect_set.final_cmds_size;
/* Push constants section. */
layout->push_constants.cmd_offset = cmd_offset;
if (vk_layout->dgc_info & (BITFIELD_BIT(MESA_VK_DGC_PC) |
BITFIELD_BIT(MESA_VK_DGC_SI))) {
struct anv_dgc_push_layout *pc_layout = &layout->push_constants;
layout->push_constants.flags = ANV_DGC_PUSH_CONSTANTS_CMD_ACTIVE;
for (uint32_t i = 0; i < vk_layout->n_pc_layouts; i++) {
const struct vk_indirect_command_push_constant_layout *vk_pc_layout =
&vk_layout->pc_layouts[i];
push_layout_add_range(&layout->push_constants, vk_pc_layout);
}
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_SI)) {
pc_layout->seq_id_active = true;
pc_layout->seq_id_offset = vk_layout->si_layout.dst_offset_B;
}
pc_layout->mocs =
isl_mocs(&device->isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT, false);
/* Size the push constant commands from the shaders actually bound: one
 * slot per non-empty push range, plus one if the shader has inline
 * dwords.
 */
for (uint32_t i = 0; i < ANV_GRAPHICS_SHADER_STAGE_COUNT; i++) {
if (shaders[i] == NULL)
continue;
const struct anv_pipeline_bind_map *bind_map =
&shaders[i]->bind_map;
uint32_t n_slots = bind_map->inline_dwords_count > 0 ? 1 : 0;
for (uint32_t j = 0; j < ARRAY_SIZE(bind_map->push_ranges); j++) {
if (bind_map->push_ranges[j].length == 0)
break;
n_slots++;
}
if (n_slots > 0) {
layout->push_constants.cmd_size +=
push_constant_command_size(devinfo, i, n_slots);
}
}
cmd_offset += layout->push_constants.cmd_size;
}
/* Draw section (always present). */
layout->draw.cmd_offset = cmd_offset;
layout->draw.cmd_size = draw_cmd_size(devinfo, vk_layout);
layout->draw.seq_offset = vk_layout->draw_src_offset_B;
cmd_offset += layout->draw.cmd_size;
/* The space reserved at layout creation must cover what is used here. */
assert(cmd_offset <= layout_obj->cmd_size);
return cmd_offset;
}
/**
 * Prints the command offsets/sizes of a final graphics layout to `f`.
 *
 * The "final" and "draw" lines are always printed. The "ib" and "vb" lines
 * are printed only when the layout has the matching token, and the "push"
 * line when it has a push constant / sequence index token or a non-zero push
 * constant command size.
 */
void
anv_dgc_print_gfx_state(FILE *f,
                        const struct anv_dgc_gfx_layout *layout,
                        const struct anv_indirect_command_layout *layout_obj)
{
   fprintf(f, "Generated Gfx state:\n");

   if (layout_obj->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_IB)) {
      fprintf(f, " ib: cmd_offset=0x%04x-0x%04x (%u)\n",
              layout->index_buffer.cmd_offset,
              layout->index_buffer.cmd_offset +
              layout->index_buffer.cmd_size,
              layout->index_buffer.cmd_size);
   }

   if (layout_obj->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB)) {
      fprintf(f, " vb: cmd_offset=0x%04x-0x%04x (%u)\n",
              layout->vertex_buffers.cmd_offset,
              layout->vertex_buffers.cmd_offset +
              layout->vertex_buffers.cmd_size,
              layout->vertex_buffers.cmd_size);
   }

   fprintf(f, " final: cmd_offset=0x%04x-0x%04x (%u)\n",
           layout->indirect_set.final_cmds_offset,
           layout->indirect_set.final_cmds_offset +
           layout->indirect_set.final_cmds_size,
           layout->indirect_set.final_cmds_size);

   if ((layout_obj->vk.dgc_info & (BITFIELD_BIT(MESA_VK_DGC_PC) |
                                   BITFIELD_BIT(MESA_VK_DGC_SI))) ||
       layout->push_constants.cmd_size != 0) {
      fprintf(f, " push: cmd_offset=0x%04x-0x%04x (%u)\n",
              layout->push_constants.cmd_offset,
              layout->push_constants.cmd_offset +
              layout->push_constants.cmd_size,
              layout->push_constants.cmd_size);
   }

   fprintf(f, " draw: cmd_offset=0x%04x-0x%04x (%u)\n",
           layout->draw.cmd_offset,
           layout->draw.cmd_offset +
           layout->draw.cmd_size,
           layout->draw.cmd_size);
}
void
anv_dgc_print_layout(FILE *f,
const struct anv_indirect_command_layout *layout)
{
fprintf(f, "Generated %s layout:\n",
layout->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ? "Gfx" :
layout->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE ? "CS" :
layout->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE ? "RT" : "unknown");
#define DGC_BIT(name) ((layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_##name)) ? #name"," : "")
fprintf(f, " bits: %s%s%s%s%s%s%s%s%s%s\n",
DGC_BIT(IES),
DGC_BIT(PC),
DGC_BIT(IB),
DGC_BIT(VB),
DGC_BIT(SI),
DGC_BIT(DRAW),
DGC_BIT(DRAW_INDEXED),
DGC_BIT(DRAW_MESH),
DGC_BIT(DISPATCH),
DGC_BIT(RT));
#undef DGC_BIT
fprintf(f, " seq_stride: %zu\n", layout->vk.stride);
fprintf(f, " cmd_prolog: %u\n", layout->cmd_prolog_size);
fprintf(f, " cmd_stride: %u\n", layout->cmd_size);
fprintf(f, " cmd_epilog: %u\n", layout->cmd_epilog_size);
fprintf(f, " data_prolog: %u\n", layout->data_prolog_size);
fprintf(f, " data_stride: %u\n", layout->data_size);
fprintf(f, " sequences:\n");
const struct vk_indirect_command_layout *vk_layout = &layout->vk;
const struct anv_dgc_push_layout *pc_layout =
layout->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ?
&layout->gfx_layout.push_constants : &layout->cs_layout.push_constants;
if (pc_layout->num_entries > 0 || pc_layout->seq_id_active) {
fprintf(f, " push_constants:\n");
for (uint32_t i = 0; i < pc_layout->num_entries; i++) {
fprintf(f,
" pc_entry%02u seq_offset: 0x%04x (offset=%hu, size=%hu)\n",
i,
pc_layout->entries[i].seq_offset,
pc_layout->entries[i].push_offset,
pc_layout->entries[i].size);
}
if (pc_layout->seq_id_active) {
fprintf(f, " seq_id_offset: 0x%04hx\n",
pc_layout->seq_id_offset);
}
}
switch (layout->bind_point) {
case VK_PIPELINE_BIND_POINT_GRAPHICS: {
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_IB)) {
fprintf(f, " index_buffer:\n");
fprintf(f, " seq_offset: 0x%04x\n", vk_layout->index_src_offset_B);
}
if (vk_layout->n_vb_layouts) {
fprintf(f, " vertex_buffers:\n");
for (uint32_t i = 0; i < vk_layout->n_vb_layouts; i++) {
fprintf(f, " seq_offset: 0x%04x (vb%u)\n",
vk_layout->vb_layouts[i].src_offset_B,
vk_layout->vb_layouts[i].binding);
}
}
fprintf(f, " %s:\n",
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH)) ? "mesh" :
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED)) ? "draw-indexed" :
(vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW)) ? "draw" :
"unknown");
fprintf(f, " seq_offset: 0x%04x\n", vk_layout->draw_src_offset_B);
break;
}
case VK_PIPELINE_BIND_POINT_COMPUTE:
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
const struct anv_dgc_cs_layout *cs_layout = &layout->cs_layout;
if (cs_layout->indirect_set.active) {
fprintf(f, " ies:\n");
fprintf(f, " seq_offset: 0x%04x\n", cs_layout->indirect_set.seq_offset);
}
fprintf(f, " dispatch:\n");
fprintf(f, " seq_offset: 0x%04x\n", cs_layout->dispatch.seq_offset);
break;
}
default:
UNREACHABLE("Invalid bind point");
}
fprintf(f, " commands:\n");
for (uint32_t i = 0; i < layout->n_items; i++) {
fprintf(f, " %s: %u\n",
layout->items[i].name, layout->items[i].size);
}
}

View file

@ -87,6 +87,7 @@
#include "vk_descriptor_set_layout.h"
#include "vk_descriptor_update_template.h"
#include "vk_device.h"
#include "vk_device_generated_commands.h"
#include "vk_device_memory.h"
#include "vk_drm_syncobj.h"
#include "vk_enum_defines.h"
@ -6579,6 +6580,75 @@ enum anv_dgc_stage anv_vk_stage_to_dgc_stage(VkShaderStageFlags vk_stage);
uint32_t anv_vk_stages_to_generated_stages(VkShaderStageFlags vk_stages);
/**
 * Driver object backing a VkIndirectCommandsLayoutEXT.
 *
 * Wraps the common runtime layout and adds the sizes needed to carve up a
 * preprocess buffer, plus a list of named command items for debug printing.
 */
struct anv_indirect_command_layout {
/** Common Vulkan runtime layout (token description, dgc_info bits) */
struct vk_indirect_command_layout vk;
/** Bind point this layout generates commands for */
VkPipelineBindPoint bind_point;
/** Graphics layout description */
struct anv_dgc_gfx_layout gfx_layout;
/** Compute / ray-tracing layout description */
struct anv_dgc_cs_layout cs_layout;
/** Layout is as follows:
* --------------------
* | prolog cmds |
* |------------------|
* | sequence cmds |
* |------------------|
* | epilog cmds |
* |------------------|
* | sideband data |
* |------------------|
* | prolog data |
* |------------------|
* | sequence data |
* --------------------
*/
/**
* Non sequence related commands at the beginning of the preprocess buffer
*/
uint32_t cmd_prolog_size;
/**
* Non sequence related commands at the end of the preprocess buffer (jump
* back to main batch)
*/
uint32_t cmd_epilog_size;
/** Amount of command memory required for each sequence */
uint32_t cmd_size;
/** Non sequence related data */
uint32_t data_prolog_size;
/** Amount of data memory required (per sequence) */
uint32_t data_size;
/** Amount of sideband data memory required (not per sequence) */
uint32_t sideband_size;
/** Track if push constants are emitted at all */
bool emits_push_constants;
/**
* Named per-sequence command reservations, in emission order. Only the
* first n_items entries are valid; name is stored as a pointer, not copied.
*/
struct anv_indirect_command_layout_item {
const char *name;
uint32_t size;
} items[10];
/** Number of valid entries in items[] */
uint32_t n_items;
};
/** Print the command offsets/sizes of a final graphics layout to f. */
void anv_dgc_print_gfx_state(FILE *f,
const struct anv_dgc_gfx_layout *layout,
const struct anv_indirect_command_layout *layout_obj);
/** Print a description of an indirect command layout to f. */
void anv_dgc_print_layout(FILE *f,
const struct anv_indirect_command_layout *layout);
/**
* Fill the graphics state used for command generation from the command
* buffer's current graphics state and the given per-stage shaders.
*/
void anv_dgc_fill_gfx_state(struct anv_dgc_gfx_state *state,
struct anv_cmd_buffer *cmd_buffer,
const struct anv_indirect_command_layout *layout_obj,
struct anv_shader ** const shaders);
/**
* Compute the final graphics command layout for the given per-stage shaders.
* Returns the number of command bytes used per sequence.
*/
uint32_t anv_dgc_fill_gfx_layout(struct anv_dgc_gfx_layout *layout,
const struct anv_device *device,
const struct anv_indirect_command_layout *layout_obj,
struct anv_shader ** const shaders);
struct anv_vid_mem {
struct anv_device_memory *mem;
VkDeviceSize offset;
@ -6989,6 +7059,9 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base,
VkVideoSessionKHR,
VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_indirect_command_layout, vk.base,
VkIndirectCommandsLayoutEXT,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_EXT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_indirect_execution_set, base,
VkIndirectExecutionSetEXT,
VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT)

View file

@ -160,6 +160,7 @@ libanv_files = files(
'anv_cmd_buffer.c',
'anv_descriptor_set.c',
'anv_device.c',
'anv_dgc_layout.c',
'anv_dgc_set.c',
'anv_embedded_sampler.c',
'anv_event.c',