lavapipe: NV_device_generated_commands

this adds sufficient handling to pass the vkd3d-proton tests as well as
to run CTS on zink, which is going to have to be enough since there's
no VK CTS coverage for this extension

it works by dynamically generating a vk_cmd_queue-style list of commands,
just like the regular cmd queue would generate, with the minor change that
the final link has its next pointer nulled; this correctly handles buffer
copies, where the last link would otherwise still have a next pointer into
the original cmd list
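
roughly, the preprocess step lays the buffer out like this (sketch only,
not the driver code: build_preprocess_list and entry_size are made-up
names, and the real loop also decodes each token's payload; see
handle_preprocess_generated_commands/process_sequence in the diff below):

/* sketch; assumes mesa's util/list.h and vk_cmd_queue.h */
static void
build_preprocess_list(uint8_t *preprocess_ptr, uint32_t seq_count,
                      uint32_t token_count)
{
   /* the preprocess buffer starts with a list_head... */
   struct list_head *list = (struct list_head *)preprocess_ptr;
   list_inithead(list);

   size_t offset = sizeof(struct list_head);
   for (uint32_t seq = 0; seq < seq_count; seq++) {
      for (uint32_t t = 0; t < token_count; t++) {
         /* ...followed by packed vk_cmd_queue_entry records, one per
          * token per sequence, each linked onto the tail of the list */
         struct vk_cmd_queue_entry *cmd =
            (struct vk_cmd_queue_entry *)(preprocess_ptr + offset);
         /* cmd->type and cmd->u.* get decoded from the token streams */
         list_addtail(&cmd->cmd_link, list);
         offset += entry_size(t); /* placeholder: per-token entry size */
      }
   }

   /* util/list.h lists are circular, so null the tail's next pointer;
    * executing a copy of this buffer then stops at the last entry
    * instead of chaining back into the original list */
   list->prev->next = NULL;
}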

Acked-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23394>
Mike Blumenkrantz 2023-06-02 10:22:11 -04:00 committed by Marge Bot
parent 3209609535
commit 976dd26004
4 changed files with 456 additions and 10 deletions

src/gallium/frontends/lavapipe/lvp_device.c

@@ -211,6 +211,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported =
.EXT_robustness2 = true,
.GOOGLE_decorate_string = true,
.GOOGLE_hlsl_functionality1 = true,
.NV_device_generated_commands = true,
};
static int
@@ -514,6 +515,9 @@ lvp_get_features(const struct lvp_physical_device *pdevice,
.robustImageAccess2 = true,
.nullDescriptor = true,
/* VK_NV_device_generated_commands */
.deviceGeneratedCommands = true,
/* VK_EXT_primitive_topology_list_restart */
.primitiveTopologyListRestart = true,
.primitiveTopologyPatchListRestart = true,
@@ -1135,6 +1139,20 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetPhysicalDeviceProperties2(
PIPE_CAP_RASTERIZER_SUBPIXEL_BITS);
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV: {
VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *properties =
(VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *)ext;
properties->maxGraphicsShaderGroupCount = 1<<12;
properties->maxIndirectSequenceCount = 1<<20;
properties->maxIndirectCommandsTokenCount = MAX_DGC_TOKENS;
properties->maxIndirectCommandsStreamCount = MAX_DGC_STREAMS;
properties->maxIndirectCommandsTokenOffset = 2047;
properties->maxIndirectCommandsStreamStride = 2048;
properties->minSequencesCountBufferOffsetAlignment = 4;
properties->minSequencesIndexBufferOffsetAlignment = 4;
properties->minIndirectCommandsBufferOffsetAlignment = 4;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES: {
VkPhysicalDeviceInlineUniformBlockProperties *properties =
(VkPhysicalDeviceInlineUniformBlockProperties *)ext;
@@ -2341,6 +2359,126 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetPrivateDataEXT(
privateDataSlot, pData);
}
VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateIndirectCommandsLayoutNV(
VkDevice _device,
const VkIndirectCommandsLayoutCreateInfoNV* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkIndirectCommandsLayoutNV* pIndirectCommandsLayout)
{
LVP_FROM_HANDLE(lvp_device, device, _device);
struct lvp_indirect_command_layout *dlayout;
size_t size = sizeof(*dlayout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV);
dlayout =
vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct lvp_indirect_command_layout),
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!dlayout)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
vk_object_base_init(&device->vk, &dlayout->base, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV);
dlayout->stream_count = pCreateInfo->streamCount;
dlayout->token_count = pCreateInfo->tokenCount;
for (unsigned i = 0; i < pCreateInfo->streamCount; i++)
dlayout->stream_strides[i] = pCreateInfo->pStreamStrides[i];
typed_memcpy(dlayout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount);
*pIndirectCommandsLayout = lvp_indirect_command_layout_to_handle(dlayout);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL lvp_DestroyIndirectCommandsLayoutNV(
VkDevice _device,
VkIndirectCommandsLayoutNV indirectCommandsLayout,
const VkAllocationCallbacks* pAllocator)
{
LVP_FROM_HANDLE(lvp_device, device, _device);
VK_FROM_HANDLE(lvp_indirect_command_layout, layout, indirectCommandsLayout);
if (!layout)
return;
vk_object_base_finish(&layout->base);
vk_free2(&device->vk.alloc, pAllocator, layout);
}
enum vk_cmd_type
lvp_nv_dgc_token_to_cmd_type(const VkIndirectCommandsLayoutTokenNV *token)
{
switch (token->tokenType) {
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV:
return VK_CMD_BIND_PIPELINE_SHADER_GROUP_NV;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
if (token->indirectStateFlags & VK_INDIRECT_STATE_FLAG_FRONTFACE_BIT_NV) {
return VK_CMD_SET_FRONT_FACE;
}
assert(!"unknown token type!");
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
return VK_CMD_PUSH_CONSTANTS;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
return VK_CMD_BIND_INDEX_BUFFER;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
return VK_CMD_BIND_VERTEX_BUFFERS2;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
return VK_CMD_DRAW_INDEXED_INDIRECT;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
return VK_CMD_DRAW_INDIRECT;
// only available if VK_EXT_mesh_shader is supported
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV:
return VK_CMD_DRAW_MESH_TASKS_INDIRECT_EXT;
// only available if VK_NV_mesh_shader is supported
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV:
unreachable("NV_mesh_shader unsupported!");
default:
unreachable("unknown token type");
}
return UINT32_MAX;
}
VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsNV(
VkDevice device,
const VkGeneratedCommandsMemoryRequirementsInfoNV* pInfo,
VkMemoryRequirements2* pMemoryRequirements)
{
VK_FROM_HANDLE(lvp_indirect_command_layout, dlayout, pInfo->indirectCommandsLayout);
size_t size = sizeof(struct list_head);
for (unsigned i = 0; i < dlayout->token_count; i++) {
const VkIndirectCommandsLayoutTokenNV *token = &dlayout->tokens[i];
UNUSED struct vk_cmd_queue_entry *cmd;
enum vk_cmd_type type = lvp_nv_dgc_token_to_cmd_type(token);
size += vk_cmd_queue_type_sizes[type];
switch (token->tokenType) {
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
size += sizeof(*cmd->u.bind_vertex_buffers.buffers);
size += sizeof(*cmd->u.bind_vertex_buffers.offsets);
size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
size += token->pushconstantSize;
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV:
break;
default:
unreachable("unknown type!");
}
}
pMemoryRequirements->memoryRequirements.memoryTypeBits = 1;
pMemoryRequirements->memoryRequirements.alignment = 4;
pMemoryRequirements->memoryRequirements.size = align(size, pMemoryRequirements->memoryRequirements.alignment);
}
VKAPI_ATTR void VKAPI_CALL lvp_GetPhysicalDeviceExternalFenceProperties(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,

src/gallium/frontends/lavapipe/lvp_execute.c

@@ -855,10 +855,9 @@ handle_graphics_layout(struct rendering_state *state, gl_shader_stage stage, str
}
}
static void handle_graphics_pipeline(struct vk_cmd_queue_entry *cmd,
static void handle_graphics_pipeline(struct lvp_pipeline *pipeline,
struct rendering_state *state)
{
LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline.pipeline);
const struct vk_graphics_pipeline_state *ps = &pipeline->graphics_state;
lvp_pipeline_shaders_compile(pipeline);
bool dynamic_tess_origin = BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
@@ -1198,7 +1197,7 @@ static void handle_pipeline(struct vk_cmd_queue_entry *cmd,
handle_compute_pipeline(cmd, state);
handle_pipeline_access(state, MESA_SHADER_COMPUTE);
} else {
handle_graphics_pipeline(cmd, state);
handle_graphics_pipeline(pipeline, state);
lvp_forall_gfx_stage(sh) {
handle_pipeline_access(state, sh);
}
@@ -1206,6 +1205,19 @@ static void handle_pipeline(struct vk_cmd_queue_entry *cmd,
state->push_size[pipeline->is_compute_pipeline] = pipeline->layout->push_constant_size;
}
static void
handle_graphics_pipeline_group(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
{
assert(cmd->u.bind_pipeline_shader_group_nv.pipeline_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline_shader_group_nv.pipeline);
if (cmd->u.bind_pipeline_shader_group_nv.group_index)
pipeline = lvp_pipeline_from_handle(pipeline->groups[cmd->u.bind_pipeline_shader_group_nv.group_index - 1]);
handle_graphics_pipeline(pipeline, state);
lvp_forall_gfx_stage(sh)
handle_pipeline_access(state, sh);
state->push_size[pipeline->is_compute_pipeline] = pipeline->layout->push_constant_size;
}
static void handle_vertex_buffers2(struct vk_cmd_queue_entry *cmd,
struct rendering_state *state)
{
@@ -2864,11 +2876,13 @@ static void handle_index_buffer(struct vk_cmd_queue_entry *cmd,
default:
break;
}
state->index_offset = ib->offset;
if (ib->buffer)
if (ib->buffer) {
state->index_offset = ib->offset;
state->index_buffer = lvp_buffer_from_handle(ib->buffer)->bo;
else
} else {
state->index_offset = 0;
state->index_buffer = state->device->zero_buffer;
}
state->ib_dirty = true;
}
@@ -4227,6 +4241,233 @@ static void handle_draw_mesh_tasks_indirect_count(struct vk_cmd_queue_entry *cmd
state->pctx->draw_mesh_tasks(state->pctx, &state->dispatch_info);
}
static VkBuffer
get_buffer(struct rendering_state *state, uint8_t *ptr, size_t *offset)
{
simple_mtx_lock(&state->device->bda_lock);
hash_table_foreach(&state->device->bda, he) {
const uint8_t *bda = he->key;
if (ptr < bda)
continue;
struct lvp_buffer *buffer = he->data;
if (bda + buffer->size > ptr) {
*offset = ptr - bda;
simple_mtx_unlock(&state->device->bda_lock);
return lvp_buffer_to_handle(buffer);
}
}
fprintf(stderr, "unrecognized BDA!\n");
abort();
}
static size_t
process_sequence(struct rendering_state *state,
VkPipeline pipeline, struct lvp_indirect_command_layout *dlayout,
struct list_head *list, uint8_t *pbuf, size_t max_size,
uint8_t **map_streams, const VkIndirectCommandsStreamNV *pstreams, uint32_t seq)
{
size_t size = 0;
for (uint32_t t = 0; t < dlayout->token_count; t++){
const VkIndirectCommandsLayoutTokenNV *token = &dlayout->tokens[t];
uint32_t stride = dlayout->stream_strides[token->stream];
uint8_t *stream = map_streams[token->stream];
uint32_t offset = stride * seq + token->offset;
uint32_t draw_offset = offset + pstreams[token->stream].offset;
void *input = stream + offset;
struct vk_cmd_queue_entry *cmd = (struct vk_cmd_queue_entry*)(pbuf + size);
size_t cmd_size = vk_cmd_queue_type_sizes[lvp_nv_dgc_token_to_cmd_type(token)];
uint8_t *cmdptr = (void*)(pbuf + size + cmd_size);
if (max_size < size + cmd_size)
abort();
cmd->type = lvp_nv_dgc_token_to_cmd_type(token);
switch (token->tokenType) {
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV: {
VkBindShaderGroupIndirectCommandNV *bind = input;
cmd->u.bind_pipeline_shader_group_nv.pipeline_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
cmd->u.bind_pipeline_shader_group_nv.pipeline = pipeline;
cmd->u.bind_pipeline_shader_group_nv.group_index = bind->groupIndex;
break;
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV: {
VkSetStateFlagsIndirectCommandNV *state = input;
if (token->indirectStateFlags & VK_INDIRECT_STATE_FLAG_FRONTFACE_BIT_NV) {
if (state->data & BITFIELD_BIT(VK_FRONT_FACE_CLOCKWISE)) {
cmd->u.set_front_face.front_face = VK_FRONT_FACE_CLOCKWISE;
} else {
cmd->u.set_front_face.front_face = VK_FRONT_FACE_COUNTER_CLOCKWISE;
}
} else {
/* skip this if unrecognized state flag */
continue;
}
break;
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: {
uint32_t *data = input;
cmd_size += token->pushconstantSize;
if (max_size < size + cmd_size)
abort();
cmd->u.push_constants.layout = token->pushconstantPipelineLayout;
cmd->u.push_constants.stage_flags = token->pushconstantShaderStageFlags;
cmd->u.push_constants.offset = token->pushconstantOffset;
cmd->u.push_constants.size = token->pushconstantSize;
cmd->u.push_constants.values = (void*)cmdptr;
memcpy(cmd->u.push_constants.values, data, token->pushconstantSize);
break;
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: {
VkBindIndexBufferIndirectCommandNV *data = input;
cmd->u.bind_index_buffer.offset = 0;
if (data->bufferAddress)
cmd->u.bind_index_buffer.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_index_buffer.offset);
else
cmd->u.bind_index_buffer.buffer = VK_NULL_HANDLE;
cmd->u.bind_index_buffer.index_type = data->indexType;
for (unsigned i = 0; i < token->indexTypeCount; i++) {
if (data->indexType == token->pIndexTypeValues[i]) {
cmd->u.bind_index_buffer.index_type = token->pIndexTypes[i];
break;
}
}
break;
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: {
VkBindVertexBufferIndirectCommandNV *data = input;
cmd_size += sizeof(*cmd->u.bind_vertex_buffers.buffers) + sizeof(*cmd->u.bind_vertex_buffers.offsets);
cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
if (max_size < size + cmd_size)
abort();
cmd->u.bind_vertex_buffers2.first_binding = token->vertexBindingUnit;
cmd->u.bind_vertex_buffers2.binding_count = 1;
cmd->u.bind_vertex_buffers2.buffers = (void*)cmdptr;
cmd->u.bind_vertex_buffers2.offsets = (void*)(cmdptr + sizeof(*cmd->u.bind_vertex_buffers2.buffers));
cmd->u.bind_vertex_buffers2.offsets[0] = 0;
cmd->u.bind_vertex_buffers2.buffers[0] = data->bufferAddress ? get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_vertex_buffers2.offsets[0]) : VK_NULL_HANDLE;
if (token->vertexDynamicStride) {
cmd->u.bind_vertex_buffers2.strides = (void*)(cmdptr + sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets) + sizeof(*cmd->u.bind_vertex_buffers2.sizes));
cmd->u.bind_vertex_buffers2.strides[0] = data->stride;
} else {
cmd->u.bind_vertex_buffers2.strides = NULL;
}
break;
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV: {
cmd->u.draw_indexed_indirect.buffer = pstreams[token->stream].buffer;
cmd->u.draw_indexed_indirect.offset = draw_offset;
cmd->u.draw_indexed_indirect.draw_count = 1;
cmd->u.draw_indexed_indirect.stride = 0;
break;
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV: {
cmd->u.draw_indirect.buffer = pstreams[token->stream].buffer;
cmd->u.draw_indirect.offset = draw_offset;
cmd->u.draw_indirect.draw_count = 1;
cmd->u.draw_indirect.stride = 0;
break;
}
// only available if VK_EXT_mesh_shader is supported
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV: {
cmd->u.draw_mesh_tasks_indirect_ext.buffer = pstreams[token->stream].buffer;
cmd->u.draw_mesh_tasks_indirect_ext.offset = draw_offset;
cmd->u.draw_mesh_tasks_indirect_ext.draw_count = 1;
cmd->u.draw_mesh_tasks_indirect_ext.stride = 0;
break;
}
// only available if VK_NV_mesh_shader is supported
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV:
unreachable("NV_mesh_shader unsupported!");
default:
unreachable("unknown token type");
break;
}
size += cmd_size;
list_addtail(&cmd->cmd_link, list);
}
return size;
}
static void
handle_preprocess_generated_commands(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
{
VkGeneratedCommandsInfoNV *pre = cmd->u.preprocess_generated_commands_nv.generated_commands_info;
VK_FROM_HANDLE(lvp_indirect_command_layout, dlayout, pre->indirectCommandsLayout);
struct pipe_transfer *stream_maps[16];
uint8_t *streams[16];
for (unsigned i = 0; i < pre->streamCount; i++) {
struct lvp_buffer *buf = lvp_buffer_from_handle(pre->pStreams[i].buffer);
streams[i] = pipe_buffer_map(state->pctx, buf->bo, PIPE_MAP_READ, &stream_maps[i]);
streams[i] += pre->pStreams[i].offset;
}
LVP_FROM_HANDLE(lvp_buffer, pbuf, pre->preprocessBuffer);
LVP_FROM_HANDLE(lvp_buffer, seqc, pre->sequencesCountBuffer);
LVP_FROM_HANDLE(lvp_buffer, seqi, pre->sequencesIndexBuffer);
unsigned seq_count = pre->sequencesCount;
if (seqc) {
unsigned count = 0;
pipe_buffer_read(state->pctx, seqc->bo, pre->sequencesCountOffset, sizeof(uint32_t), &count);
seq_count = MIN2(count, seq_count);
}
uint32_t *seq = NULL;
struct pipe_transfer *seq_map = NULL;
if (seqi) {
seq = pipe_buffer_map(state->pctx, seqi->bo, PIPE_MAP_READ, &seq_map);
seq = (uint32_t*)(((uint8_t*)seq) + pre->sequencesIndexOffset);
}
struct pipe_transfer *pmap;
uint8_t *p = pipe_buffer_map(state->pctx, pbuf->bo, PIPE_MAP_WRITE, &pmap);
p += pre->preprocessOffset;
struct list_head *list = (void*)p;
size_t size = sizeof(struct list_head);
size_t max_size = pre->preprocessSize;
if (size > max_size)
abort();
list_inithead(list);
size_t offset = size;
for (unsigned i = 0; i < seq_count; i++) {
uint32_t s = seq ? seq[i] : i;
offset += process_sequence(state, pre->pipeline, dlayout, list, p + offset, max_size, streams, pre->pStreams, s);
}
/* vk_cmd_queue will copy the binary and break the list, so null the tail pointer */
list->prev->next = NULL;
for (unsigned i = 0; i < pre->streamCount; i++)
state->pctx->buffer_unmap(state->pctx, stream_maps[i]);
state->pctx->buffer_unmap(state->pctx, pmap);
if (seq_map)
state->pctx->buffer_unmap(state->pctx, seq_map);
}
static void
handle_execute_generated_commands(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
{
VkGeneratedCommandsInfoNV *gen = cmd->u.execute_generated_commands_nv.generated_commands_info;
struct vk_cmd_execute_generated_commands_nv *exec = &cmd->u.execute_generated_commands_nv;
if (!exec->is_preprocessed) {
struct vk_cmd_queue_entry pre;
pre.u.preprocess_generated_commands_nv.generated_commands_info = exec->generated_commands_info;
handle_preprocess_generated_commands(&pre, state);
}
LVP_FROM_HANDLE(lvp_buffer, pbuf, gen->preprocessBuffer);
struct pipe_transfer *pmap;
uint8_t *p = pipe_buffer_map(state->pctx, pbuf->bo, PIPE_MAP_WRITE, &pmap);
p += gen->preprocessOffset;
struct list_head *list = (void*)p;
lvp_execute_cmd_buffer(list, state, print_cmds);
state->pctx->buffer_unmap(state->pctx, pmap);
}
void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
{
struct vk_device_dispatch_table cmd_enqueue_dispatch;
@@ -4350,6 +4591,11 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
ENQUEUE_CMD(CmdDrawMeshTasksEXT)
ENQUEUE_CMD(CmdDrawMeshTasksIndirectEXT)
ENQUEUE_CMD(CmdDrawMeshTasksIndirectCountEXT)
ENQUEUE_CMD(CmdBindPipelineShaderGroupNV)
ENQUEUE_CMD(CmdPreprocessGeneratedCommandsNV)
ENQUEUE_CMD(CmdExecuteGeneratedCommandsNV)
#undef ENQUEUE_CMD
}
@@ -4681,6 +4927,15 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
emit_state(state);
handle_draw_mesh_tasks_indirect_count(cmd, state);
break;
case VK_CMD_BIND_PIPELINE_SHADER_GROUP_NV:
handle_graphics_pipeline_group(cmd, state);
break;
case VK_CMD_PREPROCESS_GENERATED_COMMANDS_NV:
handle_preprocess_generated_commands(cmd, state);
break;
case VK_CMD_EXECUTE_GENERATED_COMMANDS_NV:
handle_execute_generated_commands(cmd, state, print_cmds);
break;
default:
fprintf(stderr, "Unsupported command %s\n", vk_cmd_queue_type_names[cmd->type]);
unreachable("Unsupported command");
@@ -4688,6 +4943,8 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
}
first = false;
did_flush = false;
if (!cmd->cmd_link.next)
break;
}
}

src/gallium/frontends/lavapipe/lvp_pipeline.c

@@ -81,6 +81,11 @@ lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline)
if (pipeline->layout)
vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);
for (unsigned i = 0; i < pipeline->num_groups; i++) {
LVP_FROM_HANDLE(lvp_pipeline, p, pipeline->groups[i]);
lvp_pipeline_destroy(device, p);
}
vk_free(&device->vk.alloc, pipeline->state_data);
vk_object_base_finish(&pipeline->base);
vk_free(&device->vk.alloc, pipeline);
@@ -991,7 +996,8 @@ lvp_graphics_pipeline_create(
VkDevice _device,
VkPipelineCache _cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
VkPipeline *pPipeline)
VkPipeline *pPipeline,
bool group)
{
LVP_FROM_HANDLE(lvp_device, device, _device);
LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
@@ -1000,7 +1006,12 @@ lvp_graphics_pipeline_create(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
size_t size = 0;
const VkGraphicsPipelineShaderGroupsCreateInfoNV *groupinfo = vk_find_struct_const(pCreateInfo, GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV);
if (!group && groupinfo)
size += (groupinfo->groupCount + groupinfo->pipelineCount) * sizeof(VkPipeline);
pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -1013,9 +1024,28 @@ lvp_graphics_pipeline_create(
vk_free(&device->vk.alloc, pipeline);
return result;
}
if (!group && groupinfo) {
VkGraphicsPipelineCreateInfo pci = *pCreateInfo;
for (unsigned i = 0; i < groupinfo->groupCount; i++) {
const VkGraphicsShaderGroupCreateInfoNV *g = &groupinfo->pGroups[i];
pci.pVertexInputState = g->pVertexInputState;
pci.pTessellationState = g->pTessellationState;
pci.pStages = g->pStages;
pci.stageCount = g->stageCount;
result = lvp_graphics_pipeline_create(_device, _cache, &pci, &pipeline->groups[i], true);
if (result != VK_SUCCESS) {
lvp_pipeline_destroy(device, pipeline);
return result;
}
pipeline->num_groups++;
}
for (unsigned i = 0; i < groupinfo->pipelineCount; i++)
pipeline->groups[pipeline->num_groups + i] = groupinfo->pPipelines[i];
pipeline->num_groups_total = groupinfo->groupCount + groupinfo->pipelineCount;
}
VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
if (feedback) {
if (feedback && !group) {
feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
@@ -1043,7 +1073,8 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
r = lvp_graphics_pipeline_create(_device,
pipelineCache,
&pCreateInfos[i],
&pPipelines[i]);
&pPipelines[i],
false);
if (r != VK_SUCCESS) {
result = r;
pPipelines[i] = VK_NULL_HANDLE;

src/gallium/frontends/lavapipe/lvp_private.h

@@ -86,6 +86,8 @@ extern "C" {
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE 4096
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS 8
#define MAX_DGC_STREAMS 16
#define MAX_DGC_TOKENS 16
#ifdef _WIN32
#define lvp_printflike(a, b)
@@ -490,6 +492,10 @@ struct lvp_pipeline {
bool library;
bool compiled;
bool used;
unsigned num_groups;
unsigned num_groups_total;
VkPipeline groups[0];
};
void
@@ -541,6 +547,16 @@ struct lvp_cmd_buffer {
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
};
struct lvp_indirect_command_layout {
struct vk_object_base base;
uint8_t stream_count;
uint8_t token_count;
uint16_t stream_strides[MAX_DGC_STREAMS];
VkPipelineBindPoint bind_point;
VkIndirectCommandsLayoutUsageFlagsNV flags;
VkIndirectCommandsLayoutTokenNV tokens[0];
};
extern const struct vk_command_buffer_ops lvp_cmd_buffer_ops;
static inline const struct lvp_descriptor_set_layout *
@@ -598,6 +614,8 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_query_pool, base, VkQueryPool,
VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_sampler, base, VkSampler,
VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
struct lvp_write_descriptor {
uint32_t dst_binding;
@@ -672,6 +690,8 @@ void
lvp_inline_uniforms(nir_shader *nir, const struct lvp_shader *shader, const uint32_t *uniform_values, uint32_t ubo);
void *
lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir);
enum vk_cmd_type
lvp_nv_dgc_token_to_cmd_type(const VkIndirectCommandsLayoutTokenNV *token);
#ifdef __cplusplus
}
#endif