diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c
index f6f6a1c6b64..1de670987d8 100644
--- a/src/gallium/frontends/lavapipe/lvp_device.c
+++ b/src/gallium/frontends/lavapipe/lvp_device.c
@@ -211,6 +211,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported =
    .EXT_robustness2 = true,
    .GOOGLE_decorate_string = true,
    .GOOGLE_hlsl_functionality1 = true,
+   .NV_device_generated_commands = true,
 };
 
 static int
@@ -514,6 +515,9 @@ lvp_get_features(const struct lvp_physical_device *pdevice,
       .robustImageAccess2 = true,
       .nullDescriptor = true,
 
+      /* VK_NV_device_generated_commands */
+      .deviceGeneratedCommands = true,
+
       /* VK_EXT_primitive_topology_list_restart */
       .primitiveTopologyListRestart = true,
       .primitiveTopologyPatchListRestart = true,
@@ -1135,6 +1139,20 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetPhysicalDeviceProperties2(
                                                     PIPE_CAP_RASTERIZER_SUBPIXEL_BITS);
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV: {
+         VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *properties =
+            (VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *)ext;
+         properties->maxGraphicsShaderGroupCount = 1<<12;
+         properties->maxIndirectSequenceCount = 1<<20;
+         properties->maxIndirectCommandsTokenCount = MAX_DGC_TOKENS;
+         properties->maxIndirectCommandsStreamCount = MAX_DGC_STREAMS;
+         properties->maxIndirectCommandsTokenOffset = 2047;
+         properties->maxIndirectCommandsStreamStride = 2048;
+         properties->minSequencesCountBufferOffsetAlignment = 4;
+         properties->minSequencesIndexBufferOffsetAlignment = 4;
+         properties->minIndirectCommandsBufferOffsetAlignment = 4;
+         break;
+      }
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES: {
          VkPhysicalDeviceInlineUniformBlockProperties *properties =
            (VkPhysicalDeviceInlineUniformBlockProperties *)ext;
@@ -2341,6 +2359,126 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetPrivateDataEXT(
                               privateDataSlot, pData);
 }
 
+VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateIndirectCommandsLayoutNV(
+    VkDevice                                    _device,
+    const VkIndirectCommandsLayoutCreateInfoNV* pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkIndirectCommandsLayoutNV*                 pIndirectCommandsLayout)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   struct lvp_indirect_command_layout *dlayout;
+
+   /* the token array is stored inline after the struct */
+   size_t size = sizeof(*dlayout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV);
+
+   dlayout =
+      vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct lvp_indirect_command_layout),
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!dlayout)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vk_object_base_init(&device->vk, &dlayout->base, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV);
+
+   dlayout->bind_point = pCreateInfo->pipelineBindPoint;
+   dlayout->flags = pCreateInfo->flags;
+   dlayout->stream_count = pCreateInfo->streamCount;
+   dlayout->token_count = pCreateInfo->tokenCount;
+   for (unsigned i = 0; i < pCreateInfo->streamCount; i++)
+      dlayout->stream_strides[i] = pCreateInfo->pStreamStrides[i];
+   typed_memcpy(dlayout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount);
+
+   *pIndirectCommandsLayout = lvp_indirect_command_layout_to_handle(dlayout);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL lvp_DestroyIndirectCommandsLayoutNV(
+    VkDevice                                    _device,
+    VkIndirectCommandsLayoutNV                  indirectCommandsLayout,
+    const VkAllocationCallbacks*                pAllocator)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   VK_FROM_HANDLE(lvp_indirect_command_layout, layout, indirectCommandsLayout);
+
+   if (!layout)
+      return;
+
+   vk_object_base_finish(&layout->base);
+   vk_free2(&device->vk.alloc, pAllocator, layout);
+}
+
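+/* Map an NV DGC token to the vk_cmd_queue command it is decoded into during
+ * preprocessing; the NV_mesh_shader draw token is unreachable since lavapipe
+ * does not expose that extension.
+ */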
+enum vk_cmd_type
+lvp_nv_dgc_token_to_cmd_type(const VkIndirectCommandsLayoutTokenNV *token)
+{
+   switch (token->tokenType) {
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV:
+      return VK_CMD_BIND_PIPELINE_SHADER_GROUP_NV;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
+      if (token->indirectStateFlags & VK_INDIRECT_STATE_FLAG_FRONTFACE_BIT_NV) {
+         return VK_CMD_SET_FRONT_FACE;
+      }
+      assert(!"unknown state flag!");
+      break;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
+      return VK_CMD_PUSH_CONSTANTS;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
+      return VK_CMD_BIND_INDEX_BUFFER;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
+      return VK_CMD_BIND_VERTEX_BUFFERS2;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
+      return VK_CMD_DRAW_INDEXED_INDIRECT;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
+      return VK_CMD_DRAW_INDIRECT;
+   // only available if VK_EXT_mesh_shader is supported
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV:
+      return VK_CMD_DRAW_MESH_TASKS_INDIRECT_EXT;
+   // only available if VK_NV_mesh_shader is supported
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV:
+      unreachable("NV_mesh_shader unsupported!");
+   default:
+      unreachable("unknown token type");
+   }
+   return UINT32_MAX;
+}
+
+VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsNV(
+    VkDevice                                    device,
+    const VkGeneratedCommandsMemoryRequirementsInfoNV* pInfo,
+    VkMemoryRequirements2*                      pMemoryRequirements)
+{
+   VK_FROM_HANDLE(lvp_indirect_command_layout, dlayout, pInfo->indirectCommandsLayout);
+
+   /* worst-case vk_cmd_queue storage for a single sequence */
+   size_t size = 0;
+
+   for (unsigned i = 0; i < dlayout->token_count; i++) {
+      const VkIndirectCommandsLayoutTokenNV *token = &dlayout->tokens[i];
+      UNUSED struct vk_cmd_queue_entry *cmd;
+      enum vk_cmd_type type = lvp_nv_dgc_token_to_cmd_type(token);
+      size += vk_cmd_queue_type_sizes[type];
+
+      switch (token->tokenType) {
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
+         size += sizeof(*cmd->u.bind_vertex_buffers2.buffers);
+         size += sizeof(*cmd->u.bind_vertex_buffers2.offsets);
+         size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
+         break;
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
+         size += token->pushconstantSize;
+         break;
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV:
+         break;
+      default:
+         unreachable("unknown type!");
+      }
+   }
+
+   /* one list header plus per-sequence command storage for every sequence
+    * that may be generated
+    */
+   size = sizeof(struct list_head) + size * pInfo->maxSequencesCount;
+
+   pMemoryRequirements->memoryRequirements.memoryTypeBits = 1;
+   pMemoryRequirements->memoryRequirements.alignment = 4;
+   pMemoryRequirements->memoryRequirements.size = align(size, pMemoryRequirements->memoryRequirements.alignment);
+}
+
 VKAPI_ATTR void VKAPI_CALL lvp_GetPhysicalDeviceExternalFenceProperties(
    VkPhysicalDevice                            physicalDevice,
    const VkPhysicalDeviceExternalFenceInfo     *pExternalFenceInfo,
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index 54960d42fe1..9ee9be1e07b 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -855,10 +855,9 @@ handle_graphics_layout(struct rendering_state *state, gl_shader_stage stage, str
    }
 }
 
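+/* take the pipeline directly instead of a bind_pipeline command entry so
+ * that CmdBindPipelineShaderGroupNV can reuse this path for group pipelines
+ */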
-static void handle_graphics_pipeline(struct vk_cmd_queue_entry *cmd,
+static void handle_graphics_pipeline(struct lvp_pipeline *pipeline,
                                      struct rendering_state *state)
 {
-   LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline.pipeline);
    const struct vk_graphics_pipeline_state *ps = &pipeline->graphics_state;
    lvp_pipeline_shaders_compile(pipeline);
    bool dynamic_tess_origin = BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
@@ -1198,7 +1197,7 @@ static void handle_pipeline(struct vk_cmd_queue_entry *cmd,
       handle_compute_pipeline(cmd, state);
       handle_pipeline_access(state, MESA_SHADER_COMPUTE);
    } else {
-      handle_graphics_pipeline(cmd, state);
+      handle_graphics_pipeline(pipeline, state);
       lvp_forall_gfx_stage(sh) {
          handle_pipeline_access(state, sh);
       }
@@ -1206,6 +1205,19 @@ static void handle_pipeline(struct vk_cmd_queue_entry *cmd,
    state->push_size[pipeline->is_compute_pipeline] = pipeline->layout->push_constant_size;
 }
 
+static void
+handle_graphics_pipeline_group(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
+{
+   assert(cmd->u.bind_pipeline_shader_group_nv.pipeline_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
+   LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline_shader_group_nv.pipeline);
+   /* group index 0 is the pipeline's own shader set; the rest live in groups[] */
+   if (cmd->u.bind_pipeline_shader_group_nv.group_index)
+      pipeline = lvp_pipeline_from_handle(pipeline->groups[cmd->u.bind_pipeline_shader_group_nv.group_index - 1]);
+   handle_graphics_pipeline(pipeline, state);
+   lvp_forall_gfx_stage(sh)
+      handle_pipeline_access(state, sh);
+   state->push_size[pipeline->is_compute_pipeline] = pipeline->layout->push_constant_size;
+}
+
 static void handle_vertex_buffers2(struct vk_cmd_queue_entry *cmd,
                                    struct rendering_state *state)
 {
@@ -2864,11 +2876,13 @@ static void handle_index_buffer(struct vk_cmd_queue_entry *cmd,
    default:
       break;
    }
-   state->index_offset = ib->offset;
-   if (ib->buffer)
+   if (ib->buffer) {
+      state->index_offset = ib->offset;
       state->index_buffer = lvp_buffer_from_handle(ib->buffer)->bo;
-   else
+   } else {
+      state->index_offset = 0;
       state->index_buffer = state->device->zero_buffer;
+   }
    state->ib_dirty = true;
 }
 
@@ -4227,6 +4241,233 @@ static void handle_draw_mesh_tasks_indirect_count(struct vk_cmd_queue_entry *cmd
    state->pctx->draw_mesh_tasks(state->pctx, &state->dispatch_info);
 }
 
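+/* Resolve a GPU address captured from an indirect stream back to its
+ * VkBuffer by walking the device's buffer-device-address table under
+ * bda_lock; lavapipe keeps no sorted index, so this is a linear search.
+ */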
+static VkBuffer
+get_buffer(struct rendering_state *state, uint8_t *ptr, size_t *offset)
+{
+   simple_mtx_lock(&state->device->bda_lock);
+   hash_table_foreach(&state->device->bda, he) {
+      const uint8_t *bda = he->key;
+      if (ptr < bda)
+         continue;
+      struct lvp_buffer *buffer = he->data;
+      if (bda + buffer->size > ptr) {
+         *offset = ptr - bda;
+         simple_mtx_unlock(&state->device->bda_lock);
+         return lvp_buffer_to_handle(buffer);
+      }
+   }
+   fprintf(stderr, "unrecognized BDA!\n");
+   abort();
+}
+
+static size_t
+process_sequence(struct rendering_state *state,
+                 VkPipeline pipeline, struct lvp_indirect_command_layout *dlayout,
+                 struct list_head *list, uint8_t *pbuf, size_t max_size,
+                 uint8_t **map_streams, const VkIndirectCommandsStreamNV *pstreams, uint32_t seq)
+{
+   size_t size = 0;
+   for (uint32_t t = 0; t < dlayout->token_count; t++) {
+      const VkIndirectCommandsLayoutTokenNV *token = &dlayout->tokens[t];
+      uint32_t stride = dlayout->stream_strides[token->stream];
+      uint8_t *stream = map_streams[token->stream];
+      uint32_t offset = stride * seq + token->offset;
+      /* draws reference the original VkBuffer, so they need the absolute offset */
+      uint32_t draw_offset = offset + pstreams[token->stream].offset;
+      void *input = stream + offset;
+
+      struct vk_cmd_queue_entry *cmd = (struct vk_cmd_queue_entry*)(pbuf + size);
+      size_t cmd_size = vk_cmd_queue_type_sizes[lvp_nv_dgc_token_to_cmd_type(token)];
+      /* variable-sized payloads (push constants, vertex buffer arrays) land after the entry */
+      uint8_t *cmdptr = (void*)(pbuf + size + cmd_size);
+
+      if (max_size < size + cmd_size)
+         abort();
+      cmd->type = lvp_nv_dgc_token_to_cmd_type(token);
+
+      switch (token->tokenType) {
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV: {
+         VkBindShaderGroupIndirectCommandNV *bind = input;
+         cmd->u.bind_pipeline_shader_group_nv.pipeline_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
+         cmd->u.bind_pipeline_shader_group_nv.pipeline = pipeline;
+         cmd->u.bind_pipeline_shader_group_nv.group_index = bind->groupIndex;
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV: {
+         VkSetStateFlagsIndirectCommandNV *flags = input;
+         if (token->indirectStateFlags & VK_INDIRECT_STATE_FLAG_FRONTFACE_BIT_NV) {
+            /* bit 0 of data selects clockwise vs. counter-clockwise */
+            if (flags->data & BITFIELD_BIT(0)) {
+               cmd->u.set_front_face.front_face = VK_FRONT_FACE_CLOCKWISE;
+            } else {
+               cmd->u.set_front_face.front_face = VK_FRONT_FACE_COUNTER_CLOCKWISE;
+            }
+         } else {
+            /* skip this if unrecognized state flag */
+            continue;
+         }
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: {
+         uint32_t *data = input;
+         cmd_size += token->pushconstantSize;
+         if (max_size < size + cmd_size)
+            abort();
+         cmd->u.push_constants.layout = token->pushconstantPipelineLayout;
+         cmd->u.push_constants.stage_flags = token->pushconstantShaderStageFlags;
+         cmd->u.push_constants.offset = token->pushconstantOffset;
+         cmd->u.push_constants.size = token->pushconstantSize;
+         cmd->u.push_constants.values = (void*)cmdptr;
+         memcpy(cmd->u.push_constants.values, data, token->pushconstantSize);
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: {
+         VkBindIndexBufferIndirectCommandNV *data = input;
+         cmd->u.bind_index_buffer.offset = 0;
+         if (data->bufferAddress)
+            cmd->u.bind_index_buffer.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_index_buffer.offset);
+         else
+            cmd->u.bind_index_buffer.buffer = VK_NULL_HANDLE;
+         cmd->u.bind_index_buffer.index_type = data->indexType;
+         /* the token may remap custom index-type values to VkIndexType */
+         for (unsigned i = 0; i < token->indexTypeCount; i++) {
+            if (data->indexType == token->pIndexTypeValues[i]) {
+               cmd->u.bind_index_buffer.index_type = token->pIndexTypes[i];
+               break;
+            }
+         }
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: {
+         VkBindVertexBufferIndirectCommandNV *data = input;
+         cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets);
+         cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
+         if (max_size < size + cmd_size)
+            abort();
+
+         cmd->u.bind_vertex_buffers2.first_binding = token->vertexBindingUnit;
+         cmd->u.bind_vertex_buffers2.binding_count = 1;
+
+         cmd->u.bind_vertex_buffers2.buffers = (void*)cmdptr;
+         cmd->u.bind_vertex_buffers2.offsets = (void*)(cmdptr + sizeof(*cmd->u.bind_vertex_buffers2.buffers));
+         cmd->u.bind_vertex_buffers2.offsets[0] = 0;
+         cmd->u.bind_vertex_buffers2.buffers[0] = data->bufferAddress ?
+            get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_vertex_buffers2.offsets[0]) : VK_NULL_HANDLE;
+
+         /* this token never generates a size payload */
+         cmd->u.bind_vertex_buffers2.sizes = NULL;
+         if (token->vertexDynamicStride) {
+            cmd->u.bind_vertex_buffers2.strides = (void*)(cmdptr + sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets) + sizeof(*cmd->u.bind_vertex_buffers2.sizes));
+            cmd->u.bind_vertex_buffers2.strides[0] = data->stride;
+         } else {
+            cmd->u.bind_vertex_buffers2.strides = NULL;
+         }
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV: {
+         cmd->u.draw_indexed_indirect.buffer = pstreams[token->stream].buffer;
+         cmd->u.draw_indexed_indirect.offset = draw_offset;
+         cmd->u.draw_indexed_indirect.draw_count = 1;
+         cmd->u.draw_indexed_indirect.stride = 0;
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV: {
+         cmd->u.draw_indirect.buffer = pstreams[token->stream].buffer;
+         cmd->u.draw_indirect.offset = draw_offset;
+         cmd->u.draw_indirect.draw_count = 1;
+         cmd->u.draw_indirect.stride = 0;
+         break;
+      }
+      // only available if VK_EXT_mesh_shader is supported
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV: {
+         cmd->u.draw_mesh_tasks_indirect_ext.buffer = pstreams[token->stream].buffer;
+         cmd->u.draw_mesh_tasks_indirect_ext.offset = draw_offset;
+         cmd->u.draw_mesh_tasks_indirect_ext.draw_count = 1;
+         cmd->u.draw_mesh_tasks_indirect_ext.stride = 0;
+         break;
+      }
+      // only available if VK_NV_mesh_shader is supported
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV:
+         unreachable("NV_mesh_shader unsupported!");
+      default:
+         unreachable("unknown token type");
+         break;
+      }
+      size += cmd_size;
+      list_addtail(&cmd->cmd_link, list);
+   }
+   return size;
+}
+
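+/* Decode every sequence described by VkGeneratedCommandsInfoNV into a list
+ * of vk_cmd_queue entries stored in the preprocess buffer; execution later
+ * replays that list like a secondary command buffer.
+ */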
+static void
+handle_preprocess_generated_commands(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
+{
+   VkGeneratedCommandsInfoNV *pre = cmd->u.preprocess_generated_commands_nv.generated_commands_info;
+   VK_FROM_HANDLE(lvp_indirect_command_layout, dlayout, pre->indirectCommandsLayout);
+   struct pipe_transfer *stream_maps[MAX_DGC_STREAMS];
+   uint8_t *streams[MAX_DGC_STREAMS];
+   for (unsigned i = 0; i < pre->streamCount; i++) {
+      struct lvp_buffer *buf = lvp_buffer_from_handle(pre->pStreams[i].buffer);
+      streams[i] = pipe_buffer_map(state->pctx, buf->bo, PIPE_MAP_READ, &stream_maps[i]);
+      streams[i] += pre->pStreams[i].offset;
+   }
+   LVP_FROM_HANDLE(lvp_buffer, pbuf, pre->preprocessBuffer);
+   LVP_FROM_HANDLE(lvp_buffer, seqc, pre->sequencesCountBuffer);
+   LVP_FROM_HANDLE(lvp_buffer, seqi, pre->sequencesIndexBuffer);
+
+   unsigned seq_count = pre->sequencesCount;
+   if (seqc) {
+      unsigned count = 0;
+      pipe_buffer_read(state->pctx, seqc->bo, pre->sequencesCountOffset, sizeof(uint32_t), &count);
+      seq_count = MIN2(count, seq_count);
+   }
+   uint32_t *seq = NULL;
+   struct pipe_transfer *seq_map = NULL;
+   if (seqi) {
+      seq = pipe_buffer_map(state->pctx, seqi->bo, PIPE_MAP_READ, &seq_map);
+      seq = (uint32_t*)(((uint8_t*)seq) + pre->sequencesIndexOffset);
+   }
+
+   struct pipe_transfer *pmap;
+   uint8_t *p = pipe_buffer_map(state->pctx, pbuf->bo, PIPE_MAP_WRITE, &pmap);
+   p += pre->preprocessOffset;
+   struct list_head *list = (void*)p;
+   size_t size = sizeof(struct list_head);
+   size_t max_size = pre->preprocessSize;
+   if (size > max_size)
+      abort();
+   list_inithead(list);
+
+   size_t offset = size;
+   for (unsigned i = 0; i < seq_count; i++) {
+      uint32_t s = seq ? seq[i] : i;
+      offset += process_sequence(state, pre->pipeline, dlayout, list, p + offset, max_size - offset, streams, pre->pStreams, s);
+   }
+
+   /* vk_cmd_queue will copy the binary and break the list, so null the tail pointer */
+   list->prev->next = NULL;
+
+   for (unsigned i = 0; i < pre->streamCount; i++)
+      state->pctx->buffer_unmap(state->pctx, stream_maps[i]);
+   state->pctx->buffer_unmap(state->pctx, pmap);
+   if (seq_map)
+      state->pctx->buffer_unmap(state->pctx, seq_map);
+}
+
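+/* If the app skipped vkCmdPreprocessGeneratedCommandsNV, decode the streams
+ * now, then replay the command list from the preprocess buffer.
+ */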
+static void
+handle_execute_generated_commands(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
+{
+   VkGeneratedCommandsInfoNV *gen = cmd->u.execute_generated_commands_nv.generated_commands_info;
+   struct vk_cmd_execute_generated_commands_nv *exec = &cmd->u.execute_generated_commands_nv;
+   if (!exec->is_preprocessed) {
+      struct vk_cmd_queue_entry pre = {0};
+      pre.u.preprocess_generated_commands_nv.generated_commands_info = exec->generated_commands_info;
+      handle_preprocess_generated_commands(&pre, state);
+   }
+   LVP_FROM_HANDLE(lvp_buffer, pbuf, gen->preprocessBuffer);
+   struct pipe_transfer *pmap;
+   uint8_t *p = pipe_buffer_map(state->pctx, pbuf->bo, PIPE_MAP_WRITE, &pmap);
+   p += gen->preprocessOffset;
+   struct list_head *list = (void*)p;
+
+   lvp_execute_cmd_buffer(list, state, print_cmds);
+
+   state->pctx->buffer_unmap(state->pctx, pmap);
+}
+
 void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
 {
    struct vk_device_dispatch_table cmd_enqueue_dispatch;
@@ -4350,6 +4591,11 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
    ENQUEUE_CMD(CmdDrawMeshTasksEXT)
    ENQUEUE_CMD(CmdDrawMeshTasksIndirectEXT)
    ENQUEUE_CMD(CmdDrawMeshTasksIndirectCountEXT)
+
+   ENQUEUE_CMD(CmdBindPipelineShaderGroupNV)
+   ENQUEUE_CMD(CmdPreprocessGeneratedCommandsNV)
+   ENQUEUE_CMD(CmdExecuteGeneratedCommandsNV)
+
 #undef ENQUEUE_CMD
 }
 
@@ -4681,6 +4927,15 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
          emit_state(state);
          handle_draw_mesh_tasks_indirect_count(cmd, state);
          break;
+      case VK_CMD_BIND_PIPELINE_SHADER_GROUP_NV:
+         handle_graphics_pipeline_group(cmd, state);
+         break;
+      case VK_CMD_PREPROCESS_GENERATED_COMMANDS_NV:
+         handle_preprocess_generated_commands(cmd, state);
+         break;
+      case VK_CMD_EXECUTE_GENERATED_COMMANDS_NV:
+         handle_execute_generated_commands(cmd, state, print_cmds);
+         break;
       default:
          fprintf(stderr, "Unsupported command %s\n", vk_cmd_queue_type_names[cmd->type]);
          unreachable("Unsupported command");
@@ -4688,6 +4943,8 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
       }
       first = false;
       did_flush = false;
+      /* a preprocessed DGC list has a NULLed tail pointer; stop there */
+      if (!cmd->cmd_link.next)
+         break;
    }
 }
 
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index 99b8161efc3..421a05fa8a1 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -81,6 +81,11 @@ lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline)
    if (pipeline->layout)
      vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);
 
+   /* only destroy internally-created group pipelines; entries past
+    * num_groups came from the app via pPipelines and are not owned here
+    */
+   for (unsigned i = 0; i < pipeline->num_groups; i++) {
+      LVP_FROM_HANDLE(lvp_pipeline, p, pipeline->groups[i]);
+      lvp_pipeline_destroy(device, p);
+   }
+
    vk_free(&device->vk.alloc, pipeline->state_data);
    vk_object_base_finish(&pipeline->base);
    vk_free(&device->vk.alloc, pipeline);
@@ -991,7 +996,8 @@ lvp_graphics_pipeline_create(
    VkDevice _device,
    VkPipelineCache _cache,
    const VkGraphicsPipelineCreateInfo *pCreateInfo,
-   VkPipeline *pPipeline)
+   VkPipeline *pPipeline,
+   bool group)
 {
    LVP_FROM_HANDLE(lvp_device, device, _device);
    LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
@@ -1000,7 +1006,12 @@ lvp_graphics_pipeline_create(
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
 
-   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
+   size_t size = 0;
+   const VkGraphicsPipelineShaderGroupsCreateInfoNV *groupinfo = vk_find_struct_const(pCreateInfo, GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV);
+   if (!group && groupinfo)
+      size += (groupinfo->groupCount + groupinfo->pipelineCount) * sizeof(VkPipeline);
+
+   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + size, 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (pipeline == NULL)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -1013,9 +1024,28 @@ lvp_graphics_pipeline_create(
       vk_free(&device->vk.alloc, pipeline);
       return result;
    }
+   if (!group && groupinfo) {
+      /* create one internal pipeline per shader group, then append any
+       * referenced pipelines after them
+       */
+      VkGraphicsPipelineCreateInfo pci = *pCreateInfo;
+      for (unsigned i = 0; i < groupinfo->groupCount; i++) {
+         const VkGraphicsShaderGroupCreateInfoNV *g = &groupinfo->pGroups[i];
+         pci.pVertexInputState = g->pVertexInputState;
+         pci.pTessellationState = g->pTessellationState;
+         pci.pStages = g->pStages;
+         pci.stageCount = g->stageCount;
+         result = lvp_graphics_pipeline_create(_device, _cache, &pci, &pipeline->groups[i], true);
+         if (result != VK_SUCCESS) {
+            lvp_pipeline_destroy(device, pipeline);
+            return result;
+         }
+         pipeline->num_groups++;
+      }
+      for (unsigned i = 0; i < groupinfo->pipelineCount; i++)
+         pipeline->groups[pipeline->num_groups + i] = groupinfo->pPipelines[i];
+      pipeline->num_groups_total = groupinfo->groupCount + groupinfo->pipelineCount;
+   }
    VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
-   if (feedback) {
+   if (feedback && !group) {
       feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
       feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
       memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
@@ -1043,7 +1073,8 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
       r = lvp_graphics_pipeline_create(_device,
                                        pipelineCache,
                                        &pCreateInfos[i],
-                                       &pPipelines[i]);
+                                       &pPipelines[i],
+                                       false);
       if (r != VK_SUCCESS) {
          result = r;
         pPipelines[i] = VK_NULL_HANDLE;
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index fbbf315e166..fbad94b3593 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -86,6 +86,8 @@ extern "C" {
 #define MAX_PUSH_DESCRIPTORS 32
 #define MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE 4096
 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS 8
+#define MAX_DGC_STREAMS 16
+#define MAX_DGC_TOKENS 16
 
 #ifdef _WIN32
 #define lvp_printflike(a, b)
@@ -490,6 +492,10 @@ struct lvp_pipeline {
    bool library;
    bool compiled;
    bool used;
+
+   /* VK_NV_device_generated_commands shader groups */
+   unsigned num_groups;
+   unsigned num_groups_total;
+   VkPipeline groups[0];
 };
 
 void
@@ -541,6 +547,16 @@ struct lvp_cmd_buffer {
    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
 };
 
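+/* CPU-side snapshot of a VkIndirectCommandsLayoutNV; the token array is
+ * stored inline so the whole layout is a single allocation
+ */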
+struct lvp_indirect_command_layout {
+   struct vk_object_base base;
+   uint8_t stream_count;
+   uint8_t token_count;
+   uint16_t stream_strides[MAX_DGC_STREAMS];
+   VkPipelineBindPoint bind_point;
+   VkIndirectCommandsLayoutUsageFlagsNV flags;
+   VkIndirectCommandsLayoutTokenNV tokens[0];
+};
+
 extern const struct vk_command_buffer_ops lvp_cmd_buffer_ops;
 
 static inline const struct lvp_descriptor_set_layout *
@@ -598,6 +614,8 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_query_pool, base, VkQueryPool,
                                VK_OBJECT_TYPE_QUERY_POOL)
 VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_sampler, base, VkSampler,
                                VK_OBJECT_TYPE_SAMPLER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
+                               VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
 
 struct lvp_write_descriptor {
    uint32_t dst_binding;
@@ -672,6 +690,8 @@ void lvp_inline_uniforms(nir_shader *nir, const struct lvp_shader *shader,
                          const uint32_t *uniform_values, uint32_t ubo);
 void *
 lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir);
+enum vk_cmd_type
+lvp_nv_dgc_token_to_cmd_type(const VkIndirectCommandsLayoutTokenNV *token);
 #ifdef __cplusplus
 }
 #endif