From 2336149c905a3ae4a173aca529cce596854486de Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Tue, 7 Jan 2025 11:04:47 -0500 Subject: [PATCH] lavapipe: KHR_device_address_commands Part-of: --- src/gallium/frontends/lavapipe/lvp_device.c | 4 + src/gallium/frontends/lavapipe/lvp_execute.c | 574 +++++++++++++++++-- 2 files changed, 528 insertions(+), 50 deletions(-) diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index 4ce8fd7d9ec..195eeb7097e 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -138,6 +138,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported = .KHR_deferred_host_operations = true, .KHR_depth_stencil_resolve = true, .KHR_descriptor_update_template = true, + .KHR_device_address_commands = true, .KHR_device_group = true, .KHR_draw_indirect_count = true, .KHR_driver_properties = true, @@ -882,6 +883,9 @@ lvp_get_features(const struct lvp_physical_device *pdevice, .unifiedImageLayouts = true, .unifiedImageLayoutsVideo = true, + /* VK_KHR_device_address_commands */ + .deviceAddressCommands = true, + /* VK_KHR_cooperative_matrix */ .cooperativeMatrix = has_cooperative_matrix(), .cooperativeMatrixRobustBufferAccess = has_cooperative_matrix(), diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c index c231157e2d6..4d0cfc50ebb 100644 --- a/src/gallium/frontends/lavapipe/lvp_execute.c +++ b/src/gallium/frontends/lavapipe/lvp_execute.c @@ -267,6 +267,38 @@ get_buffer_resource(struct pipe_context *ctx, void *mem) return pres; } +static VkBuffer +get_buffer(struct rendering_state *state, const uint8_t *ptr, size_t *offset) +{ + if (!ptr) { + *offset = 0; + return VK_NULL_HANDLE; + } + simple_mtx_lock(&state->device->bda_lock); + hash_table_foreach(&state->device->bda, he) { + const uint8_t *bda = he->key; + if (ptr < bda) + continue; + struct lvp_buffer *buffer = he->data; + if (bda + buffer->vk.size > ptr) { + *offset = ptr - bda; + simple_mtx_unlock(&state->device->bda_lock); + return lvp_buffer_to_handle(buffer); + } + } + fprintf(stderr, "unrecognized BDA!\n"); + abort(); +} + +static struct pipe_resource * +get_bda_internal_resource(struct rendering_state *state, VkDeviceAddress addr, unsigned *offset) +{ + size_t o; + VK_FROM_HANDLE(lvp_buffer, buffer, get_buffer(state, (void*)(uintptr_t)addr, &o)); + *offset = o; + return buffer ? buffer->bo : NULL; +} + ALWAYS_INLINE static void assert_subresource_layers(const struct pipe_resource *pres, const struct lvp_image *image, @@ -1206,9 +1238,8 @@ static void handle_vertex_buffers2(struct vk_cmd_queue_entry *cmd, int idx = i + vcb->first_binding; state->vb[idx].buffer_offset = vcb->offsets[i]; - if (state->vb_sizes[idx] != UINT32_MAX) - pipe_resource_reference(&state->vb[idx].buffer.resource, NULL); - state->vb[idx].buffer.resource = vcb->buffers[i] && (!vcb->sizes || vcb->sizes[i]) ? lvp_buffer_from_handle(vcb->buffers[i])->bo : NULL; + struct pipe_resource *pres = vcb->buffers[i] && (!vcb->sizes || vcb->sizes[i]) ? 
lvp_buffer_from_handle(vcb->buffers[i])->bo : NULL;
+      pipe_resource_reference(&state->vb[idx].buffer.resource, pres);
       if (state->vb[idx].buffer.resource && vcb->sizes) {
          if (vcb->sizes[i] == VK_WHOLE_SIZE || vcb->offsets[i] + vcb->sizes[i] >= state->vb[idx].buffer.resource->width0) {
             state->vb_sizes[idx] = UINT32_MAX;
@@ -1236,6 +1267,37 @@ static void handle_vertex_buffers2(struct vk_cmd_queue_entry *cmd,
    state->vb_dirty = true;
 }
 
+static void handle_vertex_buffers3(struct vk_cmd_queue_entry *cmd,
+                                   struct rendering_state *state)
+{
+   struct vk_cmd_bind_vertex_buffers3_khr *vcb = &cmd->u.bind_vertex_buffers3_khr;
+
+   int i;
+   for (i = 0; i < vcb->binding_count; i++) {
+      int idx = i + vcb->first_binding;
+      struct pipe_resource *pres = get_buffer_resource(state->pctx, (void *)(uintptr_t)vcb->binding_infos[i].addressRange.address);
+      state->vb[idx].buffer_offset = 0;
+      state->vb_sizes[idx] = 0;
+
+      pipe_resource_reference(&state->vb[idx].buffer.resource, NULL);
+      state->vb[idx].buffer.resource = pres;
+      if (pres) {
+         state->vb[idx].buffer.resource->width0 = vcb->binding_infos[i].addressRange.size;
+         state->vb_sizes[idx] = MIN2(UINT32_MAX, vcb->binding_infos[i].addressRange.size);
+      }
+
+      if (vcb->binding_infos[i].setStride) {
+         state->vb_strides[idx] = vcb->binding_infos[i].addressRange.stride;
+         state->vb_strides_dirty = true;
+      }
+   }
+   if (vcb->first_binding < state->start_vb)
+      state->start_vb = vcb->first_binding;
+   if (vcb->first_binding + vcb->binding_count >= state->num_vb)
+      state->num_vb = vcb->first_binding + vcb->binding_count;
+   state->vb_dirty = true;
+}
+
 static void
 handle_set_stage_buffer(struct rendering_state *state,
                         struct pipe_resource *bo,
@@ -2224,6 +2286,37 @@ subresource_layercount(const struct lvp_image *image, const VkImageSubresourceLa
    return image->vk.array_layers - sub->baseArrayLayer;
 }
 
+static void
+handle_copy_image_to_memory(struct vk_cmd_queue_entry *cmd,
+                            struct rendering_state *state)
+{
+   const struct VkCopyDeviceMemoryImageInfoKHR *copycmd = cmd->u.copy_image_to_memory_khr.copy_memory_info;
+
+   for (uint32_t i = 0; i < copycmd->regionCount; i++) {
+      const VkDeviceMemoryImageCopyKHR *region = &copycmd->pRegions[i];
+      VkImageToMemoryCopy copyregion = {
+         VK_STRUCTURE_TYPE_IMAGE_TO_MEMORY_COPY_EXT,
+         NULL,
+         (void*)(uintptr_t)region->addressRange.address,
+         region->addressRowLength,
+         region->addressImageHeight,
+         region->imageSubresource,
+         region->imageOffset,
+         region->imageExtent,
+      };
+      VkCopyImageToMemoryInfo copy = {
+         VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO,
+         NULL,
+         0,
+         copycmd->image,
+         VK_IMAGE_LAYOUT_GENERAL,
+         1,
+         &copyregion
+      };
+      state->device->vk.dispatch_table.CopyImageToMemoryEXT(lvp_device_to_handle(state->device), &copy);
+   }
+}
+
 static void
 handle_copy_image_to_buffer2(struct vk_cmd_queue_entry *cmd,
                              struct rendering_state *state)
 {
@@ -2330,8 +2423,8 @@ handle_copy_memory_to_image_indirect(struct vk_cmd_queue_entry *cmd,
          off,
          ext,
       };
-      VkCopyMemoryToImageInfoEXT hiccopy = {
-         VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT,
+      VkCopyMemoryToImageInfo hiccopy = {
+         VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO,
          NULL,
          0,
          copycmd->dstImage,
@@ -2339,10 +2432,41 @@ handle_copy_memory_to_image_indirect(struct vk_cmd_queue_entry *cmd,
          1,
          &copyregion
       };
-      state->device->vk.dispatch_table.CopyMemoryToImageEXT(lvp_device_to_handle(state->device), &hiccopy);
+      state->device->vk.dispatch_table.CopyMemoryToImage(lvp_device_to_handle(state->device), &hiccopy);
    }
 }
 
+static void
+handle_copy_memory_to_image(struct vk_cmd_queue_entry *cmd,
+                            struct rendering_state *state)
+{
+   const struct VkCopyDeviceMemoryImageInfoKHR *copycmd = cmd->u.copy_memory_to_image_khr.copy_memory_info;
+
+   for (uint32_t i = 0; i < copycmd->regionCount; i++) {
+      const VkDeviceMemoryImageCopyKHR *region = &copycmd->pRegions[i];
+      VkMemoryToImageCopy copyregion = {
+         VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT,
+         NULL,
+         (void*)(uintptr_t)region->addressRange.address,
+         region->addressRowLength,
+         region->addressImageHeight,
+         region->imageSubresource,
+         region->imageOffset,
+         region->imageExtent,
+      };
+      VkCopyMemoryToImageInfo copy = {
+         VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO,
+         NULL,
+         0,
+         copycmd->image,
+         VK_IMAGE_LAYOUT_GENERAL,
+         1,
+         &copyregion
+      };
+      state->device->vk.dispatch_table.CopyMemoryToImage(lvp_device_to_handle(state->device), &copy);
+   }
+}
+
 static void
 handle_copy_buffer_to_image(struct vk_cmd_queue_entry *cmd,
                             struct rendering_state *state)
 {
@@ -2571,6 +2695,24 @@ static void handle_copy_buffer(struct vk_cmd_queue_entry *cmd,
    }
 }
 
+static void handle_copy_memory(struct vk_cmd_queue_entry *cmd,
+                               struct rendering_state *state)
+{
+   const VkCopyDeviceMemoryInfoKHR *copycmd = cmd->u.copy_memory_khr.copy_memory_info;
+
+   for (uint32_t i = 0; i < copycmd->regionCount; i++) {
+      const VkDeviceMemoryCopyKHR *region = &copycmd->pRegions[i];
+      struct pipe_box box = { 0 };
+      unsigned src_offset, dst_offset;
+      struct pipe_resource *src = get_bda_internal_resource(state, region->srcRange.address, &src_offset);
+      struct pipe_resource *dst = get_bda_internal_resource(state, region->dstRange.address, &dst_offset);
+      u_box_1d(src_offset, region->srcRange.size, &box);
+      state->pctx->resource_copy_region(state->pctx, dst, 0,
+                                        dst_offset, 0, 0,
+                                        src, 0, &box);
+   }
+}
+
 static void handle_blit_image(struct vk_cmd_queue_entry *cmd,
                               struct rendering_state *state)
 {
@@ -2685,6 +2827,23 @@ static void handle_fill_buffer(struct vk_cmd_queue_entry *cmd,
                              4);
 }
 
+static void handle_fill_memory(struct vk_cmd_queue_entry *cmd,
+                               struct rendering_state *state)
+{
+   struct vk_cmd_fill_memory_khr *fillcmd = &cmd->u.fill_memory_khr;
+   size_t size = fillcmd->dst_range->size;
+   size_t dst_offset;
+   VK_FROM_HANDLE(lvp_buffer, dst, get_buffer(state, (void*)(uintptr_t)fillcmd->dst_range->address, &dst_offset));
+
+   size = vk_buffer_range(&dst->vk, dst_offset, size);
+   state->pctx->clear_buffer(state->pctx,
+                             dst->bo,
+                             dst_offset,
+                             size,
+                             &fillcmd->data,
+                             4);
+}
+
 static void handle_update_buffer(struct vk_cmd_queue_entry *cmd,
                                  struct rendering_state *state)
 {
@@ -2705,6 +2864,28 @@ static void handle_update_buffer(struct vk_cmd_queue_entry *cmd,
    state->pctx->buffer_unmap(state->pctx, dst_t);
 }
 
+static void handle_update_memory(struct vk_cmd_queue_entry *cmd,
+                                 struct rendering_state *state)
+{
+   struct vk_cmd_update_memory_khr *updcmd = &cmd->u.update_memory_khr;
+   size_t dst_offset;
+   VK_FROM_HANDLE(lvp_buffer, dst_buffer, get_buffer(state, (void*)(uintptr_t)updcmd->dst_range->address, &dst_offset));
+   uint32_t *dst;
+   struct pipe_transfer *dst_t;
+   struct pipe_box box;
+
+   u_box_1d(dst_offset, updcmd->data_size, &box);
+   dst = state->pctx->buffer_map(state->pctx,
+                                 dst_buffer->bo,
+                                 0,
+                                 PIPE_MAP_WRITE,
+                                 &box,
+                                 &dst_t);
+
+   memcpy(dst, updcmd->data, updcmd->data_size);
+   state->pctx->buffer_unmap(state->pctx, dst_t);
+}
+
 static void handle_draw_indexed(struct vk_cmd_queue_entry *cmd,
                                 struct rendering_state *state)
 {
@@ -2798,6 +2979,34 @@ static void handle_draw_indirect(struct vk_cmd_queue_entry *cmd,
    pipe_resource_reference(&index, NULL);
 }
 
+static void
handle_draw_indirect2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state, bool indexed) +{ + struct pipe_draw_start_count_bias draw = {0}; + struct pipe_resource *index = NULL; + if (indexed) { + state->info.index_bounds_valid = false; + state->info.index_size = state->index_size; + state->info.index.resource = state->index_buffer; + state->info.max_index = ~0U; + if (state->index_offset || state->index_buffer_size != UINT32_MAX) { + struct pipe_transfer *xfer; + uint8_t *mem = pipe_buffer_map(state->pctx, state->index_buffer, 0, &xfer); + state->pctx->buffer_unmap(state->pctx, xfer); + index = get_buffer_resource(state->pctx, mem + state->index_offset); + index->width0 = MIN2(state->index_buffer->width0 - state->index_offset, state->index_buffer_size); + state->info.index.resource = index; + } + } else + state->info.index_size = 0; + state->indirect_info.stride = cmd->u.draw_indirect2_khr.info->addressRange.stride; + state->indirect_info.draw_count = cmd->u.draw_indirect2_khr.info->drawCount; + state->indirect_info.buffer = get_bda_internal_resource(state, cmd->u.draw_indirect2_khr.info->addressRange.address, &state->indirect_info.offset); + + state->pctx->draw_vbo(state->pctx, &state->info, 0, &state->indirect_info, &draw, 1); + pipe_resource_reference(&index, NULL); +} + static void handle_index_buffer(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -2838,6 +3047,26 @@ static void handle_index_buffer2(struct vk_cmd_queue_entry *cmd, state->ib_dirty = true; } +static void handle_index_buffer3(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + struct vk_cmd_bind_index_buffer3_khr *ib = &cmd->u.bind_index_buffer3_khr; + const VkDeviceAddressRangeKHR *region = &ib->info->addressRange; + + if (region->address) { + state->index_size = vk_index_type_to_bytes(ib->info->indexType); + state->index_buffer_size = region->size; + state->index_buffer = get_bda_internal_resource(state, region->address, &state->index_offset); + } else { + state->index_size = 4; + state->index_buffer_size = UINT32_MAX; + state->index_offset = 0; + state->index_buffer = state->device->zero_buffer; + } + + state->ib_dirty = true; +} + static void handle_dispatch(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -2872,6 +3101,13 @@ static void handle_dispatch_indirect(struct vk_cmd_queue_entry *cmd, state->pctx->launch_grid(state->pctx, &state->dispatch_info); } +static void handle_dispatch_indirect2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + state->dispatch_info.indirect = get_bda_internal_resource(state, cmd->u.dispatch_indirect2_khr.info->addressRange.address, &state->dispatch_info.indirect_offset); + state->pctx->launch_grid(state->pctx, &state->dispatch_info); +} + static void handle_push_constants(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -3055,32 +3291,55 @@ static void handle_copy_query_pool_results(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { struct vk_cmd_copy_query_pool_results *copycmd = &cmd->u.copy_query_pool_results; - VK_FROM_HANDLE(lvp_query_pool, pool, copycmd->query_pool); - enum pipe_query_flags flags = (copycmd->flags & VK_QUERY_RESULT_WAIT_BIT) ? 
PIPE_QUERY_WAIT : 0; + struct vk_cmd_copy_query_pool_results_to_memory_khr *memcmd = &cmd->u.copy_query_pool_results_to_memory_khr; + struct lvp_query_pool *pool; + struct lvp_buffer *dstbuffer; + size_t dst_offset = 0; + uint32_t first_query = 0; + uint32_t query_count = 0; + size_t stride = 0; + VkQueryResultFlags vkflags = 0; + if (cmd->type == VK_CMD_COPY_QUERY_POOL_RESULTS) { + dstbuffer = lvp_buffer_from_handle(copycmd->dst_buffer); + pool = lvp_query_pool_from_handle(copycmd->query_pool); + vkflags = copycmd->flags; + first_query = copycmd->first_query; + query_count = copycmd->query_count; + dst_offset = copycmd->dst_offset; + stride = copycmd->stride; + } else { + dstbuffer = lvp_buffer_from_handle(get_buffer(state, (void*)(uintptr_t)memcmd->dst_range->address, &dst_offset)); + stride = memcmd->dst_range->stride; + pool = lvp_query_pool_from_handle(memcmd->query_pool); + vkflags = memcmd->query_result_flags; + first_query = memcmd->first_query; + query_count = memcmd->query_count; + } + enum pipe_query_flags flags = (vkflags & VK_QUERY_RESULT_WAIT_BIT) ? PIPE_QUERY_WAIT : 0; - if (copycmd->flags & VK_QUERY_RESULT_PARTIAL_BIT) + if (vkflags & VK_QUERY_RESULT_PARTIAL_BIT) flags |= PIPE_QUERY_PARTIAL; - unsigned result_size = copycmd->flags & VK_QUERY_RESULT_64_BIT ? 8 : 4; - for (unsigned i = copycmd->first_query; i < copycmd->first_query + copycmd->query_count; i++) { - unsigned offset = copycmd->dst_offset + (copycmd->stride * (i - copycmd->first_query)); + unsigned result_size = vkflags & VK_QUERY_RESULT_64_BIT ? 8 : 4; + for (unsigned i = first_query; i < first_query + query_count; i++) { + unsigned offset = dst_offset + (stride * (i - first_query)); if (pool->base_type >= PIPE_QUERY_TYPES) { struct pipe_transfer *transfer; - uint8_t *map = pipe_buffer_map(state->pctx, lvp_buffer_from_handle(copycmd->dst_buffer)->bo, PIPE_MAP_WRITE, &transfer); + uint8_t *map = pipe_buffer_map(state->pctx, dstbuffer->bo, PIPE_MAP_WRITE, &transfer); map += offset; void *data = &pool->queries; - if (copycmd->flags & VK_QUERY_RESULT_64_BIT) { + if (vkflags & VK_QUERY_RESULT_64_BIT) { uint64_t *dst = (uint64_t *)map; uint64_t *src = (uint64_t *)data; *dst = src[i]; - if (copycmd->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + if (vkflags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) *(dst + 1) = 1; } else { uint32_t *dst = (uint32_t *)map; uint64_t *src = (uint64_t *)data; *dst = (uint32_t) (src[i] & UINT32_MAX); - if (copycmd->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + if (vkflags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) *(dst + 1) = 1; } @@ -3091,7 +3350,7 @@ static void handle_copy_query_pool_results(struct vk_cmd_queue_entry *cmd, if (pool->queries[i]) { unsigned num_results = 0; - if (copycmd->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + if (vkflags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { num_results = util_bitcount(pool->vk.pipeline_statistics); } else @@ -3099,9 +3358,9 @@ static void handle_copy_query_pool_results(struct vk_cmd_queue_entry *cmd, state->pctx->get_query_result_resource(state->pctx, pool->queries[i], flags, - copycmd->flags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32, + vkflags & VK_QUERY_RESULT_64_BIT ? 
PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32, -1, - lvp_buffer_from_handle(copycmd->dst_buffer)->bo, + dstbuffer->bo, offset + num_results * result_size); } if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { @@ -3110,32 +3369,32 @@ static void handle_copy_query_pool_results(struct vk_cmd_queue_entry *cmd, state->pctx->get_query_result_resource(state->pctx, pool->queries[i], flags, - copycmd->flags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32, + vkflags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32, bit, - lvp_buffer_from_handle(copycmd->dst_buffer)->bo, + dstbuffer->bo, offset + num_results++ * result_size); } else { state->pctx->get_query_result_resource(state->pctx, pool->queries[i], flags, - copycmd->flags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32, + vkflags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32, 0, - lvp_buffer_from_handle(copycmd->dst_buffer)->bo, + dstbuffer->bo, offset); } } else { /* if no queries emitted yet, just reset the buffer to 0 so avail is reported correctly */ - if (copycmd->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + if (vkflags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { struct pipe_transfer *src_t; uint32_t *map; struct pipe_box box = {0}; box.x = offset; - box.width = copycmd->stride; + box.width = stride; box.height = 1; box.depth = 1; map = state->pctx->buffer_map(state->pctx, - lvp_buffer_from_handle(copycmd->dst_buffer)->bo, 0, PIPE_MAP_READ, &box, + dstbuffer->bo, 0, PIPE_MAP_READ, &box, &src_t); memset(map, 0, box.width); @@ -3361,6 +3620,35 @@ static void handle_draw_indirect_count(struct vk_cmd_queue_entry *cmd, pipe_resource_reference(&index, NULL); } +static void handle_draw_indirect_count2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state, bool indexed) +{ + struct pipe_draw_start_count_bias draw = {0}; + struct pipe_resource *index = NULL; + if (indexed) { + state->info.index_bounds_valid = false; + state->info.index_size = state->index_size; + state->info.index.resource = state->index_buffer; + state->info.max_index = ~0U; + if (state->index_offset || state->index_buffer_size != UINT32_MAX) { + struct pipe_transfer *xfer; + uint8_t *mem = pipe_buffer_map(state->pctx, state->index_buffer, 0, &xfer); + state->pctx->buffer_unmap(state->pctx, xfer); + index = get_buffer_resource(state->pctx, mem + state->index_offset); + index->width0 = MIN2(state->index_buffer->width0 - state->index_offset, state->index_buffer_size); + state->info.index.resource = index; + } + } else + state->info.index_size = 0; + state->indirect_info.draw_count = cmd->u.draw_indirect_count2_khr.info->maxDrawCount; + state->indirect_info.stride = cmd->u.draw_indirect_count2_khr.info->addressRange.stride; + state->indirect_info.buffer = get_bda_internal_resource(state, cmd->u.draw_indirect_count2_khr.info->addressRange.address, &state->indirect_info.offset); + state->indirect_info.indirect_draw_count = get_bda_internal_resource(state, cmd->u.draw_indirect_count2_khr.info->countAddressRange.address, &state->indirect_info.indirect_draw_count_offset); + + state->pctx->draw_vbo(state->pctx, &state->info, 0, &state->indirect_info, &draw, 1); + pipe_resource_reference(&index, NULL); +} + static void handle_push_descriptor_set(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -3452,6 +3740,30 @@ static void handle_bind_transform_feedback_buffers(struct vk_cmd_queue_entry *cm state->num_so_targets = btfb->first_binding + btfb->binding_count; } 
+static void handle_bind_transform_feedback_buffers2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + struct vk_cmd_bind_transform_feedback_buffers2_ext *btfb = &cmd->u.bind_transform_feedback_buffers2_ext; + + for (unsigned i = 0; i < btfb->binding_count; i++) { + int idx = i + btfb->first_binding; + uint32_t size; + size_t offset; + VK_FROM_HANDLE(lvp_buffer, buf, get_buffer(state, (void*)(uintptr_t)btfb->binding_infos[i].addressRange.address, &offset)); + + size = vk_buffer_range(&buf->vk, offset, btfb->binding_infos[i].addressRange.size ? btfb->binding_infos[i].addressRange.size : VK_WHOLE_SIZE); + + if (state->so_targets[idx]) + state->pctx->stream_output_target_destroy(state->pctx, state->so_targets[idx]); + + state->so_targets[idx] = state->pctx->create_stream_output_target(state->pctx, + buf->bo, + offset, + size); + } + state->num_so_targets = btfb->first_binding + btfb->binding_count; +} + static void handle_begin_transform_feedback(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -3472,6 +3784,28 @@ static void handle_begin_transform_feedback(struct vk_cmd_queue_entry *cmd, state->so_targets, offsets, MESA_PRIM_UNKNOWN); } +static void handle_begin_transform_feedback2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + struct vk_cmd_begin_transform_feedback2_ext *btf = &cmd->u.begin_transform_feedback2_ext; + uint32_t offsets[4] = {0}; + + for (unsigned i = 0; btf->counter_infos && i < btf->counter_range_count; i++) { + if (!btf->counter_infos[i].addressRange.address) + continue; + + unsigned counter_offset; + struct pipe_resource *pres = get_bda_internal_resource(state, btf->counter_infos[i].addressRange.address, &counter_offset); + pipe_buffer_read(state->pctx, + pres, + counter_offset, + 4, + &offsets[i]); + } + state->pctx->set_stream_output_targets(state->pctx, state->num_so_targets, + state->so_targets, offsets, MESA_PRIM_UNKNOWN); +} + static void handle_end_transform_feedback(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -3495,6 +3829,31 @@ static void handle_end_transform_feedback(struct vk_cmd_queue_entry *cmd, state->pctx->set_stream_output_targets(state->pctx, 0, NULL, NULL, 0); } +static void handle_end_transform_feedback2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + struct vk_cmd_end_transform_feedback2_ext *etf = &cmd->u.end_transform_feedback2_ext; + + if (etf->counter_range_count) { + for (unsigned i = 0; etf->counter_infos && i < etf->counter_range_count; i++) { + if (!etf->counter_infos[i].addressRange.address) + continue; + + uint32_t offset; + offset = state->pctx->stream_output_target_offset(state->so_targets[i]); + + unsigned counter_offset; + struct pipe_resource *pres = get_bda_internal_resource(state, etf->counter_infos[i].addressRange.address, &counter_offset); + pipe_buffer_write(state->pctx, + pres, + counter_offset, + 4, + &offset); + } + } + state->pctx->set_stream_output_targets(state->pctx, 0, NULL, NULL, 0); +} + static void handle_draw_indirect_byte_count(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -3515,6 +3874,27 @@ static void handle_draw_indirect_byte_count(struct vk_cmd_queue_entry *cmd, state->pctx->draw_vbo(state->pctx, &state->info, 0, NULL, &draw, 1); } +static void handle_draw_indirect_byte_count2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + struct vk_cmd_draw_indirect_byte_count2_ext *dibc = &cmd->u.draw_indirect_byte_count2_ext; + struct pipe_draw_start_count_bias draw = {0}; + + 
unsigned counter_offset; + struct pipe_resource *pres = get_bda_internal_resource(state, dibc->counter_info->addressRange.address, &counter_offset); + pipe_buffer_read(state->pctx, + pres, + counter_offset, + 4, &draw.count); + + state->info.start_instance = cmd->u.draw_indirect_byte_count2_ext.first_instance; + state->info.instance_count = cmd->u.draw_indirect_byte_count2_ext.instance_count; + state->info.index_size = 0; + + draw.count /= cmd->u.draw_indirect_byte_count2_ext.vertex_stride; + state->pctx->draw_vbo(state->pctx, &state->info, 0, NULL, &draw, 1); +} + static void lvp_emit_conditional_rendering(struct rendering_state *state) { @@ -3540,6 +3920,19 @@ static void handle_begin_conditional_rendering(struct vk_cmd_queue_entry *cmd, lvp_emit_conditional_rendering(state); } +static void handle_begin_conditional_rendering2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + struct VkConditionalRenderingBeginInfo2EXT *bcr = cmd->u.begin_conditional_rendering2_ext.conditional_rendering_begin; + unsigned offset; + struct pipe_resource *pres = get_bda_internal_resource(state, bcr->addressRange.address, &offset); + state->conditional_rendering.enabled = true; + state->pctx->render_condition_mem(state->pctx, + pres, + offset, + bcr->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT); +} + static void handle_end_conditional_rendering(struct rendering_state *state) { state->conditional_rendering.enabled = false; @@ -4036,6 +4429,15 @@ static void handle_draw_mesh_tasks_indirect(struct vk_cmd_queue_entry *cmd, state->pctx->draw_mesh_tasks(state->pctx, &state->dispatch_info); } +static void handle_draw_mesh_tasks_indirect2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + state->dispatch_info.indirect = get_bda_internal_resource(state, cmd->u.draw_mesh_tasks_indirect2_ext.info->addressRange.address, &state->dispatch_info.indirect_offset); + state->dispatch_info.indirect_stride = cmd->u.draw_mesh_tasks_indirect2_ext.info->addressRange.stride; + state->dispatch_info.draw_count = cmd->u.draw_mesh_tasks_indirect2_ext.info->drawCount; + state->pctx->draw_mesh_tasks(state->pctx, &state->dispatch_info); +} + static void handle_draw_mesh_tasks_indirect_count(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -4048,23 +4450,14 @@ static void handle_draw_mesh_tasks_indirect_count(struct vk_cmd_queue_entry *cmd state->pctx->draw_mesh_tasks(state->pctx, &state->dispatch_info); } -static VkBuffer -get_buffer(struct rendering_state *state, const uint8_t *ptr, size_t *offset) +static void handle_draw_mesh_tasks_indirect_count2(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) { - simple_mtx_lock(&state->device->bda_lock); - hash_table_foreach(&state->device->bda, he) { - const uint8_t *bda = he->key; - if (ptr < bda) - continue; - struct lvp_buffer *buffer = he->data; - if (bda + buffer->vk.size > ptr) { - *offset = ptr - bda; - simple_mtx_unlock(&state->device->bda_lock); - return lvp_buffer_to_handle(buffer); - } - } - fprintf(stderr, "unrecognized BDA!\n"); - abort(); + state->dispatch_info.indirect = get_bda_internal_resource(state, cmd->u.draw_mesh_tasks_indirect_count2_ext.info->addressRange.address, &state->dispatch_info.indirect_offset); + state->dispatch_info.indirect_stride = cmd->u.draw_mesh_tasks_indirect_count2_ext.info->addressRange.stride; + state->dispatch_info.draw_count = cmd->u.draw_mesh_tasks_indirect_count2_ext.info->maxDrawCount; + state->dispatch_info.indirect_draw_count = get_bda_internal_resource(state, 
cmd->u.draw_mesh_tasks_indirect_count2_ext.info->countAddressRange.address, &state->dispatch_info.indirect_draw_count_offset); + state->pctx->draw_mesh_tasks(state->pctx, &state->dispatch_info); } static size_t @@ -4589,14 +4982,6 @@ handle_dispatch_graph(struct vk_cmd_queue_entry *cmd, struct rendering_state *st } #endif -static struct pipe_resource * -get_buffer_pipe(struct rendering_state *state, const void *ptr) -{ - size_t offset; - VK_FROM_HANDLE(lvp_buffer, buffer, get_buffer(state, ptr, &offset)); - return buffer->bo; -} - static void handle_copy_acceleration_structure(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -4945,6 +5330,7 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp) ENQUEUE_CMD(CmdCopyImageToBuffer2) ENQUEUE_CMD(CmdUpdateBuffer) ENQUEUE_CMD(CmdFillBuffer) + ENQUEUE_CMD(CmdFillMemoryKHR) ENQUEUE_CMD(CmdClearColorImage) ENQUEUE_CMD(CmdClearDepthStencilImage) ENQUEUE_CMD(CmdClearAttachments) @@ -4957,6 +5343,7 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp) ENQUEUE_CMD(CmdEndQuery) ENQUEUE_CMD(CmdResetQueryPool) ENQUEUE_CMD(CmdCopyQueryPoolResults) + ENQUEUE_CMD(CmdCopyQueryPoolResultsToMemoryKHR) ENQUEUE_CMD(CmdExecuteCommands) ENQUEUE_CMD(CmdDrawIndirectCount) ENQUEUE_CMD(CmdDrawIndexedIndirectCount) @@ -5067,6 +5454,25 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp) ENQUEUE_CMD(CmdSetColorBlendAdvancedEXT) + ENQUEUE_CMD(CmdBindIndexBuffer3KHR) + ENQUEUE_CMD(CmdBindVertexBuffers3KHR) + ENQUEUE_CMD(CmdDrawIndirect2KHR) + ENQUEUE_CMD(CmdDrawIndexedIndirect2KHR) + ENQUEUE_CMD(CmdDrawIndirectCount2KHR) + ENQUEUE_CMD(CmdDrawIndexedIndirectCount2KHR) + ENQUEUE_CMD(CmdDispatchIndirect2KHR) + ENQUEUE_CMD(CmdDrawMeshTasksIndirect2EXT) + ENQUEUE_CMD(CmdDrawMeshTasksIndirectCount2EXT) + ENQUEUE_CMD(CmdCopyMemoryKHR) + ENQUEUE_CMD(CmdCopyMemoryToImageKHR) + ENQUEUE_CMD(CmdCopyImageToMemoryKHR) + ENQUEUE_CMD(CmdUpdateMemoryKHR) + ENQUEUE_CMD(CmdBeginConditionalRendering2EXT) + ENQUEUE_CMD(CmdBindTransformFeedbackBuffers2EXT) + ENQUEUE_CMD(CmdBeginTransformFeedback2EXT) + ENQUEUE_CMD(CmdEndTransformFeedback2EXT) + ENQUEUE_CMD(CmdDrawIndirectByteCount2EXT) + #undef ENQUEUE_CMD } @@ -5239,6 +5645,7 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds, handle_reset_query_pool(cmd, state); break; case VK_CMD_COPY_QUERY_POOL_RESULTS: + case VK_CMD_COPY_QUERY_POOL_RESULTS_TO_MEMORY_KHR: handle_copy_query_pool_results(cmd, state); break; case VK_CMD_PUSH_CONSTANTS2: @@ -5337,6 +5744,71 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds, case VK_CMD_END_RENDERING2_KHR: handle_end_rendering(cmd, state); break; + case VK_CMD_BIND_INDEX_BUFFER3_KHR: + handle_index_buffer3(cmd, state); + break; + case VK_CMD_BIND_VERTEX_BUFFERS3_KHR: + handle_vertex_buffers3(cmd, state); + break; + case VK_CMD_DRAW_INDIRECT2_KHR: + emit_state(state); + handle_draw_indirect2(cmd, state, false); + break; + case VK_CMD_DRAW_INDEXED_INDIRECT2_KHR: + emit_state(state); + handle_draw_indirect2(cmd, state, true); + break; + case VK_CMD_DRAW_INDIRECT_COUNT2_KHR: + emit_state(state); + handle_draw_indirect_count2(cmd, state, false); + break; + case VK_CMD_DRAW_INDEXED_INDIRECT_COUNT2_KHR: + emit_state(state); + handle_draw_indirect_count2(cmd, state, true); + break; + case VK_CMD_DISPATCH_INDIRECT2_KHR: + emit_compute_state(state); + handle_dispatch_indirect2(cmd, state); + break; + case VK_CMD_DRAW_MESH_TASKS_INDIRECT2_EXT: + emit_state(state); + handle_draw_mesh_tasks_indirect2(cmd, state); 
+ break; + case VK_CMD_DRAW_MESH_TASKS_INDIRECT_COUNT2_EXT: + emit_state(state); + handle_draw_mesh_tasks_indirect_count2(cmd, state); + break; + case VK_CMD_COPY_MEMORY_KHR: + handle_copy_memory(cmd, state); + break; + case VK_CMD_COPY_MEMORY_TO_IMAGE_KHR: + handle_copy_memory_to_image(cmd, state); + break; + case VK_CMD_COPY_IMAGE_TO_MEMORY_KHR: + handle_copy_image_to_memory(cmd, state); + break; + case VK_CMD_UPDATE_MEMORY_KHR: + handle_update_memory(cmd, state); + break; + case VK_CMD_FILL_MEMORY_KHR: + handle_fill_memory(cmd, state); + break; + case VK_CMD_BEGIN_CONDITIONAL_RENDERING2_EXT: + handle_begin_conditional_rendering2(cmd, state); + break; + case VK_CMD_BIND_TRANSFORM_FEEDBACK_BUFFERS2_EXT: + handle_bind_transform_feedback_buffers2(cmd, state); + break; + case VK_CMD_BEGIN_TRANSFORM_FEEDBACK2_EXT: + handle_begin_transform_feedback2(cmd, state); + break; + case VK_CMD_END_TRANSFORM_FEEDBACK2_EXT: + handle_end_transform_feedback2(cmd, state); + break; + case VK_CMD_DRAW_INDIRECT_BYTE_COUNT2_EXT: + emit_state(state); + handle_draw_indirect_byte_count2(cmd, state); + break; case VK_CMD_SET_DEVICE_MASK: /* no-op */ break; @@ -5576,6 +6048,8 @@ VkResult lvp_execute_cmds(struct lvp_device *device, for (unsigned i = 0; i < ARRAY_SIZE(state->desc_buffers); i++) pipe_resource_reference(&state->desc_buffers[i], NULL); + for (unsigned i = 0; i < ARRAY_SIZE(state->vb); i++) + pipe_resource_reference(&state->vb[i].buffer.resource, NULL); if (state->advanced_blend_fs_variant) state->pctx->delete_fs_state(state->pctx, state->advanced_blend_fs_variant);
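
Note on the common lookup path: the core of the new handlers is the device-address-to-buffer resolution in get_buffer()/get_bda_internal_resource(), which matches a raw address against the device's bda hash table (buffer device address -> lvp_buffer) under bda_lock and returns the owning buffer plus a byte offset that the existing gallium draw/dispatch/copy paths consume. The standalone C sketch below illustrates only that resolution step; the fake_buffer type, the array-backed table, and main() are illustrative stand-ins, not part of lavapipe.

/* Sketch: resolve a buffer device address to (buffer, offset), mirroring
 * get_buffer()/get_bda_internal_resource() above.  Assumes non-overlapping
 * address ranges, one per live VkBuffer. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_buffer {
   uint64_t bda;          /* base device address of the buffer */
   uint64_t size;         /* buffer size in bytes */
   const char *name;
};

static const struct fake_buffer *
resolve_bda(const struct fake_buffer *bufs, unsigned count,
            uint64_t addr, uint64_t *offset)
{
   if (!addr) {
      *offset = 0;
      return NULL;        /* a null address binds nothing, as in get_buffer() */
   }
   for (unsigned i = 0; i < count; i++) {
      if (addr >= bufs[i].bda && addr < bufs[i].bda + bufs[i].size) {
         *offset = addr - bufs[i].bda;   /* byte offset into the owning buffer */
         return &bufs[i];
      }
   }
   fprintf(stderr, "unrecognized BDA!\n");
   abort();               /* same hard failure the driver uses */
}

int main(void)
{
   const struct fake_buffer bufs[] = {
      { 0x100000, 4096,  "vbo" },
      { 0x200000, 65536, "ibo" },
   };
   uint64_t offset;
   const struct fake_buffer *b = resolve_bda(bufs, 2, 0x200040, &offset);
   printf("0x200040 -> %s + %llu bytes\n", b->name, (unsigned long long)offset);
   return 0;
}

Because buffer device addresses are unique, non-overlapping ranges, any in-range address maps back to exactly one (buffer, offset) pair, which is why the handlers can feed the resulting offset directly into pipe_draw_indirect_info, stream-output targets, and the copy/fill/update paths without further translation.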