diff --git a/src/amd/vulkan/meta/radv_meta.c b/src/amd/vulkan/meta/radv_meta.c index d73019db877..bc9e3ce7dac 100644 --- a/src/amd/vulkan/meta/radv_meta.c +++ b/src/amd/vulkan/meta/radv_meta.c @@ -467,3 +467,14 @@ radv_get_copy_flags_from_bo(const struct radeon_winsys_bo *bo) return copy_flags; } + +VkAddressCopyFlagsKHR +radv_get_copy_flags_from_command_flags(VkAddressCommandFlagsKHR command_flags) +{ + VkAddressCopyFlagsKHR copy_flags = 0; + + if (!(command_flags & VK_ADDRESS_COMMAND_FULLY_BOUND_BIT_KHR)) + copy_flags |= VK_ADDRESS_COPY_SPARSE_BIT_KHR; + + return copy_flags; +} diff --git a/src/amd/vulkan/meta/radv_meta.h b/src/amd/vulkan/meta/radv_meta.h index 4aef0a3e237..6f2f5334402 100644 --- a/src/amd/vulkan/meta/radv_meta.h +++ b/src/amd/vulkan/meta/radv_meta.h @@ -352,6 +352,8 @@ VkResult radv_meta_get_noop_pipeline_layout(struct radv_device *device, VkPipeli VkAddressCopyFlagsKHR radv_get_copy_flags_from_bo(const struct radeon_winsys_bo *bo); +VkAddressCopyFlagsKHR radv_get_copy_flags_from_command_flags(VkAddressCommandFlagsKHR command_flags); + static inline unsigned radv_get_image_stride_for_96bit(const struct radv_device *device, const struct radv_image *image) { diff --git a/src/amd/vulkan/meta/radv_meta_buffer.c b/src/amd/vulkan/meta/radv_meta_buffer.c index 1c4b376cf94..66f3889e392 100644 --- a/src/amd/vulkan/meta/radv_meta_buffer.c +++ b/src/amd/vulkan/meta/radv_meta_buffer.c @@ -332,6 +332,24 @@ radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSi radv_resume_conditional_rendering(cmd_buffer); } +VKAPI_ATTR void VKAPI_CALL +radv_CmdFillMemoryKHR(VkCommandBuffer commandBuffer, const VkDeviceAddressRangeKHR *pDstRange, + VkAddressCommandFlagsKHR dstFlags, uint32_t data) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + VkAddressCopyFlagsKHR dst_copy_flags = radv_get_copy_flags_from_command_flags(dstFlags); + + radv_suspend_conditional_rendering(cmd_buffer); + + radv_meta_begin(cmd_buffer); + + 
radv_fill_memory(cmd_buffer, pDstRange->address, pDstRange->size, data, dst_copy_flags); + + radv_meta_end(cmd_buffer); + + radv_resume_conditional_rendering(cmd_buffer); +} + void radv_copy_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size, VkAddressCopyFlagsKHR src_copy_flags, VkAddressCopyFlagsKHR dst_copy_flags) @@ -382,6 +400,30 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop radv_resume_conditional_rendering(cmd_buffer); } +VKAPI_ATTR void VKAPI_CALL +radv_CmdCopyMemoryKHR(VkCommandBuffer commandBuffer, const VkCopyDeviceMemoryInfoKHR *pCopyMemoryInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + radv_suspend_conditional_rendering(cmd_buffer); + + radv_meta_begin(cmd_buffer); + + for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) { + const VkDeviceMemoryCopyKHR *region = &pCopyMemoryInfo->pRegions[r]; + + VkAddressCopyFlagsKHR src_copy_flags = radv_get_copy_flags_from_command_flags(region->srcFlags); + VkAddressCopyFlagsKHR dst_copy_flags = radv_get_copy_flags_from_command_flags(region->dstFlags); + + radv_copy_memory(cmd_buffer, region->srcRange.address, region->dstRange.address, region->srcRange.size, + src_copy_flags, dst_copy_flags); + } + + radv_meta_end(cmd_buffer); + + radv_resume_conditional_rendering(cmd_buffer); +} + void radv_update_memory_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, uint64_t size) { @@ -449,3 +491,22 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice radv_resume_conditional_rendering(cmd_buffer); } + +VKAPI_ATTR void VKAPI_CALL +radv_CmdUpdateMemoryKHR(VkCommandBuffer commandBuffer, const VkDeviceAddressRangeKHR *pDstRange, + VkAddressCommandFlagsKHR dstFlags, VkDeviceSize dataSize, const void *pData) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + VkAddressCopyFlagsKHR dst_copy_flags = radv_get_copy_flags_from_command_flags(dstFlags); + + 
radv_suspend_conditional_rendering(cmd_buffer); + + radv_meta_begin(cmd_buffer); + + radv_update_memory(cmd_buffer, pDstRange->address, dataSize, pData, dst_copy_flags); + + radv_meta_end(cmd_buffer); + + radv_resume_conditional_rendering(cmd_buffer); +} diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c index b59f17a7633..2e4600e1085 100644 --- a/src/amd/vulkan/meta/radv_meta_copy.c +++ b/src/amd/vulkan/meta/radv_meta_copy.c @@ -75,18 +75,16 @@ alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer) return true; } -static void gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, - uint64_t buffer_size, VkAddressCopyFlagsKHR src_copy_flags, - struct radv_image *image, VkImageLayout layout, - const VkBufferImageCopy2 *region, const bool use_compute); +static void gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, + VkAddressCopyFlagsKHR src_copy_flags, struct radv_image *image, + const VkDeviceMemoryImageCopyKHR *region, bool use_compute); + +static void compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR dst_copy_flags, + struct radv_image *image, const VkDeviceMemoryImageCopyKHR *region); -static void compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size, - VkAddressCopyFlagsKHR dst_copy_flags, struct radv_image *image, - VkImageLayout layout, const VkBufferImageCopy2 *region); static void -transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_va, uint64_t buffer_size, - VkAddressCopyFlagsKHR buffer_flags, struct radv_image *image, const VkImageLayout layout, - const VkBufferImageCopy2 *region, bool to_image) +transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR buffer_flags, + struct radv_image *image, const VkDeviceMemoryImageCopyKHR *region, bool to_image) { const struct radv_device *device = 
radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; @@ -101,10 +99,9 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v radv_gang_cache_flush(cmd_buffer); if (to_image) { - gfx_or_compute_copy_memory_to_image(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout, region, - true); + gfx_or_compute_copy_memory_to_image(cmd_buffer, buffer_flags, image, region, true); } else { - compute_copy_image_to_memory(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout, region); + compute_copy_image_to_memory(cmd_buffer, buffer_flags, image, region); } return; @@ -116,9 +113,9 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v const VkOffset3D img_offset_el = vk_image_offset_to_elements(&image->vk, region->imageOffset); const VkExtent3D img_extent_el = vk_image_extent_to_elements(&image->vk, region->imageExtent); - struct ac_sdma_surf buf = radv_sdma_get_buf_surf(buffer_va, image, region); + struct ac_sdma_surf buf = radv_sdma_get_buf_surf(image, region); const struct ac_sdma_surf img = - radv_sdma_get_surf(cmd_buffer, image, layout, region->imageSubresource, img_offset_el); + radv_sdma_get_surf(cmd_buffer, image, region->imageLayout, region->imageSubresource, img_offset_el); const VkExtent3D extent = radv_sdma_get_copy_extent(image, region->imageSubresource, img_extent_el); if (radv_sdma_use_unaligned_buffer_image_copy(device, &buf, &img, extent)) { @@ -192,9 +189,9 @@ radv_fixup_copy_dst_htile_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra } static void -gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size, - VkAddressCopyFlagsKHR src_copy_flags, struct radv_image *image, - VkImageLayout layout, const VkBufferImageCopy2 *region, const bool use_compute) +gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR src_copy_flags, + struct radv_image *image, const 
VkDeviceMemoryImageCopyKHR *region, + bool use_compute) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -205,8 +202,8 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t assert(image->vk.samples == 1); if (use_compute) { - radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, layout, &region->imageSubresource, &region->imageOffset, - &region->imageExtent, true); + radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, region->imageLayout, &region->imageSubresource, + &region->imageOffset, &region->imageExtent, true); } /** @@ -226,14 +223,15 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t /* Create blit surfaces */ struct radv_meta_blit2d_surf img_bsurf = - radv_blit_surf_for_image_level_layer(image, layout, &region->imageSubresource); + radv_blit_surf_for_image_level_layer(image, region->imageLayout, &region->imageSubresource); if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) { const uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf); const VkFormat raw_format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format)); if (!radv_dcc_formats_compatible(pdev->info.gfx_level, img_bsurf.format, raw_format, NULL) && - radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask)) { + radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, region->imageLayout, + queue_mask)) { radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON); radv_decompress_dcc(cmd_buffer, image, @@ -252,12 +250,11 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t img_bsurf.format = raw_format; } - const struct vk_image_buffer_layout buf_layout = vk_image_buffer_copy_layout(&image->vk, region); + const struct vk_image_buffer_layout buf_layout = vk_image_memory_copy_layout(&image->vk, region); struct 
radv_meta_blit2d_buffer buf_bsurf = { - .addr = buffer_addr, - .size = buffer_size, + .addr = region->addressRange.address, + .size = region->addressRange.size, .format = img_bsurf.format, - .offset = region->bufferOffset, .pitch = buf_layout.row_stride_B / buf_layout.element_size_B, .copy_flags = src_copy_flags, }; @@ -291,8 +288,8 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t } if (use_compute) { - radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, layout, &region->imageSubresource, &region->imageOffset, - &region->imageExtent, false); + radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, region->imageLayout, &region->imageSubresource, + &region->imageOffset, &region->imageExtent, false); } } @@ -321,14 +318,22 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[bind_idx].bo); + const VkDeviceMemoryImageCopyKHR copy = { + .sType = VK_STRUCTURE_TYPE_DEVICE_MEMORY_IMAGE_COPY_KHR, + .addressRange = vk_device_address_range(&src_buffer->vk, region->bufferOffset, VK_WHOLE_SIZE), + .addressRowLength = region->bufferRowLength, + .addressImageHeight = region->bufferImageHeight, + .imageSubresource = region->imageSubresource, + .imageLayout = pCopyBufferToImageInfo->dstImageLayout, + .imageOffset = region->imageOffset, + .imageExtent = region->imageExtent, + }; + if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { - transfer_copy_memory_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, src_copy_flags, - dst_image, pCopyBufferToImageInfo->dstImageLayout, region, true); + transfer_copy_memory_image(cmd_buffer, src_copy_flags, dst_image, &copy, true); } else { const bool use_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(dst_image); - gfx_or_compute_copy_memory_to_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, - src_copy_flags, dst_image, pCopyBufferToImageInfo->dstImageLayout, region, - use_compute); + 
gfx_or_compute_copy_memory_to_image(cmd_buffer, src_copy_flags, dst_image, &copy, use_compute); } } @@ -360,10 +365,65 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm radv_resume_conditional_rendering(cmd_buffer); } +VKAPI_ATTR void VKAPI_CALL +radv_CmdCopyMemoryToImageKHR(VkCommandBuffer commandBuffer, const VkCopyDeviceMemoryImageInfoKHR *pCopyMemoryInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(radv_image, dst_image, pCopyMemoryInfo->image); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_suspend_conditional_rendering(cmd_buffer); + + radv_meta_begin(cmd_buffer); + + for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) { + const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r]; + const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask; + const unsigned bind_idx = dst_image->disjoint ? 
radv_plane_from_aspect(aspect_mask) : 0; + + radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[bind_idx].bo); + + VkAddressCopyFlagsKHR copy_flags = radv_get_copy_flags_from_command_flags(region->addressFlags); + + if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { + transfer_copy_memory_image(cmd_buffer, copy_flags, dst_image, region, true); + } else { + const bool use_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(dst_image); + gfx_or_compute_copy_memory_to_image(cmd_buffer, copy_flags, dst_image, region, use_compute); + } + } + + if (radv_is_format_emulated(pdev, dst_image->vk.format) && cmd_buffer->qf != RADV_QUEUE_TRANSFER) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | + radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, 0, dst_image, NULL) | + radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_ACCESS_TRANSFER_READ_BIT, 0, dst_image, NULL); + + const enum util_format_layout format_layout = radv_format_description(dst_image->vk.format)->layout; + for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) { + const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r]; + + if (format_layout == UTIL_FORMAT_LAYOUT_ASTC) { + radv_meta_decode_astc(cmd_buffer, dst_image, region->imageLayout, &region->imageSubresource, + region->imageOffset, region->imageExtent); + } else { + radv_meta_decode_etc(cmd_buffer, dst_image, region->imageLayout, &region->imageSubresource, + region->imageOffset, region->imageExtent); + } + } + } + + radv_meta_end(cmd_buffer); + + radv_resume_conditional_rendering(cmd_buffer); +} + static void -compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size, - VkAddressCopyFlagsKHR dst_copy_flags, struct radv_image *image, VkImageLayout layout, - const VkBufferImageCopy2 *region) +compute_copy_image_to_memory(struct 
radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR dst_copy_flags, + struct radv_image *image, const VkDeviceMemoryImageCopyKHR *region) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -380,8 +440,8 @@ compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer */ const VkOffset3D img_offset_el = vk_image_offset_to_elements(&image->vk, region->imageOffset); const VkExtent3D bufferExtent = { - .width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width, - .height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height, + .width = region->addressRowLength ? region->addressRowLength : region->imageExtent.width, + .height = region->addressImageHeight ? region->addressImageHeight : region->imageExtent.height, }; const VkExtent3D buf_extent_el = vk_image_extent_to_elements(&image->vk, bufferExtent); @@ -390,14 +450,15 @@ compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer /* Create blit surfaces */ struct radv_meta_blit2d_surf img_info = - radv_blit_surf_for_image_level_layer(image, layout, &region->imageSubresource); + radv_blit_surf_for_image_level_layer(image, region->imageLayout, &region->imageSubresource); if (!radv_is_buffer_format_supported(img_info.format, NULL)) { const uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf); const VkFormat raw_format = vk_format_for_size(vk_format_get_blocksize(img_info.format)); if (!radv_dcc_formats_compatible(pdev->info.gfx_level, img_info.format, raw_format, NULL) && - radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask)) { + radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, region->imageLayout, + queue_mask)) { radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON); radv_decompress_dcc(cmd_buffer, image, @@ -417,10 
+478,9 @@ compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer } struct radv_meta_blit2d_buffer buf_info = { - .addr = buffer_addr, - .size = buffer_size, + .addr = region->addressRange.address, + .size = region->addressRange.size, .format = img_info.format, - .offset = region->bufferOffset, .pitch = buf_extent_el.width, .copy_flags = dst_copy_flags, }; @@ -469,12 +529,55 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[bind_idx].bo); + VkDeviceMemoryImageCopyKHR copy = { + .sType = VK_STRUCTURE_TYPE_DEVICE_MEMORY_IMAGE_COPY_KHR, + .addressRange = vk_device_address_range(&dst_buffer->vk, region->bufferOffset, VK_WHOLE_SIZE), + .addressFlags = dst_buffer->vk.address_flags, + .addressRowLength = region->bufferRowLength, + .addressImageHeight = region->bufferImageHeight, + .imageSubresource = region->imageSubresource, + .imageLayout = pCopyImageToBufferInfo->srcImageLayout, + .imageOffset = region->imageOffset, + .imageExtent = region->imageExtent, + }; + if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { - transfer_copy_memory_image(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags, - src_image, pCopyImageToBufferInfo->srcImageLayout, region, false); + transfer_copy_memory_image(cmd_buffer, dst_copy_flags, src_image, &copy, false); } else { - compute_copy_image_to_memory(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags, - src_image, pCopyImageToBufferInfo->srcImageLayout, region); + compute_copy_image_to_memory(cmd_buffer, dst_copy_flags, src_image, &copy); + } + } + + radv_meta_end(cmd_buffer); + + radv_resume_conditional_rendering(cmd_buffer); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdCopyImageToMemoryKHR(VkCommandBuffer commandBuffer, const VkCopyDeviceMemoryImageInfoKHR *pCopyMemoryInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(radv_image, src_image, 
pCopyMemoryInfo->image); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_suspend_conditional_rendering(cmd_buffer); + + radv_meta_begin(cmd_buffer); + + for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) { + const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r]; + const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask; + const unsigned bind_idx = src_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0; + + VkAddressCopyFlagsKHR copy_flags = radv_get_copy_flags_from_command_flags(region->addressFlags); + + radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[bind_idx].bo); + + if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { + transfer_copy_memory_image(cmd_buffer, copy_flags, src_image, region, false); + } else { + compute_copy_image_to_memory(cmd_buffer, copy_flags, src_image, region); } } diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 14a58095c04..e40644e0580 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -7916,9 +7916,41 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + STACK_ARRAY(VkBindVertexBuffer3InfoKHR, bindings, bindingCount); + + for (uint32_t i = 0; i < bindingCount; i++) { + VK_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]); + VkStridedDeviceAddressRangeKHR addr_range = {0}; + + if (buffer) { + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + + addr_range = vk_strided_device_address_range( + &buffer->vk, pOffsets[i], pSizes != NULL ? pSizes[i] : VK_WHOLE_SIZE, pStrides != NULL ? 
pStrides[i] : 0); + } + + bindings[i] = (VkBindVertexBuffer3InfoKHR){ + .sType = VK_STRUCTURE_TYPE_BIND_VERTEX_BUFFER_3_INFO_KHR, + .addressRange = addr_range, + .addressFlags = buffer ? buffer->vk.address_flags : 0, + .setStride = pStrides != NULL, + }; + } + + radv_CmdBindVertexBuffers3KHR(commandBuffer, firstBinding, bindingCount, bindingCount > 0 ? bindings : NULL); + + STACK_ARRAY_FINISH(bindings); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdBindVertexBuffers3KHR(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, + const VkBindVertexBuffer3InfoKHR *pBindingInfos) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - struct radv_cmd_stream *cs = cmd_buffer->cs; /* We have to defer setting up vertex buffer since we need the buffer * stride from the pipeline. */ @@ -7928,26 +7960,27 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t misaligned_mask_invalid = 0; for (uint32_t i = 0; i < bindingCount; i++) { - VK_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]); + const VkBindVertexBuffer3InfoKHR *binding_info = &pBindingInfos[i]; uint32_t idx = firstBinding + i; - VkDeviceSize size = pSizes ? pSizes[i] : VK_WHOLE_SIZE; - VkDeviceSize stride = pStrides ? pStrides[i] : d->vk.vi_binding_strides[idx]; - uint64_t addr = buffer ? vk_buffer_address(&buffer->vk, pOffsets[i]) : 0; - if (!!vb[idx].addr != !!addr || (addr && (((vb[idx].addr & 0x3) != (addr & 0x3) || - (d->vk.vi_binding_strides[idx] & 0x3) != (stride & 0x3))))) { + VkDeviceSize size = binding_info->addressRange.size; + VkDeviceSize stride = binding_info->setStride ? binding_info->addressRange.stride : d->vk.vi_binding_strides[idx]; + uint64_t addr = size ? 
binding_info->addressRange.address : 0; + + if (!!vb[idx].addr != !!addr || + (addr && ((vb[idx].addr & 0x3) != (addr & 0x3) || (d->vk.vi_binding_strides[idx] & 0x3) != (stride & 0x3)))) { misaligned_mask_invalid |= d->vertex_input.bindings_match_attrib ? BITFIELD_BIT(idx) : 0xffffffff; } vb[idx].addr = addr; - vb[idx].size = buffer ? vk_buffer_range(&buffer->vk, pOffsets[i], size) : 0; - /* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */ - if (pStrides) - radv_cmd_set_vertex_binding_strides(cmd_buffer, idx, 1, (uint16_t *)&pStrides[i]); + vb[idx].size = size; + + /* If setStride=false, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */ + if (binding_info->setStride) + radv_cmd_set_vertex_binding_strides(cmd_buffer, idx, 1, &(uint16_t){stride}); uint32_t bit = BITFIELD_BIT(idx); - if (buffer) { - radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + if (size) { cmd_buffer->state.vbo_bound_mask |= bit; } else { cmd_buffer->state.vbo_bound_mask &= ~bit; @@ -8003,15 +8036,36 @@ radv_CmdBindIndexBuffer2(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDevic VK_FROM_HANDLE(radv_buffer, index_buffer, buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; - - cmd_buffer->state.index_type = vk_to_index_type(indexType); + VkDeviceAddressRangeKHR addr_range = {0}; if (index_buffer) { - cmd_buffer->state.index_va = vk_buffer_address(&index_buffer->vk, offset); - - int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType)); - cmd_buffer->state.max_index_count = (vk_buffer_range(&index_buffer->vk, offset, size)) / index_size; radv_cs_add_buffer(device->ws, cs->b, index_buffer->bo); + + addr_range = vk_device_address_range(&index_buffer->vk, offset, size); + } + + const VkBindIndexBuffer3InfoKHR info = { + .sType = VK_STRUCTURE_TYPE_BIND_INDEX_BUFFER_3_INFO_KHR, + .addressRange = addr_range, + .addressFlags = index_buffer ? 
index_buffer->vk.address_flags : 0, + .indexType = indexType, + }; + + radv_CmdBindIndexBuffer3KHR(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdBindIndexBuffer3KHR(VkCommandBuffer commandBuffer, const VkBindIndexBuffer3InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.index_type = vk_to_index_type(pInfo->indexType); + + if (pInfo->addressRange.size) { + cmd_buffer->state.index_va = pInfo->addressRange.address; + + int index_size = radv_get_vgt_index_size(vk_to_index_type(pInfo->indexType)); + cmd_buffer->state.max_index_count = pInfo->addressRange.size / index_size; } else { cmd_buffer->state.index_va = 0; cmd_buffer->state.max_index_count = 0; @@ -13228,18 +13282,33 @@ radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSi VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + + const VkDrawIndirect2InfoKHR info = { + .sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_2_INFO_KHR, + .addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride), + .addressFlags = buffer->vk.address_flags, + .drawCount = drawCount, + }; + + radv_CmdDrawIndirect2KHR(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDrawIndirect2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirect2InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_draw_info info; - info.count = drawCount; - info.indirect_va = vk_buffer_address(&buffer->vk, offset); - info.stride = stride; + info.count = pInfo->drawCount; + info.indirect_va = pInfo->addressRange.address; + info.stride = pInfo->addressRange.stride; info.strmout_va = 0; info.count_va = 0; info.indexed = false; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cs->b, buffer->bo); - if (!radv_before_draw(cmd_buffer, 
&info, 1, false)) return; radv_emit_indirect_draw_packets(cmd_buffer, &info); @@ -13254,18 +13323,33 @@ radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkD VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + + const VkDrawIndirect2InfoKHR info = { + .sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_2_INFO_KHR, + .addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride), + .addressFlags = buffer->vk.address_flags, + .drawCount = drawCount, + }; + + radv_CmdDrawIndexedIndirect2KHR(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDrawIndexedIndirect2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirect2InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_draw_info info; info.indexed = true; - info.count = drawCount; - info.indirect_va = vk_buffer_address(&buffer->vk, offset); - info.stride = stride; + info.count = pInfo->drawCount; + info.indirect_va = pInfo->addressRange.address; + info.stride = pInfo->addressRange.stride; info.count_va = 0; info.strmout_va = 0; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cs->b, buffer->bo); - if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; radv_emit_indirect_draw_packets(cmd_buffer, &info); @@ -13281,19 +13365,36 @@ radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDev VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; - struct radv_draw_info info; - - info.count = maxDrawCount; - info.indirect_va = vk_buffer_address(&buffer->vk, offset); - info.count_va = vk_buffer_address(&count_buffer->vk, countBufferOffset); - info.stride = stride; - info.strmout_va = 0; - info.indexed = false; - 
info.instance_count = 0; radv_cs_add_buffer(device->ws, cs->b, buffer->bo); radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo); + const VkDrawIndirectCount2InfoKHR info = { + .sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_COUNT_2_INFO_KHR, + .addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride), + .addressFlags = buffer->vk.address_flags, + .countAddressRange = vk_device_address_range(&count_buffer->vk, countBufferOffset, VK_WHOLE_SIZE), + .countAddressFlags = count_buffer->vk.address_flags, + .maxDrawCount = maxDrawCount, + }; + + radv_CmdDrawIndirectCount2KHR(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDrawIndirectCount2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirectCount2InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_draw_info info; + + info.count = pInfo->maxDrawCount; + info.indirect_va = pInfo->addressRange.address; + info.count_va = pInfo->countAddressRange.address; + info.stride = pInfo->addressRange.stride; + info.strmout_va = 0; + info.indexed = false; + info.instance_count = 0; + if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; radv_emit_indirect_draw_packets(cmd_buffer, &info); @@ -13310,19 +13411,36 @@ radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; - struct radv_draw_info info; - - info.indexed = true; - info.count = maxDrawCount; - info.indirect_va = vk_buffer_address(&buffer->vk, offset); - info.count_va = vk_buffer_address(&count_buffer->vk, countBufferOffset); - info.stride = stride; - info.strmout_va = 0; - info.instance_count = 0; radv_cs_add_buffer(device->ws, cs->b, buffer->bo); radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo); + const VkDrawIndirectCount2InfoKHR info = { + .sType = 
VK_STRUCTURE_TYPE_DRAW_INDIRECT_COUNT_2_INFO_KHR, + .addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride), + .addressFlags = buffer->vk.address_flags, + .countAddressRange = vk_device_address_range(&count_buffer->vk, countBufferOffset, VK_WHOLE_SIZE), + .countAddressFlags = count_buffer->vk.address_flags, + .maxDrawCount = maxDrawCount, + }; + + radv_CmdDrawIndexedIndirectCount2KHR(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDrawIndexedIndirectCount2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirectCount2InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_draw_info info; + + info.indexed = true; + info.count = pInfo->maxDrawCount; + info.indirect_va = pInfo->addressRange.address; + info.count_va = pInfo->countAddressRange.address; + info.stride = pInfo->addressRange.stride; + info.strmout_va = 0; + info.instance_count = 0; + if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; radv_emit_indirect_draw_packets(cmd_buffer, &info); @@ -13366,19 +13484,36 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + + const VkDrawIndirect2InfoKHR info = { + .sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_2_INFO_KHR, + .addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride), + .addressFlags = buffer->vk.address_flags, + .drawCount = drawCount, + }; + + radv_CmdDrawMeshTasksIndirect2EXT(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDrawMeshTasksIndirect2EXT(VkCommandBuffer commandBuffer, const VkDrawIndirect2InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs 
= cmd_buffer->cs; struct radv_draw_info info; - info.indirect_va = vk_buffer_address(&buffer->vk, offset); - info.stride = stride; - info.count = drawCount; + info.indirect_va = pInfo->addressRange.address; + info.stride = pInfo->addressRange.stride; + info.count = pInfo->drawCount; info.strmout_va = 0; info.count_va = 0; info.indexed = false; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cs->b, buffer->bo); - - if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount, false)) + if (!radv_before_taskmesh_draw(cmd_buffer, &info, pInfo->drawCount, false)) return; if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { @@ -13395,27 +13530,46 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b VkBuffer _countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, buffer, _buffer); VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo); + + const VkDrawIndirectCount2InfoKHR info = { + .sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_COUNT_2_INFO_KHR, + .addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride), + .addressFlags = buffer->vk.address_flags, + .countAddressRange = vk_device_address_range(&count_buffer->vk, countBufferOffset, VK_WHOLE_SIZE), + .countAddressFlags = count_buffer->vk.address_flags, + .maxDrawCount = maxDrawCount, + }; + + radv_CmdDrawMeshTasksIndirectCount2EXT(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDrawMeshTasksIndirectCount2EXT(VkCommandBuffer commandBuffer, const VkDrawIndirectCount2InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_device *device = 
radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; - info.indirect_va = vk_buffer_address(&buffer->vk, offset); - info.stride = stride; - info.count = maxDrawCount; + info.indirect_va = pInfo->addressRange.address; + info.stride = pInfo->addressRange.stride; + info.count = pInfo->maxDrawCount; info.strmout_va = 0; - info.count_va = vk_buffer_address(&count_buffer->vk, countBufferOffset); + info.count_va = pInfo->countAddressRange.address; info.indexed = false; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cs->b, buffer->bo); - radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo); - - if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount, false)) + if (!radv_before_taskmesh_draw(cmd_buffer, &info, pInfo->maxDrawCount, false)) return; if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { @@ -14083,11 +14236,25 @@ radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDevi VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radv_dispatch_info info = {.indirect_va = vk_buffer_address(&buffer->vk, offset)}; struct radv_cmd_stream *cs = cmd_buffer->cs; radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + const VkDispatchIndirect2InfoKHR info = { + .sType = VK_STRUCTURE_TYPE_DISPATCH_INDIRECT_2_INFO_KHR, + .addressRange = vk_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE), + .addressFlags = buffer->vk.address_flags, + }; + + radv_CmdDispatchIndirect2KHR(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDispatchIndirect2KHR(VkCommandBuffer commandBuffer, const VkDispatchIndirect2InfoKHR *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_dispatch_info info = {.indirect_va = pInfo->addressRange.address}; + 
radv_compute_dispatch(cmd_buffer, &info); } @@ -15232,13 +15399,27 @@ radv_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_cmd_stream *cs = cmd_buffer->cs; - bool draw_visible = true; - uint64_t va; - - va = vk_buffer_address(&buffer->vk, pConditionalRenderingBegin->offset); radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + const VkConditionalRenderingBeginInfo2EXT begin_info = { + .sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_2_EXT, + .addressRange = vk_device_address_range(&buffer->vk, pConditionalRenderingBegin->offset, VK_WHOLE_SIZE), + .addressFlags = buffer->vk.address_flags, + .flags = pConditionalRenderingBegin->flags, + }; + + radv_CmdBeginConditionalRendering2EXT(commandBuffer, &begin_info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdBeginConditionalRendering2EXT(VkCommandBuffer commandBuffer, + const VkConditionalRenderingBeginInfo2EXT *pConditionalRenderingBegin) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const uint64_t va = pConditionalRenderingBegin->addressRange.address; + bool draw_visible = true; + /* By default, if the 32-bit value at offset in buffer memory is zero, * then the rendering commands are discarded, otherwise they are * executed as normal. 
If the inverted flag is set, all commands are @@ -15267,24 +15448,43 @@ radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_cmd_stream *cs = cmd_buffer->cs; - uint8_t enabled_mask = 0; + + STACK_ARRAY(VkBindTransformFeedbackBuffer2InfoEXT, bindings, bindingCount); assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); for (uint32_t i = 0; i < bindingCount; i++) { VK_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]); - uint32_t idx = firstBinding + i; - sb[idx].va = vk_buffer_address(&buffer->vk, pOffsets[i]); - - if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) { - sb[idx].size = buffer->vk.size - pOffsets[i]; - } else { - sb[idx].size = pSizes[i]; - } + bindings[i] = (VkBindTransformFeedbackBuffer2InfoEXT){ + .sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT, + .addressRange = vk_device_address_range(&buffer->vk, pOffsets[i], pSizes ? pSizes[i] : VK_WHOLE_SIZE), + .addressFlags = buffer->vk.address_flags, + }; radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + } + + radv_CmdBindTransformFeedbackBuffers2EXT(commandBuffer, firstBinding, bindingCount, + bindingCount > 0 ? 
bindings : NULL); + + STACK_ARRAY_FINISH(bindings); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdBindTransformFeedbackBuffers2EXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, + const VkBindTransformFeedbackBuffer2InfoEXT *pBindingInfos) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; + uint8_t enabled_mask = 0; + + assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); + for (uint32_t i = 0; i < bindingCount; i++) { + uint32_t idx = firstBinding + i; + + sb[idx].va = pBindingInfos[i].addressRange.address; + sb[idx].size = pBindingInfos[i].addressRange.size; enabled_mask |= 1 << idx; } @@ -15380,6 +15580,42 @@ VKAPI_ATTR void VKAPI_CALL radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + STACK_ARRAY(VkBindTransformFeedbackBuffer2InfoEXT, counters, counterBufferCount); + + for (uint32_t i = 0; i < counterBufferCount; i++) { + /* pCounterBuffers may be NULL even when counterBufferCount is non-zero. */ + VK_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers ? pCounterBuffers[i] : VK_NULL_HANDLE); + VkDeviceAddressRangeKHR addr_range = {0}; + + if (buffer) { + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + + addr_range = + vk_device_address_range(&buffer->vk, pCounterBufferOffsets ? pCounterBufferOffsets[i] : 0, VK_WHOLE_SIZE); + } + + counters[i] = (VkBindTransformFeedbackBuffer2InfoEXT){ + .sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT, + .addressRange = addr_range, + .addressFlags = buffer ? buffer->vk.address_flags : 0, + }; + } + + radv_CmdBeginTransformFeedback2EXT(commandBuffer, firstCounterBuffer, counterBufferCount, + counterBufferCount > 0 ? 
counters : NULL); + + STACK_ARRAY_FINISH(counters); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdBeginTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t firstCounterRange, + uint32_t counterRangeCount, + const VkBindTransformFeedbackBuffer2InfoEXT *pCounterInfos) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); @@ -15389,7 +15624,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radv_cmd_stream *cs = cmd_buffer->cs; - assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); + assert(firstCounterRange + counterRangeCount <= MAX_SO_BUFFERS); if (pdev->info.gfx_level >= GFX12) { radv_init_streamout_state(cmd_buffer); @@ -15408,24 +15643,13 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 10); u_foreach_bit (i, so->enabled_mask) { - int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + int32_t counter_buffer_idx = i - firstCounterRange; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterRangeCount) counter_buffer_idx = -1; - bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; - uint64_t va = 0; - - if (append) { - VK_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); - uint64_t counter_buffer_offset = 0; - - if (pCounterBufferOffsets) - counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; - - va += vk_buffer_address(&buffer->vk, counter_buffer_offset); - - radv_cs_add_buffer(device->ws, cs->b, buffer->bo); - } + const bool append = + counter_buffer_idx >= 0 && pCounterInfos && pCounterInfos[counter_buffer_idx].addressRange.size > 0; + uint64_t va = append ? 
pCounterInfos[counter_buffer_idx].addressRange.address : 0; if (pdev->info.gfx_level >= GFX12) { if (append) { @@ -15488,6 +15712,41 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC VKAPI_ATTR void VKAPI_CALL radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + STACK_ARRAY(VkBindTransformFeedbackBuffer2InfoEXT, counters, counterBufferCount); + + for (uint32_t i = 0; i < counterBufferCount; i++) { + /* pCounterBuffers may be NULL even when counterBufferCount is non-zero. */ + VK_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers ? pCounterBuffers[i] : VK_NULL_HANDLE); + VkDeviceAddressRangeKHR addr_range = {0}; + + if (buffer) { + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + + addr_range = + vk_device_address_range(&buffer->vk, pCounterBufferOffsets ? pCounterBufferOffsets[i] : 0, VK_WHOLE_SIZE); + } + + counters[i] = (VkBindTransformFeedbackBuffer2InfoEXT){ + .sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT, + .addressRange = addr_range, + .addressFlags = buffer ? buffer->vk.address_flags : 0, + }; + } + + radv_CmdEndTransformFeedback2EXT(commandBuffer, firstCounterBuffer, counterBufferCount, + counterBufferCount > 0 ? 
counters : NULL); + + STACK_ARRAY_FINISH(counters); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdEndTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t firstCounterRange, uint32_t counterRangeCount, + const VkBindTransformFeedbackBuffer2InfoEXT *pCounterInfos) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); @@ -15496,7 +15754,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou struct radv_cmd_stream *cs = cmd_buffer->cs; bool needs_pfp_sync_me = false; - assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); + assert(firstCounterRange + counterRangeCount <= MAX_SO_BUFFERS); if (pdev->use_ngg_streamout) { /* Wait for streamout to finish before copying back the number of bytes @@ -15514,26 +15772,15 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 12); u_foreach_bit (i, so->enabled_mask) { - int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + int32_t counter_buffer_idx = i - firstCounterRange; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterRangeCount) counter_buffer_idx = -1; - bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; - uint64_t va = 0; + const bool append = + counter_buffer_idx >= 0 && pCounterInfos && pCounterInfos[counter_buffer_idx].addressRange.size > 0; + uint64_t va = append ? 
pCounterInfos[counter_buffer_idx].addressRange.address : 0; - if (append) { - VK_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); - uint64_t counter_buffer_offset = 0; - - if (pCounterBufferOffsets) - counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; - - va += vk_buffer_address(&buffer->vk, counter_buffer_offset); - - radv_cs_add_buffer(device->ws, cs->b, buffer->bo); - - needs_pfp_sync_me = true; - } + needs_pfp_sync_me |= append; if (pdev->info.gfx_level >= GFX12) { if (append) { @@ -15628,6 +15875,25 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_cs_add_buffer(device->ws, cs->b, counterBuffer->bo); + + const VkBindTransformFeedbackBuffer2InfoEXT info = { + .sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT, + .addressRange = vk_device_address_range(&counterBuffer->vk, counterBufferOffset, VK_WHOLE_SIZE), + }; + + radv_CmdDrawIndirectByteCount2EXT(commandBuffer, instanceCount, firstInstance, &info, counterOffset, vertexStride); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdDrawIndirectByteCount2EXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, uint32_t firstInstance, + const VkBindTransformFeedbackBuffer2InfoEXT *pCounterInfo, uint32_t counterOffset, + uint32_t vertexStride) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; @@ -15635,13 +15901,11 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc info.count = 0; info.instance_count = instanceCount; info.first_instance = 
firstInstance; - info.strmout_va = vk_buffer_address(&counterBuffer->vk, counterBufferOffset); + info.strmout_va = pCounterInfo->addressRange.address; info.stride = vertexStride; info.indexed = false; info.indirect_va = 0; - radv_cs_add_buffer(device->ws, cs->b, counterBuffer->bo); - if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; struct VkMultiDrawInfoEXT minfo = {0, 0}; @@ -15668,15 +15932,32 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, buffer, dstBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_cmd_stream *cs = cmd_buffer->cs; - const uint64_t va = vk_buffer_address(&buffer->vk, dstOffset); radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + const VkMemoryMarkerInfoAMD info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_MARKER_INFO_AMD, + .stage = stage, + .dstRange = vk_device_address_range(&buffer->vk, dstOffset, VK_WHOLE_SIZE), + .marker = marker, + }; + + radv_CmdWriteMarkerToMemoryAMD(commandBuffer, &info); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdWriteMarkerToMemoryAMD(VkCommandBuffer commandBuffer, const VkMemoryMarkerInfoAMD *pInfo) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; + const uint64_t va = pInfo->dstRange.address; + if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { radeon_check_space(device->ws, cs->b, 4); - ac_emit_sdma_fence(cmd_buffer->cs->b, va, marker); + ac_emit_sdma_fence(cmd_buffer->cs->b, va, pInfo->marker); return; } @@ -15684,11 +15965,12 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 12); - if 
(!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) { - ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, marker, va, AC_CP_COPY_DATA_WR_CONFIRM, false); + if (!(pInfo->stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) { + ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, pInfo->marker, va, AC_CP_COPY_DATA_WR_CONFIRM, + false); } else { radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, va, marker, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, va, pInfo->marker, cmd_buffer->gfx9_eop_bug_va); } diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index e26a8bd0762..8516e42fecc 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -2453,13 +2453,32 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo VkQueryResultFlags flags) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - VK_FROM_HANDLE(radv_query_pool, pool, queryPool); VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo); + + const VkStridedDeviceAddressRangeKHR dstRange = + vk_strided_device_address_range(&dst_buffer->vk, dstOffset, VK_WHOLE_SIZE, stride); + + radv_CmdCopyQueryPoolResultsToMemoryKHR(commandBuffer, queryPool, firstQuery, queryCount, &dstRange, + dst_buffer->vk.address_flags, flags); +} + +VKAPI_ATTR void VKAPI_CALL +radv_CmdCopyQueryPoolResultsToMemoryKHR(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, + uint32_t queryCount, const VkStridedDeviceAddressRangeKHR *pDstRange, + VkAddressCommandFlagsKHR dstFlags, VkQueryResultFlags queryResultFlags) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(radv_query_pool, pool, queryPool); 
+ struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_instance *instance = radv_physical_device_instance(pdev); - const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset); struct radv_cmd_stream *cs = cmd_buffer->cs; + const uint64_t dst_va = pDstRange->address; + const uint64_t stride = pDstRange->stride; if (!queryCount) return; @@ -2467,7 +2486,6 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo radv_suspend_conditional_rendering(cmd_buffer); radv_cs_add_buffer(device->ws, cs->b, pool->bo); - radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo); /* Workaround engines that forget to properly specify WAIT_BIT because some driver implicitly * synchronizes before query copy. @@ -2487,26 +2505,26 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo switch (pool->vk.query_type) { case VK_QUERY_TYPE_OCCLUSION: - radv_copy_occlusion_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags); + radv_copy_occlusion_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - radv_copy_pipeline_stat_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags); + radv_copy_pipeline_stat_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags); break; case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR: - radv_copy_timestamp_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags); + radv_copy_timestamp_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags); 
break; case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: - radv_copy_tfb_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags); + radv_copy_tfb_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags); break; case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: - radv_copy_pg_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags); + radv_copy_pg_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags); break; case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: - radv_copy_ms_prim_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags); + radv_copy_ms_prim_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags); break; default: UNREACHABLE("trying to get results of unhandled query type"); diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c index 0b807b0e91c..d05d73e1ef4 100644 --- a/src/amd/vulkan/radv_sdma.c +++ b/src/amd/vulkan/radv_sdma.c @@ -102,9 +102,9 @@ radv_sdma_get_bpe(const struct radv_image *const image, const VkImageSubresource } struct ac_sdma_surf -radv_sdma_get_buf_surf(uint64_t buffer_va, const struct radv_image *const image, const VkBufferImageCopy2 *const region) +radv_sdma_get_buf_surf(const struct radv_image *const image, const VkDeviceMemoryImageCopyKHR *const region) { - const struct vk_image_buffer_layout layout = vk_image_buffer_copy_layout(&image->vk, region); + const struct vk_image_buffer_layout layout = vk_image_memory_copy_layout(&image->vk, region); assert(util_bitcount(region->imageSubresource.aspectMask) == 1); @@ -115,7 +115,7 @@ radv_sdma_get_buf_surf(uint64_t buffer_va, const struct radv_image *const image, const uint32_t bpe = radv_sdma_get_bpe(image, ®ion->imageSubresource); const struct ac_sdma_surf info = { - .va = buffer_va + region->bufferOffset, + .va = region->addressRange.address, .pitch = pitch, .slice_pitch = slice_pitch, .bpp = bpe, diff --git 
a/src/amd/vulkan/radv_sdma.h b/src/amd/vulkan/radv_sdma.h index af7ceeb0f20..222e6384ec4 100644 --- a/src/amd/vulkan/radv_sdma.h +++ b/src/amd/vulkan/radv_sdma.h @@ -43,8 +43,8 @@ radv_sdma_get_copy_extent(const struct radv_image *const image, const VkImageSub return extent; } -struct ac_sdma_surf radv_sdma_get_buf_surf(uint64_t buffer_va, const struct radv_image *const image, - const VkBufferImageCopy2 *const region); +struct ac_sdma_surf radv_sdma_get_buf_surf(const struct radv_image *const image, + const VkDeviceMemoryImageCopyKHR *const region); struct ac_sdma_surf radv_sdma_get_surf(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *const image, VkImageLayout image_layout, const VkImageSubresourceLayers subresource, const VkOffset3D offset);