radv: implement VK_KHR_device_address_commands

Because there is no way to know where the address has been allocated
(GTT or VRAM), the existing entrypoints aren't dropped, and the sparse
bit is derived from the absence of VK_ADDRESS_COMMAND_FULLY_BOUND_BIT_KHR.

It would be nice to figure out if the CP DMA vs compute heuristic for
GTT BOs on dGPUs could be removed to simplify this implementation.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40386>
This commit is contained in:
Samuel Pitoiset 2026-03-24 15:30:24 +01:00 committed by Marge Bot
parent 479a992b02
commit a97c889a7b
8 changed files with 659 additions and 182 deletions

View file

@ -467,3 +467,14 @@ radv_get_copy_flags_from_bo(const struct radeon_winsys_bo *bo)
return copy_flags;
}
/* Translate VK_KHR_device_address_commands command flags into the internal
 * copy flags used by the meta copy paths.
 *
 * When the application cannot guarantee that the addressed memory is fully
 * resident (VK_ADDRESS_COMMAND_FULLY_BOUND_BIT_KHR not set), the copy must be
 * treated as sparse.
 */
VkAddressCopyFlagsKHR
radv_get_copy_flags_from_command_flags(VkAddressCommandFlagsKHR command_flags)
{
   const bool fully_bound = command_flags & VK_ADDRESS_COMMAND_FULLY_BOUND_BIT_KHR;

   return fully_bound ? 0 : VK_ADDRESS_COPY_SPARSE_BIT_KHR;
}

View file

@ -352,6 +352,8 @@ VkResult radv_meta_get_noop_pipeline_layout(struct radv_device *device, VkPipeli
VkAddressCopyFlagsKHR radv_get_copy_flags_from_bo(const struct radeon_winsys_bo *bo);
VkAddressCopyFlagsKHR radv_get_copy_flags_from_command_flags(VkAddressCommandFlagsKHR command_flags);
static inline unsigned
radv_get_image_stride_for_96bit(const struct radv_device *device, const struct radv_image *image)
{

View file

@ -332,6 +332,24 @@ radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSi
radv_resume_conditional_rendering(cmd_buffer);
}
/* VK_KHR_device_address_commands entrypoint: fill a raw device address range
 * with a 32-bit value.  The sparse copy bit is derived from dstFlags since
 * there is no buffer object to inspect.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdFillMemoryKHR(VkCommandBuffer commandBuffer, const VkDeviceAddressRangeKHR *pDstRange,
                      VkAddressCommandFlagsKHR dstFlags, uint32_t data)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   const VkAddressCopyFlagsKHR copy_flags = radv_get_copy_flags_from_command_flags(dstFlags);

   radv_suspend_conditional_rendering(cmd_buffer);

   radv_meta_begin(cmd_buffer);
   radv_fill_memory(cmd_buffer, pDstRange->address, pDstRange->size, data, copy_flags);
   radv_meta_end(cmd_buffer);

   radv_resume_conditional_rendering(cmd_buffer);
}
void
radv_copy_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size,
VkAddressCopyFlagsKHR src_copy_flags, VkAddressCopyFlagsKHR dst_copy_flags)
@ -382,6 +400,30 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop
radv_resume_conditional_rendering(cmd_buffer);
}
/* VK_KHR_device_address_commands entrypoint: copy between raw device address
 * ranges.  Each region carries its own command flags, from which the sparse
 * copy bit is derived independently for the source and the destination.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryKHR(VkCommandBuffer commandBuffer, const VkCopyDeviceMemoryInfoKHR *pCopyMemoryInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   radv_suspend_conditional_rendering(cmd_buffer);
   radv_meta_begin(cmd_buffer);

   for (unsigned i = 0; i < pCopyMemoryInfo->regionCount; i++) {
      const VkDeviceMemoryCopyKHR *copy = &pCopyMemoryInfo->pRegions[i];
      const VkAddressCopyFlagsKHR src_flags = radv_get_copy_flags_from_command_flags(copy->srcFlags);
      const VkAddressCopyFlagsKHR dst_flags = radv_get_copy_flags_from_command_flags(copy->dstFlags);

      radv_copy_memory(cmd_buffer, copy->srcRange.address, copy->dstRange.address, copy->srcRange.size, src_flags,
                       dst_flags);
   }

   radv_meta_end(cmd_buffer);
   radv_resume_conditional_rendering(cmd_buffer);
}
void
radv_update_memory_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, uint64_t size)
{
@ -449,3 +491,22 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
radv_resume_conditional_rendering(cmd_buffer);
}
/* VK_KHR_device_address_commands entrypoint: inline update of a device
 * address range with host data.  dataSize is taken from the explicit
 * parameter rather than pDstRange->size, matching the API contract.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdUpdateMemoryKHR(VkCommandBuffer commandBuffer, const VkDeviceAddressRangeKHR *pDstRange,
                        VkAddressCommandFlagsKHR dstFlags, VkDeviceSize dataSize, const void *pData)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   const VkAddressCopyFlagsKHR copy_flags = radv_get_copy_flags_from_command_flags(dstFlags);

   radv_suspend_conditional_rendering(cmd_buffer);

   radv_meta_begin(cmd_buffer);
   radv_update_memory(cmd_buffer, pDstRange->address, dataSize, pData, copy_flags);
   radv_meta_end(cmd_buffer);

   radv_resume_conditional_rendering(cmd_buffer);
}

View file

@ -75,18 +75,16 @@ alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
return true;
}
static void gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr,
uint64_t buffer_size, VkAddressCopyFlagsKHR src_copy_flags,
struct radv_image *image, VkImageLayout layout,
const VkBufferImageCopy2 *region, const bool use_compute);
static void gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer,
VkAddressCopyFlagsKHR src_copy_flags, struct radv_image *image,
const VkDeviceMemoryImageCopyKHR *region, bool use_compute);
static void compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR dst_copy_flags,
struct radv_image *image, const VkDeviceMemoryImageCopyKHR *region);
static void compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size,
VkAddressCopyFlagsKHR dst_copy_flags, struct radv_image *image,
VkImageLayout layout, const VkBufferImageCopy2 *region);
static void
transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_va, uint64_t buffer_size,
VkAddressCopyFlagsKHR buffer_flags, struct radv_image *image, const VkImageLayout layout,
const VkBufferImageCopy2 *region, bool to_image)
transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR buffer_flags,
struct radv_image *image, const VkDeviceMemoryImageCopyKHR *region, bool to_image)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
@ -101,10 +99,9 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v
radv_gang_cache_flush(cmd_buffer);
if (to_image) {
gfx_or_compute_copy_memory_to_image(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout, region,
true);
gfx_or_compute_copy_memory_to_image(cmd_buffer, buffer_flags, image, region, true);
} else {
compute_copy_image_to_memory(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout, region);
compute_copy_image_to_memory(cmd_buffer, buffer_flags, image, region);
}
return;
@ -116,9 +113,9 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v
const VkOffset3D img_offset_el = vk_image_offset_to_elements(&image->vk, region->imageOffset);
const VkExtent3D img_extent_el = vk_image_extent_to_elements(&image->vk, region->imageExtent);
struct ac_sdma_surf buf = radv_sdma_get_buf_surf(buffer_va, image, region);
struct ac_sdma_surf buf = radv_sdma_get_buf_surf(image, region);
const struct ac_sdma_surf img =
radv_sdma_get_surf(cmd_buffer, image, layout, region->imageSubresource, img_offset_el);
radv_sdma_get_surf(cmd_buffer, image, region->imageLayout, region->imageSubresource, img_offset_el);
const VkExtent3D extent = radv_sdma_get_copy_extent(image, region->imageSubresource, img_extent_el);
if (radv_sdma_use_unaligned_buffer_image_copy(device, &buf, &img, extent)) {
@ -192,9 +189,9 @@ radv_fixup_copy_dst_htile_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra
}
static void
gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size,
VkAddressCopyFlagsKHR src_copy_flags, struct radv_image *image,
VkImageLayout layout, const VkBufferImageCopy2 *region, const bool use_compute)
gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR src_copy_flags,
struct radv_image *image, const VkDeviceMemoryImageCopyKHR *region,
bool use_compute)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -205,8 +202,8 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t
assert(image->vk.samples == 1);
if (use_compute) {
radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, layout, &region->imageSubresource, &region->imageOffset,
&region->imageExtent, true);
radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, region->imageLayout, &region->imageSubresource,
&region->imageOffset, &region->imageExtent, true);
}
/**
@ -226,14 +223,15 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t
/* Create blit surfaces */
struct radv_meta_blit2d_surf img_bsurf =
radv_blit_surf_for_image_level_layer(image, layout, &region->imageSubresource);
radv_blit_surf_for_image_level_layer(image, region->imageLayout, &region->imageSubresource);
if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
const uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
const VkFormat raw_format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format));
if (!radv_dcc_formats_compatible(pdev->info.gfx_level, img_bsurf.format, raw_format, NULL) &&
radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask)) {
radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, region->imageLayout,
queue_mask)) {
radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);
radv_decompress_dcc(cmd_buffer, image,
@ -252,12 +250,11 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t
img_bsurf.format = raw_format;
}
const struct vk_image_buffer_layout buf_layout = vk_image_buffer_copy_layout(&image->vk, region);
const struct vk_image_buffer_layout buf_layout = vk_image_memory_copy_layout(&image->vk, region);
struct radv_meta_blit2d_buffer buf_bsurf = {
.addr = buffer_addr,
.size = buffer_size,
.addr = region->addressRange.address,
.size = region->addressRange.size,
.format = img_bsurf.format,
.offset = region->bufferOffset,
.pitch = buf_layout.row_stride_B / buf_layout.element_size_B,
.copy_flags = src_copy_flags,
};
@ -291,8 +288,8 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t
}
if (use_compute) {
radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, layout, &region->imageSubresource, &region->imageOffset,
&region->imageExtent, false);
radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, region->imageLayout, &region->imageSubresource,
&region->imageOffset, &region->imageExtent, false);
}
}
@ -321,14 +318,22 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[bind_idx].bo);
const VkDeviceMemoryImageCopyKHR copy = {
.sType = VK_STRUCTURE_TYPE_DEVICE_MEMORY_IMAGE_COPY_KHR,
.addressRange = vk_device_address_range(&src_buffer->vk, region->bufferOffset, VK_WHOLE_SIZE),
.addressRowLength = region->bufferRowLength,
.addressImageHeight = region->bufferImageHeight,
.imageSubresource = region->imageSubresource,
.imageLayout = pCopyBufferToImageInfo->dstImageLayout,
.imageOffset = region->imageOffset,
.imageExtent = region->imageExtent,
};
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
transfer_copy_memory_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, src_copy_flags,
dst_image, pCopyBufferToImageInfo->dstImageLayout, region, true);
transfer_copy_memory_image(cmd_buffer, src_copy_flags, dst_image, &copy, true);
} else {
const bool use_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(dst_image);
gfx_or_compute_copy_memory_to_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size,
src_copy_flags, dst_image, pCopyBufferToImageInfo->dstImageLayout, region,
use_compute);
gfx_or_compute_copy_memory_to_image(cmd_buffer, src_copy_flags, dst_image, &copy, use_compute);
}
}
@ -360,10 +365,65 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
radv_resume_conditional_rendering(cmd_buffer);
}
/* VK_KHR_device_address_commands entrypoint: copy raw device memory regions
 * into an image.  Dispatches per-region to the SDMA transfer path or to the
 * gfx/compute meta path, then runs the ETC/ASTC software-decode fixup for
 * emulated formats.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToImageKHR(VkCommandBuffer commandBuffer, const VkCopyDeviceMemoryImageInfoKHR *pCopyMemoryInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_image, dst_image, pCopyMemoryInfo->image);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_suspend_conditional_rendering(cmd_buffer);
radv_meta_begin(cmd_buffer);
for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) {
const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r];
const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
/* Disjoint images have one BO per plane; pick the plane addressed by this region. */
const unsigned bind_idx = dst_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;
radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[bind_idx].bo);
/* Sparse bit for the source memory is derived from the per-region command flags. */
VkAddressCopyFlagsKHR copy_flags = radv_get_copy_flags_from_command_flags(region->addressFlags);
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
transfer_copy_memory_image(cmd_buffer, copy_flags, dst_image, region, true);
} else {
/* Compute path when on the compute queue or the image cannot be rendered to. */
const bool use_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(dst_image);
gfx_or_compute_copy_memory_to_image(cmd_buffer, copy_flags, dst_image, region, use_compute);
}
}
/* Emulated compressed formats (ETC/ASTC) need a software decode pass after
 * the raw data upload; not applicable on the transfer queue. */
if (radv_is_format_emulated(pdev, dst_image->vk.format) && cmd_buffer->qf != RADV_QUEUE_TRANSFER) {
/* Make the uploaded texel data visible to the decode shaders. */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, 0, dst_image, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_ACCESS_TRANSFER_READ_BIT, 0, dst_image, NULL);
const enum util_format_layout format_layout = radv_format_description(dst_image->vk.format)->layout;
for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) {
const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r];
if (format_layout == UTIL_FORMAT_LAYOUT_ASTC) {
radv_meta_decode_astc(cmd_buffer, dst_image, region->imageLayout, &region->imageSubresource,
region->imageOffset, region->imageExtent);
} else {
radv_meta_decode_etc(cmd_buffer, dst_image, region->imageLayout, &region->imageSubresource,
region->imageOffset, region->imageExtent);
}
}
}
radv_meta_end(cmd_buffer);
radv_resume_conditional_rendering(cmd_buffer);
}
static void
compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size,
VkAddressCopyFlagsKHR dst_copy_flags, struct radv_image *image, VkImageLayout layout,
const VkBufferImageCopy2 *region)
compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, VkAddressCopyFlagsKHR dst_copy_flags,
struct radv_image *image, const VkDeviceMemoryImageCopyKHR *region)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -380,8 +440,8 @@ compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer
*/
const VkOffset3D img_offset_el = vk_image_offset_to_elements(&image->vk, region->imageOffset);
const VkExtent3D bufferExtent = {
.width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width,
.height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height,
.width = region->addressRowLength ? region->addressRowLength : region->imageExtent.width,
.height = region->addressImageHeight ? region->addressImageHeight : region->imageExtent.height,
};
const VkExtent3D buf_extent_el = vk_image_extent_to_elements(&image->vk, bufferExtent);
@ -390,14 +450,15 @@ compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer
/* Create blit surfaces */
struct radv_meta_blit2d_surf img_info =
radv_blit_surf_for_image_level_layer(image, layout, &region->imageSubresource);
radv_blit_surf_for_image_level_layer(image, region->imageLayout, &region->imageSubresource);
if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
const uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
const VkFormat raw_format = vk_format_for_size(vk_format_get_blocksize(img_info.format));
if (!radv_dcc_formats_compatible(pdev->info.gfx_level, img_info.format, raw_format, NULL) &&
radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask)) {
radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, region->imageLayout,
queue_mask)) {
radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);
radv_decompress_dcc(cmd_buffer, image,
@ -417,10 +478,9 @@ compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer
}
struct radv_meta_blit2d_buffer buf_info = {
.addr = buffer_addr,
.size = buffer_size,
.addr = region->addressRange.address,
.size = region->addressRange.size,
.format = img_info.format,
.offset = region->bufferOffset,
.pitch = buf_extent_el.width,
.copy_flags = dst_copy_flags,
};
@ -469,12 +529,55 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf
radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[bind_idx].bo);
VkDeviceMemoryImageCopyKHR copy = {
.sType = VK_STRUCTURE_TYPE_DEVICE_MEMORY_IMAGE_COPY_KHR,
.addressRange = vk_device_address_range(&dst_buffer->vk, region->bufferOffset, VK_WHOLE_SIZE),
.addressFlags = dst_buffer->vk.address_flags,
.addressRowLength = region->bufferRowLength,
.addressImageHeight = region->bufferImageHeight,
.imageSubresource = region->imageSubresource,
.imageLayout = pCopyImageToBufferInfo->srcImageLayout,
.imageOffset = region->imageOffset,
.imageExtent = region->imageExtent,
};
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
transfer_copy_memory_image(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags,
src_image, pCopyImageToBufferInfo->srcImageLayout, region, false);
transfer_copy_memory_image(cmd_buffer, dst_copy_flags, src_image, &copy, false);
} else {
compute_copy_image_to_memory(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags,
src_image, pCopyImageToBufferInfo->srcImageLayout, region);
compute_copy_image_to_memory(cmd_buffer, dst_copy_flags, src_image, &copy);
}
}
radv_meta_end(cmd_buffer);
radv_resume_conditional_rendering(cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyImageToMemoryKHR(VkCommandBuffer commandBuffer, const VkCopyDeviceMemoryImageInfoKHR *pCopyMemoryInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_image, src_image, pCopyMemoryInfo->image);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_suspend_conditional_rendering(cmd_buffer);
radv_meta_begin(cmd_buffer);
for (unsigned r = 0; r < pCopyMemoryInfo->regionCount; r++) {
const VkDeviceMemoryImageCopyKHR *region = &pCopyMemoryInfo->pRegions[r];
const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
const unsigned bind_idx = src_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;
VkAddressCopyFlagsKHR copy_flags = radv_get_copy_flags_from_command_flags(region->addressFlags);
radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[bind_idx].bo);
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
transfer_copy_memory_image(cmd_buffer, copy_flags, src_image, region, false);
} else {
compute_copy_image_to_memory(cmd_buffer, copy_flags, src_image, region);
}
}

View file

@ -7916,9 +7916,41 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
STACK_ARRAY(VkBindVertexBuffer3InfoKHR, bindings, bindingCount);
for (uint32_t i = 0; i < bindingCount; i++) {
VK_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
VkStridedDeviceAddressRangeKHR addr_range = {0};
if (buffer) {
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
addr_range = vk_strided_device_address_range(
&buffer->vk, pOffsets[i], pSizes != NULL ? pSizes[i] : VK_WHOLE_SIZE, pStrides != NULL ? pStrides[i] : 0);
}
bindings[i] = (VkBindVertexBuffer3InfoKHR){
.sType = VK_STRUCTURE_TYPE_BIND_VERTEX_BUFFER_3_INFO_KHR,
.addressRange = addr_range,
.addressFlags = buffer ? buffer->vk.address_flags : 0,
.setStride = pStrides != NULL,
};
}
radv_CmdBindVertexBuffers3KHR(commandBuffer, firstBinding, bindingCount, bindingCount > 0 ? bindings : NULL);
STACK_ARRAY_FINISH(bindings);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBindVertexBuffers3KHR(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
const VkBindVertexBuffer3InfoKHR *pBindingInfos)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct radv_cmd_stream *cs = cmd_buffer->cs;
/* We have to defer setting up vertex buffer since we need the buffer
* stride from the pipeline. */
@ -7928,26 +7960,27 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
uint32_t misaligned_mask_invalid = 0;
for (uint32_t i = 0; i < bindingCount; i++) {
VK_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
const VkBindVertexBuffer3InfoKHR *binding_info = &pBindingInfos[i];
uint32_t idx = firstBinding + i;
VkDeviceSize size = pSizes ? pSizes[i] : VK_WHOLE_SIZE;
VkDeviceSize stride = pStrides ? pStrides[i] : d->vk.vi_binding_strides[idx];
uint64_t addr = buffer ? vk_buffer_address(&buffer->vk, pOffsets[i]) : 0;
if (!!vb[idx].addr != !!addr || (addr && (((vb[idx].addr & 0x3) != (addr & 0x3) ||
(d->vk.vi_binding_strides[idx] & 0x3) != (stride & 0x3))))) {
VkDeviceSize size = binding_info->addressRange.size;
VkDeviceSize stride = binding_info->setStride ? binding_info->addressRange.stride : 0;
uint64_t addr = size ? binding_info->addressRange.address : 0;
if (!!vb[idx].addr != !!addr ||
(addr && ((vb[idx].addr & 0x3) != (addr & 0x3) || (d->vk.vi_binding_strides[idx] & 0x3) != (stride & 0x3)))) {
misaligned_mask_invalid |= d->vertex_input.bindings_match_attrib ? BITFIELD_BIT(idx) : 0xffffffff;
}
vb[idx].addr = addr;
vb[idx].size = buffer ? vk_buffer_range(&buffer->vk, pOffsets[i], size) : 0;
/* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */
if (pStrides)
radv_cmd_set_vertex_binding_strides(cmd_buffer, idx, 1, (uint16_t *)&pStrides[i]);
vb[idx].size = size;
/* If setStride=false, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */
if (binding_info->setStride)
radv_cmd_set_vertex_binding_strides(cmd_buffer, idx, 1, (uint16_t *)&stride);
uint32_t bit = BITFIELD_BIT(idx);
if (buffer) {
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
if (size) {
cmd_buffer->state.vbo_bound_mask |= bit;
} else {
cmd_buffer->state.vbo_bound_mask &= ~bit;
@ -8003,15 +8036,36 @@ radv_CmdBindIndexBuffer2(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDevic
VK_FROM_HANDLE(radv_buffer, index_buffer, buffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
cmd_buffer->state.index_type = vk_to_index_type(indexType);
VkDeviceAddressRangeKHR addr_range = {0};
if (index_buffer) {
cmd_buffer->state.index_va = vk_buffer_address(&index_buffer->vk, offset);
int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
cmd_buffer->state.max_index_count = (vk_buffer_range(&index_buffer->vk, offset, size)) / index_size;
radv_cs_add_buffer(device->ws, cs->b, index_buffer->bo);
addr_range = vk_device_address_range(&index_buffer->vk, offset, size);
}
const VkBindIndexBuffer3InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_BIND_INDEX_BUFFER_3_INFO_KHR,
.addressRange = addr_range,
.addressFlags = index_buffer ? index_buffer->vk.address_flags : 0,
.indexType = indexType,
};
radv_CmdBindIndexBuffer3KHR(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBindIndexBuffer3KHR(VkCommandBuffer commandBuffer, const VkBindIndexBuffer3InfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer->state.index_type = vk_to_index_type(pInfo->indexType);
if (pInfo->addressRange.size) {
cmd_buffer->state.index_va = pInfo->addressRange.address;
int index_size = radv_get_vgt_index_size(vk_to_index_type(pInfo->indexType));
cmd_buffer->state.max_index_count = pInfo->addressRange.size / index_size;
} else {
cmd_buffer->state.index_va = 0;
cmd_buffer->state.max_index_count = 0;
@ -13228,18 +13282,33 @@ radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSi
VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
const VkDrawIndirect2InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_2_INFO_KHR,
.addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride),
.addressFlags = buffer->vk.address_flags,
.drawCount = drawCount,
};
radv_CmdDrawIndirect2KHR(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndirect2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirect2InfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
info.count = drawCount;
info.indirect_va = vk_buffer_address(&buffer->vk, offset);
info.stride = stride;
info.count = pInfo->drawCount;
info.indirect_va = pInfo->addressRange.address;
info.stride = pInfo->addressRange.stride;
info.strmout_va = 0;
info.count_va = 0;
info.indexed = false;
info.instance_count = 0;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
if (!radv_before_draw(cmd_buffer, &info, 1, false))
return;
radv_emit_indirect_draw_packets(cmd_buffer, &info);
@ -13254,18 +13323,33 @@ radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkD
VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
const VkDrawIndirect2InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_2_INFO_KHR,
.addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride),
.addressFlags = buffer->vk.address_flags,
.drawCount = drawCount,
};
radv_CmdDrawIndexedIndirect2KHR(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndexedIndirect2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirect2InfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
info.indexed = true;
info.count = drawCount;
info.indirect_va = vk_buffer_address(&buffer->vk, offset);
info.stride = stride;
info.count = pInfo->drawCount;
info.indirect_va = pInfo->addressRange.address;
info.stride = pInfo->addressRange.stride;
info.count_va = 0;
info.strmout_va = 0;
info.instance_count = 0;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
if (!radv_before_draw(cmd_buffer, &info, 1, false))
return;
radv_emit_indirect_draw_packets(cmd_buffer, &info);
@ -13281,19 +13365,36 @@ radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDev
VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_draw_info info;
info.count = maxDrawCount;
info.indirect_va = vk_buffer_address(&buffer->vk, offset);
info.count_va = vk_buffer_address(&count_buffer->vk, countBufferOffset);
info.stride = stride;
info.strmout_va = 0;
info.indexed = false;
info.instance_count = 0;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo);
const VkDrawIndirectCount2InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_COUNT_2_INFO_KHR,
.addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride),
.addressFlags = buffer->vk.address_flags,
.countAddressRange = vk_device_address_range(&count_buffer->vk, countBufferOffset, VK_WHOLE_SIZE),
.countAddressFlags = count_buffer->vk.address_flags,
.maxDrawCount = maxDrawCount,
};
radv_CmdDrawIndirectCount2KHR(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndirectCount2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirectCount2InfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
info.count = pInfo->maxDrawCount;
info.indirect_va = pInfo->addressRange.address;
info.count_va = pInfo->countAddressRange.address;
info.stride = pInfo->addressRange.stride;
info.strmout_va = 0;
info.indexed = false;
info.instance_count = 0;
if (!radv_before_draw(cmd_buffer, &info, 1, false))
return;
radv_emit_indirect_draw_packets(cmd_buffer, &info);
@ -13310,19 +13411,36 @@ radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer
VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_draw_info info;
info.indexed = true;
info.count = maxDrawCount;
info.indirect_va = vk_buffer_address(&buffer->vk, offset);
info.count_va = vk_buffer_address(&count_buffer->vk, countBufferOffset);
info.stride = stride;
info.strmout_va = 0;
info.instance_count = 0;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo);
const VkDrawIndirectCount2InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_COUNT_2_INFO_KHR,
.addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride),
.addressFlags = buffer->vk.address_flags,
.countAddressRange = vk_device_address_range(&count_buffer->vk, countBufferOffset, VK_WHOLE_SIZE),
.countAddressFlags = count_buffer->vk.address_flags,
.maxDrawCount = maxDrawCount,
};
radv_CmdDrawIndexedIndirectCount2KHR(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndexedIndirectCount2KHR(VkCommandBuffer commandBuffer, const VkDrawIndirectCount2InfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
info.indexed = true;
info.count = pInfo->maxDrawCount;
info.indirect_va = pInfo->addressRange.address;
info.count_va = pInfo->countAddressRange.address;
info.stride = pInfo->addressRange.stride;
info.strmout_va = 0;
info.instance_count = 0;
if (!radv_before_draw(cmd_buffer, &info, 1, false))
return;
radv_emit_indirect_draw_packets(cmd_buffer, &info);
@ -13366,19 +13484,36 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer
VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
const VkDrawIndirect2InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_2_INFO_KHR,
.addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride),
.addressFlags = buffer->vk.address_flags,
.drawCount = drawCount,
};
radv_CmdDrawMeshTasksIndirect2EXT(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawMeshTasksIndirect2EXT(VkCommandBuffer commandBuffer, const VkDrawIndirect2InfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_draw_info info;
info.indirect_va = vk_buffer_address(&buffer->vk, offset);
info.stride = stride;
info.count = drawCount;
info.indirect_va = pInfo->addressRange.address;
info.stride = pInfo->addressRange.stride;
info.count = pInfo->drawCount;
info.strmout_va = 0;
info.count_va = 0;
info.indexed = false;
info.instance_count = 0;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount, false))
if (!radv_before_taskmesh_draw(cmd_buffer, &info, pInfo->drawCount, false))
return;
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@ -13395,27 +13530,45 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
VkBuffer _countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo);
const VkDrawIndirectCount2InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_DRAW_INDIRECT_COUNT_2_INFO_KHR,
.addressRange = vk_strided_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE, stride),
.addressFlags = buffer->vk.address_flags,
.countAddressRange = vk_device_address_range(&count_buffer->vk, countBufferOffset, VK_WHOLE_SIZE),
.countAddressFlags = count_buffer->vk.address_flags,
.maxDrawCount = maxDrawCount,
};
radv_CmdDrawMeshTasksIndirectCount2EXT(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawMeshTasksIndirectCount2EXT(VkCommandBuffer commandBuffer, const VkDrawIndirectCount2InfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_draw_info info;
info.indirect_va = vk_buffer_address(&buffer->vk, offset);
info.stride = stride;
info.count = maxDrawCount;
info.indirect_va = pInfo->addressRange.address;
info.stride = pInfo->addressRange.stride;
info.count = pInfo->maxDrawCount;
info.strmout_va = 0;
info.count_va = vk_buffer_address(&count_buffer->vk, countBufferOffset);
info.count_va = pInfo->countAddressRange.address;
info.indexed = false;
info.instance_count = 0;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo);
if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount, false))
if (!radv_before_taskmesh_draw(cmd_buffer, &info, pInfo->maxDrawCount, false))
return;
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@ -14083,11 +14236,25 @@ radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDevi
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_dispatch_info info = {.indirect_va = vk_buffer_address(&buffer->vk, offset)};
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
const VkDispatchIndirect2InfoKHR info = {
.sType = VK_STRUCTURE_TYPE_DISPATCH_INDIRECT_2_INFO_KHR,
.addressRange = vk_device_address_range(&buffer->vk, offset, VK_WHOLE_SIZE),
.addressFlags = buffer->vk.address_flags,
};
radv_CmdDispatchIndirect2KHR(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDispatchIndirect2KHR(VkCommandBuffer commandBuffer, const VkDispatchIndirect2InfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Forward the indirect-arguments address straight to the common compute
    * dispatch path; all other dispatch parameters stay zero.
    */
   struct radv_dispatch_info dispatch = {0};
   dispatch.indirect_va = pInfo->addressRange.address;

   radv_compute_dispatch(cmd_buffer, &dispatch);
}
@ -15232,13 +15399,27 @@ radv_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
bool draw_visible = true;
uint64_t va;
va = vk_buffer_address(&buffer->vk, pConditionalRenderingBegin->offset);
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
const VkConditionalRenderingBeginInfo2EXT begin_info = {
.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_2_EXT,
.addressRange = vk_device_address_range(&buffer->vk, pConditionalRenderingBegin->offset, VK_WHOLE_SIZE),
.addressFlags = buffer->vk.address_flags,
.flags = pConditionalRenderingBegin->flags,
};
radv_CmdBeginConditionalRendering2EXT(commandBuffer, &begin_info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginConditionalRendering2EXT(VkCommandBuffer commandBuffer,
const VkConditionalRenderingBeginInfo2EXT *pConditionalRenderingBegin)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
const uint64_t va = pConditionalRenderingBegin->addressRange.address;
bool draw_visible = true;
/* By default, if the 32-bit value at offset in buffer memory is zero,
* then the rendering commands are discarded, otherwise they are
* executed as normal. If the inverted flag is set, all commands are
@ -15267,24 +15448,43 @@ radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint8_t enabled_mask = 0;
STACK_ARRAY(VkBindTransformFeedbackBuffer2InfoEXT, bindings, bindingCount);
assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
for (uint32_t i = 0; i < bindingCount; i++) {
VK_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
uint32_t idx = firstBinding + i;
sb[idx].va = vk_buffer_address(&buffer->vk, pOffsets[i]);
if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) {
sb[idx].size = buffer->vk.size - pOffsets[i];
} else {
sb[idx].size = pSizes[i];
}
bindings[i] = (VkBindTransformFeedbackBuffer2InfoEXT){
.sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT,
.addressRange = vk_device_address_range(&buffer->vk, pOffsets[i], pSizes ? pSizes[i] : VK_WHOLE_SIZE),
.addressFlags = buffer->vk.address_flags,
};
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
}
radv_CmdBindTransformFeedbackBuffers2EXT(commandBuffer, firstBinding, bindingCount,
bindingCount > 0 ? bindings : NULL);
STACK_ARRAY_FINISH(bindings);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBindTransformFeedbackBuffers2EXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
const VkBindTransformFeedbackBuffer2InfoEXT *pBindingInfos)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
uint8_t enabled_mask = 0;
assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
for (uint32_t i = 0; i < bindingCount; i++) {
uint32_t idx = firstBinding + i;
sb[idx].va = pBindingInfos[i].addressRange.address;
sb[idx].size = pBindingInfos[i].addressRange.size;
enabled_mask |= 1 << idx;
}
@ -15380,6 +15580,41 @@ VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
                                  uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
                                  const VkDeviceSize *pCounterBufferOffsets)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_cmd_stream *cs = cmd_buffer->cs;

   STACK_ARRAY(VkBindTransformFeedbackBuffer2InfoEXT, counters, counterBufferCount);

   /* Translate legacy counter buffers to device address ranges and forward to
    * the VK_KHR_device_address_commands entrypoint.
    */
   for (uint32_t i = 0; i < counterBufferCount; i++) {
      /* pCounterBuffers may be NULL even when counterBufferCount > 0, and
       * individual elements may be VK_NULL_HANDLE.
       */
      const VkBuffer counter_buffer = pCounterBuffers ? pCounterBuffers[i] : VK_NULL_HANDLE;
      VK_FROM_HANDLE(radv_buffer, buffer, counter_buffer);
      VkDeviceAddressRangeKHR addr_range = {0};

      if (buffer) {
         radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
         addr_range =
            vk_device_address_range(&buffer->vk, pCounterBufferOffsets ? pCounterBufferOffsets[i] : 0, VK_WHOLE_SIZE);
      }

      counters[i] = (VkBindTransformFeedbackBuffer2InfoEXT){
         .sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT,
         .addressRange = addr_range,
         .addressFlags = buffer ? buffer->vk.address_flags : 0,
      };
   }

   radv_CmdBeginTransformFeedback2EXT(commandBuffer, firstCounterBuffer, counterBufferCount,
                                      counterBufferCount > 0 ? counters : NULL);

   STACK_ARRAY_FINISH(counters);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t firstCounterRange,
uint32_t counterRangeCount,
const VkBindTransformFeedbackBuffer2InfoEXT *pCounterInfos)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
@ -15389,7 +15624,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radv_cmd_stream *cs = cmd_buffer->cs;
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
assert(firstCounterRange + counterRangeCount <= MAX_SO_BUFFERS);
if (pdev->info.gfx_level >= GFX12) {
radv_init_streamout_state(cmd_buffer);
@ -15408,24 +15643,13 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 10);
u_foreach_bit (i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
int32_t counter_buffer_idx = i - firstCounterRange;
if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterRangeCount)
counter_buffer_idx = -1;
bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
uint64_t va = 0;
if (append) {
VK_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
uint64_t counter_buffer_offset = 0;
if (pCounterBufferOffsets)
counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
va += vk_buffer_address(&buffer->vk, counter_buffer_offset);
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
}
const bool append =
counter_buffer_idx >= 0 && pCounterInfos && pCounterInfos[counter_buffer_idx].addressRange.size > 0;
uint64_t va = append ? pCounterInfos[counter_buffer_idx].addressRange.address : 0;
if (pdev->info.gfx_level >= GFX12) {
if (append) {
@ -15488,6 +15712,40 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
VKAPI_ATTR void VKAPI_CALL
radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, uint32_t counterBufferCount,
                                const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_cmd_stream *cs = cmd_buffer->cs;

   STACK_ARRAY(VkBindTransformFeedbackBuffer2InfoEXT, counters, counterBufferCount);

   /* Translate legacy counter buffers to device address ranges and forward to
    * the VK_KHR_device_address_commands entrypoint.
    */
   for (uint32_t i = 0; i < counterBufferCount; i++) {
      /* pCounterBuffers may be NULL even when counterBufferCount > 0, and
       * individual elements may be VK_NULL_HANDLE.
       */
      const VkBuffer counter_buffer = pCounterBuffers ? pCounterBuffers[i] : VK_NULL_HANDLE;
      VK_FROM_HANDLE(radv_buffer, buffer, counter_buffer);
      VkDeviceAddressRangeKHR addr_range = {0};

      if (buffer) {
         radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
         addr_range =
            vk_device_address_range(&buffer->vk, pCounterBufferOffsets ? pCounterBufferOffsets[i] : 0, VK_WHOLE_SIZE);
      }

      counters[i] = (VkBindTransformFeedbackBuffer2InfoEXT){
         .sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT,
         .addressRange = addr_range,
         .addressFlags = buffer ? buffer->vk.address_flags : 0,
      };
   }

   radv_CmdEndTransformFeedback2EXT(commandBuffer, firstCounterBuffer, counterBufferCount,
                                    counterBufferCount > 0 ? counters : NULL);

   STACK_ARRAY_FINISH(counters);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdEndTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t firstCounterRange, uint32_t counterRangeCount,
const VkBindTransformFeedbackBuffer2InfoEXT *pCounterInfos)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
@ -15496,7 +15754,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
struct radv_cmd_stream *cs = cmd_buffer->cs;
bool needs_pfp_sync_me = false;
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
assert(firstCounterRange + counterRangeCount <= MAX_SO_BUFFERS);
if (pdev->use_ngg_streamout) {
/* Wait for streamout to finish before copying back the number of bytes
@ -15514,26 +15772,15 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 12);
u_foreach_bit (i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
int32_t counter_buffer_idx = i - firstCounterRange;
if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterRangeCount)
counter_buffer_idx = -1;
bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
uint64_t va = 0;
const bool append =
counter_buffer_idx >= 0 && pCounterInfos && pCounterInfos[counter_buffer_idx].addressRange.size > 0;
uint64_t va = append ? pCounterInfos[counter_buffer_idx].addressRange.address : 0;
if (append) {
VK_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
uint64_t counter_buffer_offset = 0;
if (pCounterBufferOffsets)
counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
va += vk_buffer_address(&buffer->vk, counter_buffer_offset);
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
needs_pfp_sync_me = true;
}
needs_pfp_sync_me |= append;
if (pdev->info.gfx_level >= GFX12) {
if (append) {
@ -15628,6 +15875,25 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, counterBuffer->bo);
const VkBindTransformFeedbackBuffer2InfoEXT info = {
.sType = VK_STRUCTURE_TYPE_BIND_TRANSFORM_FEEDBACK_BUFFER_2_INFO_EXT,
.addressRange = vk_device_address_range(&counterBuffer->vk, counterBufferOffset, VK_WHOLE_SIZE),
};
radv_CmdDrawIndirectByteCount2EXT(commandBuffer, instanceCount, firstInstance, &info, counterOffset, vertexStride);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndirectByteCount2EXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, uint32_t firstInstance,
const VkBindTransformFeedbackBuffer2InfoEXT *pCounterInfo, uint32_t counterOffset,
uint32_t vertexStride)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_draw_info info;
@ -15635,13 +15901,11 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc
info.count = 0;
info.instance_count = instanceCount;
info.first_instance = firstInstance;
info.strmout_va = vk_buffer_address(&counterBuffer->vk, counterBufferOffset);
info.strmout_va = pCounterInfo->addressRange.address;
info.stride = vertexStride;
info.indexed = false;
info.indirect_va = 0;
radv_cs_add_buffer(device->ws, cs->b, counterBuffer->bo);
if (!radv_before_draw(cmd_buffer, &info, 1, false))
return;
struct VkMultiDrawInfoEXT minfo = {0, 0};
@ -15668,15 +15932,32 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint64_t va = vk_buffer_address(&buffer->vk, dstOffset);
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
const VkMemoryMarkerInfoAMD info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_MARKER_INFO_AMD,
.stage = stage,
.dstRange = vk_device_address_range(&buffer->vk, dstOffset, VK_WHOLE_SIZE),
.marker = marker,
};
radv_CmdWriteMarkerToMemoryAMD(commandBuffer, &info);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdWriteMarkerToMemoryAMD(VkCommandBuffer commandBuffer, const VkMemoryMarkerInfoAMD *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint64_t va = pInfo->dstRange.address;
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
radeon_check_space(device->ws, cs->b, 4);
ac_emit_sdma_fence(cmd_buffer->cs->b, va, marker);
ac_emit_sdma_fence(cmd_buffer->cs->b, va, pInfo->marker);
return;
}
@ -15684,11 +15965,12 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 12);
if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) {
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, marker, va, AC_CP_COPY_DATA_WR_CONFIRM, false);
if (!(pInfo->stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) {
ac_emit_cp_copy_data(cs->b, COPY_DATA_IMM, COPY_DATA_DST_MEM, pInfo->marker, va, AC_CP_COPY_DATA_WR_CONFIRM,
false);
} else {
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, va, pInfo->marker,
cmd_buffer->gfx9_eop_bug_va);
}

View file

@ -2453,13 +2453,32 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
VkQueryResultFlags flags)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_query_pool, pool, queryPool);
VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
const VkStridedDeviceAddressRangeKHR dstRange =
vk_strided_device_address_range(&dst_buffer->vk, dstOffset, VK_WHOLE_SIZE, stride);
radv_CmdCopyQueryPoolResultsToMemoryKHR(commandBuffer, queryPool, firstQuery, queryCount, &dstRange,
dst_buffer->vk.address_flags, flags);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyQueryPoolResultsToMemoryKHR(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
uint32_t queryCount, const VkStridedDeviceAddressRangeKHR *pDstRange,
VkAddressCommandFlagsKHR dstFlags, VkQueryResultFlags queryResultFlags)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_query_pool, pool, queryPool);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint64_t dst_va = pDstRange->address;
const uint64_t stride = pDstRange->stride;
if (!queryCount)
return;
@ -2467,7 +2486,6 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
radv_suspend_conditional_rendering(cmd_buffer);
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
/* Workaround engines that forget to properly specify WAIT_BIT because some driver implicitly
* synchronizes before query copy.
@ -2487,26 +2505,26 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
switch (pool->vk.query_type) {
case VK_QUERY_TYPE_OCCLUSION:
radv_copy_occlusion_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags);
radv_copy_occlusion_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
radv_copy_pipeline_stat_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags);
radv_copy_pipeline_stat_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags);
break;
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
radv_copy_timestamp_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags);
radv_copy_timestamp_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags);
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
radv_copy_tfb_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags);
radv_copy_tfb_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags);
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
radv_copy_pg_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags);
radv_copy_pg_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags);
break;
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
radv_copy_ms_prim_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, flags);
radv_copy_ms_prim_query_result(cmd_buffer, pool, firstQuery, queryCount, dst_va, stride, queryResultFlags);
break;
default:
UNREACHABLE("trying to get results of unhandled query type");

View file

@ -102,9 +102,9 @@ radv_sdma_get_bpe(const struct radv_image *const image, const VkImageSubresource
}
struct ac_sdma_surf
radv_sdma_get_buf_surf(uint64_t buffer_va, const struct radv_image *const image, const VkBufferImageCopy2 *const region)
radv_sdma_get_buf_surf(const struct radv_image *const image, const VkDeviceMemoryImageCopyKHR *const region)
{
const struct vk_image_buffer_layout layout = vk_image_buffer_copy_layout(&image->vk, region);
const struct vk_image_buffer_layout layout = vk_image_memory_copy_layout(&image->vk, region);
assert(util_bitcount(region->imageSubresource.aspectMask) == 1);
@ -115,7 +115,7 @@ radv_sdma_get_buf_surf(uint64_t buffer_va, const struct radv_image *const image,
const uint32_t bpe = radv_sdma_get_bpe(image, &region->imageSubresource);
const struct ac_sdma_surf info = {
.va = buffer_va + region->bufferOffset,
.va = region->addressRange.address,
.pitch = pitch,
.slice_pitch = slice_pitch,
.bpp = bpe,

View file

@ -43,8 +43,8 @@ radv_sdma_get_copy_extent(const struct radv_image *const image, const VkImageSub
return extent;
}
struct ac_sdma_surf radv_sdma_get_buf_surf(uint64_t buffer_va, const struct radv_image *const image,
const VkBufferImageCopy2 *const region);
struct ac_sdma_surf radv_sdma_get_buf_surf(const struct radv_image *const image,
const VkDeviceMemoryImageCopyKHR *const region);
struct ac_sdma_surf radv_sdma_get_surf(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *const image,
VkImageLayout image_layout, const VkImageSubresourceLayers subresource,
const VkOffset3D offset);