radv/meta: add a function to fixup HTILE metadata for copies on compute queue

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39656>
This commit is contained in:
Samuel Pitoiset 2026-02-02 18:12:34 +01:00 committed by Marge Bot
parent 9f5a20abde
commit 4f41818194

View file

@ -127,6 +127,60 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v
radv_sdma_copy_buffer_image(device, cs, &buf, &img, extent, to_image);
}
/**
 * Keep the HTILE metadata of a copy destination consistent around a copy
 * performed on compute.
 *
 * Intended to be called once before the copy (before_copy = true) and once
 * after it (before_copy = false). Returns without doing anything when
 * radv_layout_is_htile_compressed() reports that the destination mip level is
 * not HTILE-compressed for image_layout with this command buffer's queue
 * family.
 *
 * \param cmd_buffer    Command buffer the copy is recorded into.
 * \param image         Destination image of the copy.
 * \param image_layout  Layout of the destination image during the copy.
 * \param subresource   Destination mip level, array layers and aspects.
 * \param offset        Destination offset of the copied region.
 * \param extent        Size of the copied region.
 * \param before_copy   true: expand depth/stencil if the copy is partial;
 *                      false: set the CS_PARTIAL_FLUSH/INV_VCACHE flush bits
 *                      and clear HTILE to its initial value.
 */
static void
radv_fixup_copy_dst_htile_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                                   VkImageLayout image_layout, const VkImageSubresourceLayers *subresource,
                                   const VkOffset3D *offset, const VkExtent3D *extent, bool before_copy)
{
   const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const uint32_t qf_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);

   /* Nothing to maintain when HTILE isn't compressed for this level/layout. */
   if (!radv_layout_is_htile_compressed(device, image, subresource->mipLevel, image_layout, qf_mask))
      return;

   if (!before_copy) {
      /* After the copy: make sure the compute work is done and the vector
       * cache is invalidated, then reset HTILE to its initial value.
       */
      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;

      const VkImageSubresourceRange clear_range = {
         .aspectMask = subresource->aspectMask,
         .baseMipLevel = subresource->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = subresource->baseArrayLayer,
         .layerCount = vk_image_subresource_layer_count(&image->vk, subresource),
      };
      const uint32_t initial_htile = radv_get_htile_initial_value(device, image);

      cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, &clear_range, initial_htile, false);
      return;
   }

   /* Before the copy: only partial copies need work. A copy is considered
    * full when it starts at the origin and its extent equals image->vk.extent.
    */
   const bool starts_at_origin = !(offset->x || offset->y || offset->z);
   const bool covers_image = extent->width == image->vk.extent.width &&
                             extent->height == image->vk.extent.height &&
                             extent->depth == image->vk.extent.depth;
   if (starts_at_origin && covers_image)
      return;

   /* Image stores don't write the uncompressed DWORD to HTILE, so a partial
    * copy requires decompressing beforehand; HTILE is then re-initialized to
    * its uncompressed state by the after-copy call.
    */
   VkImageSubresourceRange expand_range = {
      .baseMipLevel = subresource->mipLevel,
      .levelCount = 1,
      .baseArrayLayer = subresource->baseArrayLayer,
      .layerCount = vk_image_subresource_layer_count(&image->vk, subresource),
   };

   radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);

   /* Expand one aspect at a time. */
   u_foreach_bit (bit, subresource->aspectMask) {
      expand_range.aspectMask = 1u << bit;
      radv_expand_depth_stencil(cmd_buffer, image, &expand_range, NULL);
   }

   radv_describe_barrier_end(cmd_buffer);
}
static void
gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size,
enum radv_copy_flags src_copy_flags, struct radv_image *image, VkImageLayout layout,
@ -145,37 +199,8 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t
RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
if (use_compute) {
/* For partial copies, HTILE is decompressed before because image stores don't write the
* uncompressed DWORD to HTILE. And then it's needed to re-initialize HTILE to its
* uncompressed state after the copy.
*/
const bool is_partial_copy = region->imageOffset.x || region->imageOffset.y || region->imageOffset.z ||
region->imageExtent.width != image->vk.extent.width ||
region->imageExtent.height != image->vk.extent.height ||
region->imageExtent.depth != image->vk.extent.depth;
uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
if (radv_layout_is_htile_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask) &&
is_partial_copy) {
radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);
u_foreach_bit (i, region->imageSubresource.aspectMask) {
unsigned aspect_mask = 1u << i;
radv_expand_depth_stencil(
cmd_buffer, image,
&(VkImageSubresourceRange){
.aspectMask = aspect_mask,
.baseMipLevel = region->imageSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = region->imageSubresource.baseArrayLayer,
.layerCount = vk_image_subresource_layer_count(&image->vk, &region->imageSubresource),
},
NULL);
}
radv_describe_barrier_end(cmd_buffer);
}
radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, layout, &region->imageSubresource, &region->imageOffset,
&region->imageExtent, true);
}
/**
@ -257,24 +282,8 @@ gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t
}
if (use_compute) {
/* Fixup HTILE after a copy on compute. */
uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
if (radv_layout_is_htile_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
VkImageSubresourceRange range = {
.aspectMask = region->imageSubresource.aspectMask,
.baseMipLevel = region->imageSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = region->imageSubresource.baseArrayLayer,
.layerCount = vk_image_subresource_layer_count(&image->vk, &region->imageSubresource),
};
uint32_t htile_value = radv_get_htile_initial_value(device, image);
cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, &range, htile_value, false);
}
radv_fixup_copy_dst_htile_metadata(cmd_buffer, image, layout, &region->imageSubresource, &region->imageOffset,
&region->imageExtent, false);
}
radv_meta_restore(&saved_state, cmd_buffer);
@ -567,34 +576,8 @@ gfx_or_compute_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image
RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
if (use_compute) {
/* For partial copies, HTILE should be decompressed before copying because the metadata is
* re-initialized to the uncompressed state after.
*/
uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
if (radv_layout_is_htile_compressed(device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
queue_mask) &&
(region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height ||
region->extent.depth != dst_image->vk.extent.depth)) {
radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);
u_foreach_bit (i, region->dstSubresource.aspectMask) {
unsigned aspect_mask = 1u << i;
radv_expand_depth_stencil(
cmd_buffer, dst_image,
&(VkImageSubresourceRange){
.aspectMask = aspect_mask,
.baseMipLevel = region->dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = region->dstSubresource.baseArrayLayer,
.layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
},
NULL);
}
radv_describe_barrier_end(cmd_buffer);
}
radv_fixup_copy_dst_htile_metadata(cmd_buffer, dst_image, dst_image_layout, &region->dstSubresource,
&region->dstOffset, &region->extent, true);
}
/* Create blit surfaces */
@ -693,25 +676,8 @@ gfx_or_compute_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image
}
if (use_compute) {
/* Fixup HTILE after a copy on compute. */
uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
if (radv_layout_is_htile_compressed(device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
queue_mask)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
VkImageSubresourceRange range = {
.aspectMask = region->dstSubresource.aspectMask,
.baseMipLevel = region->dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = region->dstSubresource.baseArrayLayer,
.layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
};
uint32_t htile_value = radv_get_htile_initial_value(device, dst_image);
cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value, false);
}
/* Post-copy fixup: before_copy must be false here so that HTILE is flushed and
 * re-initialized, instead of repeating the pre-copy depth/stencil expand.
 */
radv_fixup_copy_dst_htile_metadata(cmd_buffer, dst_image, dst_image_layout, &region->dstSubresource,
&region->dstOffset, &region->extent, false);
}
radv_meta_restore(&saved_state, cmd_buffer);