mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-03 17:38:25 +02:00
v3dv: implement TFU image-to-buffer copy on V3D 7.1
Generalize copy_buffer_image_tfu with a to_buffer flag selecting which side is the raster destination, and wire it into v3dv_CmdCopyImageToBuffer2 before the TLB path. The to_buffer=true direction has the same eligibility constraints as buffer-to-image, except that V3D 4.2 is unsupported as its TFU cannot produce raster output, and for image-to-buffer the destination is always a raster buffer. Assisted-by: Claude Opus 4.7 Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41725>
This commit is contained in:
parent
0054ff2cb7
commit
99bce54daa
1 changed files with 64 additions and 36 deletions
|
|
@ -1126,6 +1126,13 @@ copy_image_to_buffer_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
|
|||
return handled;
|
||||
}
|
||||
|
||||
static bool
|
||||
copy_buffer_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
struct v3dv_image *image,
|
||||
struct v3dv_buffer *buffer,
|
||||
const VkBufferImageCopy2 *region,
|
||||
bool to_buffer);
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
v3dv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
|
||||
const VkCopyImageToBufferInfo2 *info)
|
||||
|
|
@ -1142,6 +1149,10 @@ v3dv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
|
|||
for (uint32_t i = 0; i < info->regionCount; i++) {
|
||||
const VkBufferImageCopy2 *region = &info->pRegions[i];
|
||||
|
||||
if (copy_buffer_image_tfu(cmd_buffer, image, buffer, region,
|
||||
true /* to_buffer */))
|
||||
continue;
|
||||
|
||||
if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, region))
|
||||
continue;
|
||||
|
||||
|
|
@ -1903,32 +1914,44 @@ v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns true if the implementation supports the requested operation (even if
|
||||
* it failed to process it, for example, due to an out-of-memory error).
|
||||
* Shared TFU implementation for buffer<->image copies. When to_buffer is
|
||||
* false the image is the destination (buffer-to-image); when true the
|
||||
* buffer is the destination (image-to-buffer). Returns true if the
|
||||
* implementation supports the requested operation (even if it failed to
|
||||
* process it, for example, due to an out-of-memory error).
|
||||
*/
|
||||
static bool
|
||||
copy_buffer_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
struct v3dv_image *image,
|
||||
struct v3dv_buffer *buffer,
|
||||
const VkBufferImageCopy2 *region)
|
||||
const VkBufferImageCopy2 *region,
|
||||
bool to_buffer)
|
||||
{
|
||||
if (V3D_DBG(DISABLE_TFU)) {
|
||||
perf_debug("Copy buffer to image: TFU disabled, fallbacks could be slower.\n");
|
||||
perf_debug("TFU disabled, fallbacks could be slower: %s.\n",
|
||||
to_buffer ? "Copy image to buffer"
|
||||
: "Copy buffer to image");
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
|
||||
|
||||
/* Destination can't be raster format on V3D 4.2 */
|
||||
if (cmd_buffer->device->devinfo.ver < 71 && !image->tiled)
|
||||
/* The TFU on V3D 4.2 cannot produce raster output. For buffer-to-image
|
||||
* that only matters when the destination image is linear-tiled; for
|
||||
* image-to-buffer the destination is always a raster buffer, so V3D 4.2
|
||||
* is unsupported.
|
||||
*/
|
||||
if (cmd_buffer->device->devinfo.ver < 71 &&
|
||||
(to_buffer || !image->tiled)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* We can't copy D24S8 because buffer to image copies only copy one aspect
|
||||
* at a time, and the TFU copies full images. Also, V3D depth bits for
|
||||
* both D24S8 and D24X8 stored in the 24-bit MSB of each 32-bit word, but
|
||||
* the Vulkan spec has the buffer data specified the other way around, so it
|
||||
* is not a straight copy, we would have to swizzle the channels, which the
|
||||
* TFU can't do.
|
||||
/* We can't copy D24S8 because buffer<->image copies only copy one
|
||||
* aspect at a time, and the TFU copies full images. Also, V3D depth
|
||||
* bits for both D24S8 and D24X8 are stored in the 24-bit MSB of each
|
||||
* 32-bit word, but the Vulkan spec has the buffer data specified the
|
||||
* other way around, so it is not a straight copy, we would have to
|
||||
* swizzle the channels, which the TFU can't do.
|
||||
*/
|
||||
if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
|
||||
image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
|
||||
|
|
@ -1988,13 +2011,14 @@ copy_buffer_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
|
|||
assert(num_layers > 0);
|
||||
|
||||
assert(image->planes[plane].mem && image->planes[plane].mem->bo);
|
||||
const struct v3dv_bo *dst_bo = image->planes[plane].mem->bo;
|
||||
|
||||
assert(buffer->mem && buffer->mem->bo);
|
||||
const struct v3dv_bo *src_bo = buffer->mem->bo;
|
||||
const struct v3dv_bo *image_bo = image->planes[plane].mem->bo;
|
||||
const struct v3dv_bo *buffer_bo = buffer->mem->bo;
|
||||
|
||||
const uint32_t cpp = image->planes[plane].cpp;
|
||||
|
||||
/* Emit a TFU job per layer to copy */
|
||||
const uint32_t buffer_stride = width * image->planes[plane].cpp;
|
||||
const uint32_t buffer_stride = width * cpp;
|
||||
for (int i = 0; i < num_layers; i++) {
|
||||
uint32_t layer;
|
||||
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
|
||||
|
|
@ -2002,27 +2026,29 @@ copy_buffer_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
|
|||
else
|
||||
layer = region->imageOffset.z + i;
|
||||
|
||||
const uint32_t buffer_offset =
|
||||
const uint32_t image_offset = image_bo->offset +
|
||||
v3dv_layer_offset(image, mip_level, layer, plane);
|
||||
const uint32_t buffer_offset = buffer_bo->offset +
|
||||
buffer->mem_offset + region->bufferOffset +
|
||||
height * buffer_stride * i;
|
||||
const uint32_t src_offset = src_bo->offset + buffer_offset;
|
||||
|
||||
const uint32_t dst_offset =
|
||||
dst_bo->offset + v3dv_layer_offset(image, mip_level, layer, plane);
|
||||
|
||||
v3d_X((&cmd_buffer->device->devinfo), meta_emit_tfu_job)(
|
||||
cmd_buffer,
|
||||
dst_bo->handle,
|
||||
dst_offset,
|
||||
slice->tiling,
|
||||
tfu_slice_stride_arg(slice),
|
||||
image->planes[plane].cpp,
|
||||
src_bo->handle,
|
||||
src_offset,
|
||||
V3D_TILING_RASTER,
|
||||
width,
|
||||
1,
|
||||
width, height, format_plane);
|
||||
if (to_buffer) {
|
||||
v3d_X((&cmd_buffer->device->devinfo), meta_emit_tfu_job)(
|
||||
cmd_buffer,
|
||||
buffer_bo->handle, buffer_offset,
|
||||
V3D_TILING_RASTER, width, 1,
|
||||
image_bo->handle, image_offset,
|
||||
slice->tiling, tfu_slice_stride_arg(slice), cpp,
|
||||
width, height, format_plane);
|
||||
} else {
|
||||
v3d_X((&cmd_buffer->device->devinfo), meta_emit_tfu_job)(
|
||||
cmd_buffer,
|
||||
image_bo->handle, image_offset,
|
||||
slice->tiling, tfu_slice_stride_arg(slice), cpp,
|
||||
buffer_bo->handle, buffer_offset,
|
||||
V3D_TILING_RASTER, width, 1,
|
||||
width, height, format_plane);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -2132,7 +2158,8 @@ create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
|
|||
struct v3dv_buffer *buffer,
|
||||
const VkBufferImageCopy2 *region)
|
||||
{
|
||||
if (copy_buffer_image_tfu(cmd_buffer, image, buffer, region))
|
||||
if (copy_buffer_image_tfu(cmd_buffer, image, buffer, region,
|
||||
false /* to_buffer */))
|
||||
return true;
|
||||
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
|
||||
return true;
|
||||
|
|
@ -3361,7 +3388,8 @@ v3dv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
|
|||
* where we are copying full images, since they should be the fastest.
|
||||
*/
|
||||
uint32_t batch_size = 1;
|
||||
if (copy_buffer_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
|
||||
if (copy_buffer_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r],
|
||||
false /* to_buffer */))
|
||||
goto handled;
|
||||
|
||||
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue