v3dv: Enable meta_copy_buffer with TFU for V3D 7.1

Buffer-to-buffer copies on V3D 7.1+ can be served by the TFU as a
raster-to-raster copy, avoiding the per-copy CL render job and
tile_alloc/TSDA BO overhead of the TLB-based path.

Treat the buffer as a raster texture and chunk the copy into TFU
jobs of up to 16384x16384 pixels. Pick the largest pixel size
(cpp in {4,2,1}) such that src/dst offsets and size are all
cpp-aligned: cpp=4 (R8G8B8A8_UINT) is the expected common case;
cpp=2 (R8G8_UINT) and cpp=1 (R8_UINT) handle Vulkan-permitted
unaligned vkCmdCopyBuffer regions that would otherwise fall back
to the slow TLB path. The TFU path is skipped when
V3D_DEBUG=disable_tfu is set, and a perf_debug message is emitted
when the cpp=1/2 fallback is taken.

Drop the `if (copy_job)` guard on src_bo cleanup registration in
v3dv_CmdUpdateBuffer: the TFU path queues its jobs directly and
returns NULL instead of a v3dv_job*, so the staging BO must be
tracked unconditionally to avoid leaking it once the cmd buffer is
submitted.

Assisted-by: Claude Opus 4.7
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41725>
This commit is contained in:
Jose Maria Casanova Crespo 2026-05-19 17:35:46 +02:00 committed by Marge Bot
parent ff6f82c834
commit 9b131eb86e
3 changed files with 77 additions and 6 deletions

View file

@ -24,6 +24,11 @@
#ifndef V3D_TFU_H
#define V3D_TFU_H
/* Maximum width and height (in pixels) per submitted TFU job. The ios
* register encodes width and height as 14-bit values each.
*
* NOTE(review): a plain 14-bit field holds 0..16383, so confirm how a
* dimension of exactly 16384 is encoded (e.g. a wider field, or 0
* meaning 16384) -- TODO verify against the register description.
*/
#define V3D_TFU_MAX_DIM 16384
/* Disable level 0 write, just write following mipmaps */
#define V3D33_TFU_IOA_DIMTW (1 << 0)
#define V3D33_TFU_IOA_FORMAT_SHIFT 3

View file

@ -1851,15 +1851,12 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
.dstOffset = dstOffset,
.size = dataSize,
};
struct v3dv_job *copy_job =
v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
(cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
src_bo, 0, &region);
if (copy_job) {
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
cmd_buffer->state.is_transfer = false;
}

View file

@ -1363,6 +1363,75 @@ v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t src_offset,
const VkBufferCopy2 *region)
{
#if V3D_VERSION >= 71
/* Use TFU raster-to-raster copy on V3D 7.1+. Treat the buffer data
* as a raster texture and copy via the TFU, avoiding the expensive
* CL render job. Pick the largest cpp such that src/dst offsets
* and size are all cpp-aligned: cpp=4 (R8G8B8A8_UINT) is the
* expected common case; cpp=2 (R8G8_UINT) and cpp=1 (R8_UINT)
* handle Vulkan-permitted unaligned vkCmdCopyBuffer regions.
*/
if (!V3D_DBG(DISABLE_TFU)) {
const uint64_t abs_src = (uint64_t)src_offset + region->srcOffset;
const uint64_t abs_dst = (uint64_t)dst_offset + region->dstOffset;
const uint64_t align_mask =
abs_src | abs_dst | (uint64_t)region->size;
uint32_t cpp;
VkFormat vk_format;
if ((align_mask & 3) == 0) {
cpp = 4;
vk_format = VK_FORMAT_R8G8B8A8_UINT;
} else if ((align_mask & 1) == 0) {
cpp = 2;
vk_format = VK_FORMAT_R8G8_UINT;
} else {
cpp = 1;
vk_format = VK_FORMAT_R8_UINT;
}
if (cpp != 4) {
perf_debug("meta_copy_buffer: TFU cpp=%u fallback "
"(src=%" PRIu64 " dst=%" PRIu64
" size=%" PRIu64 ").\n",
cpp, abs_src, abs_dst, (uint64_t)region->size);
}
const struct v3dv_format *format = v3dX(get_format)(vk_format);
assert(format && format->plane_count == 1);
uint32_t num_pixels = region->size / cpp;
uint32_t cur_src = src_offset + region->srcOffset;
uint32_t cur_dst = dst_offset + region->dstOffset;
while (num_pixels > 0) {
uint32_t width = MIN2(num_pixels, V3D_TFU_MAX_DIM);
uint32_t height = MAX2(1, MIN2(num_pixels / width, V3D_TFU_MAX_DIM));
uint32_t pixels_this_job = width * height;
assert(pixels_this_job <= num_pixels);
v3dX(meta_emit_tfu_job)(cmd_buffer,
dst->handle,
dst->offset + cur_dst,
V3D_TILING_RASTER,
width * cpp, cpp,
src->handle,
src->offset + cur_src,
V3D_TILING_RASTER,
width * cpp, cpp,
width, height,
&format->planes[0]);
num_pixels -= pixels_this_job;
cur_src += pixels_this_job * cpp;
cur_dst += pixels_this_job * cpp;
}
return NULL;
}
if (V3D_DBG(DISABLE_TFU))
perf_debug("meta_copy_buffer: TFU disabled, using TLB.\n");
#endif
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;