mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-03 19:48:17 +02:00
v3dv: Enable meta_copy_buffer with TFU for V3D 7.1
Buffer-to-buffer copies on V3D 7.1+ can be served by the TFU as a
raster-to-raster copy, avoiding the per-copy CL render job and
tile_alloc/TSDA BO overhead of the TLB-based path.
Treat the buffer as a raster texture and chunk the copy into TFU
jobs of up to 16384x16384 pixels. Pick the largest pixel size
(cpp in {4,2,1}) such that src/dst offsets and size are all
cpp-aligned: cpp=4 (R8G8B8A8_UINT) is the expected common case;
cpp=2 (R8G8_UINT) and cpp=1 (R8_UINT) handle Vulkan-permitted
unaligned vkCmdCopyBuffer regions that would otherwise fall back
to the slow TLB path. The TFU path is skipped when V3D_DEBUG=disable_tfu
is set, and a perf_debug message is emitted when the cpp=1/2 fallback is taken.
Drop the `if (copy_job)` guard on src_bo cleanup registration in
v3dv_CmdUpdateBuffer: the TFU path queues jobs without returning a
v3dv_job*, so the staging BO must be tracked unconditionally to
avoid leaking once the cmd buffer is submitted.
Assisted-by: Claude Opus 4.7
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41725>
This commit is contained in:
parent
ff6f82c834
commit
9b131eb86e
3 changed files with 77 additions and 6 deletions
|
|
@ -24,6 +24,11 @@
|
|||
#ifndef V3D_TFU_H
|
||||
#define V3D_TFU_H
|
||||
|
||||
/* Maximum width and height (in pixels) per submitted TFU job. The ios
|
||||
* register encodes width and height as 14-bit values each.
|
||||
*/
|
||||
#define V3D_TFU_MAX_DIM 16384
|
||||
|
||||
/* Disable level 0 write, just write following mipmaps */
|
||||
#define V3D33_TFU_IOA_DIMTW (1 << 0)
|
||||
#define V3D33_TFU_IOA_FORMAT_SHIFT 3
|
||||
|
|
|
|||
|
|
@ -1851,15 +1851,12 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
|
|||
.dstOffset = dstOffset,
|
||||
.size = dataSize,
|
||||
};
|
||||
struct v3dv_job *copy_job =
|
||||
v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
|
||||
v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
|
||||
(cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
|
||||
src_bo, 0, &region);
|
||||
|
||||
if (copy_job) {
|
||||
v3dv_cmd_buffer_add_private_obj(
|
||||
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
|
||||
}
|
||||
v3dv_cmd_buffer_add_private_obj(
|
||||
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
|
||||
|
||||
cmd_buffer->state.is_transfer = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1363,6 +1363,75 @@ v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
|
|||
uint32_t src_offset,
|
||||
const VkBufferCopy2 *region)
|
||||
{
|
||||
#if V3D_VERSION >= 71
|
||||
/* Use TFU raster-to-raster copy on V3D 7.1+. Treat the buffer data
|
||||
* as a raster texture and copy via the TFU, avoiding the expensive
|
||||
* CL render job. Pick the largest cpp such that src/dst offsets
|
||||
* and size are all cpp-aligned: cpp=4 (R8G8B8A8_UINT) is the
|
||||
* expected common case; cpp=2 (R8G8_UINT) and cpp=1 (R8_UINT)
|
||||
* handle Vulkan-permitted unaligned vkCmdCopyBuffer regions.
|
||||
*/
|
||||
if (!V3D_DBG(DISABLE_TFU)) {
|
||||
const uint64_t abs_src = (uint64_t)src_offset + region->srcOffset;
|
||||
const uint64_t abs_dst = (uint64_t)dst_offset + region->dstOffset;
|
||||
const uint64_t align_mask =
|
||||
abs_src | abs_dst | (uint64_t)region->size;
|
||||
|
||||
uint32_t cpp;
|
||||
VkFormat vk_format;
|
||||
if ((align_mask & 3) == 0) {
|
||||
cpp = 4;
|
||||
vk_format = VK_FORMAT_R8G8B8A8_UINT;
|
||||
} else if ((align_mask & 1) == 0) {
|
||||
cpp = 2;
|
||||
vk_format = VK_FORMAT_R8G8_UINT;
|
||||
} else {
|
||||
cpp = 1;
|
||||
vk_format = VK_FORMAT_R8_UINT;
|
||||
}
|
||||
|
||||
if (cpp != 4) {
|
||||
perf_debug("meta_copy_buffer: TFU cpp=%u fallback "
|
||||
"(src=%" PRIu64 " dst=%" PRIu64
|
||||
" size=%" PRIu64 ").\n",
|
||||
cpp, abs_src, abs_dst, (uint64_t)region->size);
|
||||
}
|
||||
|
||||
const struct v3dv_format *format = v3dX(get_format)(vk_format);
|
||||
assert(format && format->plane_count == 1);
|
||||
|
||||
uint32_t num_pixels = region->size / cpp;
|
||||
uint32_t cur_src = src_offset + region->srcOffset;
|
||||
uint32_t cur_dst = dst_offset + region->dstOffset;
|
||||
|
||||
while (num_pixels > 0) {
|
||||
uint32_t width = MIN2(num_pixels, V3D_TFU_MAX_DIM);
|
||||
uint32_t height = MAX2(1, MIN2(num_pixels / width, V3D_TFU_MAX_DIM));
|
||||
uint32_t pixels_this_job = width * height;
|
||||
assert(pixels_this_job <= num_pixels);
|
||||
|
||||
v3dX(meta_emit_tfu_job)(cmd_buffer,
|
||||
dst->handle,
|
||||
dst->offset + cur_dst,
|
||||
V3D_TILING_RASTER,
|
||||
width * cpp, cpp,
|
||||
src->handle,
|
||||
src->offset + cur_src,
|
||||
V3D_TILING_RASTER,
|
||||
width * cpp, cpp,
|
||||
width, height,
|
||||
&format->planes[0]);
|
||||
|
||||
num_pixels -= pixels_this_job;
|
||||
cur_src += pixels_this_job * cpp;
|
||||
cur_dst += pixels_this_job * cpp;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
if (V3D_DBG(DISABLE_TFU))
|
||||
perf_debug("meta_copy_buffer: TFU disabled, using TLB.\n");
|
||||
#endif
|
||||
|
||||
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
|
||||
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue