Patch summary

* src/broadcom/common/v3d_tfu.h: adds V3D_TFU_MAX_DIM (16384), used as
  the per-job width/height cap for TFU submissions.
* src/broadcom/vulkan/v3dvx_meta_common.c: when V3D_VERSION >= 71 and the
  DISABLE_TFU debug flag is not set, meta_copy_buffer picks the largest
  texel size (4, 2 or 1 bytes) that divides the absolute source offset,
  the absolute destination offset and the copy size, splits the region
  into raster rectangles of at most V3D_TFU_MAX_DIM x V3D_TFU_MAX_DIM
  texels, emits one meta_emit_tfu_job per rectangle and returns NULL.
  With DISABLE_TFU set it logs a perf_debug message and falls through to
  the existing path.
* src/broadcom/vulkan/v3dv_meta_copy.c: v3dv_CmdUpdateBuffer no longer
  looks at the return value of meta_copy_buffer and always registers
  src_bo with destroy_update_buffer_cb.

Review notes (please confirm before merging)

* A 14-bit field holds 0..16383, so 16384 does not fit unless the
  hardware gives 0 a special meaning. Check the IOS register encoding
  against V3D_TFU_MAX_DIM.
* num_pixels, cur_src and cur_dst are uint32_t while region->size and the
  alignment mask are 64-bit. Confirm sizes and offsets on this path can
  never exceed 32 bits.
* The texel-size selection ignores dst->offset and src->offset, which are
  added to the addresses handed to meta_emit_tfu_job. Presumably BO
  offsets are at least 4-byte aligned; verify.
* On the TFU path meta_copy_buffer now returns NULL after a successful
  copy. Any other caller that interprets NULL as "no job / failure" needs
  the same treatment as v3dv_CmdUpdateBuffer.

diff --git a/src/broadcom/common/v3d_tfu.h b/src/broadcom/common/v3d_tfu.h
index 3c2576cbf4a..3782535e256 100644
--- a/src/broadcom/common/v3d_tfu.h
+++ b/src/broadcom/common/v3d_tfu.h
@@ -24,6 +24,11 @@
 #ifndef V3D_TFU_H
 #define V3D_TFU_H
 
+/* Maximum width and height (in pixels) per submitted TFU job. The ios
+ * register encodes width and height as 14-bit values each.
+ */
+#define V3D_TFU_MAX_DIM 16384
+
 /* Disable level 0 write, just write following mipmaps */
 #define V3D33_TFU_IOA_DIMTW (1 << 0)
 #define V3D33_TFU_IOA_FORMAT_SHIFT 3
diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c
index 257e9423cfa..7b349642a96 100644
--- a/src/broadcom/vulkan/v3dv_meta_copy.c
+++ b/src/broadcom/vulkan/v3dv_meta_copy.c
@@ -1851,15 +1851,12 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
       .dstOffset = dstOffset,
       .size = dataSize,
    };
-   struct v3dv_job *copy_job =
-      v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
+   v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
       (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
        src_bo, 0, &region);
 
-   if (copy_job) {
-      v3dv_cmd_buffer_add_private_obj(
-         cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
-   }
+   v3dv_cmd_buffer_add_private_obj(
+      cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
 
    cmd_buffer->state.is_transfer = false;
 }
diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c
index 76edd97a986..017f9d791a0 100644
--- a/src/broadcom/vulkan/v3dvx_meta_common.c
+++ b/src/broadcom/vulkan/v3dvx_meta_common.c
@@ -1363,6 +1363,75 @@ v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
                        uint32_t src_offset,
                        const VkBufferCopy2 *region)
 {
+#if V3D_VERSION >= 71
+   /* Use TFU raster-to-raster copy on V3D 7.1+. Treat the buffer data
+    * as a raster texture and copy via the TFU, avoiding the expensive
+    * CL render job. Pick the largest cpp such that src/dst offsets
+    * and size are all cpp-aligned: cpp=4 (R8G8B8A8_UINT) is the
+    * expected common case; cpp=2 (R8G8_UINT) and cpp=1 (R8_UINT)
+    * handle Vulkan-permitted unaligned vkCmdCopyBuffer regions.
+    */
+   if (!V3D_DBG(DISABLE_TFU)) {
+      const uint64_t abs_src = (uint64_t)src_offset + region->srcOffset;
+      const uint64_t abs_dst = (uint64_t)dst_offset + region->dstOffset;
+      const uint64_t align_mask =
+         abs_src | abs_dst | (uint64_t)region->size;
+
+      uint32_t cpp;
+      VkFormat vk_format;
+      if ((align_mask & 3) == 0) {
+         cpp = 4;
+         vk_format = VK_FORMAT_R8G8B8A8_UINT;
+      } else if ((align_mask & 1) == 0) {
+         cpp = 2;
+         vk_format = VK_FORMAT_R8G8_UINT;
+      } else {
+         cpp = 1;
+         vk_format = VK_FORMAT_R8_UINT;
+      }
+
+      if (cpp != 4) {
+         perf_debug("meta_copy_buffer: TFU cpp=%u fallback "
+                    "(src=%" PRIu64 " dst=%" PRIu64
+                    " size=%" PRIu64 ").\n",
+                    cpp, abs_src, abs_dst, (uint64_t)region->size);
+      }
+
+      const struct v3dv_format *format = v3dX(get_format)(vk_format);
+      assert(format && format->plane_count == 1);
+
+      uint32_t num_pixels = region->size / cpp;
+      uint32_t cur_src = src_offset + region->srcOffset;
+      uint32_t cur_dst = dst_offset + region->dstOffset;
+
+      while (num_pixels > 0) {
+         uint32_t width = MIN2(num_pixels, V3D_TFU_MAX_DIM);
+         uint32_t height = MAX2(1, MIN2(num_pixels / width, V3D_TFU_MAX_DIM));
+         uint32_t pixels_this_job = width * height;
+         assert(pixels_this_job <= num_pixels);
+
+         v3dX(meta_emit_tfu_job)(cmd_buffer,
+                                 dst->handle,
+                                 dst->offset + cur_dst,
+                                 V3D_TILING_RASTER,
+                                 width * cpp, cpp,
+                                 src->handle,
+                                 src->offset + cur_src,
+                                 V3D_TILING_RASTER,
+                                 width * cpp, cpp,
+                                 width, height,
+                                 &format->planes[0]);
+
+         num_pixels -= pixels_this_job;
+         cur_src += pixels_this_job * cpp;
+         cur_dst += pixels_this_job * cpp;
+      }
+      return NULL;
+   }
+   if (V3D_DBG(DISABLE_TFU))
+      perf_debug("meta_copy_buffer: TFU disabled, using TLB.\n");
+#endif
+
    const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
    const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
 