ac/surface: allow selecting the hybrid/block memcpy path for host copies

Based on my profiling, the hybrid mode performs better (by ~20%) with
block-compressed formats, so use it for those and the block mode otherwise.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41019>
This commit is contained in:
Samuel Pitoiset 2026-04-17 12:16:34 +02:00 committed by Marge Bot
parent de978d1e5f
commit 6699eecb6f
2 changed files with 21 additions and 0 deletions

View file

@ -4713,6 +4713,16 @@ gfx10_surface_copy_mem_surface(struct ac_addrlib *addrlib, const struct radeon_i
input.pbXor = surf->tile_swizzle;
input.pMappedSurface = (char *)surf_copy_region->surf_ptr +
(surf_copy_region->is_stencil_only ? surf->u.gfx9.zs.stencil_offset : 0);
if (surf_copy_region->memcpy) {
if (surf->blk_w == 4 && surf->blk_h == 4) {
/* The hybrid memcpy seems to perform better with block compressed
* formats due to the 256B alignment.
*/
input.copyFlags.hybridMemcpy = true;
} else {
input.copyFlags.blockMemcpy = true;
}
}
ADDR_E_RETURNCODE res;
ADDR2_COPY_MEMSURFACE_REGION region = {0};
@ -4786,6 +4796,16 @@ gfx12_surface_copy_mem_surface(struct ac_addrlib *addrlib, const struct radeon_i
input.pbXor = surf->tile_swizzle;
input.pMappedSurface = (char *)surf_copy_region->surf_ptr +
(surf_copy_region->is_stencil_only ? surf->u.gfx9.zs.stencil_offset : 0);
if (surf_copy_region->memcpy) {
if (surf->blk_w == 4 && surf->blk_h == 4) {
/* The hybrid memcpy seems to perform better with block compressed
* formats due to the 256B alignment.
*/
input.copyFlags.hybridMemcpy = true;
} else {
input.copyFlags.blockMemcpy = true;
}
}
ADDR_E_RETURNCODE res;
ADDR3_COPY_MEMSURFACE_REGION region = {0};

View file

@ -560,6 +560,7 @@ struct ac_surface_copy_region {
uint64_t mem_slice_pitch;
bool is_stencil_only;
bool memcpy;
};
bool ac_surface_copy_mem_to_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,