radeonsi/sdma: use multiple commands if required

Instead of failing the copy we can use multiple chunks.

This codepath shouldn't really be used since the source
image should usually be tiled but it still better to not
fail when possible.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24771>
This commit is contained in:
Pierre-Eric Pelloux-Prayer 2023-08-18 11:53:34 +02:00 committed by Marge Bot
parent a69ffbd08a
commit ca25d5f846

View file

@ -73,23 +73,29 @@ static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_textur
struct radeon_cmdbuf *cs = sctx->sdma_cs;
uint64_t bytes = (uint64_t)src_pitch * copy_height * bpp;
if (!(bytes <= (1u << (is_v5_2 ? 30 : 22))))
return false;
uint32_t chunk_size = 1u << (is_v5_2 ? 30 : 22);
uint32_t chunk_count = DIV_ROUND_UP(bytes, chunk_size);
src_address += ssrc->surface.u.gfx9.offset[0];
dst_address += sdst->surface.u.gfx9.offset[0];
radeon_begin(cs);
radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
(tmz ? 4 : 0)));
radeon_emit(bytes - 1);
radeon_emit(0);
radeon_emit(src_address);
radeon_emit(src_address >> 32);
radeon_emit(dst_address);
radeon_emit(dst_address >> 32);
for (int i = 0; i < chunk_count; i++) {
uint32_t size = MIN2(chunk_size, bytes);
radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
(tmz ? 4 : 0)));
radeon_emit(size - 1);
radeon_emit(0);
radeon_emit(src_address);
radeon_emit(src_address >> 32);
radeon_emit(dst_address);
radeon_emit(dst_address >> 32);
src_address += size;
dst_address += size;
bytes -= size;
}
radeon_end();
return true;
}