ac,radv,radeonsi: add ac_emit_sdma_copy_linear_sub_window()

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38448>
This commit is contained in:
Samuel Pitoiset 2025-11-13 19:05:09 +01:00 committed by Marge Bot
parent da3d8c8b4b
commit 5f8fa6ae03
4 changed files with 134 additions and 55 deletions

View file

@ -123,3 +123,62 @@ ac_emit_sdma_copy_linear(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version
return bytes_written;
}
static void
ac_sdma_check_pitches(uint32_t pitch, uint32_t slice_pitch, uint32_t bpp, bool uses_depth)
{
ASSERTED const uint32_t pitch_alignment = MAX2(1, 4 / bpp);
assert(pitch);
assert(pitch <= (1 << 14));
assert(util_is_aligned(pitch, pitch_alignment));
if (uses_depth) {
ASSERTED const uint32_t slice_pitch_alignment = 4;
assert(slice_pitch);
assert(slice_pitch <= (1 << 28));
assert(util_is_aligned(slice_pitch, slice_pitch_alignment));
}
}
void
ac_emit_sdma_copy_linear_sub_window(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version,
const struct ac_sdma_surf_linear *src,
const struct ac_sdma_surf_linear *dst,
uint32_t width, uint32_t height, uint32_t depth)
{
/* This packet is the same since SDMA v2.4, haven't bothered to check older versions.
* The main difference is the bitfield sizes:
*
* v2.4 - src/dst_pitch: 14 bits, rect_z: 11 bits
* v4.0 - src/dst_pitch: 19 bits, rect_z: 11 bits
* v5.0 - src/dst_pitch: 19 bits, rect_z: 13 bits
*
* We currently use the smallest limits (from SDMA v2.4).
*/
assert(src->bpp == dst->bpp);
assert(util_is_power_of_two_nonzero(src->bpp));
ac_sdma_check_pitches(src->pitch, src->slice_pitch, src->bpp, false);
ac_sdma_check_pitches(dst->pitch, dst->slice_pitch, dst->bpp, false);
ac_cmdbuf_begin(cs);
ac_cmdbuf_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
util_logbase2(src->bpp) << 29);
ac_cmdbuf_emit(src->va);
ac_cmdbuf_emit(src->va >> 32);
ac_cmdbuf_emit(src->offset.x | src->offset.y << 16);
ac_cmdbuf_emit(src->offset.z | (src->pitch - 1) << (sdma_ip_version >= SDMA_7_0 ? 16 : 13));
ac_cmdbuf_emit(src->slice_pitch - 1);
ac_cmdbuf_emit(dst->va);
ac_cmdbuf_emit(dst->va >> 32);
ac_cmdbuf_emit(dst->offset.x | dst->offset.y << 16);
ac_cmdbuf_emit(dst->offset.z | (dst->pitch - 1) << (sdma_ip_version >= SDMA_7_0 ? 16 : 13));
ac_cmdbuf_emit(dst->slice_pitch - 1);
if (sdma_ip_version == SDMA_2_0) {
ac_cmdbuf_emit(width | (height << 16));
ac_cmdbuf_emit(depth);
} else {
ac_cmdbuf_emit((width - 1) | (height - 1) << 16);
ac_cmdbuf_emit((depth - 1));
}
ac_cmdbuf_end();
}

View file

@ -33,6 +33,25 @@ ac_emit_sdma_copy_linear(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version
uint64_t src_va, uint64_t dst_va, uint64_t size,
bool tmz);
/* Describes one linear surface (source or destination) for
 * ac_emit_sdma_copy_linear_sub_window().
 */
struct ac_sdma_surf_linear {
/* GPU address of the surface; emitted as two dwords (low 32 bits, then high 32 bits). */
uint64_t va;
/* Origin of the copied window inside the surface. x and y are packed into a
 * single dword (y in the upper 16 bits); z shares a dword with the pitch.
 */
struct {
uint32_t x;
uint32_t y;
uint32_t z;
} offset;
/* Element size. Must be a non-zero power of two and identical for source and
 * destination; its log2 is written into the packet header.
 * NOTE(review): presumably bytes per element/block - confirm against callers.
 */
uint32_t bpp;
/* Row pitch; the packet stores pitch - 1. Asserted to be non-zero and at most 1 << 14.
 * NOTE(review): units look like elements/blocks rather than bytes - confirm.
 */
uint32_t pitch;
/* Slice pitch; the packet stores slice_pitch - 1. */
uint32_t slice_pitch;
};
/* Emit an SDMA COPY / LINEAR_SUB_WINDOW packet (13 dwords) into cs, copying a
 * width x height x depth window from src to dst.
 *
 * src->bpp and dst->bpp must be equal and a non-zero power of two.
 * sdma_ip_version selects the packet field layout (pitch position and whether
 * the extent is stored as-is or minus one).
 */
void
ac_emit_sdma_copy_linear_sub_window(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version,
const struct ac_sdma_surf_linear *src,
const struct ac_sdma_surf_linear *dst,
uint32_t width, uint32_t height, uint32_t depth);
#ifdef __cplusplus
}
#endif

View file

@ -390,16 +390,6 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
const VkExtent3D pix_extent)
{
/* This packet is the same since SDMA v2.4, haven't bothered to check older versions.
* The main difference is the bitfield sizes:
*
* v2.4 - src/dst_pitch: 14 bits, rect_z: 11 bits
* v4.0 - src/dst_pitch: 19 bits, rect_z: 11 bits
* v5.0 - src/dst_pitch: 19 bits, rect_z: 13 bits
*
* We currently use the smallest limits (from SDMA v2.4).
*/
const struct radv_physical_device *pdev = radv_device_physical(device);
VkOffset3D src_off = radv_sdma_pixel_offset_to_blocks(src->offset, src->blk_w, src->blk_h);
VkOffset3D dst_off = radv_sdma_pixel_offset_to_blocks(dst->offset, dst->blk_w, dst->blk_h);
@ -408,12 +398,6 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
const unsigned dst_pitch = radv_sdma_pixels_to_blocks(dst->pitch, dst->blk_w);
const unsigned src_slice_pitch = radv_sdma_pixel_area_to_blocks(src->slice_pitch, src->blk_w, src->blk_h);
const unsigned dst_slice_pitch = radv_sdma_pixel_area_to_blocks(dst->slice_pitch, dst->blk_w, dst->blk_h);
const enum sdma_version ver = pdev->info.sdma_ip_version;
assert(src->bpp == dst->bpp);
assert(util_is_power_of_two_nonzero(src->bpp));
radv_sdma_check_pitches(src->pitch, src->slice_pitch, src->bpp, false);
radv_sdma_check_pitches(dst->pitch, dst->slice_pitch, dst->bpp, false);
/* Adjust offset/extent for 96-bits formats because SDMA expects a power of two bpp. */
const uint32_t texel_scale = src->texel_scale == 1 ? dst->texel_scale : src->texel_scale;
@ -422,26 +406,35 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
dst_off.x *= texel_scale;
ext.width *= texel_scale;
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 13);
const struct ac_sdma_surf_linear surf_src = {
.va = src->va,
.offset =
{
.x = src_off.x,
.y = src_off.y,
.z = src_off.z,
},
.bpp = src->bpp,
.pitch = src_pitch,
.slice_pitch = src_slice_pitch,
};
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) | util_logbase2(src->bpp)
<< 29);
radeon_emit(src->va);
radeon_emit(src->va >> 32);
radeon_emit(src_off.x | src_off.y << 16);
radeon_emit(src_off.z | (src_pitch - 1) << (ver >= SDMA_7_0 ? 16 : 13));
radeon_emit(src_slice_pitch - 1);
radeon_emit(dst->va);
radeon_emit(dst->va >> 32);
radeon_emit(dst_off.x | dst_off.y << 16);
radeon_emit(dst_off.z | (dst_pitch - 1) << (ver >= SDMA_7_0 ? 16 : 13));
radeon_emit(dst_slice_pitch - 1);
radeon_emit((ext.width - 1) | (ext.height - 1) << 16);
radeon_emit((ext.depth - 1));
radeon_end();
const struct ac_sdma_surf_linear surf_dst = {
.va = dst->va,
.offset =
{
.x = dst_off.x,
.y = dst_off.y,
.z = dst_off.z,
},
.bpp = dst->bpp,
.pitch = dst_pitch,
.slice_pitch = dst_slice_pitch,
};
assert(cs->b->cdw == cdw_end);
radeon_check_space(device->ws, cs->b, 13);
ac_emit_sdma_copy_linear_sub_window(cs->b, pdev->info.sdma_ip_version, &surf_src, &surf_dst, ext.width, ext.height,
ext.depth);
}
static void

View file

@ -234,27 +234,35 @@ bool cik_sdma_copy_texture(struct si_context *sctx, struct si_texture *sdst, str
(copy_width != (1 << 14) && copy_height != (1 << 14)))) {
struct radeon_cmdbuf *cs = sctx->sdma_cs;
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
(util_logbase2(bpp) << 29));
radeon_emit(src_address);
radeon_emit(src_address >> 32);
radeon_emit(0);
radeon_emit((src_pitch - 1) << 16);
radeon_emit(src_slice_pitch - 1);
radeon_emit(dst_address);
radeon_emit(dst_address >> 32);
radeon_emit(0);
radeon_emit((dst_pitch - 1) << 16);
radeon_emit(dst_slice_pitch - 1);
if (sctx->gfx_level == GFX7) {
radeon_emit(copy_width | (copy_height << 16));
radeon_emit(0);
} else {
radeon_emit((copy_width - 1) | ((copy_height - 1) << 16));
radeon_emit(0);
}
radeon_end();
const struct ac_sdma_surf_linear surf_src = {
.va = src_address,
.offset =
{
.x = 0,
.y = 0,
.z = 0,
},
.bpp = bpp,
.pitch = src_pitch,
.slice_pitch = src_slice_pitch,
};
const struct ac_sdma_surf_linear surf_dst = {
.va = dst_address,
.offset =
{
.x = 0,
.y = 0,
.z = 0,
},
.bpp = bpp,
.pitch = dst_pitch,
.slice_pitch = dst_slice_pitch,
};
ac_emit_sdma_copy_linear_sub_window(&cs->current, info->sdma_ip_version,
&surf_src, &surf_dst, copy_width,
copy_height, 1);
return true;
}