mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-17 23:20:33 +01:00
ac,radv,radeonsi: add ac_emit_sdma_copy_linear_sub_window()
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38448>
This commit is contained in:
parent
da3d8c8b4b
commit
5f8fa6ae03
4 changed files with 134 additions and 55 deletions
|
|
@ -123,3 +123,62 @@ ac_emit_sdma_copy_linear(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version
|
|||
|
||||
return bytes_written;
|
||||
}
|
||||
|
||||
static void
|
||||
ac_sdma_check_pitches(uint32_t pitch, uint32_t slice_pitch, uint32_t bpp, bool uses_depth)
|
||||
{
|
||||
ASSERTED const uint32_t pitch_alignment = MAX2(1, 4 / bpp);
|
||||
assert(pitch);
|
||||
assert(pitch <= (1 << 14));
|
||||
assert(util_is_aligned(pitch, pitch_alignment));
|
||||
|
||||
if (uses_depth) {
|
||||
ASSERTED const uint32_t slice_pitch_alignment = 4;
|
||||
assert(slice_pitch);
|
||||
assert(slice_pitch <= (1 << 28));
|
||||
assert(util_is_aligned(slice_pitch, slice_pitch_alignment));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ac_emit_sdma_copy_linear_sub_window(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version,
|
||||
const struct ac_sdma_surf_linear *src,
|
||||
const struct ac_sdma_surf_linear *dst,
|
||||
uint32_t width, uint32_t height, uint32_t depth)
|
||||
{
|
||||
/* This packet is the same since SDMA v2.4, haven't bothered to check older versions.
|
||||
* The main difference is the bitfield sizes:
|
||||
*
|
||||
* v2.4 - src/dst_pitch: 14 bits, rect_z: 11 bits
|
||||
* v4.0 - src/dst_pitch: 19 bits, rect_z: 11 bits
|
||||
* v5.0 - src/dst_pitch: 19 bits, rect_z: 13 bits
|
||||
*
|
||||
* We currently use the smallest limits (from SDMA v2.4).
|
||||
*/
|
||||
assert(src->bpp == dst->bpp);
|
||||
assert(util_is_power_of_two_nonzero(src->bpp));
|
||||
ac_sdma_check_pitches(src->pitch, src->slice_pitch, src->bpp, false);
|
||||
ac_sdma_check_pitches(dst->pitch, dst->slice_pitch, dst->bpp, false);
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
ac_cmdbuf_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
|
||||
util_logbase2(src->bpp) << 29);
|
||||
ac_cmdbuf_emit(src->va);
|
||||
ac_cmdbuf_emit(src->va >> 32);
|
||||
ac_cmdbuf_emit(src->offset.x | src->offset.y << 16);
|
||||
ac_cmdbuf_emit(src->offset.z | (src->pitch - 1) << (sdma_ip_version >= SDMA_7_0 ? 16 : 13));
|
||||
ac_cmdbuf_emit(src->slice_pitch - 1);
|
||||
ac_cmdbuf_emit(dst->va);
|
||||
ac_cmdbuf_emit(dst->va >> 32);
|
||||
ac_cmdbuf_emit(dst->offset.x | dst->offset.y << 16);
|
||||
ac_cmdbuf_emit(dst->offset.z | (dst->pitch - 1) << (sdma_ip_version >= SDMA_7_0 ? 16 : 13));
|
||||
ac_cmdbuf_emit(dst->slice_pitch - 1);
|
||||
if (sdma_ip_version == SDMA_2_0) {
|
||||
ac_cmdbuf_emit(width | (height << 16));
|
||||
ac_cmdbuf_emit(depth);
|
||||
} else {
|
||||
ac_cmdbuf_emit((width - 1) | (height - 1) << 16);
|
||||
ac_cmdbuf_emit((depth - 1));
|
||||
}
|
||||
ac_cmdbuf_end();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,25 @@ ac_emit_sdma_copy_linear(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version
|
|||
uint64_t src_va, uint64_t dst_va, uint64_t size,
|
||||
bool tmz);
|
||||
|
||||
/**
 * Description of a linear surface for ac_emit_sdma_copy_linear_sub_window().
 */
struct ac_sdma_surf_linear {
   /* 64-bit GPU address of the surface; emitted as two dwords (low, high). */
   uint64_t va;

   /* Origin of the copy window inside the surface. x and y are packed into
    * one dword (y shifted left by 16), z shares a dword with the pitch.
    */
   struct {
      uint32_t x;
      uint32_t y;
      uint32_t z;
   } offset;

   /* Element size; must be a non-zero power of two because its log2 is
    * written into the packet header.
    */
   uint32_t bpp;

   /* Row pitch; the packet stores this value minus one. */
   uint32_t pitch;

   /* Slice pitch; the packet stores this value minus one. */
   uint32_t slice_pitch;
};
|
||||
|
||||
/**
 * Emit an SDMA linear sub-window copy packet into \p cs.
 *
 * \param cs               Command buffer to append the packet to.
 * \param sdma_ip_version  SDMA IP version, used to select the packet layout.
 * \param src              Source linear surface.
 * \param dst              Destination linear surface (same bpp as \p src).
 * \param width            Width of the copied window.
 * \param height           Height of the copied window.
 * \param depth            Depth of the copied window.
 */
void
ac_emit_sdma_copy_linear_sub_window(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version,
                                    const struct ac_sdma_surf_linear *src,
                                    const struct ac_sdma_surf_linear *dst,
                                    uint32_t width, uint32_t height, uint32_t depth);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -390,16 +390,6 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
|
|||
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
|
||||
const VkExtent3D pix_extent)
|
||||
{
|
||||
/* This packet is the same since SDMA v2.4, haven't bothered to check older versions.
|
||||
* The main difference is the bitfield sizes:
|
||||
*
|
||||
* v2.4 - src/dst_pitch: 14 bits, rect_z: 11 bits
|
||||
* v4.0 - src/dst_pitch: 19 bits, rect_z: 11 bits
|
||||
* v5.0 - src/dst_pitch: 19 bits, rect_z: 13 bits
|
||||
*
|
||||
* We currently use the smallest limits (from SDMA v2.4).
|
||||
*/
|
||||
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
VkOffset3D src_off = radv_sdma_pixel_offset_to_blocks(src->offset, src->blk_w, src->blk_h);
|
||||
VkOffset3D dst_off = radv_sdma_pixel_offset_to_blocks(dst->offset, dst->blk_w, dst->blk_h);
|
||||
|
|
@ -408,12 +398,6 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
|
|||
const unsigned dst_pitch = radv_sdma_pixels_to_blocks(dst->pitch, dst->blk_w);
|
||||
const unsigned src_slice_pitch = radv_sdma_pixel_area_to_blocks(src->slice_pitch, src->blk_w, src->blk_h);
|
||||
const unsigned dst_slice_pitch = radv_sdma_pixel_area_to_blocks(dst->slice_pitch, dst->blk_w, dst->blk_h);
|
||||
const enum sdma_version ver = pdev->info.sdma_ip_version;
|
||||
|
||||
assert(src->bpp == dst->bpp);
|
||||
assert(util_is_power_of_two_nonzero(src->bpp));
|
||||
radv_sdma_check_pitches(src->pitch, src->slice_pitch, src->bpp, false);
|
||||
radv_sdma_check_pitches(dst->pitch, dst->slice_pitch, dst->bpp, false);
|
||||
|
||||
/* Adjust offset/extent for 96-bits formats because SDMA expects a power of two bpp. */
|
||||
const uint32_t texel_scale = src->texel_scale == 1 ? dst->texel_scale : src->texel_scale;
|
||||
|
|
@ -422,26 +406,35 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
|
|||
dst_off.x *= texel_scale;
|
||||
ext.width *= texel_scale;
|
||||
|
||||
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 13);
|
||||
const struct ac_sdma_surf_linear surf_src = {
|
||||
.va = src->va,
|
||||
.offset =
|
||||
{
|
||||
.x = src_off.x,
|
||||
.y = src_off.y,
|
||||
.z = src_off.z,
|
||||
},
|
||||
.bpp = src->bpp,
|
||||
.pitch = src_pitch,
|
||||
.slice_pitch = src_slice_pitch,
|
||||
};
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) | util_logbase2(src->bpp)
|
||||
<< 29);
|
||||
radeon_emit(src->va);
|
||||
radeon_emit(src->va >> 32);
|
||||
radeon_emit(src_off.x | src_off.y << 16);
|
||||
radeon_emit(src_off.z | (src_pitch - 1) << (ver >= SDMA_7_0 ? 16 : 13));
|
||||
radeon_emit(src_slice_pitch - 1);
|
||||
radeon_emit(dst->va);
|
||||
radeon_emit(dst->va >> 32);
|
||||
radeon_emit(dst_off.x | dst_off.y << 16);
|
||||
radeon_emit(dst_off.z | (dst_pitch - 1) << (ver >= SDMA_7_0 ? 16 : 13));
|
||||
radeon_emit(dst_slice_pitch - 1);
|
||||
radeon_emit((ext.width - 1) | (ext.height - 1) << 16);
|
||||
radeon_emit((ext.depth - 1));
|
||||
radeon_end();
|
||||
const struct ac_sdma_surf_linear surf_dst = {
|
||||
.va = dst->va,
|
||||
.offset =
|
||||
{
|
||||
.x = dst_off.x,
|
||||
.y = dst_off.y,
|
||||
.z = dst_off.z,
|
||||
},
|
||||
.bpp = dst->bpp,
|
||||
.pitch = dst_pitch,
|
||||
.slice_pitch = dst_slice_pitch,
|
||||
};
|
||||
|
||||
assert(cs->b->cdw == cdw_end);
|
||||
radeon_check_space(device->ws, cs->b, 13);
|
||||
ac_emit_sdma_copy_linear_sub_window(cs->b, pdev->info.sdma_ip_version, &surf_src, &surf_dst, ext.width, ext.height,
|
||||
ext.depth);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -234,27 +234,35 @@ bool cik_sdma_copy_texture(struct si_context *sctx, struct si_texture *sdst, str
|
|||
(copy_width != (1 << 14) && copy_height != (1 << 14)))) {
|
||||
struct radeon_cmdbuf *cs = sctx->sdma_cs;
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
|
||||
(util_logbase2(bpp) << 29));
|
||||
radeon_emit(src_address);
|
||||
radeon_emit(src_address >> 32);
|
||||
radeon_emit(0);
|
||||
radeon_emit((src_pitch - 1) << 16);
|
||||
radeon_emit(src_slice_pitch - 1);
|
||||
radeon_emit(dst_address);
|
||||
radeon_emit(dst_address >> 32);
|
||||
radeon_emit(0);
|
||||
radeon_emit((dst_pitch - 1) << 16);
|
||||
radeon_emit(dst_slice_pitch - 1);
|
||||
if (sctx->gfx_level == GFX7) {
|
||||
radeon_emit(copy_width | (copy_height << 16));
|
||||
radeon_emit(0);
|
||||
} else {
|
||||
radeon_emit((copy_width - 1) | ((copy_height - 1) << 16));
|
||||
radeon_emit(0);
|
||||
}
|
||||
radeon_end();
|
||||
const struct ac_sdma_surf_linear surf_src = {
|
||||
.va = src_address,
|
||||
.offset =
|
||||
{
|
||||
.x = 0,
|
||||
.y = 0,
|
||||
.z = 0,
|
||||
},
|
||||
.bpp = bpp,
|
||||
.pitch = src_pitch,
|
||||
.slice_pitch = src_slice_pitch,
|
||||
};
|
||||
|
||||
const struct ac_sdma_surf_linear surf_dst = {
|
||||
.va = dst_address,
|
||||
.offset =
|
||||
{
|
||||
.x = 0,
|
||||
.y = 0,
|
||||
.z = 0,
|
||||
},
|
||||
.bpp = bpp,
|
||||
.pitch = dst_pitch,
|
||||
.slice_pitch = dst_slice_pitch,
|
||||
};
|
||||
|
||||
ac_emit_sdma_copy_linear_sub_window(&cs->current, info->sdma_ip_version,
|
||||
&surf_src, &surf_dst, copy_width,
|
||||
copy_height, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue