diff --git a/src/amd/common/ac_cmdbuf_sdma.c b/src/amd/common/ac_cmdbuf_sdma.c
index d5b90f5cd94..5a74076f530 100644
--- a/src/amd/common/ac_cmdbuf_sdma.c
+++ b/src/amd/common/ac_cmdbuf_sdma.c
@@ -123,3 +123,84 @@ ac_emit_sdma_copy_linear(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version
 
    return bytes_written;
 }
+
+/* Assert-only sanity checks on the pitches of a linear surface.
+ *
+ * pitch must be non-zero, at most 1 << 14 and a multiple of MAX2(1, 4 / bpp). slice_pitch is
+ * only looked at when uses_depth is set: non-zero, at most 1 << 28 and a multiple of 4.
+ * bpp must be non-zero because it is used as a divisor.
+ */
+static void
+ac_sdma_check_pitches(uint32_t pitch, uint32_t slice_pitch, uint32_t bpp, bool uses_depth)
+{
+   ASSERTED const uint32_t pitch_alignment = MAX2(1, 4 / bpp);
+   assert(pitch);
+   assert(pitch <= (1 << 14));
+   assert(util_is_aligned(pitch, pitch_alignment));
+
+   if (uses_depth) {
+      ASSERTED const uint32_t slice_pitch_alignment = 4;
+      assert(slice_pitch);
+      assert(slice_pitch <= (1 << 28));
+      assert(util_is_aligned(slice_pitch, slice_pitch_alignment));
+   }
+}
+
+/* Emit a 13-dword SDMA COPY packet with the LINEAR_SUB_WINDOW sub-opcode, copying a
+ * width x height x depth window from src to dst.
+ *
+ * src and dst must share the same power-of-two bpp; only their pitches are validated (both
+ * surfaces are checked with uses_depth = false, so slice_pitch is not asserted on).
+ * On SDMA v2.0 the extent dwords hold width/height/depth as-is, every other version gets
+ * them minus one.
+ */
+void
+ac_emit_sdma_copy_linear_sub_window(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version,
+                                    const struct ac_sdma_surf_linear *src,
+                                    const struct ac_sdma_surf_linear *dst,
+                                    uint32_t width, uint32_t height, uint32_t depth)
+{
+   /* This packet is the same since SDMA v2.4, haven't bothered to check older versions.
+    * The main difference is the bitfield sizes:
+    *
+    * v2.4 - src/dst_pitch: 14 bits, rect_z: 11 bits
+    * v4.0 - src/dst_pitch: 19 bits, rect_z: 11 bits
+    * v5.0 - src/dst_pitch: 19 bits, rect_z: 13 bits
+    *
+    * We currently use the smallest limits (from SDMA v2.4).
+    */
+   assert(src->bpp == dst->bpp);
+   assert(util_is_power_of_two_nonzero(src->bpp));
+   ac_sdma_check_pitches(src->pitch, src->slice_pitch, src->bpp, false);
+   ac_sdma_check_pitches(dst->pitch, dst->slice_pitch, dst->bpp, false);
+
+   /* The pitch field starts at bit 16 on SDMA older than v4.0 (14-bit field) and on v7.0+,
+    * and at bit 13 on everything in between (19-bit field).
+    */
+   const uint32_t pitch_shift =
+      (sdma_ip_version < SDMA_4_0 || sdma_ip_version >= SDMA_7_0) ? 16 : 13;
+
+   ac_cmdbuf_begin(cs);
+   ac_cmdbuf_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
+                  util_logbase2(src->bpp) << 29);
+   ac_cmdbuf_emit(src->va);
+   ac_cmdbuf_emit(src->va >> 32);
+   ac_cmdbuf_emit(src->offset.x | src->offset.y << 16);
+   ac_cmdbuf_emit(src->offset.z | (src->pitch - 1) << pitch_shift);
+   ac_cmdbuf_emit(src->slice_pitch - 1);
+   ac_cmdbuf_emit(dst->va);
+   ac_cmdbuf_emit(dst->va >> 32);
+   ac_cmdbuf_emit(dst->offset.x | dst->offset.y << 16);
+   ac_cmdbuf_emit(dst->offset.z | (dst->pitch - 1) << pitch_shift);
+   ac_cmdbuf_emit(dst->slice_pitch - 1);
+   if (sdma_ip_version == SDMA_2_0) {
+      ac_cmdbuf_emit(width | (height << 16));
+      ac_cmdbuf_emit(depth);
+   } else {
+      ac_cmdbuf_emit((width - 1) | (height - 1) << 16);
+      ac_cmdbuf_emit((depth - 1));
+   }
+   ac_cmdbuf_end();
+}
diff --git a/src/amd/common/ac_cmdbuf_sdma.h b/src/amd/common/ac_cmdbuf_sdma.h
index 67c771bf4ee..ffd8e695c93 100644
--- a/src/amd/common/ac_cmdbuf_sdma.h
+++ b/src/amd/common/ac_cmdbuf_sdma.h
@@ -33,6 +33,29 @@ ac_emit_sdma_copy_linear(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version
                          uint64_t src_va, uint64_t dst_va, uint64_t size,
                          bool tmz);
 
+/* One linear surface as consumed by ac_emit_sdma_copy_linear_sub_window(). */
+struct ac_sdma_surf_linear {
+   uint64_t va; /* emitted as low dword, then high dword */
+
+   /* Window origin; x and y share one dword (y in the upper 16 bits), z shares one with pitch. */
+   struct {
+      uint32_t x;
+      uint32_t y;
+      uint32_t z;
+   } offset;
+
+   uint32_t bpp;         /* power of two, must be equal for src and dst */
+   uint32_t pitch;       /* emitted as pitch - 1 */
+   uint32_t slice_pitch; /* emitted as slice_pitch - 1 */
+};
+
+/* Emit an SDMA linear sub-window copy of width x height x depth from src to dst. */
+void
+ac_emit_sdma_copy_linear_sub_window(struct ac_cmdbuf *cs, enum sdma_version sdma_ip_version,
+                                    const struct ac_sdma_surf_linear *src,
+                                    const struct ac_sdma_surf_linear *dst,
+                                    uint32_t width, uint32_t height, uint32_t depth);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c
index 00c859361f3..8a47ea55b52 100644
--- a/src/amd/vulkan/radv_sdma.c
+++ b/src/amd/vulkan/radv_sdma.c
@@ -390,16 +390,6 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
                                       const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
                                       const VkExtent3D pix_extent)
 {
-   /* This packet is the same since SDMA v2.4, haven't bothered to check older versions.
-    * The main difference is the bitfield sizes:
-    *
-    * v2.4 - src/dst_pitch: 14 bits, rect_z: 11 bits
-    * v4.0 - src/dst_pitch: 19 bits, rect_z: 11 bits
-    * v5.0 - src/dst_pitch: 19 bits, rect_z: 13 bits
-    *
-    * We currently use the smallest limits (from SDMA v2.4).
-    */
-
    const struct radv_physical_device *pdev = radv_device_physical(device);
    VkOffset3D src_off = radv_sdma_pixel_offset_to_blocks(src->offset, src->blk_w, src->blk_h);
    VkOffset3D dst_off = radv_sdma_pixel_offset_to_blocks(dst->offset, dst->blk_w, dst->blk_h);
@@ -408,12 +398,6 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
    const unsigned dst_pitch = radv_sdma_pixels_to_blocks(dst->pitch, dst->blk_w);
    const unsigned src_slice_pitch = radv_sdma_pixel_area_to_blocks(src->slice_pitch, src->blk_w, src->blk_h);
    const unsigned dst_slice_pitch = radv_sdma_pixel_area_to_blocks(dst->slice_pitch, dst->blk_w, dst->blk_h);
-   const enum sdma_version ver = pdev->info.sdma_ip_version;
-
-   assert(src->bpp == dst->bpp);
-   assert(util_is_power_of_two_nonzero(src->bpp));
-   radv_sdma_check_pitches(src->pitch, src->slice_pitch, src->bpp, false);
-   radv_sdma_check_pitches(dst->pitch, dst->slice_pitch, dst->bpp, false);
 
    /* Adjust offset/extent for 96-bits formats because SDMA expects a power of two bpp. */
    const uint32_t texel_scale = src->texel_scale == 1 ? dst->texel_scale : src->texel_scale;
@@ -422,26 +406,35 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
    dst_off.x *= texel_scale;
    ext.width *= texel_scale;
 
-   ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 13);
+   const struct ac_sdma_surf_linear surf_src = {
+      .va = src->va,
+      .offset =
+         {
+            .x = src_off.x,
+            .y = src_off.y,
+            .z = src_off.z,
+         },
+      .bpp = src->bpp,
+      .pitch = src_pitch,
+      .slice_pitch = src_slice_pitch,
+   };
 
-   radeon_begin(cs);
-   radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) | util_logbase2(src->bpp)
-                                                                                             << 29);
-   radeon_emit(src->va);
-   radeon_emit(src->va >> 32);
-   radeon_emit(src_off.x | src_off.y << 16);
-   radeon_emit(src_off.z | (src_pitch - 1) << (ver >= SDMA_7_0 ? 16 : 13));
-   radeon_emit(src_slice_pitch - 1);
-   radeon_emit(dst->va);
-   radeon_emit(dst->va >> 32);
-   radeon_emit(dst_off.x | dst_off.y << 16);
-   radeon_emit(dst_off.z | (dst_pitch - 1) << (ver >= SDMA_7_0 ? 16 : 13));
-   radeon_emit(dst_slice_pitch - 1);
-   radeon_emit((ext.width - 1) | (ext.height - 1) << 16);
-   radeon_emit((ext.depth - 1));
-   radeon_end();
+   const struct ac_sdma_surf_linear surf_dst = {
+      .va = dst->va,
+      .offset =
+         {
+            .x = dst_off.x,
+            .y = dst_off.y,
+            .z = dst_off.z,
+         },
+      .bpp = dst->bpp,
+      .pitch = dst_pitch,
+      .slice_pitch = dst_slice_pitch,
+   };
 
-   assert(cs->b->cdw == cdw_end);
+   radeon_check_space(device->ws, cs->b, 13);
+   ac_emit_sdma_copy_linear_sub_window(cs->b, pdev->info.sdma_ip_version, &surf_src, &surf_dst, ext.width, ext.height,
+                                       ext.depth);
 }
 
 static void
diff --git a/src/gallium/drivers/radeonsi/si_sdma_copy_image.c b/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
index 8733b2008e5..df4653c0587 100644
--- a/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
+++ b/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
@@ -234,27 +234,35 @@ bool cik_sdma_copy_texture(struct si_context *sctx, struct si_texture *sdst, str
        (copy_width != (1 << 14) && copy_height != (1 << 14)))) {
       struct radeon_cmdbuf *cs = sctx->sdma_cs;
 
-      radeon_begin(cs);
-      radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
-                  (util_logbase2(bpp) << 29));
-      radeon_emit(src_address);
-      radeon_emit(src_address >> 32);
-      radeon_emit(0);
-      radeon_emit((src_pitch - 1) << 16);
-      radeon_emit(src_slice_pitch - 1);
-      radeon_emit(dst_address);
-      radeon_emit(dst_address >> 32);
-      radeon_emit(0);
-      radeon_emit((dst_pitch - 1) << 16);
-      radeon_emit(dst_slice_pitch - 1);
-      if (sctx->gfx_level == GFX7) {
-         radeon_emit(copy_width | (copy_height << 16));
-         radeon_emit(0);
-      } else {
-         radeon_emit((copy_width - 1) | ((copy_height - 1) << 16));
-         radeon_emit(0);
-      }
-      radeon_end();
+      const struct ac_sdma_surf_linear surf_src = {
+         .va = src_address,
+         .offset =
+            {
+               .x = 0,
+               .y = 0,
+               .z = 0,
+            },
+         .bpp = bpp,
+         .pitch = src_pitch,
+         .slice_pitch = src_slice_pitch,
+      };
+
+      const struct ac_sdma_surf_linear surf_dst = {
+         .va = dst_address,
+         .offset =
+            {
+               .x = 0,
+               .y = 0,
+               .z = 0,
+            },
+         .bpp = bpp,
+         .pitch = dst_pitch,
+         .slice_pitch = dst_slice_pitch,
+      };
+
+      ac_emit_sdma_copy_linear_sub_window(&cs->current, info->sdma_ip_version,
+                                          &surf_src, &surf_dst, copy_width,
+                                          copy_height, 1);
       return true;
    }
 