panfrost: add support for (de)interleaving Z24S8 in pan_tiling

This is needed for VK_EXT_host_image_copy which, like the buffer<->image
copy commands, treats depth/stencil like separate image planes and
requires copying each separately.

Signed-off-by: Olivia Lee <olivia.lee@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35910>
This commit is contained in:
Olivia Lee 2025-06-28 22:30:34 -07:00 committed by Marge Bot
parent 93c5d1be94
commit 91c037f228
6 changed files with 172 additions and 66 deletions

View file

@ -654,7 +654,8 @@ lima_transfer_map(struct pipe_context *pctx,
ptrans->box.width, ptrans->box.height,
ptrans->stride,
row_stride,
pres->format);
pres->format,
PAN_INTERLEAVE_NONE);
}
return trans->staging;
@ -759,7 +760,8 @@ lima_transfer_flush_region(struct pipe_context *pctx,
ptrans->box.width, ptrans->box.height,
row_stride,
ptrans->stride,
pres->format);
pres->format,
PAN_INTERLEAVE_NONE);
}
}
}

View file

@ -1386,7 +1386,7 @@ panfrost_load_tiled_images(struct panfrost_transfer *transfer,
dst, map, ptrans->box.x, ptrans->box.y, ptrans->box.width,
ptrans->box.height, ptrans->stride,
rsrc->plane.layout.slices[level].tiled_or_linear.row_stride_B,
rsrc->image.props.format);
rsrc->image.props.format, PAN_INTERLEAVE_NONE);
}
}
@ -1502,7 +1502,7 @@ panfrost_store_tiled_images(struct panfrost_transfer *transfer,
map, src, ptrans->box.x, ptrans->box.y, ptrans->box.width,
ptrans->box.height,
rsrc->plane.layout.slices[level].tiled_or_linear.row_stride_B,
ptrans->stride, rsrc->image.props.format);
ptrans->stride, rsrc->image.props.format, PAN_INTERLEAVE_NONE);
}
}

View file

@ -111,19 +111,63 @@ const unsigned space_4[16] = {
#define TILE_HEIGHT 16
#define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT)
/* Select the (de)interleaving mode for copying the requested aspects of an
 * image. Only PIPE_FORMAT_Z24_UNORM_S8_UINT ever needs (de)interleaving;
 * any other format, or a copy of both aspects at once, is a plain copy. */
enum pan_interleave_zs
pan_get_interleave_zs(enum pipe_format format, bool depth, bool stencil)
{
   if (format != PIPE_FORMAT_Z24_UNORM_S8_UINT)
      return PAN_INTERLEAVE_NONE;

   if (!depth && !stencil)
      unreachable("at least one aspect must be specified");

   /* Both aspects together match the native interleaved layout */
   if (depth && stencil)
      return PAN_INTERLEAVE_NONE;

   return depth ? PAN_INTERLEAVE_DEPTH : PAN_INTERLEAVE_STENCIL;
}
static ALWAYS_INLINE
void pan_access_image_pixel(void *dst, void *src, const unsigned pixel_size,
bool is_store)
enum pan_interleave_zs interleave, bool is_store)
{
if (util_is_power_of_two_nonzero(pixel_size)) {
src = __builtin_assume_aligned(src, pixel_size);
dst = __builtin_assume_aligned(dst, pixel_size);
if (interleave != PAN_INTERLEAVE_STENCIL)
dst = __builtin_assume_aligned(dst, pixel_size);
}
if (is_store)
memcpy(dst, src, pixel_size);
else
memcpy(src, dst, pixel_size);
switch (interleave) {
case PAN_INTERLEAVE_NONE:
if (is_store)
memcpy(dst, src, pixel_size);
else
memcpy(src, dst, pixel_size);
break;
case PAN_INTERLEAVE_DEPTH:
/* interleave only applies to Z24S8 */
assert(pixel_size == 4);
if (is_store) {
uint32_t src_pixel = *(uint32_t *) src;
*(uint16_t *) dst = src_pixel & 0xffff;
*((uint8_t *) dst + 2) = (src_pixel >> 16) & 0xff;
} else {
/* The top 8 bits of Z24X8 are unused, so we can overwrite them
* with zeros in a single 32B write, instead of needing separate
* 16B and 8B writes */
*(uint32_t *) src = *(uint32_t *) dst & 0xffffff;
}
break;
case PAN_INTERLEAVE_STENCIL:
/* interleave only applies to Z24S8 */
assert(pixel_size == 4);
if (is_store)
*((uint8_t *) dst + 3) = *(uint8_t *) src;
else
*(uint8_t *) src = *((uint8_t *) dst + 3);
break;
}
}
/* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation:
@ -159,21 +203,27 @@ void pan_access_image_pixel(void *dst, void *src, const unsigned pixel_size,
static ALWAYS_INLINE void
pan_access_tiled_image_aligned(
void *dst, void *src, unsigned pixel_size, unsigned shift,
uint16_t sx, uint16_t sy, uint16_t w, uint16_t h,
uint32_t dst_stride, uint32_t src_stride, bool is_store)
void *dst, void *src,
unsigned dst_pixel_size, unsigned src_pixel_size,
unsigned shift,
uint16_t sx, uint16_t sy,
uint16_t w, uint16_t h,
uint32_t dst_stride, uint32_t src_stride,
enum pan_interleave_zs interleave,
bool is_store)
{
uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * pixel_size);
uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * dst_pixel_size);
for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) {
uint8_t *dest = (uint8_t *)(dest_start + ((y >> 4) * dst_stride));
void *source = src + (src_y * src_stride);
void *source_end = source + w * pixel_size;
void *source_end = source + w * src_pixel_size;
unsigned expanded_y = bit_duplication[y & 0xF] << shift;
for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) {
for (uint8_t i = 0; i < 16; ++i) {
unsigned index = expanded_y ^ (space_4[i] << shift);
pan_access_image_pixel(dest + index, source, pixel_size, is_store);
source += pixel_size;
pan_access_image_pixel(dest + index, source, dst_pixel_size,
interleave, is_store);
source += src_pixel_size;
}
}
}
@ -181,9 +231,14 @@ pan_access_tiled_image_aligned(
static ALWAYS_INLINE void
pan_access_tiled_image_unaligned(
void *dst, void *src, unsigned pixel_size, unsigned tile_shift,
uint16_t sx, uint16_t sy, uint16_t w, uint16_t h,
uint32_t dst_stride, uint32_t src_stride, bool is_store)
void *dst, void *src,
unsigned dst_pixel_size, unsigned src_pixel_size,
unsigned tile_shift,
uint16_t sx, uint16_t sy,
uint16_t w, uint16_t h,
uint32_t dst_stride, uint32_t src_stride,
enum pan_interleave_zs interleave,
bool is_store)
{
const unsigned mask = (1 << tile_shift) - 1;
for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) {
@ -194,37 +249,44 @@ pan_access_tiled_image_unaligned(
for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) {
unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2));
unsigned index = expanded_y ^ space_4[x & mask];
uint8_t *source = src + source_start + pixel_size * src_x;
uint8_t *dest = dst + block_start_s + pixel_size * (block_x_s + index);
pan_access_image_pixel(dest, source, pixel_size, is_store);
uint8_t *source = src + source_start + src_pixel_size * src_x;
uint8_t *dest =
dst + block_start_s + dst_pixel_size * (block_x_s + index);
pan_access_image_pixel(dest, source, dst_pixel_size, interleave,
is_store);
}
}
}
#define TILED_UNALIGNED_VARIANT(bpp, store, shift) \
pan_access_tiled_image_unaligned(dst, src, (bpp) / 8, shift, sx, sy, w, h, \
dst_stride, src_stride, store)
#define TILED_UNALIGNED_VARIANT(dst_bpp, src_bpp, interleave, store, shift) \
pan_access_tiled_image_unaligned(dst, src, (dst_bpp) / 8, (src_bpp) / 8, \
shift, sx, sy, w, h, \
dst_stride, src_stride, interleave, store)
/* We have a separate call for each configuration, to ensure that the inlined
* function is specialized */
#define TILED_UNALIGNED_VARIANTS(store, shift) \
{ \
if (bpp == 8) \
TILED_UNALIGNED_VARIANT(8, store, shift); \
TILED_UNALIGNED_VARIANT(8, 8, PAN_INTERLEAVE_NONE, store, shift); \
else if (bpp == 16) \
TILED_UNALIGNED_VARIANT(16, store, shift); \
TILED_UNALIGNED_VARIANT(16, 16, PAN_INTERLEAVE_NONE, store, shift); \
else if (bpp == 24) \
TILED_UNALIGNED_VARIANT(24, store, shift); \
else if (bpp == 32) \
TILED_UNALIGNED_VARIANT(32, store, shift); \
TILED_UNALIGNED_VARIANT(24, 24, PAN_INTERLEAVE_NONE, store, shift); \
else if (bpp == 32 && interleave == PAN_INTERLEAVE_NONE) \
TILED_UNALIGNED_VARIANT(32, 32, PAN_INTERLEAVE_NONE, store, shift); \
else if (bpp == 32 && interleave == PAN_INTERLEAVE_DEPTH) \
TILED_UNALIGNED_VARIANT(32, 32, PAN_INTERLEAVE_DEPTH, store, shift); \
else if (bpp == 32 && interleave == PAN_INTERLEAVE_STENCIL) \
TILED_UNALIGNED_VARIANT(32, 8, PAN_INTERLEAVE_STENCIL, store, shift); \
else if (bpp == 48) \
TILED_UNALIGNED_VARIANT(48, store, shift); \
TILED_UNALIGNED_VARIANT(48, 48, PAN_INTERLEAVE_NONE, store, shift); \
else if (bpp == 64) \
TILED_UNALIGNED_VARIANT(64, store, shift); \
TILED_UNALIGNED_VARIANT(64, 64, PAN_INTERLEAVE_NONE, store, shift); \
else if (bpp == 96) \
TILED_UNALIGNED_VARIANT(96, store, shift); \
TILED_UNALIGNED_VARIANT(96, 96, PAN_INTERLEAVE_NONE, store, shift); \
else if (bpp == 128) \
TILED_UNALIGNED_VARIANT(128, store, shift); \
TILED_UNALIGNED_VARIANT(128, 128, PAN_INTERLEAVE_NONE, store, shift); \
}
/*
@ -238,6 +300,7 @@ pan_access_tiled_image_generic(void *dst, void *src, unsigned sx, unsigned sy,
unsigned w, unsigned h, uint32_t dst_stride,
uint32_t src_stride,
const struct util_format_description *desc,
enum pan_interleave_zs interleave,
bool _is_store)
{
unsigned bpp = desc->block.bits;
@ -261,22 +324,27 @@ pan_access_tiled_image_generic(void *dst, void *src, unsigned sx, unsigned sy,
}
}
/* Invoke the tile-aligned copy loop with compile-time-constant pixel sizes
 * and interleave mode so each configuration is specialized */
#define TILED_ALIGNED_VARIANT(interleave, store, dst_bpp, src_bpp, shift)      \
   pan_access_tiled_image_aligned(dst, src, (dst_bpp) / 8, (src_bpp) / 8,      \
                                  shift, sx, sy, w, h,                         \
                                  dst_stride, src_stride, interleave, store)

/* Non-power-of-two sizes (24/48/96 bpp) take the generic path instead.
 * Stencil-only Z24S8 copies use a 32-bit tiled pixel but an 8-bit linear
 * pixel. */
#define TILED_ALIGNED_VARIANTS(store)                                          \
   {                                                                           \
      if (bpp == 8)                                                            \
         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 8, 8, 0);           \
      else if (bpp == 16)                                                      \
         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 16, 16, 1);         \
      else if (bpp == 32 && interleave == PAN_INTERLEAVE_NONE)                 \
         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 32, 32, 2);         \
      else if (bpp == 32 && interleave == PAN_INTERLEAVE_DEPTH)                \
         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_DEPTH, store, 32, 32, 2);        \
      else if (bpp == 32 && interleave == PAN_INTERLEAVE_STENCIL)              \
         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_STENCIL, store, 32, 8, 2);       \
      else if (bpp == 64)                                                      \
         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 64, 64, 3);         \
      else if (bpp == 128)                                                     \
         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 128, 128, 4);       \
   }
/* Optimized variant of pan_access_tiled_image_generic except that requires
@ -285,7 +353,8 @@ static void
pan_access_tiled_image_generic_aligned(
void *dst, void *src, unsigned sx, unsigned sy, unsigned w, unsigned h,
uint32_t dst_stride, uint32_t src_stride,
const struct util_format_description *desc,bool is_store)
const struct util_format_description *desc,
enum pan_interleave_zs interleave, bool is_store)
{
unsigned bpp = desc->block.bits;
@ -309,8 +378,12 @@ pan_access_tiled_image_generic_aligned(
static ALWAYS_INLINE void
pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
unsigned h, uint32_t dst_stride, uint32_t src_stride,
enum pipe_format format, bool is_store)
enum pipe_format format,
enum pan_interleave_zs interleave, bool is_store)
{
if (interleave != PAN_INTERLEAVE_NONE)
assert(format == PIPE_FORMAT_Z24_UNORM_S8_UINT);
const struct util_format_description *desc = util_format_description(format);
unsigned bpp = desc->block.bits;
@ -325,7 +398,7 @@ pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
if (desc->block.width > 1 ||
!util_is_power_of_two_nonzero(desc->block.bits)) {
pan_access_tiled_image_generic(dst, (void *)src, x, y, w, h, dst_stride,
src_stride, desc, is_store);
src_stride, desc, interleave, is_store);
return;
}
@ -343,7 +416,8 @@ pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
unsigned dist = MIN2(first_full_tile_y - y, h);
pan_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, w, dist,
dst_stride, src_stride, desc, is_store);
dst_stride, src_stride, desc, interleave,
is_store);
if (dist == h)
return;
@ -358,7 +432,7 @@ pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
pan_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y), x,
last_full_tile_y, w, dist, dst_stride,
src_stride, desc, is_store);
src_stride, desc, interleave, is_store);
h -= dist;
}
@ -368,7 +442,8 @@ pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
unsigned dist = MIN2(first_full_tile_x - x, w);
pan_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, dist, h,
dst_stride, src_stride, desc, is_store);
dst_stride, src_stride, desc, interleave,
is_store);
if (dist == w)
return;
@ -383,14 +458,14 @@ pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
pan_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y),
last_full_tile_x, y, dist, h, dst_stride,
src_stride, desc, is_store);
src_stride, desc, interleave, is_store);
w -= dist;
}
pan_access_tiled_image_generic_aligned(dst, OFFSET(src, x, y), x, y, w,
h, dst_stride, src_stride, desc,
is_store);
interleave, is_store);
}
/**
@ -401,19 +476,21 @@ pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
void
pan_store_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
unsigned w, unsigned h, uint32_t dst_stride,
uint32_t src_stride, enum pipe_format format)
uint32_t src_stride, enum pipe_format format,
enum pan_interleave_zs interleave)
{
pan_access_tiled_image(dst, (void *)src, x, y, w, h, dst_stride, src_stride,
format, true);
format, interleave, true);
}
void
pan_load_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
unsigned w, unsigned h, uint32_t dst_stride,
uint32_t src_stride, enum pipe_format format)
uint32_t src_stride, enum pipe_format format,
enum pan_interleave_zs interleave)
{
pan_access_tiled_image((void *)src, dst, x, y, w, h, src_stride, dst_stride,
format, false);
format, interleave, false);
}
void
@ -499,10 +576,10 @@ pan_copy_tiled_image(void *dst, const void *src, unsigned dst_x, unsigned dst_y,
pan_load_tiled_image(
chunk, src, src_chunk_x, src_chunk_y, width, height,
chunk_row_stride_B, src_stride, format);
chunk_row_stride_B, src_stride, format, PAN_INTERLEAVE_NONE);
pan_store_tiled_image(
dst, chunk, dst_chunk_x, dst_chunk_y, width, height, dst_stride,
chunk_row_stride_B, format);
chunk_row_stride_B, format, PAN_INTERLEAVE_NONE);
}
}

View file

@ -34,6 +34,29 @@
extern "C" {
#endif
/* The depth and stencil aspects of a Z24_UNORM_S8_UINT image are interleaved,
* where the bottom 24 bits are depth and the top 8 bits are stencil. When
* copying to/from a Z24S8 tiled image, the pan_interleave_zs enum specifies
* whether to (de)interleave the depth/stencil aspects */
enum pan_interleave_zs {
/* Copy all aspects, no interleaving */
PAN_INTERLEAVE_NONE,
/* Copy only the depth aspect of a Z24S8 tiled image to/from linear Z24X8 */
PAN_INTERLEAVE_DEPTH,
/* Copy only the stencil aspect of a Z24S8 tiled image to/from linear S8 */
PAN_INTERLEAVE_STENCIL,
};
/**
* Get the appropriate pan_interleave_zs mode for copying to/from a given
* format.
*
* @depth Whether to copy the depth aspect
* @stencil Whether to copy the stencil aspect
*/
enum pan_interleave_zs
pan_get_interleave_zs(enum pipe_format format, bool depth, bool stencil);
/**
* Load a rectangular region from a tiled image to a linear staging image.
*
@ -46,10 +69,12 @@ extern "C" {
* @dst_stride Stride in bytes of linear destination
* @src_stride Number of bytes between adjacent rows of tiles in source.
* @format Format of the source and destination image
* @interleave How to deinterleave ZS aspects from the tiled image
*/
void pan_load_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
unsigned w, unsigned h, uint32_t dst_stride,
uint32_t src_stride, enum pipe_format format);
uint32_t src_stride, enum pipe_format format,
enum pan_interleave_zs interleave);
/**
* Store a linear staging image to a rectangular region of a tiled image.
@ -63,10 +88,12 @@ void pan_load_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
* @dst_stride Number of bytes between adjacent rows of tiles in destination.
* @src_stride Stride in bytes of linear source
* @format Format of the source and destination image
* @interleave How to interleave the ZS aspects into the tiled image
*/
void pan_store_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
unsigned w, unsigned h, uint32_t dst_stride,
uint32_t src_stride, enum pipe_format format);
uint32_t src_stride, enum pipe_format format,
enum pan_interleave_zs interleave);
/**
* Copy a rectangular region from one tiled image to another.

View file

@ -151,14 +151,14 @@ test(unsigned width, unsigned height, unsigned rx, unsigned ry, unsigned rw,
}
pan_store_tiled_image(tiled, linear, rx, ry, rw, rh, dst_stride,
src_stride, format);
src_stride, format, PAN_INTERLEAVE_NONE);
} else {
for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) {
((uint8_t *)tiled)[i] = (i & 0xFF);
}
pan_load_tiled_image(linear, tiled, rx, ry, rw, rh, dst_stride,
src_stride, format);
src_stride, format, PAN_INTERLEAVE_NONE);
}
ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh, dst_stride,

View file

@ -125,14 +125,14 @@ panvk_copy_image_to_from_memory(struct image_params img,
img.offset.x, img.offset.y, extent.width, extent.height,
slice_layout->tiled_or_linear.row_stride_B,
mem.layout.row_stride_B,
pfmt);
pfmt, PAN_INTERLEAVE_NONE);
else
pan_load_tiled_image(
mem_depth_ptr, img_depth_ptr,
img.offset.x, img.offset.y, extent.width, extent.height,
mem.layout.row_stride_B,
slice_layout->tiled_or_linear.row_stride_B,
pfmt);
pfmt, PAN_INTERLEAVE_NONE);
}
}
}
@ -355,7 +355,7 @@ panvk_copy_image_to_image(struct panvk_image *dst, void *dst_cpu,
region->extent.width, region->extent.height,
dst_slice_layout->tiled_or_linear.row_stride_B,
src_slice_layout->tiled_or_linear.row_stride_B,
src_pfmt);
src_pfmt, PAN_INTERLEAVE_NONE);
} else if (!src_linear && dst_linear) {
unsigned dst_y_bl = region->dstOffset.y / block_height_px;
unsigned dst_x_bl = region->dstOffset.x / block_width_px;
@ -368,7 +368,7 @@ panvk_copy_image_to_image(struct panvk_image *dst, void *dst_cpu,
region->extent.width, region->extent.height,
dst_slice_layout->tiled_or_linear.row_stride_B,
src_slice_layout->tiled_or_linear.row_stride_B,
dst_pfmt);
dst_pfmt, PAN_INTERLEAVE_NONE);
} else {
pan_copy_tiled_image(
dst_depth_ptr, src_depth_ptr, region->dstOffset.x,