From f39194cdd3211ec0ad3dcdd12e105c78b71d9701 Mon Sep 17 00:00:00 2001 From: "Eric R. Smith" Date: Mon, 21 Oct 2024 12:54:52 -0300 Subject: [PATCH] panfrost: support MTK 16L32S detiling This is a preliminary implementation of detiling for NV12_16L32 tiled format external images. When we encounter such an image, decode it into a secondary buffer which will then be used to actually texture from. In some cases applications may wish to represent the individual planes of an NV12 image separately, we support that by allowing detiling of just an R8 (luma) or R8G8 (chroma) plane. Acked-by: Boris Brezillon Acked-by: Daniel Stone Part-of: --- src/gallium/drivers/panfrost/meson.build | 2 +- src/gallium/drivers/panfrost/pan_cmdstream.c | 138 ++++++++++- src/gallium/drivers/panfrost/pan_context.h | 4 +- src/gallium/drivers/panfrost/pan_job.c | 4 + .../{pan_afbc_cso.c => pan_mod_conv_cso.c} | 230 ++++++++++++++++-- .../{pan_afbc_cso.h => pan_mod_conv_cso.h} | 30 ++- src/gallium/drivers/panfrost/pan_resource.c | 109 +++++++-- src/gallium/drivers/panfrost/pan_resource.h | 4 + src/gallium/drivers/panfrost/pan_screen.c | 7 +- src/gallium/drivers/panfrost/pan_screen.h | 3 + src/panfrost/lib/pan_afbc.c | 15 ++ src/panfrost/lib/pan_layout.c | 5 +- src/panfrost/lib/pan_texture.h | 8 +- 13 files changed, 483 insertions(+), 76 deletions(-) rename src/gallium/drivers/panfrost/{pan_afbc_cso.c => pan_mod_conv_cso.c} (55%) rename src/gallium/drivers/panfrost/{pan_afbc_cso.h => pan_mod_conv_cso.h} (76%) diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 8be6479a041..bc66afdf737 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -4,7 +4,7 @@ files_panfrost = files( 'driinfo_panfrost.h', - 'pan_afbc_cso.c', + 'pan_mod_conv_cso.c', 'pan_bo.c', 'pan_device.c', 'pan_disk_cache.c', diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index bec355c3e7c..83746f20170 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -40,7 +40,7 @@ #include "genxml/gen_macros.h" -#include "pan_afbc_cso.h" +#include "pan_mod_conv_cso.h" #include "pan_blend.h" #include "pan_bo.h" #include "pan_cmdstream.h" @@ -1613,6 +1613,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, enum pipe_format format = so->base.format; assert(prsrc->bo); + bool is_shadow = false; /* Format to access the stencil/depth portion of a Z32_S8 texture */ if (format == PIPE_FORMAT_X32_S8X24_UINT) { assert(prsrc->separate_stencil); @@ -1621,6 +1622,11 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, format = texture->format; } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { format = PIPE_FORMAT_Z32_FLOAT; + } else if (prsrc->shadow_image) { + prsrc = prsrc->shadow_image; + texture = &prsrc->base; + format = texture ->format; + is_shadow = true; } so->texture_bo = prsrc->image.data.base; @@ -1709,9 +1715,10 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, const struct util_format_description *desc = util_format_description(format); - if ((device->debug & PAN_DBG_YUV) && panfrost_format_is_yuv(format)) { - + if ((device->debug & PAN_DBG_YUV) && panfrost_format_is_yuv(format) && + !(is_shadow && panfrost_format_supports_mtk_tiled(format)) ) { if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { + iview.swizzle[1] = PIPE_SWIZZLE_0; iview.swizzle[2] = PIPE_SWIZZLE_1; } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLANAR2) { iview.swizzle[1] = PIPE_SWIZZLE_0; @@ -2706,6 +2713,8 @@ panfrost_initialize_surface(struct panfrost_batch *batch, BITSET_SET(rsrc->valid.data, surf->u.tex.level); if (rsrc->separate_stencil) BITSET_SET(rsrc->separate_stencil->valid.data, surf->u.tex.level); + if (rsrc->shadow_image) + BITSET_SET(rsrc->shadow_image->valid.data, surf->u.tex.level); } } @@ -3373,7 +3382,7 @@ panfrost_launch_grid(struct pipe_context *pipe, #define AFBC_BLOCK_ALIGN 16 static void -panfrost_launch_afbc_shader(struct panfrost_batch *batch, void *cso, +panfrost_launch_convert_shader(struct panfrost_batch *batch, void *cso, struct pipe_constant_buffer *cbuf, unsigned nr_blocks) { @@ -3403,13 +3412,13 @@ panfrost_launch_afbc_shader(struct panfrost_batch *batch, void *cso, pctx->set_constant_buffer(pctx, PIPE_SHADER_COMPUTE, 0, true, &saved_const); } -#define LAUNCH_AFBC_SHADER(name, batch, rsrc, consts, nr_blocks) \ - struct pan_afbc_shader_data *shaders = \ - panfrost_afbc_get_shaders(batch->ctx, rsrc, AFBC_BLOCK_ALIGN); \ +#define LAUNCH_CONVERT_SHADER(name, batch, rsrc, consts, nr_blocks) \ + struct pan_mod_convert_shader_data *shaders = \ + panfrost_get_mod_convert_shaders(batch->ctx, rsrc, AFBC_BLOCK_ALIGN); \ struct pipe_constant_buffer constant_buffer = { \ .buffer_size = sizeof(consts), \ .user_buffer = &consts}; \ - panfrost_launch_afbc_shader(batch, shaders->name##_cso, &constant_buffer, \ + panfrost_launch_convert_shader(batch, shaders->name##_cso, &constant_buffer, \ nr_blocks); static void @@ -3427,7 +3436,7 @@ panfrost_afbc_size(struct panfrost_batch *batch, struct panfrost_resource *src, panfrost_batch_read_rsrc(batch, src, PIPE_SHADER_COMPUTE); panfrost_batch_write_bo(batch, metadata, PIPE_SHADER_COMPUTE); - LAUNCH_AFBC_SHADER(size, batch, src, consts, slice->afbc.nr_blocks); + LAUNCH_CONVERT_SHADER(afbc_size, batch, src, consts, slice->afbc.nr_blocks); } static void @@ -3452,7 +3461,111 @@ panfrost_afbc_pack(struct panfrost_batch *batch, struct panfrost_resource *src, panfrost_batch_write_bo(batch, dst, PIPE_SHADER_COMPUTE); panfrost_batch_add_bo(batch, metadata, PIPE_SHADER_COMPUTE); - LAUNCH_AFBC_SHADER(pack, batch, src, consts, dst_slice->afbc.nr_blocks); + LAUNCH_CONVERT_SHADER(afbc_pack, batch, src, consts, dst_slice->afbc.nr_blocks); +} + +static void +panfrost_mtk_detile_compute(struct panfrost_context *ctx, struct pipe_blit_info *info) +{ + struct pipe_context *pipe = &ctx->base; + struct pipe_resource *y_src = info->src.resource; + struct pipe_resource *uv_src = y_src->next; + struct pipe_resource *y_dst = info->dst.resource; + struct pipe_resource *uv_dst = y_dst->next; + + unsigned width = info->src.box.width; + unsigned height = info->src.box.height; + unsigned src_stride = pan_resource(y_src)->image.layout.slices[0].row_stride; + unsigned dst_stride = pan_resource(y_dst)->image.layout.slices[0].row_stride; + + /* 4 images: y_src, uv_src, y_dst, uv_dst */ + struct pipe_image_view image[4] = { 0 }; + + if (!uv_src) { + /* single plane conversion; this must be R8 or R8G8 */ + assert(!uv_dst); + if (y_src->format == PIPE_FORMAT_R8G8_UNORM) { + /* R8G8 would be the single chroma plane of an image */ + /* adjust for dimensions of original luma plane */ + width *= 2; + height *= 2; + uv_src = y_src; + uv_dst = y_dst; + y_src = y_dst = NULL; + } + } + image[0].resource = y_src; + image[0].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ; + image[0].u.tex.level = info->src.level; + image[0].u.tex.first_layer = 0; + image[0].u.tex.last_layer = y_src ? (unsigned)(y_src->array_size - 1) : 0; + + image[1].resource = uv_src; + image[1].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_READ; + image[1].u.tex.level = info->src.level; + image[1].u.tex.first_layer = 0; + image[1].u.tex.last_layer = uv_src ? (unsigned)(uv_src->array_size - 1) : 0; + + image[2].resource = y_dst; + image[2].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[2].shader_access = image[2].access = PIPE_IMAGE_ACCESS_WRITE; + image[2].u.tex.level = info->dst.level; + image[2].u.tex.first_layer = 0; + image[2].u.tex.last_layer = y_dst ? (unsigned)(y_dst->array_size - 1) : 0; + + image[3].resource = uv_dst; + image[3].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[3].shader_access = image[3].access = PIPE_IMAGE_ACCESS_WRITE; + image[3].u.tex.level = info->dst.level; + image[3].u.tex.first_layer = 0; + image[3].u.tex.last_layer = uv_dst ? (unsigned)(uv_dst->array_size - 1) : 0; + + struct panfrost_mtk_detile_info consts = { + .tiles_per_stride = src_stride >> 4, + .src_width = width, + .src_height = height, + .dst_stride = dst_stride, + }; + panfrost_flush_all_batches(ctx, "mtk_detile pre-barrier"); + + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + pipe->set_shader_images(pipe, PIPE_SHADER_COMPUTE, 0, 4, 0, image); + + /* launch the compute shader */ + struct pan_mod_convert_shader_data *shaders = + panfrost_get_mod_convert_shaders(ctx, pan_resource(y_dst ? y_dst : uv_dst), AFBC_BLOCK_ALIGN); + struct pipe_constant_buffer cbuf = { + .buffer_size = sizeof(consts), + .user_buffer = &consts}; + + struct pipe_grid_info grid_info = { + .block[0] = 4, + .last_block[0] = (width/4) % 4, + .block[1] = 16, + .last_block[1] = height % 16, + .block[2] = 1, + .last_block[2] = 0, + .grid[0] = DIV_ROUND_UP(width/4, 4), + .grid[1] = DIV_ROUND_UP(height, 16), + .grid[2] = 1, + }; + + struct pipe_constant_buffer saved_const = {}; + struct panfrost_constant_buffer *pbuf = + &batch->ctx->constant_buffer[PIPE_SHADER_COMPUTE]; + void *saved_cso = batch->ctx->uncompiled[PIPE_SHADER_COMPUTE]; + void *cso = shaders->mtk_detile_cso; + util_copy_constant_buffer(&pbuf->cb[0], &saved_const, true); + + pipe->bind_compute_state(pipe, cso); + pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cbuf); + + panfrost_launch_grid_on_batch(pipe, batch, &grid_info); + + pipe->bind_compute_state(pipe, saved_cso); + pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, true, &saved_const); } static void * @@ -3707,10 +3820,10 @@ panfrost_create_sampler_view(struct pipe_context *pctx, struct panfrost_context *ctx = pan_context(pctx); struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); + struct panfrost_resource *ptexture = pan_resource(texture); - pan_legalize_format(ctx, pan_resource(texture), template->format, false, + pan_legalize_format(ctx, ptexture, template->format, false, false); - pipe_reference(NULL, &texture->reference); so->base = *template; @@ -4098,6 +4211,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) screen->vtbl.compile_shader = GENX(pan_shader_compile); screen->vtbl.afbc_size = panfrost_afbc_size; screen->vtbl.afbc_pack = panfrost_afbc_pack; + screen->vtbl.mtk_detile = panfrost_mtk_detile_compute; screen->vtbl.emit_write_timestamp = emit_write_timestamp; screen->vtbl.select_tile_size = GENX(pan_select_tile_size); diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 7d714004a69..2b2fb01d9d1 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -28,7 +28,7 @@ #define _LARGEFILE64_SOURCE 1 #include #include -#include "pan_afbc_cso.h" +#include "pan_mod_conv_cso.h" #include "pan_blend_cso.h" #include "pan_earlyzs.h" #include "pan_encoder.h" @@ -209,7 +209,7 @@ struct panfrost_context { struct blitter_context *blitter; - struct pan_afbc_shaders afbc_shaders; + struct pan_mod_convert_shaders mod_convert_shaders; struct panfrost_blend_state *blend; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index e785da90f19..03bc6bf8d99 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -381,6 +381,8 @@ panfrost_batch_read_rsrc(struct panfrost_batch *batch, if (rsrc->separate_stencil) panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->bo, access); + if (rsrc->shadow_image) + panfrost_batch_add_bo_old(batch, rsrc->shadow_image->bo, access); panfrost_batch_update_access(batch, rsrc, false); } @@ -396,6 +398,8 @@ panfrost_batch_write_rsrc(struct panfrost_batch *batch, if (rsrc->separate_stencil) panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->bo, access); + if (rsrc->shadow_image) + panfrost_batch_add_bo_old(batch, rsrc->shadow_image->bo, access); panfrost_batch_update_access(batch, rsrc, true); } diff --git a/src/gallium/drivers/panfrost/pan_afbc_cso.c b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c similarity index 55% rename from src/gallium/drivers/panfrost/pan_afbc_cso.c rename to src/gallium/drivers/panfrost/pan_mod_conv_cso.c index 8ed0a7d8c84..025fd4a7890 100644 --- a/src/gallium/drivers/panfrost/pan_afbc_cso.c +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c @@ -21,12 +21,13 @@ * SOFTWARE. */ -#include "pan_afbc_cso.h" +#include "pan_mod_conv_cso.h" #include "nir/pipe_nir.h" #include "nir_builder.h" #include "pan_context.h" #include "pan_resource.h" #include "pan_screen.h" +#include "pan_shader.h" #define panfrost_afbc_add_info_ubo(name, b) \ nir_variable *info_ubo = nir_variable_create( \ @@ -43,6 +44,21 @@ nir_imm_int(b, offsetof(struct panfrost_afbc_##name##_info, field)), \ .align_mul = 4, .range = ~0) +#define panfrost_mtk_add_info_ubo(name, b) \ + nir_variable *info_ubo = nir_variable_create( \ + b.shader, nir_var_mem_ubo, \ + glsl_array_type(glsl_uint_type(), \ + sizeof(struct panfrost_mtk_##name##_info) / 4, 0), \ + "info_ubo"); \ + info_ubo->data.driver_location = 0; + +#define panfrost_mtk_get_info_field(name, b, field) \ + nir_load_ubo( \ + (b), 1, sizeof(((struct panfrost_mtk_##name##_info *)0)->field) * 8, \ + nir_imm_int(b, 0), \ + nir_imm_int(b, offsetof(struct panfrost_mtk_##name##_info, field)), \ + .align_mul = 4, .range = ~0) + static nir_def * read_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx) { @@ -197,7 +213,7 @@ copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz, panfrost_afbc_get_info_field(size, b, field) static nir_shader * -panfrost_afbc_create_size_shader(struct panfrost_screen *screen, unsigned bpp, +panfrost_create_afbc_size_shader(struct panfrost_screen *screen, unsigned bpp, unsigned align) { struct panfrost_device *dev = pan_device(&screen->base); @@ -233,7 +249,7 @@ panfrost_afbc_create_size_shader(struct panfrost_screen *screen, unsigned bpp, panfrost_afbc_get_info_field(pack, b, field) static nir_shader * -panfrost_afbc_create_pack_shader(struct panfrost_screen *screen, unsigned align, +panfrost_create_afbc_pack_shader(struct panfrost_screen *screen, unsigned align, bool tiled) { nir_builder b = nir_builder_init_simple_shader( @@ -260,64 +276,230 @@ panfrost_afbc_create_pack_shader(struct panfrost_screen *screen, unsigned align, return b.shader; } -struct pan_afbc_shader_data * -panfrost_afbc_get_shaders(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, unsigned align) +#define panfrost_mtk_detile_get_info_field(b, field) \ + panfrost_mtk_get_info_field(detile, b, field) + +static nir_def * +pan_mtk_tiled_from_linear(nir_builder *b, nir_def *linear, nir_def *tiles_per_stride, nir_def *width) +{ + nir_def *tiled; + /* uvec2 tlc = uvec2(linear) >> uvec2(2u, 5u) */ + nir_def *tlc = nir_ushr(b, linear, + nir_imm_ivec2(b, 2, 5)); + + /* uvec2 txc = uvec2(linear) & uvec2(3u, 31u) */ + nir_def *txc = nir_iand(b, linear, + nir_imm_ivec2(b, 3, 31)); + + /* uint tlo = tlc.y * tiles_per_stride + tlc.x */ + nir_def *tlo = nir_iadd(b, + nir_imul(b, + nir_channel(b, tlc, 1), + tiles_per_stride), + nir_channel(b, tlc, 0)); + nir_def *txcx = nir_channel(b, txc, 0); + nir_def *txcy = nir_channel(b, txc, 1); + nir_def *txcytmp = nir_vec2(b, txcy, + nir_ushr_imm(b, txcy, 1)); + + /* txo = (uvec2(txc.y, txc.y >> 1) << uvec2(2u)) | txc.xx */ + nir_def *txo = nir_ior(b, + nir_ishl_imm(b, txcytmp, 2), + nir_vec2(b, txcx, txcx)); + + /* uvec2 off = (uvec2(tlo) << uvec2(7u, 6u)) | txo */ + nir_def *off = nir_ior(b, + nir_ishl(b, + nir_vec2(b, tlo, tlo), + nir_imm_ivec2(b, 7, 6)), + txo); + + /* convert to 2D coord + * tiled.xy = off % (width / 4, width / 4) + * tiled.zw = off / (width / 4, width / 4) */ + nir_def *width4 = nir_ishl_imm(b, tiles_per_stride, 2); + width4 = nir_vec2(b, width4, width4); + nir_def *tiled_xy = nir_umod(b, off, width4); + nir_def *tiled_zw = nir_udiv(b, off, width4); + tiled = nir_vec4(b, + nir_channel(b, tiled_xy, 0), + nir_channel(b, tiled_xy, 1), + nir_channel(b, tiled_zw, 0), + nir_channel(b, tiled_zw, 1)); + + return tiled; +} + +static nir_shader * +panfrost_create_mtk_detile_shader(struct panfrost_screen *screen, unsigned align, + bool is_tiled) +{ + const struct panfrost_device *device = &screen->dev; + bool tint_yuv = (device->debug & PAN_DBG_YUV) != 0; + nir_builder b = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, screen->vtbl.get_compiler_options(), + "panfrost_mtk_detile"); + b.shader->info.workgroup_size[0] = 4; + b.shader->info.workgroup_size[1] = 16; + b.shader->info.workgroup_size[2] = 1; + + const struct glsl_type *image_type = + glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_UINT); + + panfrost_mtk_add_info_ubo(detile, b); + + nir_variable *y_tiled = + nir_variable_create(b.shader, nir_var_image, image_type, "y_tiled"); + y_tiled->data.binding = 0; + y_tiled->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 0); + nir_variable *uv_tiled = + nir_variable_create(b.shader, nir_var_image, image_type, "uv_tiled"); + uv_tiled->data.binding = 1; + uv_tiled->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 1); + + nir_variable *y_linear = + nir_variable_create(b.shader, nir_var_image, image_type, "y_linear"); + y_linear->data.binding = 2; + y_linear->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 2); + + nir_variable *uv_linear = + nir_variable_create(b.shader, nir_var_image, image_type, "uv_linear"); + uv_linear->data.binding = 3; + uv_linear->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 3); + + nir_def *linear = nir_load_global_invocation_id(&b, 32); + nir_def *tiles_per_stride = + panfrost_mtk_detile_get_info_field(&b, tiles_per_stride); + nir_def *src_width = panfrost_mtk_detile_get_info_field(&b, src_width); + + nir_def *zero = nir_imm_int(&b, 0); + + nir_def *coord = nir_vec2(&b, + nir_channel(&b, linear, 0), + nir_channel(&b, linear, 1)); + + nir_def *tiled = pan_mtk_tiled_from_linear(&b, coord, tiles_per_stride, src_width); + + nir_def *tiled_xz = nir_vec4(&b, nir_channel(&b, tiled, 0), + nir_channel(&b, tiled, 2), zero, zero); + nir_def *tiled_yw = nir_vec4(&b, nir_channel(&b, tiled, 1), + nir_channel(&b, tiled, 3), zero, zero); + + nir_def *yval = nir_image_load(&b, 4, 32, zero, tiled_xz, + zero /* sample */, zero /* lod */, + .access = ACCESS_NON_WRITEABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, + .dest_type = nir_type_uint32); + nir_def *uvval; + + nir_def *dst_y_coord = nir_vec4(&b, + nir_channel(&b, coord, 0), + nir_channel(&b, coord, 1), + zero, zero); + /* store Y data */ + nir_def *img_deref_st_y = nir_imm_int(&b, 2); + nir_image_store(&b, img_deref_st_y, dst_y_coord, zero /* sample */, + yval, zero /* lod */, + .access = ACCESS_NON_READABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, .src_type = nir_type_uint32); + + /* store UV data */ + nir_def *odd_even_line = nir_iand_imm(&b, + nir_channel(&b, dst_y_coord, 1), + 1); + nir_push_if(&b, nir_ieq_imm(&b, odd_even_line, 0)); + { + if (tint_yuv) { + /* use just blue for chroma */ + uvval = nir_imm_ivec4(&b, 0xc0, 0x80, 0xc0, 0x80); + } else { + nir_def *img_deref_uv = nir_imm_int(&b, 1); + uvval = nir_image_load(&b, 4, 32, img_deref_uv, tiled_yw, + zero /* sample */, zero /* lod */, + .access = ACCESS_NON_WRITEABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, + .dest_type = nir_type_uint32); + } + nir_def *dst_uv_coord = nir_ishr(&b, dst_y_coord, + nir_imm_ivec4(&b, 0, 1, 0, 0)); + nir_def *img_deref_st_uv = nir_imm_int(&b, 3); + nir_image_store(&b, img_deref_st_uv, dst_uv_coord, zero /* sample */, + uvval, zero /* lod */, + .access = ACCESS_NON_READABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, .src_type = nir_type_uint32); + } + nir_pop_if(&b, NULL); + + return b.shader; +} + +struct pan_mod_convert_shader_data * +panfrost_get_mod_convert_shaders(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, unsigned align) { struct pipe_context *pctx = &ctx->base; struct panfrost_screen *screen = pan_screen(ctx->base.screen); bool tiled = rsrc->image.layout.modifier & AFBC_FORMAT_MOD_TILED; - struct pan_afbc_shader_key key = { + struct pan_mod_convert_shader_key key = { .bpp = util_format_get_blocksizebits(rsrc->base.format), .align = align, .tiled = tiled, }; - pthread_mutex_lock(&ctx->afbc_shaders.lock); + pthread_mutex_lock(&ctx->mod_convert_shaders.lock); struct hash_entry *he = - _mesa_hash_table_search(ctx->afbc_shaders.shaders, &key); - struct pan_afbc_shader_data *shader = he ? he->data : NULL; - pthread_mutex_unlock(&ctx->afbc_shaders.lock); + _mesa_hash_table_search(ctx->mod_convert_shaders.shaders, &key); + struct pan_mod_convert_shader_data *shader = he ? he->data : NULL; + pthread_mutex_unlock(&ctx->mod_convert_shaders.lock); if (shader) return shader; - shader = rzalloc(ctx->afbc_shaders.shaders, struct pan_afbc_shader_data); + shader = rzalloc(ctx->mod_convert_shaders.shaders, struct pan_mod_convert_shader_data); shader->key = key; - _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader); + _mesa_hash_table_insert(ctx->mod_convert_shaders.shaders, &shader->key, shader); #define COMPILE_SHADER(name, ...) \ { \ nir_shader *nir = \ - panfrost_afbc_create_##name##_shader(screen, __VA_ARGS__); \ + panfrost_create_##name##_shader(screen, __VA_ARGS__); \ nir->info.num_ubos = 1; \ shader->name##_cso = pipe_shader_from_nir(pctx, nir); \ } - COMPILE_SHADER(size, key.bpp, key.align); - COMPILE_SHADER(pack, key.align, key.tiled); + COMPILE_SHADER(afbc_size, key.bpp, key.align); + COMPILE_SHADER(afbc_pack, key.align, key.tiled); + COMPILE_SHADER(mtk_detile, key.bpp, key.align); #undef COMPILE_SHADER - pthread_mutex_lock(&ctx->afbc_shaders.lock); - _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader); - pthread_mutex_unlock(&ctx->afbc_shaders.lock); + pthread_mutex_lock(&ctx->mod_convert_shaders.lock); + _mesa_hash_table_insert(ctx->mod_convert_shaders.shaders, &shader->key, shader); + pthread_mutex_unlock(&ctx->mod_convert_shaders.lock); return shader; } -DERIVE_HASH_TABLE(pan_afbc_shader_key); +DERIVE_HASH_TABLE(pan_mod_convert_shader_key); void panfrost_afbc_context_init(struct panfrost_context *ctx) { - ctx->afbc_shaders.shaders = pan_afbc_shader_key_table_create(NULL); - pthread_mutex_init(&ctx->afbc_shaders.lock, NULL); + ctx->mod_convert_shaders.shaders = pan_mod_convert_shader_key_table_create(NULL); + pthread_mutex_init(&ctx->mod_convert_shaders.lock, NULL); } void panfrost_afbc_context_destroy(struct panfrost_context *ctx) { - _mesa_hash_table_destroy(ctx->afbc_shaders.shaders, NULL); - pthread_mutex_destroy(&ctx->afbc_shaders.lock); + _mesa_hash_table_destroy(ctx->mod_convert_shaders.shaders, NULL); + pthread_mutex_destroy(&ctx->mod_convert_shaders.lock); } diff --git a/src/gallium/drivers/panfrost/pan_afbc_cso.h b/src/gallium/drivers/panfrost/pan_mod_conv_cso.h similarity index 76% rename from src/gallium/drivers/panfrost/pan_afbc_cso.h rename to src/gallium/drivers/panfrost/pan_mod_conv_cso.h index dec054e3d94..63eb47175ba 100644 --- a/src/gallium/drivers/panfrost/pan_afbc_cso.h +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.h @@ -21,8 +21,8 @@ * SOFTWARE. */ -#ifndef __PAN_AFBC_CSO_H__ -#define __PAN_AFBC_CSO_H__ +#ifndef __PAN_MOD_CONV_CSO_H__ +#define __PAN_MOD_CONV_CSO_H__ #include "util/hash_table.h" @@ -33,19 +33,20 @@ struct panfrost_context; struct panfrost_resource; struct panfrost_screen; -struct pan_afbc_shader_key { +struct pan_mod_convert_shader_key { unsigned bpp; unsigned align; bool tiled; }; -struct pan_afbc_shader_data { - struct pan_afbc_shader_key key; - void *size_cso; - void *pack_cso; +struct pan_mod_convert_shader_data { + struct pan_mod_convert_shader_key key; + void *afbc_size_cso; + void *afbc_pack_cso; + void *mtk_detile_cso; }; -struct pan_afbc_shaders { +struct pan_mod_convert_shaders { struct hash_table *shaders; pthread_mutex_t lock; }; @@ -70,11 +71,18 @@ struct panfrost_afbc_pack_info { uint32_t padding[3]; // FIXME } PACKED; +struct panfrost_mtk_detile_info { + uint32_t tiles_per_stride; + uint32_t src_width; + uint32_t src_height; + uint32_t dst_stride; +} PACKED; + void panfrost_afbc_context_init(struct panfrost_context *ctx); void panfrost_afbc_context_destroy(struct panfrost_context *ctx); -struct pan_afbc_shader_data * -panfrost_afbc_get_shaders(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, unsigned align); +struct pan_mod_convert_shader_data * +panfrost_get_mod_convert_shaders(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, unsigned align); #endif diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 733d16be7b8..89d1f3d69d6 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -914,6 +914,10 @@ panfrost_resource_destroy(struct pipe_screen *screen, struct pipe_resource *pt) if (rsrc->scanout) renderonly_scanout_destroy(rsrc->scanout, dev->ro); + if (rsrc->shadow_image) + pipe_resource_reference( + (struct pipe_resource **)&rsrc->shadow_image, NULL); + if (rsrc->bo) panfrost_bo_unreference(rsrc->bo); @@ -1324,7 +1328,7 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource, /* Shadowing with separate stencil may require additional accounting. * Bail in these exotic cases. */ - if (rsrc->separate_stencil) { + if (rsrc->separate_stencil || rsrc->shadow_image) { create_new_bo = false; copy_resource = false; } @@ -1401,8 +1405,9 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource, struct pipe_box box_blocks; u_box_pixels_to_blocks(&box_blocks, box, format); - if (rsrc->image.layout.modifier == - DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { + switch(rsrc->image.layout.modifier) { + case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: + case DRM_FORMAT_MOD_MTK_16L_32S_TILE: transfer->base.stride = box_blocks.width * bytes_per_block; transfer->base.layer_stride = transfer->base.stride * box_blocks.height; transfer->map = @@ -1412,7 +1417,7 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource, panfrost_load_tiled_images(transfer, rsrc); return transfer->map; - } else { + default: assert(rsrc->image.layout.modifier == DRM_FORMAT_MOD_LINEAR); /* Direct, persistent writes create holes in time for @@ -1449,12 +1454,54 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, struct panfrost_resource *rsrc, uint64_t modifier, bool copy_resource, const char *reason) { - assert(!rsrc->modifier_constant); + bool need_shadow = rsrc->modifier_constant; - struct pipe_resource *tmp_prsrc = panfrost_resource_create_with_modifier( - ctx->base.screen, &rsrc->base, modifier); + assert(!rsrc->modifier_constant || copy_resource); + + struct pipe_resource template = rsrc->base; + struct pipe_resource *tmp_prsrc; + struct pipe_resource *next_tmp_prsrc = NULL; + struct panfrost_resource *next_tmp_rsrc = NULL; + if (template.next) { + struct pipe_resource second_template = *template.next; + bool fix_stride; + assert(drm_is_mtk_tiled(rsrc->base.format, rsrc->image.layout.modifier)); + /* fix up the stride */ + switch (rsrc->base.format) { + case PIPE_FORMAT_R8_G8B8_420_UNORM: + case PIPE_FORMAT_R8_G8B8_422_UNORM: + case PIPE_FORMAT_R10_G10B10_420_UNORM: + case PIPE_FORMAT_R10_G10B10_422_UNORM: + fix_stride = true; + break; + default: + fix_stride = false; + break; + } + template.next = NULL; + if (fix_stride) { + second_template.width0 *= 2; /* temporarily adjust size for subsampling */ + } + next_tmp_prsrc = panfrost_resource_create_with_modifier( + ctx->base.screen, &second_template, modifier); + next_tmp_rsrc = pan_resource(next_tmp_prsrc); + if (fix_stride) { + next_tmp_rsrc->base.width0 /= 2; + next_tmp_rsrc->image.layout.width /= 2; + } + } + tmp_prsrc = panfrost_resource_create_with_modifier( + ctx->base.screen, &template, modifier); struct panfrost_resource *tmp_rsrc = pan_resource(tmp_prsrc); + if (next_tmp_prsrc) { + tmp_prsrc->next = next_tmp_prsrc; + } + if (need_shadow && rsrc->shadow_image) { + /* free the old shadow image */ + pipe_resource_reference( + (struct pipe_resource **)&rsrc->shadow_image, NULL); + } if (copy_resource) { struct pipe_blit_info blit = { .dst.resource = &tmp_rsrc->base, @@ -1465,6 +1512,7 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, .filter = PIPE_TEX_FILTER_NEAREST, }; + struct panfrost_screen *screen = pan_screen(ctx->base.screen); /* data_valid is not valid until flushed */ panfrost_flush_writer(ctx, rsrc, "AFBC/AFRC decompressing blit"); @@ -1477,7 +1525,11 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, util_num_layers(&rsrc->base, i), &blit.dst.box); blit.src.box = blit.dst.box; - panfrost_blit_no_afbc_legalization(&ctx->base, &blit); + if (drm_is_mtk_tiled(rsrc->base.format, + rsrc->image.layout.modifier)) + screen->vtbl.mtk_detile(ctx, &blit); + else + panfrost_blit_no_afbc_legalization(&ctx->base, &blit); } } @@ -1487,20 +1539,26 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, panfrost_flush_writer(ctx, tmp_rsrc, "AFBC/AFRC decompressing blit"); } - panfrost_bo_unreference(rsrc->bo); + if (need_shadow) { + panfrost_resource_setup(ctx->base.screen, tmp_rsrc, + modifier, tmp_rsrc->base.format); + rsrc->shadow_image = tmp_rsrc; + } else { + panfrost_bo_unreference(rsrc->bo); - rsrc->bo = tmp_rsrc->bo; - rsrc->image.data.base = rsrc->bo->ptr.gpu; - panfrost_bo_reference(rsrc->bo); + rsrc->bo = tmp_rsrc->bo; + rsrc->image.data.base = rsrc->bo->ptr.gpu; + panfrost_bo_reference(rsrc->bo); - panfrost_resource_setup(ctx->base.screen, rsrc, modifier, - tmp_rsrc->base.format); - /* panfrost_resource_setup will force the modifier to stay constant when - * called with a specific modifier. We don't want that here, we want to - * be able to convert back to another modifier if needed */ - rsrc->modifier_constant = false; - pipe_resource_reference(&tmp_prsrc, NULL); - perf_debug(ctx, "resource_modifier_convert required due to: %s", reason); + panfrost_resource_setup(ctx->base.screen, rsrc, modifier, + tmp_rsrc->base.format); + /* panfrost_resource_setup will force the modifier to stay constant when + * called with a specific modifier. We don't want that here, we want to + * be able to convert back to another modifier if needed */ + rsrc->modifier_constant = false; + pipe_resource_reference(&tmp_prsrc, NULL); + perf_debug(ctx, "resource_modifier_convert required due to: %s", reason); + } } /* Validate that an AFBC/AFRC resource may be used as a particular format. If it @@ -1516,9 +1574,11 @@ pan_legalize_format(struct panfrost_context *ctx, enum pipe_format old_format = rsrc->base.format; enum pipe_format new_format = format; bool compatible = true; + uint64_t dest_modifier = DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; if (!drm_is_afbc(rsrc->image.layout.modifier) && - !drm_is_afrc(rsrc->image.layout.modifier)) + !drm_is_afrc(rsrc->image.layout.modifier) && + !drm_is_mtk_tiled(old_format, rsrc->image.layout.modifier)) return; if (drm_is_afbc(rsrc->image.layout.modifier)) { @@ -1530,14 +1590,17 @@ pan_legalize_format(struct panfrost_context *ctx, struct pan_afrc_format_info new_info = panfrost_afrc_get_format_info(new_format); compatible = !memcmp(&old_info, &new_info, sizeof(old_info)); + } else if (drm_is_mtk_tiled(old_format, rsrc->image.layout.modifier)) { + compatible = false; + dest_modifier = DRM_FORMAT_MOD_LINEAR; } if (!compatible) { pan_resource_modifier_convert( - ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, !discard, + ctx, rsrc, dest_modifier, !discard, drm_is_afbc(rsrc->image.layout.modifier) ? "Reinterpreting AFBC surface as incompatible format" - : "Reinterpreting AFRC surface as incompatible format"); + : "Reinterpreting tiled surface as incompatible format"); return; } diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h index 28d1b859658..978e3419a8d 100644 --- a/src/gallium/drivers/panfrost/pan_resource.h +++ b/src/gallium/drivers/panfrost/pan_resource.h @@ -53,6 +53,10 @@ struct panfrost_resource { struct panfrost_resource *separate_stencil; + /* image created when detiling a resource whose + constant modifier we cannot change */ + struct panfrost_resource *shadow_image; + struct util_range valid_buffer_range; /* Description of the resource layout */ diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 017d33d7d57..726ddbe5a13 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -51,6 +51,7 @@ #include "pan_resource.h" #include "pan_screen.h" #include "pan_shader.h" +#include "pan_texture.h" #include "pan_util.h" #include "pan_context.h" @@ -376,6 +377,10 @@ panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen, if (drm_is_afrc(pan_best_modifiers[i]) && !afrc) continue; + if (drm_is_mtk_tiled(format, pan_best_modifiers[i]) && + !panfrost_format_supports_mtk_tiled(format)) + continue; + if (test_modifier != DRM_FORMAT_MOD_INVALID && test_modifier != pan_best_modifiers[i]) continue; @@ -384,7 +389,7 @@ panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen, modifiers[count] = pan_best_modifiers[i]; if (external_only) - external_only[count] = false; + external_only[count] = drm_is_mtk_tiled(format, modifiers[count]); } count++; } diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index 0bc81a215e0..813563d983c 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -110,6 +110,9 @@ struct panfrost_vtable { /* Select the tile size and calculate the color buffer allocation size */ void (*select_tile_size)(struct pan_fb_info *fb); + + /* Run a compute shader to detile an MTK 16L32 image */ + void (*mtk_detile)(struct panfrost_context *ctx, struct pipe_blit_info *info); }; struct panfrost_screen { diff --git a/src/panfrost/lib/pan_afbc.c b/src/panfrost/lib/pan_afbc.c index d9c3b69c6c2..f8370563249 100644 --- a/src/panfrost/lib/pan_afbc.c +++ b/src/panfrost/lib/pan_afbc.c @@ -211,3 +211,18 @@ panfrost_afbc_can_pack(enum pipe_format format) return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB; } + +/* check for whether a format can be used with MTK_16L32S format */ + +bool panfrost_format_supports_mtk_tiled(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_NV12: + case PIPE_FORMAT_R8_G8B8_420_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return true; + default: + return false; + } +} diff --git a/src/panfrost/lib/pan_layout.c b/src/panfrost/lib/pan_layout.c index 0fbff2731dc..40408119195 100644 --- a/src/panfrost/lib/pan_layout.c +++ b/src/panfrost/lib/pan_layout.c @@ -32,7 +32,8 @@ * List of supported modifiers, in descending order of preference. AFBC is * faster than u-interleaved tiling which is faster than linear. Within AFBC, * enabling the YUV-like transform is typically a win where possible. - * AFRC is only used if explicitely asked for (only for RGB formats). + * AFRC is only used if explicitly asked for (only for RGB formats). + * Similarly MTK 16L32 is only used if explicitly asked for. */ uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | @@ -73,6 +74,8 @@ uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_32) | AFRC_FORMAT_MOD_LAYOUT_SCAN), + + DRM_FORMAT_MOD_MTK_16L_32S_TILE, }; /* Table of AFBC superblock sizes */ diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index be6104d4d71..f0dab3cbf94 100644 --- a/src/panfrost/lib/pan_texture.h +++ b/src/panfrost/lib/pan_texture.h @@ -47,7 +47,7 @@ extern "C" { #define MAX_MIP_LEVELS 17 #define MAX_IMAGE_PLANES 3 -#define PAN_MODIFIER_COUNT 14 +#define PAN_MODIFIER_COUNT 15 extern uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT]; struct pan_image_slice_layout { @@ -427,6 +427,8 @@ unsigned panfrost_texture_offset(const struct pan_image_layout *layout, unsigned level, unsigned array_idx, unsigned surface_idx); +bool panfrost_format_supports_mtk_tiled(enum pipe_format format); + /* DRM modifier helper */ #define drm_is_afbc(mod) \ @@ -437,6 +439,10 @@ unsigned panfrost_texture_offset(const struct pan_image_layout *layout, ((mod >> 52) == \ (DRM_FORMAT_MOD_ARM_TYPE_AFRC | (DRM_FORMAT_MOD_VENDOR_ARM << 4))) +#define drm_is_mtk_tiled(format, mod) \ + ((mod >> 52) == \ + (0 | (DRM_FORMAT_MOD_VENDOR_MTK << 4))) + struct pan_image_explicit_layout { unsigned offset; unsigned row_stride;