diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 8be6479a041..bc66afdf737 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -4,7 +4,7 @@ files_panfrost = files( 'driinfo_panfrost.h', - 'pan_afbc_cso.c', + 'pan_mod_conv_cso.c', 'pan_bo.c', 'pan_device.c', 'pan_disk_cache.c', diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index bec355c3e7c..83746f20170 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -40,7 +40,7 @@ #include "genxml/gen_macros.h" -#include "pan_afbc_cso.h" +#include "pan_mod_conv_cso.h" #include "pan_blend.h" #include "pan_bo.h" #include "pan_cmdstream.h" @@ -1613,6 +1613,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, enum pipe_format format = so->base.format; assert(prsrc->bo); + bool is_shadow = false; /* Format to access the stencil/depth portion of a Z32_S8 texture */ if (format == PIPE_FORMAT_X32_S8X24_UINT) { assert(prsrc->separate_stencil); @@ -1621,6 +1622,11 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, format = texture->format; } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { format = PIPE_FORMAT_Z32_FLOAT; + } else if (prsrc->shadow_image) { + prsrc = prsrc->shadow_image; + texture = &prsrc->base; + format = texture ->format; + is_shadow = true; } so->texture_bo = prsrc->image.data.base; @@ -1709,9 +1715,10 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, const struct util_format_description *desc = util_format_description(format); - if ((device->debug & PAN_DBG_YUV) && panfrost_format_is_yuv(format)) { - + if ((device->debug & PAN_DBG_YUV) && panfrost_format_is_yuv(format) && + !(is_shadow && panfrost_format_supports_mtk_tiled(format)) ) { if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { + iview.swizzle[1] = PIPE_SWIZZLE_0; iview.swizzle[2] = PIPE_SWIZZLE_1; } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLANAR2) { iview.swizzle[1] = PIPE_SWIZZLE_0; @@ -2706,6 +2713,8 @@ panfrost_initialize_surface(struct panfrost_batch *batch, BITSET_SET(rsrc->valid.data, surf->u.tex.level); if (rsrc->separate_stencil) BITSET_SET(rsrc->separate_stencil->valid.data, surf->u.tex.level); + if (rsrc->shadow_image) + BITSET_SET(rsrc->shadow_image->valid.data, surf->u.tex.level); } } @@ -3373,7 +3382,7 @@ panfrost_launch_grid(struct pipe_context *pipe, #define AFBC_BLOCK_ALIGN 16 static void -panfrost_launch_afbc_shader(struct panfrost_batch *batch, void *cso, +panfrost_launch_convert_shader(struct panfrost_batch *batch, void *cso, struct pipe_constant_buffer *cbuf, unsigned nr_blocks) { @@ -3403,13 +3412,13 @@ panfrost_launch_afbc_shader(struct panfrost_batch *batch, void *cso, pctx->set_constant_buffer(pctx, PIPE_SHADER_COMPUTE, 0, true, &saved_const); } -#define LAUNCH_AFBC_SHADER(name, batch, rsrc, consts, nr_blocks) \ - struct pan_afbc_shader_data *shaders = \ - panfrost_afbc_get_shaders(batch->ctx, rsrc, AFBC_BLOCK_ALIGN); \ +#define LAUNCH_CONVERT_SHADER(name, batch, rsrc, consts, nr_blocks) \ + struct pan_mod_convert_shader_data *shaders = \ + panfrost_get_mod_convert_shaders(batch->ctx, rsrc, AFBC_BLOCK_ALIGN); \ struct pipe_constant_buffer constant_buffer = { \ .buffer_size = sizeof(consts), \ .user_buffer = &consts}; \ - panfrost_launch_afbc_shader(batch, shaders->name##_cso, &constant_buffer, \ + panfrost_launch_convert_shader(batch, shaders->name##_cso, &constant_buffer, \ nr_blocks); static void @@ -3427,7 +3436,7 @@ panfrost_afbc_size(struct panfrost_batch *batch, struct panfrost_resource *src, panfrost_batch_read_rsrc(batch, src, PIPE_SHADER_COMPUTE); panfrost_batch_write_bo(batch, metadata, PIPE_SHADER_COMPUTE); - LAUNCH_AFBC_SHADER(size, batch, src, consts, slice->afbc.nr_blocks); + LAUNCH_CONVERT_SHADER(afbc_size, batch, src, consts, slice->afbc.nr_blocks); } static void @@ -3452,7 +3461,111 @@ panfrost_afbc_pack(struct panfrost_batch *batch, struct panfrost_resource *src, panfrost_batch_write_bo(batch, dst, PIPE_SHADER_COMPUTE); panfrost_batch_add_bo(batch, metadata, PIPE_SHADER_COMPUTE); - LAUNCH_AFBC_SHADER(pack, batch, src, consts, dst_slice->afbc.nr_blocks); + LAUNCH_CONVERT_SHADER(afbc_pack, batch, src, consts, dst_slice->afbc.nr_blocks); +} + +static void +panfrost_mtk_detile_compute(struct panfrost_context *ctx, struct pipe_blit_info *info) +{ + struct pipe_context *pipe = &ctx->base; + struct pipe_resource *y_src = info->src.resource; + struct pipe_resource *uv_src = y_src->next; + struct pipe_resource *y_dst = info->dst.resource; + struct pipe_resource *uv_dst = y_dst->next; + + unsigned width = info->src.box.width; + unsigned height = info->src.box.height; + unsigned src_stride = pan_resource(y_src)->image.layout.slices[0].row_stride; + unsigned dst_stride = pan_resource(y_dst)->image.layout.slices[0].row_stride; + + /* 4 images: y_src, uv_src, y_dst, uv_dst */ + struct pipe_image_view image[4] = { 0 }; + + if (!uv_src) { + /* single plane conversion; this must be R8 or R8G8 */ + assert(!uv_dst); + if (y_src->format == PIPE_FORMAT_R8G8_UNORM) { + /* R8G8 would be the single chroma plane of an image */ + /* adjust for dimensions of original luma plane */ + width *= 2; + height *= 2; + uv_src = y_src; + uv_dst = y_dst; + y_src = y_dst = NULL; + } + } + image[0].resource = y_src; + image[0].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ; + image[0].u.tex.level = info->src.level; + image[0].u.tex.first_layer = 0; + image[0].u.tex.last_layer = y_src ? (unsigned)(y_src->array_size - 1) : 0; + + image[1].resource = uv_src; + image[1].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_READ; + image[1].u.tex.level = info->src.level; + image[1].u.tex.first_layer = 0; + image[1].u.tex.last_layer = uv_src ? (unsigned)(uv_src->array_size - 1) : 0; + + image[2].resource = y_dst; + image[2].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[2].shader_access = image[2].access = PIPE_IMAGE_ACCESS_WRITE; + image[2].u.tex.level = info->dst.level; + image[2].u.tex.first_layer = 0; + image[2].u.tex.last_layer = y_dst ? (unsigned)(y_dst->array_size - 1) : 0; + + image[3].resource = uv_dst; + image[3].format = PIPE_FORMAT_R8G8B8A8_UINT; + image[3].shader_access = image[3].access = PIPE_IMAGE_ACCESS_WRITE; + image[3].u.tex.level = info->dst.level; + image[3].u.tex.first_layer = 0; + image[3].u.tex.last_layer = uv_dst ? (unsigned)(uv_dst->array_size - 1) : 0; + + struct panfrost_mtk_detile_info consts = { + .tiles_per_stride = src_stride >> 4, + .src_width = width, + .src_height = height, + .dst_stride = dst_stride, + }; + panfrost_flush_all_batches(ctx, "mtk_detile pre-barrier"); + + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + pipe->set_shader_images(pipe, PIPE_SHADER_COMPUTE, 0, 4, 0, image); + + /* launch the compute shader */ + struct pan_mod_convert_shader_data *shaders = + panfrost_get_mod_convert_shaders(ctx, pan_resource(y_dst ? y_dst : uv_dst), AFBC_BLOCK_ALIGN); + struct pipe_constant_buffer cbuf = { + .buffer_size = sizeof(consts), + .user_buffer = &consts}; + + struct pipe_grid_info grid_info = { + .block[0] = 4, + .last_block[0] = (width/4) % 4, + .block[1] = 16, + .last_block[1] = height % 16, + .block[2] = 1, + .last_block[2] = 0, + .grid[0] = DIV_ROUND_UP(width/4, 4), + .grid[1] = DIV_ROUND_UP(height, 16), + .grid[2] = 1, + }; + + struct pipe_constant_buffer saved_const = {}; + struct panfrost_constant_buffer *pbuf = + &batch->ctx->constant_buffer[PIPE_SHADER_COMPUTE]; + void *saved_cso = batch->ctx->uncompiled[PIPE_SHADER_COMPUTE]; + void *cso = shaders->mtk_detile_cso; + util_copy_constant_buffer(&pbuf->cb[0], &saved_const, true); + + pipe->bind_compute_state(pipe, cso); + pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cbuf); + + panfrost_launch_grid_on_batch(pipe, batch, &grid_info); + + pipe->bind_compute_state(pipe, saved_cso); + pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, true, &saved_const); } static void * @@ -3707,10 +3820,10 @@ panfrost_create_sampler_view(struct pipe_context *pctx, struct panfrost_context *ctx = pan_context(pctx); struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); + struct panfrost_resource *ptexture = pan_resource(texture); - pan_legalize_format(ctx, pan_resource(texture), template->format, false, + pan_legalize_format(ctx, ptexture, template->format, false, false); - pipe_reference(NULL, &texture->reference); so->base = *template; @@ -4098,6 +4211,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) screen->vtbl.compile_shader = GENX(pan_shader_compile); screen->vtbl.afbc_size = panfrost_afbc_size; screen->vtbl.afbc_pack = panfrost_afbc_pack; + screen->vtbl.mtk_detile = panfrost_mtk_detile_compute; screen->vtbl.emit_write_timestamp = emit_write_timestamp; screen->vtbl.select_tile_size = GENX(pan_select_tile_size); diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 7d714004a69..2b2fb01d9d1 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -28,7 +28,7 @@ #define _LARGEFILE64_SOURCE 1 #include #include -#include "pan_afbc_cso.h" +#include "pan_mod_conv_cso.h" #include "pan_blend_cso.h" #include "pan_earlyzs.h" #include "pan_encoder.h" @@ -209,7 +209,7 @@ struct panfrost_context { struct blitter_context *blitter; - struct pan_afbc_shaders afbc_shaders; + struct pan_mod_convert_shaders mod_convert_shaders; struct panfrost_blend_state *blend; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index e785da90f19..03bc6bf8d99 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -381,6 +381,8 @@ panfrost_batch_read_rsrc(struct panfrost_batch *batch, if (rsrc->separate_stencil) panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->bo, access); + if (rsrc->shadow_image) + panfrost_batch_add_bo_old(batch, rsrc->shadow_image->bo, access); panfrost_batch_update_access(batch, rsrc, false); } @@ -396,6 +398,8 @@ panfrost_batch_write_rsrc(struct panfrost_batch *batch, if (rsrc->separate_stencil) panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->bo, access); + if (rsrc->shadow_image) + panfrost_batch_add_bo_old(batch, rsrc->shadow_image->bo, access); panfrost_batch_update_access(batch, rsrc, true); } diff --git a/src/gallium/drivers/panfrost/pan_afbc_cso.c b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c similarity index 55% rename from src/gallium/drivers/panfrost/pan_afbc_cso.c rename to src/gallium/drivers/panfrost/pan_mod_conv_cso.c index 8ed0a7d8c84..025fd4a7890 100644 --- a/src/gallium/drivers/panfrost/pan_afbc_cso.c +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c @@ -21,12 +21,13 @@ * SOFTWARE. */ -#include "pan_afbc_cso.h" +#include "pan_mod_conv_cso.h" #include "nir/pipe_nir.h" #include "nir_builder.h" #include "pan_context.h" #include "pan_resource.h" #include "pan_screen.h" +#include "pan_shader.h" #define panfrost_afbc_add_info_ubo(name, b) \ nir_variable *info_ubo = nir_variable_create( \ @@ -43,6 +44,21 @@ nir_imm_int(b, offsetof(struct panfrost_afbc_##name##_info, field)), \ .align_mul = 4, .range = ~0) +#define panfrost_mtk_add_info_ubo(name, b) \ + nir_variable *info_ubo = nir_variable_create( \ + b.shader, nir_var_mem_ubo, \ + glsl_array_type(glsl_uint_type(), \ + sizeof(struct panfrost_mtk_##name##_info) / 4, 0), \ + "info_ubo"); \ + info_ubo->data.driver_location = 0; + +#define panfrost_mtk_get_info_field(name, b, field) \ + nir_load_ubo( \ + (b), 1, sizeof(((struct panfrost_mtk_##name##_info *)0)->field) * 8, \ + nir_imm_int(b, 0), \ + nir_imm_int(b, offsetof(struct panfrost_mtk_##name##_info, field)), \ + .align_mul = 4, .range = ~0) + static nir_def * read_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx) { @@ -197,7 +213,7 @@ copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz, panfrost_afbc_get_info_field(size, b, field) static nir_shader * -panfrost_afbc_create_size_shader(struct panfrost_screen *screen, unsigned bpp, +panfrost_create_afbc_size_shader(struct panfrost_screen *screen, unsigned bpp, unsigned align) { struct panfrost_device *dev = pan_device(&screen->base); @@ -233,7 +249,7 @@ panfrost_afbc_create_size_shader(struct panfrost_screen *screen, unsigned bpp, panfrost_afbc_get_info_field(pack, b, field) static nir_shader * -panfrost_afbc_create_pack_shader(struct panfrost_screen *screen, unsigned align, +panfrost_create_afbc_pack_shader(struct panfrost_screen *screen, unsigned align, bool tiled) { nir_builder b = nir_builder_init_simple_shader( @@ -260,64 +276,230 @@ panfrost_afbc_create_pack_shader(struct panfrost_screen *screen, unsigned align, return b.shader; } -struct pan_afbc_shader_data * -panfrost_afbc_get_shaders(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, unsigned align) +#define panfrost_mtk_detile_get_info_field(b, field) \ + panfrost_mtk_get_info_field(detile, b, field) + +static nir_def * +pan_mtk_tiled_from_linear(nir_builder *b, nir_def *linear, nir_def *tiles_per_stride, nir_def *width) +{ + nir_def *tiled; + /* uvec2 tlc = uvec2(linear) >> uvec2(2u, 5u) */ + nir_def *tlc = nir_ushr(b, linear, + nir_imm_ivec2(b, 2, 5)); + + /* uvec2 txc = uvec2(linear) & uvec2(3u, 31u) */ + nir_def *txc = nir_iand(b, linear, + nir_imm_ivec2(b, 3, 31)); + + /* uint tlo = tlc.y * tiles_per_stride + tlc.x */ + nir_def *tlo = nir_iadd(b, + nir_imul(b, + nir_channel(b, tlc, 1), + tiles_per_stride), + nir_channel(b, tlc, 0)); + nir_def *txcx = nir_channel(b, txc, 0); + nir_def *txcy = nir_channel(b, txc, 1); + nir_def *txcytmp = nir_vec2(b, txcy, + nir_ushr_imm(b, txcy, 1)); + + /* txo = (uvec2(txc.y, txc.y >> 1) << uvec2(2u)) | txc.xx */ + nir_def *txo = nir_ior(b, + nir_ishl_imm(b, txcytmp, 2), + nir_vec2(b, txcx, txcx)); + + /* uvec2 off = (uvec2(tlo) << uvec2(7u, 6u)) | txo */ + nir_def *off = nir_ior(b, + nir_ishl(b, + nir_vec2(b, tlo, tlo), + nir_imm_ivec2(b, 7, 6)), + txo); + + /* convert to 2D coord + * tiled.xy = off % (width / 4, width / 4) + * tiled.zw = off / (width / 4, width / 4) */ + nir_def *width4 = nir_ishl_imm(b, tiles_per_stride, 2); + width4 = nir_vec2(b, width4, width4); + nir_def *tiled_xy = nir_umod(b, off, width4); + nir_def *tiled_zw = nir_udiv(b, off, width4); + tiled = nir_vec4(b, + nir_channel(b, tiled_xy, 0), + nir_channel(b, tiled_xy, 1), + nir_channel(b, tiled_zw, 0), + nir_channel(b, tiled_zw, 1)); + + return tiled; +} + +static nir_shader * +panfrost_create_mtk_detile_shader(struct panfrost_screen *screen, unsigned align, + bool is_tiled) +{ + const struct panfrost_device *device = &screen->dev; + bool tint_yuv = (device->debug & PAN_DBG_YUV) != 0; + nir_builder b = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, screen->vtbl.get_compiler_options(), + "panfrost_mtk_detile"); + b.shader->info.workgroup_size[0] = 4; + b.shader->info.workgroup_size[1] = 16; + b.shader->info.workgroup_size[2] = 1; + + const struct glsl_type *image_type = + glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_UINT); + + panfrost_mtk_add_info_ubo(detile, b); + + nir_variable *y_tiled = + nir_variable_create(b.shader, nir_var_image, image_type, "y_tiled"); + y_tiled->data.binding = 0; + y_tiled->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 0); + nir_variable *uv_tiled = + nir_variable_create(b.shader, nir_var_image, image_type, "uv_tiled"); + uv_tiled->data.binding = 1; + uv_tiled->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 1); + + nir_variable *y_linear = + nir_variable_create(b.shader, nir_var_image, image_type, "y_linear"); + y_linear->data.binding = 2; + y_linear->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 2); + + nir_variable *uv_linear = + nir_variable_create(b.shader, nir_var_image, image_type, "uv_linear"); + uv_linear->data.binding = 3; + uv_linear->data.image.format = PIPE_FORMAT_R8G8B8A8_UINT; + BITSET_SET(b.shader->info.images_used, 3); + + nir_def *linear = nir_load_global_invocation_id(&b, 32); + nir_def *tiles_per_stride = + panfrost_mtk_detile_get_info_field(&b, tiles_per_stride); + nir_def *src_width = panfrost_mtk_detile_get_info_field(&b, src_width); + + nir_def *zero = nir_imm_int(&b, 0); + + nir_def *coord = nir_vec2(&b, + nir_channel(&b, linear, 0), + nir_channel(&b, linear, 1)); + + nir_def *tiled = pan_mtk_tiled_from_linear(&b, coord, tiles_per_stride, src_width); + + nir_def *tiled_xz = nir_vec4(&b, nir_channel(&b, tiled, 0), + nir_channel(&b, tiled, 2), zero, zero); + nir_def *tiled_yw = nir_vec4(&b, nir_channel(&b, tiled, 1), + nir_channel(&b, tiled, 3), zero, zero); + + nir_def *yval = nir_image_load(&b, 4, 32, zero, tiled_xz, + zero /* sample */, zero /* lod */, + .access = ACCESS_NON_WRITEABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, + .dest_type = nir_type_uint32); + nir_def *uvval; + + nir_def *dst_y_coord = nir_vec4(&b, + nir_channel(&b, coord, 0), + nir_channel(&b, coord, 1), + zero, zero); + /* store Y data */ + nir_def *img_deref_st_y = nir_imm_int(&b, 2); + nir_image_store(&b, img_deref_st_y, dst_y_coord, zero /* sample */, + yval, zero /* lod */, + .access = ACCESS_NON_READABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, .src_type = nir_type_uint32); + + /* store UV data */ + nir_def *odd_even_line = nir_iand_imm(&b, + nir_channel(&b, dst_y_coord, 1), + 1); + nir_push_if(&b, nir_ieq_imm(&b, odd_even_line, 0)); + { + if (tint_yuv) { + /* use just blue for chroma */ + uvval = nir_imm_ivec4(&b, 0xc0, 0x80, 0xc0, 0x80); + } else { + nir_def *img_deref_uv = nir_imm_int(&b, 1); + uvval = nir_image_load(&b, 4, 32, img_deref_uv, tiled_yw, + zero /* sample */, zero /* lod */, + .access = ACCESS_NON_WRITEABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, + .dest_type = nir_type_uint32); + } + nir_def *dst_uv_coord = nir_ishr(&b, dst_y_coord, + nir_imm_ivec4(&b, 0, 1, 0, 0)); + nir_def *img_deref_st_uv = nir_imm_int(&b, 3); + nir_image_store(&b, img_deref_st_uv, dst_uv_coord, zero /* sample */, + uvval, zero /* lod */, + .access = ACCESS_NON_READABLE, + .image_dim = GLSL_SAMPLER_DIM_2D, + .image_array = false, .src_type = nir_type_uint32); + } + nir_pop_if(&b, NULL); + + return b.shader; +} + +struct pan_mod_convert_shader_data * +panfrost_get_mod_convert_shaders(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, unsigned align) { struct pipe_context *pctx = &ctx->base; struct panfrost_screen *screen = pan_screen(ctx->base.screen); bool tiled = rsrc->image.layout.modifier & AFBC_FORMAT_MOD_TILED; - struct pan_afbc_shader_key key = { + struct pan_mod_convert_shader_key key = { .bpp = util_format_get_blocksizebits(rsrc->base.format), .align = align, .tiled = tiled, }; - pthread_mutex_lock(&ctx->afbc_shaders.lock); + pthread_mutex_lock(&ctx->mod_convert_shaders.lock); struct hash_entry *he = - _mesa_hash_table_search(ctx->afbc_shaders.shaders, &key); - struct pan_afbc_shader_data *shader = he ? he->data : NULL; - pthread_mutex_unlock(&ctx->afbc_shaders.lock); + _mesa_hash_table_search(ctx->mod_convert_shaders.shaders, &key); + struct pan_mod_convert_shader_data *shader = he ? he->data : NULL; + pthread_mutex_unlock(&ctx->mod_convert_shaders.lock); if (shader) return shader; - shader = rzalloc(ctx->afbc_shaders.shaders, struct pan_afbc_shader_data); + shader = rzalloc(ctx->mod_convert_shaders.shaders, struct pan_mod_convert_shader_data); shader->key = key; - _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader); + _mesa_hash_table_insert(ctx->mod_convert_shaders.shaders, &shader->key, shader); #define COMPILE_SHADER(name, ...) \ { \ nir_shader *nir = \ - panfrost_afbc_create_##name##_shader(screen, __VA_ARGS__); \ + panfrost_create_##name##_shader(screen, __VA_ARGS__); \ nir->info.num_ubos = 1; \ shader->name##_cso = pipe_shader_from_nir(pctx, nir); \ } - COMPILE_SHADER(size, key.bpp, key.align); - COMPILE_SHADER(pack, key.align, key.tiled); + COMPILE_SHADER(afbc_size, key.bpp, key.align); + COMPILE_SHADER(afbc_pack, key.align, key.tiled); + COMPILE_SHADER(mtk_detile, key.bpp, key.align); #undef COMPILE_SHADER - pthread_mutex_lock(&ctx->afbc_shaders.lock); - _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader); - pthread_mutex_unlock(&ctx->afbc_shaders.lock); + pthread_mutex_lock(&ctx->mod_convert_shaders.lock); + _mesa_hash_table_insert(ctx->mod_convert_shaders.shaders, &shader->key, shader); + pthread_mutex_unlock(&ctx->mod_convert_shaders.lock); return shader; } -DERIVE_HASH_TABLE(pan_afbc_shader_key); +DERIVE_HASH_TABLE(pan_mod_convert_shader_key); void panfrost_afbc_context_init(struct panfrost_context *ctx) { - ctx->afbc_shaders.shaders = pan_afbc_shader_key_table_create(NULL); - pthread_mutex_init(&ctx->afbc_shaders.lock, NULL); + ctx->mod_convert_shaders.shaders = pan_mod_convert_shader_key_table_create(NULL); + pthread_mutex_init(&ctx->mod_convert_shaders.lock, NULL); } void panfrost_afbc_context_destroy(struct panfrost_context *ctx) { - _mesa_hash_table_destroy(ctx->afbc_shaders.shaders, NULL); - pthread_mutex_destroy(&ctx->afbc_shaders.lock); + _mesa_hash_table_destroy(ctx->mod_convert_shaders.shaders, NULL); + pthread_mutex_destroy(&ctx->mod_convert_shaders.lock); } diff --git a/src/gallium/drivers/panfrost/pan_afbc_cso.h b/src/gallium/drivers/panfrost/pan_mod_conv_cso.h similarity index 76% rename from src/gallium/drivers/panfrost/pan_afbc_cso.h rename to src/gallium/drivers/panfrost/pan_mod_conv_cso.h index dec054e3d94..63eb47175ba 100644 --- a/src/gallium/drivers/panfrost/pan_afbc_cso.h +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.h @@ -21,8 +21,8 @@ * SOFTWARE. */ -#ifndef __PAN_AFBC_CSO_H__ -#define __PAN_AFBC_CSO_H__ +#ifndef __PAN_MOD_CONV_CSO_H__ +#define __PAN_MOD_CONV_CSO_H__ #include "util/hash_table.h" @@ -33,19 +33,20 @@ struct panfrost_context; struct panfrost_resource; struct panfrost_screen; -struct pan_afbc_shader_key { +struct pan_mod_convert_shader_key { unsigned bpp; unsigned align; bool tiled; }; -struct pan_afbc_shader_data { - struct pan_afbc_shader_key key; - void *size_cso; - void *pack_cso; +struct pan_mod_convert_shader_data { + struct pan_mod_convert_shader_key key; + void *afbc_size_cso; + void *afbc_pack_cso; + void *mtk_detile_cso; }; -struct pan_afbc_shaders { +struct pan_mod_convert_shaders { struct hash_table *shaders; pthread_mutex_t lock; }; @@ -70,11 +71,18 @@ struct panfrost_afbc_pack_info { uint32_t padding[3]; // FIXME } PACKED; +struct panfrost_mtk_detile_info { + uint32_t tiles_per_stride; + uint32_t src_width; + uint32_t src_height; + uint32_t dst_stride; +} PACKED; + void panfrost_afbc_context_init(struct panfrost_context *ctx); void panfrost_afbc_context_destroy(struct panfrost_context *ctx); -struct pan_afbc_shader_data * -panfrost_afbc_get_shaders(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, unsigned align); +struct pan_mod_convert_shader_data * +panfrost_get_mod_convert_shaders(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, unsigned align); #endif diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 733d16be7b8..89d1f3d69d6 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -914,6 +914,10 @@ panfrost_resource_destroy(struct pipe_screen *screen, struct pipe_resource *pt) if (rsrc->scanout) renderonly_scanout_destroy(rsrc->scanout, dev->ro); + if (rsrc->shadow_image) + pipe_resource_reference( + (struct pipe_resource **)&rsrc->shadow_image, NULL); + if (rsrc->bo) panfrost_bo_unreference(rsrc->bo); @@ -1324,7 +1328,7 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource, /* Shadowing with separate stencil may require additional accounting. * Bail in these exotic cases. */ - if (rsrc->separate_stencil) { + if (rsrc->separate_stencil || rsrc->shadow_image) { create_new_bo = false; copy_resource = false; } @@ -1401,8 +1405,9 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource, struct pipe_box box_blocks; u_box_pixels_to_blocks(&box_blocks, box, format); - if (rsrc->image.layout.modifier == - DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { + switch(rsrc->image.layout.modifier) { + case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: + case DRM_FORMAT_MOD_MTK_16L_32S_TILE: transfer->base.stride = box_blocks.width * bytes_per_block; transfer->base.layer_stride = transfer->base.stride * box_blocks.height; transfer->map = @@ -1412,7 +1417,7 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource, panfrost_load_tiled_images(transfer, rsrc); return transfer->map; - } else { + default: assert(rsrc->image.layout.modifier == DRM_FORMAT_MOD_LINEAR); /* Direct, persistent writes create holes in time for @@ -1449,12 +1454,54 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, struct panfrost_resource *rsrc, uint64_t modifier, bool copy_resource, const char *reason) { - assert(!rsrc->modifier_constant); + bool need_shadow = rsrc->modifier_constant; - struct pipe_resource *tmp_prsrc = panfrost_resource_create_with_modifier( - ctx->base.screen, &rsrc->base, modifier); + assert(!rsrc->modifier_constant || copy_resource); + + struct pipe_resource template = rsrc->base; + struct pipe_resource *tmp_prsrc; + struct pipe_resource *next_tmp_prsrc = NULL; + struct panfrost_resource *next_tmp_rsrc = NULL; + if (template.next) { + struct pipe_resource second_template = *template.next; + bool fix_stride; + assert(drm_is_mtk_tiled(rsrc->base.format, rsrc->image.layout.modifier)); + /* fix up the stride */ + switch (rsrc->base.format) { + case PIPE_FORMAT_R8_G8B8_420_UNORM: + case PIPE_FORMAT_R8_G8B8_422_UNORM: + case PIPE_FORMAT_R10_G10B10_420_UNORM: + case PIPE_FORMAT_R10_G10B10_422_UNORM: + fix_stride = true; + break; + default: + fix_stride = false; + break; + } + template.next = NULL; + if (fix_stride) { + second_template.width0 *= 2; /* temporarily adjust size for subsampling */ + } + next_tmp_prsrc = panfrost_resource_create_with_modifier( + ctx->base.screen, &second_template, modifier); + next_tmp_rsrc = pan_resource(next_tmp_prsrc); + if (fix_stride) { + next_tmp_rsrc->base.width0 /= 2; + next_tmp_rsrc->image.layout.width /= 2; + } + } + tmp_prsrc = panfrost_resource_create_with_modifier( + ctx->base.screen, &template, modifier); struct panfrost_resource *tmp_rsrc = pan_resource(tmp_prsrc); + if (next_tmp_prsrc) { + tmp_prsrc->next = next_tmp_prsrc; + } + if (need_shadow && rsrc->shadow_image) { + /* free the old shadow image */ + pipe_resource_reference( + (struct pipe_resource **)&rsrc->shadow_image, NULL); + } if (copy_resource) { struct pipe_blit_info blit = { .dst.resource = &tmp_rsrc->base, @@ -1465,6 +1512,7 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, .filter = PIPE_TEX_FILTER_NEAREST, }; + struct panfrost_screen *screen = pan_screen(ctx->base.screen); /* data_valid is not valid until flushed */ panfrost_flush_writer(ctx, rsrc, "AFBC/AFRC decompressing blit"); @@ -1477,7 +1525,11 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, util_num_layers(&rsrc->base, i), &blit.dst.box); blit.src.box = blit.dst.box; - panfrost_blit_no_afbc_legalization(&ctx->base, &blit); + if (drm_is_mtk_tiled(rsrc->base.format, + rsrc->image.layout.modifier)) + screen->vtbl.mtk_detile(ctx, &blit); + else + panfrost_blit_no_afbc_legalization(&ctx->base, &blit); } } @@ -1487,20 +1539,26 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, panfrost_flush_writer(ctx, tmp_rsrc, "AFBC/AFRC decompressing blit"); } - panfrost_bo_unreference(rsrc->bo); + if (need_shadow) { + panfrost_resource_setup(ctx->base.screen, tmp_rsrc, + modifier, tmp_rsrc->base.format); + rsrc->shadow_image = tmp_rsrc; + } else { + panfrost_bo_unreference(rsrc->bo); - rsrc->bo = tmp_rsrc->bo; - rsrc->image.data.base = rsrc->bo->ptr.gpu; - panfrost_bo_reference(rsrc->bo); + rsrc->bo = tmp_rsrc->bo; + rsrc->image.data.base = rsrc->bo->ptr.gpu; + panfrost_bo_reference(rsrc->bo); - panfrost_resource_setup(ctx->base.screen, rsrc, modifier, - tmp_rsrc->base.format); - /* panfrost_resource_setup will force the modifier to stay constant when - * called with a specific modifier. We don't want that here, we want to - * be able to convert back to another modifier if needed */ - rsrc->modifier_constant = false; - pipe_resource_reference(&tmp_prsrc, NULL); - perf_debug(ctx, "resource_modifier_convert required due to: %s", reason); + panfrost_resource_setup(ctx->base.screen, rsrc, modifier, + tmp_rsrc->base.format); + /* panfrost_resource_setup will force the modifier to stay constant when + * called with a specific modifier. We don't want that here, we want to + * be able to convert back to another modifier if needed */ + rsrc->modifier_constant = false; + pipe_resource_reference(&tmp_prsrc, NULL); + perf_debug(ctx, "resource_modifier_convert required due to: %s", reason); + } } /* Validate that an AFBC/AFRC resource may be used as a particular format. If it @@ -1516,9 +1574,11 @@ pan_legalize_format(struct panfrost_context *ctx, enum pipe_format old_format = rsrc->base.format; enum pipe_format new_format = format; bool compatible = true; + uint64_t dest_modifier = DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; if (!drm_is_afbc(rsrc->image.layout.modifier) && - !drm_is_afrc(rsrc->image.layout.modifier)) + !drm_is_afrc(rsrc->image.layout.modifier) && + !drm_is_mtk_tiled(old_format, rsrc->image.layout.modifier)) return; if (drm_is_afbc(rsrc->image.layout.modifier)) { @@ -1530,14 +1590,17 @@ pan_legalize_format(struct panfrost_context *ctx, struct pan_afrc_format_info new_info = panfrost_afrc_get_format_info(new_format); compatible = !memcmp(&old_info, &new_info, sizeof(old_info)); + } else if (drm_is_mtk_tiled(old_format, rsrc->image.layout.modifier)) { + compatible = false; + dest_modifier = DRM_FORMAT_MOD_LINEAR; } if (!compatible) { pan_resource_modifier_convert( - ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, !discard, + ctx, rsrc, dest_modifier, !discard, drm_is_afbc(rsrc->image.layout.modifier) ? "Reinterpreting AFBC surface as incompatible format" - : "Reinterpreting AFRC surface as incompatible format"); + : "Reinterpreting tiled surface as incompatible format"); return; } diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h index 28d1b859658..978e3419a8d 100644 --- a/src/gallium/drivers/panfrost/pan_resource.h +++ b/src/gallium/drivers/panfrost/pan_resource.h @@ -53,6 +53,10 @@ struct panfrost_resource { struct panfrost_resource *separate_stencil; + /* image created when detiling a resource whose + constant modifier we cannot change */ + struct panfrost_resource *shadow_image; + struct util_range valid_buffer_range; /* Description of the resource layout */ diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 017d33d7d57..726ddbe5a13 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -51,6 +51,7 @@ #include "pan_resource.h" #include "pan_screen.h" #include "pan_shader.h" +#include "pan_texture.h" #include "pan_util.h" #include "pan_context.h" @@ -376,6 +377,10 @@ panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen, if (drm_is_afrc(pan_best_modifiers[i]) && !afrc) continue; + if (drm_is_mtk_tiled(format, pan_best_modifiers[i]) && + !panfrost_format_supports_mtk_tiled(format)) + continue; + if (test_modifier != DRM_FORMAT_MOD_INVALID && test_modifier != pan_best_modifiers[i]) continue; @@ -384,7 +389,7 @@ panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen, modifiers[count] = pan_best_modifiers[i]; if (external_only) - external_only[count] = false; + external_only[count] = drm_is_mtk_tiled(format, modifiers[count]); } count++; } diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index 0bc81a215e0..813563d983c 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -110,6 +110,9 @@ struct panfrost_vtable { /* Select the tile size and calculate the color buffer allocation size */ void (*select_tile_size)(struct pan_fb_info *fb); + + /* Run a compute shader to detile an MTK 16L32 image */ + void (*mtk_detile)(struct panfrost_context *ctx, struct pipe_blit_info *info); }; struct panfrost_screen { diff --git a/src/panfrost/lib/pan_afbc.c b/src/panfrost/lib/pan_afbc.c index d9c3b69c6c2..f8370563249 100644 --- a/src/panfrost/lib/pan_afbc.c +++ b/src/panfrost/lib/pan_afbc.c @@ -211,3 +211,18 @@ panfrost_afbc_can_pack(enum pipe_format format) return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB; } + +/* check for whether a format can be used with MTK_16L32S format */ + +bool panfrost_format_supports_mtk_tiled(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_NV12: + case PIPE_FORMAT_R8_G8B8_420_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return true; + default: + return false; + } +} diff --git a/src/panfrost/lib/pan_layout.c b/src/panfrost/lib/pan_layout.c index 0fbff2731dc..40408119195 100644 --- a/src/panfrost/lib/pan_layout.c +++ b/src/panfrost/lib/pan_layout.c @@ -32,7 +32,8 @@ * List of supported modifiers, in descending order of preference. AFBC is * faster than u-interleaved tiling which is faster than linear. Within AFBC, * enabling the YUV-like transform is typically a win where possible. - * AFRC is only used if explicitely asked for (only for RGB formats). + * AFRC is only used if explicitly asked for (only for RGB formats). + * Similarly MTK 16L32 is only used if explicitly asked for. */ uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | @@ -73,6 +74,8 @@ uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_32) | AFRC_FORMAT_MOD_LAYOUT_SCAN), + + DRM_FORMAT_MOD_MTK_16L_32S_TILE, }; /* Table of AFBC superblock sizes */ diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index be6104d4d71..f0dab3cbf94 100644 --- a/src/panfrost/lib/pan_texture.h +++ b/src/panfrost/lib/pan_texture.h @@ -47,7 +47,7 @@ extern "C" { #define MAX_MIP_LEVELS 17 #define MAX_IMAGE_PLANES 3 -#define PAN_MODIFIER_COUNT 14 +#define PAN_MODIFIER_COUNT 15 extern uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT]; struct pan_image_slice_layout { @@ -427,6 +427,8 @@ unsigned panfrost_texture_offset(const struct pan_image_layout *layout, unsigned level, unsigned array_idx, unsigned surface_idx); +bool panfrost_format_supports_mtk_tiled(enum pipe_format format); + /* DRM modifier helper */ #define drm_is_afbc(mod) \ @@ -437,6 +439,10 @@ unsigned panfrost_texture_offset(const struct pan_image_layout *layout, ((mod >> 52) == \ (DRM_FORMAT_MOD_ARM_TYPE_AFRC | (DRM_FORMAT_MOD_VENDOR_ARM << 4))) +#define drm_is_mtk_tiled(format, mod) \ + ((mod >> 52) == \ + (0 | (DRM_FORMAT_MOD_VENDOR_MTK << 4))) + struct pan_image_explicit_layout { unsigned offset; unsigned row_stride;