Merge branch 'pan_empty_tile_elimination' into 'main'

panfrost: Optimize Transaction Elimination (2/2)

See merge request mesa/mesa!39216
This commit is contained in:
Loïc Molinari 2026-05-08 02:10:09 +02:00
commit b3b9b89a4f
8 changed files with 346 additions and 159 deletions

View file

@ -1018,8 +1018,7 @@ pan_preload_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
static void
pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
struct pan_fb_info *fb, bool zs, uint64_t coordinates,
uint64_t tsd, struct mali_draw_packed *out,
bool always_write)
uint64_t tsd, struct mali_draw_packed *out)
{
unsigned tex_count = 0;
uint64_t textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
@ -1031,7 +1030,7 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
/* Tiles updated by preload shaders are still considered clean (separate
* for colour and Z/S), allowing us to suppress unnecessary writeback
*/
UNUSED bool clean_fragment_write = !always_write;
UNUSED bool clean_fragment_write = true;
/* Image view used when patching stencil formats for combined
* depth/stencil preloads.
@ -1186,27 +1185,8 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW));
/* We only use crc_rt to determine whether to force writes for updating
* the CRCs, so use a conservative tile size (16x16).
*/
int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);
pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd);
bool always_write = false;
/* If CRC data is currently invalid and this batch will make it valid,
* write even clean tiles to make sure CRC data is updated. */
if (crc_rt >= 0) {
bool *valid = fb->rts[crc_rt].crc_valid;
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
fb->draw_extent.maxx == (fb->width - 1) &&
fb->draw_extent.maxy == (fb->height - 1);
if (full && !(*valid))
always_write = true;
}
pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd,
always_write);
if (zs) {
enum pipe_format fmt = fb->zs.view.zs
? fb->zs.view.zs->planes[0].image->props.format
@ -1266,8 +1246,7 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
#endif
} else {
fb->bifrost.pre_post.modes[dcd_idx] =
always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
: MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
}
}
#else
@ -1282,7 +1261,7 @@ pan_preload_emit_tiler_job(struct pan_fb_preload_cache *cache,
return (struct pan_ptr){0};
pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd,
pan_section_ptr(job.cpu, TILER_JOB, DRAW), false);
pan_section_ptr(job.cpu, TILER_JOB, DRAW));
pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;

View file

@ -531,7 +531,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
rts[i].nr_samples =
surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1);
memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
fb->rts[i].crc_valid = &prsrc->valid.crc;
fb->rts[i].crc_state = &prsrc->crc_state;
fb->rts[i].view = &rts[i];
/* Preload if the RT is read or updated */

View file

@ -280,6 +280,8 @@ panfrost_resource_import_bo(struct panfrost_resource *rsc,
if (!rsc->bo)
return -1;
pan_crc_state_set_ptr(&rsc->crc_state, &rsc->bo->ptr);
return 0;
}
@ -846,7 +848,7 @@ panfrost_should_checksum(const struct panfrost_device *dev,
return pres->base.bind & PIPE_BIND_RENDER_TARGET && panfrost_is_2d(pres) &&
bytes_per_pixel <= bytes_per_pixel_max &&
pres->base.last_level == 0 && !(dev->debug & PAN_DBG_NO_CRC);
!(dev->debug & PAN_DBG_NO_CRC);
}
static bool
@ -1166,6 +1168,7 @@ panfrost_resource_create_with_modifier(struct pipe_screen *screen,
so->bo =
panfrost_bo_create(dev, so->plane.layout.data_size_B, flags, res_label);
pan_crc_state_set_ptr(&so->crc_state, &so->bo->ptr);
if (!so->bo) {
panfrost_resource_destroy(screen, &so->base);
@ -1714,6 +1717,7 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource,
panfrost_bo_unreference(rsrc->bo);
rsrc->bo = newbo;
rsrc->plane.base = newbo->ptr.gpu;
pan_crc_state_set_ptr(&rsrc->crc_state, &newbo->ptr);
if (!copy_resource && drm_is_afbc(rsrc->modifier)) {
if (panfrost_resource_init_afbc_headers(rsrc))
@ -1887,6 +1891,7 @@ pan_resource_modifier_convert(struct panfrost_context *ctx,
rsrc->bo = tmp_rsrc->bo;
rsrc->plane.base = rsrc->bo->ptr.gpu;
panfrost_bo_reference(rsrc->bo);
pan_crc_state_set_ptr(&rsrc->crc_state, &rsrc->bo->ptr);
rsrc->owns_label = tmp_rsrc->owns_label;
tmp_rsrc->owns_label = false;
@ -2222,7 +2227,7 @@ pan_resource_afbcp_commit(struct panfrost_context *ctx,
prsrc->plane.layout.data_size_B = prsrc->afbcp->size;
prsrc->plane.base = prsrc->afbcp->packed_bo->ptr.gpu;
prsrc->image.props.crc = false;
prsrc->valid.crc = false;
pan_crc_state_invalidate(&prsrc->crc_state);
for (unsigned level = 0; level <= prsrc->base.last_level; ++level)
prsrc->plane.layout.slices[level] =
@ -2234,6 +2239,7 @@ pan_resource_afbcp_commit(struct panfrost_context *ctx,
panfrost_bo_unreference(prsrc->bo);
prsrc->bo = prsrc->afbcp->packed_bo;
prsrc->afbcp->packed_bo = NULL;
pan_crc_state_set_ptr(&prsrc->crc_state, &prsrc->bo->ptr);
pan_resource_afbcp_stop(prsrc);
}
@ -2321,7 +2327,7 @@ panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
struct panfrost_device *dev = pan_device(pctx->screen);
if (transfer->usage & PIPE_MAP_WRITE)
prsrc->valid.crc = false;
pan_crc_state_invalidate(&prsrc->crc_state);
/* AFBC/AFRC will use a staging resource. `initialized` will be set when
* the fragment job is created; this is deferred to prevent useless surface
@ -2345,6 +2351,7 @@ panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
prsrc->bo = pan_resource(trans->staging.rsrc)->bo;
prsrc->plane.base = prsrc->bo->ptr.gpu;
panfrost_bo_reference(prsrc->bo);
pan_crc_state_set_ptr(&prsrc->crc_state, &prsrc->bo->ptr);
prsrc->owns_label = pan_resource(trans->staging.rsrc)->owns_label;
pan_resource(trans->staging.rsrc)->owns_label = false;

View file

@ -76,10 +76,6 @@ struct panfrost_resource {
struct panfrost_bo *bo;
struct {
/* Is the checksum for this image valid? Implicitly refers to
* the first slice; we only checksum non-mipmapped 2D images */
bool crc;
/* Has anything been written to this slice? */
BITSET_DECLARE(data, PAN_MAX_MIP_LEVELS);
} valid;
@ -102,6 +98,9 @@ struct panfrost_resource {
/* Whether the resource owns the backing BO's label */
bool owns_label;
/* CRC state */
struct pan_crc_state crc_state;
/* AFBC-P state */
struct pan_afbcp *afbcp;
};

View file

@ -92,73 +92,6 @@ pan_warn_on_afbc_reverse_issue_order(const struct pan_attachment_info *att,
}
#endif
static bool
renderblock_fits_in_single_pass(const struct pan_image_view *view,
unsigned tile_size)
{
const struct pan_image_plane_ref pref = pan_image_view_get_first_plane(view);
uint64_t mod = pref.image->props.modifier;
if (!drm_is_afbc(mod))
return tile_size >= 16 * 16;
struct pan_image_block_size renderblk_sz = pan_afbc_renderblock_size(mod);
return tile_size >= renderblk_sz.width * renderblk_sz.height;
}
int
GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
{
/* Disable CRC when the tile size is smaller than 16x16. In the hardware,
* CRC tiles are the same size as the tiles of the framebuffer. However,
* our code only handles 16x16 tiles. Therefore under the current
* implementation, we must disable CRC when 16x16 tiles are not used.
*
* This may hurt performance. However, smaller tile sizes are rare, and
* CRCs are more expensive at smaller tile sizes, reducing the benefit.
* Restricting CRC to 16x16 should work in practice.
*/
if (tile_size < 16 * 16)
return -1;
#if PAN_ARCH <= 6
if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
pan_image_view_has_crc(fb->rts[0].view))
return 0;
return -1;
#else
bool best_rt_valid = false;
int best_rt = -1;
for (unsigned i = 0; i < fb->rt_count; i++) {
if (!fb->rts[i].view || fb->rts[i].discard ||
!pan_image_view_has_crc(fb->rts[i].view))
continue;
if (!renderblock_fits_in_single_pass(fb->rts[i].view, tile_size))
continue;
bool valid = *(fb->rts[i].crc_valid);
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
fb->draw_extent.maxx == (fb->width - 1) &&
fb->draw_extent.maxy == (fb->height - 1);
if (!full && !valid)
continue;
if (best_rt < 0 || (valid && !best_rt_valid)) {
best_rt = i;
best_rt_valid = valid;
}
if (valid)
break;
}
return best_rt;
#endif
}
static enum mali_zs_format
translate_zs_format(enum pipe_format in)
{
@ -454,45 +387,37 @@ GENX(pan_emit_interleaved_64k_zs_attachment)(const struct pan_attachment_info *a
#endif
static void
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
struct MALI_CRC *crc)
pan_emit_crc(const struct pan_fb_info *fb, struct pan_crc *crc,
struct MALI_CRC *cfg)
{
if (rt_crc < 0)
if (!pan_crc_is_enabled(crc))
return;
assert(rt_crc < fb->rt_count);
const struct pan_image_view *rt = fb->rts[rt_crc].view;
const struct pan_image_view *rt = fb->rts[crc->index].view;
const struct pan_image_plane_ref pref = pan_image_view_get_color_plane(rt);
const struct pan_image *image = pref.image;
const struct pan_image_plane *plane = image->planes[pref.plane_idx];
const struct pan_image_slice_layout *slice =
&plane->layout.slices[rt->first_level];
crc->base = plane->base + slice->crc.offset_B;
crc->row_stride = slice->crc.stride_B;
cfg->base = plane->base + slice->crc.offset_B;
cfg->row_stride = slice->crc.stride_B;
#if PAN_ARCH >= 7
crc->render_target = rt_crc;
if (fb->rts[rt_crc].clear) {
uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
crc->clear_color = clear_val | 0xc000000000000000 |
(((uint64_t)clear_val & 0xffff) << 32);
}
cfg->render_target = crc->index;
cfg->clear_color = crc->clear_color;
#endif
}
static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
int rt_crc,
struct mali_zs_crc_extension_packed *zs_crc_ext,
struct pan_clean_tile clean_tile)
struct pan_crc *crc, struct pan_clean_tile clean_tile)
{
struct mali_zs_crc_extension_packed desc;
pan_pack(&desc, ZS_CRC_EXTENSION, cfg) {
pan_prepare_crc(fb, rt_crc, &cfg.crc);
pan_emit_crc(fb, crc, &cfg.crc);
#if PAN_ARCH == 5
cfg.zs.clean_pixel_write_enable =
pan_clean_tile_write_zs_enabled(clean_tile);
@ -1085,6 +1010,243 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
}
#endif
static bool
pan_fb_color_attachment_should_crc(const struct pan_fb_color_attachment *rt,
unsigned tile_size)
{
uint64_t mod;
struct pan_image_block_size renderblk_sz;
if (!rt->view || rt->discard || !rt->crc_state ||
!pan_image_view_has_crc(rt->view))
return false;
mod = pan_image_view_get_first_plane(rt->view).image->props.modifier;
if (!drm_is_afbc(mod))
return true;
/* AFBC-P images are read only. */
if (!(mod & AFBC_FORMAT_MOD_SPARSE))
return false;
/* AFBC render block size must fit in a single pass. */
renderblk_sz = pan_afbc_renderblock_size(mod);
if (tile_size < renderblk_sz.width * renderblk_sz.height)
return false;
return true;
}
static int
pan_select_crc_rt(const struct pan_fb_info *fb)
{
int best_rt = -1;
#if PAN_ARCH <= 5
/* CRC was introduced in v4 and MRT in v5 but unlike v6 there's no details
* how both work together. */
if (fb->rt_count > 1)
return best_rt;
#elif PAN_ARCH == 6
/* On v6, all enabled RTs are used to compute a CRC (no crc_render_target
* field on the DBD). The write buffer size of the enabled color
* attachments for a tile must fit within 1600 bytes. */
if (fb->rt_count > 1 &&
pan_cbuf_bytes_per_pixel(fb) * fb->tile_size > 1600)
return best_rt;
#endif
for (unsigned i = 0; i < fb->rt_count; i++) {
/* Skip unusable RTs. */
if (!pan_fb_color_attachment_should_crc(&fb->rts[i], fb->tile_size))
continue;
/* Select the first RT with a valid CRC buffer. */
if (fb->rts[i].crc_state->valid) {
best_rt = i;
break;
}
/* Store the first usable RT otherwise. */
if (best_rt == -1)
best_rt = i;
}
return best_rt;
}
/* Turn on Transaction Elimination for this frame: CRCs are both read back and
 * written out. */
static void
pan_crc_enable(struct pan_crc *crc)
{
   crc->write = true;
   crc->read = true;
}
#if PAN_ARCH >= 7
/* Initialize the CRC buffer by zero'ing it. The all-zero CRC can't collide
* thanks to the crc_clear_color field, see pan_crc_clear_color(). Drawback is
* the CRC BO must be CPU mapped. */
static void
pan_crc_enable_zeroed(struct pan_crc *crc, struct pan_crc_state *state,
const struct pan_image_view *view)
{
const struct pan_image_plane_ref pref =
pan_image_view_get_color_plane(view);
const struct pan_image_plane *plane = pref.image->planes[pref.plane_idx];
const struct pan_image_slice_layout *slice =
&plane->layout.slices[view->first_level];
assert(state->ptr && state->ptr->cpu);
memset(state->ptr->cpu + slice->crc.offset_B, 0, slice->crc.size_B);
pan_crc_enable(crc);
state->valid = true;
}
#endif
/* Initialize the CRC buffer from the GPU side: when the draw covers the full
 * frame, enable CRC writes and force clean tiles to be written back so that
 * every CRC value gets flushed, then flag the state valid. Nothing happens on
 * partial frames, which is the drawback of this method. */
static void
pan_crc_maybe_enable_flushed(struct pan_crc *crc, struct pan_crc_state *state,
                             const struct pan_fb_info *fb)
{
   if (pan_fb_info_is_fully_covered(fb)) {
      state->valid = true;
      crc->force_clean_tile_write = true;
      crc->write = true;
   }
}
#if PAN_ARCH >= 7
/* Build the 64-bit crc_clear_color value from the clear values of all the
 * cleared render targets of the framebuffer.
 *
 * Layout of the returned value, as packed at the end of the function:
 *   bit  63     crc_clear_flag (left unset)
 *   bits 62:16  crc_clear_base (most significant bit set, hash in the rest)
 *   bits 15:0   crc_init (left clear)
 */
static uint64_t
pan_crc_clear_color(const struct pan_fb_info *fb)
{
   /* One accumulator per clear color channel (compiler auto-vectorization
    * hint). It is indexed by channel, never by RT, so that any number of
    * render targets stays within bounds. */
   uint64_t base[4] = { 0, };
   uint64_t crc_clear_flag = 0;
   uint64_t crc_clear_base = 1ull << 46;
   uint64_t crc_init = 0;

   /* When a tile is clear (i.e. no polygons intersect it), the configured
    * crc_clear_color is written as is as CRC value by the GPU if both CRC
    * write (crc_write_enable flag) and Empty Tile Elimination write
    * (empty_tile_write_enable flag) are enabled. If Empty Tile Elimination
    * read (empty_tile_read_enable flag) is enabled, this then allows
    * skipping the pre-loading of clear tiles which were also clear at the
    * previous render on the selected RT. It's done by comparing CRCs in the
    * CRC buffer to the crc_clear_color.
    *
    * The crc_clear_flag sub-field (bit 63) is left unset here. It's flipped
    * by the GPU when writing standard (i.e. non-empty) CRCs. This prevents
    * standard CRCs from using the all-zero CRC value. Empty CRCs can't use
    * the all-zero CRC value either because crc_clear_base's most significant
    * bit is set here. This allows invalidating a CRC buffer by zero'ing it.
    *
    * v10 introduced the crc_init sub-field (bits 15:0). v7 and v9 can use
    * those as additional crc_clear_base bits. We don't use it for now and
    * keep those 16 bits clear regardless of arch.
    *
    * This leaves 46 bits in the crc_clear_base sub-field (bits 62:16). Clear
    * color changes on any RTs must be reflected into this field in order to
    * properly invalidate CRCs stored this way. This is done by hashing the
    * clear value channels of each cleared RT. Each clear color channel value
    * is multiplied with a prime number followed by a XOR to the destination
    * hash. Clear values in pan_fb_info struct are expected to be packed with
    * respect to the format and dithering of the underlying RTs so that a
    * change of format (without a clear color change) can generate a different
    * hash. The prime number 16381 is carefully selected so that the 32 bits
    * of each clear color channel take at most 46 bits after the mul (the next
    * prime number 16411 takes at most 47 bits). The resulting hash value is
    * guaranteed not to overflow and can safely be packed. */
   for (unsigned i = 0; i < fb->rt_count; ++i) {
      if (!fb->rts[i].clear)
         continue;

      /* The multiplication is explicitly widened to 64 bits: with a plain
       * int constant it would be carried out at the width of clear_value
       * (presumably 32 bits) and the upper bits of the product would be
       * lost. */
      for (unsigned j = 0; j < 4; ++j)
         base[j] ^= (uint64_t)16381 * fb->rts[i].clear_value[j];
   }

   crc_clear_base |= (base[0] ^ base[1]) ^ (base[2] ^ base[3]);

   return (crc_clear_flag << 63) | (crc_clear_base << 16) | crc_init;
}
#endif
#if PAN_ARCH >= 6
/* Tell whether Empty Tile Elimination can be used for this frame, which
 * requires CRC read or CRC write to be enabled. */
static bool
pan_crc_has_empty_tile_elimination(struct pan_crc *crc,
                                   const struct pan_fb_info *fb)
{
#if PAN_ARCH == 6
   /* For v6, there are no details on how MRT interacts with Empty Tile
    * Elimination, especially how the clear value is generated from the color
    * attachment clear values. The feature is disabled for that use case. */
   if (fb->rt_count > 1)
      return false;
#endif

   if (crc->read)
      return true;

   return crc->write;
}
#endif
/* Compute the CRC configuration (Transaction Elimination and Empty Tile
 * Elimination) of a frame and update the CRC states of its render targets.
 *
 * The returned struct has index == -1 when no render target was selected.
 * Although fb is const, the pan_crc_state structs its render targets point
 * to are modified: the state of the selected RT may become valid and the
 * states of all the other RTs are flagged invalid. */
static struct pan_crc
pan_get_crc_info(const struct pan_fb_info *fb)
{
/* Start with no RT selected and all flags cleared. */
struct pan_crc crc = { .index = -1, };
const struct pan_fb_color_attachment *rt;
/* Disable TE when the tile size is smaller than 16x16. In the hardware,
 * CRC tiles are the same size as the tiles of the framebuffer. However,
 * our code only handles 16x16 tiles. Therefore under the current
 * implementation, we must disable TE when 16x16 tiles are not used. This
 * may hurt performance. However, smaller tile sizes are rare, and CRCs are
 * more expensive at smaller tile sizes, reducing the benefit. Restricting
 * CRC to 16x16 should work in practice. */
if (fb->tile_size < 16 * 16)
goto skip;
crc.index = pan_select_crc_rt(fb);
if (crc.index == -1)
goto skip;
rt = &fb->rts[crc.index];
/* Transaction Elimination. A valid CRC buffer can be read and written right
 * away, an invalid one must be initialized first. */
if (rt->crc_state->valid) {
pan_crc_enable(&crc);
} else {
#if PAN_ARCH >= 7
/* Prefer zero'ing the buffer from the CPU when a mapping is available,
 * otherwise fall back to flushing all the tiles (full frames only). */
if (rt->crc_state->ptr && rt->crc_state->ptr->cpu)
pan_crc_enable_zeroed(&crc, rt->crc_state, rt->view);
else
pan_crc_maybe_enable_flushed(&crc, rt->crc_state, fb);
#else
pan_crc_maybe_enable_flushed(&crc, rt->crc_state, fb);
#endif
}
#if PAN_ARCH >= 6
/* Empty Tile Elimination. The read and write flags mirror the Transaction
 * Elimination ones. */
if (pan_crc_has_empty_tile_elimination(&crc, fb)) {
#if PAN_ARCH >= 7
crc.clear_color = pan_crc_clear_color(fb);
#endif
crc.empty_tile_read = crc.read;
crc.empty_tile_write = crc.write;
}
#endif
skip:
/* Flag CRC buffer states of unselected RTs as invalid. When no RT is
 * selected (index == -1), this applies to every RT with a CRC state. */
for (unsigned i = 0; i < fb->rt_count; i++)
if (i != crc.index && fb->rts[i].crc_state)
fb->rts[i].crc_state->valid = false;
return crc;
}
/* Clean tiles must be written back for AFBC buffers (color, z/s) when either
* one of the effective tile size dimension is smaller than the superblock
* dimension.
@ -1111,8 +1273,14 @@ GENX(pan_force_clean_write_on)(const struct pan_image *image,
#endif
}
/* Tell whether clean tile writes must be forced on the color attachment at
 * the given index because it is the selected CRC render target and its CRC
 * buffer is being initialized by flushing all the tiles. */
static bool
pan_force_clean_write_crc(struct pan_crc *crc, int index)
{
   if (crc->index != index)
      return false;

   return crc->force_clean_tile_write;
}
static struct pan_clean_tile
pan_get_clean_tile_info(const struct pan_fb_info *fb)
pan_get_clean_tile_info(const struct pan_fb_info *fb, struct pan_crc *crc)
{
struct pan_clean_tile clean_tile = { 0, };
const struct pan_image *img;
@ -1127,7 +1295,8 @@ pan_get_clean_tile_info(const struct pan_fb_info *fb)
img = fb->rts[i].view ?
pan_image_view_get_color_plane(fb->rts[i].view).image : NULL;
if (fb->rts[i].clear ||
GENX(pan_force_clean_write_on)(img, fb->tile_size))
GENX(pan_force_clean_write_on)(img, fb->tile_size) ||
pan_force_clean_write_crc(crc, i))
clean_tile.write_rt_mask |= 1 << i;
}
@ -1188,9 +1357,10 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
#endif
int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);
struct pan_crc crc = pan_get_crc_info(fb);
struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb, &crc);
bool has_zs_crc_ext = fb->zs.view.zs || fb->zs.view.s ||
pan_crc_is_enabled(&crc);
pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
#if PAN_ARCH >= 6
@ -1254,26 +1424,13 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
cfg.has_zs_crc_extension = has_zs_crc_ext;
if (crc_rt >= 0) {
bool *valid = fb->rts[crc_rt].crc_valid;
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
fb->draw_extent.maxx == (fb->width - 1) &&
fb->draw_extent.maxy == (fb->height - 1);
/* If the CRC was valid it stays valid, if it wasn't, we must ensure
* the render operation covers the full frame, and clean tiles are
* pushed to memory. */
bool new_valid = *valid |
(full && pan_clean_tile_write_rt_enabled(clean_tile, crc_rt));
cfg.crc_read_enable = *valid;
/* If the data is currently invalid, still write CRC
* data if we are doing a full write, so that it is
* valid for next time. */
cfg.crc_write_enable = new_valid;
*valid = new_valid;
if (pan_crc_is_enabled(&crc)) {
cfg.crc_read_enable = crc.read;
cfg.crc_write_enable = crc.write;
#if PAN_ARCH >= 7
cfg.empty_tile_read_enable = crc.empty_tile_read;
cfg.empty_tile_write_enable = crc.empty_tile_write;
#endif
}
#if PAN_ARCH >= 9
@ -1324,7 +1481,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
struct mali_zs_crc_extension_packed *zs_crc_ext =
out + pan_size(FRAMEBUFFER);
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
pan_emit_zs_crc_ext(fb, layer_idx, zs_crc_ext, &crc, clean_tile);
rtd += pan_size(ZS_CRC_EXTENSION);
}
@ -1339,9 +1496,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
fb->tile_size *
pan_image_view_get_nr_samples(fb->rts[i].view);
if (i != crc_rt && fb->rts[i].crc_valid != NULL)
*(fb->rts[i].crc_valid) = false;
}
struct mali_framebuffer_pointer_packed tag;
@ -1454,11 +1608,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED
? MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED
: MALI_BLOCK_FORMAT_LINEAR;
if (pan_image_view_has_crc(rt)) {
cfg.crc_buffer.row_stride = slayout->crc.stride_B;
cfg.crc_buffer.base = plane->base + slayout->crc.offset_B;
}
}
if (fb->zs.view.zs) {

View file

@ -28,7 +28,7 @@ struct pan_compute_dim {
struct pan_fb_color_attachment {
const struct pan_image_view *view;
bool *crc_valid;
struct pan_crc_state *crc_state;
bool clear;
bool preload;
bool discard;
@ -154,6 +154,33 @@ struct pan_fb_info {
bool pls_enabled;
};
/* CRC configuration of a frame, covering Transaction Elimination (CRC
 * read/write) and Empty Tile Elimination. */
struct pan_crc {
/* Empty Tile Elimination clear color (copied to the CRC descriptor's
 * clear_color field on PAN_ARCH >= 7) */
uint64_t clear_color;
/* Selected RT index (8 max), -1 if none. */
int8_t index;
/* Transaction Elimination flags (CRC read enable / CRC write enable) */
bool read : 1;
bool write : 1;
/* Force clean writes for CRC buffer init */
bool force_clean_tile_write : 1;
/* Empty Tile Elimination flags */
bool empty_tile_read : 1;
bool empty_tile_write : 1;
};
/* Persistent CRC buffer state of a resource, shared across frames through
 * the crc_state pointer of struct pan_fb_color_attachment. */
struct pan_crc_state {
/* Pointer to BO mapping. Set with pan_crc_state_set_ptr(), may be NULL. */
struct pan_ptr *ptr;
/* Is the CRC buffer valid? Implicitly refers to the first slice. */
bool valid;
};
struct pan_clean_tile {
/* clean_tile_write_enable mask on the 8 color attachments. */
uint8_t write_rt_mask;
@ -162,6 +189,33 @@ struct pan_clean_tile {
uint8_t write_zs : 1;
};
/* Tell whether the draw extent of the framebuffer spans the whole frame, i.e.
 * starts at (0, 0) and ends at (width - 1, height - 1). */
static inline bool
pan_fb_info_is_fully_covered(const struct pan_fb_info *fb)
{
   if (fb->draw_extent.minx || fb->draw_extent.miny)
      return false;

   if (fb->draw_extent.maxx != (fb->width - 1))
      return false;

   return fb->draw_extent.maxy == (fb->height - 1);
}
/* Tell whether a render target has been selected for CRC (index != -1). Note
 * that this only checks the selection, not the read/write flags. */
static inline bool
pan_crc_is_enabled(struct pan_crc *crc)
{
return crc->index != -1;
}
/* Flag the CRC buffer as invalid. The mapping pointer is left untouched. */
static inline void
pan_crc_state_invalidate(struct pan_crc_state *state)
{
state->valid = false;
}
/* Set the BO mapping pointer of the CRC state. Only the pointer is stored
 * (no copy), and the valid flag is left untouched. */
static inline void
pan_crc_state_set_ptr(struct pan_crc_state *state, struct pan_ptr *ptr)
{
state->ptr = ptr;
}
static inline bool
pan_clean_tile_write_rt_enabled(struct pan_clean_tile clean_tile,
unsigned index)
@ -294,8 +348,6 @@ bool GENX(pan_force_clean_write_on)(const struct pan_image *image,
void GENX(pan_emit_tls)(const struct pan_tls_info *info,
struct mali_local_storage_packed *out);
int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size);
struct pan_attachment_info {
const struct pan_image_view *iview;
unsigned layer_or_z_slice;

View file

@ -146,7 +146,8 @@ pan_image_view_has_crc(const struct pan_image_view *iview)
if (!p.image)
return false;
return p.image->props.crc;
/* Only mip level 0 gets a CRC buffer allocated. */
return p.image->props.crc && iview->first_level == 0;
}
static inline struct pan_image_plane_ref

View file

@ -125,8 +125,8 @@ pan_image_layout_init(
layout_constraints.offset_B += slayout->size_B;
/* Add a checksum region if necessary */
if (props->crc) {
/* Add a CRC buffer at level 0 if necessary */
if (l == 0 && props->crc) {
init_slice_crc_info(arch, slayout, mip_extent_px.width,
mip_extent_px.height, layout_constraints.offset_B);
layout_constraints.offset_B += slayout->crc.size_B;