pan/crc: Cache temporary CRC info

Retrieve and cache temporary CRC info once at the beginning of
pan_emit_fbd(). This makes CRC info retrieval more localized and
avoids duplication.

Signed-off-by: Loïc Molinari <loic.molinari@collabora.com>
This commit is contained in:
Loïc Molinari 2026-02-10 09:38:41 +01:00
parent d911ef7c9d
commit 32229dba83
2 changed files with 102 additions and 66 deletions

View file

@ -387,30 +387,28 @@ GENX(pan_emit_interleaved_64k_zs_attachment)(const struct pan_attachment_info *a
#endif
static void
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
struct MALI_CRC *crc)
pan_emit_crc(const struct pan_fb_info *fb, struct pan_crc *crc,
struct MALI_CRC *cfg)
{
if (rt_crc < 0)
if (!pan_crc_is_enabled(crc))
return;
assert(rt_crc < fb->rt_count);
const struct pan_image_view *rt = fb->rts[rt_crc].view;
const struct pan_image_view *rt = fb->rts[crc->index].view;
const struct pan_image_plane_ref pref = pan_image_view_get_color_plane(rt);
const struct pan_image *image = pref.image;
const struct pan_image_plane *plane = image->planes[pref.plane_idx];
const struct pan_image_slice_layout *slice =
&plane->layout.slices[rt->first_level];
crc->base = plane->base + slice->crc.offset_B;
crc->row_stride = slice->crc.stride_B;
cfg->base = plane->base + slice->crc.offset_B;
cfg->row_stride = slice->crc.stride_B;
#if PAN_ARCH >= 7
crc->render_target = rt_crc;
cfg->render_target = crc->index;
if (fb->rts[rt_crc].clear) {
uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
crc->clear_color = clear_val | 0xc000000000000000 |
if (fb->rts[crc->index].clear) {
uint32_t clear_val = fb->rts[crc->index].clear_value[0];
cfg->clear_color = clear_val | 0xc000000000000000 |
(((uint64_t)clear_val & 0xffff) << 32);
}
#endif
@ -418,14 +416,13 @@ pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
int rt_crc,
struct mali_zs_crc_extension_packed *zs_crc_ext,
struct pan_clean_tile clean_tile)
struct pan_crc *crc, struct pan_clean_tile clean_tile)
{
struct mali_zs_crc_extension_packed desc;
pan_pack(&desc, ZS_CRC_EXTENSION, cfg) {
pan_prepare_crc(fb, rt_crc, &cfg.crc);
pan_emit_crc(fb, crc, &cfg.crc);
#if PAN_ARCH == 5
cfg.zs.clean_pixel_write_enable =
pan_clean_tile_write_zs_enabled(clean_tile);
@ -1050,18 +1047,6 @@ pan_select_crc_rt(const struct pan_fb_info *fb)
{
int best_rt = -1;
/* Disable CRC when the tile size is smaller than 16x16. In the hardware,
* CRC tiles are the same size as the tiles of the framebuffer. However,
* our code only handles 16x16 tiles. Therefore under the current
* implementation, we must disable CRC when 16x16 tiles are not used.
*
* This may hurt performance. However, smaller tile sizes are rare, and
* CRCs are more expensive at smaller tile sizes, reducing the benefit.
* Restricting CRC to 16x16 should work in practice.
*/
if (fb->tile_size < 16 * 16)
return best_rt;
#if PAN_ARCH <= 6
if (fb->rt_count > 1)
return best_rt;
@ -1083,15 +1068,69 @@ pan_select_crc_rt(const struct pan_fb_info *fb)
best_rt = i;
}
/* The selected RT must be fully covered for now in order to correctly
* initialize the CRC buffer. */
if (best_rt != -1 && !*fb->rts[best_rt].crc_valid &&
!pan_fb_info_is_fully_covered(fb))
best_rt = -1;
return best_rt;
}
/* Turn Transaction Elimination fully on for the selected RT: CRC values are
 * both written out and read back. */
static void
pan_crc_enable(struct pan_crc *crc)
{
   crc->write = true;
   crc->read = true;
}
/* CRC buffer initialization through a full frame draw: every tile is
 * forcefully written back so that its CRC value gets flushed along with it.
 * The drawback is that it only works when the draw covers the full frame;
 * otherwise crc is left untouched. */
static void
pan_crc_maybe_enable_flushed(struct pan_crc *crc,
                             const struct pan_fb_info *fb)
{
   if (pan_fb_info_is_fully_covered(fb)) {
      crc->force_clean_tile_write = true;
      crc->write = true;
   }
}
/* Select the render target used for CRC (Transaction Elimination) on this
 * framebuffer and compute the temporary CRC state for the frame.
 *
 * Returns a pan_crc whose index is -1 when CRC is not used for this frame.
 *
 * Side effects: updates the crc_valid flag of the color attachments. The flag
 * of the selected RT is set once its CRC buffer is going to be written, and
 * the flags of all the other RTs (when present) are cleared. */
static struct pan_crc
pan_get_crc_info(const struct pan_fb_info *fb)
{
   struct pan_crc crc = { .index = -1, };
   const struct pan_fb_color_attachment *rt;

   /* Disable TE when the tile size is smaller than 16x16. In the hardware,
    * CRC tiles are the same size as the tiles of the framebuffer. However,
    * our code only handles 16x16 tiles. Therefore under the current
    * implementation, we must disable TE when 16x16 tiles are not used. This
    * may hurt performance. However, smaller tile sizes are rare, and CRCs are
    * more expensive at smaller tile sizes, reducing the benefit. Restricting
    * CRC to 16x16 should work in practice. */
   if (fb->tile_size < 16 * 16)
      goto skip;

   crc.index = pan_select_crc_rt(fb);
   if (crc.index == -1)
      goto skip;

   rt = &fb->rts[crc.index];

   /* Transaction Elimination. */
   if (*rt->crc_valid) {
      pan_crc_enable(&crc);
   } else {
      pan_crc_maybe_enable_flushed(&crc, fb);

      if (crc.write) {
         /* The CRC buffer gets fully written by this frame, it can be read
          * back by the next one. */
         *rt->crc_valid = true;
      } else {
         /* The CRC buffer can't be initialized by this frame (partial
          * draw). Flagging it as valid would let the next frame read CRC
          * values that were never written, so give up on CRC for now and
          * keep the buffer state invalid. */
         crc.index = -1;
      }
   }

skip:
   /* Flag CRC buffer states of unselected RTs as invalid. The cast makes the
    * comparison against the -1 "none" index explicit. */
   for (unsigned i = 0; i < fb->rt_count; i++) {
      if ((int)i != crc.index && fb->rts[i].crc_valid)
         *fb->rts[i].crc_valid = false;
   }

   return crc;
}
/* Clean tiles must be written back for AFBC buffers (color, z/s) when either
* one of the effective tile size dimension is smaller than the superblock
* dimension.
@ -1119,17 +1158,13 @@ GENX(pan_force_clean_write_on)(const struct pan_image *image,
}
static bool
pan_force_clean_write_crc(const struct pan_fb_info *fb, int index, int crc_rt)
pan_force_clean_write_crc(struct pan_crc *crc, int index)
{
bool match = index == crc_rt;
bool valid = *(fb->rts[index].crc_valid);
bool full = pan_fb_info_is_fully_covered(fb);
return match && !valid && full;
return index == crc->index && crc->force_clean_tile_write;
}
static struct pan_clean_tile
pan_get_clean_tile_info(const struct pan_fb_info *fb, int crc_rt)
pan_get_clean_tile_info(const struct pan_fb_info *fb, struct pan_crc *crc)
{
struct pan_clean_tile clean_tile = { 0, };
const struct pan_image *img;
@ -1145,7 +1180,7 @@ pan_get_clean_tile_info(const struct pan_fb_info *fb, int crc_rt)
pan_image_view_get_color_plane(fb->rts[i].view).image : NULL;
if (fb->rts[i].clear ||
GENX(pan_force_clean_write_on)(img, fb->tile_size) ||
pan_force_clean_write_crc(fb, i, crc_rt))
pan_force_clean_write_crc(crc, i))
clean_tile.write_rt_mask |= 1 << i;
}
@ -1206,9 +1241,10 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
#endif
int crc_rt = pan_select_crc_rt(fb);
bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb, crc_rt);
struct pan_crc crc = pan_get_crc_info(fb);
struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb, &crc);
bool has_zs_crc_ext = fb->zs.view.zs || fb->zs.view.s ||
pan_crc_is_enabled(&crc);
pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
#if PAN_ARCH >= 6
@ -1272,24 +1308,9 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
cfg.has_zs_crc_extension = has_zs_crc_ext;
if (crc_rt >= 0) {
bool *valid = fb->rts[crc_rt].crc_valid;
bool full = pan_fb_info_is_fully_covered(fb);
/* If the CRC was valid it stays valid, if it wasn't, we must ensure
* the render operation covers the full frame, and clean tiles are
* pushed to memory. */
bool new_valid = *valid |
(full && pan_clean_tile_write_rt_enabled(clean_tile, crc_rt));
cfg.crc_read_enable = *valid;
/* If the data is currently invalid, still write CRC
* data if we are doing a full write, so that it is
* valid for next time. */
cfg.crc_write_enable = new_valid;
*valid = new_valid;
if (pan_crc_is_enabled(&crc)) {
cfg.crc_read_enable = crc.read;
cfg.crc_write_enable = crc.write;
}
#if PAN_ARCH >= 9
@ -1340,7 +1361,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
struct mali_zs_crc_extension_packed *zs_crc_ext =
out + pan_size(FRAMEBUFFER);
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
pan_emit_zs_crc_ext(fb, layer_idx, zs_crc_ext, &crc, clean_tile);
rtd += pan_size(ZS_CRC_EXTENSION);
}
@ -1355,9 +1376,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
fb->tile_size *
pan_image_view_get_nr_samples(fb->rts[i].view);
if (i != crc_rt && fb->rts[i].crc_valid != NULL)
*(fb->rts[i].crc_valid) = false;
}
struct mali_framebuffer_pointer_packed tag;

View file

@ -154,6 +154,18 @@ struct pan_fb_info {
bool pls_enabled;
};
/* Temporary CRC (Transaction Elimination) state computed for one framebuffer
 * descriptor emission. */
struct pan_crc {
/* Selected RT index (8 max), -1 if none. pan_crc_is_enabled() tests this
 * field against -1. */
int8_t index;
/* Transaction Elimination flags, copied into the framebuffer parameters as
 * crc_read_enable and crc_write_enable. */
bool read : 1;
bool write : 1;
/* Force clean writes for CRC buffer init: when set, the selected RT is added
 * to the clean tile write mask. */
bool force_clean_tile_write : 1;
};
struct pan_clean_tile {
/* clean_tile_write_enable mask on the 8 color attachments. */
uint8_t write_rt_mask;
@ -171,6 +183,12 @@ pan_fb_info_is_fully_covered(const struct pan_fb_info *fb)
fb->draw_extent.maxy == (fb->height - 1);
}
static inline bool
pan_crc_is_enabled(struct pan_crc *crc)
{
return crc->index != -1;
}
static inline bool
pan_clean_tile_write_rt_enabled(struct pan_clean_tile clean_tile,
unsigned index)