From 32229dba83aee53bbcd360eb6a3078b4758fb3e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Tue, 10 Feb 2026 09:38:41 +0100 Subject: [PATCH] pan/crc: Cache temporary CRC info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Retrieve and cache temporary CRC info once at the beginning of pan_emit_fbd(). This makes CRC info retrieval more localized and avoids duplication. Signed-off-by: Loïc Molinari --- src/panfrost/lib/pan_desc.c | 150 ++++++++++++++++++++---------------- src/panfrost/lib/pan_desc.h | 18 +++++ 2 files changed, 102 insertions(+), 66 deletions(-) diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index cc9dd8e288e..23c7ee8ba1c 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -387,30 +387,28 @@ GENX(pan_emit_interleaved_64k_zs_attachment)(const struct pan_attachment_info *a #endif static void -pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc, - struct MALI_CRC *crc) +pan_emit_crc(const struct pan_fb_info *fb, struct pan_crc *crc, + struct MALI_CRC *cfg) { - if (rt_crc < 0) + if (!pan_crc_is_enabled(crc)) return; - assert(rt_crc < fb->rt_count); - - const struct pan_image_view *rt = fb->rts[rt_crc].view; + const struct pan_image_view *rt = fb->rts[crc->index].view; const struct pan_image_plane_ref pref = pan_image_view_get_color_plane(rt); const struct pan_image *image = pref.image; const struct pan_image_plane *plane = image->planes[pref.plane_idx]; const struct pan_image_slice_layout *slice = &plane->layout.slices[rt->first_level]; - crc->base = plane->base + slice->crc.offset_B; - crc->row_stride = slice->crc.stride_B; + cfg->base = plane->base + slice->crc.offset_B; + cfg->row_stride = slice->crc.stride_B; #if PAN_ARCH >= 7 - crc->render_target = rt_crc; + cfg->render_target = crc->index; - if (fb->rts[rt_crc].clear) { - uint32_t clear_val = fb->rts[rt_crc].clear_value[0]; - crc->clear_color = clear_val | 0xc000000000000000 | + 
if (fb->rts[crc->index].clear) { + uint32_t clear_val = fb->rts[crc->index].clear_value[0]; + cfg->clear_color = clear_val | 0xc000000000000000 | (((uint64_t)clear_val & 0xffff) << 32); } #endif @@ -418,14 +416,13 @@ pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc, static void pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx, - int rt_crc, struct mali_zs_crc_extension_packed *zs_crc_ext, - struct pan_clean_tile clean_tile) + struct pan_crc *crc, struct pan_clean_tile clean_tile) { struct mali_zs_crc_extension_packed desc; pan_pack(&desc, ZS_CRC_EXTENSION, cfg) { - pan_prepare_crc(fb, rt_crc, &cfg.crc); + pan_emit_crc(fb, crc, &cfg.crc); #if PAN_ARCH == 5 cfg.zs.clean_pixel_write_enable = pan_clean_tile_write_zs_enabled(clean_tile); @@ -1050,18 +1047,6 @@ pan_select_crc_rt(const struct pan_fb_info *fb) { int best_rt = -1; - /* Disable CRC when the tile size is smaller than 16x16. In the hardware, - * CRC tiles are the same size as the tiles of the framebuffer. However, - * our code only handles 16x16 tiles. Therefore under the current - * implementation, we must disable CRC when 16x16 tiles are not used. - * - * This may hurt performance. However, smaller tile sizes are rare, and - * CRCs are more expensive at smaller tile sizes, reducing the benefit. - * Restricting CRC to 16x16 should work in practice. - */ - if (fb->tile_size < 16 * 16) - return best_rt; - #if PAN_ARCH <= 6 if (fb->rt_count > 1) return best_rt; @@ -1083,15 +1068,69 @@ pan_select_crc_rt(const struct pan_fb_info *fb) best_rt = i; } - /* The selected RT must be fully covered for now in order to correctly - * initialize the CRC buffer. 
*/ - if (best_rt != -1 && !*fb->rts[best_rt].crc_valid && - !pan_fb_info_is_fully_covered(fb)) - best_rt = -1; - return best_rt; } +static void +pan_crc_enable(struct pan_crc *crc) +{ + crc->read = true; + crc->write = true; +} + +/* Take advantage of a full frame draw to initialize the CRC buffer by + * forcefully writing back all the tiles and flush the CRC values. Drawback + * is it only works on full frames. */ +static void +pan_crc_maybe_enable_flushed(struct pan_crc *crc, + const struct pan_fb_info *fb) +{ + if (!pan_fb_info_is_fully_covered(fb)) + return; + + crc->write = true; + crc->force_clean_tile_write = true; +} + +static struct pan_crc +pan_get_crc_info(const struct pan_fb_info *fb) +{ + struct pan_crc crc = { .index = -1, }; + const struct pan_fb_color_attachment *rt; + + /* Disable TE when the tile size is smaller than 16x16. In the hardware, + * CRC tiles are the same size as the tiles of the framebuffer. However, + * our code only handles 16x16 tiles. Therefore under the current + * implementation, we must disable TE when 16x16 tiles are not used. This + * may hurt performance. However, smaller tile sizes are rare, and CRCs are + * more expensive at smaller tile sizes, reducing the benefit. Restricting + * CRC to 16x16 should work in practice. */ + if (fb->tile_size < 16 * 16) + goto skip; + + crc.index = pan_select_crc_rt(fb); + if (crc.index == -1) + goto skip; + + rt = &fb->rts[crc.index]; + + /* Transaction Elimination: flag the CRC buffer valid only if written. */ + if (*rt->crc_valid) { + pan_crc_enable(&crc); + } else { + pan_crc_maybe_enable_flushed(&crc, fb); + *rt->crc_valid = crc.write; + } + + skip: + /* Flag CRC buffer states of unselected RTs as invalid. */ + for (unsigned i = 0; i < fb->rt_count; i++) + if (i != crc.index && fb->rts[i].crc_valid) + *fb->rts[i].crc_valid = false; + + return crc; +} + /* Clean tiles must be written back for AFBC buffers (color, z/s) when either * one of the effective tile size dimension is smaller than the superblock * dimension. 
@@ -1119,17 +1158,13 @@ GENX(pan_force_clean_write_on)(const struct pan_image *image, } static bool -pan_force_clean_write_crc(const struct pan_fb_info *fb, int index, int crc_rt) +pan_force_clean_write_crc(struct pan_crc *crc, int index) { - bool match = index == crc_rt; - bool valid = *(fb->rts[index].crc_valid); - bool full = pan_fb_info_is_fully_covered(fb); - - return match && !valid && full; + return index == crc->index && crc->force_clean_tile_write; } static struct pan_clean_tile -pan_get_clean_tile_info(const struct pan_fb_info *fb, int crc_rt) +pan_get_clean_tile_info(const struct pan_fb_info *fb, struct pan_crc *crc) { struct pan_clean_tile clean_tile = { 0, }; const struct pan_image *img; @@ -1145,7 +1180,7 @@ pan_get_clean_tile_info(const struct pan_fb_info *fb, int crc_rt) pan_image_view_get_color_plane(fb->rts[i].view).image : NULL; if (fb->rts[i].clear || GENX(pan_force_clean_write_on)(img, fb->tile_size) || - pan_force_clean_write_crc(fb, i, crc_rt)) + pan_force_clean_write_crc(crc, i)) clean_tile.write_rt_mask |= 1 << i; } @@ -1206,9 +1241,10 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE)); #endif - int crc_rt = pan_select_crc_rt(fb); - bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0); - struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb, crc_rt); + struct pan_crc crc = pan_get_crc_info(fb); + struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb, &crc); + bool has_zs_crc_ext = fb->zs.view.zs || fb->zs.view.s || + pan_crc_is_enabled(&crc); pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { #if PAN_ARCH >= 6 @@ -1272,24 +1308,9 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s); cfg.has_zs_crc_extension = has_zs_crc_ext; - if (crc_rt >= 0) { - bool *valid = fb->rts[crc_rt].crc_valid; - bool full = pan_fb_info_is_fully_covered(fb); 
- - /* If the CRC was valid it stays valid, if it wasn't, we must ensure - * the render operation covers the full frame, and clean tiles are - * pushed to memory. */ - bool new_valid = *valid | - (full && pan_clean_tile_write_rt_enabled(clean_tile, crc_rt)); - - cfg.crc_read_enable = *valid; - - /* If the data is currently invalid, still write CRC - * data if we are doing a full write, so that it is - * valid for next time. */ - cfg.crc_write_enable = new_valid; - - *valid = new_valid; + if (pan_crc_is_enabled(&crc)) { + cfg.crc_read_enable = crc.read; + cfg.crc_write_enable = crc.write; } #if PAN_ARCH >= 9 @@ -1340,7 +1361,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, struct mali_zs_crc_extension_packed *zs_crc_ext = out + pan_size(FRAMEBUFFER); - pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile); + pan_emit_zs_crc_ext(fb, layer_idx, zs_crc_ext, &crc, clean_tile); rtd += pan_size(ZS_CRC_EXTENSION); } @@ -1355,9 +1376,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) * fb->tile_size * pan_image_view_get_nr_samples(fb->rts[i].view); - - if (i != crc_rt && fb->rts[i].crc_valid != NULL) - *(fb->rts[i].crc_valid) = false; } struct mali_framebuffer_pointer_packed tag; diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h index 08ac5c588b8..2772d266dc1 100644 --- a/src/panfrost/lib/pan_desc.h +++ b/src/panfrost/lib/pan_desc.h @@ -154,6 +154,18 @@ struct pan_fb_info { bool pls_enabled; }; +struct pan_crc { + /* Selected RT index (8 max), -1 if none. */ + int8_t index; + + /* Transaction Elimination flags */ + bool read : 1; + bool write : 1; + + /* Force clean writes for CRC buffer init */ + bool force_clean_tile_write : 1; +}; + struct pan_clean_tile { /* clean_tile_write_enable mask on the 8 color attachments. 
*/ uint8_t write_rt_mask; @@ -171,6 +183,12 @@ pan_fb_info_is_fully_covered(const struct pan_fb_info *fb) fb->draw_extent.maxy == (fb->height - 1); } +static inline bool +pan_crc_is_enabled(struct pan_crc *crc) +{ + return crc->index != -1; +} + static inline bool pan_clean_tile_write_rt_enabled(struct pan_clean_tile clean_tile, unsigned index)