diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c
index 172398b6ec8..1fabb21286b 100644
--- a/src/gallium/drivers/panfrost/pan_fb_preload.c
+++ b/src/gallium/drivers/panfrost/pan_fb_preload.c
@@ -1018,8 +1018,7 @@ pan_preload_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
 static void
 pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
                      struct pan_fb_info *fb, bool zs, uint64_t coordinates,
-                     uint64_t tsd, struct mali_draw_packed *out,
-                     bool always_write)
+                     uint64_t tsd, struct mali_draw_packed *out)
 {
    unsigned tex_count = 0;
    uint64_t textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
@@ -1031,7 +1030,7 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
 
    /* Tiles updated by preload shaders are still considered clean (separate
     * for colour and Z/S), allowing us to suppress unnecessary writeback */
-   UNUSED bool clean_fragment_write = !always_write;
+   UNUSED bool clean_fragment_write = true;
 
    /* Image view used when patching stencil formats for combined
     * depth/stencil preloads.
@@ -1186,27 +1185,8 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
 
    void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW));
 
-   /* We only use crc_rt to determine whether to force writes for updating
-    * the CRCs, so use a conservative tile size (16x16).
-    */
-   int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);
+   pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd);
 
-   bool always_write = false;
-
-   /* If CRC data is currently invalid and this batch will make it valid,
-    * write even clean tiles to make sure CRC data is updated. */
-   if (crc_rt >= 0) {
-      bool *valid = fb->rts[crc_rt].crc_valid;
-      bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
-                  fb->draw_extent.maxx == (fb->width - 1) &&
-                  fb->draw_extent.maxy == (fb->height - 1);
-
-      if (full && !(*valid))
-         always_write = true;
-   }
-
-   pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd,
-                        always_write);
    if (zs) {
       enum pipe_format fmt = fb->zs.view.zs
                                 ? fb->zs.view.zs->planes[0].image->props.format
@@ -1266,8 +1246,7 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
 #endif
    } else {
       fb->bifrost.pre_post.modes[dcd_idx] =
-         always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
-                      : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
+         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
    }
 }
 #else
@@ -1282,7 +1261,7 @@ pan_preload_emit_tiler_job(struct pan_fb_preload_cache *cache,
       return (struct pan_ptr){0};
 
    pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd,
-                        pan_section_ptr(job.cpu, TILER_JOB, DRAW), false);
+                        pan_section_ptr(job.cpu, TILER_JOB, DRAW));
 
    pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
       cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 80fb5c2eb16..3d7bea33f50 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -531,7 +531,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
       rts[i].nr_samples =
          surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1);
       memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
-      fb->rts[i].crc_valid = &prsrc->valid.crc;
+      fb->rts[i].crc_state = &prsrc->crc_state;
       fb->rts[i].view = &rts[i];
 
       /* Preload if the RT is read or updated */
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index 5ac43cdeeae..c65886bdaaa 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -280,6 +280,8 @@ panfrost_resource_import_bo(struct panfrost_resource *rsc,
    if (!rsc->bo)
       return -1;
 
+   pan_crc_state_set_ptr(&rsc->crc_state, &rsc->bo->ptr);
+
    return 0;
 }
 
@@ -846,7 +848,7 @@ panfrost_should_checksum(const struct panfrost_device *dev,
 
    return pres->base.bind & PIPE_BIND_RENDER_TARGET && panfrost_is_2d(pres) &&
           bytes_per_pixel <= bytes_per_pixel_max &&
-          pres->base.last_level == 0 && !(dev->debug & PAN_DBG_NO_CRC);
+          !(dev->debug & PAN_DBG_NO_CRC);
 }
 
 static bool
@@ -1166,6 +1168,7 @@ panfrost_resource_create_with_modifier(struct pipe_screen *screen,
 
    so->bo = panfrost_bo_create(dev, so->plane.layout.data_size_B, flags,
                                res_label);
+   pan_crc_state_set_ptr(&so->crc_state, so->bo ? &so->bo->ptr : NULL);
 
    if (!so->bo) {
       panfrost_resource_destroy(screen, &so->base);
@@ -1714,6 +1717,7 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource,
             panfrost_bo_unreference(rsrc->bo);
             rsrc->bo = newbo;
             rsrc->plane.base = newbo->ptr.gpu;
+            pan_crc_state_set_ptr(&rsrc->crc_state, &newbo->ptr);
 
             if (!copy_resource && drm_is_afbc(rsrc->modifier)) {
                if (panfrost_resource_init_afbc_headers(rsrc))
@@ -1887,6 +1891,7 @@ pan_resource_modifier_convert(struct panfrost_context *ctx,
    rsrc->bo = tmp_rsrc->bo;
    rsrc->plane.base = rsrc->bo->ptr.gpu;
    panfrost_bo_reference(rsrc->bo);
+   pan_crc_state_set_ptr(&rsrc->crc_state, &rsrc->bo->ptr);
 
    rsrc->owns_label = tmp_rsrc->owns_label;
    tmp_rsrc->owns_label = false;
@@ -2222,7 +2227,7 @@ pan_resource_afbcp_commit(struct panfrost_context *ctx,
    prsrc->plane.layout.data_size_B = prsrc->afbcp->size;
    prsrc->plane.base = prsrc->afbcp->packed_bo->ptr.gpu;
    prsrc->image.props.crc = false;
-   prsrc->valid.crc = false;
+   pan_crc_state_invalidate(&prsrc->crc_state);
 
    for (unsigned level = 0; level <= prsrc->base.last_level; ++level)
       prsrc->plane.layout.slices[level] =
@@ -2234,6 +2239,7 @@ pan_resource_afbcp_commit(struct panfrost_context *ctx,
    panfrost_bo_unreference(prsrc->bo);
    prsrc->bo = prsrc->afbcp->packed_bo;
    prsrc->afbcp->packed_bo = NULL;
+   pan_crc_state_set_ptr(&prsrc->crc_state, &prsrc->bo->ptr);
 
    pan_resource_afbcp_stop(prsrc);
 }
@@ -2321,7 +2327,7 @@ panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
    struct panfrost_device *dev = pan_device(pctx->screen);
 
    if (transfer->usage & PIPE_MAP_WRITE)
-      prsrc->valid.crc = false;
+      pan_crc_state_invalidate(&prsrc->crc_state);
 
    /* AFBC/AFRC will use a staging resource. `initialized` will be set when
     * the fragment job is created; this is deferred to prevent useless surface
@@ -2345,6 +2351,7 @@ panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
             prsrc->bo = pan_resource(trans->staging.rsrc)->bo;
             prsrc->plane.base = prsrc->bo->ptr.gpu;
             panfrost_bo_reference(prsrc->bo);
+            pan_crc_state_set_ptr(&prsrc->crc_state, &prsrc->bo->ptr);
 
             prsrc->owns_label = pan_resource(trans->staging.rsrc)->owns_label;
             pan_resource(trans->staging.rsrc)->owns_label = false;
diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h
index 5220b5a11d4..bfd334343c4 100644
--- a/src/gallium/drivers/panfrost/pan_resource.h
+++ b/src/gallium/drivers/panfrost/pan_resource.h
@@ -76,10 +76,6 @@ struct panfrost_resource {
    struct panfrost_bo *bo;
 
    struct {
-      /* Is the checksum for this image valid? Implicitly refers to
-       * the first slice; we only checksum non-mipmapped 2D images */
-      bool crc;
-
       /* Has anything been written to this slice? */
       BITSET_DECLARE(data, PAN_MAX_MIP_LEVELS);
    } valid;
@@ -102,6 +98,9 @@ struct panfrost_resource {
    /* Whether the resource owns the backing BO's label */
    bool owns_label;
 
+   /* CRC state */
+   struct pan_crc_state crc_state;
+
    /* AFBC-P state */
    struct pan_afbcp *afbcp;
 };
diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c
index 3df01de0090..78d0e9b2687 100644
--- a/src/panfrost/lib/pan_desc.c
+++ b/src/panfrost/lib/pan_desc.c
@@ -92,73 +92,6 @@ pan_warn_on_afbc_reverse_issue_order(const struct pan_attachment_info *att,
 }
 #endif
 
-static bool
-renderblock_fits_in_single_pass(const struct pan_image_view *view,
-                                unsigned tile_size)
-{
-   const struct pan_image_plane_ref pref = pan_image_view_get_first_plane(view);
-   uint64_t mod = pref.image->props.modifier;
-
-   if (!drm_is_afbc(mod))
-      return tile_size >= 16 * 16;
-
-   struct pan_image_block_size renderblk_sz = pan_afbc_renderblock_size(mod);
-   return tile_size >= renderblk_sz.width * renderblk_sz.height;
-}
-
-int
-GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
-{
-   /* Disable CRC when the tile size is smaller than 16x16. In the hardware,
-    * CRC tiles are the same size as the tiles of the framebuffer. However,
-    * our code only handles 16x16 tiles. Therefore under the current
-    * implementation, we must disable CRC when 16x16 tiles are not used.
-    *
-    * This may hurt performance. However, smaller tile sizes are rare, and
-    * CRCs are more expensive at smaller tile sizes, reducing the benefit.
-    * Restricting CRC to 16x16 should work in practice.
-    */
-   if (tile_size < 16 * 16)
-      return -1;
-
-#if PAN_ARCH <= 6
-   if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
-       pan_image_view_has_crc(fb->rts[0].view))
-      return 0;
-
-   return -1;
-#else
-   bool best_rt_valid = false;
-   int best_rt = -1;
-
-   for (unsigned i = 0; i < fb->rt_count; i++) {
-      if (!fb->rts[i].view || fb->rts[i].discard ||
-          !pan_image_view_has_crc(fb->rts[i].view))
-         continue;
-
-      if (!renderblock_fits_in_single_pass(fb->rts[i].view, tile_size))
-         continue;
-
-      bool valid = *(fb->rts[i].crc_valid);
-      bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
-                  fb->draw_extent.maxx == (fb->width - 1) &&
-                  fb->draw_extent.maxy == (fb->height - 1);
-      if (!full && !valid)
-         continue;
-
-      if (best_rt < 0 || (valid && !best_rt_valid)) {
-         best_rt = i;
-         best_rt_valid = valid;
-      }
-
-      if (valid)
-         break;
-   }
-
-   return best_rt;
-#endif
-}
-
 static enum mali_zs_format
 translate_zs_format(enum pipe_format in)
 {
@@ -454,45 +387,37 @@ GENX(pan_emit_interleaved_64k_zs_attachment)(const struct pan_attachment_info *a
 #endif
 
 static void
-pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
-                struct MALI_CRC *crc)
+pan_emit_crc(const struct pan_fb_info *fb, struct pan_crc *crc,
+             struct MALI_CRC *cfg)
 {
-   if (rt_crc < 0)
+   if (!pan_crc_is_enabled(crc))
       return;
 
-   assert(rt_crc < fb->rt_count);
-
-   const struct pan_image_view *rt = fb->rts[rt_crc].view;
+   const struct pan_image_view *rt = fb->rts[crc->index].view;
    const struct pan_image_plane_ref pref = pan_image_view_get_color_plane(rt);
    const struct pan_image *image = pref.image;
    const struct pan_image_plane *plane = image->planes[pref.plane_idx];
    const struct pan_image_slice_layout *slice =
       &plane->layout.slices[rt->first_level];
 
-   crc->base = plane->base + slice->crc.offset_B;
-   crc->row_stride = slice->crc.stride_B;
+   cfg->base = plane->base + slice->crc.offset_B;
+   cfg->row_stride = slice->crc.stride_B;
 
 #if PAN_ARCH >= 7
-   crc->render_target = rt_crc;
-
-   if (fb->rts[rt_crc].clear) {
-      uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
-      crc->clear_color = clear_val | 0xc000000000000000 |
-                         (((uint64_t)clear_val & 0xffff) << 32);
-   }
+   cfg->render_target = crc->index;
+   cfg->clear_color = crc->clear_color;
 #endif
 }
 
 static void
 pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
-                    int rt_crc,
                     struct mali_zs_crc_extension_packed *zs_crc_ext,
-                    struct pan_clean_tile clean_tile)
+                    struct pan_crc *crc, struct pan_clean_tile clean_tile)
 {
    struct mali_zs_crc_extension_packed desc;
 
    pan_pack(&desc, ZS_CRC_EXTENSION, cfg) {
-      pan_prepare_crc(fb, rt_crc, &cfg.crc);
+      pan_emit_crc(fb, crc, &cfg.crc);
 #if PAN_ARCH == 5
       cfg.zs.clean_pixel_write_enable =
          pan_clean_tile_write_zs_enabled(clean_tile);
@@ -1085,6 +1010,243 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
 }
 #endif
 
+static bool
+pan_fb_color_attachment_should_crc(const struct pan_fb_color_attachment *rt,
+                                   unsigned tile_size)
+{
+   uint64_t mod;
+   struct pan_image_block_size renderblk_sz;
+
+   if (!rt->view || rt->discard || !rt->crc_state ||
+       !pan_image_view_has_crc(rt->view))
+      return false;
+
+   mod = pan_image_view_get_first_plane(rt->view).image->props.modifier;
+
+   if (!drm_is_afbc(mod))
+      return true;
+
+   /* AFBC-P images are read only. */
+   if (!(mod & AFBC_FORMAT_MOD_SPARSE))
+      return false;
+
+   /* AFBC render block size must fit in a single pass. */
+   renderblk_sz = pan_afbc_renderblock_size(mod);
+   if (tile_size < renderblk_sz.width * renderblk_sz.height)
+      return false;
+
+   return true;
+}
+
+static int
+pan_select_crc_rt(const struct pan_fb_info *fb)
+{
+   int best_rt = -1;
+
+#if PAN_ARCH <= 5
+   /* CRC was introduced in v4 and MRT in v5 but, unlike v6, there are no
+    * details on how both work together. */
+   if (fb->rt_count > 1)
+      return best_rt;
+
+#elif PAN_ARCH == 6
+   /* On v6, all enabled RTs are used to compute a CRC (no crc_render_target
+    * field on the DBD). The write buffer size of the enabled color
+    * attachments for a tile must fit within 1600 bytes. */
+   if (fb->rt_count > 1 &&
+       pan_cbuf_bytes_per_pixel(fb) * fb->tile_size > 1600)
+      return best_rt;
+#endif
+
+   for (unsigned i = 0; i < fb->rt_count; i++) {
+      /* Skip unusable RTs. */
+      if (!pan_fb_color_attachment_should_crc(&fb->rts[i], fb->tile_size))
+         continue;
+
+      /* Select the first RT with a valid CRC buffer. */
+      if (fb->rts[i].crc_state->valid) {
+         best_rt = i;
+         break;
+      }
+
+      /* Store the first usable RT otherwise. */
+      if (best_rt == -1)
+         best_rt = i;
+   }
+
+   return best_rt;
+}
+
+static void
+pan_crc_enable(struct pan_crc *crc)
+{
+   crc->read = true;
+   crc->write = true;
+}
+
+#if PAN_ARCH >= 7
+/* Initialize the CRC buffer by zeroing it. The all-zero CRC can't collide
+ * thanks to the crc_clear_color field, see pan_crc_clear_color(). Drawback is
+ * the CRC BO must be CPU mapped. */
+static void
+pan_crc_enable_zeroed(struct pan_crc *crc, struct pan_crc_state *state,
+                      const struct pan_image_view *view)
+{
+   const struct pan_image_plane_ref pref =
+      pan_image_view_get_color_plane(view);
+   const struct pan_image_plane *plane = pref.image->planes[pref.plane_idx];
+   const struct pan_image_slice_layout *slice =
+      &plane->layout.slices[view->first_level];
+
+   assert(state->ptr && state->ptr->cpu);
+   memset(state->ptr->cpu + slice->crc.offset_B, 0, slice->crc.size_B);
+
+   pan_crc_enable(crc);
+   state->valid = true;
+}
+#endif
+
+/* Take advantage of a full frame draw to initialize the CRC buffer by
+ * forcefully writing back all the tiles and flushing the CRC values. Drawback
+ * is it only works on full frames. */
+static void
+pan_crc_maybe_enable_flushed(struct pan_crc *crc, struct pan_crc_state *state,
+                             const struct pan_fb_info *fb)
+{
+   if (!pan_fb_info_is_fully_covered(fb))
+      return;
+
+   crc->write = true;
+   crc->force_clean_tile_write = true;
+   state->valid = true;
+}
+
+#if PAN_ARCH >= 7
+static uint64_t
+pan_crc_clear_color(const struct pan_fb_info *fb)
+{
+   uint64_t base[4] = { 0, }; /* Per-channel hashes (vectorization hint) */
+   uint64_t crc_clear_flag = 0;
+   uint64_t crc_clear_base = 1ull << 46;
+   uint64_t crc_init = 0;
+
+   /* When a tile is clear (i.e. no polygons intersect it), the configured
+    * crc_clear_color is written as is as CRC value by the GPU if both CRC
+    * write (crc_write_enable flag) and Empty Tile Elimination write
+    * (empty_tile_write_enable flag) are enabled. If Empty Tile Elimination
+    * read (empty_tile_read_enable flag) is enabled, this then allows skipping
+    * the pre-loading of clear tiles which were also clear at the previous
+    * render on the selected RT. It's done by comparing CRCs in the CRC buffer
+    * to the crc_clear_color.
+    *
+    * The crc_clear_flag sub-field (bit 63) is flagged unset here. It's
+    * flipped by the GPU when writing standard (i.e. non-empty) CRCs. This
+    * prevents standard CRCs from using the all-zero CRC value. Empty CRCs
+    * can't use the all-zero CRC value either because crc_clear_base's most
+    * significant bit is flagged set here. This allows invalidating a CRC
+    * buffer by zeroing it.
+    *
+    * v10 introduced the crc_init sub-field (bits 15:0). v7 and v9 can use
+    * those as additional crc_clear_base bits. We don't use it for now and
+    * keep those 16 bits clear regardless of arch.
+    *
+    * This leaves 46 bits in the crc_clear_base sub-field (bits 62:16). Clear
+    * color changes on any RTs must be reflected into this field in order to
+    * properly invalidate CRCs stored this way. This is done by hashing the
+    * clear value channels of each cleared RT. Each clear color channel value
+    * is multiplied (as a 64-bit value) by a prime number and XORed into the
+    * hash of its channel. Clear values in pan_fb_info struct are expected to
+    * be packed with respect to the format and dithering of the underlying RTs
+    * so that a change of format (without a clear color change) can generate a
+    * different hash. The prime number 16381 is carefully selected so that the
+    * 32 bits of each clear color channel take at most 46 bits after the mul
+    * (the next prime number 16411 takes at most 47 bits). The resulting hash
+    * value is guaranteed not to overflow and can safely be packed. */
+
+   for (unsigned i = 0; i < fb->rt_count; ++i)
+      if (fb->rts[i].clear)
+         for (unsigned j = 0; j < 4; ++j)
+            base[j] ^= 16381ull * fb->rts[i].clear_value[j];
+
+   crc_clear_base |= (base[0] ^ base[1]) ^ (base[2] ^ base[3]);
+
+   return (crc_clear_flag << 63) | (crc_clear_base << 16) | crc_init;
+}
+#endif
+
+#if PAN_ARCH >= 6
+static bool
+pan_crc_has_empty_tile_elimination(struct pan_crc *crc,
+                                   const struct pan_fb_info *fb)
+{
+#if PAN_ARCH == 6
+   /* For v6, there are no details on how MRT interacts with Empty Tile
+    * Elimination, especially how the clear value is generated from the color
+    * attachment clear values. The feature is disabled for that use case. */
+   if (fb->rt_count > 1)
+      return false;
+#endif
+
+   return crc->read || crc->write;
+}
+#endif
+
+static struct pan_crc
+pan_get_crc_info(const struct pan_fb_info *fb)
+{
+   struct pan_crc crc = { .index = -1, };
+   const struct pan_fb_color_attachment *rt;
+
+   /* Disable TE when the tile size is smaller than 16x16. In the hardware,
+    * CRC tiles are the same size as the tiles of the framebuffer. However,
+    * our code only handles 16x16 tiles. Therefore under the current
+    * implementation, we must disable TE when 16x16 tiles are not used. This
+    * may hurt performance. However, smaller tile sizes are rare, and CRCs are
+    * more expensive at smaller tile sizes, reducing the benefit. Restricting
+    * CRC to 16x16 should work in practice. */
+   if (fb->tile_size < 16 * 16)
+      goto skip;
+
+   crc.index = pan_select_crc_rt(fb);
+   if (crc.index == -1)
+      goto skip;
+
+   rt = &fb->rts[crc.index];
+
+   /* Transaction Elimination. */
+   if (rt->crc_state->valid) {
+      pan_crc_enable(&crc);
+   } else {
+#if PAN_ARCH >= 7
+      if (rt->crc_state->ptr && rt->crc_state->ptr->cpu)
+         pan_crc_enable_zeroed(&crc, rt->crc_state, rt->view);
+      else
+         pan_crc_maybe_enable_flushed(&crc, rt->crc_state, fb);
+#else
+      pan_crc_maybe_enable_flushed(&crc, rt->crc_state, fb);
+#endif
+   }
+
+#if PAN_ARCH >= 6
+   /* Empty Tile Elimination. */
+   if (pan_crc_has_empty_tile_elimination(&crc, fb)) {
+#if PAN_ARCH >= 7
+      crc.clear_color = pan_crc_clear_color(fb);
+#endif
+      crc.empty_tile_read = crc.read;
+      crc.empty_tile_write = crc.write;
+   }
+#endif
+
+ skip:
+   /* Flag CRC buffer states of unselected RTs as invalid. */
+   for (unsigned i = 0; i < fb->rt_count; i++)
+      if ((int)i != crc.index && fb->rts[i].crc_state)
+         fb->rts[i].crc_state->valid = false;
+
+   return crc;
+}
+
 /* Clean tiles must be written back for AFBC buffers (color, z/s) when either
  * one of the effective tile size dimension is smaller than the superblock
  * dimension.
@@ -1111,8 +1273,14 @@ GENX(pan_force_clean_write_on)(const struct pan_image *image,
 #endif
 }
 
+static bool
+pan_force_clean_write_crc(struct pan_crc *crc, int index)
+{
+   return index == crc->index && crc->force_clean_tile_write;
+}
+
 static struct pan_clean_tile
-pan_get_clean_tile_info(const struct pan_fb_info *fb)
+pan_get_clean_tile_info(const struct pan_fb_info *fb, struct pan_crc *crc)
 {
    struct pan_clean_tile clean_tile = { 0, };
    const struct pan_image *img;
@@ -1127,7 +1295,8 @@ pan_get_clean_tile_info(const struct pan_fb_info *fb)
       img = fb->rts[i].view ?
          pan_image_view_get_color_plane(fb->rts[i].view).image : NULL;
       if (fb->rts[i].clear ||
-          GENX(pan_force_clean_write_on)(img, fb->tile_size))
+          GENX(pan_force_clean_write_on)(img, fb->tile_size) ||
+          pan_force_clean_write_crc(crc, i))
          clean_tile.write_rt_mask |= 1 << i;
    }
 
@@ -1188,9 +1357,10 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
    GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
 #endif
 
-   int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
-   bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
-   struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);
+   struct pan_crc crc = pan_get_crc_info(fb);
+   struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb, &crc);
+   bool has_zs_crc_ext = fb->zs.view.zs || fb->zs.view.s ||
+                         pan_crc_is_enabled(&crc);
 
    pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
 #if PAN_ARCH >= 6
@@ -1254,26 +1424,13 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
       cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
       cfg.has_zs_crc_extension = has_zs_crc_ext;
 
-      if (crc_rt >= 0) {
-         bool *valid = fb->rts[crc_rt].crc_valid;
-         bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
-                     fb->draw_extent.maxx == (fb->width - 1) &&
-                     fb->draw_extent.maxy == (fb->height - 1);
-
-         /* If the CRC was valid it stays valid, if it wasn't, we must ensure
-          * the render operation covers the full frame, and clean tiles are
-          * pushed to memory. */
-         bool new_valid = *valid |
-            (full && pan_clean_tile_write_rt_enabled(clean_tile, crc_rt));
-
-         cfg.crc_read_enable = *valid;
-
-         /* If the data is currently invalid, still write CRC
-          * data if we are doing a full write, so that it is
-          * valid for next time. */
-         cfg.crc_write_enable = new_valid;
-
-         *valid = new_valid;
+      if (pan_crc_is_enabled(&crc)) {
+         cfg.crc_read_enable = crc.read;
+         cfg.crc_write_enable = crc.write;
+#if PAN_ARCH >= 7
+         cfg.empty_tile_read_enable = crc.empty_tile_read;
+         cfg.empty_tile_write_enable = crc.empty_tile_write;
+#endif
       }
 
 #if PAN_ARCH >= 9
@@ -1324,7 +1481,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
       struct mali_zs_crc_extension_packed *zs_crc_ext =
          out + pan_size(FRAMEBUFFER);
 
-      pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
+      pan_emit_zs_crc_ext(fb, layer_idx, zs_crc_ext, &crc, clean_tile);
       rtd += pan_size(ZS_CRC_EXTENSION);
    }
 
@@ -1339,9 +1496,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
       cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
                      fb->tile_size *
                      pan_image_view_get_nr_samples(fb->rts[i].view);
-
-      if (i != crc_rt && fb->rts[i].crc_valid != NULL)
-         *(fb->rts[i].crc_valid) = false;
    }
 
    struct mali_framebuffer_pointer_packed tag;
@@ -1454,11 +1608,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED
                ? MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED
                : MALI_BLOCK_FORMAT_LINEAR;
-
-         if (pan_image_view_has_crc(rt)) {
-            cfg.crc_buffer.row_stride = slayout->crc.stride_B;
-            cfg.crc_buffer.base = plane->base + slayout->crc.offset_B;
-         }
       }
 
       if (fb->zs.view.zs) {
diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h
index db5b6588ad3..3f2ec6a6b85 100644
--- a/src/panfrost/lib/pan_desc.h
+++ b/src/panfrost/lib/pan_desc.h
@@ -28,7 +28,7 @@ struct pan_compute_dim {
 
 struct pan_fb_color_attachment {
    const struct pan_image_view *view;
-   bool *crc_valid;
+   struct pan_crc_state *crc_state;
    bool clear;
    bool preload;
    bool discard;
@@ -154,6 +154,33 @@ struct pan_fb_info {
    bool pls_enabled;
 };
 
+struct pan_crc {
+   /* Empty Tile Elimination clear color */
+   uint64_t clear_color;
+
+   /* Selected RT index (8 max), -1 if none. */
+   int8_t index;
+
+   /* Transaction Elimination flags */
+   bool read : 1;
+   bool write : 1;
+
+   /* Force clean writes for CRC buffer init */
+   bool force_clean_tile_write : 1;
+
+   /* Empty Tile Elimination flags */
+   bool empty_tile_read : 1;
+   bool empty_tile_write : 1;
+};
+
+struct pan_crc_state {
+   /* Pointer to BO mapping. */
+   struct pan_ptr *ptr;
+
+   /* Is the CRC buffer valid? Implicitly refers to the first slice. */
+   bool valid;
+};
+
 struct pan_clean_tile {
    /* clean_tile_write_enable mask on the 8 color attachments. */
    uint8_t write_rt_mask;
@@ -162,6 +189,33 @@ struct pan_clean_tile {
    uint8_t write_zs : 1;
 };
 
+static inline bool
+pan_fb_info_is_fully_covered(const struct pan_fb_info *fb)
+{
+   return !fb->draw_extent.minx &&
+          !fb->draw_extent.miny &&
+          fb->draw_extent.maxx == (fb->width - 1) &&
+          fb->draw_extent.maxy == (fb->height - 1);
+}
+
+static inline bool
+pan_crc_is_enabled(const struct pan_crc *crc)
+{
+   return crc->index != -1;
+}
+
+static inline void
+pan_crc_state_invalidate(struct pan_crc_state *state)
+{
+   state->valid = false;
+}
+
+static inline void
+pan_crc_state_set_ptr(struct pan_crc_state *state, struct pan_ptr *ptr)
+{
+   state->ptr = ptr;
+}
+
 static inline bool
 pan_clean_tile_write_rt_enabled(struct pan_clean_tile clean_tile,
                                 unsigned index)
@@ -294,8 +348,6 @@ bool GENX(pan_force_clean_write_on)(const struct pan_image *image,
 void GENX(pan_emit_tls)(const struct pan_tls_info *info,
                         struct mali_local_storage_packed *out);
 
-int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size);
-
 struct pan_attachment_info {
    const struct pan_image_view *iview;
    unsigned layer_or_z_slice;
diff --git a/src/panfrost/lib/pan_image.h b/src/panfrost/lib/pan_image.h
index 1f9a0db2ec1..e2a1a2a3500 100644
--- a/src/panfrost/lib/pan_image.h
+++ b/src/panfrost/lib/pan_image.h
@@ -146,7 +146,8 @@ pan_image_view_has_crc(const struct pan_image_view *iview)
    if (!p.image)
       return false;
 
-   return p.image->props.crc;
+   /* Only mip level 0 gets a CRC buffer allocated. */
+   return p.image->props.crc && iview->first_level == 0;
 }
 
 static inline struct pan_image_plane_ref
diff --git a/src/panfrost/lib/pan_layout.c b/src/panfrost/lib/pan_layout.c
index 67a47e65e06..b0114493623 100644
--- a/src/panfrost/lib/pan_layout.c
+++ b/src/panfrost/lib/pan_layout.c
@@ -125,8 +125,8 @@ pan_image_layout_init(
 
       layout_constraints.offset_B += slayout->size_B;
 
-      /* Add a checksum region if necessary */
-      if (props->crc) {
+      /* Add a CRC buffer at level 0 if necessary */
+      if (l == 0 && props->crc) {
          init_slice_crc_info(arch, slayout, mip_extent_px.width,
                              mip_extent_px.height, layout_constraints.offset_B);
          layout_constraints.offset_B += slayout->crc.size_B;