mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
Merge branch 'pan_empty_tile_elimination' into 'main'
panfrost: Optimize Transaction Elimination (2/2) See merge request mesa/mesa!39216
This commit is contained in:
commit
b3b9b89a4f
8 changed files with 346 additions and 159 deletions
|
|
@ -1018,8 +1018,7 @@ pan_preload_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
|
|||
static void
|
||||
pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
|
||||
struct pan_fb_info *fb, bool zs, uint64_t coordinates,
|
||||
uint64_t tsd, struct mali_draw_packed *out,
|
||||
bool always_write)
|
||||
uint64_t tsd, struct mali_draw_packed *out)
|
||||
{
|
||||
unsigned tex_count = 0;
|
||||
uint64_t textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
|
||||
|
|
@ -1031,7 +1030,7 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
|
|||
/* Tiles updated by preload shaders are still considered clean (separate
|
||||
* for colour and Z/S), allowing us to suppress unnecessary writeback
|
||||
*/
|
||||
UNUSED bool clean_fragment_write = !always_write;
|
||||
UNUSED bool clean_fragment_write = true;
|
||||
|
||||
/* Image view used when patching stencil formats for combined
|
||||
* depth/stencil preloads.
|
||||
|
|
@ -1186,27 +1185,8 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
|
|||
|
||||
void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW));
|
||||
|
||||
/* We only use crc_rt to determine whether to force writes for updating
|
||||
* the CRCs, so use a conservative tile size (16x16).
|
||||
*/
|
||||
int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);
|
||||
pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd);
|
||||
|
||||
bool always_write = false;
|
||||
|
||||
/* If CRC data is currently invalid and this batch will make it valid,
|
||||
* write even clean tiles to make sure CRC data is updated. */
|
||||
if (crc_rt >= 0) {
|
||||
bool *valid = fb->rts[crc_rt].crc_valid;
|
||||
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
|
||||
fb->draw_extent.maxx == (fb->width - 1) &&
|
||||
fb->draw_extent.maxy == (fb->height - 1);
|
||||
|
||||
if (full && !(*valid))
|
||||
always_write = true;
|
||||
}
|
||||
|
||||
pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd,
|
||||
always_write);
|
||||
if (zs) {
|
||||
enum pipe_format fmt = fb->zs.view.zs
|
||||
? fb->zs.view.zs->planes[0].image->props.format
|
||||
|
|
@ -1266,8 +1246,7 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
|
|||
#endif
|
||||
} else {
|
||||
fb->bifrost.pre_post.modes[dcd_idx] =
|
||||
always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
|
||||
: MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
|
||||
MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
|
@ -1282,7 +1261,7 @@ pan_preload_emit_tiler_job(struct pan_fb_preload_cache *cache,
|
|||
return (struct pan_ptr){0};
|
||||
|
||||
pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd,
|
||||
pan_section_ptr(job.cpu, TILER_JOB, DRAW), false);
|
||||
pan_section_ptr(job.cpu, TILER_JOB, DRAW));
|
||||
|
||||
pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
|
||||
cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
|
||||
|
|
|
|||
|
|
@ -531,7 +531,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
|
|||
rts[i].nr_samples =
|
||||
surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1);
|
||||
memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
|
||||
fb->rts[i].crc_valid = &prsrc->valid.crc;
|
||||
fb->rts[i].crc_state = &prsrc->crc_state;
|
||||
fb->rts[i].view = &rts[i];
|
||||
|
||||
/* Preload if the RT is read or updated */
|
||||
|
|
|
|||
|
|
@ -280,6 +280,8 @@ panfrost_resource_import_bo(struct panfrost_resource *rsc,
|
|||
if (!rsc->bo)
|
||||
return -1;
|
||||
|
||||
pan_crc_state_set_ptr(&rsc->crc_state, &rsc->bo->ptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -846,7 +848,7 @@ panfrost_should_checksum(const struct panfrost_device *dev,
|
|||
|
||||
return pres->base.bind & PIPE_BIND_RENDER_TARGET && panfrost_is_2d(pres) &&
|
||||
bytes_per_pixel <= bytes_per_pixel_max &&
|
||||
pres->base.last_level == 0 && !(dev->debug & PAN_DBG_NO_CRC);
|
||||
!(dev->debug & PAN_DBG_NO_CRC);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -1166,6 +1168,7 @@ panfrost_resource_create_with_modifier(struct pipe_screen *screen,
|
|||
|
||||
so->bo =
|
||||
panfrost_bo_create(dev, so->plane.layout.data_size_B, flags, res_label);
|
||||
pan_crc_state_set_ptr(&so->crc_state, &so->bo->ptr);
|
||||
|
||||
if (!so->bo) {
|
||||
panfrost_resource_destroy(screen, &so->base);
|
||||
|
|
@ -1714,6 +1717,7 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource,
|
|||
panfrost_bo_unreference(rsrc->bo);
|
||||
rsrc->bo = newbo;
|
||||
rsrc->plane.base = newbo->ptr.gpu;
|
||||
pan_crc_state_set_ptr(&rsrc->crc_state, &newbo->ptr);
|
||||
|
||||
if (!copy_resource && drm_is_afbc(rsrc->modifier)) {
|
||||
if (panfrost_resource_init_afbc_headers(rsrc))
|
||||
|
|
@ -1887,6 +1891,7 @@ pan_resource_modifier_convert(struct panfrost_context *ctx,
|
|||
rsrc->bo = tmp_rsrc->bo;
|
||||
rsrc->plane.base = rsrc->bo->ptr.gpu;
|
||||
panfrost_bo_reference(rsrc->bo);
|
||||
pan_crc_state_set_ptr(&rsrc->crc_state, &rsrc->bo->ptr);
|
||||
|
||||
rsrc->owns_label = tmp_rsrc->owns_label;
|
||||
tmp_rsrc->owns_label = false;
|
||||
|
|
@ -2222,7 +2227,7 @@ pan_resource_afbcp_commit(struct panfrost_context *ctx,
|
|||
prsrc->plane.layout.data_size_B = prsrc->afbcp->size;
|
||||
prsrc->plane.base = prsrc->afbcp->packed_bo->ptr.gpu;
|
||||
prsrc->image.props.crc = false;
|
||||
prsrc->valid.crc = false;
|
||||
pan_crc_state_invalidate(&prsrc->crc_state);
|
||||
|
||||
for (unsigned level = 0; level <= prsrc->base.last_level; ++level)
|
||||
prsrc->plane.layout.slices[level] =
|
||||
|
|
@ -2234,6 +2239,7 @@ pan_resource_afbcp_commit(struct panfrost_context *ctx,
|
|||
panfrost_bo_unreference(prsrc->bo);
|
||||
prsrc->bo = prsrc->afbcp->packed_bo;
|
||||
prsrc->afbcp->packed_bo = NULL;
|
||||
pan_crc_state_set_ptr(&prsrc->crc_state, &prsrc->bo->ptr);
|
||||
|
||||
pan_resource_afbcp_stop(prsrc);
|
||||
}
|
||||
|
|
@ -2321,7 +2327,7 @@ panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
|
|||
struct panfrost_device *dev = pan_device(pctx->screen);
|
||||
|
||||
if (transfer->usage & PIPE_MAP_WRITE)
|
||||
prsrc->valid.crc = false;
|
||||
pan_crc_state_invalidate(&prsrc->crc_state);
|
||||
|
||||
/* AFBC/AFRC will use a staging resource. `initialized` will be set when
|
||||
* the fragment job is created; this is deferred to prevent useless surface
|
||||
|
|
@ -2345,6 +2351,7 @@ panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
|
|||
prsrc->bo = pan_resource(trans->staging.rsrc)->bo;
|
||||
prsrc->plane.base = prsrc->bo->ptr.gpu;
|
||||
panfrost_bo_reference(prsrc->bo);
|
||||
pan_crc_state_set_ptr(&prsrc->crc_state, &prsrc->bo->ptr);
|
||||
|
||||
prsrc->owns_label = pan_resource(trans->staging.rsrc)->owns_label;
|
||||
pan_resource(trans->staging.rsrc)->owns_label = false;
|
||||
|
|
|
|||
|
|
@ -76,10 +76,6 @@ struct panfrost_resource {
|
|||
struct panfrost_bo *bo;
|
||||
|
||||
struct {
|
||||
/* Is the checksum for this image valid? Implicitly refers to
|
||||
* the first slice; we only checksum non-mipmapped 2D images */
|
||||
bool crc;
|
||||
|
||||
/* Has anything been written to this slice? */
|
||||
BITSET_DECLARE(data, PAN_MAX_MIP_LEVELS);
|
||||
} valid;
|
||||
|
|
@ -102,6 +98,9 @@ struct panfrost_resource {
|
|||
/* Whether the resource owns the backing BO's label */
|
||||
bool owns_label;
|
||||
|
||||
/* CRC state */
|
||||
struct pan_crc_state crc_state;
|
||||
|
||||
/* AFBC-P state */
|
||||
struct pan_afbcp *afbcp;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -92,73 +92,6 @@ pan_warn_on_afbc_reverse_issue_order(const struct pan_attachment_info *att,
|
|||
}
|
||||
#endif
|
||||
|
||||
static bool
|
||||
renderblock_fits_in_single_pass(const struct pan_image_view *view,
|
||||
unsigned tile_size)
|
||||
{
|
||||
const struct pan_image_plane_ref pref = pan_image_view_get_first_plane(view);
|
||||
uint64_t mod = pref.image->props.modifier;
|
||||
|
||||
if (!drm_is_afbc(mod))
|
||||
return tile_size >= 16 * 16;
|
||||
|
||||
struct pan_image_block_size renderblk_sz = pan_afbc_renderblock_size(mod);
|
||||
return tile_size >= renderblk_sz.width * renderblk_sz.height;
|
||||
}
|
||||
|
||||
int
|
||||
GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
|
||||
{
|
||||
/* Disable CRC when the tile size is smaller than 16x16. In the hardware,
|
||||
* CRC tiles are the same size as the tiles of the framebuffer. However,
|
||||
* our code only handles 16x16 tiles. Therefore under the current
|
||||
* implementation, we must disable CRC when 16x16 tiles are not used.
|
||||
*
|
||||
* This may hurt performance. However, smaller tile sizes are rare, and
|
||||
* CRCs are more expensive at smaller tile sizes, reducing the benefit.
|
||||
* Restricting CRC to 16x16 should work in practice.
|
||||
*/
|
||||
if (tile_size < 16 * 16)
|
||||
return -1;
|
||||
|
||||
#if PAN_ARCH <= 6
|
||||
if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
|
||||
pan_image_view_has_crc(fb->rts[0].view))
|
||||
return 0;
|
||||
|
||||
return -1;
|
||||
#else
|
||||
bool best_rt_valid = false;
|
||||
int best_rt = -1;
|
||||
|
||||
for (unsigned i = 0; i < fb->rt_count; i++) {
|
||||
if (!fb->rts[i].view || fb->rts[i].discard ||
|
||||
!pan_image_view_has_crc(fb->rts[i].view))
|
||||
continue;
|
||||
|
||||
if (!renderblock_fits_in_single_pass(fb->rts[i].view, tile_size))
|
||||
continue;
|
||||
|
||||
bool valid = *(fb->rts[i].crc_valid);
|
||||
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
|
||||
fb->draw_extent.maxx == (fb->width - 1) &&
|
||||
fb->draw_extent.maxy == (fb->height - 1);
|
||||
if (!full && !valid)
|
||||
continue;
|
||||
|
||||
if (best_rt < 0 || (valid && !best_rt_valid)) {
|
||||
best_rt = i;
|
||||
best_rt_valid = valid;
|
||||
}
|
||||
|
||||
if (valid)
|
||||
break;
|
||||
}
|
||||
|
||||
return best_rt;
|
||||
#endif
|
||||
}
|
||||
|
||||
static enum mali_zs_format
|
||||
translate_zs_format(enum pipe_format in)
|
||||
{
|
||||
|
|
@ -454,45 +387,37 @@ GENX(pan_emit_interleaved_64k_zs_attachment)(const struct pan_attachment_info *a
|
|||
#endif
|
||||
|
||||
static void
|
||||
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
|
||||
struct MALI_CRC *crc)
|
||||
pan_emit_crc(const struct pan_fb_info *fb, struct pan_crc *crc,
|
||||
struct MALI_CRC *cfg)
|
||||
{
|
||||
if (rt_crc < 0)
|
||||
if (!pan_crc_is_enabled(crc))
|
||||
return;
|
||||
|
||||
assert(rt_crc < fb->rt_count);
|
||||
|
||||
const struct pan_image_view *rt = fb->rts[rt_crc].view;
|
||||
const struct pan_image_view *rt = fb->rts[crc->index].view;
|
||||
const struct pan_image_plane_ref pref = pan_image_view_get_color_plane(rt);
|
||||
const struct pan_image *image = pref.image;
|
||||
const struct pan_image_plane *plane = image->planes[pref.plane_idx];
|
||||
const struct pan_image_slice_layout *slice =
|
||||
&plane->layout.slices[rt->first_level];
|
||||
|
||||
crc->base = plane->base + slice->crc.offset_B;
|
||||
crc->row_stride = slice->crc.stride_B;
|
||||
cfg->base = plane->base + slice->crc.offset_B;
|
||||
cfg->row_stride = slice->crc.stride_B;
|
||||
|
||||
#if PAN_ARCH >= 7
|
||||
crc->render_target = rt_crc;
|
||||
|
||||
if (fb->rts[rt_crc].clear) {
|
||||
uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
|
||||
crc->clear_color = clear_val | 0xc000000000000000 |
|
||||
(((uint64_t)clear_val & 0xffff) << 32);
|
||||
}
|
||||
cfg->render_target = crc->index;
|
||||
cfg->clear_color = crc->clear_color;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
|
||||
int rt_crc,
|
||||
struct mali_zs_crc_extension_packed *zs_crc_ext,
|
||||
struct pan_clean_tile clean_tile)
|
||||
struct pan_crc *crc, struct pan_clean_tile clean_tile)
|
||||
{
|
||||
struct mali_zs_crc_extension_packed desc;
|
||||
|
||||
pan_pack(&desc, ZS_CRC_EXTENSION, cfg) {
|
||||
pan_prepare_crc(fb, rt_crc, &cfg.crc);
|
||||
pan_emit_crc(fb, crc, &cfg.crc);
|
||||
#if PAN_ARCH == 5
|
||||
cfg.zs.clean_pixel_write_enable =
|
||||
pan_clean_tile_write_zs_enabled(clean_tile);
|
||||
|
|
@ -1085,6 +1010,243 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
|
|||
}
|
||||
#endif
|
||||
|
||||
static bool
|
||||
pan_fb_color_attachment_should_crc(const struct pan_fb_color_attachment *rt,
|
||||
unsigned tile_size)
|
||||
{
|
||||
uint64_t mod;
|
||||
struct pan_image_block_size renderblk_sz;
|
||||
|
||||
if (!rt->view || rt->discard || !rt->crc_state ||
|
||||
!pan_image_view_has_crc(rt->view))
|
||||
return false;
|
||||
|
||||
mod = pan_image_view_get_first_plane(rt->view).image->props.modifier;
|
||||
|
||||
if (!drm_is_afbc(mod))
|
||||
return true;
|
||||
|
||||
/* AFBC-P images are read only. */
|
||||
if (!(mod & AFBC_FORMAT_MOD_SPARSE))
|
||||
return false;
|
||||
|
||||
/* AFBC render block size must fit in a single pass. */
|
||||
renderblk_sz = pan_afbc_renderblock_size(mod);
|
||||
if (tile_size < renderblk_sz.width * renderblk_sz.height)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
pan_select_crc_rt(const struct pan_fb_info *fb)
|
||||
{
|
||||
int best_rt = -1;
|
||||
|
||||
#if PAN_ARCH <= 5
|
||||
/* CRC was introduced in v4 and MRT in v5 but unlike v6 there's no details
|
||||
* how both work together. */
|
||||
if (fb->rt_count > 1)
|
||||
return best_rt;
|
||||
|
||||
#elif PAN_ARCH == 6
|
||||
/* On v6, all enabled RTs are used to compute a CRC (no crc_render_target
|
||||
* field on the DBD). The write buffer size of the enabled color
|
||||
* attachments for a tile must fit within 1600 bytes. */
|
||||
if (fb->rt_count > 1 &&
|
||||
pan_cbuf_bytes_per_pixel(fb) * fb->tile_size > 1600)
|
||||
return best_rt;
|
||||
#endif
|
||||
|
||||
for (unsigned i = 0; i < fb->rt_count; i++) {
|
||||
/* Skip unusable RTs. */
|
||||
if (!pan_fb_color_attachment_should_crc(&fb->rts[i], fb->tile_size))
|
||||
continue;
|
||||
|
||||
/* Select the first RT with a valid CRC buffer. */
|
||||
if (fb->rts[i].crc_state->valid) {
|
||||
best_rt = i;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Store the first usable RT otherwise. */
|
||||
if (best_rt == -1)
|
||||
best_rt = i;
|
||||
}
|
||||
|
||||
return best_rt;
|
||||
}
|
||||
|
||||
static void
|
||||
pan_crc_enable(struct pan_crc *crc)
|
||||
{
|
||||
crc->read = true;
|
||||
crc->write = true;
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 7
/* Initialize the CRC buffer by zero'ing it, then enable CRC read/write and
 * flag the CRC state as valid. The all-zero CRC can't collide thanks to the
 * crc_clear_color field, see pan_crc_clear_color(). Drawback is the CRC BO
 * must be CPU mapped. */
static void
pan_crc_enable_zeroed(struct pan_crc *crc, struct pan_crc_state *state,
                      const struct pan_image_view *view)
{
   const struct pan_image_plane_ref ref = pan_image_view_get_color_plane(view);
   const struct pan_image_slice_layout *level =
      &ref.image->planes[ref.plane_idx]->layout.slices[view->first_level];

   /* The caller is expected to have checked for a CPU mapping. */
   assert(state->ptr && state->ptr->cpu);

   /* Clear the CRC region of the slice through the CPU mapping. */
   memset(state->ptr->cpu + level->crc.offset_B, 0, level->crc.size_B);

   pan_crc_enable(crc);
   state->valid = true;
}
#endif
|
||||
|
||||
/* Take advantage of a full frame draw to initialize the CRC buffer by
|
||||
* forcefully writing back all the tiles and flush the CRC values. Drawback
|
||||
* is it only works on full frames. */
|
||||
static void
|
||||
pan_crc_maybe_enable_flushed(struct pan_crc *crc, struct pan_crc_state *state,
|
||||
const struct pan_fb_info *fb)
|
||||
{
|
||||
if (!pan_fb_info_is_fully_covered(fb))
|
||||
return;
|
||||
|
||||
crc->write = true;
|
||||
crc->force_clean_tile_write = true;
|
||||
state->valid = true;
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 7
/* Build the crc_clear_color value used for Empty Tile Elimination.
 *
 * Returns the packed 64-bit field: crc_clear_flag (bit 63, left unset),
 * crc_clear_base (bits 62:16, most significant bit set and a hash of the
 * clear values of all cleared RTs in the lower 46 bits) and crc_init
 * (bits 15:0, left clear). */
static uint64_t
pan_crc_clear_color(const struct pan_fb_info *fb)
{
   /* One accumulator per clear color channel. Indexing by channel (and not
    * by RT) keeps the accesses within the 4 entries whatever the RT count,
    * and the final XOR of the 4 accumulators is the XOR of every hashed
    * channel of every cleared RT either way. */
   uint64_t base[4] = { 0, };
   uint64_t crc_clear_flag = 0;
   uint64_t crc_clear_base = 1ull << 46;
   uint64_t crc_init = 0;

   /* When a tile is clear (i.e. no polygons intersect it), the configured
    * crc_clear_color is written as is as CRC value by the GPU if both CRC
    * write (crc_write_enable flag) and Empty Tile Elimination write
    * (empty_tile_write_enable flag) are enabled. If Empty Tile Elimination
    * read (empty_tile_read_enable flag) is enabled, this then allows to skip
    * the pre-loading of clear tiles which were also clear at the previous
    * render on the selected RT. It's done by comparing CRCs in the CRC buffer
    * to the crc_clear_color.
    *
    * The crc_clear_flag sub-field (bit 63) is flagged unset here. It's
    * flipped by the GPU when writing standard (i.e. non-empty) CRCs. This
    * prevents standard CRCs from using the all-zero CRC value. Empty CRCs
    * can't use the all-zero CRC value either because crc_clear_base's most
    * significant bit is flagged set here. This allows to invalidate a CRC
    * buffer by zero'ing it.
    *
    * v10 introduced the crc_init sub-field (bits 15:0). v7 and v9 can use
    * those as additional crc_clear_base bits. We don't use it for now and
    * keep those 16 bits clear regardless of arch.
    *
    * This leaves 46 bits in the crc_clear_base sub-field (bits 62:16). Clear
    * color changes on any RTs must be reflected into this field in order to
    * properly invalidate CRCs stored this way. This is done by hashing the
    * clear value channels of each cleared RT. Each clear color channel value
    * is multiplied with a prime number followed by a XOR to the destination
    * hash. Clear values in pan_fb_info struct are expected to be packed with
    * respect to the format and dithering of the underlying RTs so that a
    * change of format (without a clear color change) can generate a different
    * hash. The prime number 16381 is carefully selected so that the 32 bits
    * of each clear color channel take at most 46 bits after the mul (the next
    * prime number 16411 takes at most 47 bits). The resulting hash value is
    * guaranteed not to overflow and can safely be packed. */

   for (unsigned i = 0; i < fb->rt_count; ++i) {
      if (!fb->rts[i].clear)
         continue;

      /* Widen before multiplying: a 32-bit multiply would wrap and drop
       * the upper bits of the up-to-46-bit product. */
      for (unsigned j = 0; j < 4; ++j)
         base[j] ^= (uint64_t)16381 * fb->rts[i].clear_value[j];
   }

   crc_clear_base |= (base[0] ^ base[1]) ^ (base[2] ^ base[3]);

   return (crc_clear_flag << 63) | (crc_clear_base << 16) | crc_init;
}
#endif
|
||||
|
||||
#if PAN_ARCH >= 6
/* Tell whether Empty Tile Elimination can be used: CRC read or write must be
 * enabled and, on v6, a single RT must be bound. */
static bool
pan_crc_has_empty_tile_elimination(struct pan_crc *crc,
                                   const struct pan_fb_info *fb)
{
   bool usable = crc->read || crc->write;

#if PAN_ARCH == 6
   /* For v6, there are no details on how MRT interacts with Empty Tile
    * Elimination, especially how the clear value is generated from the color
    * attachment clear values. The feature is disabled for that use case. */
   if (fb->rt_count > 1)
      usable = false;
#endif

   return usable;
}
#endif
|
||||
|
||||
static struct pan_crc
|
||||
pan_get_crc_info(const struct pan_fb_info *fb)
|
||||
{
|
||||
struct pan_crc crc = { .index = -1, };
|
||||
const struct pan_fb_color_attachment *rt;
|
||||
|
||||
/* Disable TE when the tile size is smaller than 16x16. In the hardware,
|
||||
* CRC tiles are the same size as the tiles of the framebuffer. However,
|
||||
* our code only handles 16x16 tiles. Therefore under the current
|
||||
* implementation, we must disable TE when 16x16 tiles are not used. This
|
||||
* may hurt performance. However, smaller tile sizes are rare, and CRCs are
|
||||
* more expensive at smaller tile sizes, reducing the benefit. Restricting
|
||||
* CRC to 16x16 should work in practice. */
|
||||
if (fb->tile_size < 16 * 16)
|
||||
goto skip;
|
||||
|
||||
crc.index = pan_select_crc_rt(fb);
|
||||
if (crc.index == -1)
|
||||
goto skip;
|
||||
|
||||
rt = &fb->rts[crc.index];
|
||||
|
||||
/* Transaction Elimination. */
|
||||
if (rt->crc_state->valid) {
|
||||
pan_crc_enable(&crc);
|
||||
} else {
|
||||
#if PAN_ARCH >= 7
|
||||
if (rt->crc_state->ptr && rt->crc_state->ptr->cpu)
|
||||
pan_crc_enable_zeroed(&crc, rt->crc_state, rt->view);
|
||||
else
|
||||
pan_crc_maybe_enable_flushed(&crc, rt->crc_state, fb);
|
||||
#else
|
||||
pan_crc_maybe_enable_flushed(&crc, rt->crc_state, fb);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
/* Empty Tile Elimination. */
|
||||
if (pan_crc_has_empty_tile_elimination(&crc, fb)) {
|
||||
#if PAN_ARCH >= 7
|
||||
crc.clear_color = pan_crc_clear_color(fb);
|
||||
#endif
|
||||
crc.empty_tile_read = crc.read;
|
||||
crc.empty_tile_write = crc.write;
|
||||
}
|
||||
#endif
|
||||
|
||||
skip:
|
||||
/* Flag CRC buffer states of unselected RTs as invalid. */
|
||||
for (unsigned i = 0; i < fb->rt_count; i++)
|
||||
if (i != crc.index && fb->rts[i].crc_state)
|
||||
fb->rts[i].crc_state->valid = false;
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Clean tiles must be written back for AFBC buffers (color, z/s) when either
|
||||
* one of the effective tile size dimension is smaller than the superblock
|
||||
* dimension.
|
||||
|
|
@ -1111,8 +1273,14 @@ GENX(pan_force_clean_write_on)(const struct pan_image *image,
|
|||
#endif
|
||||
}
|
||||
|
||||
static bool
|
||||
pan_force_clean_write_crc(struct pan_crc *crc, int index)
|
||||
{
|
||||
return index == crc->index && crc->force_clean_tile_write;
|
||||
}
|
||||
|
||||
static struct pan_clean_tile
|
||||
pan_get_clean_tile_info(const struct pan_fb_info *fb)
|
||||
pan_get_clean_tile_info(const struct pan_fb_info *fb, struct pan_crc *crc)
|
||||
{
|
||||
struct pan_clean_tile clean_tile = { 0, };
|
||||
const struct pan_image *img;
|
||||
|
|
@ -1127,7 +1295,8 @@ pan_get_clean_tile_info(const struct pan_fb_info *fb)
|
|||
img = fb->rts[i].view ?
|
||||
pan_image_view_get_color_plane(fb->rts[i].view).image : NULL;
|
||||
if (fb->rts[i].clear ||
|
||||
GENX(pan_force_clean_write_on)(img, fb->tile_size))
|
||||
GENX(pan_force_clean_write_on)(img, fb->tile_size) ||
|
||||
pan_force_clean_write_crc(crc, i))
|
||||
clean_tile.write_rt_mask |= 1 << i;
|
||||
}
|
||||
|
||||
|
|
@ -1188,9 +1357,10 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
|||
GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
|
||||
#endif
|
||||
|
||||
int crc_rt = GENX(pan_select_crc_rt)(fb, fb->tile_size);
|
||||
bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
|
||||
struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb);
|
||||
struct pan_crc crc = pan_get_crc_info(fb);
|
||||
struct pan_clean_tile clean_tile = pan_get_clean_tile_info(fb, &crc);
|
||||
bool has_zs_crc_ext = fb->zs.view.zs || fb->zs.view.s ||
|
||||
pan_crc_is_enabled(&crc);
|
||||
|
||||
pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
|
||||
#if PAN_ARCH >= 6
|
||||
|
|
@ -1254,26 +1424,13 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
|||
cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
|
||||
cfg.has_zs_crc_extension = has_zs_crc_ext;
|
||||
|
||||
if (crc_rt >= 0) {
|
||||
bool *valid = fb->rts[crc_rt].crc_valid;
|
||||
bool full = !fb->draw_extent.minx && !fb->draw_extent.miny &&
|
||||
fb->draw_extent.maxx == (fb->width - 1) &&
|
||||
fb->draw_extent.maxy == (fb->height - 1);
|
||||
|
||||
/* If the CRC was valid it stays valid, if it wasn't, we must ensure
|
||||
* the render operation covers the full frame, and clean tiles are
|
||||
* pushed to memory. */
|
||||
bool new_valid = *valid |
|
||||
(full && pan_clean_tile_write_rt_enabled(clean_tile, crc_rt));
|
||||
|
||||
cfg.crc_read_enable = *valid;
|
||||
|
||||
/* If the data is currently invalid, still write CRC
|
||||
* data if we are doing a full write, so that it is
|
||||
* valid for next time. */
|
||||
cfg.crc_write_enable = new_valid;
|
||||
|
||||
*valid = new_valid;
|
||||
if (pan_crc_is_enabled(&crc)) {
|
||||
cfg.crc_read_enable = crc.read;
|
||||
cfg.crc_write_enable = crc.write;
|
||||
#if PAN_ARCH >= 7
|
||||
cfg.empty_tile_read_enable = crc.empty_tile_read;
|
||||
cfg.empty_tile_write_enable = crc.empty_tile_write;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
|
|
@ -1324,7 +1481,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
|||
struct mali_zs_crc_extension_packed *zs_crc_ext =
|
||||
out + pan_size(FRAMEBUFFER);
|
||||
|
||||
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext, clean_tile);
|
||||
pan_emit_zs_crc_ext(fb, layer_idx, zs_crc_ext, &crc, clean_tile);
|
||||
rtd += pan_size(ZS_CRC_EXTENSION);
|
||||
}
|
||||
|
||||
|
|
@ -1339,9 +1496,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
|||
cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
|
||||
fb->tile_size *
|
||||
pan_image_view_get_nr_samples(fb->rts[i].view);
|
||||
|
||||
if (i != crc_rt && fb->rts[i].crc_valid != NULL)
|
||||
*(fb->rts[i].crc_valid) = false;
|
||||
}
|
||||
|
||||
struct mali_framebuffer_pointer_packed tag;
|
||||
|
|
@ -1454,11 +1608,6 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
|||
DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED
|
||||
? MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED
|
||||
: MALI_BLOCK_FORMAT_LINEAR;
|
||||
|
||||
if (pan_image_view_has_crc(rt)) {
|
||||
cfg.crc_buffer.row_stride = slayout->crc.stride_B;
|
||||
cfg.crc_buffer.base = plane->base + slayout->crc.offset_B;
|
||||
}
|
||||
}
|
||||
|
||||
if (fb->zs.view.zs) {
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ struct pan_compute_dim {
|
|||
|
||||
struct pan_fb_color_attachment {
|
||||
const struct pan_image_view *view;
|
||||
bool *crc_valid;
|
||||
struct pan_crc_state *crc_state;
|
||||
bool clear;
|
||||
bool preload;
|
||||
bool discard;
|
||||
|
|
@ -154,6 +154,33 @@ struct pan_fb_info {
|
|||
bool pls_enabled;
|
||||
};
|
||||
|
||||
/* Per-framebuffer CRC configuration. */
struct pan_crc {
   /* Clear color used by Empty Tile Elimination. */
   uint64_t clear_color;

   /* Index of the selected RT (8 max), or -1 when no RT is selected. */
   int8_t index;

   /* Transaction Elimination: CRC read and CRC write enables. */
   bool read : 1;
   bool write : 1;

   /* Clean tile writes are forced to initialize the CRC buffer. */
   bool force_clean_tile_write : 1;

   /* Empty Tile Elimination: read and write enables. */
   bool empty_tile_read : 1;
   bool empty_tile_write : 1;
};
|
||||
|
||||
/* CRC buffer state tracked for an image. */
struct pan_crc_state {
   /* BO mapping, set with pan_crc_state_set_ptr(). */
   struct pan_ptr *ptr;

   /* Whether the CRC buffer content is valid. Implicitly refers to the
    * first slice. */
   bool valid;
};
|
||||
|
||||
struct pan_clean_tile {
|
||||
/* clean_tile_write_enable mask on the 8 color attachments. */
|
||||
uint8_t write_rt_mask;
|
||||
|
|
@ -162,6 +189,33 @@ struct pan_clean_tile {
|
|||
uint8_t write_zs : 1;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
pan_fb_info_is_fully_covered(const struct pan_fb_info *fb)
|
||||
{
|
||||
return !fb->draw_extent.minx &&
|
||||
!fb->draw_extent.miny &&
|
||||
fb->draw_extent.maxx == (fb->width - 1) &&
|
||||
fb->draw_extent.maxy == (fb->height - 1);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
pan_crc_is_enabled(struct pan_crc *crc)
|
||||
{
|
||||
return crc->index != -1;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_crc_state_invalidate(struct pan_crc_state *state)
|
||||
{
|
||||
state->valid = false;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_crc_state_set_ptr(struct pan_crc_state *state, struct pan_ptr *ptr)
|
||||
{
|
||||
state->ptr = ptr;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
pan_clean_tile_write_rt_enabled(struct pan_clean_tile clean_tile,
|
||||
unsigned index)
|
||||
|
|
@ -294,8 +348,6 @@ bool GENX(pan_force_clean_write_on)(const struct pan_image *image,
|
|||
void GENX(pan_emit_tls)(const struct pan_tls_info *info,
|
||||
struct mali_local_storage_packed *out);
|
||||
|
||||
int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size);
|
||||
|
||||
struct pan_attachment_info {
|
||||
const struct pan_image_view *iview;
|
||||
unsigned layer_or_z_slice;
|
||||
|
|
|
|||
|
|
@ -146,7 +146,8 @@ pan_image_view_has_crc(const struct pan_image_view *iview)
|
|||
if (!p.image)
|
||||
return false;
|
||||
|
||||
return p.image->props.crc;
|
||||
/* Only mip level 0 gets a CRC buffer allocated. */
|
||||
return p.image->props.crc && iview->first_level == 0;
|
||||
}
|
||||
|
||||
static inline struct pan_image_plane_ref
|
||||
|
|
|
|||
|
|
@ -125,8 +125,8 @@ pan_image_layout_init(
|
|||
|
||||
layout_constraints.offset_B += slayout->size_B;
|
||||
|
||||
/* Add a checksum region if necessary */
|
||||
if (props->crc) {
|
||||
/* Add a CRC buffer at level 0 if necessary */
|
||||
if (l == 0 && props->crc) {
|
||||
init_slice_crc_info(arch, slayout, mip_extent_px.width,
|
||||
mip_extent_px.height, layout_constraints.offset_B);
|
||||
layout_constraints.offset_B += slayout->crc.size_B;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue