mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-06 20:50:31 +01:00
panfrost: Implement pan_tiler for non-hierarchy GPUs
The algorithm is as described. Nothing fancy here, just need to add some new code paths depending on which model we're running on. Tomeu: - Also disable tiling when !hierarchy and !vertex_count - Avoid creating polygon lists smaller than the minimum when vertex_count > 0 but tile size smaller than 16 bytes - Take into account tile size when calculating polygon list size for !hierarchy - Allow 0-sized tiles in a single dimension Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
This commit is contained in:
parent
63cd5b8198
commit
9fb0904712
6 changed files with 106 additions and 136 deletions
|
|
@ -59,24 +59,25 @@ static struct midgard_tiler_descriptor
|
|||
panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
|
||||
{
|
||||
struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
|
||||
bool hierarchy = !(screen->quirks & MIDGARD_NO_HIER_TILING);
|
||||
struct midgard_tiler_descriptor t = {0};
|
||||
unsigned height = batch->key.height;
|
||||
unsigned width = batch->key.width;
|
||||
|
||||
t.hierarchy_mask =
|
||||
panfrost_choose_hierarchy_mask(width, height, vertex_count);
|
||||
panfrost_choose_hierarchy_mask(width, height, vertex_count, hierarchy);
|
||||
|
||||
/* Compute the polygon header size and use that to offset the body */
|
||||
|
||||
unsigned header_size = panfrost_tiler_header_size(
|
||||
width, height, t.hierarchy_mask);
|
||||
width, height, t.hierarchy_mask, hierarchy);
|
||||
|
||||
t.polygon_list_size = panfrost_tiler_full_size(
|
||||
width, height, t.hierarchy_mask);
|
||||
width, height, t.hierarchy_mask, hierarchy);
|
||||
|
||||
/* Sanity check */
|
||||
|
||||
if (t.hierarchy_mask) {
|
||||
if (vertex_count) {
|
||||
struct panfrost_bo *tiler_heap;
|
||||
|
||||
tiler_heap = panfrost_batch_get_tiler_heap(batch);
|
||||
|
|
@ -92,6 +93,7 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
|
|||
struct panfrost_bo *tiler_dummy;
|
||||
|
||||
tiler_dummy = panfrost_batch_get_tiler_dummy(batch);
|
||||
header_size = MALI_TILER_MINIMUM_HEADER_SIZE;
|
||||
|
||||
/* The tiler is disabled, so don't allow the tiler heap */
|
||||
t.heap_start = tiler_dummy->gpu;
|
||||
|
|
@ -101,11 +103,11 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
|
|||
t.polygon_list = tiler_dummy->gpu;
|
||||
|
||||
/* Disable the tiler */
|
||||
t.hierarchy_mask |= MALI_TILER_DISABLED;
|
||||
|
||||
if (screen->quirks & MIDGARD_SFBD) {
|
||||
t.hierarchy_mask = 0xFFF; /* TODO: What's this? */
|
||||
t.polygon_list_size = 0x200;
|
||||
if (hierarchy)
|
||||
t.hierarchy_mask |= MALI_TILER_DISABLED;
|
||||
else {
|
||||
t.hierarchy_mask = MALI_TILER_USER;
|
||||
t.polygon_list_size = MALI_TILER_MINIMUM_HEADER_SIZE + 4;
|
||||
|
||||
/* We don't have a SET_VALUE job, so write the polygon list manually */
|
||||
uint32_t *polygon_list_body = (uint32_t *) (tiler_dummy->cpu + header_size);
|
||||
|
|
|
|||
|
|
@ -302,7 +302,8 @@ panfrost_scoreboard_set_value(struct panfrost_batch *batch)
|
|||
/* Okay, we do. Let's generate it. We'll need the job's polygon list
|
||||
* regardless of size. */
|
||||
|
||||
mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch, 0);
|
||||
mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch,
|
||||
MALI_TILER_MINIMUM_HEADER_SIZE);
|
||||
|
||||
struct panfrost_transfer job =
|
||||
panfrost_set_value_job(batch, polygon_list);
|
||||
|
|
|
|||
|
|
@ -56,14 +56,14 @@ panfrost_pack_work_groups_fused(
|
|||
/* Tiler structure size computation */
|
||||
|
||||
unsigned
|
||||
panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask);
|
||||
panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);
|
||||
|
||||
unsigned
|
||||
panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask);
|
||||
panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);
|
||||
|
||||
unsigned
|
||||
panfrost_choose_hierarchy_mask(
|
||||
unsigned width, unsigned height,
|
||||
unsigned vertex_count);
|
||||
unsigned vertex_count, bool hierarchy);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -218,13 +218,6 @@
|
|||
/* Likewise, each tile per level has 512 bytes of body */
|
||||
#define FULL_BYTES_PER_TILE 0x200
|
||||
|
||||
/* Absent any geometry, the minimum size of the header */
|
||||
#define MINIMUM_HEADER_SIZE 0x200
|
||||
|
||||
/* Mask of valid hierarchy levels: one bit for each level from min...max
|
||||
* inclusive */
|
||||
#define HIERARCHY_MASK (((MAX_TILE_SIZE / MIN_TILE_SIZE) << 1) - 1)
|
||||
|
||||
/* If the width-x-height framebuffer is divided into tile_size-x-tile_size
|
||||
* tiles, how many tiles are there? Rounding up in each direction. For the
|
||||
* special case of tile_size=16, this aligns with the usual Midgard count.
|
||||
|
|
@ -233,108 +226,86 @@
|
|||
* a fixed-tile size (not any of a number of power-of-twos) */
|
||||
|
||||
static unsigned
|
||||
pan_tile_count(unsigned width, unsigned height, unsigned tile_size)
|
||||
pan_tile_count(unsigned width, unsigned height, unsigned tile_width, unsigned tile_height)
|
||||
{
|
||||
unsigned aligned_width = ALIGN_POT(width, tile_size);
|
||||
unsigned aligned_height = ALIGN_POT(height, tile_size);
|
||||
unsigned aligned_width = ALIGN_POT(width, tile_width);
|
||||
unsigned aligned_height = ALIGN_POT(height, tile_height);
|
||||
|
||||
unsigned tile_count_x = aligned_width / tile_size;
|
||||
unsigned tile_count_y = aligned_height / tile_size;
|
||||
unsigned tile_count_x = aligned_width / tile_width;
|
||||
unsigned tile_count_y = aligned_height / tile_height;
|
||||
|
||||
return tile_count_x * tile_count_y;
|
||||
}
|
||||
|
||||
/* For `masked_count` of the smallest tile sizes masked out, computes the
|
||||
* size of the polygon list header. We iterate the tile sizes (16x16 through
|
||||
* 2048x2048, if nothing is masked; (16*2^masked_count)x(16*2^masked_count)
|
||||
* through 2048x2048 more generally. For each tile size, we figure out how many
|
||||
* tiles there are at this hierarchy level and therefore many bytes this level
|
||||
* is, leaving us with a byte count for each level. We then just sum up the
|
||||
* byte counts across the levels to find a byte count for all levels. */
|
||||
* 2048x2048). For each tile size, we figure out how many tiles there are at
|
||||
* this hierarchy level and therefore how many bytes this level is, leaving us with
|
||||
* a byte count for each level. We then just sum up the byte counts across the
|
||||
* levels to find a byte count for all levels. */
|
||||
|
||||
static unsigned
|
||||
panfrost_raw_segment_size(
|
||||
panfrost_hierarchy_size(
|
||||
unsigned width,
|
||||
unsigned height,
|
||||
unsigned masked_count,
|
||||
unsigned end_level,
|
||||
unsigned mask,
|
||||
unsigned bytes_per_tile)
|
||||
{
|
||||
unsigned size = PROLOGUE_SIZE;
|
||||
|
||||
/* Normally we start at 16x16 tiles (MIN_TILE_SHIFT), but we add more
|
||||
* if anything is masked off */
|
||||
/* Iterate hierarchy levels */
|
||||
|
||||
unsigned start_level = MIN_TILE_SHIFT + masked_count;
|
||||
for (unsigned b = 0; b < (MAX_TILE_SHIFT - MIN_TILE_SHIFT); ++b) {
|
||||
/* Check if this level is enabled */
|
||||
if (!(mask & (1 << b)))
|
||||
continue;
|
||||
|
||||
/* Iterate hierarchy levels / tile sizes */
|
||||
|
||||
for (unsigned i = start_level; i <= end_level; ++i) {
|
||||
/* Shift from a level to a tile size */
|
||||
unsigned tile_size = (1 << i);
|
||||
unsigned tile_size = (1 << b) * MIN_TILE_SIZE;
|
||||
|
||||
unsigned tile_count = pan_tile_count(width, height, tile_size);
|
||||
unsigned tile_count = pan_tile_count(width, height, tile_size, tile_size);
|
||||
unsigned level_count = bytes_per_tile * tile_count;
|
||||
|
||||
size += level_count;
|
||||
}
|
||||
|
||||
/* This size will be used as an offset, so ensure it's aligned */
|
||||
return ALIGN_POT(size, 512);
|
||||
return ALIGN_POT(size, 0x200);
|
||||
}
|
||||
|
||||
/* Given a hierarchy mask and a framebuffer size, compute the size of one of
|
||||
* the segments (header or body) */
|
||||
/* Implement the formula:
|
||||
*
|
||||
* 0x200 + bytes_per_tile * ceil(W / w) * ceil(H / h)
|
||||
*
|
||||
* rounding down the answer to the nearest 0x200. This is used to compute both
|
||||
* header and body sizes for GPUs without hierarchical tiling. Essentially,
|
||||
* computing a single hierarchy level, since there isn't any hierarchy!
|
||||
*/
|
||||
|
||||
static unsigned
|
||||
panfrost_segment_size(
|
||||
unsigned width, unsigned height,
|
||||
unsigned mask, unsigned bytes_per_tile)
|
||||
panfrost_flat_size(unsigned width, unsigned height, unsigned dim, unsigned bytes_per_tile)
|
||||
{
|
||||
/* The tiler-disabled case should have been handled by the caller */
|
||||
assert(mask);
|
||||
/* First, extract the tile dimensions */
|
||||
|
||||
/* Some levels are enabled. Ensure that only smaller levels are
|
||||
* disabled and there are no gaps. Theoretically the hardware is more
|
||||
* flexible, but there's no known reason to use other configurations
|
||||
* and this keeps the code simple. Since we know the 0x80 or 0x100 bit
|
||||
* is set, ctz(mask) will return the number of masked off levels. */
|
||||
unsigned tw = (1 << (dim & 0b111)) * 8;
|
||||
unsigned th = (1 << ((dim & (0b111 << 6)) >> 6)) * 8;
|
||||
|
||||
unsigned masked_count = __builtin_ctz(mask);
|
||||
/* tile_count is ceil(W/w) * ceil(H/h) */
|
||||
unsigned raw = pan_tile_count(width, height, tw, th) * bytes_per_tile;
|
||||
|
||||
assert(mask & (0x80 | 0x100));
|
||||
assert(((mask >> masked_count) & ((mask >> masked_count) + 1)) == 0);
|
||||
|
||||
/* Figure out the top level */
|
||||
unsigned unused_count = __builtin_clz(mask);
|
||||
unsigned top_bit = ((8 * sizeof(mask)) - 1) - unused_count;
|
||||
|
||||
/* We don't have bits for nonexistent levels below 16x16 */
|
||||
unsigned top_level = top_bit + 4;
|
||||
|
||||
/* Everything looks good. Use the number of trailing zeroes we found to
|
||||
* figure out how many smaller levels are disabled to compute the
|
||||
* actual header size */
|
||||
|
||||
return panfrost_raw_segment_size(width, height,
|
||||
masked_count, top_level, bytes_per_tile);
|
||||
/* Round down and add offset */
|
||||
return 0x200 + ((raw / 0x200) * 0x200);
|
||||
}
|
||||
|
||||
|
||||
/* Given a hierarchy mask and a framebuffer size, compute the header size */
|
||||
|
||||
unsigned
|
||||
panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask)
|
||||
panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy)
|
||||
{
|
||||
mask &= HIERARCHY_MASK;
|
||||
|
||||
/* If no hierarchy levels are enabled, that means there is no geometry
|
||||
* for the tiler to process, so use a minimum size. Used for clears */
|
||||
|
||||
if (mask == 0x00)
|
||||
return MINIMUM_HEADER_SIZE;
|
||||
|
||||
return panfrost_segment_size(width, height, mask, HEADER_BYTES_PER_TILE);
|
||||
if (hierarchy)
|
||||
return panfrost_hierarchy_size(width, height, mask, HEADER_BYTES_PER_TILE);
|
||||
else
|
||||
return panfrost_flat_size(width, height, mask, HEADER_BYTES_PER_TILE);
|
||||
}
|
||||
|
||||
/* The combined header/body is sized similarly (but it is significantly
|
||||
|
|
@ -343,14 +314,38 @@ panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask)
|
|||
*/
|
||||
|
||||
unsigned
|
||||
panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask)
|
||||
panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy)
|
||||
{
|
||||
mask &= HIERARCHY_MASK;
|
||||
if (hierarchy)
|
||||
return panfrost_hierarchy_size(width, height, mask, FULL_BYTES_PER_TILE);
|
||||
else
|
||||
return panfrost_flat_size(width, height, mask, FULL_BYTES_PER_TILE);
|
||||
}
|
||||
|
||||
if (mask == 0x00)
|
||||
return MINIMUM_HEADER_SIZE;
|
||||
/* On GPUs without hierarchical tiling, we choose a tile size directly and
|
||||
* stuff it into the field otherwise known as hierarchy mask (not a mask). */
|
||||
|
||||
return panfrost_segment_size(width, height, mask, FULL_BYTES_PER_TILE);
|
||||
/* Choose a tile size for GPUs without hierarchical tiling and pack it into a
 * single unsigned value. Both dimensions start at 16; each is then replaced by
 * MAX2 of itself and util_next_power_of_two(dimension / 63). The vertex_count
 * parameter is accepted but not consulted by this function. */
static unsigned
|
||||
panfrost_choose_tile_size(
|
||||
unsigned width, unsigned height, unsigned vertex_count)
|
||||
{
|
||||
/* Figure out the ideal tile size. Eventually a heuristic should be
|
||||
* used for this */
|
||||
|
||||
unsigned best_w = 16;
|
||||
unsigned best_h = 16;
|
||||
|
||||
/* Clamp so there are fewer than 64 tiles in each direction */
|
||||
|
||||
best_w = MAX2(best_w, util_next_power_of_two(width / 63));
|
||||
best_h = MAX2(best_h, util_next_power_of_two(height / 63));
|
||||
|
||||
/* We have our ideal tile size, so encode: util_logbase2 of each dimension divided by 16, width result in the low bits, height result shifted left by 6 */
|
||||
|
||||
unsigned exp_w = util_logbase2(best_w / 16);
|
||||
unsigned exp_h = util_logbase2(best_h / 16);
|
||||
|
||||
return exp_w | (exp_h << 6);
|
||||
}
|
||||
|
||||
/* In the future, a heuristic to choose a tiler hierarchy mask would go here.
|
||||
|
|
@ -362,13 +357,16 @@ panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask)
|
|||
unsigned
|
||||
panfrost_choose_hierarchy_mask(
|
||||
unsigned width, unsigned height,
|
||||
unsigned vertex_count)
|
||||
unsigned vertex_count, bool hierarchy)
|
||||
{
|
||||
/* If there is no geometry, we don't bother enabling anything */
|
||||
|
||||
if (!vertex_count)
|
||||
return 0x00;
|
||||
|
||||
if (!hierarchy)
|
||||
return panfrost_choose_tile_size(width, height, vertex_count);
|
||||
|
||||
/* Otherwise, default everything on. TODO: Proper tests */
|
||||
|
||||
return 0xFF;
|
||||
|
|
|
|||
|
|
@ -1392,9 +1392,17 @@ struct mali_payload_fragment {
|
|||
/* See pan_tiler.c for derivation */
|
||||
#define MALI_HIERARCHY_MASK ((1 << 9) - 1)
|
||||
|
||||
/* Flag disabling the tiler for clear-only jobs */
|
||||
/* Flag disabling the tiler for clear-only jobs, with
|
||||
hierarchical tiling */
|
||||
#define MALI_TILER_DISABLED (1 << 12)
|
||||
|
||||
/* Flag selecting userspace-generated polygon list, for clear-only jobs without
|
||||
* hierarchical tiling. */
|
||||
#define MALI_TILER_USER 0xFFF
|
||||
|
||||
/* Absent any geometry, the minimum size of the polygon list header */
|
||||
#define MALI_TILER_MINIMUM_HEADER_SIZE 0x200
|
||||
|
||||
struct midgard_tiler_descriptor {
|
||||
/* Size of the entire polygon list; see pan_tiler.c for the
|
||||
* computation. It's based on hierarchical tiling */
|
||||
|
|
|
|||
|
|
@ -513,7 +513,8 @@ pandecode_midgard_tiler_descriptor(
|
|||
const struct midgard_tiler_descriptor *t,
|
||||
unsigned width,
|
||||
unsigned height,
|
||||
bool is_fragment)
|
||||
bool is_fragment,
|
||||
bool has_hierarchy)
|
||||
{
|
||||
pandecode_log(".tiler = {\n");
|
||||
pandecode_indent++;
|
||||
|
|
@ -546,8 +547,8 @@ pandecode_midgard_tiler_descriptor(
|
|||
/* Now that we've sanity checked, we'll try to calculate the sizes
|
||||
* ourselves for comparison */
|
||||
|
||||
unsigned ref_header = panfrost_tiler_header_size(width, height, t->hierarchy_mask);
|
||||
unsigned ref_size = panfrost_tiler_full_size(width, height, t->hierarchy_mask);
|
||||
unsigned ref_header = panfrost_tiler_header_size(width, height, t->hierarchy_mask, has_hierarchy);
|
||||
unsigned ref_size = panfrost_tiler_full_size(width, height, t->hierarchy_mask, has_hierarchy);
|
||||
|
||||
if (!((ref_header == body_offset) && (ref_size == t->polygon_list_size))) {
|
||||
pandecode_msg("XXX: bad polygon list size (expected %d / 0x%x)\n",
|
||||
|
|
@ -630,44 +631,6 @@ pandecode_midgard_tiler_descriptor(
|
|||
pandecode_log("}\n");
|
||||
}
|
||||
|
||||
/* Log the fields of a tiler descriptor inside a ".tiler = { ... }" group:
 * hierarchy_mask and flags in hex, polygon_list, polygon_list_body,
 * heap_start and heap_end through MEMORY_PROP, and polygon_list_size in hex.
 * The weights array is only printed when at least one entry is nonzero.
 * pandecode_indent is incremented on entry and decremented before the
 * closing brace is logged. */
static void
|
||||
pandecode_midgard_tiler_descriptor_0x20(
|
||||
const struct midgard_tiler_descriptor *t)
|
||||
{
|
||||
pandecode_log(".tiler = {\n");
|
||||
pandecode_indent++;
|
||||
|
||||
pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
|
||||
pandecode_prop("flags = 0x%" PRIx16, t->flags);
|
||||
MEMORY_PROP(t, polygon_list);
|
||||
MEMORY_PROP(t, polygon_list_body);
|
||||
pandecode_prop("polygon_list_size = 0x%x", t->polygon_list_size);
|
||||
MEMORY_PROP(t, heap_start);
|
||||
MEMORY_PROP(t, heap_end);
|
||||
|
||||
/* We've never seen weights used in practice, but we know from the
|
||||
* kernel these fields are there */
|
||||
|
||||
bool nonzero_weights = false;
|
||||
|
||||
for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
|
||||
nonzero_weights |= t->weights[w] != 0x0;
|
||||
}
|
||||
|
||||
if (nonzero_weights) {
|
||||
pandecode_log(".weights = {");
|
||||
|
||||
for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
|
||||
pandecode_log("%d, ", t->weights[w]);
|
||||
}
|
||||
|
||||
pandecode_log("},");
|
||||
}
|
||||
|
||||
pandecode_indent--;
|
||||
pandecode_log("}\n");
|
||||
}
|
||||
|
||||
/* Information about the framebuffer passed back for
|
||||
* additional analysis */
|
||||
|
||||
|
|
@ -792,11 +755,9 @@ pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
|
|||
|
||||
MEMORY_PROP(s, unknown_address_0);
|
||||
const struct midgard_tiler_descriptor t = s->tiler;
|
||||
if (gpu_id == 0x0720 || gpu_id == 0x0820 || gpu_id == 0x0830)
|
||||
/* These ones don't have an "Advanced Tiling Unit" */
|
||||
pandecode_midgard_tiler_descriptor_0x20(&t);
|
||||
else
|
||||
pandecode_midgard_tiler_descriptor(&t, s->width + 1, s->height + 1, is_fragment);
|
||||
|
||||
bool has_hierarchy = !(gpu_id == 0x0720 || gpu_id == 0x0820 || gpu_id == 0x0830);
|
||||
pandecode_midgard_tiler_descriptor(&t, s->width + 1, s->height + 1, is_fragment, has_hierarchy);
|
||||
|
||||
pandecode_indent--;
|
||||
pandecode_log("};\n");
|
||||
|
|
@ -1157,7 +1118,7 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment)
|
|||
pandecode_prop("unknown2 = 0x%x", fb->unknown2);
|
||||
MEMORY_PROP(fb, scratchpad);
|
||||
const struct midgard_tiler_descriptor t = fb->tiler;
|
||||
pandecode_midgard_tiler_descriptor(&t, fb->width1 + 1, fb->height1 + 1, is_fragment);
|
||||
pandecode_midgard_tiler_descriptor(&t, fb->width1 + 1, fb->height1 + 1, is_fragment, true);
|
||||
|
||||
if (fb->zero3 || fb->zero4) {
|
||||
pandecode_msg("XXX: framebuffer zeros tripped\n");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue