panfrost: Rename tiler fields per tiler research

Following the research into Midgard's hierarchical tiling
infrastructure, we now understand (in broad stokes) the purpose of each
tiler field in the MFBD. Additionally, we understand more of the tiling
fields in the SFBD and in Bifrost's structures, although this knowledge
is still incomplete.

Update the names, decoder, and comments to reflect this new
understanding.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
Alyssa Rosenzweig 2019-06-13 10:25:32 -07:00
parent 8d6fb66e3a
commit 7f26bb3553
3 changed files with 55 additions and 61 deletions

View file

@ -963,7 +963,8 @@ struct bifrost_tiler_heap_meta {
struct bifrost_tiler_meta { struct bifrost_tiler_meta {
u64 zero0; u64 zero0;
u32 unk; // = 0xf0 u16 hierarchy_mask;
u16 flags;
u16 width; u16 width;
u16 height; u16 height;
u64 zero1; u64 zero1;
@ -1362,13 +1363,18 @@ struct mali_single_framebuffer {
u32 zero6[7]; u32 zero6[7];
/* Very weird format, see generation code in trans_builder.c */ /* Logically, by symmetry to the MFBD, this ought to be the size of the
u32 tiler_resolution_check; * polygon list. But this doesn't quite compute up. More investigation
u32 tiler_flags; * is needed. */
/* Guesses? */ u32 tiler_resolution_check;
mali_ptr tiler_scratch_start; /* Pointing towards... a zero buffer? */
mali_ptr tiler_scratch_middle; u16 tiler_hierarchy_mask;
u16 tiler_flags;
/* See pan_tiler.c */
mali_ptr tiler_polygon_list;
mali_ptr tiler_polygon_list_body;
/* See mali_kbase_replay.c */ /* See mali_kbase_replay.c */
mali_ptr tiler_heap_free; mali_ptr tiler_heap_free;
@ -1523,21 +1529,23 @@ struct bifrost_framebuffer {
/* Tiler section begins here */ /* Tiler section begins here */
u32 tiler_unknown; u32 tiler_polygon_list_size;
/* Name known from the replay workaround in the kernel. What exactly is /* Name known from the replay workaround in the kernel. What exactly is
* flagged here is less known. We do that (tiler_flags & 0x1ff) * flagged here is less known. We do that (tiler_hierarchy_mask & 0x1ff)
* specifies a mask of hierarchy weights, which explains some of the * specifies a mask of hierarchy weights, which explains some of the
* performance mysteries around setting it. We also known (1 << 16) * performance mysteries around setting it. We also see the bottom bit
* should be set, but there's no explanation in the kernel why. */ * of tiler_flags set in the kernel, but no comment why. */
u32 tiler_flags;
/* Note: these are guesses! */ u16 tiler_hierarchy_mask;
mali_ptr tiler_scratch_start; u16 tiler_flags;
mali_ptr tiler_scratch_middle;
/* These are not, since we see symmetry with replay /* See mali_tiler.c for an explanation */
* jobs which name these explicitly */ mali_ptr tiler_polygon_list;
mali_ptr tiler_polygon_list_body;
/* Names based on we see symmetry with replay jobs which name these
* explicitly */
mali_ptr tiler_heap_start; /* tiler heap_free_address */ mali_ptr tiler_heap_start; /* tiler heap_free_address */
mali_ptr tiler_heap_end; mali_ptr tiler_heap_end;

View file

@ -118,9 +118,10 @@ panfrost_emit_sfbd(struct panfrost_context *ctx)
.format = 0x30000000, .format = 0x30000000,
.clear_flags = 0x1000, .clear_flags = 0x1000,
.unknown_address_0 = ctx->scratchpad.gpu, .unknown_address_0 = ctx->scratchpad.gpu,
.tiler_scratch_start = ctx->misc_0.gpu, .tiler_polygon_list = ctx->misc_0.gpu,
.tiler_scratch_middle = ctx->misc_0.gpu + 40960, .tiler_polygon_list_body = ctx->misc_0.gpu + 40960,
.tiler_flags = 0xf0, .tiler_hierarchy_mask = 0xF0,
.tiler_flags = 0x0,
.tiler_heap_free = ctx->tiler_heap.gpu, .tiler_heap_free = ctx->tiler_heap.gpu,
.tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
}; };
@ -134,22 +135,22 @@ struct bifrost_framebuffer
panfrost_emit_mfbd(struct panfrost_context *ctx) panfrost_emit_mfbd(struct panfrost_context *ctx)
{ {
struct bifrost_framebuffer framebuffer = { struct bifrost_framebuffer framebuffer = {
/* It is not yet clear what this means or how it's /* The lower 0x1ff controls the hierarchy mask. Set more bits
* calculated, but we can tell it is a (monotonically
* increasing?) function of tile count and geometry complexity;
* I suspect it defines a memory size of some kind? for the
* tiler. It's really unclear at the moment... but to add to
* the confusion, the hardware is happy enough to accept a zero
* in this field, so we don't even have to worry about it right
* now. */
.tiler_unknown = 0x0,
/* The lower 0xff controls the hierarchy mask. Set more bits
* on for more tile granularity (which can be a performance win * on for more tile granularity (which can be a performance win
* on some scenes, at memory bandwidth costs). For now, be lazy * on some scenes, at memory bandwidth costs). For now, be lazy
* and enable everything. This might be a terrible idea. */ * and enable everything. This might be a terrible idea. */
.tiler_flags = 0xff,
.tiler_hierarchy_mask = 0xff,
.tiler_flags = 0x0,
/* The hardware deals with suballocation; we don't care */
.tiler_heap_start = ctx->tiler_heap.gpu,
.tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
/* See pan_tiler.c */
.tiler_polygon_list = ctx->misc_0.gpu,
.tiler_polygon_list_body = ctx->misc_0.gpu + 0xf0000,
.tiler_polygon_list_size = 0x0,
.width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width), .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
.height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height), .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
@ -164,26 +165,7 @@ panfrost_emit_mfbd(struct panfrost_context *ctx)
.unknown2 = 0x1f, .unknown2 = 0x1f,
/* Corresponds to unknown_address_X of SFBD */
.scratchpad = ctx->scratchpad.gpu, .scratchpad = ctx->scratchpad.gpu,
.tiler_scratch_start = ctx->misc_0.gpu,
/* The constant added here is, like the lower word of
* tiler_meta, (loosely) another product of framebuffer size
* and geometry complexity. It must be sufficiently large for
* the tiler_meta fast path to work; if it's too small, there
* will be DATA_INVALID_FAULTs. Conversely, it must be less
* than the total size of misc_0, or else there's no room. It's
* possible this constant configures a partition between two
* parts of misc_0? We haven't investigated the functionality,
* as these buffers are internally used by the hardware
* (presumably by the tiler) but not seemingly touched by the driver
*/
.tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,
.tiler_heap_start = ctx->tiler_heap.gpu,
.tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
}; };
return framebuffer; return framebuffer;

View file

@ -463,11 +463,12 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
} }
MEMORY_PROP(s, unknown_address_0); MEMORY_PROP(s, unknown_address_0);
MEMORY_PROP(s, tiler_scratch_start); MEMORY_PROP(s, tiler_polygon_list);
MEMORY_PROP(s, tiler_scratch_middle); MEMORY_PROP(s, tiler_polygon_list_body);
pandecode_prop("tiler_resolution_check = 0x%" PRIx32, s->tiler_resolution_check); pandecode_prop("tiler_resolution_check = 0x%" PRIx32, s->tiler_resolution_check);
pandecode_prop("tiler_flags = 0x%" PRIx32, s->tiler_flags); pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, s->tiler_hierarchy_mask);
pandecode_prop("tiler_flags = 0x%" PRIx16, s->tiler_flags);
MEMORY_PROP(s, tiler_heap_free); MEMORY_PROP(s, tiler_heap_free);
MEMORY_PROP(s, tiler_heap_end); MEMORY_PROP(s, tiler_heap_end);
@ -644,8 +645,9 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
* now */ * now */
MEMORY_PROP(fb, unknown1); MEMORY_PROP(fb, unknown1);
pandecode_prop("tiler_unknown = 0x%x", fb->tiler_unknown); pandecode_prop("tiler_polygon_list_size = 0x%x", fb->tiler_polygon_list_size);
pandecode_prop("tiler_flags = 0x%x", fb->tiler_flags); pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, fb->tiler_hierarchy_mask);
pandecode_prop("tiler_flags = 0x%" PRIx16, fb->tiler_flags);
pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1); pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1);
pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1); pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1);
@ -663,8 +665,8 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
pandecode_prop("unknown2 = 0x%x", fb->unknown2); pandecode_prop("unknown2 = 0x%x", fb->unknown2);
MEMORY_PROP(fb, scratchpad); MEMORY_PROP(fb, scratchpad);
MEMORY_PROP(fb, tiler_scratch_start); MEMORY_PROP(fb, tiler_polygon_list);
MEMORY_PROP(fb, tiler_scratch_middle); MEMORY_PROP(fb, tiler_polygon_list_body);
MEMORY_PROP(fb, tiler_heap_start); MEMORY_PROP(fb, tiler_heap_start);
MEMORY_PROP(fb, tiler_heap_end); MEMORY_PROP(fb, tiler_heap_end);
@ -1741,7 +1743,7 @@ pandecode_replay_tiler_meta(mali_ptr gpu_va, int job_no)
pandecode_replay_tiler_heap_meta(t->tiler_heap_meta, job_no); pandecode_replay_tiler_heap_meta(t->tiler_heap_meta, job_no);
pandecode_log("struct mali_tiler_meta tiler_meta_%d = {\n", job_no); pandecode_log("struct bifrost_tiler_meta tiler_meta_%d = {\n", job_no);
pandecode_indent++; pandecode_indent++;
if (t->zero0 || t->zero1) { if (t->zero0 || t->zero1) {
@ -1750,7 +1752,9 @@ pandecode_replay_tiler_meta(mali_ptr gpu_va, int job_no)
pandecode_prop("zero1 = 0x%" PRIx64, t->zero1); pandecode_prop("zero1 = 0x%" PRIx64, t->zero1);
} }
pandecode_prop("unk = 0x%x", t->unk); pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
pandecode_prop("flags = 0x%" PRIx16, t->flags);
pandecode_prop("width = MALI_POSITIVE(%d)", t->width + 1); pandecode_prop("width = MALI_POSITIVE(%d)", t->width + 1);
pandecode_prop("height = MALI_POSITIVE(%d)", t->height + 1); pandecode_prop("height = MALI_POSITIVE(%d)", t->height + 1);
DYN_MEMORY_PROP(t, job_no, tiler_heap_meta); DYN_MEMORY_PROP(t, job_no, tiler_heap_meta);