panfrost: Identify shared tiler structure

This is identical across SFBD/MFBD so pull it out to allow for better
code sharing.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
Alyssa Rosenzweig 2019-07-10 07:22:19 -07:00
parent 6eb99c78e2
commit 31fc52a4e7
3 changed files with 142 additions and 135 deletions

View file

@ -1334,6 +1334,40 @@ struct mali_payload_fragment {
#define MALI_CLEAR_SLOW (1 << 28)
#define MALI_CLEAR_SLOW_STENCIL (1 << 31)
/* Configures hierarchical tiling on Midgard for both SFBD/MFBD (embedded
 * within the larger framebuffer descriptor). Analogous to
 * bifrost_tiler_heap_meta and bifrost_tiler_meta.
 *
 * Embedded as the `tiler` member of both struct mali_single_framebuffer and
 * struct bifrost_framebuffer; filled in by panfrost_emit_midg_tiler() and
 * dumped by pandecode_midgard_tiler_descriptor().
 *
 * NOTE(review): presumably the member order and widths mirror a
 * hardware-visible layout, so do not reorder or resize fields -- confirm. */
struct midgard_tiler_descriptor {
/* Size of the entire polygon list; see pan_tiler.c for the
 * computation. It's based on hierarchical tiling. The driver writes
 * header size + body size here. */
u32 polygon_list_size;
/* Name known from the replay workaround in the kernel. What exactly is
 * flagged here is less known. We do know that (hierarchy_mask & 0x1ff)
 * specifies a mask of hierarchy weights, which explains some of the
 * performance mysteries around setting it. We also see the bottom bit
 * of flags set in the kernel, but no comment why.
 *
 * The driver additionally ORs 0x1000 into hierarchy_mask when the chosen
 * mask is zero (tentatively a "tiler disabled" flag). */
u16 hierarchy_mask;
u16 flags;
/* See mali_tiler.c for an explanation.
 * NOTE(review): other comments here point at pan_tiler.c; confirm which
 * file name is current.
 *
 * polygon_list is the GPU address of the polygon list; the driver sets
 * polygon_list_body to polygon_list plus the polygon header size. */
mali_ptr polygon_list;
mali_ptr polygon_list_body;
/* Names are based on the symmetry we see with replay jobs, which name
 * these explicitly. The driver sets heap_end to heap_start plus the heap
 * size, or equal to heap_start when the hierarchy mask is zero. */
mali_ptr heap_start; /* tiler heap_free_address */
mali_ptr heap_end;
/* Hierarchy weights. We know these are weights based on the kernel,
 * but I've never seen them be anything other than zero */
u32 weights[8];
};
struct mali_single_framebuffer {
u32 unknown1;
u32 unknown2;
@ -1394,22 +1428,7 @@ struct mali_single_framebuffer {
u32 zero6[7];
/* Logically, by symmetry to the MFBD, this ought to be the size of the
* polygon list. But this doesn't quite compute up. More investigation
* is needed. */
u32 tiler_resolution_check;
u16 tiler_hierarchy_mask;
u16 tiler_flags;
/* See pan_tiler.c */
mali_ptr tiler_polygon_list;
mali_ptr tiler_polygon_list_body;
/* See mali_kbase_replay.c */
mali_ptr tiler_heap_free;
mali_ptr tiler_heap_end;
struct midgard_tiler_descriptor tiler;
/* More below this, maybe */
} __attribute__((packed));
@ -1574,30 +1593,7 @@ struct bifrost_framebuffer {
u32 mfbd_flags : 24; // = 0x100
float clear_depth;
/* Tiler section begins here */
u32 tiler_polygon_list_size;
/* Name known from the replay workaround in the kernel. What exactly is
* flagged here is less known. We do that (tiler_hierarchy_mask & 0x1ff)
* specifies a mask of hierarchy weights, which explains some of the
* performance mysteries around setting it. We also see the bottom bit
* of tiler_flags set in the kernel, but no comment why. */
u16 tiler_hierarchy_mask;
u16 tiler_flags;
/* See mali_tiler.c for an explanation */
mali_ptr tiler_polygon_list;
mali_ptr tiler_polygon_list_body;
/* Names based on we see symmetry with replay jobs which name these
* explicitly */
mali_ptr tiler_heap_start; /* tiler heap_free_address */
mali_ptr tiler_heap_end;
u32 tiler_weights[8];
struct midgard_tiler_descriptor tiler;
/* optional: struct bifrost_fb_extra extra */
/* struct bifrost_render_target rts[] */

View file

@ -78,38 +78,78 @@ panfrost_job_type_for_pipe(enum pipe_shader_type type)
/* Framebuffer descriptor */
static void
panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
/* Builds the Midgard tiler descriptor shared by the SFBD and MFBD paths.
 *
 * ctx:          context supplying the tiler_polygon_list, tiler_heap and
 *               tiler_dummy buffer objects
 * width/height: framebuffer dimensions, forwarded to the hierarchy mask and
 *               polygon list size helpers
 * vertex_count: forwarded to panfrost_choose_hierarchy_mask()
 *
 * Returns the descriptor by value. When the chosen hierarchy mask is nonzero
 * the real polygon list and the whole tiler heap are exposed; when it is zero
 * the heap range is empty (heap_end == heap_start), the dummy polygon list is
 * used and 0x1000 is ORed into hierarchy_mask.
 *
 * NOTE(review): this listing is a flattened diff. The statements below that
 * reference `fb`, `w` and `h`, and the "resolution_check" comment, appear to
 * be pre-commit lines of the replaced panfrost_set_framebuffer_resolution();
 * they are kept verbatim here -- confirm against the real tree. */
static struct midgard_tiler_descriptor
panfrost_emit_midg_tiler(
struct panfrost_context *ctx,
unsigned width,
unsigned height,
unsigned vertex_count)
{
fb->width = MALI_POSITIVE(w);
fb->height = MALI_POSITIVE(h);
/* Start from an all-zero descriptor; flags and weights are never written
 * below and so stay zero */
struct midgard_tiler_descriptor t = {};
/* No idea why this is needed, but it's how resolution_check is
* calculated. It's not clear to us yet why the hardware wants this.
* The formula itself was discovered mostly by manual bruteforce and
* aggressive algebraic simplification. */
t.hierarchy_mask =
panfrost_choose_hierarchy_mask(width, height, vertex_count);
fb->tiler_resolution_check = ((w + h) / 3) << 4;
/* Compute the polygon header size and use that to offset the body */
unsigned header_size = panfrost_tiler_header_size(
width, height, t.hierarchy_mask);
unsigned body_size = panfrost_tiler_body_size(
width, height, t.hierarchy_mask);
/* Sanity check: total_size is only consumed by the assert below */
unsigned total_size = header_size + body_size;
if (t.hierarchy_mask) {
/* The preallocated polygon list must hold header + body */
assert(ctx->tiler_polygon_list.bo->size >= total_size);
/* Specify allocated tiler structures */
t.polygon_list = ctx->tiler_polygon_list.bo->gpu;
/* Allow the entire tiler heap */
t.heap_start = ctx->tiler_heap.bo->gpu;
t.heap_end =
ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size;
} else {
/* The tiler is disabled, so don't allow the tiler heap */
t.heap_start = ctx->tiler_heap.bo->gpu;
t.heap_end = t.heap_start;
/* Use a dummy polygon list */
t.polygon_list = ctx->tiler_dummy.bo->gpu;
/* Also, set a "tiler disabled?" flag? */
t.hierarchy_mask |= 0x1000;
}
/* The body follows the header inside whichever polygon list was selected */
t.polygon_list_body =
t.polygon_list + header_size;
t.polygon_list_size =
header_size + body_size;
return t;
}
/* Builds the single framebuffer descriptor (SFBD) for the context's current
 * framebuffer.
 *
 * ctx:          context supplying the framebuffer dimensions
 *               (ctx->pipe_framebuffer) and the scratchpad buffer object
 * vertex_count: forwarded to panfrost_emit_midg_tiler()
 *
 * Returns the descriptor by value; the embedded tiler descriptor comes from
 * panfrost_emit_midg_tiler().
 *
 * NOTE(review): this listing is a flattened diff. The tiler_* initializers
 * and the trailing panfrost_set_framebuffer_resolution() call appear to be
 * pre-commit lines superseded by `.tiler`; they are kept verbatim here --
 * confirm against the real tree. */
struct mali_single_framebuffer
panfrost_emit_sfbd(struct panfrost_context *ctx, unsigned vertex_count)
{
        unsigned width = ctx->pipe_framebuffer.width;
        unsigned height = ctx->pipe_framebuffer.height;
        struct mali_single_framebuffer framebuffer = {
                .width = MALI_POSITIVE(width),
                /* Must be derived from the height; this was previously
                 * (incorrectly) computed from the width */
                .height = MALI_POSITIVE(height),
                .unknown2 = 0x1f,
                .format = 0x30000000,
                .clear_flags = 0x1000,
                .unknown_address_0 = ctx->scratchpad.bo->gpu,
                .tiler_polygon_list = ctx->tiler_polygon_list.bo->gpu,
                .tiler_polygon_list_body = ctx->tiler_polygon_list.bo->gpu + 40960,
                .tiler_hierarchy_mask = 0xF0,
                .tiler_flags = 0x0,
                .tiler_heap_free = ctx->tiler_heap.bo->gpu,
                .tiler_heap_end = ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size,
                .tiler = panfrost_emit_midg_tiler(ctx,
                                width, height, vertex_count),
        };
        panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);
        return framebuffer;
}
@ -134,53 +174,10 @@ panfrost_emit_mfbd(struct panfrost_context *ctx, unsigned vertex_count)
.unknown2 = 0x1f,
.scratchpad = ctx->scratchpad.bo->gpu,
.tiler = panfrost_emit_midg_tiler(ctx,
width, height, vertex_count)
};
framebuffer.tiler_hierarchy_mask =
panfrost_choose_hierarchy_mask(width, height, vertex_count);
/* Compute the polygon header size and use that to offset the body */
unsigned header_size = panfrost_tiler_header_size(
width, height, framebuffer.tiler_hierarchy_mask);
unsigned body_size = panfrost_tiler_body_size(
width, height, framebuffer.tiler_hierarchy_mask);
/* Sanity check */
unsigned total_size = header_size + body_size;
if (framebuffer.tiler_hierarchy_mask) {
assert(ctx->tiler_polygon_list.bo->size >= total_size);
/* Specify allocated tiler structures */
framebuffer.tiler_polygon_list = ctx->tiler_polygon_list.bo->gpu;
/* Allow the entire tiler heap */
framebuffer.tiler_heap_start = ctx->tiler_heap.bo->gpu;
framebuffer.tiler_heap_end =
ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size;
} else {
/* The tiler is disabled, so don't allow the tiler heap */
framebuffer.tiler_heap_start = ctx->tiler_heap.bo->gpu;
framebuffer.tiler_heap_end = framebuffer.tiler_heap_start;
/* Use a dummy polygon list */
framebuffer.tiler_polygon_list = ctx->tiler_dummy.bo->gpu;
/* Also, set a "tiler disabled?" flag? */
framebuffer.tiler_hierarchy_mask |= 0x1000;
}
framebuffer.tiler_polygon_list_body =
framebuffer.tiler_polygon_list + header_size;
framebuffer.tiler_polygon_list_size =
header_size + body_size;
return framebuffer;
}

View file

@ -444,6 +444,51 @@ pandecode_decode_fbd_type(enum mali_fbd_type type)
else return "WATFBD /* XXX */";
}
/* Midgard's tiler descriptor is embedded within the
 * larger FBD.
 *
 * Dumps the descriptor `t` as a nested ".tiler = { ... }" initializer:
 * hierarchy_mask, flags and polygon_list_size as hex, then the polygon list
 * and heap pointers as memory references. The weights array is printed only
 * when at least one entry is nonzero. */
static void
pandecode_midgard_tiler_descriptor(const struct midgard_tiler_descriptor *t)
{
        pandecode_log(".tiler = {\n");
        pandecode_indent++;

        pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
        pandecode_prop("flags = 0x%" PRIx16, t->flags);
        pandecode_prop("polygon_list_size = 0x%x", t->polygon_list_size);

        MEMORY_PROP(t, polygon_list);
        MEMORY_PROP(t, polygon_list_body);
        MEMORY_PROP(t, heap_start);

        {
                /* Points to the end of a buffer, so resolve the last byte
                 * inside it and print the result as "<ref> + 1" */
                char *a = pointer_as_memory_reference(t->heap_end - 1);
                pandecode_prop("heap_end = %s + 1", a);
                free(a);
        }

        /* Only dump the weights if any of them is set */
        bool nonzero_weights = false;

        for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
                nonzero_weights |= t->weights[w] != 0x0;
        }

        if (nonzero_weights) {
                pandecode_log(".weights = {");

                for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
                        /* Weights are u32: print them unsigned rather than
                         * with a signed "%d" conversion */
                        pandecode_log("%" PRIu32 ", ", t->weights[w]);
                }

                /* NOTE(review): no newline is emitted after the closing
                 * brace here; confirm whether that is intended */
                pandecode_log("},");
        }

        pandecode_indent--;
        pandecode_log("}\n");
}
static void
pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
{
@ -502,15 +547,7 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
}
MEMORY_PROP(s, unknown_address_0);
MEMORY_PROP(s, tiler_polygon_list);
MEMORY_PROP(s, tiler_polygon_list_body);
pandecode_prop("tiler_resolution_check = 0x%" PRIx32, s->tiler_resolution_check);
pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, s->tiler_hierarchy_mask);
pandecode_prop("tiler_flags = 0x%" PRIx16, s->tiler_flags);
MEMORY_PROP(s, tiler_heap_free);
MEMORY_PROP(s, tiler_heap_end);
pandecode_midgard_tiler_descriptor(&s->tiler);
pandecode_indent--;
pandecode_log("};\n");
@ -716,10 +753,6 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
* now */
MEMORY_PROP(fb, unknown1);
pandecode_prop("tiler_polygon_list_size = 0x%x", fb->tiler_polygon_list_size);
pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, fb->tiler_hierarchy_mask);
pandecode_prop("tiler_flags = 0x%" PRIx16, fb->tiler_flags);
pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1);
pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1);
pandecode_prop("width2 = MALI_POSITIVE(%d)", fb->width2 + 1);
@ -739,10 +772,7 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
pandecode_prop("unknown2 = 0x%x", fb->unknown2);
MEMORY_PROP(fb, scratchpad);
MEMORY_PROP(fb, tiler_polygon_list);
MEMORY_PROP(fb, tiler_polygon_list_body);
MEMORY_PROP(fb, tiler_heap_start);
MEMORY_PROP(fb, tiler_heap_end);
pandecode_midgard_tiler_descriptor(&fb->tiler);
if (fb->zero3 || fb->zero4) {
pandecode_msg("framebuffer zeros tripped\n");
@ -750,22 +780,6 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
pandecode_prop("zero4 = 0x%" PRIx32, fb->zero4);
}
bool nonzero_weights = false;
for (unsigned w = 0; w < ARRAY_SIZE(fb->tiler_weights); ++w) {
nonzero_weights |= fb->tiler_weights[w] != 0x0;
}
if (nonzero_weights) {
pandecode_log(".tiler_weights = {");
for (unsigned w = 0; w < ARRAY_SIZE(fb->tiler_weights); ++w) {
pandecode_log("%d, ", fb->tiler_weights[w]);
}
pandecode_log("},");
}
pandecode_indent--;
pandecode_log("};\n");