mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-24 14:40:21 +01:00
panfrost: Identify shared tiler structure
This is identical across SFBD/MFBD so pull it out to allow for better code sharing. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
parent
6eb99c78e2
commit
31fc52a4e7
3 changed files with 142 additions and 135 deletions
|
|
@ -1334,6 +1334,40 @@ struct mali_payload_fragment {
|
|||
#define MALI_CLEAR_SLOW (1 << 28)
|
||||
#define MALI_CLEAR_SLOW_STENCIL (1 << 31)
|
||||
|
||||
/* Configures hierarchical tiling on Midgard for both SFBD/MFBD (embedded
|
||||
* within the larger framebuffer descriptor). Analogous to
|
||||
* bifrost_tiler_heap_meta and bifrost_tiler_meta */
|
||||
|
||||
struct midgard_tiler_descriptor {
|
||||
/* Size of the entire polygon list; see pan_tiler.c for the
|
||||
* computation. It's based on hierarchical tiling */
|
||||
|
||||
u32 polygon_list_size;
|
||||
|
||||
/* Name known from the replay workaround in the kernel. What exactly is
|
||||
* flagged here is less known. We do know that (tiler_hierarchy_mask & 0x1ff)
|
||||
* specifies a mask of hierarchy weights, which explains some of the
|
||||
* performance mysteries around setting it. We also see the bottom bit
|
||||
* of tiler_flags set in the kernel, but no comment why. */
|
||||
|
||||
u16 hierarchy_mask;
|
||||
u16 flags;
|
||||
|
||||
/* See mali_tiler.c for an explanation */
|
||||
mali_ptr polygon_list;
|
||||
mali_ptr polygon_list_body;
|
||||
|
||||
/* Names are based on the symmetry we see with replay jobs, which name these
|
||||
* explicitly */
|
||||
|
||||
mali_ptr heap_start; /* tiler heap_free_address */
|
||||
mali_ptr heap_end; /* exclusive end of the heap range; equals heap_start when no heap is allowed */
|
||||
|
||||
/* Hierarchy weights. We know these are weights based on the kernel,
|
||||
* but I've never seen them be anything other than zero */
|
||||
u32 weights[8];
|
||||
};
|
||||
|
||||
struct mali_single_framebuffer {
|
||||
u32 unknown1;
|
||||
u32 unknown2;
|
||||
|
|
@ -1394,22 +1428,7 @@ struct mali_single_framebuffer {
|
|||
|
||||
u32 zero6[7];
|
||||
|
||||
/* Logically, by symmetry to the MFBD, this ought to be the size of the
|
||||
* polygon list. But this doesn't quite compute up. More investigation
|
||||
* is needed. */
|
||||
|
||||
u32 tiler_resolution_check;
|
||||
|
||||
u16 tiler_hierarchy_mask;
|
||||
u16 tiler_flags;
|
||||
|
||||
/* See pan_tiler.c */
|
||||
mali_ptr tiler_polygon_list;
|
||||
mali_ptr tiler_polygon_list_body;
|
||||
|
||||
/* See mali_kbase_replay.c */
|
||||
mali_ptr tiler_heap_free;
|
||||
mali_ptr tiler_heap_end;
|
||||
struct midgard_tiler_descriptor tiler;
|
||||
|
||||
/* More below this, maybe */
|
||||
} __attribute__((packed));
|
||||
|
|
@ -1574,30 +1593,7 @@ struct bifrost_framebuffer {
|
|||
u32 mfbd_flags : 24; // = 0x100
|
||||
float clear_depth;
|
||||
|
||||
|
||||
/* Tiler section begins here */
|
||||
u32 tiler_polygon_list_size;
|
||||
|
||||
/* Name known from the replay workaround in the kernel. What exactly is
|
||||
* flagged here is less known. We do that (tiler_hierarchy_mask & 0x1ff)
|
||||
* specifies a mask of hierarchy weights, which explains some of the
|
||||
* performance mysteries around setting it. We also see the bottom bit
|
||||
* of tiler_flags set in the kernel, but no comment why. */
|
||||
|
||||
u16 tiler_hierarchy_mask;
|
||||
u16 tiler_flags;
|
||||
|
||||
/* See mali_tiler.c for an explanation */
|
||||
mali_ptr tiler_polygon_list;
|
||||
mali_ptr tiler_polygon_list_body;
|
||||
|
||||
/* Names based on we see symmetry with replay jobs which name these
|
||||
* explicitly */
|
||||
|
||||
mali_ptr tiler_heap_start; /* tiler heap_free_address */
|
||||
mali_ptr tiler_heap_end;
|
||||
|
||||
u32 tiler_weights[8];
|
||||
struct midgard_tiler_descriptor tiler;
|
||||
|
||||
/* optional: struct bifrost_fb_extra extra */
|
||||
/* struct bifrost_render_target rts[] */
|
||||
|
|
|
|||
|
|
@ -78,38 +78,78 @@ panfrost_job_type_for_pipe(enum pipe_shader_type type)
|
|||
|
||||
/* Framebuffer descriptor */
|
||||
|
||||
static void
|
||||
panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
|
||||
static struct midgard_tiler_descriptor
|
||||
panfrost_emit_midg_tiler(
|
||||
struct panfrost_context *ctx,
|
||||
unsigned width,
|
||||
unsigned height,
|
||||
unsigned vertex_count)
|
||||
{
|
||||
fb->width = MALI_POSITIVE(w);
|
||||
fb->height = MALI_POSITIVE(h);
|
||||
struct midgard_tiler_descriptor t = {};
|
||||
|
||||
/* No idea why this is needed, but it's how resolution_check is
|
||||
* calculated. It's not clear to us yet why the hardware wants this.
|
||||
* The formula itself was discovered mostly by manual bruteforce and
|
||||
* aggressive algebraic simplification. */
|
||||
t.hierarchy_mask =
|
||||
panfrost_choose_hierarchy_mask(width, height, vertex_count);
|
||||
|
||||
fb->tiler_resolution_check = ((w + h) / 3) << 4;
|
||||
/* Compute the polygon header size and use that to offset the body */
|
||||
|
||||
unsigned header_size = panfrost_tiler_header_size(
|
||||
width, height, t.hierarchy_mask);
|
||||
|
||||
unsigned body_size = panfrost_tiler_body_size(
|
||||
width, height, t.hierarchy_mask);
|
||||
|
||||
/* Sanity check */
|
||||
|
||||
unsigned total_size = header_size + body_size;
|
||||
|
||||
if (t.hierarchy_mask) {
|
||||
assert(ctx->tiler_polygon_list.bo->size >= total_size);
|
||||
|
||||
/* Specify allocated tiler structures */
|
||||
t.polygon_list = ctx->tiler_polygon_list.bo->gpu;
|
||||
|
||||
/* Allow the entire tiler heap */
|
||||
t.heap_start = ctx->tiler_heap.bo->gpu;
|
||||
t.heap_end =
|
||||
ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size;
|
||||
} else {
|
||||
/* The tiler is disabled, so don't allow the tiler heap */
|
||||
t.heap_start = ctx->tiler_heap.bo->gpu;
|
||||
t.heap_end = t.heap_start;
|
||||
|
||||
/* Use a dummy polygon list */
|
||||
t.polygon_list = ctx->tiler_dummy.bo->gpu;
|
||||
|
||||
/* Also, set a "tiler disabled?" flag? */
|
||||
t.hierarchy_mask |= 0x1000;
|
||||
}
|
||||
|
||||
t.polygon_list_body =
|
||||
t.polygon_list + header_size;
|
||||
|
||||
t.polygon_list_size =
|
||||
header_size + body_size;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
struct mali_single_framebuffer
|
||||
panfrost_emit_sfbd(struct panfrost_context *ctx, unsigned vertex_count)
|
||||
{
|
||||
unsigned width = ctx->pipe_framebuffer.width;
|
||||
unsigned height = ctx->pipe_framebuffer.height;
|
||||
|
||||
struct mali_single_framebuffer framebuffer = {
|
||||
.width = MALI_POSITIVE(width),
|
||||
.height = MALI_POSITIVE(width),
|
||||
.unknown2 = 0x1f,
|
||||
.format = 0x30000000,
|
||||
.clear_flags = 0x1000,
|
||||
.unknown_address_0 = ctx->scratchpad.bo->gpu,
|
||||
.tiler_polygon_list = ctx->tiler_polygon_list.bo->gpu,
|
||||
.tiler_polygon_list_body = ctx->tiler_polygon_list.bo->gpu + 40960,
|
||||
.tiler_hierarchy_mask = 0xF0,
|
||||
.tiler_flags = 0x0,
|
||||
.tiler_heap_free = ctx->tiler_heap.bo->gpu,
|
||||
.tiler_heap_end = ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size,
|
||||
.tiler = panfrost_emit_midg_tiler(ctx,
|
||||
width, height, vertex_count),
|
||||
};
|
||||
|
||||
panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);
|
||||
|
||||
return framebuffer;
|
||||
}
|
||||
|
||||
|
|
@ -134,53 +174,10 @@ panfrost_emit_mfbd(struct panfrost_context *ctx, unsigned vertex_count)
|
|||
.unknown2 = 0x1f,
|
||||
|
||||
.scratchpad = ctx->scratchpad.bo->gpu,
|
||||
.tiler = panfrost_emit_midg_tiler(ctx,
|
||||
width, height, vertex_count)
|
||||
};
|
||||
|
||||
framebuffer.tiler_hierarchy_mask =
|
||||
panfrost_choose_hierarchy_mask(width, height, vertex_count);
|
||||
|
||||
/* Compute the polygon header size and use that to offset the body */
|
||||
|
||||
unsigned header_size = panfrost_tiler_header_size(
|
||||
width, height, framebuffer.tiler_hierarchy_mask);
|
||||
|
||||
unsigned body_size = panfrost_tiler_body_size(
|
||||
width, height, framebuffer.tiler_hierarchy_mask);
|
||||
|
||||
/* Sanity check */
|
||||
|
||||
unsigned total_size = header_size + body_size;
|
||||
|
||||
if (framebuffer.tiler_hierarchy_mask) {
|
||||
assert(ctx->tiler_polygon_list.bo->size >= total_size);
|
||||
|
||||
/* Specify allocated tiler structures */
|
||||
framebuffer.tiler_polygon_list = ctx->tiler_polygon_list.bo->gpu;
|
||||
|
||||
/* Allow the entire tiler heap */
|
||||
framebuffer.tiler_heap_start = ctx->tiler_heap.bo->gpu;
|
||||
framebuffer.tiler_heap_end =
|
||||
ctx->tiler_heap.bo->gpu + ctx->tiler_heap.bo->size;
|
||||
} else {
|
||||
/* The tiler is disabled, so don't allow the tiler heap */
|
||||
framebuffer.tiler_heap_start = ctx->tiler_heap.bo->gpu;
|
||||
framebuffer.tiler_heap_end = framebuffer.tiler_heap_start;
|
||||
|
||||
/* Use a dummy polygon list */
|
||||
framebuffer.tiler_polygon_list = ctx->tiler_dummy.bo->gpu;
|
||||
|
||||
/* Also, set a "tiler disabled?" flag? */
|
||||
framebuffer.tiler_hierarchy_mask |= 0x1000;
|
||||
}
|
||||
|
||||
framebuffer.tiler_polygon_list_body =
|
||||
framebuffer.tiler_polygon_list + header_size;
|
||||
|
||||
framebuffer.tiler_polygon_list_size =
|
||||
header_size + body_size;
|
||||
|
||||
|
||||
|
||||
return framebuffer;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -444,6 +444,51 @@ pandecode_decode_fbd_type(enum mali_fbd_type type)
|
|||
else return "WATFBD /* XXX */";
|
||||
}
|
||||
|
||||
/* Midgard's tiler descriptor is embedded within the
|
||||
* larger FBD */
|
||||
|
||||
static void
|
||||
pandecode_midgard_tiler_descriptor(const struct midgard_tiler_descriptor *t)
|
||||
{
|
||||
pandecode_log(".tiler = {\n");
|
||||
pandecode_indent++;
|
||||
|
||||
pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
|
||||
pandecode_prop("flags = 0x%" PRIx16, t->flags);
|
||||
pandecode_prop("polygon_list_size = 0x%x", t->polygon_list_size);
|
||||
|
||||
MEMORY_PROP(t, polygon_list);
|
||||
MEMORY_PROP(t, polygon_list_body);
|
||||
|
||||
MEMORY_PROP(t, heap_start);
|
||||
|
||||
{
|
||||
/* Points to the end of a buffer */
|
||||
char *a = pointer_as_memory_reference(t->heap_end - 1);
|
||||
pandecode_prop("heap_end = %s + 1", a);
|
||||
free(a);
|
||||
}
|
||||
|
||||
bool nonzero_weights = false;
|
||||
|
||||
for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
|
||||
nonzero_weights |= t->weights[w] != 0x0;
|
||||
}
|
||||
|
||||
if (nonzero_weights) {
|
||||
pandecode_log(".weights = {");
|
||||
|
||||
for (unsigned w = 0; w < ARRAY_SIZE(t->weights); ++w) {
|
||||
pandecode_log("%d, ", t->weights[w]);
|
||||
}
|
||||
|
||||
pandecode_log("},");
|
||||
}
|
||||
|
||||
pandecode_indent--;
|
||||
pandecode_log("}\n");
|
||||
}
|
||||
|
||||
static void
|
||||
pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
|
||||
{
|
||||
|
|
@ -502,15 +547,7 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
|
|||
}
|
||||
|
||||
MEMORY_PROP(s, unknown_address_0);
|
||||
MEMORY_PROP(s, tiler_polygon_list);
|
||||
MEMORY_PROP(s, tiler_polygon_list_body);
|
||||
|
||||
pandecode_prop("tiler_resolution_check = 0x%" PRIx32, s->tiler_resolution_check);
|
||||
pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, s->tiler_hierarchy_mask);
|
||||
pandecode_prop("tiler_flags = 0x%" PRIx16, s->tiler_flags);
|
||||
|
||||
MEMORY_PROP(s, tiler_heap_free);
|
||||
MEMORY_PROP(s, tiler_heap_end);
|
||||
pandecode_midgard_tiler_descriptor(&s->tiler);
|
||||
|
||||
pandecode_indent--;
|
||||
pandecode_log("};\n");
|
||||
|
|
@ -716,10 +753,6 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
|
|||
* now */
|
||||
MEMORY_PROP(fb, unknown1);
|
||||
|
||||
pandecode_prop("tiler_polygon_list_size = 0x%x", fb->tiler_polygon_list_size);
|
||||
pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, fb->tiler_hierarchy_mask);
|
||||
pandecode_prop("tiler_flags = 0x%" PRIx16, fb->tiler_flags);
|
||||
|
||||
pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1);
|
||||
pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1);
|
||||
pandecode_prop("width2 = MALI_POSITIVE(%d)", fb->width2 + 1);
|
||||
|
|
@ -739,10 +772,7 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
|
|||
|
||||
pandecode_prop("unknown2 = 0x%x", fb->unknown2);
|
||||
MEMORY_PROP(fb, scratchpad);
|
||||
MEMORY_PROP(fb, tiler_polygon_list);
|
||||
MEMORY_PROP(fb, tiler_polygon_list_body);
|
||||
MEMORY_PROP(fb, tiler_heap_start);
|
||||
MEMORY_PROP(fb, tiler_heap_end);
|
||||
pandecode_midgard_tiler_descriptor(&fb->tiler);
|
||||
|
||||
if (fb->zero3 || fb->zero4) {
|
||||
pandecode_msg("framebuffer zeros tripped\n");
|
||||
|
|
@ -750,22 +780,6 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
|
|||
pandecode_prop("zero4 = 0x%" PRIx32, fb->zero4);
|
||||
}
|
||||
|
||||
bool nonzero_weights = false;
|
||||
|
||||
for (unsigned w = 0; w < ARRAY_SIZE(fb->tiler_weights); ++w) {
|
||||
nonzero_weights |= fb->tiler_weights[w] != 0x0;
|
||||
}
|
||||
|
||||
if (nonzero_weights) {
|
||||
pandecode_log(".tiler_weights = {");
|
||||
|
||||
for (unsigned w = 0; w < ARRAY_SIZE(fb->tiler_weights); ++w) {
|
||||
pandecode_log("%d, ", fb->tiler_weights[w]);
|
||||
}
|
||||
|
||||
pandecode_log("},");
|
||||
}
|
||||
|
||||
pandecode_indent--;
|
||||
pandecode_log("};\n");
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue