diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h b/src/gallium/drivers/panfrost/include/panfrost-job.h index 401fef8fcec..c7cb2d7b5f4 100644 --- a/src/gallium/drivers/panfrost/include/panfrost-job.h +++ b/src/gallium/drivers/panfrost/include/panfrost-job.h @@ -963,7 +963,8 @@ struct bifrost_tiler_heap_meta { struct bifrost_tiler_meta { u64 zero0; - u32 unk; // = 0xf0 + u16 hierarchy_mask; + u16 flags; u16 width; u16 height; u64 zero1; @@ -1362,13 +1363,18 @@ struct mali_single_framebuffer { u32 zero6[7]; - /* Very weird format, see generation code in trans_builder.c */ - u32 tiler_resolution_check; - u32 tiler_flags; + /* Logically, by symmetry to the MFBD, this ought to be the size of the + * polygon list. But this doesn't quite compute up. More investigation + * is needed. */ - /* Guesses? */ - mali_ptr tiler_scratch_start; /* Pointing towards... a zero buffer? */ - mali_ptr tiler_scratch_middle; + u32 tiler_resolution_check; + + u16 tiler_hierarchy_mask; + u16 tiler_flags; + + /* See pan_tiler.c */ + mali_ptr tiler_polygon_list; + mali_ptr tiler_polygon_list_body; /* See mali_kbase_replay.c */ mali_ptr tiler_heap_free; @@ -1523,21 +1529,23 @@ struct bifrost_framebuffer { /* Tiler section begins here */ - u32 tiler_unknown; + u32 tiler_polygon_list_size; /* Name known from the replay workaround in the kernel. What exactly is - * flagged here is less known. We do that (tiler_flags & 0x1ff) + * flagged here is less known. We do that (tiler_hierarchy_mask & 0x1ff) * specifies a mask of hierarchy weights, which explains some of the - * performance mysteries around setting it. We also known (1 << 16) - * should be set, but there's no explanation in the kernel why. */ - u32 tiler_flags; + * performance mysteries around setting it. We also see the bottom bit + * of tiler_flags set in the kernel, but no comment why. */ - /* Note: these are guesses! */ - mali_ptr tiler_scratch_start; - mali_ptr tiler_scratch_middle; + u16 tiler_hierarchy_mask; + u16 tiler_flags; - /* These are not, since we see symmetry with replay - * jobs which name these explicitly */ + /* See mali_tiler.c for an explanation */ + mali_ptr tiler_polygon_list; + mali_ptr tiler_polygon_list_body; + + /* Names based on we see symmetry with replay jobs which name these + * explicitly */ mali_ptr tiler_heap_start; /* tiler heap_free_address */ mali_ptr tiler_heap_end; diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 2a530872c02..009ff6859e5 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -118,9 +118,10 @@ panfrost_emit_sfbd(struct panfrost_context *ctx) .format = 0x30000000, .clear_flags = 0x1000, .unknown_address_0 = ctx->scratchpad.gpu, - .tiler_scratch_start = ctx->misc_0.gpu, - .tiler_scratch_middle = ctx->misc_0.gpu + 40960, - .tiler_flags = 0xf0, + .tiler_polygon_list = ctx->misc_0.gpu, + .tiler_polygon_list_body = ctx->misc_0.gpu + 40960, + .tiler_hierarchy_mask = 0xF0, + .tiler_flags = 0x0, .tiler_heap_free = ctx->tiler_heap.gpu, .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, }; @@ -134,22 +135,22 @@ struct bifrost_framebuffer panfrost_emit_mfbd(struct panfrost_context *ctx) { struct bifrost_framebuffer framebuffer = { - /* It is not yet clear what this means or how it's - * calculated, but we can tell it is a (monotonically - * increasing?) function of tile count and geometry complexity; - * I suspect it defines a memory size of some kind? for the - * tiler. It's really unclear at the moment... but to add to - * the confusion, the hardware is happy enough to accept a zero - * in this field, so we don't even have to worry about it right - * now. */ - - .tiler_unknown = 0x0, - - /* The lower 0xff controls the hierarchy mask. Set more bits + /* The lower 0x1ff controls the hierarchy mask. Set more bits * on for more tile granularity (which can be a performance win * on some scenes, at memory bandwidth costs). For now, be lazy * and enable everything. This might be a terrible idea. */ - .tiler_flags = 0xff, + + .tiler_hierarchy_mask = 0xff, + .tiler_flags = 0x0, + + /* The hardware deals with suballocation; we don't care */ + .tiler_heap_start = ctx->tiler_heap.gpu, + .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, + + /* See pan_tiler.c */ + .tiler_polygon_list = ctx->misc_0.gpu, + .tiler_polygon_list_body = ctx->misc_0.gpu + 0xf0000, + .tiler_polygon_list_size = 0x0, .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width), .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height), @@ -164,26 +165,7 @@ panfrost_emit_mfbd(struct panfrost_context *ctx) .unknown2 = 0x1f, - /* Corresponds to unknown_address_X of SFBD */ .scratchpad = ctx->scratchpad.gpu, - .tiler_scratch_start = ctx->misc_0.gpu, - - /* The constant added here is, like the lower word of - * tiler_meta, (loosely) another product of framebuffer size - * and geometry complexity. It must be sufficiently large for - * the tiler_meta fast path to work; if it's too small, there - * will be DATA_INVALID_FAULTs. Conversely, it must be less - * than the total size of misc_0, or else there's no room. It's - * possible this constant configures a partition between two - * parts of misc_0? We haven't investigated the functionality, - * as these buffers are internally used by the hardware - * (presumably by the tiler) but not seemingly touched by the driver - */ - - .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000, - - .tiler_heap_start = ctx->tiler_heap.gpu, - .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, }; return framebuffer; diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c b/src/gallium/drivers/panfrost/pandecode/decode.c index 04de638a904..46cdef313b5 100644 --- a/src/gallium/drivers/panfrost/pandecode/decode.c +++ b/src/gallium/drivers/panfrost/pandecode/decode.c @@ -463,11 +463,12 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no) } MEMORY_PROP(s, unknown_address_0); - MEMORY_PROP(s, tiler_scratch_start); - MEMORY_PROP(s, tiler_scratch_middle); + MEMORY_PROP(s, tiler_polygon_list); + MEMORY_PROP(s, tiler_polygon_list_body); pandecode_prop("tiler_resolution_check = 0x%" PRIx32, s->tiler_resolution_check); - pandecode_prop("tiler_flags = 0x%" PRIx32, s->tiler_flags); + pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, s->tiler_hierarchy_mask); + pandecode_prop("tiler_flags = 0x%" PRIx16, s->tiler_flags); MEMORY_PROP(s, tiler_heap_free); MEMORY_PROP(s, tiler_heap_end); @@ -644,8 +645,9 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets) * now */ MEMORY_PROP(fb, unknown1); - pandecode_prop("tiler_unknown = 0x%x", fb->tiler_unknown); - pandecode_prop("tiler_flags = 0x%x", fb->tiler_flags); + pandecode_prop("tiler_polygon_list_size = 0x%x", fb->tiler_polygon_list_size); + pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, fb->tiler_hierarchy_mask); + pandecode_prop("tiler_flags = 0x%" PRIx16, fb->tiler_flags); pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1); pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1); @@ -663,8 +665,8 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets) pandecode_prop("unknown2 = 0x%x", fb->unknown2); MEMORY_PROP(fb, scratchpad); - MEMORY_PROP(fb, tiler_scratch_start); - MEMORY_PROP(fb, tiler_scratch_middle); + MEMORY_PROP(fb, tiler_polygon_list); + MEMORY_PROP(fb, tiler_polygon_list_body); MEMORY_PROP(fb, tiler_heap_start); MEMORY_PROP(fb, tiler_heap_end); @@ -1741,7 +1743,7 @@ pandecode_replay_tiler_meta(mali_ptr gpu_va, int job_no) pandecode_replay_tiler_heap_meta(t->tiler_heap_meta, job_no); - pandecode_log("struct mali_tiler_meta tiler_meta_%d = {\n", job_no); + pandecode_log("struct bifrost_tiler_meta tiler_meta_%d = {\n", job_no); pandecode_indent++; if (t->zero0 || t->zero1) { @@ -1750,7 +1752,9 @@ pandecode_replay_tiler_meta(mali_ptr gpu_va, int job_no) pandecode_prop("zero1 = 0x%" PRIx64, t->zero1); } - pandecode_prop("unk = 0x%x", t->unk); + pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask); + pandecode_prop("flags = 0x%" PRIx16, t->flags); + pandecode_prop("width = MALI_POSITIVE(%d)", t->width + 1); pandecode_prop("height = MALI_POSITIVE(%d)", t->height + 1); DYN_MEMORY_PROP(t, job_no, tiler_heap_meta);