diff --git a/src/gallium/drivers/panfrost/pan_fragment.c b/src/gallium/drivers/panfrost/pan_fragment.c index 3c7d983a620..ec1914074dc 100644 --- a/src/gallium/drivers/panfrost/pan_fragment.c +++ b/src/gallium/drivers/panfrost/pan_fragment.c @@ -88,22 +88,24 @@ panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws) assert(batch->maxx > batch->minx); assert(batch->maxy > batch->miny); - struct mali_payload_fragment payload = { - .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(batch->minx, batch->miny), - .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(batch->maxx, batch->maxy), - .framebuffer = framebuffer, - }; - struct panfrost_transfer transfer = panfrost_pool_alloc_aligned(&batch->pool, - MALI_JOB_HEADER_LENGTH + sizeof(payload), - 64); + MALI_FRAGMENT_JOB_LENGTH, 64); - pan_pack(transfer.cpu, JOB_HEADER, header) { + pan_section_pack(transfer.cpu, FRAGMENT_JOB, HEADER, header) { header.type = MALI_JOB_TYPE_FRAGMENT; header.index = 1; } - memcpy(transfer.cpu + MALI_JOB_HEADER_LENGTH, &payload, sizeof(payload)); + pan_section_pack(transfer.cpu, FRAGMENT_JOB, PAYLOAD, payload) { + payload.bound_min_x = batch->minx >> MALI_TILE_SHIFT; + payload.bound_min_y = batch->miny >> MALI_TILE_SHIFT; + + /* Batch max values are exclusive but the tile bounds are inclusive, so subtract 1 before shifting. */ + payload.bound_max_x = (batch->maxx - 1) >> MALI_TILE_SHIFT; + payload.bound_max_y = (batch->maxy - 1) >> MALI_TILE_SHIFT; + payload.framebuffer = framebuffer; + } + return transfer.gpu; } diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index a0021508107..62a6e9edcd2 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -436,47 +436,4 @@ FIXED_16(float x, bool allow_negative) return (int) (x * 256.0); } -/* From presentations, 16x16 tiles externally. Use shift for fast computation - * of tile numbers. 
*/ - -#define MALI_TILE_SHIFT 4 -#define MALI_TILE_LENGTH (1 << MALI_TILE_SHIFT) - -/* Tile coordinates are stored as a compact u32, as only 12 bits are needed to - * each component. Notice that this provides a theoretical upper bound of (1 << - * 12) = 4096 tiles in each direction, addressing a maximum framebuffer of size - * 65536x65536. Multiplying that together, times another four given that Mali - * framebuffers are 32-bit ARGB8888, means that this upper bound would take 16 - * gigabytes of RAM just to store the uncompressed framebuffer itself, let - * alone rendering in real-time to such a buffer. - * - * Nice job, guys.*/ - -/* From mali_kbase_10969_workaround.c */ -#define MALI_X_COORD_MASK 0x00000FFF -#define MALI_Y_COORD_MASK 0x0FFF0000 - -/* Extract parts of a tile coordinate */ - -#define MALI_TILE_COORD_X(coord) ((coord) & MALI_X_COORD_MASK) -#define MALI_TILE_COORD_Y(coord) (((coord) & MALI_Y_COORD_MASK) >> 16) - -/* Helpers to generate tile coordinates based on the boundary coordinates in - * screen space. So, with the bounds (0, 0) to (128, 128) for the screen, these - * functions would convert it to the bounding tiles (0, 0) to (7, 7). - * Intentional "off-by-one"; finding the tile number is a form of fencepost - * problem. 
*/ - -#define MALI_MAKE_TILE_COORDS(X, Y) ((X) | ((Y) << 16)) -#define MALI_BOUND_TO_TILE(B, bias) ((B - bias) >> MALI_TILE_SHIFT) -#define MALI_COORDINATE_TO_TILE(W, H, bias) MALI_MAKE_TILE_COORDS(MALI_BOUND_TO_TILE(W, bias), MALI_BOUND_TO_TILE(H, bias)) -#define MALI_COORDINATE_TO_TILE_MIN(W, H) MALI_COORDINATE_TO_TILE(W, H, 0) -#define MALI_COORDINATE_TO_TILE_MAX(W, H) MALI_COORDINATE_TO_TILE(W, H, 1) - -struct mali_payload_fragment { - u32 min_tile_coord; - u32 max_tile_coord; - mali_ptr framebuffer; -} __attribute__((packed)); - #endif /* __PANFROST_JOB_H__ */ diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c index c36c4a95a75..4f483be3cec 100644 --- a/src/panfrost/lib/decode.c +++ b/src/panfrost/lib/decode.c @@ -1399,14 +1399,15 @@ pandecode_vertex_or_tiler_job_mdg(const struct MALI_JOB_HEADER *h, return sizeof(*v); } -static int +static void pandecode_fragment_job(const struct pandecode_mapped_memory *mem, - mali_ptr payload, int job_no, - bool is_bifrost, unsigned gpu_id) + mali_ptr job, int job_no, + bool is_bifrost, unsigned gpu_id) { - const struct mali_payload_fragment *PANDECODE_PTR_VAR(s, mem, payload); + struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, mem, job); + pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s); - bool is_mfbd = s->framebuffer & MALI_FBD_TAG_IS_MFBD; + bool is_mfbd = s.framebuffer & MALI_FBD_TAG_IS_MFBD; if (!is_mfbd && is_bifrost) pandecode_msg("XXX: Bifrost fragment must use MFBD\n"); @@ -1414,10 +1415,10 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem, struct pandecode_fbd info; if (is_mfbd) - info = pandecode_mfbd_bfr(s->framebuffer & ~MALI_FBD_TAG_MASK, job_no, + info = pandecode_mfbd_bfr(s.framebuffer & ~MALI_FBD_TAG_MASK, job_no, true, false, is_bifrost, gpu_id); else - info = pandecode_sfbd(s->framebuffer & ~MALI_FBD_TAG_MASK, job_no, + info = pandecode_sfbd(s.framebuffer & ~MALI_FBD_TAG_MASK, job_no, true, gpu_id); /* Compute the tag for the tagged pointer. 
This contains the type of @@ -1434,35 +1435,19 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem, expected_tag |= (MALI_POSITIVE(info.rt_count) << 2); } - if ((s->min_tile_coord | s->max_tile_coord) & ~(MALI_X_COORD_MASK | MALI_Y_COORD_MASK)) { - pandecode_msg("XXX: unexpected tile coordinate bits\n"); - pandecode_prop("min_tile_coord = 0x%X\n", s->min_tile_coord); - pandecode_prop("max_tile_coord = 0x%X\n", s->max_tile_coord); - } - /* Extract tile coordinates */ - unsigned min_x = MALI_TILE_COORD_X(s->min_tile_coord) << MALI_TILE_SHIFT; - unsigned min_y = MALI_TILE_COORD_Y(s->min_tile_coord) << MALI_TILE_SHIFT; - - unsigned max_x = (MALI_TILE_COORD_X(s->max_tile_coord) + 1) << MALI_TILE_SHIFT; - unsigned max_y = (MALI_TILE_COORD_Y(s->max_tile_coord) + 1) << MALI_TILE_SHIFT; - - /* For the max, we also want the floored (rather than ceiled) version for checking */ - - unsigned max_x_f = (MALI_TILE_COORD_X(s->max_tile_coord)) << MALI_TILE_SHIFT; - unsigned max_y_f = (MALI_TILE_COORD_Y(s->max_tile_coord)) << MALI_TILE_SHIFT; + unsigned min_x = s.bound_min_x << MALI_TILE_SHIFT; + unsigned min_y = s.bound_min_y << MALI_TILE_SHIFT; + unsigned max_x = s.bound_max_x << MALI_TILE_SHIFT; + unsigned max_y = s.bound_max_y << MALI_TILE_SHIFT; /* Validate the coordinates are well-ordered */ - if (min_x == max_x) - pandecode_msg("XXX: empty X coordinates (%u = %u)\n", min_x, max_x); - else if (min_x > max_x) + if (min_x > max_x) pandecode_msg("XXX: misordered X coordinates (%u > %u)\n", min_x, max_x); - if (min_y == max_y) - pandecode_msg("XXX: empty X coordinates (%u = %u)\n", min_x, max_x); - else if (min_y > max_y) + if (min_y > max_y) - pandecode_msg("XXX: misordered X coordinates (%u > %u)\n", min_x, max_x); + pandecode_msg("XXX: misordered Y coordinates (%u > %u)\n", min_y, max_y); /* Validate the coordinates fit inside the framebuffer. 
We use floor, @@ -1470,24 +1455,24 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem, * coordinates for something like an 800x600 framebuffer will actually * resolve to 800x608, which would otherwise trigger a Y-overflow */ - if ((min_x > info.width) || (max_x_f > info.width)) + if (max_x + 1 > info.width) pandecode_msg("XXX: tile coordinates overflow in X direction\n"); - if ((min_y > info.height) || (max_y_f > info.height)) + if (max_y + 1 > info.height) pandecode_msg("XXX: tile coordinates overflow in Y direction\n"); /* After validation, we print */ - - pandecode_log("fragment (%u, %u) ... (%u, %u)\n\n", min_x, min_y, max_x, max_y); + DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n", + job + MALI_JOB_HEADER_LENGTH, job_no); /* The FBD is a tagged pointer */ - unsigned tag = (s->framebuffer & MALI_FBD_TAG_MASK); + unsigned tag = (s.framebuffer & MALI_FBD_TAG_MASK); if (tag != expected_tag) pandecode_msg("XXX: expected FBD tag %X but got %X\n", expected_tag, tag); - return sizeof(*s); + pandecode_log("\n"); } static void @@ -1554,7 +1539,7 @@ pandecode_jc(mali_ptr jc_gpu_va, bool bifrost, unsigned gpu_id, bool minimal) break; case MALI_JOB_TYPE_FRAGMENT: - pandecode_fragment_job(mem, payload_ptr, job_no, bifrost, gpu_id); + pandecode_fragment_job(mem, jc_gpu_va, job_no, bifrost, gpu_id); break; default: diff --git a/src/panfrost/lib/gen_pack.py b/src/panfrost/lib/gen_pack.py index 2eb1936b785..cf212e9fb1c 100644 --- a/src/panfrost/lib/gen_pack.py +++ b/src/panfrost/lib/gen_pack.py @@ -163,6 +163,12 @@ __gen_unpack_padded(const uint8_t *restrict cl, uint32_t start, uint32_t end) #define pan_section_print(fp, A, S, var, indent) \\ MALI_ ## A ## _SECTION_ ## S ## _print(fp, &(var), indent) +/* From presentations, 16x16 tiles externally. Use shift for fast computation + * of tile numbers. 
*/ + +#define MALI_TILE_SHIFT 4 +#define MALI_TILE_LENGTH (1 << MALI_TILE_SHIFT) + """ def to_alphanum(name): diff --git a/src/panfrost/lib/midgard.xml b/src/panfrost/lib/midgard.xml index 7154b859a42..d8c17de17b9 100644 --- a/src/panfrost/lib/midgard.xml +++ b/src/panfrost/lib/midgard.xml @@ -1060,6 +1060,22 @@ + + + + + + + + + + + + +
+
+ +