panfrost: Avoid copying job descriptors around when we can

Job descriptors are written section by section and are never modified
after they have been emitted. Let's avoid copying things around by
allocating descriptors upfront and letting the scoreboard logic write
only the header section.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6797>
Boris Brezillon 2020-09-08 20:32:41 +02:00 committed by Alyssa Rosenzweig
parent d289209ea6
commit 6b92303772
7 changed files with 57 additions and 68 deletions
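
The gist of the change, condensed: instead of packing a whole job on the stack and having the scoreboard copy header plus payload into a fresh pool allocation, the caller now allocates the descriptor from the pool upfront, packs the payload sections in place, and the scoreboard only writes the header. A minimal, self-contained C sketch of that pattern follows; the types and helpers here (struct transfer, pool_alloc, queue_job_copy, queue_job_in_place, HEADER_LEN/PAYLOAD_LEN) are illustrative stand-ins, not the actual panfrost API.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-ins (hypothetical, not the real panfrost types). */
struct transfer { void *cpu; uint64_t gpu; };

#define HEADER_LEN  16
#define PAYLOAD_LEN 48
#define JOB_LEN     (HEADER_LEN + PAYLOAD_LEN)

/* Pretend pool allocator: hands back a CPU pointer plus a fake GPU address. */
static struct transfer pool_alloc(size_t size)
{
        struct transfer t = { .cpu = calloc(1, size), .gpu = 0x1000 };
        return t;
}

/* Old style: the job was built on the stack, so queueing had to allocate
 * from the pool and copy header + payload into it. */
static uint64_t queue_job_copy(const void *payload, size_t payload_size)
{
        struct transfer t = pool_alloc(HEADER_LEN + payload_size);
        memset(t.cpu, 0xaa, HEADER_LEN);                      /* write header */
        memcpy((char *)t.cpu + HEADER_LEN, payload, payload_size);
        return t.gpu;
}

/* New style: the caller allocates the job upfront and fills the payload
 * sections in place; queueing only writes the header section. */
static uint64_t queue_job_in_place(const struct transfer *job)
{
        memset(job->cpu, 0xaa, HEADER_LEN);                   /* header only */
        return job->gpu;
}

int main(void)
{
        /* Before: stack buffer, copied into the pool at submit time. */
        char payload[PAYLOAD_LEN] = { 0x42 };
        uint64_t a = queue_job_copy(payload, sizeof(payload));

        /* After: payload written directly into the final allocation. */
        struct transfer job = pool_alloc(JOB_LEN);
        ((char *)job.cpu)[HEADER_LEN] = 0x42;
        uint64_t b = queue_job_in_place(&job);

        printf("old job at %#llx, new job at %#llx\n",
               (unsigned long long)a, (unsigned long long)b);
        return 0;
}

The payoff is dropping the payload memcpy on every job submission; the trade-off, visible in the diff below, is that callers must know the descriptor size upfront, which is why panfrost_draw_vbo now picks between the Bifrost and Midgard tiler job lengths itself when allocating.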

@@ -1771,42 +1771,32 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
 void
 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
-                                void *vertex_job,
-                                void *tiler_job)
+                                const struct panfrost_transfer *vertex_job,
+                                const struct panfrost_transfer *tiler_job)
 {
         struct panfrost_context *ctx = batch->ctx;
-        struct panfrost_device *device = pan_device(ctx->base.screen);
         bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;
-        void *vp = vertex_job + MALI_JOB_HEADER_LENGTH;
-        size_t vp_size = MALI_COMPUTE_JOB_LENGTH -
-                         MALI_JOB_HEADER_LENGTH;
-        void *tp = tiler_job + MALI_JOB_HEADER_LENGTH;
-        bool is_bifrost = device->quirks & IS_BIFROST;
-        size_t tp_size = (is_bifrost ?
-                          MALI_BIFROST_TILER_JOB_LENGTH :
-                          MALI_MIDGARD_TILER_JOB_LENGTH) -
-                         MALI_JOB_HEADER_LENGTH;
         if (wallpapering) {
                 /* Inject in reverse order, with "predicted" job indices.
                  * THIS IS A HACK XXX */
-                panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false,
-                                 batch->scoreboard.job_index + 2, tp, tp_size, true);
-                panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
-                                 vp, vp_size, true);
+                panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false,
+                                 batch->scoreboard.job_index + 2, tiler_job, true);
+                panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
+                                 vertex_job, true);
                 return;
         }
         /* If rasterizer discard is enable, only submit the vertex */
-        unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
-                                           vp, vp_size, false);
+        unsigned vertex = panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
+                                           vertex_job, false);
         if (ctx->rasterizer->base.rasterizer_discard)
                 return;
-        panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, vertex, tp, tp_size,
-                         false);
+        panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, vertex, tiler_job, false);
 }
 /* TODO: stop hardcoding this */

@@ -84,8 +84,8 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
 void
 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
-                                void *vertex_job,
-                                void *tiler_job);
+                                const struct panfrost_transfer *vertex_job,
+                                const struct panfrost_transfer *tiler_job);
 mali_ptr
 panfrost_emit_sample_locations(struct panfrost_batch *batch);

@@ -104,7 +104,10 @@ panfrost_launch_grid(struct pipe_context *pipe,
         ctx->compute_grid = info;
         /* TODO: Stub */
-        struct mali_compute_job_packed job = { 0 };
+        struct panfrost_transfer t =
+                panfrost_pool_alloc_aligned(&batch->pool,
+                                            MALI_COMPUTE_JOB_LENGTH,
+                                            64);
         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
          * reuse the graphics path for this by lowering to Gallium */
@@ -122,7 +125,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
         /* Invoke according to the grid info */
         void *invocation =
-                pan_section_ptr(&job, COMPUTE_JOB, INVOCATION);
+                pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
         panfrost_pack_work_groups_compute(invocation,
                                           info->grid[0], info->grid[1],
                                           info->grid[2],
@@ -130,14 +133,14 @@ panfrost_launch_grid(struct pipe_context *pipe,
                                           info->block[2],
                                           false);
-        pan_section_pack(&job, COMPUTE_JOB, PARAMETERS, cfg) {
+        pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
                 cfg.job_task_split =
                         util_logbase2_ceil(info->block[0] + 1) +
                         util_logbase2_ceil(info->block[1] + 1) +
                         util_logbase2_ceil(info->block[2] + 1);
         }
-        pan_section_pack(&job, COMPUTE_JOB, DRAW, cfg) {
+        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
                 cfg.unknown_1 = (dev->quirks & IS_BIFROST) ? 0x2 : 0x6;
                 cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE);
                 cfg.shared = panfrost_emit_shared_memory(batch, info);
@@ -149,12 +152,8 @@ panfrost_launch_grid(struct pipe_context *pipe,
                                                             PIPE_SHADER_COMPUTE);
         }
-        panfrost_new_job(&batch->pool, &batch->scoreboard,
-                         MALI_JOB_TYPE_COMPUTE, true, 0,
-                         ((void *)&job) + MALI_JOB_HEADER_LENGTH,
-                         MALI_COMPUTE_JOB_LENGTH -
-                         MALI_JOB_HEADER_LENGTH,
-                         false);
+        panfrost_add_job(&batch->pool, &batch->scoreboard,
+                         MALI_JOB_TYPE_COMPUTE, true, 0, &t, true);
         panfrost_flush_all_batches(ctx, 0);
 }

@@ -454,13 +454,19 @@ panfrost_draw_vbo(
         ctx->instance_count = info->instance_count;
         ctx->active_prim = info->mode;
-        /* bifrost tiler is bigger than midgard's one, so let's use it as a
-         * generic container for both.
-         */
-        struct mali_bifrost_tiler_job_packed tiler = {};
-        struct mali_compute_job_packed vertex = {};
-        unsigned vertex_count = ctx->vertex_count;
         bool is_bifrost = device->quirks & IS_BIFROST;
+        struct panfrost_transfer tiler =
+                panfrost_pool_alloc_aligned(&batch->pool,
+                                            is_bifrost ?
+                                            MALI_BIFROST_TILER_JOB_LENGTH :
+                                            MALI_MIDGARD_TILER_JOB_LENGTH,
+                                            64);
+        struct panfrost_transfer vertex =
+                panfrost_pool_alloc_aligned(&batch->pool,
+                                            MALI_COMPUTE_JOB_LENGTH,
+                                            64);
+        unsigned vertex_count = ctx->vertex_count;
         mali_ptr shared_mem = is_bifrost ?
                 panfrost_vt_emit_shared_memory(batch) :
@@ -506,9 +512,9 @@ panfrost_draw_vbo(
         /* Fire off the draw itself */
         panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem,
-                                  vs_vary, varyings, &vertex);
+                                  vs_vary, varyings, vertex.cpu);
         panfrost_draw_emit_tiler(batch, info, &invocation, shared_mem, indices,
-                                 fs_vary, varyings, pos, psiz, &tiler);
+                                 fs_vary, varyings, pos, psiz, tiler.cpu);
         panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
         /* Adjust the batch stack size based on the new shader stack sizes. */

@@ -340,9 +340,10 @@ panfrost_load_midg(
                 }
         }
-        struct mali_midgard_tiler_job_packed payload = {};
+        struct panfrost_transfer t =
+                panfrost_pool_alloc_aligned(pool, MALI_MIDGARD_TILER_JOB_LENGTH, 64);
-        pan_section_pack(&payload, MIDGARD_TILER_JOB, DRAW, cfg) {
+        pan_section_pack(t.cpu, MIDGARD_TILER_JOB, DRAW, cfg) {
                 cfg.unknown_1 = 0x7;
                 cfg.position = coordinates;
                 cfg.textures = panfrost_pool_upload(pool, &texture_t.gpu, sizeof(texture_t.gpu));
@@ -354,16 +355,14 @@ panfrost_load_midg(
                 cfg.shared = fbd;
         }
-        pan_section_pack(&payload, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {
+        pan_section_pack(t.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {
                 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLES;
                 cfg.index_count = vertex_count;
                 cfg.unknown_3 = 6;
         }
-        panfrost_pack_work_groups_compute(pan_section_ptr(&payload, MIDGARD_TILER_JOB, INVOCATION),
+        panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu, MIDGARD_TILER_JOB, INVOCATION),
                                           1, vertex_count, 1, 1, 1, 1, true);
-        panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0,
-                         pan_section_ptr(&payload, MIDGARD_TILER_JOB, INVOCATION),
-                         MALI_MIDGARD_TILER_JOB_LENGTH - MALI_JOB_HEADER_LENGTH, true);
+        panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &t, true);
 }

@@ -106,13 +106,13 @@
  * not wallpapering and set this, dragons will eat you. */
 unsigned
-panfrost_new_job(
+panfrost_add_job(
                 struct pan_pool *pool,
                 struct pan_scoreboard *scoreboard,
                 enum mali_job_type type,
                 bool barrier,
                 unsigned local_dep,
-                void *payload, size_t payload_size,
+                const struct panfrost_transfer *job,
                 bool inject)
 {
         unsigned global_dep = 0;
@@ -133,24 +133,19 @@ panfrost_new_job(
         /* Assign the index */
         unsigned index = ++scoreboard->job_index;
-        struct panfrost_transfer transfer =
-                panfrost_pool_alloc_aligned(pool, MALI_JOB_HEADER_LENGTH + payload_size, 64);
-        pan_pack(transfer.cpu, JOB_HEADER, job) {
-                job.type = type;
-                job.barrier = barrier;
-                job.index = index;
-                job.dependency_1 = local_dep;
-                job.dependency_2 = global_dep;
+        pan_pack(job->cpu, JOB_HEADER, header) {
+                header.type = type;
+                header.barrier = barrier;
+                header.index = index;
+                header.dependency_1 = local_dep;
+                header.dependency_2 = global_dep;
                 if (inject)
-                        job.next = scoreboard->first_job;
+                        header.next = scoreboard->first_job;
         }
-        memcpy(transfer.cpu + MALI_JOB_HEADER_LENGTH, payload, payload_size);
         if (inject) {
-                scoreboard->first_job = transfer.gpu;
+                scoreboard->first_job = job->gpu;
                 return index;
         }
@@ -164,13 +159,13 @@ panfrost_new_job(
                  * TODO: Find a way to defer last job header emission until we
                  * have a new job to queue or the batch is ready for execution.
                  */
-                scoreboard->prev_job->opaque[6] = transfer.gpu;
-                scoreboard->prev_job->opaque[7] = transfer.gpu >> 32;
+                scoreboard->prev_job->opaque[6] = job->gpu;
+                scoreboard->prev_job->opaque[7] = job->gpu >> 32;
         } else {
-                scoreboard->first_job = transfer.gpu;
+                scoreboard->first_job = job->gpu;
         }
-        scoreboard->prev_job = (struct mali_job_header_packed *)transfer.cpu;
+        scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu;
         return index;
 }

@@ -49,13 +49,13 @@ struct pan_scoreboard {
 };
 unsigned
-panfrost_new_job(
+panfrost_add_job(
                 struct pan_pool *pool,
                 struct pan_scoreboard *scoreboard,
                 enum mali_job_type type,
                 bool barrier,
                 unsigned local_dep,
-                void *payload, size_t payload_size,
+                const struct panfrost_transfer *job,
                 bool inject);
 void panfrost_scoreboard_initialize_tiler(