diff --git a/src/gallium/drivers/panfrost/pan_blit.c b/src/gallium/drivers/panfrost/pan_blit.c
index 2430195cdb1..1140ff6b6e4 100644
--- a/src/gallium/drivers/panfrost/pan_blit.c
+++ b/src/gallium/drivers/panfrost/pan_blit.c
@@ -25,8 +25,8 @@ panfrost_blitter_draw_rectangle(struct blitter_context *blitter,
    struct panfrost_context *pctx = pan_context(ctx);
    struct panfrost_screen *scr = pan_screen(ctx->screen);
 
-   /* Always fallback for now. */
-   goto fallback;
+   if (scr->dev.arch != 9 || depth != 0.0f || num_instances > 1)
+      goto fallback;
 
    /* Map viewport to the dest rect of the framebuffer. The tiler will then be
     * configured to use it as scissor box in order to clip fullscreen
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index b230ea62e27..6470921f16b 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -435,6 +435,27 @@ struct panfrost_shader_binary {
    struct util_dynarray binary;
 };
 
+struct panfrost_run_fullscreen_attrib {
+   float x, y, z, w;
+};
+
+/* The tiler always allocates packets that can hold 64 vertices in RUN_IDVS
+ * malloc mode. For RUN_FULLSCREEN, the vertex array is preallocated but must
+ * match the tiler allocation strategy. */
+#define PAN_RUN_FULLSCREEN_NUM_VERTICES 64
+
+#define PAN_RUN_FULLSCREEN_ATTRIB_STRIDE                                       \
+   sizeof(struct panfrost_run_fullscreen_attrib)
+
+/* A RUN_FULLSCREEN packet is made of a position and a texcoord attrib. */
+#define PAN_RUN_FULLSCREEN_PACKET_STRIDE                                       \
+   (2 * sizeof(struct panfrost_run_fullscreen_attrib))
+
+#define PAN_RUN_FULLSCREEN_ARRAY_SIZE                                          \
+   (PAN_RUN_FULLSCREEN_NUM_VERTICES * PAN_RUN_FULLSCREEN_PACKET_STRIDE)
+
+#define PAN_RUN_FULLSCREEN_ARRAY_ALIGN 64
+
 void
 panfrost_disk_cache_store(struct disk_cache *cache,
                           const struct panfrost_uncompiled_shader *uncompiled,
@@ -472,6 +493,10 @@ unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
                                   unsigned *nr_bufs, unsigned vbi,
                                   unsigned divisor);
 
+struct pan_ptr panfrost_emit_fullscreen_vertex_array(struct panfrost_batch *batch,
+                                                     enum blitter_attrib_type type,
+                                                     const struct blitter_attrib *attrib);
+
 struct panfrost_zsa_state;
 struct panfrost_sampler_state;
 struct panfrost_sampler_view;
diff --git a/src/gallium/drivers/panfrost/pan_helpers.c b/src/gallium/drivers/panfrost/pan_helpers.c
index 99012fd8dee..29d7d63518c 100644
--- a/src/gallium/drivers/panfrost/pan_helpers.c
+++ b/src/gallium/drivers/panfrost/pan_helpers.c
@@ -172,6 +172,63 @@ pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs,
    return idx;
 }
 
+/*
+ * Allocate the vertex array consumed by a RUN_FULLSCREEN draw and fill in the
+ * texcoords of its first three vertices from the blitter attrib.
+ *
+ * Returns a null pan_ptr (cpu == NULL, gpu == 0) when the attrib type carries
+ * no texcoords or when the pool allocation fails.
+ */
+struct pan_ptr
+panfrost_emit_fullscreen_vertex_array(struct panfrost_batch *batch,
+                                      enum blitter_attrib_type type,
+                                      const struct blitter_attrib *attrib)
+{
+   const struct pan_ptr null_array = { .cpu = NULL, .gpu = 0 };
+   struct panfrost_run_fullscreen_attrib *texcoords;
+   struct pan_ptr array;
+   float z, w;
+
+   if (type != UTIL_BLITTER_ATTRIB_TEXCOORD_XY &&
+       type != UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW)
+      return null_array;
+
+   array = pan_pool_alloc_aligned(&batch->pool.base,
+                                  PAN_RUN_FULLSCREEN_ARRAY_SIZE,
+                                  PAN_RUN_FULLSCREEN_ARRAY_ALIGN);
+   if (!array.cpu)
+      return null_array;
+
+   /* Texcoords start PAN_RUN_FULLSCREEN_NUM_VERTICES attribs into the array;
+    * the attribs before that offset are not written here. */
+   texcoords = (struct panfrost_run_fullscreen_attrib *)
+      ((uint8_t *)array.cpu + (PAN_RUN_FULLSCREEN_NUM_VERTICES *
+                               PAN_RUN_FULLSCREEN_ATTRIB_STRIDE));
+
+   /* z/w are only meaningful for TEXCOORD_XYZW. For TEXCOORD_XY, use the
+    * (0, 1) defaults u_blitter's own rectangle path applies rather than
+    * reading fields the caller may have left unset. */
+   if (type == UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW) {
+      z = attrib->texcoord.z;
+      w = attrib->texcoord.w;
+   } else {
+      z = 0.0f;
+      w = 1.0f;
+   }
+
+   texcoords[0] = (struct panfrost_run_fullscreen_attrib){
+      .x = attrib->texcoord.x1, .y = attrib->texcoord.y1, .z = z, .w = w,
+   };
+   texcoords[1] = (struct panfrost_run_fullscreen_attrib){
+      .x = attrib->texcoord.x2, .y = attrib->texcoord.y1, .z = z, .w = w,
+   };
+   texcoords[2] = (struct panfrost_run_fullscreen_attrib){
+      .x = attrib->texcoord.x1, .y = attrib->texcoord.y2, .z = z, .w = w,
+   };
+
+   return array;
+}
+
 /*
  * Helper to add a PIPE_CLEAR_* to batch->draws and batch->resolve together,
  * meaning that we draw to a given target. Adding to only one mask does not
diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c
index fdcd060d7af..5a616163c97 100644
--- a/src/gallium/drivers/panfrost/pan_jm.c
+++ b/src/gallium/drivers/panfrost/pan_jm.c
@@ -497,7 +497,8 @@ jm_emit_vertex_job(struct panfrost_batch *batch,
 
 static void
 jm_emit_tiler_draw(struct mali_draw_packed *out, struct panfrost_batch *batch,
-                   bool fs_required, enum mesa_prim prim)
+                   bool fs_required, enum mesa_prim prim,
+                   const struct pan_ptr *vertex_array)
 {
    struct panfrost_context *ctx = batch->ctx;
    struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
@@ -559,6 +560,13 @@ jm_emit_tiler_draw(struct mali_draw_packed *out, struct panfrost_batch *batch,
       cfg.flags_0.aligned_line_ends = !rast->line_rectangular;
 
       cfg.vertex_array.packet = true;
+      if (vertex_array) {
+         cfg.vertex_array.pointer = vertex_array->gpu;
+         cfg.vertex_array.vertex_packet_stride =
+            PAN_RUN_FULLSCREEN_PACKET_STRIDE;
+         cfg.vertex_array.vertex_attribute_stride =
+            PAN_RUN_FULLSCREEN_ATTRIB_STRIDE;
+      }
 
       cfg.minimum_z = batch->minimum_z;
       cfg.maximum_z = batch->maximum_z;
@@ -804,7 +812,7 @@ jm_emit_malloc_vertex_job(struct panfrost_batch *batch,
    }
 
    jm_emit_tiler_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW), batch,
-                      fs_required, u_reduced_prim(info->mode));
+                      fs_required, u_reduced_prim(info->mode), NULL);
 
    pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) {
       jm_emit_shader_env(batch, &cfg, MESA_SHADER_VERTEX,
@@ -852,7 +860,8 @@ jm_emit_tiler_job(struct panfrost_batch *batch,
       ;
 #endif
 
-   jm_emit_tiler_draw(pan_section_ptr(job, TILER_JOB, DRAW), batch, true, prim);
+   jm_emit_tiler_draw(pan_section_ptr(job, TILER_JOB, DRAW), batch, true,
+                      prim, NULL);
 
    panfrost_emit_primitive_size(ctx, prim == MESA_PRIM_POINTS,
                                 batch->varyings.psiz, prim_size);
@@ -1025,7 +1034,51 @@ GENX(jm_launch_draw_fullscreen)(struct panfrost_batch *batch,
                                 enum blitter_attrib_type type,
                                 const struct blitter_attrib *attrib)
 {
-   UNREACHABLE("draw fullscreen not implemented for jm");
+#if PAN_ARCH == 9
+   PAN_TRACE_FUNC(PAN_TRACE_GL_JM);
+
+   struct pan_ptr job, dcd, vertex_array;
+
+   job = pan_pool_alloc_desc(&batch->pool.base, FULLSCREEN_JOB);
+   if (!job.cpu) {
+      mesa_loge("jm_launch_draw_fullscreen failed");
+      return;
+   }
+
+   dcd = pan_pool_alloc_desc(&batch->pool.base, DRAW);
+   if (!dcd.cpu) {
+      mesa_loge("jm_launch_draw_fullscreen failed");
+      return;
+   }
+
+   vertex_array = panfrost_emit_fullscreen_vertex_array(batch, type, attrib);
+   /* A null array is only expected when the attrib type has no texcoords. */
+   if (!vertex_array.cpu && (type == UTIL_BLITTER_ATTRIB_TEXCOORD_XY ||
+                             type == UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW)) {
+      mesa_loge("jm_launch_draw_fullscreen failed");
+      return;
+   }
+
+   jm_emit_tiler_draw(dcd.cpu, batch, true, u_reduced_prim(MESA_PRIM_QUADS),
+                      &vertex_array);
+
+   pan_section_pack(job.cpu, FULLSCREEN_JOB, PRIMITIVE, cfg) {
+      cfg.scissor_array_enable = false;
+   }
+   pan_section_pack(job.cpu, FULLSCREEN_JOB, DCD, cfg) {
+      cfg.address = dcd.gpu;
+   }
+   pan_section_pack(job.cpu, FULLSCREEN_JOB, TILER, cfg) {
+      cfg.address = jm_emit_tiler_desc(batch);
+   }
+   memcpy(pan_section_ptr(job.cpu, FULLSCREEN_JOB, SCISSOR), &batch->scissor,
+          pan_size(SCISSOR));
+
+   pan_jc_add_job(&batch->jm.jobs.vtc_jc, MALI_JOB_TYPE_FULLSCREEN, false,
+                  false, 0, 0, &job, false);
+#else
+   UNREACHABLE("draw fullscreen not available for arch < 9");
+#endif
 }
 
 void
diff --git a/src/panfrost/genxml/v9.xml b/src/panfrost/genxml/v9.xml
index d5bc4c1e110..de23971815a 100644
--- a/src/panfrost/genxml/v9.xml
+++ b/src/panfrost/genxml/v9.xml
@@ -43,6 +43,7 @@
+
@@ -1558,10 +1559,8 @@
-
+
-
-
@@ -1616,6 +1615,10 @@
+
+
+
+
@@ -1643,6 +1646,14 @@
+ +
+
+
+
+
+ +
diff --git a/src/panfrost/lib/pan_jc.h b/src/panfrost/lib/pan_jc.h index c853887001e..4fb24690236 100644 --- a/src/panfrost/lib/pan_jc.h +++ b/src/panfrost/lib/pan_jc.h @@ -114,7 +114,8 @@ static inline bool job_uses_tiling(enum mali_job_type type) { #if PAN_ARCH >= 9 - if (type == MALI_JOB_TYPE_MALLOC_VERTEX) + if (type == MALI_JOB_TYPE_MALLOC_VERTEX || + type == MALI_JOB_TYPE_FULLSCREEN) return true; #endif