From b39de8bb4a1568eedfe31dacabfd145cc0dbbccb Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 1 Dec 2023 13:08:09 -0400 Subject: [PATCH] asahi: Implement ARB_texture_barrier by decompression Your perf goes not stonks, sorry. Passes KHR-GL40.texture_barrier_ARB.*. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/agx_device.h | 1 + src/gallium/drivers/asahi/agx_pipe.c | 2 + src/gallium/drivers/asahi/agx_state.c | 69 +++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/src/asahi/lib/agx_device.h b/src/asahi/lib/agx_device.h index 6a5bde39c8f..ea65672db2e 100644 --- a/src/asahi/lib/agx_device.h +++ b/src/asahi/lib/agx_device.h @@ -34,6 +34,7 @@ enum agx_dbg { AGX_DBG_VARYINGS = BITFIELD_BIT(17), AGX_DBG_SCRATCH = BITFIELD_BIT(18), AGX_DBG_COMPBLIT = BITFIELD_BIT(19), + AGX_DBG_FEEDBACK = BITFIELD_BIT(20), }; /* Dummy partial declarations, pending real UAPI */ diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index a69fcee61c0..21c3eb445d5 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -75,6 +75,7 @@ static const struct debug_named_value agx_debug_options[] = { {"nowc", AGX_DBG_NOWC, "Disable write-combining"}, {"synctvb", AGX_DBG_SYNCTVB, "Synchronous TVB growth"}, {"smalltile", AGX_DBG_SMALLTILE,"Force 16x16 tiles"}, + {"feedback", AGX_DBG_FEEDBACK, "Debug feedback loops"}, {"nomsaa", AGX_DBG_NOMSAA, "Force disable MSAA"}, {"noshadow", AGX_DBG_NOSHADOW, "Force disable resource shadowing"}, {"varyings", AGX_DBG_VARYINGS, "Validate varying linkage"}, @@ -1569,6 +1570,7 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_NATIVE_FENCE_FD: + case PIPE_CAP_TEXTURE_BARRIER: return true; case PIPE_CAP_TIMER_RESOLUTION: diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 7ce79f11e80..7cb801af1db 
100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -4810,6 +4810,73 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info, } } +/* + * From the ARB_texture_barrier spec: + * + * "Specifically, the values of rendered fragments are undefined if any + * shader stage fetches texels and the same texels are written via fragment + * shader outputs, even if the reads and writes are not in the same Draw + * call, unless any of the following exceptions apply: + * + * - The reads and writes are from/to disjoint sets of texels (after + * accounting for texture filtering rules). + * + * - There is only a single read and write of each texel, and the read is in + * the fragment shader invocation that writes the same texel (e.g. using + * "texelFetch2D(sampler, ivec2(gl_FragCoord.xy), 0);"). + * + * - If a texel has been written, then in order to safely read the result + * a texel fetch must be in a subsequent Draw separated by the command + * + * void TextureBarrier(void); + * + * TextureBarrier() will guarantee that writes have completed and caches + * have been invalidated before subsequent Draws are executed." + * + * The wording is subtle, but we are not required to flush implicitly for + * feedback loops, even though we're a tiler. What we are required to do is + * decompress framebuffers involved in feedback loops, because otherwise + * the hardware will race itself with exception #1, where we have a disjoint + * group of texels that intersects a compressed tile being written out. 
+ */ +static void +agx_legalize_feedback_loops(struct agx_context *ctx) +{ + /* Nothing to legalize while u_blitter is running: trust that u_blitter knows what it's doing */ + if (ctx->blitter->running) + return; + + for (unsigned stage = 0; stage < ARRAY_SIZE(ctx->stage); ++stage) { + if (!(ctx->stage[stage].dirty & AGX_STAGE_DIRTY_IMAGE)) + continue; + + for (unsigned i = 0; i < ctx->stage[stage].texture_count; ++i) { + if (!ctx->stage[stage].textures[i]) + continue; + + struct agx_resource *rsrc = ctx->stage[stage].textures[i]->rsrc; + + for (unsigned cb = 0; cb < ctx->framebuffer.nr_cbufs; ++cb) { + if (ctx->framebuffer.cbufs[cb] && + agx_resource(ctx->framebuffer.cbufs[cb]->texture) == rsrc) { + + if (rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED) { + /* Decompress if we can and shadow if we can't. Shadowing is not implemented yet, so a compressed resource with PIPE_BIND_SHARED set hits the unreachable() TODO below. */ + if (rsrc->base.bind & PIPE_BIND_SHARED) + unreachable("TODO"); + else + agx_decompress(ctx, rsrc, "Texture feedback loop"); + } + + /* Not required by the spec, just for debug: when AGX_DBG_FEEDBACK is set, also call agx_flush_writer on the resource bound as both texture and colour buffer */ + if (agx_device(ctx->base.screen)->debug & AGX_DBG_FEEDBACK) + agx_flush_writer(ctx, rsrc, "Feedback loop"); + } + } + } + } +} + static void agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, unsigned drawid_offset, @@ -4876,6 +4943,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, return; } + agx_legalize_feedback_loops(ctx); + /* Only the rasterization stream counts */ if (ctx->active_queries && ctx->prims_generated[0] && !ctx->stage[PIPE_SHADER_GEOMETRY].shader) {