diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index a7dfa47eaca..b9d07efd049 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -72,13 +72,11 @@ struct blitter_context * * \param type Semantics of the attributes "attrib". * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them. - * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes - * make up a constant RGBA color, and should go - * to the GENERIC0 varying slot of a fragment shader. - * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and - * {a3, a4} specify top-left and bottom-right texture - * coordinates of the rectangle, respectively, and should go - * to the GENERIC0 varying slot of a fragment shader. + * If type is UTIL_BLITTER_ATTRIB_TEXCOORD_XY or + * UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW, attrib stores the + * 2-component or 4-component texture coordinates of the + * rectangle, and should go to the GENERIC0 varying slot of a + * fragment shader. * * \param attrib See type. * diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 5b3e5e41d97..68aa5b3c845 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -17,7 +17,7 @@ files_panfrost = files( 'pan_resource.h', 'pan_context.c', 'pan_blend_cso.c', - 'pan_blit.c', + 'pan_blitter.c', 'pan_job.c', 'pan_shader.c', 'pan_mempool.c', diff --git a/src/gallium/drivers/panfrost/pan_blit.c b/src/gallium/drivers/panfrost/pan_blit.c deleted file mode 100644 index 3ec8933e37c..00000000000 --- a/src/gallium/drivers/panfrost/pan_blit.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2014 Broadcom - * Copyright (C) 2019 Collabora, Ltd. 
- * SPDX-License-Identifier: MIT - */ - -#include "util/format/u_format.h" -#include "pan_context.h" -#include "pan_resource.h" -#include "pan_trace.h" -#include "pan_util.h" - -void -panfrost_blitter_save(struct panfrost_context *ctx, - const enum panfrost_blitter_op blitter_op) -{ - struct blitter_context *blitter = ctx->blitter; - - util_blitter_save_vertex_buffers(blitter, ctx->vertex_buffers, - util_last_bit(ctx->vb_mask)); - util_blitter_save_vertex_elements(blitter, ctx->vertex); - util_blitter_save_vertex_shader(blitter, - ctx->uncompiled[MESA_SHADER_VERTEX]); - util_blitter_save_rasterizer(blitter, ctx->rasterizer); - util_blitter_save_viewport(blitter, &ctx->pipe_viewport); - util_blitter_save_so_targets(blitter, 0, NULL, 0); - - if (blitter_op & PAN_SAVE_FRAGMENT_STATE) { - if (blitter_op & PAN_SAVE_FRAGMENT_CONSTANT) - util_blitter_save_fragment_constant_buffer_slot( - blitter, ctx->constant_buffer[MESA_SHADER_FRAGMENT].cb); - - util_blitter_save_blend(blitter, ctx->blend); - util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil); - util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref); - util_blitter_save_fragment_shader(blitter, - ctx->uncompiled[MESA_SHADER_FRAGMENT]); - util_blitter_save_sample_mask(blitter, ctx->sample_mask, - ctx->min_samples); - util_blitter_save_scissor(blitter, &ctx->scissor); - } - - if (blitter_op & PAN_SAVE_FRAMEBUFFER) - util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer); - - if (blitter_op & PAN_SAVE_TEXTURES) { - util_blitter_save_fragment_sampler_states( - blitter, ctx->sampler_count[MESA_SHADER_FRAGMENT], - (void **)(&ctx->samplers[MESA_SHADER_FRAGMENT])); - util_blitter_save_fragment_sampler_views( - blitter, ctx->sampler_view_count[MESA_SHADER_FRAGMENT], - (struct pipe_sampler_view **)&ctx->sampler_views[MESA_SHADER_FRAGMENT]); - } - - if (!(blitter_op & PAN_DISABLE_RENDER_COND)) { - util_blitter_save_render_condition(blitter, - (struct pipe_query *)ctx->cond_query, - ctx->cond_cond, 
ctx->cond_mode); - } -} - -void -panfrost_blit_no_afbc_legalization(struct pipe_context *pipe, - const struct pipe_blit_info *info) -{ - PAN_TRACE_FUNC(PAN_TRACE_GL_BLIT); - - struct panfrost_context *ctx = pan_context(pipe); - - panfrost_blitter_save(ctx, info->render_condition_enable - ? PAN_RENDER_BLIT_COND - : PAN_RENDER_BLIT); - util_blitter_blit(ctx->blitter, info, NULL); -} - -void -panfrost_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) -{ - PAN_TRACE_FUNC(PAN_TRACE_GL_BLIT); - - struct panfrost_context *ctx = pan_context(pipe); - - if (info->render_condition_enable && !panfrost_render_condition_check(ctx)) - return; - - if (!util_blitter_is_blit_supported(ctx->blitter, info)) - UNREACHABLE("Unsupported blit\n"); - - /* Legalize here because it could trigger a recursive blit otherwise */ - struct panfrost_resource *src = pan_resource(info->src.resource); - enum pipe_format src_view_format = util_format_linear(info->src.format); - pan_legalize_format(ctx, src, src_view_format, false, false); - - struct panfrost_resource *dst = pan_resource(info->dst.resource); - enum pipe_format dst_view_format = util_format_linear(info->dst.format); - pan_legalize_format(ctx, dst, dst_view_format, true, false); - - panfrost_flush_all_batches(ctx, "Blit"); - panfrost_blit_no_afbc_legalization(pipe, info); - panfrost_flush_all_batches(ctx, "Blit"); -} diff --git a/src/gallium/drivers/panfrost/pan_blitter.c b/src/gallium/drivers/panfrost/pan_blitter.c new file mode 100644 index 00000000000..53d8c3f7dec --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_blitter.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2014 Broadcom + * Copyright (C) 2019 Collabora, Ltd. 
+ * SPDX-License-Identifier: MIT + */ + +#include "util/format/u_format.h" +#include "pan_blitter.h" +#include "pan_context.h" +#include "pan_resource.h" +#include "pan_trace.h" +#include "pan_util.h" + +enum pan_save_state { + PAN_SAVE_TEXTURES = BITFIELD_BIT(0), + PAN_SAVE_FRAMEBUFFER = BITFIELD_BIT(1), + PAN_SAVE_FRAGMENT_STATE = BITFIELD_BIT(2), + PAN_SAVE_FRAGMENT_CONSTANT = BITFIELD_BIT(3), + PAN_SAVE_RENDER_COND = BITFIELD_BIT(4), +}; + +static void +panfrost_blitter_draw_rectangle(struct blitter_context *blitter, + void *vertex_elements_cso, + blitter_get_vs_func get_vs, + int x1, int y1, int x2, int y2, + float depth, unsigned num_instances, + enum blitter_attrib_type type, + const struct blitter_attrib *attrib) +{ + assert(num_instances); + + struct pipe_context *ctx = blitter->pipe; + struct panfrost_context *pctx = pan_context(ctx); + struct panfrost_screen *scr = pan_screen(ctx->screen); + + /* u_blitter allows src == dst for disjoint texel sets without any texture + * barrier enforcement. Mali tile-based architecture can't guarantee that a + * read from the dst texture will fetch up-to-date values since it depends + * on the tile processing order. Request a fresh batch to ensure any writes + * to the resource are flushed. Doing so in this callback ensures that the + * framebuffer state is set for the current blit. + * + * XXX This should ideally be done at the draw call handling level when it + * requests a batch for the current framebuffer (see prepare_draw) by first + * submitting any batches writing to the draw call's sampler views. This + * check (along with resource accesses handling) is currently done when + * emitting texture descriptors but it explicitly discards the special case + * where the batch writing to the draw call's sampler views is the current + * batch because it can't be submitted at this time of the draw call + * handling (see panfrost_batch_update_access). 
+ */ + if (pctx->has_blit_loop) { + panfrost_get_fresh_batch_for_fbo(pctx, "Pre-blit flush"); + + /* XXX On all arch, Piglit's copy-pixels test sometimes fails to copy a + * block of pixels. Most easily reproducible at image size + * 50x38. Fallback to draw_vbo for now. + */ + goto fallback; + } + + if (scr->dev.arch <= 8 || depth != 0.0f || num_instances > 1) + goto fallback; + + /* Map viewport to the dest rect of the framebuffer. The tiler will then be + * configured to use it as scissor box in order to clip fullscreen + * fragments lying outside. + * + * Note that: tx = x1 + ((x2 - x1) / 2) = (x2 + x1) / 2 + * ty = y1 + ((y2 - y1) / 2) = (y2 + y1) / 2 + */ + const struct pipe_viewport_state viewport_state = { + .scale = { 0.5f * (x2 - x1), 0.5f * (y2 - y1), 1.0f }, + .translate = { 0.5f * (x2 + x1), 0.5f * (y2 + y1), 0.0f }, + .swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X, + .swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y, + .swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z, + .swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W + }; + ctx->set_viewport_states(ctx, 0, 1, &viewport_state); + + /* Map texture coordinates to the fullscreen framebuffer. */ + struct blitter_attrib fs_attrib; + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD_XY || + type == UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW) { + float dfdx = (attrib->texcoord.x2 - attrib->texcoord.x1) / (x2 - x1); + float dfdy = (attrib->texcoord.y2 - attrib->texcoord.y1) / (y2 - y1); + float w = pctx->pipe_framebuffer.width; + float h = pctx->pipe_framebuffer.height; + fs_attrib = *attrib; + fs_attrib.texcoord.x1 -= dfdx * x1; + fs_attrib.texcoord.y1 -= dfdy * y1; + fs_attrib.texcoord.x2 += dfdx * (w - x2); + fs_attrib.texcoord.y2 += dfdy * (h - y2); + }; + + scr->vtbl.draw_fullscreen(pan_context(ctx), get_vs(blitter), type, + &fs_attrib); + return; + + fallback: + /* Fallback to draw_vbo. 
*/ + util_blitter_draw_rectangle(blitter, vertex_elements_cso, get_vs, x1, y1, + x2, y2, depth, num_instances, type, attrib); + + /* XXX Depth/Stencil blits on v9 need that too. Not sure why. */ + if (pctx->has_blit_loop && scr->dev.arch == 9) + panfrost_flush_all_batches(pctx, "Post-blit flush"); +} + +struct blitter_context * +panfrost_blitter_create(struct pipe_context *pipe) +{ + struct blitter_context *blitter = util_blitter_create(pipe); + + /* Hook our draw_rectangle; util_blitter_create() is NULL on OOM. */ + if (blitter) + blitter->draw_rectangle = panfrost_blitter_draw_rectangle; + return blitter; +} + +static void +panfrost_blitter_save(struct panfrost_context *ctx, + const enum pan_save_state states) +{ + struct blitter_context *blitter = ctx->blitter; + + util_blitter_save_vertex_buffers(blitter, ctx->vertex_buffers, + util_last_bit(ctx->vb_mask)); + util_blitter_save_vertex_elements(blitter, ctx->vertex); + util_blitter_save_vertex_shader(blitter, + ctx->uncompiled[MESA_SHADER_VERTEX]); + util_blitter_save_rasterizer(blitter, ctx->rasterizer); + util_blitter_save_viewport(blitter, &ctx->pipe_viewport); + util_blitter_save_so_targets(blitter, 0, NULL, 0); + + if (states & PAN_SAVE_FRAGMENT_STATE) { + if (states & PAN_SAVE_FRAGMENT_CONSTANT) + util_blitter_save_fragment_constant_buffer_slot( + blitter, ctx->constant_buffer[MESA_SHADER_FRAGMENT].cb); + + util_blitter_save_blend(blitter, ctx->blend); + util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil); + util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref); + util_blitter_save_fragment_shader(blitter, + ctx->uncompiled[MESA_SHADER_FRAGMENT]); + util_blitter_save_sample_mask(blitter, ctx->sample_mask, + ctx->min_samples); + util_blitter_save_scissor(blitter, &ctx->scissor); + } + + if (states & PAN_SAVE_FRAMEBUFFER) + util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer); + + if (states & PAN_SAVE_TEXTURES) { + util_blitter_save_fragment_sampler_states( + blitter, ctx->sampler_count[MESA_SHADER_FRAGMENT], + (void 
**)(&ctx->samplers[MESA_SHADER_FRAGMENT])); + util_blitter_save_fragment_sampler_views( + blitter, ctx->sampler_view_count[MESA_SHADER_FRAGMENT], + (struct pipe_sampler_view **)&ctx->sampler_views[MESA_SHADER_FRAGMENT]); + } + + if (states & PAN_SAVE_RENDER_COND) { + util_blitter_save_render_condition(blitter, + (struct pipe_query *)ctx->cond_query, + ctx->cond_cond, ctx->cond_mode); + } +} + +void +panfrost_blitter_blit_legalized(struct pipe_context *pipe, + const struct pipe_blit_info *info) +{ + PAN_TRACE_FUNC(PAN_TRACE_GL_BLIT); + + struct panfrost_context *ctx = pan_context(pipe); + enum pan_save_state states = + PAN_SAVE_TEXTURES | PAN_SAVE_FRAMEBUFFER | PAN_SAVE_FRAGMENT_STATE | + PAN_SAVE_RENDER_COND; + + /* Always save the render condition so that u_blitter disables it around + * its internal draws: it is either checked below on the CPU or must not + * apply to this blit at all (render_condition_enable unset). */ + if (info->render_condition_enable && !panfrost_render_condition_check(ctx)) + return; + + panfrost_blitter_save(ctx, states); + ctx->has_blit_loop = info->src.resource == info->dst.resource; + util_blitter_blit(ctx->blitter, info, NULL); + ctx->has_blit_loop = false; +} + +void +panfrost_blitter_blit(struct pipe_context *pipe, + const struct pipe_blit_info *info) +{ + PAN_TRACE_FUNC(PAN_TRACE_GL_BLIT); + + struct panfrost_context *ctx = pan_context(pipe); + + /* Direct calls from the driver to panfrost_blitter_blit_legalized() are + * expected to be supported so this check is only done for external blits. + * + * XXX This check fails when the dest format is PIPE_FORMAT_S8_UINT because + * of a workaround for this format in panfrost_is_format_supported(). It + * can be triggered when the check is moved to the legalized blit func with + * dEQP-GLES3.functional.texture.specification.texstorage2d.format.depth32f_stencil8_2d. 
+ */ + if (!util_blitter_is_blit_supported(ctx->blitter, info)) + UNREACHABLE("Unsupported blit\n"); + + pan_legalize_format(ctx, pan_resource(info->src.resource), + util_format_linear(info->src.format), false, false); + pan_legalize_format(ctx, pan_resource(info->dst.resource), + util_format_linear(info->dst.format), true, false); + panfrost_blitter_blit_legalized(pipe, info); +} + +/* Setup HW tile buffer clears if the batch for the current FBO doesn't have + * any draw calls queued. Must be called after render condition check (which + * can submit the batch). + */ +static bool +panfrost_blitter_try_batch_clear(struct panfrost_context *ctx, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + + if (batch && !batch->draw_count) { + panfrost_batch_clear(batch, buffers, color, depth, stencil); + return true; + } + + return false; +} + +void +panfrost_blitter_clear(struct pipe_context *pipe, unsigned buffers, + uint32_t color_clear_mask, uint8_t stencil_clear_mask, + const struct pipe_scissor_state *scissor_state, + const union pipe_color_union *color, double depth, + unsigned stencil) +{ + PAN_TRACE_FUNC(PAN_TRACE_GL_BLIT); + + struct panfrost_context *ctx = pan_context(pipe); + enum pan_save_state states = + PAN_SAVE_FRAGMENT_STATE | PAN_SAVE_FRAGMENT_CONSTANT | + PAN_SAVE_RENDER_COND; + + if (!panfrost_render_condition_check(ctx)) + return; + + if (panfrost_blitter_try_batch_clear(ctx, buffers, color, depth, stencil)) + return; + + /* Framebuffer legalization is done at batch initialization. 
*/ + perf_debug(ctx, "Clearing with quad"); + panfrost_blitter_save(ctx, states); + util_blitter_clear( + ctx->blitter, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height, + util_framebuffer_get_num_layers(&ctx->pipe_framebuffer), buffers, color, + depth, stencil, + util_framebuffer_get_num_samples(&ctx->pipe_framebuffer) > 1); +} + +void +panfrost_blitter_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, double depth, + unsigned stencil, unsigned dstx, + unsigned dsty, unsigned width, + unsigned height, + bool render_condition_enabled) +{ + PAN_TRACE_FUNC(PAN_TRACE_GL_BLIT); + + struct panfrost_context *ctx = pan_context(pipe); + enum pan_save_state states = PAN_SAVE_FRAMEBUFFER | + PAN_SAVE_FRAGMENT_STATE | + PAN_SAVE_RENDER_COND; + + /* Always save the render condition so that u_blitter disables it around + * its internal draws: it is either checked below on the CPU or must not + * apply to this clear at all (render_condition_enabled unset). */ + if (render_condition_enabled && !panfrost_render_condition_check(ctx)) + return; + + pan_legalize_format(ctx, pan_resource(dst->texture), + util_format_linear(dst->format), true, false); + panfrost_blitter_save(ctx, states); + util_blitter_clear_depth_stencil(ctx->blitter, dst, clear_flags, depth, + stencil, dstx, dsty, width, height); +} + +void +panfrost_blitter_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + bool render_condition_enabled) +{ + PAN_TRACE_FUNC(PAN_TRACE_GL_BLIT); + + struct panfrost_context *ctx = pan_context(pipe); + enum pan_save_state states = + PAN_SAVE_FRAMEBUFFER | PAN_SAVE_FRAGMENT_STATE | + PAN_SAVE_FRAGMENT_CONSTANT | PAN_SAVE_RENDER_COND; + + /* Always save the render condition so that u_blitter disables it around + * its internal draws: it is either checked below on the CPU or must not + * apply to this clear at all (the render_condition_enabled argument is + * unset). */ + if (render_condition_enabled && !panfrost_render_condition_check(ctx)) + return; + + pan_legalize_format(ctx, pan_resource(dst->texture), + util_format_linear(dst->format), true, false); + panfrost_blitter_save(ctx, states); + util_blitter_clear_render_target(ctx->blitter, dst, color, dstx, dsty, + width, height); +} diff --git 
a/src/gallium/drivers/panfrost/pan_blitter.h b/src/gallium/drivers/panfrost/pan_blitter.h new file mode 100644 index 00000000000..b71cf4abb53 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_blitter.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2026 Amazon.com, Inc. or its affiliates + * SPDX-License-Identifier: MIT + */ + +#ifndef __PAN_BLITTER_H__ +#define __PAN_BLITTER_H__ + +#include "pan_context.h" + +struct blitter_context *panfrost_blitter_create(struct pipe_context *pipe); + +/* Callers should ensure that all AFBC/AFRC resources that will be used in the + * blit operation are legalized before calling blitter operations, otherwise + * we may trigger a recursive blit */ +void panfrost_blitter_blit_legalized(struct pipe_context *pipe, + const struct pipe_blit_info *info); + +void panfrost_blitter_blit(struct pipe_context *pipe, + const struct pipe_blit_info *info); + +void panfrost_blitter_clear(struct pipe_context *pipe, unsigned buffers, + uint32_t color_clear_mask, + uint8_t stencil_clear_mask, + const struct pipe_scissor_state *scissor_state, + const union pipe_color_union *color, double depth, + unsigned stencil); + +void panfrost_blitter_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, double depth, + unsigned stencil, unsigned dstx, + unsigned dsty, unsigned width, + unsigned height, + bool render_condition_enabled); + +void panfrost_blitter_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + bool render_condition_enabled); + +#endif diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 87a3cbbe7ea..a5f1a40fb4d 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -3360,13 +3360,12 @@ panfrost_increase_vertex_count(struct panfrost_batch *batch, uint32_t increment) 
* because all dirty flags are set there. */ static void -panfrost_update_active_prim(struct panfrost_context *ctx, - const struct pipe_draw_info *info) +panfrost_update_active_prim(struct panfrost_context *ctx, enum mesa_prim prim) { const enum mesa_prim prev_prim = u_reduced_prim(ctx->active_prim); - const enum mesa_prim new_prim = u_reduced_prim(info->mode); + const enum mesa_prim new_prim = u_reduced_prim(prim); - ctx->active_prim = info->mode; + ctx->active_prim = prim; if ((ctx->dirty & PAN_DIRTY_RASTERIZER) || (prev_prim != new_prim)) { @@ -3433,7 +3432,7 @@ panfrost_single_draw_direct(struct panfrost_batch *batch, struct panfrost_context *ctx = batch->ctx; - panfrost_update_active_prim(ctx, info); + panfrost_update_active_prim(ctx, info->mode); /* Take into account a negative bias */ ctx->vertex_count = @@ -3509,7 +3508,7 @@ panfrost_compatible_batch_state(struct panfrost_batch *batch, } static struct panfrost_batch * -prepare_draw(struct pipe_context *pipe, const struct pipe_draw_info *info) +prepare_draw(struct pipe_context *pipe, enum mesa_prim prim) { struct panfrost_context *ctx = pan_context(pipe); struct panfrost_device *dev = pan_device(pipe->screen); @@ -3531,7 +3530,7 @@ prepare_draw(struct pipe_context *pipe, const struct pipe_draw_info *info) return NULL; } - enum mesa_prim reduced_prim = u_reduced_prim(info->mode); + enum mesa_prim reduced_prim = u_reduced_prim(prim); if (unlikely(!panfrost_compatible_batch_state(batch, reduced_prim))) { batch = panfrost_get_fresh_batch_for_fbo(ctx, "State change"); @@ -3575,7 +3574,7 @@ panfrost_draw_indirect(struct pipe_context *pipe, return; } - struct panfrost_batch *batch = prepare_draw(pipe, info); + struct panfrost_batch *batch = prepare_draw(pipe, info->mode); if (!batch) { mesa_loge("prepare_draw failed"); return; @@ -3586,7 +3585,7 @@ panfrost_draw_indirect(struct pipe_context *pipe, panfrost_batch_read_rsrc(batch, pan_resource(indirect->buffer), MESA_SHADER_VERTEX); - panfrost_update_active_prim(ctx, 
&tmp_info); + panfrost_update_active_prim(ctx, info->mode); ctx->drawid = drawid_offset; @@ -3624,7 +3623,7 @@ panfrost_multi_draw_direct(struct pipe_context *pipe, unsigned num_draws) { struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_batch *batch = prepare_draw(pipe, info); + struct panfrost_batch *batch = prepare_draw(pipe, info->mode); if (!batch) { mesa_loge("prepare_draw failed"); return; @@ -3667,6 +3666,43 @@ panfrost_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, } } +static void +panfrost_draw_fullscreen(struct panfrost_context *ctx, + struct panfrost_uncompiled_shader *vs, + enum blitter_attrib_type type, + const struct blitter_attrib *attrib) +{ + assert(!ctx->active_queries); + assert(!ctx->streamout.num_targets); + + PAN_TRACE_FUNC(PAN_TRACE_GL_CMDSTREAM); + + ctx->draw_calls++; + + struct panfrost_batch *batch = prepare_draw(&ctx->base, MESA_PRIM_QUADS); + if (!batch) { + mesa_loge("prepare_draw failed"); + return; + } + + /* Fullscreen draw calls don't configure any position or varying shader but + * link info is needed. The active primitive update takes care of the + * fragment shader variant update. */ + ctx->uncompiled[MESA_SHADER_VERTEX] = vs; + panfrost_update_shader_variant(ctx, MESA_SHADER_VERTEX); + panfrost_update_active_prim(ctx, MESA_PRIM_QUADS); + + /* Clear the dirty vertex flag to ensure the shader state update doesn't + * emit any vertex info. */ + ctx->dirty &= ~PAN_DIRTY_VERTEX; + panfrost_update_state_3d(batch); + panfrost_update_shader_state(batch, MESA_SHADER_FRAGMENT); + panfrost_clean_state_3d(ctx); + + JOBX(launch_draw_fullscreen)(batch, type, attrib); + batch->draw_count++; +} + /* Launch grid is the compute equivalent of draw_vbo, so in this routine, we * construct the COMPUTE job and some of its payload. 
*/ @@ -4685,6 +4721,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) screen->vtbl.emit_write_timestamp = emit_write_timestamp; screen->vtbl.select_tile_size = GENX(pan_select_tile_size); screen->vtbl.get_conv_desc = get_conv_desc; + screen->vtbl.draw_fullscreen = panfrost_draw_fullscreen; pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev), dev->kmod.dev->props.gpu_variant, diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index e5dd965cb5a..4b258388591 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -32,49 +32,13 @@ #include "compiler/pan_compiler.h" #include "compiler/nir/nir_serialize.h" #include "decode.h" +#include "pan_blitter.h" #include "pan_device.h" #include "pan_fence.h" #include "pan_screen.h" #include "pan_trace.h" #include "pan_util.h" -static void -panfrost_clear(struct pipe_context *pipe, unsigned buffers, - uint32_t color_clear_mask, uint8_t stencil_clear_mask, - const struct pipe_scissor_state *scissor_state, - const union pipe_color_union *color, double depth, - unsigned stencil) -{ - PAN_TRACE_FUNC(PAN_TRACE_GL_CONTEXT); - - if (!panfrost_render_condition_check(pan_context(pipe))) - return; - - /* Only get batch after checking the render condition, since the check can - * cause the batch to be flushed. 
- */ - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - if (!batch) - return; - - /* At the start of the batch, we can clear for free */ - if (batch->draw_count == 0) { - panfrost_batch_clear(batch, buffers, color, depth, stencil); - return; - } - - /* Once there is content, clear with a fullscreen quad */ - panfrost_blitter_save(ctx, PAN_RENDER_CLEAR); - - perf_debug(ctx, "Clearing with quad"); - util_blitter_clear( - ctx->blitter, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height, - util_framebuffer_get_num_layers(&ctx->pipe_framebuffer), buffers, color, - depth, stencil, - util_framebuffer_get_num_samples(&ctx->pipe_framebuffer) > 1); -} - bool panfrost_writes_point_size(struct panfrost_context *ctx) { @@ -262,9 +226,10 @@ panfrost_set_shader_images(struct pipe_context *pctx, struct panfrost_resource *rsrc = pan_resource(image->resource); - /* Images don't work with AFBC/AFRC, since they require pixel-level - * granularity */ - if (drm_is_afbc(rsrc->modifier) || drm_is_afrc(rsrc->modifier)) { + /* Images don't work with AFBC/AFRC/tiled, since they require + * pixel-level granularity */ + if (drm_is_afbc(rsrc->modifier) || drm_is_afrc(rsrc->modifier) || + drm_is_mtk_tiled(rsrc->modifier)) { pan_resource_modifier_convert( ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, true, "Shader image"); @@ -1029,7 +994,7 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) gallium->fence_server_sync = panfrost_fence_server_sync; gallium->flush = panfrost_flush; - gallium->clear = panfrost_clear; + gallium->clear = panfrost_blitter_clear; gallium->clear_texture = u_default_clear_texture; gallium->texture_barrier = panfrost_texture_barrier; gallium->set_frontend_noop = panfrost_set_frontend_noop; @@ -1100,7 +1065,7 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) goto failed; } - ctx->blitter = util_blitter_create(gallium); + 
ctx->blitter = panfrost_blitter_create(gallium); ctx->writers = _mesa_hash_table_create(gallium, _mesa_hash_pointer, _mesa_key_pointer_equal); diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 52004c92c4e..eaa29c1cd34 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -192,6 +192,7 @@ struct panfrost_context { } texture_buffer[MESA_SHADER_STAGES]; struct blitter_context *blitter; + bool has_blit_loop; struct pan_mod_convert_shaders mod_convert_shaders; @@ -435,6 +436,27 @@ struct panfrost_shader_binary { struct util_dynarray binary; }; +struct panfrost_run_fullscreen_attrib { + float x, y, z, w; +}; + +/* The tiler always allocates packets that can hold 64 vertices in RUN_IDVS + * malloc mode. For RUN_FULLSCREEN, the vertex array is preallocated but must + * match the tiler allocation strategy. */ +#define PAN_RUN_FULLSCREEN_NUM_VERTICES 64 + +#define PAN_RUN_FULLSCREEN_ATTRIB_STRIDE \ + sizeof(struct panfrost_run_fullscreen_attrib) + +/* A RUN_FULLSCREEN packet is made of a position and a texcoord attrib. 
*/ +#define PAN_RUN_FULLSCREEN_PACKET_STRIDE \ + (2 * sizeof(struct panfrost_run_fullscreen_attrib)) + +#define PAN_RUN_FULLSCREEN_ARRAY_SIZE \ + (PAN_RUN_FULLSCREEN_NUM_VERTICES * PAN_RUN_FULLSCREEN_PACKET_STRIDE) + +#define PAN_RUN_FULLSCREEN_ARRAY_ALIGN 64 + void panfrost_disk_cache_store(struct disk_cache *cache, const struct panfrost_uncompiled_shader *uncompiled, @@ -471,6 +493,10 @@ unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs, unsigned vbi, unsigned divisor); +struct pan_ptr panfrost_emit_fullscreen_vertex_array(struct panfrost_batch *batch, + enum blitter_attrib_type type, + const struct blitter_attrib *attrib); + struct panfrost_zsa_state; struct panfrost_sampler_state; struct panfrost_sampler_view; diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c index 2246804b85c..1f7cd871e1d 100644 --- a/src/gallium/drivers/panfrost/pan_csf.c +++ b/src/gallium/drivers/panfrost/pan_csf.c @@ -1109,6 +1109,120 @@ emit_tiler_oom_context(struct cs_builder *b, struct panfrost_batch *batch) csf_update_tiler_oom_ctx(b, batch->csf.tiler_oom_ctx.gpu); } +static void +csf_emit_draw_flags(struct panfrost_context *ctx, enum mesa_prim mode, + bool fs_required, struct MALI_DCD_FLAGS_0 *flags_0, + struct MALI_DCD_FLAGS_1 *flags_1) +{ + struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; + struct panfrost_compiled_shader *fs = ctx->prog[MESA_SHADER_FRAGMENT]; + + if (flags_0) { + enum mesa_prim reduced_mode = u_reduced_prim(mode); + bool polygon = reduced_mode == MESA_PRIM_TRIANGLES; + bool lines = reduced_mode == MESA_PRIM_LINES; + + /* + * From the Gallium documentation, + * pipe_rasterizer_state::cull_face "indicates which faces of + * polygons to cull". Points and lines are not considered + * polygons and should be drawn even if all faces are culled. + * The hardware does not take primitive type into account when + * culling, so we need to do that check ourselves. 
+ */ + flags_0->cull_front_face = + polygon && (rast->cull_face & PIPE_FACE_FRONT); + flags_0->cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK); + flags_0->front_face_ccw = rast->front_ccw; + + flags_0->multisample_enable = rast->multisample; + + /* Use per-sample shading if required by API Also use it when a + * blend shader is used with multisampling, as this is handled + * by a single ST_TILE in the blend shader with the current + * sample ID, requiring per-sample shading. + */ + flags_0->evaluate_per_sample = + (rast->multisample && + ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader)); + + flags_0->aligned_line_ends = !rast->line_rectangular; + + if (lines && rast->line_smooth) + flags_0->multisample_enable = true; + + bool has_oq = ctx->occlusion_query && ctx->active_queries; + if (has_oq) { + if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER) + flags_0->occlusion_query = MALI_OCCLUSION_MODE_COUNTER; + else + flags_0->occlusion_query = MALI_OCCLUSION_MODE_PREDICATE; + } + + if (fs_required) { + struct pan_earlyzs_state earlyzs = pan_earlyzs_get( + fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq, + ctx->blend->base.alpha_to_coverage, + ctx->depth_stencil->zs_always_passes, + PAN_EARLYZS_ZS_TILEBUF_NOT_READ); + + flags_0->pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill; + flags_0->zs_update_operation = (enum mali_pixel_kill)earlyzs.update; + + flags_0->allow_forward_pixel_to_kill = + pan_allow_forward_pixel_to_kill(ctx, fs); + flags_0->allow_forward_pixel_to_be_killed = !fs->info.writes_global; + + flags_0->overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); + flags_0->overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); + + /* Also use per-sample shading if required by the shader + */ + flags_0->evaluate_per_sample |= + (fs->info.fs.sample_shading && rast->multisample); + + /* Unlike Bifrost, alpha-to-coverage must be included in + * this identically-named flag. Confusing, isn't it? 
+ */ + flags_0->shader_modifies_coverage = fs->info.fs.writes_coverage || + fs->info.fs.can_discard || + ctx->blend->base.alpha_to_coverage; + + flags_0->alpha_to_coverage = ctx->blend->base.alpha_to_coverage; + } else { + /* These operations need to be FORCE to benefit from the + * depth-only pass optimizations. + */ + flags_0->pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; + flags_0->zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY; + + /* No shader and no blend => no shader or blend + * reasons to disable FPK. The only FPK-related state + * not covered is alpha-to-coverage which we don't set + * without blend. + */ + flags_0->allow_forward_pixel_to_kill = true; + + /* No shader => no shader side effects */ + flags_0->allow_forward_pixel_to_be_killed = true; + + /* Alpha isn't written so these are vacuous */ + flags_0->overdraw_alpha0 = true; + flags_0->overdraw_alpha1 = true; + } + } + + if (flags_1) { + flags_1->sample_mask = rast->multisample ? ctx->sample_mask : 0xFFFF; + + if (fs_required) { + /* See JM Valhall equivalent code */ + flags_1->render_target_mask = + (fs->info.outputs_written >> FRAG_RESULT_DATA0) & ctx->fb_rt_mask; + } + } +} + static uint32_t csf_emit_draw_state(struct panfrost_batch *batch, const struct pipe_draw_info *info, unsigned drawid_offset) @@ -1218,113 +1332,14 @@ csf_emit_draw_state(struct panfrost_batch *batch, cs_move32_to(b, cs_sr_reg32(b, IDVS, TILER_FLAGS), primitive_flags.opaque[0]); + struct MALI_DCD_FLAGS_0 dcd_flags0_unpacked = { 0, }; + struct MALI_DCD_FLAGS_1 dcd_flags1_unpacked = { 0, }; struct mali_dcd_flags_0_packed dcd_flags0; struct mali_dcd_flags_1_packed dcd_flags1; - - pan_pack(&dcd_flags0, DCD_FLAGS_0, cfg) { - enum mesa_prim reduced_mode = u_reduced_prim(info->mode); - bool polygon = reduced_mode == MESA_PRIM_TRIANGLES; - bool lines = reduced_mode == MESA_PRIM_LINES; - - /* - * From the Gallium documentation, - * pipe_rasterizer_state::cull_face "indicates which faces of - * polygons to cull". 
Points and lines are not considered - * polygons and should be drawn even if all faces are culled. - * The hardware does not take primitive type into account when - * culling, so we need to do that check ourselves. - */ - cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT); - cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK); - cfg.front_face_ccw = rast->front_ccw; - - cfg.multisample_enable = rast->multisample; - - /* Use per-sample shading if required by API Also use it when a - * blend shader is used with multisampling, as this is handled - * by a single ST_TILE in the blend shader with the current - * sample ID, requiring per-sample shading. - */ - cfg.evaluate_per_sample = - (rast->multisample && - ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader)); - - cfg.aligned_line_ends = !rast->line_rectangular; - - if (lines && rast->line_smooth) - cfg.multisample_enable = true; - - bool has_oq = ctx->occlusion_query && ctx->active_queries; - if (has_oq) { - if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER) - cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER; - else - cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE; - } - - if (fs_required) { - struct pan_earlyzs_state earlyzs = pan_earlyzs_get( - fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq, - ctx->blend->base.alpha_to_coverage, - ctx->depth_stencil->zs_always_passes, - PAN_EARLYZS_ZS_TILEBUF_NOT_READ); - - cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill; - cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs.update; - - cfg.allow_forward_pixel_to_kill = - pan_allow_forward_pixel_to_kill(ctx, fs); - cfg.allow_forward_pixel_to_be_killed = !fs->info.writes_global; - - cfg.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); - cfg.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); - - /* Also use per-sample shading if required by the shader - */ - cfg.evaluate_per_sample |= - (fs->info.fs.sample_shading && rast->multisample); - - /* Unlike 
Bifrost, alpha-to-coverage must be included in - * this identically-named flag. Confusing, isn't it? - */ - cfg.shader_modifies_coverage = fs->info.fs.writes_coverage || - fs->info.fs.can_discard || - ctx->blend->base.alpha_to_coverage; - - cfg.alpha_to_coverage = ctx->blend->base.alpha_to_coverage; - } else { - /* These operations need to be FORCE to benefit from the - * depth-only pass optimizations. - */ - cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; - cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY; - - /* No shader and no blend => no shader or blend - * reasons to disable FPK. The only FPK-related state - * not covered is alpha-to-coverage which we don't set - * without blend. - */ - cfg.allow_forward_pixel_to_kill = true; - - /* No shader => no shader side effects */ - cfg.allow_forward_pixel_to_be_killed = true; - - /* Alpha isn't written so these are vacuous */ - cfg.overdraw_alpha0 = true; - cfg.overdraw_alpha1 = true; - } - } - - pan_pack(&dcd_flags1, DCD_FLAGS_1, cfg) { - cfg.sample_mask = rast->multisample ? 
ctx->sample_mask : 0xFFFF;
-
-      if (fs_required) {
-         /* See JM Valhall equivalent code */
-         cfg.render_target_mask =
-            (fs->info.outputs_written >> FRAG_RESULT_DATA0) & ctx->fb_rt_mask;
-      }
-   }
-
+   csf_emit_draw_flags(ctx, info->mode, fs_required, &dcd_flags0_unpacked,
+                       &dcd_flags1_unpacked);
+   MALI_DCD_FLAGS_0_pack(&dcd_flags0, &dcd_flags0_unpacked);
+   MALI_DCD_FLAGS_1_pack(&dcd_flags1, &dcd_flags1_unpacked);
    cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD0), dcd_flags0.opaque[0]);
    cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD1), dcd_flags1.opaque[0]);
 
@@ -1462,6 +1477,144 @@ GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch,
    }
 }
 
+/*
+ * Allocate and pack the DRAW descriptor (DCD) consumed by RUN_FULLSCREEN.
+ *
+ * The unpacked DCD flag words are also stored to dcd_flags0 and dcd_flags1 so
+ * that the caller can reuse them. If the descriptor allocation fails, nothing
+ * is packed, the flag words are left untouched and the returned pointer has a
+ * NULL cpu address.
+ */
+static struct pan_ptr
+csf_emit_fullscreen_dcd(struct panfrost_batch *batch,
+                        struct pan_ptr vertex_array, uint64_t resources,
+                        struct MALI_DCD_FLAGS_0 *dcd_flags0,
+                        struct MALI_DCD_FLAGS_1 *dcd_flags1)
+{
+   struct panfrost_context *ctx = batch->ctx;
+   struct pan_ptr dcd = pan_pool_alloc_desc(&batch->pool.base, DRAW);
+
+   if (!dcd.cpu)
+      return dcd;
+
+   csf_emit_draw_flags(ctx, MESA_PRIM_QUADS, true, dcd_flags0, dcd_flags1);
+
+   pan_cast_and_pack(dcd.cpu, DRAW, cfg) {
+      /* Flags */
+      cfg.flags_0 = *dcd_flags0;
+      cfg.flags_1 = *dcd_flags1;
+
+      /* Vertex descriptor */
+      if (vertex_array.cpu) {
+#if PAN_ARCH >= 12
+         cfg.vertex_pointer = vertex_array.gpu;
+#else
+         cfg.vertex_array.packet = true;
+         cfg.vertex_array.pointer = vertex_array.gpu;
+         cfg.vertex_array.vertex_packet_stride =
+            PAN_RUN_FULLSCREEN_PACKET_STRIDE;
+         cfg.vertex_array.vertex_attribute_stride =
+            PAN_RUN_FULLSCREEN_ATTRIB_STRIDE;
+#endif
+      }
+
+      /* Depth/stencil and blend descriptor */
+#if PAN_ARCH == 10
+      cfg.minimum_z = batch->minimum_z;
+      cfg.maximum_z = batch->maximum_z;
+#endif
+      cfg.depth_stencil = batch->depth_stencil;
+      cfg.blend_count = MAX2(batch->key.nr_cbufs, 1);
+      cfg.blend = batch->blend;
+
+      /* Shader environment */
+#if PAN_ARCH >= 12
+      cfg.fragment_fau.count = DIV_ROUND_UP(
+         batch->nr_push_uniforms[MESA_SHADER_FRAGMENT], 2);
+      cfg.fragment_resources = resources;
+      cfg.fragment_shader = batch->rsd[MESA_SHADER_FRAGMENT];
+      cfg.thread_storage = batch->tls.gpu;
+      cfg.fragment_fau.pointer = batch->push_uniforms[MESA_SHADER_FRAGMENT];
+#else
+      cfg.shader.attribute_offset = 0;
+      cfg.shader.fau_count = DIV_ROUND_UP(
+         batch->nr_push_uniforms[MESA_SHADER_FRAGMENT], 2);
+      cfg.shader.resources = resources;
+      cfg.shader.shader = batch->rsd[MESA_SHADER_FRAGMENT];
+      cfg.shader.thread_storage = batch->tls.gpu;
+      cfg.shader.fau = batch->push_uniforms[MESA_SHADER_FRAGMENT];
+#endif
+   }
+
+   return dcd;
+}
+
+/*
+ * Emit a RUN_FULLSCREEN draw into the command stream of the batch. type and
+ * attrib select the optional texture coordinates of the fullscreen primitive.
+ * The draw is dropped if its DRAW descriptor cannot be allocated.
+ */
+void
+GENX(csf_launch_draw_fullscreen)(struct panfrost_batch *batch,
+                                 enum blitter_attrib_type type,
+                                 const struct blitter_attrib *attrib)
+{
+   PAN_TRACE_FUNC(PAN_TRACE_GL_CSF);
+
+   struct cs_builder *b = batch->csf.cs.builder;
+   struct MALI_DCD_FLAGS_0 dcd_flags0_unpacked = { 0, };
+   struct MALI_DCD_FLAGS_1 dcd_flags1_unpacked = { 0, };
+
+   if (batch->draw_count == 0) {
+      emit_tiler_oom_context(b, batch);
+      cs_vt_start(batch->csf.cs.builder, cs_now());
+   }
+
+   /* Build draw call. */
+   struct pan_ptr array = panfrost_emit_fullscreen_vertex_array(batch, type,
+                                                                attrib);
+   uint64_t resources = panfrost_emit_resources(batch, MESA_SHADER_FRAGMENT);
+   struct pan_ptr dcd = csf_emit_fullscreen_dcd(batch, array, resources,
+                                                &dcd_flags0_unpacked,
+                                                &dcd_flags1_unpacked);
+
+   /* Drop the draw instead of pointing RUN_FULLSCREEN at a DRAW descriptor
+    * that could not be allocated.
+    */
+   if (!dcd.cpu)
+      return;
+
+   struct mali_primitive_flags_packed primitive_flags;
+   pan_pack(&primitive_flags, PRIMITIVE_FLAGS, cfg) {
+      cfg.scissor_array_enable = false;
+      cfg.view_mask = 0;
+   }
+
+   /* Set input staging registers. */
+   uint64_t *sbd = (uint64_t *)batch->scissor;
+   cs_move64_to(b, cs_sr_reg64(b, FULLSCREEN, TILER_CTX),
+                csf_get_tiler_desc(batch));
+   cs_move64_to(b, cs_sr_reg64(b, FULLSCREEN, SCISSOR_BOX), *sbd);
+   cs_move32_to(b, cs_sr_reg32(b, FULLSCREEN, TILER_FLAGS),
+                primitive_flags.opaque[0]);
+#if PAN_ARCH >= 13
+   struct mali_dcd_flags_0_packed dcd_flags0;
+   struct mali_dcd_flags_1_packed dcd_flags1;
+   MALI_DCD_FLAGS_0_pack(&dcd_flags0, &dcd_flags0_unpacked);
+   MALI_DCD_FLAGS_1_pack(&dcd_flags1, &dcd_flags1_unpacked);
+   cs_move32_to(b, cs_sr_reg32(b, FULLSCREEN, TILER_DCD_FLAGS0),
+                dcd_flags0.opaque[0]);
+   cs_move32_to(b, cs_sr_reg32(b, FULLSCREEN, TILER_DCD_FLAGS1),
+                dcd_flags1.opaque[0]);
+   cs_move32_to(b, cs_sr_reg32(b, FULLSCREEN, TILER_DCD_FLAGS2), 0);
+#endif
+
+   /* Emit RUN_FULLSCREEN. */
+   struct cs_index dcd_pointer = cs_reg64(b, 64);
+   cs_move64_to(b, dcd_pointer, dcd.gpu);
+   cs_run_fullscreen(b, 0, dcd_pointer);
+}
+
 #define POSITION_FIFO_SIZE (64 * 1024)
 
 static enum drm_panthor_group_priority
diff --git a/src/gallium/drivers/panfrost/pan_csf.h b/src/gallium/drivers/panfrost/pan_csf.h
index b7be8be2339..d649b58dc56 100644
--- a/src/gallium/drivers/panfrost/pan_csf.h
+++ b/src/gallium/drivers/panfrost/pan_csf.h
@@ -83,6 +83,7 @@ struct panfrost_csf_context {
 #if defined(PAN_ARCH) && PAN_ARCH >= 10
 
 #include "genxml/gen_macros.h"
+#include "util/u_blitter.h"
 
 struct panfrost_batch;
 struct panfrost_context;
@@ -120,6 +121,9 @@ void GENX(csf_launch_draw_indirect)(struct panfrost_batch *batch,
                                     const struct pipe_draw_info *info,
                                     unsigned drawid_offset,
                                     const struct pipe_draw_indirect_info *indirect);
+void GENX(csf_launch_draw_fullscreen)(struct panfrost_batch *batch,
+                                      enum blitter_attrib_type type,
+                                      const struct blitter_attrib *attrib);
 
 void GENX(csf_emit_write_timestamp)(struct panfrost_batch *batch,
                                     struct panfrost_resource *dst,
diff --git a/src/gallium/drivers/panfrost/pan_helpers.c b/src/gallium/drivers/panfrost/pan_helpers.c
index 
99012fd8dee..29d7d63518c 100644
--- a/src/gallium/drivers/panfrost/pan_helpers.c
+++ b/src/gallium/drivers/panfrost/pan_helpers.c
@@ -172,6 +172,56 @@ pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs,
    return idx;
 }
 
+/*
+ * Allocate the vertex array of a fullscreen draw and fill in the per-vertex
+ * texture coordinates taken from attrib.
+ *
+ * Returns a zeroed pan_ptr when type carries no texture coordinates. If the
+ * pool allocation fails, the failed allocation is returned as is (NULL cpu
+ * address) without being written to.
+ */
+struct pan_ptr
+panfrost_emit_fullscreen_vertex_array(struct panfrost_batch *batch,
+                                      enum blitter_attrib_type type,
+                                      const struct blitter_attrib *attrib)
+{
+   struct pan_ptr array = { .cpu = NULL, .gpu = 0 };
+   struct panfrost_run_fullscreen_attrib *texcoords;
+
+   if (type != UTIL_BLITTER_ATTRIB_TEXCOORD_XY &&
+       type != UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW)
+      return array;
+
+   array = pan_pool_alloc_aligned(&batch->pool.base,
+                                  PAN_RUN_FULLSCREEN_ARRAY_SIZE,
+                                  PAN_RUN_FULLSCREEN_ARRAY_ALIGN);
+   if (!array.cpu)
+      return array;
+
+   /* Texture coordinates live after PAN_RUN_FULLSCREEN_NUM_VERTICES
+    * attribute strides.
+    */
+   texcoords = (struct panfrost_run_fullscreen_attrib *)
+      ((uint8_t *)array.cpu + (PAN_RUN_FULLSCREEN_NUM_VERTICES *
+                               PAN_RUN_FULLSCREEN_ATTRIB_STRIDE));
+
+   /* One entry per vertex: (x1, y1), (x2, y1), (x1, y2), sharing z and w. */
+   texcoords[0].x = attrib->texcoord.x1;
+   texcoords[0].y = attrib->texcoord.y1;
+   texcoords[0].z = attrib->texcoord.z;
+   texcoords[0].w = attrib->texcoord.w;
+   texcoords[1].x = attrib->texcoord.x2;
+   texcoords[1].y = attrib->texcoord.y1;
+   texcoords[1].z = attrib->texcoord.z;
+   texcoords[1].w = attrib->texcoord.w;
+   texcoords[2].x = attrib->texcoord.x1;
+   texcoords[2].y = attrib->texcoord.y2;
+   texcoords[2].z = attrib->texcoord.z;
+   texcoords[2].w = attrib->texcoord.w;
+
+   return array;
+}
+
 /*
  * Helper to add a PIPE_CLEAR_* to batch->draws and batch->resolve together,
  * meaning that we draw to a given target.
Adding to only one mask does not
diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c
index 845c238853e..f8ba0428c8d 100644
--- a/src/gallium/drivers/panfrost/pan_jm.c
+++ b/src/gallium/drivers/panfrost/pan_jm.c
@@ -497,7 +497,8 @@ jm_emit_vertex_job(struct panfrost_batch *batch,
 
 static void
 jm_emit_tiler_draw(struct mali_draw_packed *out, struct panfrost_batch *batch,
-                   bool fs_required, enum mesa_prim prim)
+                   bool fs_required, enum mesa_prim prim,
+                   struct pan_ptr *vertex_array)
 {
    struct panfrost_context *ctx = batch->ctx;
    struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
@@ -559,6 +560,13 @@ jm_emit_tiler_draw(struct mali_draw_packed *out, struct panfrost_batch *batch,
       cfg.flags_0.aligned_line_ends = !rast->line_rectangular;
 
       cfg.vertex_array.packet = true;
+      if (vertex_array) {
+         cfg.vertex_array.pointer = vertex_array->gpu;
+         cfg.vertex_array.vertex_packet_stride =
+            PAN_RUN_FULLSCREEN_PACKET_STRIDE;
+         cfg.vertex_array.vertex_attribute_stride =
+            PAN_RUN_FULLSCREEN_ATTRIB_STRIDE;
+      }
 
       cfg.minimum_z = batch->minimum_z;
       cfg.maximum_z = batch->maximum_z;
@@ -804,7 +812,7 @@ jm_emit_malloc_vertex_job(struct panfrost_batch *batch,
    }
 
    jm_emit_tiler_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW), batch,
-                      fs_required, u_reduced_prim(info->mode));
+                      fs_required, u_reduced_prim(info->mode), NULL);
 
    pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) {
       jm_emit_shader_env(batch, &cfg, MESA_SHADER_VERTEX,
@@ -852,7 +860,8 @@ jm_emit_tiler_job(struct panfrost_batch *batch,
       ;
 #endif
 
-   jm_emit_tiler_draw(pan_section_ptr(job, TILER_JOB, DRAW), batch, true, prim);
+   jm_emit_tiler_draw(pan_section_ptr(job, TILER_JOB, DRAW), batch, true,
+                      prim, NULL);
 
    panfrost_emit_primitive_size(ctx, prim == MESA_PRIM_POINTS,
                                 batch->varyings.psiz, prim_size);
@@ -1020,6 +1029,56 @@ GENX(jm_launch_draw_indirect)(struct panfrost_batch *batch,
    UNREACHABLE("draw indirect not implemented for jm");
 }
 
+/*
+ * Queue a FULLSCREEN job drawing a fullscreen primitive, type and attrib
+ * select its optional texture coordinates. Only available on v9. The job is
+ * dropped, with an error logged, if a descriptor allocation fails.
+ */
+void
+GENX(jm_launch_draw_fullscreen)(struct panfrost_batch *batch,
+                                enum blitter_attrib_type type,
+                                const struct blitter_attrib *attrib)
+{
+#if PAN_ARCH == 9
+   PAN_TRACE_FUNC(PAN_TRACE_GL_JM);
+
+   struct pan_ptr job, dcd, vertex_array;
+
+   job = pan_pool_alloc_desc(&batch->pool.base, FULLSCREEN_JOB);
+   if (!job.cpu) {
+      mesa_loge("jm_launch_draw_fullscreen failed");
+      return;
+   }
+
+   dcd = pan_pool_alloc_desc(&batch->pool.base, DRAW);
+   if (!dcd.cpu) {
+      mesa_loge("jm_launch_draw_fullscreen failed");
+      return;
+   }
+
+   vertex_array = panfrost_emit_fullscreen_vertex_array(batch, type, attrib);
+   jm_emit_tiler_draw(dcd.cpu, batch, true, u_reduced_prim(MESA_PRIM_QUADS),
+                      &vertex_array);
+
+   pan_section_pack(job.cpu, FULLSCREEN_JOB, PRIMITIVE, cfg) {
+      cfg.scissor_array_enable = false;
+   }
+   pan_section_pack(job.cpu, FULLSCREEN_JOB, DCD, cfg) {
+      cfg.address = dcd.gpu;
+   }
+   pan_section_pack(job.cpu, FULLSCREEN_JOB, TILER, cfg) {
+      cfg.address = jm_emit_tiler_desc(batch);
+   }
+   memcpy(pan_section_ptr(job.cpu, FULLSCREEN_JOB, SCISSOR), &batch->scissor,
+          pan_size(SCISSOR));
+
+   pan_jc_add_job(&batch->jm.jobs.vtc_jc, MALI_JOB_TYPE_FULLSCREEN, false,
+                  false, 0, 0, &job, false);
+#else
+   UNREACHABLE("draw fullscreen not available for arch < 9");
+#endif
+}
+
 void
 GENX(jm_emit_write_timestamp)(struct panfrost_batch *batch,
                               struct panfrost_resource *dst, unsigned offset)
diff --git a/src/gallium/drivers/panfrost/pan_jm.h b/src/gallium/drivers/panfrost/pan_jm.h
index 22ca685d67a..947b2a45acb 100644
--- a/src/gallium/drivers/panfrost/pan_jm.h
+++ b/src/gallium/drivers/panfrost/pan_jm.h
@@ -26,6 +26,7 @@ struct panfrost_jm_batch {
 #if defined(PAN_ARCH) && PAN_ARCH < 10
 
 #include "genxml/gen_macros.h"
+#include "util/u_blitter.h"
 
 struct panfrost_batch;
 struct panfrost_context;
@@ -75,6 +76,9 @@ void GENX(jm_launch_draw_indirect)(struct panfrost_batch *batch,
                                    const struct pipe_draw_info *info,
                                    unsigned drawid_offset,
                                    const struct pipe_draw_indirect_info *indirect);
+void GENX(jm_launch_draw_fullscreen)(struct panfrost_batch *batch,
+                                     enum 
blitter_attrib_type type, + const struct blitter_attrib *attrib); void GENX(jm_emit_write_timestamp)(struct panfrost_batch *batch, struct panfrost_resource *dst, diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 5ac43cdeeae..b64160fa5fb 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -27,6 +27,7 @@ #include "decode.h" #include "pan_afbc.h" #include "pan_afrc.h" +#include "pan_blitter.h" #include "pan_bo.h" #include "pan_context.h" #include "pan_resource.h" @@ -35,56 +36,6 @@ #include "pan_trace.h" #include "pan_util.h" -static void -panfrost_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, unsigned clear_flags, - double depth, unsigned stencil, unsigned dstx, - unsigned dsty, unsigned width, unsigned height, - bool render_condition_enabled) -{ - PAN_TRACE_FUNC(PAN_TRACE_GL_RESOURCE); - - struct panfrost_context *ctx = pan_context(pipe); - - if (render_condition_enabled && !panfrost_render_condition_check(ctx)) - return; - - /* Legalize here because it could trigger a recursive blit otherwise */ - struct panfrost_resource *rdst = pan_resource(dst->texture); - enum pipe_format dst_view_format = util_format_linear(dst->format); - pan_legalize_format(ctx, rdst, dst_view_format, true, false); - - panfrost_blitter_save( - ctx, render_condition_enabled ? 
PAN_RENDER_COND : PAN_RENDER_BASE); - util_blitter_clear_depth_stencil(ctx->blitter, dst, clear_flags, depth, - stencil, dstx, dsty, width, height); -} - -static void -panfrost_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const union pipe_color_union *color, unsigned dstx, - unsigned dsty, unsigned width, unsigned height, - bool render_condition_enabled) -{ - PAN_TRACE_FUNC(PAN_TRACE_GL_RESOURCE); - - struct panfrost_context *ctx = pan_context(pipe); - - if (render_condition_enabled && !panfrost_render_condition_check(ctx)) - return; - - /* Legalize here because it could trigger a recursive blit otherwise */ - struct panfrost_resource *rdst = pan_resource(dst->texture); - enum pipe_format dst_view_format = util_format_linear(dst->format); - pan_legalize_format(ctx, rdst, dst_view_format, true, false); - - panfrost_blitter_save( - ctx, (render_condition_enabled ? PAN_RENDER_COND : PAN_RENDER_BASE) | PAN_SAVE_FRAGMENT_CONSTANT); - util_blitter_clear_render_target(ctx->blitter, dst, color, dstx, dsty, width, - height); -} - static uint64_t panfrost_max_res_size_b(unsigned arch) { @@ -1355,7 +1306,7 @@ pan_blit_from_staging(struct pipe_context *pctx, blit.mask = util_format_get_mask(blit.src.format); blit.filter = PIPE_TEX_FILTER_NEAREST; - panfrost_blit_no_afbc_legalization(pctx, &blit); + panfrost_blitter_blit_legalized(pctx, &blit); } static void @@ -1375,7 +1326,7 @@ pan_blit_to_staging(struct pipe_context *pctx, struct panfrost_transfer *trans) blit.mask = util_format_get_mask(blit.dst.format); blit.filter = PIPE_TEX_FILTER_NEAREST; - panfrost_blit_no_afbc_legalization(pctx, &blit); + panfrost_blitter_blit_legalized(pctx, &blit); } static void @@ -1473,7 +1424,7 @@ pan_dump_resource(struct panfrost_context *ctx, struct panfrost_resource *rsc) blit.mask = util_format_get_mask(blit.dst.format); blit.filter = PIPE_TEX_FILTER_NEAREST; - panfrost_blit(pctx, &blit); + panfrost_blitter_blit(pctx, &blit); linear = pan_resource(plinear); } 
@@ -1863,7 +1814,7 @@ pan_resource_modifier_convert(struct panfrost_context *ctx, if (drm_is_mtk_tiled(rsrc->modifier)) screen->vtbl.mtk_detile(ctx, &blit); else - panfrost_blit_no_afbc_legalization(&ctx->base, &blit); + panfrost_blitter_blit_legalized(&ctx->base, &blit); } } @@ -2578,7 +2529,7 @@ panfrost_resource_context_init(struct pipe_context *pctx) pctx->texture_map = u_transfer_helper_transfer_map; pctx->texture_unmap = u_transfer_helper_transfer_unmap; pctx->resource_copy_region = util_resource_copy_region; - pctx->blit = panfrost_blit; + pctx->blit = panfrost_blitter_blit; pctx->generate_mipmap = panfrost_generate_mipmap; pctx->flush_resource = panfrost_flush_resource; pctx->invalidate_resource = panfrost_invalidate_resource; @@ -2586,6 +2537,6 @@ panfrost_resource_context_init(struct pipe_context *pctx) pctx->buffer_subdata = u_default_buffer_subdata; pctx->texture_subdata = u_default_texture_subdata; pctx->clear_buffer = u_default_clear_buffer; - pctx->clear_render_target = panfrost_clear_render_target; - pctx->clear_depth_stencil = panfrost_clear_depth_stencil; + pctx->clear_render_target = panfrost_blitter_clear_render_target; + pctx->clear_depth_stencil = panfrost_blitter_clear_depth_stencil; } diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h index 5220b5a11d4..392463d105b 100644 --- a/src/gallium/drivers/panfrost/pan_resource.h +++ b/src/gallium/drivers/panfrost/pan_resource.h @@ -145,37 +145,6 @@ void panfrost_resource_screen_destroy(struct pipe_screen *screen); void panfrost_resource_context_init(struct pipe_context *pctx); -/* Blitting */ - -enum panfrost_blitter_op /* bitmask */ -{ - PAN_SAVE_TEXTURES = 1, - PAN_SAVE_FRAMEBUFFER = 2, - PAN_SAVE_FRAGMENT_STATE = 4, - PAN_SAVE_FRAGMENT_CONSTANT = 8, - PAN_DISABLE_RENDER_COND = 16, -}; - -enum { - PAN_RENDER_BLIT = - PAN_SAVE_TEXTURES | PAN_SAVE_FRAMEBUFFER | PAN_SAVE_FRAGMENT_STATE, - PAN_RENDER_BLIT_COND = PAN_SAVE_TEXTURES | 
PAN_SAVE_FRAMEBUFFER | - PAN_SAVE_FRAGMENT_STATE | PAN_DISABLE_RENDER_COND, - PAN_RENDER_BASE = PAN_SAVE_FRAMEBUFFER | PAN_SAVE_FRAGMENT_STATE, - PAN_RENDER_COND = - PAN_SAVE_FRAMEBUFFER | PAN_SAVE_FRAGMENT_STATE | PAN_DISABLE_RENDER_COND, - PAN_RENDER_CLEAR = PAN_SAVE_FRAGMENT_STATE | PAN_SAVE_FRAGMENT_CONSTANT, -}; - -/* Callers should ensure that all AFBC/AFRC resources that will be used in the - * blit operation are legalized before calling blitter operations, otherwise - * we may trigger a recursive blit */ -void panfrost_blitter_save(struct panfrost_context *ctx, - const enum panfrost_blitter_op blitter_op); - -void panfrost_blit(struct pipe_context *pipe, - const struct pipe_blit_info *info); - void panfrost_resource_set_damage_region(struct pipe_screen *screen, struct pipe_resource *res, unsigned int nrects, @@ -249,7 +218,4 @@ void pan_legalize_format(struct panfrost_context *ctx, void pan_dump_resource(struct panfrost_context *ctx, struct panfrost_resource *rsc); -void panfrost_blit_no_afbc_legalization(struct pipe_context *pipe, - const struct pipe_blit_info *info); - #endif /* PAN_RESOURCE_H */ diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index 14eb7ea59fd..29e576256dc 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -15,6 +15,7 @@ #include "util/disk_cache.h" #include "util/log.h" #include "util/set.h" +#include "util/u_blitter.h" #include "util/u_dynarray.h" #include "pan_device.h" @@ -30,6 +31,7 @@ struct panfrost_batch; struct panfrost_context; struct panfrost_resource; struct panfrost_compiled_shader; +struct panfrost_uncompiled_shader; struct pan_fb_info; struct pan_blend_state; @@ -92,6 +94,12 @@ struct panfrost_vtable { /* construct a render target blend descriptor */ uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt, unsigned force_size, bool dithered); + + /* Run a fullscreen draw call (for blits) */ + void 
(*draw_fullscreen)(struct panfrost_context *ctx, + struct panfrost_uncompiled_shader *vs, + enum blitter_attrib_type type, + const struct blitter_attrib *attrib); }; struct panfrost_screen { diff --git a/src/panfrost/ci/panfrost-g610-fails.txt b/src/panfrost/ci/panfrost-g610-fails.txt index 65a2775e4f6..bf9f0769d0c 100644 --- a/src/panfrost/ci/panfrost-g610-fails.txt +++ b/src/panfrost/ci/panfrost-g610-fails.txt @@ -96,7 +96,6 @@ spec@egl 1.4@eglterminate then unbind context,Fail spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail spec@egl_khr_surfaceless_context@viewport,Fail spec@egl_mesa_configless_context@basic,Fail -spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail spec@ext_framebuffer_multisample@accuracy 16 srgb depthstencil,Fail spec@ext_framebuffer_multisample@accuracy 16 srgb depthstencil linear,Fail spec@ext_framebuffer_multisample@accuracy 16 srgb small depthstencil,Fail diff --git a/src/panfrost/genxml/v10.xml b/src/panfrost/genxml/v10.xml index 2fd4bb86637..0eeae1d67fd 100644 --- a/src/panfrost/genxml/v10.xml +++ b/src/panfrost/genxml/v10.xml @@ -921,6 +921,12 @@ + + + + + + @@ -2095,10 +2101,8 @@ - + - - diff --git a/src/panfrost/genxml/v12.xml b/src/panfrost/genxml/v12.xml index 0d651f01b0d..80f7bf95227 100644 --- a/src/panfrost/genxml/v12.xml +++ b/src/panfrost/genxml/v12.xml @@ -1155,6 +1155,12 @@ + + + + + + diff --git a/src/panfrost/genxml/v13.xml b/src/panfrost/genxml/v13.xml index c644d2bd49c..14d7b46d30e 100644 --- a/src/panfrost/genxml/v13.xml +++ b/src/panfrost/genxml/v13.xml @@ -1464,6 +1464,15 @@ + + + + + + + + + diff --git a/src/panfrost/genxml/v9.xml b/src/panfrost/genxml/v9.xml index d5bc4c1e110..de23971815a 100644 --- a/src/panfrost/genxml/v9.xml +++ b/src/panfrost/genxml/v9.xml @@ -43,6 +43,7 @@ + @@ -1558,10 +1559,8 @@ - + - - @@ -1616,6 +1615,10 @@ + + + + @@ -1643,6 +1646,14 @@
+ +
+
+
+
+
+ +
diff --git a/src/panfrost/lib/pan_jc.h b/src/panfrost/lib/pan_jc.h index c853887001e..4fb24690236 100644 --- a/src/panfrost/lib/pan_jc.h +++ b/src/panfrost/lib/pan_jc.h @@ -114,7 +114,8 @@ static inline bool job_uses_tiling(enum mali_job_type type) { #if PAN_ARCH >= 9 - if (type == MALI_JOB_TYPE_MALLOC_VERTEX) + if (type == MALI_JOB_TYPE_MALLOC_VERTEX || + type == MALI_JOB_TYPE_FULLSCREEN) return true; #endif