diff --git a/src/gallium/drivers/panfrost/pan_blit.c b/src/gallium/drivers/panfrost/pan_blit.c
index 9013a8c60dc..10545eedd06 100644
--- a/src/gallium/drivers/panfrost/pan_blit.c
+++ b/src/gallium/drivers/panfrost/pan_blit.c
@@ -27,6 +27,7 @@
  *
  */
 
+#include "pan_blitter.h"
 #include "pan_context.h"
 #include "pan_util.h"
 #include "util/format/u_format.h"
@@ -86,20 +87,219 @@ panfrost_u_blitter_blit(struct pipe_context *pipe,
         return true;
 }
 
+static void
+panfrost_blit_add_ctx_bos(struct panfrost_batch *batch, /* batch receiving the BOs */
+                          struct pan_blit_context *ctx) /* blit context whose pool BOs are added */
+{
+        if (ctx->pool.transient_bo) { /* only added when the pool has one */
+                panfrost_batch_add_bo(batch, ctx->pool.transient_bo,
+                                      PAN_BO_ACCESS_SHARED |
+                                      PAN_BO_ACCESS_READ |
+                                      PAN_BO_ACCESS_VERTEX_TILER |
+                                      PAN_BO_ACCESS_FRAGMENT);
+        }
+
+        util_dynarray_foreach(&ctx->pool.bos, struct panfrost_bo *, bo) { /* same flags as above */
+                panfrost_batch_add_bo(batch, *bo,
+                                      PAN_BO_ACCESS_SHARED |
+                                      PAN_BO_ACCESS_READ |
+                                      PAN_BO_ACCESS_VERTEX_TILER |
+                                      PAN_BO_ACCESS_FRAGMENT);
+        }
+}
+
 void
 panfrost_blit(struct pipe_context *pipe,
               const struct pipe_blit_info *info)
 {
-        /* We don't have a hardware blit, so we just fake it with
-         * u_blitter. We could do a little better by culling
-         * vertex jobs, though. */
-
         if (info->render_condition_enable &&
             !panfrost_render_condition_check(pan_context(pipe)))
                 return;
 
-        if (panfrost_u_blitter_blit(pipe, info))
-                return;
+        struct panfrost_device *dev = pan_device(pipe->screen);
 
-        return;
+        if (!(dev->debug & PAN_DBG_PANBLIT)) { /* pan_blit() is opt-in; otherwise use u_blitter */
+                panfrost_u_blitter_blit(pipe, info);
+                return;
+        }
+
+        assert(!info->num_window_rectangles);
+        assert(!info->alpha_blend);
+
+        struct panfrost_resource *psrc = pan_resource(info->src.resource);
+        struct panfrost_resource *pdst = pan_resource(info->dst.resource);
+        struct panfrost_context *ctx = pan_context(pipe);
+
+        struct pipe_surface tmpl = { /* first/last layer are filled in per surface below */
+                .format = info->dst.format,
+                .u.tex.level = info->dst.level,
+        };
+
+        struct pan_blit_info pinfo = {
+                .src = {
+                        .planes[0].format = info->src.format,
+                        .planes[0].image = &psrc->image,
+                        .level = info->src.level,
+                        .start = { info->src.box.x, info->src.box.y },
+                        .end = { /* start + size - 1 */
+                                info->src.box.x + info->src.box.width - 1,
+                                info->src.box.y + info->src.box.height - 1,
+                        },
+                },
+                .dst = {
+                        .planes[0].format = info->dst.format,
+                        .planes[0].image = &pdst->image,
+                        .level = info->dst.level,
+                        .start = { info->dst.box.x, info->dst.box.y },
+                        .end = { /* start + size - 1 */
+                                info->dst.box.x + info->dst.box.width - 1,
+                                info->dst.box.y + info->dst.box.height - 1,
+                        },
+                },
+                .scissor = {
+                        .enable = info->scissor_enable,
+                        .minx = info->scissor.minx,
+                        .miny = info->scissor.miny,
+                        .maxx = info->scissor.maxx - 1, /* max bounds are passed as max - 1 */
+                        .maxy = info->scissor.maxy - 1,
+                },
+                .nearest = info->filter == PIPE_TEX_FILTER_NEAREST,
+        };
+
+        if (info->dst.resource->target == PIPE_TEXTURE_2D_ARRAY || /* box.z selects layers */
+            info->dst.resource->target == PIPE_TEXTURE_1D_ARRAY ||
+            info->dst.resource->target == PIPE_TEXTURE_CUBE ||
+            info->dst.resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
+                pinfo.dst.start.layer = info->dst.box.z;
+                pinfo.dst.end.layer = info->dst.box.z + info->dst.box.depth - 1;
+        } else if (info->dst.resource->target == PIPE_TEXTURE_3D) { /* box.z selects Z slices */
+                pinfo.dst.start.z = info->dst.box.z;
+                pinfo.dst.end.z = info->dst.box.z + info->dst.box.depth - 1;
+        }
+
+        if (info->src.resource->target == PIPE_TEXTURE_2D_ARRAY || /* same mapping for the source */
+            info->src.resource->target == PIPE_TEXTURE_1D_ARRAY ||
+            info->src.resource->target == PIPE_TEXTURE_CUBE ||
+            info->src.resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
+                pinfo.src.start.layer = info->src.box.z;
+                pinfo.src.end.layer = info->src.box.z + info->src.box.depth - 1;
+        } else if (info->src.resource->target == PIPE_TEXTURE_3D) {
+                pinfo.src.start.z = info->src.box.z;
+                pinfo.src.end.z = info->src.box.z + info->src.box.depth - 1;
+        }
+
+        unsigned draw_flags = 0; /* PIPE_CLEAR_* bits derived from info->mask, stored in batch->draws */
+
+        /* For ZS buffers, only blit the component selected by the mask; the
+         * preload logic will take care of preloading the other component.
+         */
+        if (util_format_is_depth_and_stencil(pinfo.dst.planes[0].format) &&
+            util_format_is_depth_and_stencil(pinfo.src.planes[0].format) &&
+            (info->mask & PIPE_MASK_ZS) != PIPE_MASK_ZS) {
+                pinfo.src.planes[0].format =
+                        (info->mask & PIPE_MASK_Z) ?
+                        util_format_get_depth_only(info->src.format) :
+                        util_format_stencil_only(info->src.format);
+                pinfo.dst.planes[0].format =
+                        (info->mask & PIPE_MASK_Z) ?
+                        util_format_get_depth_only(info->dst.format) :
+                        util_format_stencil_only(info->dst.format);
+        }
+
+        /* With our Z32_FLOAT_S8X24_UINT mapped to Z32_FLOAT + S8_UINT we
+         * can't easily handle ZS <-> color blits, so let's forbid them for
+         * now.
+         */
+        assert((!psrc->separate_stencil && !pdst->separate_stencil) ||
+               !(info->mask & ~PIPE_MASK_ZS));
+
+        if (psrc->separate_stencil) {
+                if (pinfo.src.planes[0].format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+                        pinfo.src.planes[0].format = PIPE_FORMAT_Z32_FLOAT;
+
+                if (info->mask & PIPE_MASK_S) {
+                        unsigned s_idx = info->mask & PIPE_MASK_Z ? 1 : 0; /* plane 1 if Z is blitted too */
+
+                        pinfo.src.planes[s_idx].format = PIPE_FORMAT_S8_UINT;
+                        pinfo.src.planes[s_idx].image = &psrc->separate_stencil->image;
+                }
+        }
+
+        if (info->mask & PIPE_MASK_Z)
+                draw_flags |= PIPE_CLEAR_DEPTH;
+
+        if (info->mask & PIPE_MASK_S)
+                draw_flags |= PIPE_CLEAR_STENCIL;
+
+        if (info->mask & PIPE_MASK_RGBA)
+                draw_flags |= PIPE_CLEAR_COLOR0;
+
+        unsigned dst_w = u_minify(info->dst.resource->width0, info->dst.level);
+        unsigned dst_h = u_minify(info->dst.resource->height0, info->dst.level);
+        unsigned minx = MAX2(0, pinfo.dst.start.x) & ~31; /* rounded down to a multiple of 32 */
+        unsigned miny = MAX2(0, pinfo.dst.start.y) & ~31;
+        unsigned maxx = MIN2(dst_w, ALIGN_POT(pinfo.dst.end.x + 1, 32)); /* clamped to dst_w */
+        unsigned maxy = MIN2(dst_h, ALIGN_POT(pinfo.dst.end.y + 1, 32)); /* clamped to dst_h */
+
+        if (info->scissor_enable) { /* NOTE(review): max is "+ 1" here but "- 1" in pinfo.scissor -- confirm */
+                minx = MAX2(minx, info->scissor.minx & ~31);
+                miny = MAX2(miny, info->scissor.miny & ~31);
+                maxx = MIN2(maxx, ALIGN_POT(info->scissor.maxx + 1, 32));
+                maxy = MIN2(maxy, ALIGN_POT(info->scissor.maxy + 1, 32));
+        }
+
+        struct pan_blit_context bctx;
+
+        pan_blit_ctx_init(dev, &pinfo, &bctx);
+        do {
+                if (bctx.dst.cur_layer < 0) /* 'continue' still evaluates the while () condition */
+                        continue;
+
+                tmpl.u.tex.first_layer = tmpl.u.tex.last_layer = bctx.dst.cur_layer;
+                struct pipe_surface *dst_surf =
+                        pipe->create_surface(pipe, info->dst.resource, &tmpl);
+                struct pipe_framebuffer_state key = { /* single-attachment FB used as the batch key */
+                        .width = dst_w,
+                        .height = dst_h,
+                };
+
+                if (util_format_is_depth_or_stencil(info->dst.format)) {
+                        key.zsbuf = dst_surf;
+                } else {
+                        key.cbufs[0] = dst_surf;
+                        key.nr_cbufs = 1;
+                }
+
+                struct panfrost_batch *batch = panfrost_get_fresh_batch(ctx, &key);
+
+                pipe_surface_reference(&dst_surf, NULL);
+
+                panfrost_batch_add_bo(batch, pinfo.src.planes[0].image->data.bo,
+                                      PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                                      PAN_BO_ACCESS_FRAGMENT);
+
+                if (pinfo.src.planes[1].image) { /* second source plane, when one was set up */
+                        panfrost_batch_add_bo(batch,
+                                              pinfo.src.planes[1].image->data.bo,
+                                              PAN_BO_ACCESS_SHARED |
+                                              PAN_BO_ACCESS_READ |
+                                              PAN_BO_ACCESS_FRAGMENT);
+                }
+
+                panfrost_batch_add_fbo_bos(batch);
+                panfrost_blit_add_ctx_bos(batch, &bctx);
+                batch->draws = draw_flags;
+                batch->minx = minx;
+                batch->miny = miny;
+                batch->maxx = maxx;
+                batch->maxy = maxy;
+
+                mali_ptr tiler = pan_is_bifrost(dev) ? /* tiler pointer is 0 when not Bifrost */
+                        panfrost_batch_get_bifrost_tiler(batch, ~0) : 0;
+                pan_blit(&bctx, &batch->pool, &batch->scoreboard,
+                         panfrost_batch_reserve_tls(batch, false), tiler);
+                panfrost_freeze_batch(batch); /* each blitted surface gets its own batch */
+        } while (pan_blit_next_surface(&bctx));
+
+        pan_blit_ctx_cleanup(&bctx);
 }
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 5119cf82827..60bce5d5283 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -96,9 +96,6 @@ panfrost_batch_fence_reference(struct panfrost_batch_fence *fence)
         pipe_reference(NULL, &fence->reference);
 }
 
-static void
-panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
-
 static struct panfrost_batch *
 panfrost_create_batch(struct panfrost_context *ctx,
                       const struct pipe_framebuffer_state *key)
@@ -131,7 +128,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
         return batch;
 }
 
-static void
+void
 panfrost_freeze_batch(struct panfrost_batch *batch)
 {
         struct panfrost_context *ctx = batch->ctx;
@@ -274,6 +271,29 @@ panfrost_get_batch(struct panfrost_context *ctx,
         return batch;
 }
 
+struct panfrost_batch *
+panfrost_get_fresh_batch(struct panfrost_context *ctx,
+                         const struct pipe_framebuffer_state *key)
+{
+        struct panfrost_batch *batch = panfrost_get_batch(ctx, key);
+
+        /* The batch has no draw/clear queued, so let's return it directly.
+         * Note that it's perfectly fine to re-use a batch with an
+         * existing clear; we'll just update it with the new clear request.
+         */
+        if (!batch->scoreboard.first_job) {
+                ctx->batch = batch;
+                return batch;
+        }
+
+        /* Otherwise, freeze the existing batch and instantiate a new one.
+         * NOTE(review): ctx->batch is only set on the path above -- confirm.
+         */
+        panfrost_freeze_batch(batch);
+        batch = panfrost_get_batch(ctx, key);
+        return batch;
+}
+
 /* Get the job corresponding to the FBO we're currently rendering into */
 
 struct panfrost_batch *
@@ -583,7 +603,8 @@ panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *su
         }
 }
 
-static void
+
+void
 panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
 {
         for (unsigned i = 0; i < batch->key.nr_cbufs; ++i)
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 6906c7c8d78..fcb79489ae4 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -140,12 +140,19 @@ panfrost_batch_fence_unreference(struct panfrost_batch_fence *fence);
 void
 panfrost_batch_fence_reference(struct panfrost_batch_fence *batch);
 
+struct panfrost_batch *
+panfrost_get_fresh_batch(struct panfrost_context *ctx,
+                         const struct pipe_framebuffer_state *key);
+
 struct panfrost_batch *
 panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
 
 struct panfrost_batch *
 panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx);
 
+void
+panfrost_freeze_batch(struct panfrost_batch *batch);
+
 void
 panfrost_batch_init(struct panfrost_context *ctx);
 
@@ -153,6 +160,9 @@ void
 panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
                       uint32_t flags);
 
+void
+panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
+
 struct panfrost_bo *
 panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                          uint32_t create_flags, uint32_t access_flags);
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index 2649a39d4f1..33ca20776c8 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -68,6 +68,7 @@ static const struct debug_named_value panfrost_debug_options[] = {
         {"noafbc", PAN_DBG_NO_AFBC, "Disable AFBC support"},
         {"nocrc", PAN_DBG_NO_CRC, "Disable transaction elimination"},
         {"msaa16", PAN_DBG_MSAA16, "Enable MSAA 8x and 16x support"},
+        {"panblit", PAN_DBG_PANBLIT, "Use pan_blitter instead of u_blitter"},
         DEBUG_NAMED_VALUE_END
 };
 
diff --git a/src/panfrost/lib/pan_util.h b/src/panfrost/lib/pan_util.h
index bdac9984ec9..4ebb5caa085 100644
--- a/src/panfrost/lib/pan_util.h
+++ b/src/panfrost/lib/pan_util.h
@@ -39,5 +39,6 @@
 #define PAN_DBG_GL3 0x0100
 #define PAN_DBG_NO_AFBC 0x0200
 #define PAN_DBG_MSAA16 0x0400
+#define PAN_DBG_PANBLIT 0x0800
 
 #endif /* PAN_UTIL_H */