From 7994929e84a30904e7ab1ca03b75ef44d2167e88 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 23 Mar 2021 12:20:35 +0100 Subject: [PATCH] panfrost: Use the blend shader cache attached to the device Signed-off-by: Boris Brezillon Acked-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/meson.build | 1 - src/gallium/drivers/panfrost/pan_blend_cso.c | 71 +------ src/gallium/drivers/panfrost/pan_blend_cso.h | 27 --- .../drivers/panfrost/pan_blend_shaders.c | 199 ------------------ .../drivers/panfrost/pan_blend_shaders.h | 47 ----- src/gallium/drivers/panfrost/pan_cmdstream.c | 4 +- src/gallium/drivers/panfrost/pan_context.c | 17 -- src/gallium/drivers/panfrost/pan_context.h | 3 - src/gallium/drivers/panfrost/pan_job.c | 48 +++-- src/gallium/drivers/panfrost/pan_screen.c | 3 + 10 files changed, 51 insertions(+), 369 deletions(-) delete mode 100644 src/gallium/drivers/panfrost/pan_blend_shaders.c delete mode 100644 src/gallium/drivers/panfrost/pan_blend_shaders.h diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 0e4a70b8f64..391dcc5d5f1 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -29,7 +29,6 @@ files_panfrost = files( 'pan_blit.c', 'pan_job.c', 'pan_assemble.c', - 'pan_blend_shaders.c', 'pan_blend_cso.c', 'pan_cmdstream.c', 'pan_compute.c', diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index 32ccab434e4..9a79bb44098 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -28,7 +28,8 @@ #include #include "util/u_memory.h" #include "gallium/auxiliary/util/u_blend.h" -#include "pan_blend_shaders.h" +#include "pan_context.h" +#include "pan_blend_cso.h" #include "pan_bo.h" #include "panfrost-quirks.h" @@ -59,54 +60,6 @@ * (our subclass of pipe_blend_state). */ -/* Given an initialized CSO and a particular framebuffer format, grab a - * blend shader, generating and compiling it if it doesn't exist - * (lazy-loading in a way). This routine, when the cache hits, should - * befast, suitable for calling every draw to avoid wacky dirty - * tracking paths. If the cache hits, boom, done. */ - -struct panfrost_blend_shader * -panfrost_get_blend_shader(struct panfrost_context *ctx, - struct panfrost_blend_state *blend, - enum pipe_format fmt, unsigned nr_samples, - unsigned rt, - const float *constants) -{ - /* Prevent NULL collision issues.. */ - assert(fmt != 0); - - /* Check the cache. Key by the RT and format */ - struct hash_table *shaders = ctx->blend_shaders; - struct panfrost_blend_shader_key key = { - .rt = rt, - .format = fmt, - .nr_samples = MAX2(nr_samples, 1), - .has_constants = constants != NULL, - .logicop_enable = blend->base.logicop_enable, - }; - - if (blend->base.logicop_enable) { - key.logicop_func = blend->base.logicop_func; - } else { - unsigned idx = blend->base.independent_blend_enable ? rt : 0; - - if (blend->base.rt[idx].blend_enable) - key.equation = blend->base.rt[idx]; - } - - struct hash_entry *he = _mesa_hash_table_search(shaders, &key); - struct panfrost_blend_shader *shader = he ? he->data : NULL; - - if (!shader) { - /* Cache miss. Build one instead, cache it, and go */ - shader = panfrost_create_blend_shader(ctx, blend, &key); - _mesa_hash_table_insert(shaders, &shader->key, shader); - } - - panfrost_compile_blend_shader(shader, constants); - return shader; -} - /* Create a blend CSO. Essentially, try to compile a fixed-function * expression and initialize blend shaders */ @@ -214,12 +167,6 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc /* Otherwise, we need to grab a shader */ - unsigned constant_mask = pan_blend_constant_mask(&pan_blend, rti); - struct panfrost_blend_shader *shader = - panfrost_get_blend_shader(ctx, blend, fmt, nr_samples, rti, - constant_mask ? - ctx->blend_color.color : NULL); - /* Upload the shader, sharing a BO */ if (!(*bo)) { *bo = panfrost_batch_create_bo(batch, 4096, @@ -229,22 +176,26 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc PAN_BO_ACCESS_FRAGMENT); } - /* Size check */ - assert((*shader_offset + shader->size) < 4096); + pthread_mutex_lock(&dev->blend_shaders.lock); + struct pan_blend_shader_variant *shader = + pan_blend_get_shader_locked(dev, &pan_blend, rti); - memcpy((*bo)->ptr.cpu + *shader_offset, shader->buffer, shader->size); + /* Size check */ + assert((*shader_offset + shader->binary.size) < 4096); + + memcpy((*bo)->ptr.cpu + *shader_offset, shader->binary.data, shader->binary.size); struct panfrost_blend_final final = { .is_shader = true, .shader = { - .work_count = shader->work_count, .first_tag = shader->first_tag, .gpu = (*bo)->ptr.gpu + *shader_offset, }, .load_dest = pan_blend_reads_dest(&pan_blend, rti), }; - *shader_offset += shader->size; + *shader_offset += shader->binary.size; + pthread_mutex_unlock(&dev->blend_shaders.lock); return final; } diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.h b/src/gallium/drivers/panfrost/pan_blend_cso.h index 71b4001f617..7cabaae388d 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.h +++ b/src/gallium/drivers/panfrost/pan_blend_cso.h @@ -54,30 +54,6 @@ struct panfrost_blend_shader_key { struct pipe_rt_blend_state equation; }; -/* An internal blend shader descriptor, from the compiler */ - -struct panfrost_blend_shader { - struct panfrost_blend_shader_key key; - struct panfrost_context *ctx; - - nir_shader *nir; - - /* Blend constants */ - float constants[4]; - - /* The compiled shader */ - void *buffer; - - /* Byte count of the shader */ - unsigned size; - - /* Number of 128-bit work registers required by the shader */ - unsigned work_count; - - /* First instruction tag (for tagging the pointer) */ - unsigned first_tag; -}; - /* A blend shader descriptor ready for actual use */ struct panfrost_blend_shader_final { @@ -86,9 +62,6 @@ struct panfrost_blend_shader_final { /* First instruction tag (for tagging the pointer) */ unsigned first_tag; - - /* Same meaning as panfrost_blend_shader */ - unsigned work_count; }; struct panfrost_blend_equation_final { diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c deleted file mode 100644 index 7c4f21a2404..00000000000 --- a/src/gallium/drivers/panfrost/pan_blend_shaders.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * © Copyright 2018 Alyssa Rosenzweig - * Copyright (C) 2019-2020 Collabora, Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include "pan_blend_shaders.h" -#include "pan_shader.h" -#include "pan_util.h" -#include "panfrost-quirks.h" -#include "compiler/nir/nir_builder.h" -#include "panfrost/util/nir_lower_blend.h" -#include "panfrost/util/pan_lower_framebuffer.h" -#include "gallium/auxiliary/util/u_blend.h" -#include "util/u_memory.h" - -/* - * Implements the command stream portion of programmatic blend shaders. - * - * On Midgard, common blending operations are accelerated by the fixed-function - * blending pipeline. Panfrost supports this fast path via the code in - * pan_blending.c. Nevertheless, uncommon blend modes (including some seemingly - * simple modes present in ES2) require "blend shaders", a special internal - * shader type used for programmable blending. - * - * Blend shaders operate during the normal blending time, but they bypass the - * fixed-function blending pipeline and instead go straight to the Midgard - * shader cores. The shaders themselves are essentially just fragment shaders, - * making heavy use of uint8 arithmetic to manipulate RGB values for the - * framebuffer. - * - * As is typical with Midgard, shader binaries must be accompanied by - * information about the first tag (ORed with the bottom nibble of address, - * like usual) and work registers. Work register count is assumed to be less - * than or equal to the coresponding fragment shader's work count. This - * suggests that blend shader invocation is tied to fragment shader - * execution. - * - * The shaders themselves use the standard ISA. The source pixel colour, - * including alpha, is preloaded into r0 as a vec4 of float32. The destination - * pixel colour must be loaded explicitly via load/store ops, possibly - * performing conversions in software. The blended colour must be stored with a - * fragment writeout in the correct framebuffer format, either in software or - * via conversion opcodes on the load/store pipe. - * - * Blend shaders hardcode constants. Naively, this requires recompilation each - * time the blend color changes, which is a performance risk. Accordingly, we - * 'cheat' a bit: instead of loading the constant, we compile a shader with a - * dummy constant, exporting the offset to the immediate in the shader binary, - * storing this generic binary and metadata in the CSO itself at CSO create - * time. - * - * We then hot patch in the color into this shader at attachment / color change - * time, allowing for CSO create to be the only expensive operation - * (compilation). - */ - -struct panfrost_blend_shader * -panfrost_create_blend_shader(struct panfrost_context *ctx, - struct panfrost_blend_state *state, - const struct panfrost_blend_shader_key *key) -{ - struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_blend_shader *res = rzalloc(ctx, struct panfrost_blend_shader); - struct pan_blend_state pan_blend = state->pan; - - res->ctx = ctx; - res->key = *key; - - /* Build the shader */ - pan_blend.rts[key->rt].format = key->format; - pan_blend.rts[key->rt].nr_samples = key->nr_samples; - res->nir = pan_blend_create_shader(dev, &pan_blend, key->rt); - - return res; -} - -uint64_t -bifrost_get_blend_desc(const struct panfrost_device *dev, - enum pipe_format fmt, unsigned rt, unsigned force_size) -{ - const struct util_format_description *desc = util_format_description(fmt); - uint64_t res; - - pan_pack(&res, BIFROST_INTERNAL_BLEND, cfg) { - cfg.mode = MALI_BIFROST_BLEND_MODE_OPAQUE; - cfg.fixed_function.num_comps = desc->nr_channels; - cfg.fixed_function.rt = rt; - - nir_alu_type T = pan_unpacked_type_for_format(desc); - - if (force_size) - T = nir_alu_type_get_base_type(T) | force_size; - - switch (T) { - case nir_type_float16: - cfg.fixed_function.conversion.register_format = - MALI_BIFROST_REGISTER_FILE_FORMAT_F16; - break; - case nir_type_float32: - cfg.fixed_function.conversion.register_format = - MALI_BIFROST_REGISTER_FILE_FORMAT_F32; - break; - case nir_type_int8: - case nir_type_int16: - cfg.fixed_function.conversion.register_format = - MALI_BIFROST_REGISTER_FILE_FORMAT_I16; - break; - case nir_type_int32: - cfg.fixed_function.conversion.register_format = - MALI_BIFROST_REGISTER_FILE_FORMAT_I32; - break; - case nir_type_uint8: - case nir_type_uint16: - cfg.fixed_function.conversion.register_format = - MALI_BIFROST_REGISTER_FILE_FORMAT_U16; - break; - case nir_type_uint32: - cfg.fixed_function.conversion.register_format = - MALI_BIFROST_REGISTER_FILE_FORMAT_U32; - break; - default: - unreachable("Invalid format"); - } - - cfg.fixed_function.conversion.memory_format = - panfrost_format_to_bifrost_blend(dev, desc, true); - } - - return res; -} - -void -panfrost_compile_blend_shader(struct panfrost_blend_shader *shader, - const float *constants) -{ - struct panfrost_device *dev = pan_device(shader->ctx->base.screen); - - /* If the shader has already been compiled and the constants match - * or the shader doesn't use the blend constants, we can keep the - * compiled version. - */ - if (shader->buffer && - (!constants || - !memcmp(shader->constants, constants, sizeof(shader->constants)))) - return; - - /* Compile or recompile the NIR shader */ - struct panfrost_compile_inputs inputs = { - .gpu_id = dev->gpu_id, - .is_blend = true, - .blend.rt = shader->key.rt, - .blend.nr_samples = shader->key.nr_samples, - .rt_formats = {shader->key.format}, - }; - - if (constants) - memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants)); - - if (pan_is_bifrost(dev)) { - inputs.blend.bifrost_blend_desc = - bifrost_get_blend_desc(dev, shader->key.format, shader->key.rt, 0); - } - - struct pan_shader_info info; - struct util_dynarray binary; - - util_dynarray_init(&binary, NULL); - pan_shader_compile(dev, shader->nir, &inputs, &binary, &info); - - /* Allow us to patch later */ - shader->first_tag = pan_is_bifrost(dev) ? 0 : info.midgard.first_tag; - shader->size = binary.size; - shader->buffer = reralloc_size(shader, shader->buffer, shader->size); - memcpy(shader->buffer, binary.data, shader->size); - shader->work_count = info.work_reg_count; - - util_dynarray_fini(&binary); -} diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.h b/src/gallium/drivers/panfrost/pan_blend_shaders.h deleted file mode 100644 index 9f79f7764bb..00000000000 --- a/src/gallium/drivers/panfrost/pan_blend_shaders.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * © Copyright 2018 Alyssa Rosenzweig - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __PAN_BLEND_SHADERS_H__ -#define __PAN_BLEND_SHADERS_H__ - -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include -#include "pan_context.h" -#include "pan_blend_cso.h" - -struct panfrost_blend_shader * -panfrost_create_blend_shader(struct panfrost_context *ctx, - struct panfrost_blend_state *state, - const struct panfrost_blend_shader_key *key); - -void -panfrost_compile_blend_shader(struct panfrost_blend_shader *shader, - const float *constants); - -uint64_t -bifrost_get_blend_desc(const struct panfrost_device *dev, - enum pipe_format fmt, unsigned rt, unsigned force_size); - -#endif diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 4a591f20b9a..570ac064284 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -36,7 +36,6 @@ #include "pan_job.h" #include "pan_shader.h" #include "pan_texture.h" -#include "pan_blend_shaders.h" /* If a BO is accessed for a particular shader stage, will it be in the primary * batch (vertex/tiler) or the secondary batch (fragment)? Anything but @@ -989,7 +988,8 @@ panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch, unsigned rt, if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) { enum pipe_format format = batch->key.cbufs[rt]->format; - uniform->u[0] = bifrost_get_blend_desc(dev, format, rt, 32) >> 32; + uniform->u[0] = + pan_blend_get_bifrost_desc(dev, format, rt, 32) >> 32; } else { pan_pack(&uniform->u[0], BIFROST_INTERNAL_CONVERSION, cfg) cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw; diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index fb976977ce4..5d738f5a1cc 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -51,7 +51,6 @@ #include "midgard_pack.h" #include "pan_screen.h" -#include "pan_blend_shaders.h" #include "pan_cmdstream.h" #include "pan_util.h" #include "decode.h" @@ -1665,16 +1664,6 @@ panfrost_set_stream_output_targets(struct pipe_context *pctx, so->num_targets = num_targets; } -static uint32_t panfrost_shader_key_hash(const void *key) -{ - return _mesa_hash_data(key, sizeof(struct panfrost_blend_shader_key)); -} - -static bool panfrost_shader_key_equal(const void *a, const void *b) -{ - return !memcmp(a, b, sizeof(struct panfrost_blend_shader_key)); -} - struct pipe_context * panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { @@ -1780,12 +1769,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) panfrost_batch_init(ctx); - ctx->blit_blend = rzalloc(ctx, struct panfrost_blend_state); - ctx->blend_shaders = - _mesa_hash_table_create(ctx, - panfrost_shader_key_hash, - panfrost_shader_key_equal); - /* By default mask everything on */ ctx->sample_mask = ~0; ctx->active_queries = true; diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 67750a80386..9b9856e09a4 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -178,9 +178,6 @@ struct panfrost_context { unsigned sample_mask; unsigned min_samples; - struct panfrost_blend_state *blit_blend; - struct hash_table *blend_shaders; - struct panfrost_query *cond_query; bool cond_cond; enum pipe_render_cond_flag cond_mode; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 7252f00f5dd..011811c8ab1 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -840,23 +840,45 @@ panfrost_load_surface(struct panfrost_batch *batch, struct pipe_surface *surf, u if (loc >= FRAG_RESULT_DATA0 && !panfrost_blend_format(format).internal) { - struct panfrost_blend_shader *b = - panfrost_get_blend_shader(batch->ctx, batch->ctx->blit_blend, - format, - rsrc->base.nr_samples, - loc - FRAG_RESULT_DATA0, - NULL); + struct panfrost_device *dev = pan_device(batch->ctx->base.screen); + struct panfrost_bo *bo = + panfrost_batch_create_bo(batch, 4096, + PAN_BO_EXECUTE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_READ | + PAN_BO_ACCESS_FRAGMENT); + unsigned rt = loc - FRAG_RESULT_DATA0; + struct pan_blend_state blend_state = { + .rt_count = rt + 1, + }; - struct panfrost_bo *bo = panfrost_batch_create_bo(batch, b->size, - PAN_BO_EXECUTE, - PAN_BO_ACCESS_PRIVATE | - PAN_BO_ACCESS_READ | - PAN_BO_ACCESS_FRAGMENT); + blend_state.rts[rt] = (struct pan_blend_rt_state) { + .format = format, + .nr_samples = rsrc->base.nr_samples, + .equation = { + .blend_enable = true, + .rgb_src_factor = BLEND_FACTOR_ZERO, + .rgb_invert_src_factor = true, + .rgb_dst_factor = BLEND_FACTOR_ZERO, + .rgb_func = BLEND_FUNC_ADD, + .alpha_src_factor = BLEND_FACTOR_ZERO, + .alpha_invert_src_factor = true, + .alpha_dst_factor = BLEND_FACTOR_ZERO, + .alpha_func = BLEND_FUNC_ADD, + .color_mask = 0xf, + }, + }; - memcpy(bo->ptr.cpu, b->buffer, b->size); - assert(b->work_count <= 4); + pthread_mutex_lock(&dev->blend_shaders.lock); + struct pan_blend_shader_variant *b = + pan_blend_get_shader_locked(dev, &blend_state, + loc - FRAG_RESULT_DATA0); + + assert(b->work_reg_count <= 4); + memcpy(bo->ptr.cpu, b->binary.data, b->binary.size); blend_shader = bo->ptr.gpu | b->first_tag; + pthread_mutex_unlock(&dev->blend_shaders.lock); } struct panfrost_ptr transfer = panfrost_pool_alloc_aligned(&batch->pool, diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 819a8eb8781..208e062c667 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -685,6 +685,8 @@ panfrost_destroy_screen(struct pipe_screen *pscreen) { struct panfrost_device *dev = pan_device(pscreen); + pan_blend_shaders_cleanup(dev); + if (dev->ro) dev->ro->destroy(dev->ro); panfrost_close_device(dev); @@ -856,6 +858,7 @@ panfrost_create_screen(int fd, struct renderonly *ro) panfrost_resource_screen_init(&screen->base); panfrost_init_blit_shaders(dev); + pan_blend_shaders_init(dev); return &screen->base; }