mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 22:18:13 +02:00
panfrost: Preprocess shaders at CSO create time
Now that the only passes depending on the shader key can run late, we can preprocess ahead-of-time once and throw away the original shader. This reduces the cost of shader variants and deduplicates some lowering for transform feedback shaders.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20906>
This commit is contained in:
parent
683d1b6078
commit
c65a9be421
4 changed files with 29 additions and 29 deletions
|
|
@ -339,6 +339,9 @@ struct panfrost_uncompiled_shader {
|
||||||
* shaders for desktop GL.
|
* shaders for desktop GL.
|
||||||
*/
|
*/
|
||||||
uint32_t fixed_varying_mask;
|
uint32_t fixed_varying_mask;
|
||||||
|
|
||||||
|
/* If gl_FragColor was lowered, we need to optimize the stores later */
|
||||||
|
bool fragcolor_lowered;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The binary artefacts of compiling a shader. This differs from
|
/* The binary artefacts of compiling a shader. This differs from
|
||||||
|
|
|
||||||
|
|
@ -95,19 +95,21 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
||||||
/* Lower this early so the backends don't have to worry about it */
|
/* Lower this early so the backends don't have to worry about it */
|
||||||
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||||
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
|
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
|
||||||
|
|
||||||
if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
|
||||||
NIR_PASS_V(s, nir_lower_fragcolor, 8);
|
|
||||||
}
|
|
||||||
} else if (s->info.stage == MESA_SHADER_VERTEX) {
|
} else if (s->info.stage == MESA_SHADER_VERTEX) {
|
||||||
inputs.fixed_varying_mask = fixed_varying_mask;
|
inputs.fixed_varying_mask = fixed_varying_mask;
|
||||||
|
|
||||||
/* No IDVS for internal XFB shaders */
|
/* No IDVS for internal XFB shaders */
|
||||||
inputs.no_idvs = s->info.has_transform_feedback_varyings;
|
inputs.no_idvs = s->info.has_transform_feedback_varyings;
|
||||||
|
|
||||||
|
if (s->info.has_transform_feedback_varyings) {
|
||||||
|
NIR_PASS_V(s, nir_io_add_const_offset_to_base,
|
||||||
|
nir_var_shader_in | nir_var_shader_out);
|
||||||
|
NIR_PASS_V(s, nir_io_add_intrinsic_xfb_info);
|
||||||
|
NIR_PASS_V(s, pan_lower_xfb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
util_dynarray_init(&out->binary, NULL);
|
util_dynarray_init(&out->binary, NULL);
|
||||||
pan_shader_preprocess(s, inputs.gpu_id);
|
|
||||||
|
|
||||||
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||||
if (key->fs.nr_cbufs_for_fragcolor) {
|
if (key->fs.nr_cbufs_for_fragcolor) {
|
||||||
|
|
@ -194,8 +196,11 @@ panfrost_shader_get(struct pipe_screen *pscreen,
|
||||||
|
|
||||||
static void
|
static void
|
||||||
panfrost_build_key(struct panfrost_context *ctx,
|
panfrost_build_key(struct panfrost_context *ctx,
|
||||||
struct panfrost_shader_key *key, const nir_shader *nir)
|
struct panfrost_shader_key *key,
|
||||||
|
struct panfrost_uncompiled_shader *uncompiled)
|
||||||
{
|
{
|
||||||
|
const nir_shader *nir = uncompiled->nir;
|
||||||
|
|
||||||
/* We don't currently have vertex shader variants */
|
/* We don't currently have vertex shader variants */
|
||||||
if (nir->info.stage != MESA_SHADER_FRAGMENT)
|
if (nir->info.stage != MESA_SHADER_FRAGMENT)
|
||||||
return;
|
return;
|
||||||
|
|
@ -206,7 +211,7 @@ panfrost_build_key(struct panfrost_context *ctx,
|
||||||
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
|
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
|
||||||
|
|
||||||
/* gl_FragColor lowering needs the number of colour buffers */
|
/* gl_FragColor lowering needs the number of colour buffers */
|
||||||
if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
if (uncompiled->fragcolor_lowered) {
|
||||||
key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
|
key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -299,7 +304,7 @@ panfrost_update_shader_variant(struct panfrost_context *ctx,
|
||||||
simple_mtx_lock(&uncompiled->lock);
|
simple_mtx_lock(&uncompiled->lock);
|
||||||
|
|
||||||
struct panfrost_shader_key key = {0};
|
struct panfrost_shader_key key = {0};
|
||||||
panfrost_build_key(ctx, &key, uncompiled->nir);
|
panfrost_build_key(ctx, &key, uncompiled);
|
||||||
|
|
||||||
util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader,
|
util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader,
|
||||||
so) {
|
so) {
|
||||||
|
|
@ -361,6 +366,18 @@ panfrost_create_shader_state(struct pipe_context *pctx,
|
||||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* gl_FragColor needs to be lowered before lowering I/O, do that now */
|
||||||
|
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||||
|
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
||||||
|
|
||||||
|
NIR_PASS_V(nir, nir_lower_fragcolor, 8);
|
||||||
|
so->fragcolor_lowered = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Then run the suite of lowering and optimization, including I/O lowering */
|
||||||
|
struct panfrost_device *dev = pan_device(pctx->screen);
|
||||||
|
pan_shader_preprocess(nir, dev->gpu_id);
|
||||||
|
|
||||||
/* If this shader uses transform feedback, compile the transform
|
/* If this shader uses transform feedback, compile the transform
|
||||||
* feedback program. This is a special shader variant.
|
* feedback program. This is a special shader variant.
|
||||||
*/
|
*/
|
||||||
|
|
@ -397,11 +414,8 @@ panfrost_create_shader_state(struct pipe_context *pctx,
|
||||||
* gl_FragColor is used, there is only a single render target. The
|
* gl_FragColor is used, there is only a single render target. The
|
||||||
* implicit broadcast is neither especially useful nor required by GLES.
|
* implicit broadcast is neither especially useful nor required by GLES.
|
||||||
*/
|
*/
|
||||||
if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
|
if (so->fragcolor_lowered)
|
||||||
so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
|
||||||
|
|
||||||
key.fs.nr_cbufs_for_fragcolor = 1;
|
key.fs.nr_cbufs_for_fragcolor = 1;
|
||||||
}
|
|
||||||
|
|
||||||
/* Creating a CSO is single-threaded, so it's ok to use the
|
/* Creating a CSO is single-threaded, so it's ok to use the
|
||||||
* locked function without explicitly taking the lock. Creating a
|
* locked function without explicitly taking the lock. Creating a
|
||||||
|
|
|
||||||
|
|
@ -4815,16 +4815,7 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
|
||||||
NIR_PASS_V(nir, pan_lower_sample_pos);
|
NIR_PASS_V(nir, pan_lower_sample_pos);
|
||||||
NIR_PASS_V(nir, nir_lower_bit_size, bi_lower_bit_size, NULL);
|
NIR_PASS_V(nir, nir_lower_bit_size, bi_lower_bit_size, NULL);
|
||||||
NIR_PASS_V(nir, nir_lower_64bit_phis);
|
NIR_PASS_V(nir, nir_lower_64bit_phis);
|
||||||
|
|
||||||
if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) {
|
|
||||||
NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
|
|
||||||
nir_var_shader_in | nir_var_shader_out);
|
|
||||||
NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
|
|
||||||
NIR_PASS_V(nir, pan_lower_xfb);
|
|
||||||
}
|
|
||||||
|
|
||||||
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
|
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
|
||||||
|
|
||||||
NIR_PASS_V(nir, pan_nir_lower_64bit_intrin);
|
NIR_PASS_V(nir, pan_nir_lower_64bit_intrin);
|
||||||
NIR_PASS_V(nir, pan_lower_helper_invocation);
|
NIR_PASS_V(nir, pan_lower_helper_invocation);
|
||||||
NIR_PASS_V(nir, nir_lower_int64);
|
NIR_PASS_V(nir, nir_lower_int64);
|
||||||
|
|
|
||||||
|
|
@ -402,14 +402,6 @@ midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id)
|
||||||
|
|
||||||
NIR_PASS_V(nir, pan_lower_helper_invocation);
|
NIR_PASS_V(nir, pan_lower_helper_invocation);
|
||||||
NIR_PASS_V(nir, pan_lower_sample_pos);
|
NIR_PASS_V(nir, pan_lower_sample_pos);
|
||||||
|
|
||||||
if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) {
|
|
||||||
NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
|
|
||||||
nir_var_shader_in | nir_var_shader_out);
|
|
||||||
NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
|
|
||||||
NIR_PASS_V(nir, pan_lower_xfb);
|
|
||||||
}
|
|
||||||
|
|
||||||
NIR_PASS_V(nir, midgard_nir_lower_algebraic_early);
|
NIR_PASS_V(nir, midgard_nir_lower_algebraic_early);
|
||||||
NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
|
NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
|
||||||
NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
|
NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue