mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 14:20:11 +01:00
panfrost: Enable more than 16 varyings on v9+
This change removes the limit of 16 varyings caused by the 8-bit offset value used in LD_VAR_BUF[_IMM]. LD_VAR[_IMM] is used instead, and the necessary attribute descriptors (ADs) are emitted at draw time.
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34074>
This commit is contained in:
parent
85b6bd989e
commit
cd2ca0ac22
9 changed files with 88 additions and 32 deletions
|
|
@ -1,4 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2025 Arm Ltd.
|
||||
* Copyright (C) 2023 Amazon.com, Inc. or its affiliates.
|
||||
* Copyright (C) 2018 Alyssa Rosenzweig
|
||||
* Copyright (C) 2020 Collabora Ltd.
|
||||
|
|
@ -2875,6 +2876,57 @@ panfrost_update_streamout_offsets(struct panfrost_context *ctx)
|
|||
(PAN_DIRTY_ZS | PAN_DIRTY_BLEND | PAN_DIRTY_MSAA | PAN_DIRTY_RASTERIZER | \
|
||||
PAN_DIRTY_OQ)
|
||||
|
||||
#if PAN_ARCH >= 9
/*
 * Emit the attribute descriptors (ADs) for the fragment shader's varyings.
 *
 * One ATTRIBUTE descriptor is allocated from the batch pool per fragment
 * input slot. The slot count is the fragment shader's input_count plus the
 * number of bits set in its fixed_varyings mask, and it is also recorded in
 * batch->nr_varying_attribs[PIPE_SHADER_FRAGMENT].
 *
 * For each descriptor a slot index is computed from the vertex shader's
 * fixed_varyings mask:
 *  - locations at or above VARYING_SLOT_VAR0 are placed after all of the
 *    vertex shader's fixed varyings, ordered by (location - VAR0);
 *  - lower locations use the number of vertex-shader fixed varyings whose
 *    slot number is below that location.
 *
 * Returns the GPU address of the descriptor array.
 */
static uint64_t
panfrost_emit_varying_descriptors(struct panfrost_batch *batch)
{
   struct panfrost_compiled_shader *vs = batch->ctx->prog[PIPE_SHADER_VERTEX];
   struct panfrost_compiled_shader *fs =
      batch->ctx->prog[PIPE_SHADER_FRAGMENT];

   const uint32_t vs_out_mask = vs->info.varyings.fixed_varyings;
   const uint32_t fs_in_mask = fs->info.varyings.fixed_varyings;
   const uint32_t fs_in_slots =
      fs->info.varyings.input_count + util_bitcount(fs_in_mask);

   /* Number of fixed varyings written by the vertex shader. This does not
    * depend on the varying being processed, so compute it once up front.
    */
   const unsigned nr_special = util_bitcount(vs_out_mask);

   /* NOTE(review): the allocation result is used unchecked below; confirm
    * whether pan_pool_alloc_desc_array() can fail here.
    */
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&batch->pool.base, fs_in_slots, ATTRIBUTE);
   struct mali_attribute_packed *descs = bufs.cpu;

   batch->nr_varying_attribs[PIPE_SHADER_FRAGMENT] = fs_in_slots;

   const uint32_t varying_size = panfrost_vertex_attribute_stride(vs, fs);

   /* NOTE(review): input[] is walked up to fs_in_slots, which includes the
    * fixed varyings; presumably input[] holds an entry for each of them.
    * Verify against the code that fills pan_shader_info::varyings.input.
    */
   for (uint32_t i = 0; i < fs_in_slots; i++) {
      const struct pan_shader_varying *var = &fs->info.varyings.input[i];
      uint32_t index;

      if (var->location >= VARYING_SLOT_VAR0) {
         /* General varyings follow every fixed varying. */
         index = nr_special + (var->location - VARYING_SLOT_VAR0);
      } else {
         /* Fixed varyings are packed in slot order. */
         index = util_bitcount(vs_out_mask & BITFIELD_MASK(var->location));
      }

      pan_pack(&descs[i], ATTRIBUTE, cfg) {
         cfg.attribute_type = MALI_ATTRIBUTE_TYPE_VERTEX_PACKET;
         cfg.offset_enable = false;
         cfg.format = GENX(panfrost_format_from_pipe_format)(var->format)->hw;
         /* NOTE(review): 61 looks like a hardware-defined table index;
          * confirm its meaning against the hardware documentation.
          */
         cfg.table = 61;
         cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
         /* One 16-byte step per slot index, after a fixed 1024-byte base.
          * NOTE(review): the origin of the 1024 base is not visible here.
          */
         cfg.offset = 1024 + (index * 16);
         cfg.buffer_index = 0;
         cfg.attribute_stride = varying_size;
         /* NOTE(review): the packet is 16 bytes larger than the varyings
          * themselves; confirm what occupies the extra 16 bytes.
          */
         cfg.packet_stride = varying_size + 16;
      }
   }

   return bufs.gpu;
}
#endif
|
||||
|
||||
static inline void
|
||||
panfrost_update_shader_state(struct panfrost_batch *batch,
|
||||
enum pipe_shader_type st)
|
||||
|
|
@ -2904,6 +2956,9 @@ panfrost_update_shader_state(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
if ((dirty & PAN_DIRTY_STAGE_SHADER) && frag)
|
||||
batch->attribs[st] = panfrost_emit_varying_descriptors(batch);
|
||||
|
||||
if (dirty & PAN_DIRTY_STAGE_IMAGE) {
|
||||
batch->images[st] =
|
||||
ctx->image_mask[st] ? panfrost_emit_images(batch, st) : 0;
|
||||
|
|
|
|||
|
|
@ -273,7 +273,7 @@ panfrost_vertex_attribute_stride(struct panfrost_compiled_shader *vs,
|
|||
unsigned v = vs->info.varyings.output_count;
|
||||
unsigned f = fs->info.varyings.input_count;
|
||||
unsigned slots = MAX2(v, f);
|
||||
slots += util_bitcount(fs->key.fs.fixed_varying_mask);
|
||||
slots += util_bitcount(vs->info.varyings.fixed_varyings);
|
||||
|
||||
/* Assumes 16 byte slots. We could do better. */
|
||||
return slots * 16;
|
||||
|
|
@ -310,7 +310,11 @@ panfrost_emit_resources(struct panfrost_batch *batch,
|
|||
panfrost_make_resource_table(T, PAN_TABLE_IMAGE, batch->images[stage],
|
||||
util_last_bit(ctx->image_mask[stage]));
|
||||
|
||||
if (stage == PIPE_SHADER_VERTEX) {
|
||||
if (stage == PIPE_SHADER_FRAGMENT) {
|
||||
panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE,
|
||||
batch->attribs[stage],
|
||||
batch->nr_varying_attribs[PIPE_SHADER_FRAGMENT]);
|
||||
} else if (stage == PIPE_SHADER_VERTEX) {
|
||||
panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE,
|
||||
batch->attribs[stage],
|
||||
ctx->vertex->num_elements);
|
||||
|
|
|
|||
|
|
@ -346,9 +346,6 @@ struct panfrost_fs_key {
|
|||
/* Number of colour buffers if gl_FragColor is written */
|
||||
unsigned nr_cbufs_for_fragcolor;
|
||||
|
||||
/* On Valhall, fixed_varying_mask of the linked vertex shader */
|
||||
uint32_t fixed_varying_mask;
|
||||
|
||||
/* Midgard shaders that read the tilebuffer must be keyed for
|
||||
* non-blendable formats
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -133,6 +133,7 @@ struct panfrost_batch {
|
|||
|
||||
unsigned nr_push_uniforms[PIPE_SHADER_TYPES];
|
||||
unsigned nr_uniform_buffers[PIPE_SHADER_TYPES];
|
||||
unsigned nr_varying_attribs[PIPE_SHADER_TYPES];
|
||||
|
||||
/* Varying related pointers */
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -77,15 +77,12 @@ static bool
|
|||
lower_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct panfrost_compile_inputs *inputs)
|
||||
{
|
||||
/* We always use heap-based varying allocation when IDVS is used on Valhall. */
|
||||
bool malloc_idvs = !inputs->no_idvs;
|
||||
|
||||
/* All vertex attributes come from the attribute table.
|
||||
* Fragment inputs come from the attribute table too, unless they've
|
||||
* been allocated on the heap.
|
||||
*/
|
||||
if (b->shader->info.stage == MESA_SHADER_VERTEX ||
|
||||
(b->shader->info.stage == MESA_SHADER_FRAGMENT && !malloc_idvs)) {
|
||||
b->shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_intrinsic_set_base(
|
||||
intrin,
|
||||
pan_res_handle(PAN_TABLE_ATTRIBUTE, nir_intrinsic_base(intrin)));
|
||||
|
|
@ -131,6 +128,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||
case nir_intrinsic_image_texel_address:
|
||||
return lower_image_intrin(b, intrin);
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
return lower_input_intrin(b, intrin, inputs);
|
||||
case nir_intrinsic_load_ubo:
|
||||
return lower_load_ubo_intrin(b, intrin);
|
||||
|
|
|
|||
|
|
@ -364,7 +364,7 @@ panfrost_init_shader_caps(struct panfrost_screen *screen)
|
|||
caps->max_tex_indirections = 16384; /* arbitrary */
|
||||
caps->max_control_flow_depth = 1024; /* arbitrary */
|
||||
/* Used as ABI on Midgard */
|
||||
caps->max_inputs = 16;
|
||||
caps->max_inputs = dev->arch >= 9 ? 32 : 16;
|
||||
caps->max_outputs = i == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
|
||||
caps->max_temps = 256; /* arbitrary */
|
||||
caps->max_const_buffer0_size = 16 * 1024 * sizeof(float);
|
||||
|
|
@ -638,7 +638,7 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
|
|||
|
||||
caps->shader_buffer_offset_alignment = 4;
|
||||
|
||||
caps->max_varyings = dev->arch >= 9 ? 16 : 32;
|
||||
caps->max_varyings = 32;
|
||||
|
||||
/* Removed in v6 (Bifrost) */
|
||||
caps->gl_clamp =
|
||||
|
|
|
|||
|
|
@ -136,19 +136,21 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
|||
.push_uniforms = true,
|
||||
};
|
||||
|
||||
if (dev->arch >= 9)
|
||||
/* Use LD_VAR_BUF for varying lookups. */
|
||||
inputs.valhall.use_ld_var_buf = true;
|
||||
|
||||
/* Lower this early so the backends don't have to worry about it */
|
||||
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
|
||||
} else if (s->info.stage == MESA_SHADER_VERTEX) {
|
||||
inputs.fixed_varying_mask = fixed_varying_mask;
|
||||
unsigned fixed_varying_mask =
|
||||
(ir->info.inputs_read & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
|
||||
inputs.fixed_varying_mask = fixed_varying_mask;
|
||||
} else if (s->info.stage == MESA_SHADER_VERTEX) {
|
||||
/* No IDVS for internal XFB shaders */
|
||||
inputs.no_idvs = s->info.has_transform_feedback_varyings;
|
||||
|
||||
inputs.fixed_varying_mask =
|
||||
(ir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
|
||||
if (s->info.has_transform_feedback_varyings) {
|
||||
NIR_PASS(_, s, nir_io_add_const_offset_to_base,
|
||||
nir_var_shader_in | nir_var_shader_out);
|
||||
|
|
@ -293,7 +295,6 @@ panfrost_build_fs_key(struct panfrost_context *ctx,
|
|||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
|
||||
struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
|
||||
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
|
||||
|
||||
/* gl_FragColor lowering needs the number of colour buffers */
|
||||
if (uncompiled->fragcolor_lowered) {
|
||||
|
|
@ -326,12 +327,6 @@ panfrost_build_fs_key(struct panfrost_context *ctx,
|
|||
key->rt_formats[i] = fmt;
|
||||
}
|
||||
}
|
||||
|
||||
/* Funny desktop GL varying lowering on Valhall */
|
||||
if (dev->arch >= 9) {
|
||||
assert(vs != NULL && "too early");
|
||||
key->fixed_varying_mask = vs->fixed_varying_mask;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -471,13 +466,6 @@ panfrost_create_shader_state(struct pipe_context *pctx,
|
|||
so->stream_output = cso->stream_output;
|
||||
so->nir = nir;
|
||||
|
||||
/* Fix linkage early */
|
||||
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
so->fixed_varying_mask =
|
||||
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
}
|
||||
|
||||
/* gl_FragColor needs to be lowered before lowering I/O, do that now */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
||||
|
|
|
|||
|
|
@ -146,6 +146,11 @@ GENX(pan_shader_compile)(nir_shader *s, struct panfrost_compile_inputs *inputs,
|
|||
#if PAN_ARCH >= 9
|
||||
info->varyings.output_count =
|
||||
util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0);
|
||||
|
||||
/* Store the mask of special varyings, in case we need to emit ADs later. */
|
||||
info->varyings.fixed_varyings =
|
||||
(s->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
#endif
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
|
|
@ -195,6 +200,11 @@ GENX(pan_shader_compile)(nir_shader *s, struct panfrost_compile_inputs *inputs,
|
|||
#if PAN_ARCH >= 9
|
||||
info->varyings.input_count =
|
||||
util_last_bit(s->info.inputs_read >> VARYING_SLOT_VAR0);
|
||||
|
||||
/* Store the mask of special varyings, in case we need to emit ADs later. */
|
||||
info->varyings.fixed_varyings =
|
||||
(s->info.inputs_read & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -294,6 +294,9 @@ struct pan_shader_info {
|
|||
|
||||
/* Bitfield of noperspective varyings, starting at VARYING_SLOT_VAR0 */
|
||||
uint32_t noperspective;
|
||||
|
||||
/* Bitfield of special varyings. */
|
||||
uint32_t fixed_varyings;
|
||||
} varyings;
|
||||
|
||||
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue