mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
panfrost: Enable more than 16 varyings on v9+
This change removes the limit of 16 varyings caused by the 8-bit offset value used in LD_VAR_BUF[_IMM]. LD_VAR[_IMM] is used instead, and the necessary attribute descriptors (ADs) are emitted at draw time.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34074>
This commit is contained in:
parent
85b6bd989e
commit
cd2ca0ac22
9 changed files with 88 additions and 32 deletions
|
|
@ -1,4 +1,5 @@
|
||||||
/*
|
/*
|
||||||
|
* Copyright (C) 2025 Arm Ltd.
|
||||||
* Copyright (C) 2023 Amazon.com, Inc. or its affiliates.
|
* Copyright (C) 2023 Amazon.com, Inc. or its affiliates.
|
||||||
* Copyright (C) 2018 Alyssa Rosenzweig
|
* Copyright (C) 2018 Alyssa Rosenzweig
|
||||||
* Copyright (C) 2020 Collabora Ltd.
|
* Copyright (C) 2020 Collabora Ltd.
|
||||||
|
|
@ -2875,6 +2876,57 @@ panfrost_update_streamout_offsets(struct panfrost_context *ctx)
|
||||||
(PAN_DIRTY_ZS | PAN_DIRTY_BLEND | PAN_DIRTY_MSAA | PAN_DIRTY_RASTERIZER | \
|
(PAN_DIRTY_ZS | PAN_DIRTY_BLEND | PAN_DIRTY_MSAA | PAN_DIRTY_RASTERIZER | \
|
||||||
PAN_DIRTY_OQ)
|
PAN_DIRTY_OQ)
|
||||||
|
|
||||||
|
#if PAN_ARCH >= 9
|
||||||
|
static uint64_t
|
||||||
|
panfrost_emit_varying_descriptors(struct panfrost_batch *batch)
|
||||||
|
{
|
||||||
|
struct panfrost_compiled_shader *vs =
|
||||||
|
batch->ctx->prog[PIPE_SHADER_VERTEX];
|
||||||
|
struct panfrost_compiled_shader *fs =
|
||||||
|
batch->ctx->prog[PIPE_SHADER_FRAGMENT];
|
||||||
|
|
||||||
|
const uint32_t vs_out_mask = vs->info.varyings.fixed_varyings;
|
||||||
|
const uint32_t fs_in_mask = fs->info.varyings.fixed_varyings;
|
||||||
|
const uint32_t fs_in_slots = fs->info.varyings.input_count +
|
||||||
|
util_bitcount(fs_in_mask);
|
||||||
|
|
||||||
|
struct panfrost_ptr bufs =
|
||||||
|
pan_pool_alloc_desc_array(&batch->pool.base, fs_in_slots, ATTRIBUTE);
|
||||||
|
struct mali_attribute_packed *descs = bufs.cpu;
|
||||||
|
|
||||||
|
batch->nr_varying_attribs[PIPE_SHADER_FRAGMENT] = fs_in_slots;
|
||||||
|
|
||||||
|
const uint32_t varying_size = panfrost_vertex_attribute_stride(vs, fs);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < fs_in_slots; i++) {
|
||||||
|
const struct pan_shader_varying *var = &fs->info.varyings.input[i];
|
||||||
|
|
||||||
|
uint32_t index = 0;
|
||||||
|
if (var->location >= VARYING_SLOT_VAR0) {
|
||||||
|
unsigned nr_special = util_bitcount(vs_out_mask);
|
||||||
|
unsigned general_index = (var->location - VARYING_SLOT_VAR0);
|
||||||
|
index = nr_special + general_index;
|
||||||
|
} else {
|
||||||
|
index = util_bitcount(vs_out_mask & BITFIELD_MASK(var->location));
|
||||||
|
}
|
||||||
|
|
||||||
|
pan_pack(&descs[i], ATTRIBUTE, cfg) {
|
||||||
|
cfg.attribute_type = MALI_ATTRIBUTE_TYPE_VERTEX_PACKET;
|
||||||
|
cfg.offset_enable = false;
|
||||||
|
cfg.format = GENX(panfrost_format_from_pipe_format)(var->format)->hw;
|
||||||
|
cfg.table = 61;
|
||||||
|
cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
|
||||||
|
cfg.offset = 1024 + (index * 16);
|
||||||
|
cfg.buffer_index = 0;
|
||||||
|
cfg.attribute_stride = varying_size;
|
||||||
|
cfg.packet_stride = varying_size + 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return bufs.gpu;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
panfrost_update_shader_state(struct panfrost_batch *batch,
|
panfrost_update_shader_state(struct panfrost_batch *batch,
|
||||||
enum pipe_shader_type st)
|
enum pipe_shader_type st)
|
||||||
|
|
@ -2904,6 +2956,9 @@ panfrost_update_shader_state(struct panfrost_batch *batch,
|
||||||
}
|
}
|
||||||
|
|
||||||
#if PAN_ARCH >= 9
|
#if PAN_ARCH >= 9
|
||||||
|
if ((dirty & PAN_DIRTY_STAGE_SHADER) && frag)
|
||||||
|
batch->attribs[st] = panfrost_emit_varying_descriptors(batch);
|
||||||
|
|
||||||
if (dirty & PAN_DIRTY_STAGE_IMAGE) {
|
if (dirty & PAN_DIRTY_STAGE_IMAGE) {
|
||||||
batch->images[st] =
|
batch->images[st] =
|
||||||
ctx->image_mask[st] ? panfrost_emit_images(batch, st) : 0;
|
ctx->image_mask[st] ? panfrost_emit_images(batch, st) : 0;
|
||||||
|
|
|
||||||
|
|
@ -273,7 +273,7 @@ panfrost_vertex_attribute_stride(struct panfrost_compiled_shader *vs,
|
||||||
unsigned v = vs->info.varyings.output_count;
|
unsigned v = vs->info.varyings.output_count;
|
||||||
unsigned f = fs->info.varyings.input_count;
|
unsigned f = fs->info.varyings.input_count;
|
||||||
unsigned slots = MAX2(v, f);
|
unsigned slots = MAX2(v, f);
|
||||||
slots += util_bitcount(fs->key.fs.fixed_varying_mask);
|
slots += util_bitcount(vs->info.varyings.fixed_varyings);
|
||||||
|
|
||||||
/* Assumes 16 byte slots. We could do better. */
|
/* Assumes 16 byte slots. We could do better. */
|
||||||
return slots * 16;
|
return slots * 16;
|
||||||
|
|
@ -310,7 +310,11 @@ panfrost_emit_resources(struct panfrost_batch *batch,
|
||||||
panfrost_make_resource_table(T, PAN_TABLE_IMAGE, batch->images[stage],
|
panfrost_make_resource_table(T, PAN_TABLE_IMAGE, batch->images[stage],
|
||||||
util_last_bit(ctx->image_mask[stage]));
|
util_last_bit(ctx->image_mask[stage]));
|
||||||
|
|
||||||
if (stage == PIPE_SHADER_VERTEX) {
|
if (stage == PIPE_SHADER_FRAGMENT) {
|
||||||
|
panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE,
|
||||||
|
batch->attribs[stage],
|
||||||
|
batch->nr_varying_attribs[PIPE_SHADER_FRAGMENT]);
|
||||||
|
} else if (stage == PIPE_SHADER_VERTEX) {
|
||||||
panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE,
|
panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE,
|
||||||
batch->attribs[stage],
|
batch->attribs[stage],
|
||||||
ctx->vertex->num_elements);
|
ctx->vertex->num_elements);
|
||||||
|
|
|
||||||
|
|
@ -346,9 +346,6 @@ struct panfrost_fs_key {
|
||||||
/* Number of colour buffers if gl_FragColor is written */
|
/* Number of colour buffers if gl_FragColor is written */
|
||||||
unsigned nr_cbufs_for_fragcolor;
|
unsigned nr_cbufs_for_fragcolor;
|
||||||
|
|
||||||
/* On Valhall, fixed_varying_mask of the linked vertex shader */
|
|
||||||
uint32_t fixed_varying_mask;
|
|
||||||
|
|
||||||
/* Midgard shaders that read the tilebuffer must be keyed for
|
/* Midgard shaders that read the tilebuffer must be keyed for
|
||||||
* non-blendable formats
|
* non-blendable formats
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -133,6 +133,7 @@ struct panfrost_batch {
|
||||||
|
|
||||||
unsigned nr_push_uniforms[PIPE_SHADER_TYPES];
|
unsigned nr_push_uniforms[PIPE_SHADER_TYPES];
|
||||||
unsigned nr_uniform_buffers[PIPE_SHADER_TYPES];
|
unsigned nr_uniform_buffers[PIPE_SHADER_TYPES];
|
||||||
|
unsigned nr_varying_attribs[PIPE_SHADER_TYPES];
|
||||||
|
|
||||||
/* Varying related pointers */
|
/* Varying related pointers */
|
||||||
struct {
|
struct {
|
||||||
|
|
|
||||||
|
|
@ -77,15 +77,12 @@ static bool
|
||||||
lower_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
|
lower_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
const struct panfrost_compile_inputs *inputs)
|
const struct panfrost_compile_inputs *inputs)
|
||||||
{
|
{
|
||||||
/* We always use heap-based varying allocation when IDVS is used on Valhall. */
|
|
||||||
bool malloc_idvs = !inputs->no_idvs;
|
|
||||||
|
|
||||||
/* All vertex attributes come from the attribute table.
|
/* All vertex attributes come from the attribute table.
|
||||||
* Fragment inputs come from the attribute table too, unless they've
|
* Fragment inputs come from the attribute table too, unless they've
|
||||||
* been allocated on the heap.
|
* been allocated on the heap.
|
||||||
*/
|
*/
|
||||||
if (b->shader->info.stage == MESA_SHADER_VERTEX ||
|
if (b->shader->info.stage == MESA_SHADER_VERTEX ||
|
||||||
(b->shader->info.stage == MESA_SHADER_FRAGMENT && !malloc_idvs)) {
|
b->shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||||
nir_intrinsic_set_base(
|
nir_intrinsic_set_base(
|
||||||
intrin,
|
intrin,
|
||||||
pan_res_handle(PAN_TABLE_ATTRIBUTE, nir_intrinsic_base(intrin)));
|
pan_res_handle(PAN_TABLE_ATTRIBUTE, nir_intrinsic_base(intrin)));
|
||||||
|
|
@ -131,6 +128,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
case nir_intrinsic_image_texel_address:
|
case nir_intrinsic_image_texel_address:
|
||||||
return lower_image_intrin(b, intrin);
|
return lower_image_intrin(b, intrin);
|
||||||
case nir_intrinsic_load_input:
|
case nir_intrinsic_load_input:
|
||||||
|
case nir_intrinsic_load_interpolated_input:
|
||||||
return lower_input_intrin(b, intrin, inputs);
|
return lower_input_intrin(b, intrin, inputs);
|
||||||
case nir_intrinsic_load_ubo:
|
case nir_intrinsic_load_ubo:
|
||||||
return lower_load_ubo_intrin(b, intrin);
|
return lower_load_ubo_intrin(b, intrin);
|
||||||
|
|
|
||||||
|
|
@ -364,7 +364,7 @@ panfrost_init_shader_caps(struct panfrost_screen *screen)
|
||||||
caps->max_tex_indirections = 16384; /* arbitrary */
|
caps->max_tex_indirections = 16384; /* arbitrary */
|
||||||
caps->max_control_flow_depth = 1024; /* arbitrary */
|
caps->max_control_flow_depth = 1024; /* arbitrary */
|
||||||
/* Used as ABI on Midgard */
|
/* Used as ABI on Midgard */
|
||||||
caps->max_inputs = 16;
|
caps->max_inputs = dev->arch >= 9 ? 32 : 16;
|
||||||
caps->max_outputs = i == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
|
caps->max_outputs = i == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;
|
||||||
caps->max_temps = 256; /* arbitrary */
|
caps->max_temps = 256; /* arbitrary */
|
||||||
caps->max_const_buffer0_size = 16 * 1024 * sizeof(float);
|
caps->max_const_buffer0_size = 16 * 1024 * sizeof(float);
|
||||||
|
|
@ -638,7 +638,7 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
|
||||||
|
|
||||||
caps->shader_buffer_offset_alignment = 4;
|
caps->shader_buffer_offset_alignment = 4;
|
||||||
|
|
||||||
caps->max_varyings = dev->arch >= 9 ? 16 : 32;
|
caps->max_varyings = 32;
|
||||||
|
|
||||||
/* Removed in v6 (Bifrost) */
|
/* Removed in v6 (Bifrost) */
|
||||||
caps->gl_clamp =
|
caps->gl_clamp =
|
||||||
|
|
|
||||||
|
|
@ -136,19 +136,21 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
||||||
.push_uniforms = true,
|
.push_uniforms = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (dev->arch >= 9)
|
|
||||||
/* Use LD_VAR_BUF for varying lookups. */
|
|
||||||
inputs.valhall.use_ld_var_buf = true;
|
|
||||||
|
|
||||||
/* Lower this early so the backends don't have to worry about it */
|
/* Lower this early so the backends don't have to worry about it */
|
||||||
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||||
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
|
unsigned fixed_varying_mask =
|
||||||
} else if (s->info.stage == MESA_SHADER_VERTEX) {
|
(ir->info.inputs_read & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||||
inputs.fixed_varying_mask = fixed_varying_mask;
|
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||||
|
|
||||||
|
inputs.fixed_varying_mask = fixed_varying_mask;
|
||||||
|
} else if (s->info.stage == MESA_SHADER_VERTEX) {
|
||||||
/* No IDVS for internal XFB shaders */
|
/* No IDVS for internal XFB shaders */
|
||||||
inputs.no_idvs = s->info.has_transform_feedback_varyings;
|
inputs.no_idvs = s->info.has_transform_feedback_varyings;
|
||||||
|
|
||||||
|
inputs.fixed_varying_mask =
|
||||||
|
(ir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||||
|
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||||
|
|
||||||
if (s->info.has_transform_feedback_varyings) {
|
if (s->info.has_transform_feedback_varyings) {
|
||||||
NIR_PASS(_, s, nir_io_add_const_offset_to_base,
|
NIR_PASS(_, s, nir_io_add_const_offset_to_base,
|
||||||
nir_var_shader_in | nir_var_shader_out);
|
nir_var_shader_in | nir_var_shader_out);
|
||||||
|
|
@ -293,7 +295,6 @@ panfrost_build_fs_key(struct panfrost_context *ctx,
|
||||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||||
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
|
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
|
||||||
struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
|
struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
|
||||||
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
|
|
||||||
|
|
||||||
/* gl_FragColor lowering needs the number of colour buffers */
|
/* gl_FragColor lowering needs the number of colour buffers */
|
||||||
if (uncompiled->fragcolor_lowered) {
|
if (uncompiled->fragcolor_lowered) {
|
||||||
|
|
@ -326,12 +327,6 @@ panfrost_build_fs_key(struct panfrost_context *ctx,
|
||||||
key->rt_formats[i] = fmt;
|
key->rt_formats[i] = fmt;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Funny desktop GL varying lowering on Valhall */
|
|
||||||
if (dev->arch >= 9) {
|
|
||||||
assert(vs != NULL && "too early");
|
|
||||||
key->fixed_varying_mask = vs->fixed_varying_mask;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -471,13 +466,6 @@ panfrost_create_shader_state(struct pipe_context *pctx,
|
||||||
so->stream_output = cso->stream_output;
|
so->stream_output = cso->stream_output;
|
||||||
so->nir = nir;
|
so->nir = nir;
|
||||||
|
|
||||||
/* Fix linkage early */
|
|
||||||
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
|
|
||||||
so->fixed_varying_mask =
|
|
||||||
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
|
||||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* gl_FragColor needs to be lowered before lowering I/O, do that now */
|
/* gl_FragColor needs to be lowered before lowering I/O, do that now */
|
||||||
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||||
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
||||||
|
|
|
||||||
|
|
@ -146,6 +146,11 @@ GENX(pan_shader_compile)(nir_shader *s, struct panfrost_compile_inputs *inputs,
|
||||||
#if PAN_ARCH >= 9
|
#if PAN_ARCH >= 9
|
||||||
info->varyings.output_count =
|
info->varyings.output_count =
|
||||||
util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0);
|
util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0);
|
||||||
|
|
||||||
|
/* Store the mask of special varyings, in case we need to emit ADs later. */
|
||||||
|
info->varyings.fixed_varyings =
|
||||||
|
(s->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||||
|
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_FRAGMENT:
|
case MESA_SHADER_FRAGMENT:
|
||||||
|
|
@ -195,6 +200,11 @@ GENX(pan_shader_compile)(nir_shader *s, struct panfrost_compile_inputs *inputs,
|
||||||
#if PAN_ARCH >= 9
|
#if PAN_ARCH >= 9
|
||||||
info->varyings.input_count =
|
info->varyings.input_count =
|
||||||
util_last_bit(s->info.inputs_read >> VARYING_SLOT_VAR0);
|
util_last_bit(s->info.inputs_read >> VARYING_SLOT_VAR0);
|
||||||
|
|
||||||
|
/* Store the mask of special varyings, in case we need to emit ADs later. */
|
||||||
|
info->varyings.fixed_varyings =
|
||||||
|
(s->info.inputs_read & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||||
|
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
||||||
|
|
@ -294,6 +294,9 @@ struct pan_shader_info {
|
||||||
|
|
||||||
/* Bitfield of noperspective varyings, starting at VARYING_SLOT_VAR0 */
|
/* Bitfield of noperspective varyings, starting at VARYING_SLOT_VAR0 */
|
||||||
uint32_t noperspective;
|
uint32_t noperspective;
|
||||||
|
|
||||||
|
/* Bitfield of special varyings. */
|
||||||
|
uint32_t fixed_varyings;
|
||||||
} varyings;
|
} varyings;
|
||||||
|
|
||||||
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
|
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue