Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2026-01-06 13:10:10 +01:00)
panfrost: Move sysvals to dedicated UBO
This makes UBO 0 less special, allowing us to generalize uniform optimization. Note this disables RMU on Midgard, as we're about to rewrite the RMU mechanism.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
parent 0dc539a872
commit db7e2dce1c

5 changed files with 19 additions and 60 deletions
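Before the diff, a minimal sketch of the layout this commit establishes (not part of the change; the names below are illustrative, not Mesa API): sysvals no longer share UBO 0 with user uniforms. They get a dedicated buffer appended after the shader's real UBOs, so a sysval slot is addressed as UBO index num_ubos at byte offset slot * 16.

/* Hypothetical helpers, for illustration only -- they assume the convention
 * introduced by this commit (one trailing sysval UBO, 16 bytes per sysval). */

struct ubo_layout {
        unsigned num_ubos;      /* real UBOs counted by NIR (s->info.num_ubos) */
        unsigned sysval_count;  /* driver sysvals, one vec4 (16 bytes) each */
};

/* Total descriptors: the real UBOs plus one trailing sysval UBO, mirroring
 * "state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0)". */
static unsigned
ubo_layout_count(const struct ubo_layout *l)
{
        return l->num_ubos + (l->sysval_count ? 1 : 0);
}

/* A sysval read becomes a UBO load from index num_ubos at offset slot * 16,
 * matching the bi_load_sysval and emit_sysval_read hunks below. */
static void
ubo_layout_sysval_address(const struct ubo_layout *l, unsigned slot,
                          unsigned *ubo_index, unsigned *byte_offset)
{
        *ubo_index = l->num_ubos;
        *byte_offset = slot * 16;
}

On the emit side (panfrost_emit_const_buf below), the same convention means the sysval payload is packed as the final UNIFORM_BUFFER descriptor, after the ubo_count real buffers.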
@@ -399,12 +399,8 @@ panfrost_shader_compile(struct panfrost_context *ctx,
         state->attribute_count = attribute_count;
         state->varying_count = varying_count;
 
-        /* Uniforms have been lowered to UBOs using nir_lower_uniforms_to_ubo()
-         * which already increments s->info.num_ubos. We do have to account for
-         * the "no uniform, no UBO" case though, otherwise sysval passed
-         * through uniforms won't work correctly.
-         */
-        state->ubo_count = MAX2(s->info.num_ubos, 1);
+        /* Sysvals have dedicated UBO */
+        state->ubo_count = s->info.num_ubos + (state->sysval_count ? 1 : 0);
 
         /* Prepare the descriptors at compile-time */
         state->shader.shader = shader;

@@ -989,55 +989,39 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
                 return 0;
 
         struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
         struct panfrost_shader_state *ss = &all->variants[all->active_variant];
 
-        /* Uniforms are implicitly UBO #0 */
-        bool has_uniforms = buf->enabled_mask & (1 << 0);
-
         /* Allocate room for the sysval and the uniforms */
         size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
-        size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
-        size_t size = sys_size + uniform_size;
         struct panfrost_ptr transfer =
-                panfrost_pool_alloc_aligned(&batch->pool, size, 16);
+                panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
 
         /* Upload sysvals requested by the shader */
         panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
 
-        /* Upload uniforms */
-        if (has_uniforms && uniform_size) {
-                const void *cpu = panfrost_map_constant_buffer_cpu(ctx, buf, 0);
-                memcpy(transfer.cpu + sys_size, cpu, uniform_size);
-        }
+        /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
+        struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
+        unsigned ubo_count = shader->ubo_count - (sys_size ? 1 : 0);
 
-        /* Next up, attach UBOs. UBO #0 is the uniforms we just
-         * uploaded, so it's always included. The count is the highest UBO
-         * addressable -- gaps are included. */
-
-        unsigned ubo_count = 32 - __builtin_clz(buf->enabled_mask | 1);
-
-        size_t sz = MALI_UNIFORM_BUFFER_LENGTH * ubo_count;
+        size_t sz = MALI_UNIFORM_BUFFER_LENGTH * (ubo_count + 1);
         struct panfrost_ptr ubos =
                 panfrost_pool_alloc_aligned(&batch->pool, sz,
                                             MALI_UNIFORM_BUFFER_LENGTH);
 
         uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;
 
-        /* Upload uniforms as a UBO */
+        /* Upload sysval as a final UBO */
 
-        if (size) {
-                pan_pack(ubo_ptr, UNIFORM_BUFFER, cfg) {
-                        cfg.entries = DIV_ROUND_UP(size, 16);
+        if (sys_size) {
+                pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) {
+                        cfg.entries = DIV_ROUND_UP(sys_size, 16);
                         cfg.pointer = transfer.gpu;
                 }
-        } else {
-                *ubo_ptr = 0;
         }
 
         /* The rest are honest-to-goodness UBOs */
 
-        for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
+        for (unsigned ubo = 0; ubo < ubo_count; ++ubo) {
                 size_t usz = buf->cb[ubo].buffer_size;
                 bool enabled = buf->enabled_mask & (1 << ubo);
                 bool empty = usz == 0;

@@ -489,28 +489,9 @@ bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr)
 
         bool offset_is_const = nir_src_is_const(*offset);
         bi_index dyn_offset = bi_src_index(offset);
-        uint32_t const_offset = 0;
-        bool kernel_input = (instr->intrinsic == nir_intrinsic_load_kernel_input);
-
-        /* We may need to offset UBO loads by however many sysvals we have */
-        unsigned sysval_offset = 16 * b->shader->sysvals.sysval_count;
-
-        if (nir_src_is_const(*offset))
-                const_offset = nir_src_as_uint(*offset);
-
-        if ((kernel_input ||
-             (nir_src_is_const(instr->src[0]) &&
-              nir_src_as_uint(instr->src[0]) == 0)) &&
-            b->shader->sysvals.sysval_count) {
-                if (offset_is_const) {
-                        const_offset += sysval_offset;
-                } else {
-                        dyn_offset = bi_iadd_u32(b, dyn_offset,
-                                        bi_imm_u32(sysval_offset), false);
-                }
-        }
+        uint32_t const_offset = offset_is_const ? nir_src_as_uint(*offset) : 0;
 
         bi_load_to(b, instr->num_components * 32,
                         bi_dest_index(&instr->dest), offset_is_const ?
                         bi_imm_u32(const_offset) : dyn_offset,

@@ -635,7 +616,8 @@ bi_load_sysval(bi_builder *b, nir_instr *instr,
         unsigned idx = (uniform * 16) + offset;
 
         bi_load_to(b, nr_components * 32, bi_dest_index(&nir_dest),
-                        bi_imm_u32(idx), bi_zero(), BI_SEG_UBO);
+                        bi_imm_u32(idx),
+                        bi_imm_u32(b->shader->nir->info.num_ubos), BI_SEG_UBO);
 }
 
 /* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5

@@ -1451,7 +1451,8 @@ emit_sysval_read(compiler_context *ctx, nir_instr *instr,
 
         /* Emit the read itself -- this is never indirect */
         midgard_instruction *ins =
-                emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0, 0);
+                emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0,
+                              ctx->nir->info.num_ubos);
 
         ins->mask = mask_of(nr_components);
 }

@@ -1708,7 +1709,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
         reg = nir_dest_index(&instr->dest);
 
         if (is_kernel) {
-                emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysvals.sysval_count * 16) + offset, indirect_offset, 0, 0);
+                emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, 0);
         } else if (is_ubo) {
                 nir_src index = instr->src[0];
 
@@ -1716,10 +1717,6 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 assert(nir_src_is_const(index));
 
                 uint32_t uindex = nir_src_as_uint(index);
-
-                if (uindex == 0)
-                        offset += ctx->sysvals.sysval_count * 16;
-
                 emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex);
         } else if (is_global || is_shared || is_scratch) {
                 unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH);

@@ -1447,7 +1447,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
 void
 midgard_schedule_program(compiler_context *ctx)
 {
-        midgard_promote_uniforms(ctx);
+        // midgard_promote_uniforms(ctx);
 
         /* Must be lowered right before scheduling */
         mir_squeeze_index(ctx);