mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 18:00:10 +01:00
panfrost: Promote uniform registers late
Rather than creating either a load or a uniform register read with a fixed beginning offset, we always create a load and then promote to a uniform register later. This will allow us to promote in a register pressure aware manner. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
parent
aa03159120
commit
3174bc9972
6 changed files with 174 additions and 82 deletions
|
|
@ -345,6 +345,11 @@ mir_next_op(struct midgard_instruction *ins)
|
|||
mir_foreach_block(ctx, v_block) \
|
||||
mir_foreach_instr_in_block(v_block, v)
|
||||
|
||||
#define mir_foreach_instr_global_safe(ctx, v) \
|
||||
mir_foreach_block(ctx, v_block) \
|
||||
mir_foreach_instr_in_block_safe(v_block, v)
|
||||
|
||||
|
||||
|
||||
static inline midgard_instruction *
|
||||
mir_last_in_block(struct midgard_block *block)
|
||||
|
|
@ -440,6 +445,18 @@ bool mir_has_multiple_writes(compiler_context *ctx, int src);
|
|||
|
||||
void mir_create_pipeline_registers(compiler_context *ctx);
|
||||
|
||||
void
|
||||
midgard_promote_uniforms(compiler_context *ctx, unsigned pressure);
|
||||
|
||||
void
|
||||
emit_ubo_read(
|
||||
compiler_context *ctx,
|
||||
unsigned dest,
|
||||
unsigned offset,
|
||||
nir_src *indirect_offset,
|
||||
unsigned index);
|
||||
|
||||
|
||||
/* Final emission */
|
||||
|
||||
void emit_binary_bundle(
|
||||
|
|
|
|||
|
|
@ -40,6 +40,12 @@
|
|||
op == midgard_alu_op_imov \
|
||||
)
|
||||
|
||||
#define OP_IS_UBO_READ(op) ( \
|
||||
op == midgard_op_ld_uniform_32 || \
|
||||
op == midgard_op_ld_uniform_16 || \
|
||||
op == midgard_op_ld_uniform_32i \
|
||||
)
|
||||
|
||||
/* ALU control words are single bit fields with a lot of space */
|
||||
|
||||
#define ALU_ENAB_VEC_MUL (1 << 17)
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ libpanfrost_midgard_files = files(
|
|||
'midgard_ra_pipeline.c',
|
||||
'midgard_liveness.c',
|
||||
'midgard_ops.c',
|
||||
'mir_promote_uniforms.c',
|
||||
'cppwrap.cpp',
|
||||
'disassemble.c',
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1157,7 +1157,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
|
|||
/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
|
||||
* optimized) versions of UBO #0 */
|
||||
|
||||
static void
|
||||
void
|
||||
emit_ubo_read(
|
||||
compiler_context *ctx,
|
||||
unsigned dest,
|
||||
|
|
@ -1167,36 +1167,20 @@ emit_ubo_read(
|
|||
{
|
||||
/* TODO: half-floats */
|
||||
|
||||
if (!indirect_offset && offset < ctx->uniform_cutoff && index == 0) {
|
||||
/* Fast path: For the first 16 uniforms, direct accesses are
|
||||
* 0-cycle, since they're just a register fetch in the usual
|
||||
* case. So, we alias the registers while we're still in
|
||||
* SSA-space */
|
||||
midgard_instruction ins = m_ld_uniform_32(dest, offset);
|
||||
|
||||
int reg_slot = 23 - offset;
|
||||
alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
|
||||
/* TODO: Don't split */
|
||||
ins.load_store.varying_parameters = (offset & 7) << 7;
|
||||
ins.load_store.address = offset >> 3;
|
||||
|
||||
if (indirect_offset) {
|
||||
emit_indirect_offset(ctx, indirect_offset);
|
||||
ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
|
||||
} else {
|
||||
/* Otherwise, read from the 'special' UBO to access
|
||||
* higher-indexed uniforms, at a performance cost. More
|
||||
* generally, we're emitting a UBO read instruction. */
|
||||
|
||||
midgard_instruction ins = m_ld_uniform_32(dest, offset);
|
||||
|
||||
/* TODO: Don't split */
|
||||
ins.load_store.varying_parameters = (offset & 7) << 7;
|
||||
ins.load_store.address = offset >> 3;
|
||||
|
||||
if (indirect_offset) {
|
||||
emit_indirect_offset(ctx, indirect_offset);
|
||||
ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
|
||||
} else {
|
||||
ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
|
||||
}
|
||||
|
||||
/* TODO respect index */
|
||||
|
||||
emit_mir_instruction(ctx, ins);
|
||||
ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
|
||||
}
|
||||
|
||||
emit_mir_instruction(ctx, ins);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2228,57 +2212,6 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
|
|||
return progress;
|
||||
}
|
||||
|
||||
/* The following passes reorder MIR instructions to enable better scheduling */
|
||||
|
||||
static void
|
||||
midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
|
||||
{
|
||||
mir_foreach_instr_in_block_safe(block, ins) {
|
||||
if (ins->type != TAG_LOAD_STORE_4) continue;
|
||||
|
||||
/* We've found a load/store op. Check if next is also load/store. */
|
||||
midgard_instruction *next_op = mir_next_op(ins);
|
||||
if (&next_op->link != &block->instructions) {
|
||||
if (next_op->type == TAG_LOAD_STORE_4) {
|
||||
/* If so, we're done since we're a pair */
|
||||
ins = mir_next_op(ins);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Maximum search distance to pair, to avoid register pressure disasters */
|
||||
int search_distance = 8;
|
||||
|
||||
/* Otherwise, we have an orphaned load/store -- search for another load */
|
||||
mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
|
||||
/* Terminate search if necessary */
|
||||
if (!(search_distance--)) break;
|
||||
|
||||
if (c->type != TAG_LOAD_STORE_4) continue;
|
||||
|
||||
/* Stores cannot be reordered, since they have
|
||||
* dependencies. For the same reason, indirect
|
||||
* loads cannot be reordered as their index is
|
||||
* loaded in r27.w */
|
||||
|
||||
if (OP_IS_STORE(c->load_store.op)) continue;
|
||||
|
||||
/* It appears the 0x800 bit is set whenever a
|
||||
* load is direct, unset when it is indirect.
|
||||
* Skip indirect loads. */
|
||||
|
||||
if (!(c->load_store.unknown & 0x800)) continue;
|
||||
|
||||
/* We found one! Move it up to pair and remove it from the old location */
|
||||
|
||||
mir_insert_instruction_before(ins, *c);
|
||||
mir_remove_instruction(c);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If there are leftovers after the below pass, emit actual fmov
|
||||
* instructions for the slow-but-correct path */
|
||||
|
||||
|
|
@ -2358,8 +2291,6 @@ emit_block(compiler_context *ctx, nir_block *block)
|
|||
/* Perform heavylifting for aliasing */
|
||||
actualise_ssa_to_alias(ctx);
|
||||
|
||||
midgard_pair_load_store(ctx, this_block);
|
||||
|
||||
/* Append fragment shader epilogue (value writeout) */
|
||||
if (ctx->stage == MESA_SHADER_FRAGMENT) {
|
||||
if (block == nir_impl_last_block(ctx->func->impl)) {
|
||||
|
|
@ -2564,7 +2495,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
|
|||
|
||||
compiler_context *ctx = &ictx;
|
||||
|
||||
/* TODO: Decide this at runtime */
|
||||
/* Start off with a safe cutoff, allowing usage of all 16 work
|
||||
* registers. Later, we'll promote uniform reads to uniform registers
|
||||
* if we determine it is beneficial to do so */
|
||||
ctx->uniform_cutoff = 8;
|
||||
|
||||
/* Initialize at a global (not block) level hash tables */
|
||||
|
|
|
|||
|
|
@ -524,6 +524,59 @@ schedule_block(compiler_context *ctx, midgard_block *block)
|
|||
block->is_scheduled = true;
|
||||
}
|
||||
|
||||
/* The following passes reorder MIR instructions to enable better scheduling */
|
||||
|
||||
static void
|
||||
midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
|
||||
{
|
||||
mir_foreach_instr_in_block_safe(block, ins) {
|
||||
if (ins->type != TAG_LOAD_STORE_4) continue;
|
||||
|
||||
/* We've found a load/store op. Check if next is also load/store. */
|
||||
midgard_instruction *next_op = mir_next_op(ins);
|
||||
if (&next_op->link != &block->instructions) {
|
||||
if (next_op->type == TAG_LOAD_STORE_4) {
|
||||
/* If so, we're done since we're a pair */
|
||||
ins = mir_next_op(ins);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Maximum search distance to pair, to avoid register pressure disasters */
|
||||
int search_distance = 8;
|
||||
|
||||
/* Otherwise, we have an orphaned load/store -- search for another load */
|
||||
mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
|
||||
/* Terminate search if necessary */
|
||||
if (!(search_distance--)) break;
|
||||
|
||||
if (c->type != TAG_LOAD_STORE_4) continue;
|
||||
|
||||
/* Stores cannot be reordered, since they have
|
||||
* dependencies. For the same reason, indirect
|
||||
* loads cannot be reordered as their index is
|
||||
* loaded in r27.w */
|
||||
|
||||
if (OP_IS_STORE(c->load_store.op)) continue;
|
||||
|
||||
/* It appears the 0x800 bit is set whenever a
|
||||
* load is direct, unset when it is indirect.
|
||||
* Skip indirect loads. */
|
||||
|
||||
if (!(c->load_store.unknown & 0x800)) continue;
|
||||
|
||||
/* We found one! Move it up to pair and remove it from the old location */
|
||||
|
||||
mir_insert_instruction_before(ins, *c);
|
||||
mir_remove_instruction(c);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
schedule_program(compiler_context *ctx)
|
||||
{
|
||||
|
|
@ -531,6 +584,12 @@ schedule_program(compiler_context *ctx)
|
|||
bool spilled = false;
|
||||
int iter_count = 10; /* max iterations */
|
||||
|
||||
midgard_promote_uniforms(ctx, 8);
|
||||
|
||||
mir_foreach_block(ctx, block) {
|
||||
midgard_pair_load_store(ctx, block);
|
||||
}
|
||||
|
||||
do {
|
||||
/* We would like to run RA after scheduling, but spilling can
|
||||
* complicate this */
|
||||
|
|
|
|||
76
src/panfrost/midgard/mir_promote_uniforms.c
Normal file
76
src/panfrost/midgard/mir_promote_uniforms.c
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Collabora, Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors (Collabora):
|
||||
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
/* This pass promotes reads from uniforms from load/store ops to uniform
|
||||
* registers if it is beneficial to do so. Normally, this saves both
|
||||
* instructions and total register pressure, but it does take a toll on the
|
||||
* number of work registers that are available, so this is a balance.
|
||||
*
|
||||
* To cope, we take as an argument the maximum work register pressure in the
|
||||
* program so we allow that many registers through at minimum, to prevent
|
||||
* spilling. If we spill anyway, I mean, it's a lose-lose at that point. */
|
||||
|
||||
void
|
||||
midgard_promote_uniforms(compiler_context *ctx, unsigned register_pressure)
|
||||
{
|
||||
/* For our purposes, pressure is capped at the number of vec4 work
|
||||
* registers, not live values which would consider spills */
|
||||
register_pressure = MAX2(register_pressure, 16);
|
||||
|
||||
mir_foreach_instr_global_safe(ctx, ins) {
|
||||
if (ins->type != TAG_LOAD_STORE_4) continue;
|
||||
if (!OP_IS_UBO_READ(ins->load_store.op)) continue;
|
||||
|
||||
unsigned lo = ins->load_store.varying_parameters >> 7;
|
||||
unsigned hi = ins->load_store.address;
|
||||
|
||||
/* TODO: Combine fields logically */
|
||||
unsigned address = (hi << 3) | lo;
|
||||
|
||||
/* Check this is UBO 0 */
|
||||
if (ins->load_store.unknown & 0xF) continue;
|
||||
|
||||
/* Check we're accessing directly */
|
||||
if (ins->load_store.unknown != 0x1E00) continue;
|
||||
|
||||
/* Check if it's a promotable range */
|
||||
unsigned uniform_reg = 23 - address;
|
||||
|
||||
if (address > 16) continue;
|
||||
if (register_pressure > uniform_reg) continue;
|
||||
|
||||
/* It is, great! Let's promote */
|
||||
|
||||
ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
|
||||
|
||||
unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
|
||||
mir_rewrite_index_src(ctx, ins->ssa_args.dest, promoted);
|
||||
|
||||
mir_remove_instruction(ins);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue