panfrost: Promote uniform registers late

Rather than creating either a load or a uniform register read with a
fixed beginning offset, we always create a load and then promote it to a
uniform register read later. This will allow us to promote in a
register-pressure-aware manner.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Alyssa Rosenzweig 2019-07-16 14:10:08 -07:00
parent aa03159120
commit 3174bc9972
6 changed files with 174 additions and 82 deletions
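In concrete terms: emit_ubo_read now always emits a ld_uniform_32, and the new midgard_promote_uniforms pass (invoked from schedule_program) later rewrites direct reads of UBO #0 into fixed uniform-register reads where the pinned register does not collide with work-register pressure. Below is a minimal standalone sketch of just that decision; the try_promote helper is illustrative and not part of the driver.

#include <stdio.h>

/* Illustration only: the real pass walks MIR instructions and rewrites SSA
 * sources. Uniform word N is pinned to register 23 - N, so promotion is only
 * taken while the pinned register stays above the program's work-register
 * pressure. */
static int
try_promote(unsigned address, unsigned register_pressure)
{
        unsigned uniform_reg = 23 - address;

        if (address > 16)
                return -1; /* beyond the promotable window */

        if (register_pressure > uniform_reg)
                return -1; /* keep the ld_uniform_32 rather than starve RA */

        return (int) uniform_reg;
}

int
main(void)
{
        /* With the conservative pressure of 16 used at this point in the
         * series, only the first eight uniform words get promoted. */
        for (unsigned address = 0; address < 10; ++address) {
                int reg = try_promote(address, 16);

                if (reg < 0)
                        printf("uniform %u: left as a load\n", address);
                else
                        printf("uniform %u: promoted to r%d\n", address, reg);
        }

        return 0;
}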

View file

@@ -345,6 +345,11 @@ mir_next_op(struct midgard_instruction *ins)
mir_foreach_block(ctx, v_block) \
mir_foreach_instr_in_block(v_block, v)
#define mir_foreach_instr_global_safe(ctx, v) \
mir_foreach_block(ctx, v_block) \
mir_foreach_instr_in_block_safe(v_block, v)
static inline midgard_instruction *
mir_last_in_block(struct midgard_block *block)
@@ -440,6 +445,18 @@ bool mir_has_multiple_writes(compiler_context *ctx, int src);
void mir_create_pipeline_registers(compiler_context *ctx);
void
midgard_promote_uniforms(compiler_context *ctx, unsigned pressure);
void
emit_ubo_read(
compiler_context *ctx,
unsigned dest,
unsigned offset,
nir_src *indirect_offset,
unsigned index);
/* Final emission */
void emit_binary_bundle(

View file

@@ -40,6 +40,12 @@
op == midgard_alu_op_imov \
)
#define OP_IS_UBO_READ(op) ( \
op == midgard_op_ld_uniform_32 || \
op == midgard_op_ld_uniform_16 || \
op == midgard_op_ld_uniform_32i \
)
/* ALU control words are single bit fields with a lot of space */
#define ALU_ENAB_VEC_MUL (1 << 17)

View file

@@ -29,6 +29,7 @@ libpanfrost_midgard_files = files(
'midgard_ra_pipeline.c',
'midgard_liveness.c',
'midgard_ops.c',
'mir_promote_uniforms.c',
'cppwrap.cpp',
'disassemble.c',
)

View file

@@ -1157,7 +1157,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
* optimized) versions of UBO #0 */
static void
void
emit_ubo_read(
compiler_context *ctx,
unsigned dest,
@@ -1167,36 +1167,20 @@ emit_ubo_read(
{
/* TODO: half-floats */
if (!indirect_offset && offset < ctx->uniform_cutoff && index == 0) {
/* Fast path: For the first 16 uniforms, direct accesses are
* 0-cycle, since they're just a register fetch in the usual
* case. So, we alias the registers while we're still in
* SSA-space */
midgard_instruction ins = m_ld_uniform_32(dest, offset);
int reg_slot = 23 - offset;
alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
/* TODO: Don't split */
ins.load_store.varying_parameters = (offset & 7) << 7;
ins.load_store.address = offset >> 3;
if (indirect_offset) {
emit_indirect_offset(ctx, indirect_offset);
ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
} else {
/* Otherwise, read from the 'special' UBO to access
* higher-indexed uniforms, at a performance cost. More
* generally, we're emitting a UBO read instruction. */
midgard_instruction ins = m_ld_uniform_32(dest, offset);
/* TODO: Don't split */
ins.load_store.varying_parameters = (offset & 7) << 7;
ins.load_store.address = offset >> 3;
if (indirect_offset) {
emit_indirect_offset(ctx, indirect_offset);
ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
} else {
ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
}
/* TODO respect index */
emit_mir_instruction(ctx, ins);
ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
}
emit_mir_instruction(ctx, ins);
}
static void
@@ -2228,57 +2212,6 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
return progress;
}
/* The following passes reorder MIR instructions to enable better scheduling */
static void
midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
{
mir_foreach_instr_in_block_safe(block, ins) {
if (ins->type != TAG_LOAD_STORE_4) continue;
/* We've found a load/store op. Check if next is also load/store. */
midgard_instruction *next_op = mir_next_op(ins);
if (&next_op->link != &block->instructions) {
if (next_op->type == TAG_LOAD_STORE_4) {
/* If so, we're done since we're a pair */
ins = mir_next_op(ins);
continue;
}
/* Maximum search distance to pair, to avoid register pressure disasters */
int search_distance = 8;
/* Otherwise, we have an orphaned load/store -- search for another load */
mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
/* Terminate search if necessary */
if (!(search_distance--)) break;
if (c->type != TAG_LOAD_STORE_4) continue;
/* Stores cannot be reordered, since they have
* dependencies. For the same reason, indirect
* loads cannot be reordered as their index is
* loaded in r27.w */
if (OP_IS_STORE(c->load_store.op)) continue;
/* It appears the 0x800 bit is set whenever a
* load is direct, unset when it is indirect.
* Skip indirect loads. */
if (!(c->load_store.unknown & 0x800)) continue;
/* We found one! Move it up to pair and remove it from the old location */
mir_insert_instruction_before(ins, *c);
mir_remove_instruction(c);
break;
}
}
}
}
/* If there are leftovers after the below pass, emit actual fmov
* instructions for the slow-but-correct path */
@@ -2358,8 +2291,6 @@ emit_block(compiler_context *ctx, nir_block *block)
/* Perform heavylifting for aliasing */
actualise_ssa_to_alias(ctx);
midgard_pair_load_store(ctx, this_block);
/* Append fragment shader epilogue (value writeout) */
if (ctx->stage == MESA_SHADER_FRAGMENT) {
if (block == nir_impl_last_block(ctx->func->impl)) {
@@ -2564,7 +2495,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
compiler_context *ctx = &ictx;
/* TODO: Decide this at runtime */
/* Start off with a safe cutoff, allowing usage of all 16 work
* registers. Later, we'll promote uniform reads to uniform registers
* if we determine it is beneficial to do so */
ctx->uniform_cutoff = 8;
/* Initialize at a global (not block) level hash tables */

View file

@@ -524,6 +524,59 @@ schedule_block(compiler_context *ctx, midgard_block *block)
block->is_scheduled = true;
}
/* The following passes reorder MIR instructions to enable better scheduling */
static void
midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
{
mir_foreach_instr_in_block_safe(block, ins) {
if (ins->type != TAG_LOAD_STORE_4) continue;
/* We've found a load/store op. Check if next is also load/store. */
midgard_instruction *next_op = mir_next_op(ins);
if (&next_op->link != &block->instructions) {
if (next_op->type == TAG_LOAD_STORE_4) {
/* If so, we're done since we're a pair */
ins = mir_next_op(ins);
continue;
}
/* Maximum search distance to pair, to avoid register pressure disasters */
int search_distance = 8;
/* Otherwise, we have an orphaned load/store -- search for another load */
mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
/* Terminate search if necessary */
if (!(search_distance--)) break;
if (c->type != TAG_LOAD_STORE_4) continue;
/* Stores cannot be reordered, since they have
* dependencies. For the same reason, indirect
* loads cannot be reordered as their index is
* loaded in r27.w */
if (OP_IS_STORE(c->load_store.op)) continue;
/* It appears the 0x800 bit is set whenever a
* load is direct, unset when it is indirect.
* Skip indirect loads. */
if (!(c->load_store.unknown & 0x800)) continue;
/* We found one! Move it up to pair and remove it from the old location */
mir_insert_instruction_before(ins, *c);
mir_remove_instruction(c);
break;
}
}
}
}
void
schedule_program(compiler_context *ctx)
{
@@ -531,6 +584,12 @@ schedule_program(compiler_context *ctx)
bool spilled = false;
int iter_count = 10; /* max iterations */
midgard_promote_uniforms(ctx, 8);
mir_foreach_block(ctx, block) {
midgard_pair_load_store(ctx, block);
}
do {
/* We would like to run RA after scheduling, but spilling can
* complicate this */

View file

@@ -0,0 +1,76 @@
/*
* Copyright (C) 2019 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors (Collabora):
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include "compiler.h"
/* This pass promotes reads from uniforms from load/store ops to uniform
* registers if it is beneficial to do so. Normally, this saves both
* instructions and total register pressure, but it does take a toll on the
* number of work registers that are available, so this is a balance.
*
* To cope, we take as an argument the maximum work register pressure in the
* program so we allow that many registers through at minimum, to prevent
* spilling. If we spill anyway, I mean, it's a lose-lose at that point. */
void
midgard_promote_uniforms(compiler_context *ctx, unsigned register_pressure)
{
/* For our purposes, pressure is capped at the number of vec4 work
* registers, not live values which would consider spills */
register_pressure = MAX2(register_pressure, 16);
mir_foreach_instr_global_safe(ctx, ins) {
if (ins->type != TAG_LOAD_STORE_4) continue;
if (!OP_IS_UBO_READ(ins->load_store.op)) continue;
unsigned lo = ins->load_store.varying_parameters >> 7;
unsigned hi = ins->load_store.address;
/* TODO: Combine fields logically */
unsigned address = (hi << 3) | lo;
/* Check this is UBO 0 */
if (ins->load_store.unknown & 0xF) continue;
/* Check we're accessing directly */
if (ins->load_store.unknown != 0x1E00) continue;
/* Check if it's a promotable range */
unsigned uniform_reg = 23 - address;
if (address > 16) continue;
if (register_pressure > uniform_reg) continue;
/* It is, great! Let's promote */
ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
mir_rewrite_index_src(ctx, ins->ssa_args.dest, promoted);
mir_remove_instruction(ins);
}
}
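The address the pass reconstructs above is the same uniform word offset that emit_ubo_read split across the varying_parameters and address fields (the "TODO: Don't split" lines). A small standalone check, purely illustrative, that the split-and-rejoin is lossless:

#include <assert.h>
#include <stdio.h>

int
main(void)
{
        for (unsigned offset = 0; offset < 64; ++offset) {
                /* As written by emit_ubo_read: low 3 bits at bit 7 of
                 * varying_parameters, the remainder in address. */
                unsigned varying_parameters = (offset & 7) << 7;
                unsigned address_field = offset >> 3;

                /* As read back by midgard_promote_uniforms. */
                unsigned lo = varying_parameters >> 7;
                unsigned hi = address_field;
                unsigned rebuilt = (hi << 3) | lo;

                assert(rebuilt == offset);
        }

        printf("offset split/rejoin round-trips for all tested values\n");
        return 0;
}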