mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
freedreno/ir3: lower immeds to const
Helps reduce register pressure and instruction counts for immediates
that would otherwise require a mov into gpr.
total instructions in shared programs: 4455332 -> 4369297 (-1.93%)
total dwords in shared programs: 8807872 -> 8614432 (-2.20%)
total full registers used in shared programs: 263062 -> 250846 (-4.64%)
total half registers used in shared programs: 9845 -> 9845 (0.00%)
total const registers used in shared programs: 1029735 -> 1466993 (42.46%)
           half     full    const    instr   dwords
helped        0    10415        0    17861     5912
hurt          0     1157    21458      947       33
Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
b15c7fc268
commit
173871dfb9
3 changed files with 80 additions and 4 deletions
|
|
@ -659,8 +659,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
|
||||
if (!emit->key.binning_pass)
|
||||
ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
|
||||
/* mark clean after emitting consts: */
|
||||
ctx->prog.dirty = 0;
|
||||
/* mark clean after emitting consts.. a bit ugly, but since binning
|
||||
* pass is emitted first, we want to do this only for main draw:
|
||||
*/
|
||||
if (!emit->key.binning_pass)
|
||||
ctx->prog.dirty = 0;
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
|
||||
|
|
|
|||
|
|
@ -648,8 +648,11 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
|
||||
if (!emit->key.binning_pass)
|
||||
ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
|
||||
/* mark clean after emitting consts: */
|
||||
ctx->prog.dirty = 0;
|
||||
/* mark clean after emitting consts.. a bit ugly, but since binning
|
||||
* pass is emitted first, we want to do this only for main draw:
|
||||
*/
|
||||
if (!emit->key.binning_pass)
|
||||
ctx->prog.dirty = 0;
|
||||
}
|
||||
|
||||
if ((dirty & FD_DIRTY_BLEND)) {
|
||||
|
|
|
|||
|
|
@ -29,13 +29,16 @@
|
|||
#include "freedreno_util.h"
|
||||
|
||||
#include "ir3.h"
|
||||
#include "ir3_shader.h"
|
||||
|
||||
/*
|
||||
* Copy Propagate:
|
||||
*/
|
||||
|
||||
/* State carried through one run of the copy-propagate pass. */
struct ir3_cp_ctx {
|
||||
/* IR being processed; lower_immed() hands this to ir3_reg_clone(). */
struct ir3 *shader;
|
||||
/* Shader variant whose immediates[] table (plus immediates_count and
 * first_immediate) lower_immed() reads and updates.
 */
struct ir3_shader_variant *so;
|
||||
/* Number of scalar immediate slots already used in so->immediates;
 * slot i is addressed as immediates[i / 4].val[i % 4].  ir3_cp() does
 * not name it in its initializer, so it starts out as zero.
 */
unsigned immediate_idx;
|
||||
};
|
||||
|
||||
/* is it a type preserving mov, with ok flags? */
|
||||
|
|
@ -233,6 +236,62 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
|
|||
*dstflags &= ~IR3_REG_SABS;
|
||||
}
|
||||
|
||||
static struct ir3_register *
|
||||
lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
|
||||
{
|
||||
unsigned swiz, idx, i;
|
||||
|
||||
reg = ir3_reg_clone(ctx->shader, reg);
|
||||
|
||||
/* in some cases, there are restrictions on (abs)/(neg) plus const..
|
||||
* so just evaluate those and clear the flags:
|
||||
*/
|
||||
if (new_flags & IR3_REG_SABS) {
|
||||
reg->iim_val = abs(reg->iim_val);
|
||||
new_flags &= ~IR3_REG_SABS;
|
||||
}
|
||||
|
||||
if (new_flags & IR3_REG_FABS) {
|
||||
reg->fim_val = fabs(reg->fim_val);
|
||||
new_flags &= ~IR3_REG_FABS;
|
||||
}
|
||||
|
||||
if (new_flags & IR3_REG_SNEG) {
|
||||
reg->iim_val = -reg->iim_val;
|
||||
new_flags &= ~IR3_REG_SNEG;
|
||||
}
|
||||
|
||||
if (new_flags & IR3_REG_FNEG) {
|
||||
reg->fim_val = -reg->fim_val;
|
||||
new_flags &= ~IR3_REG_FNEG;
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->immediate_idx; i++) {
|
||||
swiz = i % 4;
|
||||
idx = i / 4;
|
||||
|
||||
if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == ctx->immediate_idx) {
|
||||
/* need to generate a new immediate: */
|
||||
swiz = i % 4;
|
||||
idx = i / 4;
|
||||
ctx->so->immediates[idx].val[swiz] = reg->uim_val;
|
||||
ctx->so->immediates_count = idx + 1;
|
||||
ctx->immediate_idx++;
|
||||
}
|
||||
|
||||
new_flags &= ~IR3_REG_IMMED;
|
||||
new_flags |= IR3_REG_CONST;
|
||||
reg->flags = new_flags;
|
||||
reg->num = i + (4 * ctx->so->first_immediate);
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle cp for a given src register. This additionally handles
|
||||
* the cases of collapsing immediate/const (which replace the src
|
||||
|
|
@ -281,6 +340,13 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
|
|||
combine_flags(&new_flags, src);
|
||||
|
||||
if (!valid_flags(instr, n, new_flags)) {
|
||||
/* See if lowering an immediate to const would help. */
|
||||
if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
|
||||
debug_assert(new_flags & IR3_REG_IMMED);
|
||||
instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
|
||||
return;
|
||||
}
|
||||
|
||||
/* special case for "normal" mad instructions, we can
|
||||
* try swapping the first two args if that fits better.
|
||||
*
|
||||
|
|
@ -378,6 +444,9 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
|
|||
src_reg->flags = new_flags;
|
||||
src_reg->iim_val = iim_val;
|
||||
instr->regs[n+1] = src_reg;
|
||||
} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
|
||||
/* See if lowering an immediate to const would help. */
|
||||
instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
@ -484,6 +553,7 @@ void
|
|||
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
|
||||
{
|
||||
struct ir3_cp_ctx ctx = {
|
||||
.shader = ir,
|
||||
.so = so,
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue