freedreno/ir3: handle const/immed/abs/neg in cp

Be smarter about propagating copies from const or immed, or with abs/neg
modifiers.  Also, realize that absneg.s and absneg.f are really "fancy"
mov instructions.

This opens up the possibility to remove more copies.  It helps the TGSI
frontend a bit, but will be really needed for the NIR f/e which builds
everything up in SSA form (ie. will *always* insert a mov from const or
immediate).

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark 2015-03-29 11:24:57 -04:00
parent 104713d9f2
commit f370e95421
3 changed files with 316 additions and 33 deletions

View file

@ -525,6 +525,12 @@ static inline struct ir3_instruction *ssa(struct ir3_register *reg)
return NULL;
}
static inline bool conflicts(struct ir3_instruction *a,
struct ir3_instruction *b)
{
return (a && b) && (a != b);
}
static inline bool reg_gpr(struct ir3_register *r)
{
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))

View file

@ -32,29 +32,47 @@
/*
* Copy Propagate:
*
* We could eventually drop this, if the front-end did not insert any
* mov's.. For now keeping it as a separate pass since that is less
* painful than updating the existing frontend. It is expected that
* with an eventual new NIR based frontend that we won't need this.
*/
static void block_cp(struct ir3_block *block);
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr);
static bool is_eligible_mov(struct ir3_instruction *instr)
/* Is it a non-transformative (ie. not type changing) mov? This can
* also include absneg.s/absneg.f, which for the most part can be
* treated as a mov (single src argument).
*/
static bool is_same_type_mov(struct ir3_instruction *instr)
{
struct ir3_register *dst = instr->regs[0];
/* mov's that write to a0.x or p0.x are special: */
if (dst->num == regid(REG_P0, 0))
return false;
if (dst->num == regid(REG_A0, 0))
return false;
if ((instr->category == 1) &&
(instr->cat1.src_type == instr->cat1.dst_type)) {
(instr->cat1.src_type == instr->cat1.dst_type))
return true;
if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
(instr->opc == OPC_ABSNEG_S)))
return true;
return false;
}
/* is it a type preserving mov, with ok flags? */
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
{
if (is_same_type_mov(instr)) {
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
struct ir3_instruction *src_instr = ssa(src);
if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
return false;
/* TODO: propagate abs/neg modifiers if possible */
if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_RELATIV))
if (src->flags & IR3_REG_RELATIV)
return false;
if (!allow_flags)
if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
if (!src_instr)
return false;
/* TODO: remove this hack: */
@ -65,10 +83,270 @@ static bool is_eligible_mov(struct ir3_instruction *instr)
return false;
}
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr)
static unsigned cp_flags(unsigned flags)
{
/* only considering these flags (at least for now): */
flags &= (IR3_REG_CONST | IR3_REG_IMMED |
IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_SNEG | IR3_REG_SABS |
IR3_REG_BNOT);
return flags;
}
static bool valid_flags(struct ir3_instruction *instr, unsigned n,
unsigned flags)
{
unsigned valid_flags;
flags = cp_flags(flags);
/* clear flags that are 'ok' */
switch (instr->category) {
case 1:
case 5:
/* no flags allowed */
if (flags)
return false;
break;
case 6:
valid_flags = IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
break;
case 2:
valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST;
if (ir3_cat2_immed(instr->opc))
valid_flags |= IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
unsigned m = (n ^ 1) + 1;
/* cannot deal w/ const in both srcs:
* (note that some cat2 actually only have a single src)
*/
if (m < instr->regs_count) {
struct ir3_register *reg = instr->regs[m];
if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
return false;
if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
return false;
}
/* cannot be const + ABS|NEG: */
if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
}
break;
case 3:
valid_flags = ir3_cat3_absneg(instr->opc) | IR3_REG_CONST;
if (flags & ~valid_flags)
return false;
if (flags & IR3_REG_CONST) {
/* cannot deal w/ const in 2nd src: */
/* TODO in some common cases, like mad, we can swap
* first two args.. possibly we should allow that here
* and fixup in legalize?
*/
if (n == 1)
return false;
/* cannot be const + ABS|NEG: */
if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
}
break;
case 4:
/* seems like blob compiler avoids const as src.. */
/* TODO double check if this is still the case on a4xx */
if (flags & IR3_REG_CONST)
return false;
if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
return false;
break;
}
return true;
}
/* propagate register flags from src to dst.. negates need special
* handling to cancel each other out.
*/
static void combine_flags(unsigned *dstflags, unsigned srcflags)
{
/* if what we are combining into already has (abs) flags,
* we can drop (neg) from src:
*/
if (*dstflags & IR3_REG_FABS)
srcflags &= ~IR3_REG_FNEG;
if (*dstflags & IR3_REG_SABS)
srcflags &= ~IR3_REG_SNEG;
if (srcflags & IR3_REG_FABS)
*dstflags |= IR3_REG_FABS;
if (srcflags & IR3_REG_SABS)
*dstflags |= IR3_REG_SABS;
if (srcflags & IR3_REG_FNEG)
*dstflags ^= IR3_REG_FNEG;
if (srcflags & IR3_REG_SNEG)
*dstflags ^= IR3_REG_SNEG;
if (srcflags & IR3_REG_BNOT)
*dstflags ^= IR3_REG_BNOT;
}
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
/**
* Handle cp for a given src register. This additionally handles
* the cases of collapsing immedate/const (which replace the src
* register with a non-ssa src) or collapsing mov's from relative
* src (which needs to also fixup the address src reference by the
* instruction).
*/
static void
reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
{
unsigned src_flags = 0, new_flags;
struct ir3_instruction *src_instr;
if (is_meta(instr)) {
/* meta instructions cannot fold up register
* flags.. they are usually src for texture
* fetch, etc, where we cannot specify abs/neg
*/
reg->instr = instr_cp(reg->instr, NULL);
return;
}
src_instr = instr_cp(reg->instr, &src_flags);
new_flags = reg->flags;
combine_flags(&new_flags, src_flags);
reg->flags = new_flags;
reg->instr = src_instr;
if (!valid_flags(instr, n, reg->flags)) {
/* insert an absneg.f */
if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)) {
debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS)));
reg->instr = ir3_ABSNEG_S(instr->block,
reg->instr, cp_flags(src_flags));
} else {
debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)));
reg->instr = ir3_ABSNEG_F(instr->block,
reg->instr, cp_flags(src_flags));
}
reg->flags &= ~cp_flags(src_flags);
/* send it through instr_cp() again since
* the absneg src might be a mov from const
* that could be cleaned up:
*/
reg->instr = instr_cp(reg->instr, NULL);
return;
}
if (is_same_type_mov(reg->instr)) {
struct ir3_register *src_reg = reg->instr->regs[1];
unsigned new_flags = src_reg->flags;
combine_flags(&new_flags, reg->flags);
if (!valid_flags(instr, n, new_flags))
return;
/* Here we handle the special case of mov from
* CONST and/or RELATIV. These need to be handled
* specially, because in the case of move from CONST
* there is no src ir3_instruction so we need to
* replace the ir3_register. And in the case of
* RELATIV we need to handle the address register
* dependency.
*/
if (src_reg->flags & IR3_REG_CONST) {
/* an instruction cannot reference two different
* address registers:
*/
if ((src_reg->flags & IR3_REG_RELATIV) &&
conflicts(instr->address, reg->instr->address))
return;
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
if (src_reg->flags & IR3_REG_RELATIV)
instr->address = reg->instr->address;
return;
}
if ((src_reg->flags & IR3_REG_RELATIV) &&
!conflicts(instr->address, reg->instr->address)) {
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
instr->address = reg->instr->address;
return;
}
/* NOTE: seems we can only do immed integers, so don't
* need to care about float. But we do need to handle
* abs/neg *before* checking that the immediate requires
* few enough bits to encode:
*
* TODO: do we need to do something to avoid accidentally
* catching a float immed?
*/
if (src_reg->flags & IR3_REG_IMMED) {
int32_t iim_val = src_reg->iim_val;
debug_assert((instr->category == 6) ||
((instr->category == 2) &&
ir3_cat2_immed(instr->opc)));
if (new_flags & IR3_REG_SABS)
iim_val = abs(iim_val);
if (new_flags & IR3_REG_SNEG)
iim_val = -iim_val;
if (new_flags & IR3_REG_BNOT)
iim_val = ~iim_val;
if (!(iim_val & ~0x3ff)) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
src_reg->flags = new_flags;
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
}
return;
}
}
}
/**
* Given an SSA src (instruction), return the one with extraneous
* mov's removed, ie, for (to copy NIR syntax):
*
* vec1 ssa1 = fadd <something>, <somethingelse>
* vec1 ssa2 = fabs ssa1
* vec1 ssa3 = fneg ssa1
*
* then calling instr_cp(ssa3, &flags) would return ssa1 with
* (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL,
* then disallow eliminating copies which would require flag
* propagation (for example, we cannot propagate abs/neg into
* an output).
*/
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr, unsigned *flags)
{
struct ir3_register *reg;
/* stay within the block.. don't try to operate across
* basic block boundaries or we'll have problems when
* dealing with multiple basic blocks:
@ -76,9 +354,12 @@ instr_cp(struct ir3_instruction *instr)
if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
return instr;
if (is_eligible_mov(instr)) {
struct ir3_instruction *src_instr = ssa(instr->regs[1]);
return instr_cp(src_instr);
if (is_eligible_mov(instr, !!flags)) {
struct ir3_register *reg = instr->regs[1];
struct ir3_instruction *src_instr = ssa(reg);
if (flags)
combine_flags(flags, reg->flags);
return instr_cp(src_instr, flags);
}
/* Check termination condition before walking children (rather
@ -89,18 +370,20 @@ instr_cp(struct ir3_instruction *instr)
* up as a src, we only need to recursively walk the children
* once.)
*/
if (!ir3_instr_check_mark(instr)) {
struct ir3_register *reg;
if (ir3_instr_check_mark(instr))
return instr;
/* walk down the graph from each src: */
foreach_src(reg, instr)
if (reg->flags & IR3_REG_SSA)
reg->instr = instr_cp(reg->instr);
/* walk down the graph from each src: */
foreach_src_n(reg, n, instr) {
if (!(reg->flags & IR3_REG_SSA))
continue;
if (instr->address)
instr->address = instr_cp(instr->address);
reg_cp(instr, reg, n);
}
if (instr->address)
instr->address = instr_cp(instr->address, NULL);
return instr;
}
@ -111,7 +394,7 @@ static void block_cp(struct ir3_block *block)
for (i = 0; i < block->noutputs; i++) {
if (block->outputs[i]) {
struct ir3_instruction *out =
instr_cp(block->outputs[i]);
instr_cp(block->outputs[i], NULL);
block->outputs[i] = out;
}

View file

@ -118,12 +118,6 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
static struct group_ops instr_ops = { instr_get, instr_insert_mov };
static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
{
return (a && b) && (a != b);
}
static void group_n(struct group_ops *ops, void *arr, unsigned n)
{
unsigned i, j;