mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-31 16:20:13 +01:00
freedreno/ir3: handle const/immed/abs/neg in cp
Be smarter about propagating copies from const or immed, or with abs/neg modifiers. Also, realize that absneg.s and absneg.f are really "fancy" mov instructions. This opens up the possibility to remove more copies. It helps the TGSI frontend a bit, but will be really needed for the NIR f/e which builds everything up in SSA form (ie. will *always* insert a mov from const or immediate). Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
104713d9f2
commit
f370e95421
3 changed files with 316 additions and 33 deletions
|
|
@ -525,6 +525,12 @@ static inline struct ir3_instruction *ssa(struct ir3_register *reg)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool conflicts(struct ir3_instruction *a,
|
||||
struct ir3_instruction *b)
|
||||
{
|
||||
return (a && b) && (a != b);
|
||||
}
|
||||
|
||||
static inline bool reg_gpr(struct ir3_register *r)
|
||||
{
|
||||
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
|
||||
|
|
|
|||
|
|
@ -32,29 +32,47 @@
|
|||
|
||||
/*
|
||||
* Copy Propagate:
|
||||
*
|
||||
* We could eventually drop this, if the front-end did not insert any
|
||||
* mov's.. For now keeping it as a separate pass since that is less
|
||||
* painful than updating the existing frontend. It is expected that
|
||||
* with an eventual new NIR based frontend that we won't need this.
|
||||
*/
|
||||
|
||||
static void block_cp(struct ir3_block *block);
|
||||
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr);
|
||||
|
||||
static bool is_eligible_mov(struct ir3_instruction *instr)
|
||||
/* Is it a non-transformative (ie. not type changing) mov? This can
|
||||
* also include absneg.s/absneg.f, which for the most part can be
|
||||
* treated as a mov (single src argument).
|
||||
*/
|
||||
static bool is_same_type_mov(struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_register *dst = instr->regs[0];
|
||||
|
||||
/* mov's that write to a0.x or p0.x are special: */
|
||||
if (dst->num == regid(REG_P0, 0))
|
||||
return false;
|
||||
if (dst->num == regid(REG_A0, 0))
|
||||
return false;
|
||||
|
||||
if ((instr->category == 1) &&
|
||||
(instr->cat1.src_type == instr->cat1.dst_type)) {
|
||||
(instr->cat1.src_type == instr->cat1.dst_type))
|
||||
return true;
|
||||
if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
|
||||
(instr->opc == OPC_ABSNEG_S)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* is it a type preserving mov, with ok flags? */
|
||||
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
|
||||
{
|
||||
if (is_same_type_mov(instr)) {
|
||||
struct ir3_register *dst = instr->regs[0];
|
||||
struct ir3_register *src = instr->regs[1];
|
||||
struct ir3_instruction *src_instr = ssa(src);
|
||||
if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
|
||||
return false;
|
||||
/* TODO: propagate abs/neg modifiers if possible */
|
||||
if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
|
||||
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_RELATIV))
|
||||
if (src->flags & IR3_REG_RELATIV)
|
||||
return false;
|
||||
if (!allow_flags)
|
||||
if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
|
||||
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
|
||||
return false;
|
||||
if (!src_instr)
|
||||
return false;
|
||||
/* TODO: remove this hack: */
|
||||
|
|
@ -65,10 +83,270 @@ static bool is_eligible_mov(struct ir3_instruction *instr)
|
|||
return false;
|
||||
}
|
||||
|
||||
|
||||
static struct ir3_instruction *
|
||||
instr_cp(struct ir3_instruction *instr)
|
||||
static unsigned cp_flags(unsigned flags)
|
||||
{
|
||||
/* only considering these flags (at least for now): */
|
||||
flags &= (IR3_REG_CONST | IR3_REG_IMMED |
|
||||
IR3_REG_FNEG | IR3_REG_FABS |
|
||||
IR3_REG_SNEG | IR3_REG_SABS |
|
||||
IR3_REG_BNOT);
|
||||
return flags;
|
||||
}
|
||||
|
||||
static bool valid_flags(struct ir3_instruction *instr, unsigned n,
|
||||
unsigned flags)
|
||||
{
|
||||
unsigned valid_flags;
|
||||
flags = cp_flags(flags);
|
||||
|
||||
/* clear flags that are 'ok' */
|
||||
switch (instr->category) {
|
||||
case 1:
|
||||
case 5:
|
||||
/* no flags allowed */
|
||||
if (flags)
|
||||
return false;
|
||||
break;
|
||||
case 6:
|
||||
valid_flags = IR3_REG_IMMED;
|
||||
if (flags & ~valid_flags)
|
||||
return false;
|
||||
break;
|
||||
case 2:
|
||||
valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST;
|
||||
|
||||
if (ir3_cat2_immed(instr->opc))
|
||||
valid_flags |= IR3_REG_IMMED;
|
||||
|
||||
if (flags & ~valid_flags)
|
||||
return false;
|
||||
|
||||
if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
|
||||
unsigned m = (n ^ 1) + 1;
|
||||
/* cannot deal w/ const in both srcs:
|
||||
* (note that some cat2 actually only have a single src)
|
||||
*/
|
||||
if (m < instr->regs_count) {
|
||||
struct ir3_register *reg = instr->regs[m];
|
||||
if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
|
||||
return false;
|
||||
if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
|
||||
return false;
|
||||
}
|
||||
/* cannot be const + ABS|NEG: */
|
||||
if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
|
||||
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
valid_flags = ir3_cat3_absneg(instr->opc) | IR3_REG_CONST;
|
||||
|
||||
if (flags & ~valid_flags)
|
||||
return false;
|
||||
|
||||
if (flags & IR3_REG_CONST) {
|
||||
/* cannot deal w/ const in 2nd src: */
|
||||
/* TODO in some common cases, like mad, we can swap
|
||||
* first two args.. possibly we should allow that here
|
||||
* and fixup in legalize?
|
||||
*/
|
||||
if (n == 1)
|
||||
return false;
|
||||
/* cannot be const + ABS|NEG: */
|
||||
if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
|
||||
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
/* seems like blob compiler avoids const as src.. */
|
||||
/* TODO double check if this is still the case on a4xx */
|
||||
if (flags & IR3_REG_CONST)
|
||||
return false;
|
||||
if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* propagate register flags from src to dst.. negates need special
|
||||
* handling to cancel each other out.
|
||||
*/
|
||||
static void combine_flags(unsigned *dstflags, unsigned srcflags)
|
||||
{
|
||||
/* if what we are combining into already has (abs) flags,
|
||||
* we can drop (neg) from src:
|
||||
*/
|
||||
if (*dstflags & IR3_REG_FABS)
|
||||
srcflags &= ~IR3_REG_FNEG;
|
||||
if (*dstflags & IR3_REG_SABS)
|
||||
srcflags &= ~IR3_REG_SNEG;
|
||||
|
||||
if (srcflags & IR3_REG_FABS)
|
||||
*dstflags |= IR3_REG_FABS;
|
||||
if (srcflags & IR3_REG_SABS)
|
||||
*dstflags |= IR3_REG_SABS;
|
||||
if (srcflags & IR3_REG_FNEG)
|
||||
*dstflags ^= IR3_REG_FNEG;
|
||||
if (srcflags & IR3_REG_SNEG)
|
||||
*dstflags ^= IR3_REG_SNEG;
|
||||
if (srcflags & IR3_REG_BNOT)
|
||||
*dstflags ^= IR3_REG_BNOT;
|
||||
}
|
||||
|
||||
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
|
||||
|
||||
/**
|
||||
* Handle cp for a given src register. This additionally handles
|
||||
* the cases of collapsing immedate/const (which replace the src
|
||||
* register with a non-ssa src) or collapsing mov's from relative
|
||||
* src (which needs to also fixup the address src reference by the
|
||||
* instruction).
|
||||
*/
|
||||
static void
|
||||
reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
||||
{
|
||||
unsigned src_flags = 0, new_flags;
|
||||
struct ir3_instruction *src_instr;
|
||||
|
||||
if (is_meta(instr)) {
|
||||
/* meta instructions cannot fold up register
|
||||
* flags.. they are usually src for texture
|
||||
* fetch, etc, where we cannot specify abs/neg
|
||||
*/
|
||||
reg->instr = instr_cp(reg->instr, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
src_instr = instr_cp(reg->instr, &src_flags);
|
||||
|
||||
new_flags = reg->flags;
|
||||
combine_flags(&new_flags, src_flags);
|
||||
|
||||
reg->flags = new_flags;
|
||||
reg->instr = src_instr;
|
||||
|
||||
if (!valid_flags(instr, n, reg->flags)) {
|
||||
/* insert an absneg.f */
|
||||
if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)) {
|
||||
debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS)));
|
||||
reg->instr = ir3_ABSNEG_S(instr->block,
|
||||
reg->instr, cp_flags(src_flags));
|
||||
} else {
|
||||
debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)));
|
||||
reg->instr = ir3_ABSNEG_F(instr->block,
|
||||
reg->instr, cp_flags(src_flags));
|
||||
}
|
||||
reg->flags &= ~cp_flags(src_flags);
|
||||
/* send it through instr_cp() again since
|
||||
* the absneg src might be a mov from const
|
||||
* that could be cleaned up:
|
||||
*/
|
||||
reg->instr = instr_cp(reg->instr, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_same_type_mov(reg->instr)) {
|
||||
struct ir3_register *src_reg = reg->instr->regs[1];
|
||||
unsigned new_flags = src_reg->flags;
|
||||
|
||||
combine_flags(&new_flags, reg->flags);
|
||||
|
||||
if (!valid_flags(instr, n, new_flags))
|
||||
return;
|
||||
|
||||
/* Here we handle the special case of mov from
|
||||
* CONST and/or RELATIV. These need to be handled
|
||||
* specially, because in the case of move from CONST
|
||||
* there is no src ir3_instruction so we need to
|
||||
* replace the ir3_register. And in the case of
|
||||
* RELATIV we need to handle the address register
|
||||
* dependency.
|
||||
*/
|
||||
if (src_reg->flags & IR3_REG_CONST) {
|
||||
/* an instruction cannot reference two different
|
||||
* address registers:
|
||||
*/
|
||||
if ((src_reg->flags & IR3_REG_RELATIV) &&
|
||||
conflicts(instr->address, reg->instr->address))
|
||||
return;
|
||||
|
||||
src_reg->flags = new_flags;
|
||||
instr->regs[n+1] = src_reg;
|
||||
|
||||
if (src_reg->flags & IR3_REG_RELATIV)
|
||||
instr->address = reg->instr->address;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if ((src_reg->flags & IR3_REG_RELATIV) &&
|
||||
!conflicts(instr->address, reg->instr->address)) {
|
||||
src_reg->flags = new_flags;
|
||||
instr->regs[n+1] = src_reg;
|
||||
instr->address = reg->instr->address;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* NOTE: seems we can only do immed integers, so don't
|
||||
* need to care about float. But we do need to handle
|
||||
* abs/neg *before* checking that the immediate requires
|
||||
* few enough bits to encode:
|
||||
*
|
||||
* TODO: do we need to do something to avoid accidentally
|
||||
* catching a float immed?
|
||||
*/
|
||||
if (src_reg->flags & IR3_REG_IMMED) {
|
||||
int32_t iim_val = src_reg->iim_val;
|
||||
|
||||
debug_assert((instr->category == 6) ||
|
||||
((instr->category == 2) &&
|
||||
ir3_cat2_immed(instr->opc)));
|
||||
|
||||
if (new_flags & IR3_REG_SABS)
|
||||
iim_val = abs(iim_val);
|
||||
|
||||
if (new_flags & IR3_REG_SNEG)
|
||||
iim_val = -iim_val;
|
||||
|
||||
if (new_flags & IR3_REG_BNOT)
|
||||
iim_val = ~iim_val;
|
||||
|
||||
if (!(iim_val & ~0x3ff)) {
|
||||
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
|
||||
src_reg->flags = new_flags;
|
||||
src_reg->iim_val = iim_val;
|
||||
instr->regs[n+1] = src_reg;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an SSA src (instruction), return the one with extraneous
|
||||
* mov's removed, ie, for (to copy NIR syntax):
|
||||
*
|
||||
* vec1 ssa1 = fadd <something>, <somethingelse>
|
||||
* vec1 ssa2 = fabs ssa1
|
||||
* vec1 ssa3 = fneg ssa1
|
||||
*
|
||||
* then calling instr_cp(ssa3, &flags) would return ssa1 with
|
||||
* (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL,
|
||||
* then disallow eliminating copies which would require flag
|
||||
* propagation (for example, we cannot propagate abs/neg into
|
||||
* an output).
|
||||
*/
|
||||
static struct ir3_instruction *
|
||||
instr_cp(struct ir3_instruction *instr, unsigned *flags)
|
||||
{
|
||||
struct ir3_register *reg;
|
||||
|
||||
/* stay within the block.. don't try to operate across
|
||||
* basic block boundaries or we'll have problems when
|
||||
* dealing with multiple basic blocks:
|
||||
|
|
@ -76,9 +354,12 @@ instr_cp(struct ir3_instruction *instr)
|
|||
if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
|
||||
return instr;
|
||||
|
||||
if (is_eligible_mov(instr)) {
|
||||
struct ir3_instruction *src_instr = ssa(instr->regs[1]);
|
||||
return instr_cp(src_instr);
|
||||
if (is_eligible_mov(instr, !!flags)) {
|
||||
struct ir3_register *reg = instr->regs[1];
|
||||
struct ir3_instruction *src_instr = ssa(reg);
|
||||
if (flags)
|
||||
combine_flags(flags, reg->flags);
|
||||
return instr_cp(src_instr, flags);
|
||||
}
|
||||
|
||||
/* Check termination condition before walking children (rather
|
||||
|
|
@ -89,18 +370,20 @@ instr_cp(struct ir3_instruction *instr)
|
|||
* up as a src, we only need to recursively walk the children
|
||||
* once.)
|
||||
*/
|
||||
if (!ir3_instr_check_mark(instr)) {
|
||||
struct ir3_register *reg;
|
||||
if (ir3_instr_check_mark(instr))
|
||||
return instr;
|
||||
|
||||
/* walk down the graph from each src: */
|
||||
foreach_src(reg, instr)
|
||||
if (reg->flags & IR3_REG_SSA)
|
||||
reg->instr = instr_cp(reg->instr);
|
||||
/* walk down the graph from each src: */
|
||||
foreach_src_n(reg, n, instr) {
|
||||
if (!(reg->flags & IR3_REG_SSA))
|
||||
continue;
|
||||
|
||||
if (instr->address)
|
||||
instr->address = instr_cp(instr->address);
|
||||
reg_cp(instr, reg, n);
|
||||
}
|
||||
|
||||
if (instr->address)
|
||||
instr->address = instr_cp(instr->address, NULL);
|
||||
|
||||
return instr;
|
||||
}
|
||||
|
||||
|
|
@ -111,7 +394,7 @@ static void block_cp(struct ir3_block *block)
|
|||
for (i = 0; i < block->noutputs; i++) {
|
||||
if (block->outputs[i]) {
|
||||
struct ir3_instruction *out =
|
||||
instr_cp(block->outputs[i]);
|
||||
instr_cp(block->outputs[i], NULL);
|
||||
|
||||
block->outputs[i] = out;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -118,12 +118,6 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
|
|||
static struct group_ops instr_ops = { instr_get, instr_insert_mov };
|
||||
|
||||
|
||||
|
||||
static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
|
||||
{
|
||||
return (a && b) && (a != b);
|
||||
}
|
||||
|
||||
static void group_n(struct group_ops *ops, void *arr, unsigned n)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue