mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 02:40:11 +01:00
r300: merge MOVs with MULs or ADDs in merge channels
Shader-db stats with RV530: total instructions in shared programs: 166499 -> 164362 (-1.28%) instructions in affected programs: 80056 -> 77919 (-2.67%) total temps in shared programs: 21658 -> 21565 (-0.43%) temps in affected programs: 1780 -> 1687 (-5.22%) Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Filip Gawin <filip@gawin.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17560>
This commit is contained in:
parent
275beae42d
commit
9c01fff445
1 changed files with 116 additions and 1 deletions
|
|
@ -998,6 +998,113 @@ static bool merge_movs(
|
|||
return true;
|
||||
}
|
||||
|
||||
static int have_shared_source(struct rc_instruction * inst1, struct rc_instruction * inst2)
|
||||
{
|
||||
int shared_src = -1;
|
||||
const struct rc_opcode_info * opcode1 = rc_get_opcode_info(inst1->U.I.Opcode);
|
||||
const struct rc_opcode_info * opcode2 = rc_get_opcode_info(inst2->U.I.Opcode);
|
||||
for (unsigned i = 0; i < opcode1->NumSrcRegs; i++) {
|
||||
for (unsigned j = 0; j < opcode2->NumSrcRegs; j++) {
|
||||
if (inst1->U.I.SrcReg[i].File == inst2->U.I.SrcReg[j].File &&
|
||||
inst1->U.I.SrcReg[i].Index == inst2->U.I.SrcReg[j].Index &&
|
||||
inst1->U.I.SrcReg[i].RelAddr == inst2->U.I.SrcReg[j].RelAddr)
|
||||
shared_src = i;
|
||||
}
|
||||
}
|
||||
return shared_src;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function will try to merge MOV and ADD/MUL instructions with the same
|
||||
* destination, making use of the constant swizzles.
|
||||
*
|
||||
* For example:
|
||||
* MOV temp[0].x const[0].x
|
||||
* MUL temp[0].yz const[1].yz const[2].yz
|
||||
*
|
||||
* becomes
|
||||
* MAD temp[0].xyz const[1].0yz const[2].0yz const[0].x00
|
||||
*/
|
||||
static int merge_mov_add_mul(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst1,
|
||||
struct rc_instruction * inst2)
|
||||
{
|
||||
struct rc_instruction * inst, * mov;
|
||||
if (inst1->U.I.Opcode == RC_OPCODE_MOV) {
|
||||
mov = inst1;
|
||||
inst = inst2;
|
||||
} else {
|
||||
mov = inst2;
|
||||
inst = inst1;
|
||||
}
|
||||
|
||||
const bool is_mul = inst->U.I.Opcode == RC_OPCODE_MUL;
|
||||
int shared_index = have_shared_source(inst, mov);
|
||||
unsigned wmask = mov->U.I.DstReg.WriteMask | inst->U.I.DstReg.WriteMask;
|
||||
|
||||
/* If there is a shared source, just merge the swizzles and be done with it. */
|
||||
if (shared_index != -1) {
|
||||
struct rc_src_register shared_src = inst->U.I.SrcReg[shared_index];
|
||||
struct rc_src_register other_src = inst->U.I.SrcReg[1 - shared_index];
|
||||
|
||||
shared_src.Negate = merge_negates(mov->U.I.SrcReg[0], shared_src);
|
||||
shared_src.Swizzle = merge_swizzles(shared_src.Swizzle,
|
||||
mov->U.I.SrcReg[0].Swizzle);
|
||||
other_src.Negate = clean_negate(other_src);
|
||||
unsigned int swz = is_mul ? RC_SWIZZLE_ONE : RC_SWIZZLE_ZERO;
|
||||
other_src.Swizzle = fill_swizzle(other_src.Swizzle, wmask, swz);
|
||||
|
||||
if (!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, shared_src) ||
|
||||
!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, other_src))
|
||||
return 0;
|
||||
|
||||
inst2->U.I.Opcode = inst->U.I.Opcode;
|
||||
inst2->U.I.SrcReg[0] = shared_src;
|
||||
inst2->U.I.SrcReg[1] = other_src;
|
||||
|
||||
/* TODO: we can do a bit better in the special case when one of the sources is none.
|
||||
* Convert to MAD otherwise.
|
||||
*/
|
||||
} else {
|
||||
struct rc_src_register src0, src1, src2;
|
||||
if (is_mul) {
|
||||
src2 = mov->U.I.SrcReg[0];
|
||||
src0 = inst->U.I.SrcReg[0];
|
||||
src1 = inst->U.I.SrcReg[1];
|
||||
} else {
|
||||
src0 = mov->U.I.SrcReg[0];
|
||||
src1 = inst->U.I.SrcReg[0];
|
||||
src2 = inst->U.I.SrcReg[1];
|
||||
}
|
||||
/* The following login expects that the unused channels have empty negate bits. */
|
||||
src0.Negate = clean_negate(src0);
|
||||
src1.Negate = clean_negate(src1);
|
||||
src2.Negate = clean_negate(src2);
|
||||
|
||||
src0.Swizzle = fill_swizzle(src0.Swizzle,
|
||||
wmask, RC_SWIZZLE_ONE);
|
||||
src1.Swizzle = fill_swizzle(src1.Swizzle,
|
||||
wmask, is_mul ? RC_SWIZZLE_ZERO : RC_SWIZZLE_ONE);
|
||||
src2.Swizzle = fill_swizzle(src2.Swizzle,
|
||||
wmask, RC_SWIZZLE_ZERO);
|
||||
if (!c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src0) ||
|
||||
!c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src1) ||
|
||||
!c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src2))
|
||||
return 0;
|
||||
|
||||
inst2->U.I.Opcode = RC_OPCODE_MAD;
|
||||
inst2->U.I.SrcReg[0] = src0;
|
||||
inst2->U.I.SrcReg[1] = src1;
|
||||
inst2->U.I.SrcReg[2] = src2;
|
||||
}
|
||||
inst2->U.I.DstReg.WriteMask = wmask;
|
||||
/* finally delete the original instruction */
|
||||
rc_remove_instruction(inst1);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool inst_combination(
|
||||
struct rc_instruction * inst1,
|
||||
struct rc_instruction * inst2,
|
||||
|
|
@ -1071,6 +1178,12 @@ static void merge_channels(struct radeon_compiler * c, struct rc_instruction * i
|
|||
if (merge_movs(c, inst, cur))
|
||||
return;
|
||||
}
|
||||
|
||||
if (inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_ADD) ||
|
||||
inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_MUL)) {
|
||||
if (merge_mov_add_mul(c, inst, cur))
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1102,7 +1215,9 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
|||
while(inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * cur = inst;
|
||||
inst = inst->Next;
|
||||
if (cur->U.I.Opcode == RC_OPCODE_MOV)
|
||||
if (cur->U.I.Opcode == RC_OPCODE_MOV ||
|
||||
cur->U.I.Opcode == RC_OPCODE_ADD ||
|
||||
cur->U.I.Opcode == RC_OPCODE_MUL)
|
||||
merge_channels(c, cur);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue