mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 15:30:14 +01:00
r300: merge simple movs with constant swizzles together
This pass will merge instructions like these:

    MOV output[0].x, temp[5].x___;
    MOV output[0].yzw, none._001;

into:

    MOV output[0].xyzw, temp[5].x001;

It is currently very careful with control flow and dependency tracking, so there is still room for improvement.

Shader-db stats with RV530:

    total instructions in shared programs: 132486 -> 132256 (-0.17%)
    instructions in affected programs: 6186 -> 5956 (-3.72%)
    helped: 65
    HURT: 0

    total temps in shared programs: 18035 -> 18014 (-0.12%)
    temps in affected programs: 295 -> 274 (-7.12%)
    helped: 22
    HURT: 1

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16657>
This commit is contained in:
parent
0fcd423a6a
commit
6c2959c025
1 changed files with 82 additions and 0 deletions
|
|
@ -887,6 +887,86 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) {
|
||||
unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
|
||||
for (unsigned int chan = 0; chan < 4; chan++) {
|
||||
unsigned int swz = GET_SWZ(swz1, chan);
|
||||
if (swz != RC_SWIZZLE_UNUSED) {
|
||||
SET_SWZ(new_swz, chan, swz);
|
||||
continue;
|
||||
}
|
||||
swz = GET_SWZ(swz2, chan);
|
||||
SET_SWZ(new_swz, chan, swz);
|
||||
}
|
||||
return new_swz;
|
||||
}
|
||||
|
||||
static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
|
||||
{
|
||||
unsigned int orig_dst_reg = inst->U.I.DstReg.Index;
|
||||
unsigned int orig_dst_file = inst->U.I.DstReg.File;
|
||||
unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask;
|
||||
unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index;
|
||||
unsigned int orig_src_file = inst->U.I.SrcReg[0].File;
|
||||
|
||||
struct rc_instruction * cur = inst;
|
||||
while (cur!= &c->Program.Instructions) {
|
||||
cur = cur->Next;
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);
|
||||
|
||||
/* Keep it simple for now and stop when encountering any
|
||||
* control flow.
|
||||
*/
|
||||
if (opcode->IsFlowControl)
|
||||
return 0;
|
||||
|
||||
/* Stop when the original destination is overwritten */
|
||||
if (orig_dst_reg == cur->U.I.DstReg.Index &&
|
||||
orig_dst_file == cur->U.I.DstReg.File &&
|
||||
(orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0)
|
||||
return 0;
|
||||
|
||||
/* Stop the search when the original instruction destination
|
||||
* is used as a source for anything.
|
||||
*/
|
||||
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (cur->U.I.SrcReg[i].File == orig_dst_file &&
|
||||
cur->U.I.SrcReg[i].Index == orig_dst_reg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (cur->U.I.Opcode == RC_OPCODE_MOV &&
|
||||
cur->U.I.DstReg.File == orig_dst_file &&
|
||||
cur->U.I.DstReg.Index == orig_dst_reg &&
|
||||
(cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) {
|
||||
|
||||
/* We can merge the movs if one of them is from inline constant */
|
||||
if (cur->U.I.SrcReg[0].File == RC_FILE_NONE ||
|
||||
orig_src_file == RC_FILE_NONE) {
|
||||
cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
|
||||
|
||||
if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) {
|
||||
cur->U.I.SrcReg[0].File = orig_src_file;
|
||||
cur->U.I.SrcReg[0].Index = orig_src_reg;
|
||||
cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs;
|
||||
cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr;
|
||||
}
|
||||
cur->U.I.SrcReg[0].Swizzle =
|
||||
merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
|
||||
inst->U.I.SrcReg[0].Swizzle);
|
||||
|
||||
cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate;
|
||||
|
||||
/* finally delete the original mov */
|
||||
rc_remove_instruction(inst);
|
||||
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void rc_optimize(struct radeon_compiler * c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
|
|
@ -900,6 +980,8 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
|||
continue;
|
||||
|
||||
if (cur->U.I.Opcode == RC_OPCODE_MOV) {
|
||||
if (merge_movs(c,cur))
|
||||
continue;
|
||||
copy_propagate(c, cur);
|
||||
/* cur may no longer be part of the program */
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue