r300/compiler: Implement simple peephole optimizer

Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
This commit is contained in:
Nicolai Hähnle 2009-10-11 16:13:02 +02:00 committed by Marek Olšák
parent 0b94c05c28
commit 997f2eac5e
8 changed files with 308 additions and 47 deletions

View file

@ -21,6 +21,7 @@ C_SOURCES = \
radeon_dataflow.c \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
radeon_optimize.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \

View file

@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after deadcode");
rc_optimize(&c->Base);
debug_program_log(c, "after dataflow optimize");
rc_dataflow_swizzles(&c->Base);
if (c->Base.Error)
return;

View file

@ -30,7 +30,7 @@
#include "radeon_program.h"
static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb,
refmask &= RC_MASK_XYZW;
for(unsigned int chan = 0; chan < 4; ++chan) {
if (GET_BIT(refmask, chan)) {
cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
}
}
if (refmask)
cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
if (refmask && inst->SrcReg[src].RelAddr)
cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
}
static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
unsigned int refmasks[3] = { 0, 0, 0 };
@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v
}
for(unsigned int src = 0; src < 3; ++src) {
if (inst->RGB.Src[src].Used) {
for(unsigned int chan = 0; chan < 3; ++chan) {
if (GET_BIT(refmasks[src], chan))
cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan);
}
}
if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
refmasks[src] & RC_MASK_XYZ);
if (inst->Alpha.Src[src].Used) {
if (GET_BIT(refmasks[src], 3))
cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3);
}
if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
}
}
/**
* Calls a callback function for all sourced register channels.
* Calls a callback function for all register reads.
*
* This is conservative, i.e. channels may be called multiple times,
* and the writemask of the instruction is not taken into account.
* This is conservative, i.e. if the same register is referenced multiple times,
* the callback may also be called multiple times.
* Also, the writemask of the instruction is not taken into account.
*/
void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
reads_normal(inst, cb, userdata);
@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void *
static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
if (opcode->HasDstReg) {
for(unsigned int chan = 0; chan < 4; ++chan) {
if (GET_BIT(inst->DstReg.WriteMask, chan))
cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan);
}
}
if (opcode->HasDstReg && inst->DstReg.WriteMask)
cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
if (inst->WriteALUResult)
cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
for(unsigned int chan = 0; chan < 3; ++chan) {
if (GET_BIT(inst->RGB.WriteMask, chan))
cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan);
}
if (inst->RGB.WriteMask)
cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
if (inst->Alpha.WriteMask)
cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3);
cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
if (inst->WriteALUResult)
cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
/**
* Calls a callback function for all written register channels.
* Calls a callback function for all register writes in the instruction,
* reporting writemasks to the callback function.
*
* \warning Does not report output registers for paired instructions!
*/
void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
writes_normal(inst, cb, userdata);
@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void *
}
/**
 * Closure handed to mask_to_chan_cb(): bundles a per-channel callback with
 * the user data pointer that must be forwarded to it.
 */
struct mask_to_chan_data {
/* Opaque pointer passed unchanged as the first argument of Fn */
void * UserData;
/* Per-channel callback invoked once for every bit set in a reported mask */
rc_read_write_chan_fn Fn;
};
/**
 * Adapter from the mask-based callback interface to the per-channel one.
 *
 * \p data points to a mask_to_chan_data. The wrapped callback is invoked
 * once for each of the four channels whose bit is set in \p mask, in
 * ascending channel order; all other arguments are forwarded unchanged.
 */
static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	const struct mask_to_chan_data * fwd = data;
	unsigned int chan = 0;

	while (chan < 4) {
		if (GET_BIT(mask, chan)) {
			fwd->Fn(fwd->UserData, inst, file, index, chan);
		}
		++chan;
	}
}
/**
 * Invoke \p cb once per register channel that \p inst reads.
 *
 * Implemented on top of rc_for_all_reads_mask(): every reported mask is
 * expanded into individual channels. The result is conservative, i.e. the
 * same channel may be reported more than once, and the writemask of the
 * instruction is not taken into account.
 */
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
{
	struct mask_to_chan_data fwd = {
		.UserData = userdata,
		.Fn = cb,
	};

	rc_for_all_reads_mask(inst, &mask_to_chan_cb, &fwd);
}
/**
 * Invoke \p cb once per register channel that \p inst writes.
 *
 * Implemented on top of rc_for_all_writes_mask(): every reported writemask
 * is expanded into individual channels.
 *
 * \warning Does not report output registers for paired instructions!
 */
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
{
	struct mask_to_chan_data fwd = {
		.UserData = userdata,
		.Fn = cb,
	};

	rc_for_all_writes_mask(inst, &mask_to_chan_cb, &fwd);
}
static void remap_normal_instruction(struct rc_instruction * fullinst,
rc_remap_register_fn cb, void * userdata)
{

View file

@ -39,10 +39,15 @@ struct rc_swizzle_caps;
* Help analyze and modify the register accesses of instructions.
*/
/*@{*/
typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan);
void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask);
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex);
@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
void rc_dataflow_swizzles(struct radeon_compiler * c);
/*@}*/
void rc_optimize(struct radeon_compiler * c);
#endif /* RADEON_DATAFLOW_H */

View file

@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s,
sap.Proxies = proxies;
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
rc_for_all_writes(inst, scan_write, &sap);
rc_for_all_writes_mask(inst, scan_write, &sap);
rc_remap_registers(inst, remap_proxy_function, &sap);
}

View file

@ -0,0 +1,219 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
/**
 * Compose two source operands into a single equivalent one.
 *
 * \param outer operand as written in the reading instruction
 * \param inner operand that \p outer is being redirected to (peephole()
 *              passes the source operand of the MOV being removed)
 * \return an operand addressing the register of \p inner, with the swizzle,
 *         absolute-value flag and negate bits of both operands combined
 */
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
{
	struct rc_src_register combine;
	unsigned int negate = outer.Negate;
	unsigned int abs = 1;

	if (!outer.Abs) {
		/* Without an outer abs the inner modifiers survive: move each
		 * inner negate bit to the channel that selects it, then apply
		 * the outer negate on top. Swizzle values >= 4 are skipped
		 * (presumably constant selectors, which carry no inner sign). */
		unsigned int routed = 0;

		for(unsigned int chan = 0; chan < 4; ++chan) {
			unsigned int swz = GET_SWZ(outer.Swizzle, chan);
			if (swz >= 4)
				continue;
			routed |= GET_BIT(inner.Negate, swz) << chan;
		}

		negate = routed ^ outer.Negate;
		abs = inner.Abs;
	}
	/* else: an outer abs discards any inner sign, so only the outer
	 * negate remains */

	combine.File = inner.File;
	combine.Index = inner.Index;
	combine.RelAddr = inner.RelAddr;
	combine.Abs = abs;
	combine.Negate = negate;
	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);

	return combine;
}
/**
 * Scan state shared between peephole() and its read/write callbacks while
 * deciding whether a single MOV instruction can be removed.
 */
struct peephole_state {
/** Compiler instance the scan operates on */
struct radeon_compiler * C;
/** The MOV instruction that is the candidate for removal */
struct rc_instruction * Mov;
/** Set as soon as a read is found that prevents removing Mov */
unsigned int Conflict:1;
/** Whether Mov's source has been clobbered */
unsigned int SourceClobbered:1;
/** Which components of Mov's destination register are still from that Mov? */
unsigned int MovMask:4;
/** Which components of Mov's destination register are clearly *not* from that Mov */
unsigned int DefinedMask:4;
/** Which components of Mov's source register are sourced */
unsigned int SourcedMask:4;
/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
int BranchDepth;
};
/**
 * Read callback for the first pass of peephole().
 *
 * Looks only at reads of the temporary register written by the candidate
 * MOV and flags a conflict when such a read cannot be redirected to the
 * MOV's source.
 *
 * \param data  the peephole_state of the running scan
 * \param inst  instruction performing the read (unused)
 * \param file  register file being read
 * \param index register index being read
 * \param mask  components of the register being read
 */
static void peephole_scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct peephole_state * state = data;
	int only_mov_channels;
	int only_foreign_channels;

	if (file != RC_FILE_TEMPORARY)
		return;
	if (index != state->Mov->U.I.DstReg.Index)
		return;

	only_mov_channels = (mask & ~state->MovMask) == 0;
	only_foreign_channels = (mask & ~state->DefinedMask) == 0;

	if (only_mov_channels) {
		/* The read could be redirected to the MOV's source, but only
		 * while that source still holds the value the MOV copied. */
		if (state->SourceClobbered)
			state->Conflict = 1;
		return;
	}

	/* A read entirely covered by other instructions' writes is harmless.
	 * Anything else mixes components that are not well-defined without
	 * the MOV, so the MOV has to stay. */
	if (!only_foreign_channels)
		state->Conflict = 1;
}
/**
 * Write callback for the first pass of peephole().
 *
 * Tracks which components of the MOV's destination are still defined by the
 * MOV and whether the MOV's source has been overwritten since.
 *
 * The destination and source checks are deliberately independent: when the
 * MOV reads and writes the same register (e.g. MOV r0.x, r0.y), a single
 * later write can both redefine destination components and clobber the
 * sourced components, and both effects must be recorded.
 *
 * \param data  the peephole_state of the running scan
 * \param inst  instruction performing the write (unused)
 * \param file  register file being written
 * \param index register index being written
 * \param mask  components of the register being written
 */
static void peephole_scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct peephole_state * s = data;

	/* Writes after leaving the MOV's block are no longer tracked */
	if (s->BranchDepth < 0)
		return;

	if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
		s->MovMask &= ~mask;
		/* Only a write at the MOV's own nesting level is unconditional;
		 * a write inside a nested branch leaves the components neither
		 * clearly from the MOV nor clearly from elsewhere. */
		if (s->BranchDepth == 0)
			s->DefinedMask |= mask;
		else
			s->DefinedMask &= ~mask;
	}

	if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
		if (mask & s->SourcedMask)
			s->SourceClobbered = 1;
	} else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
		/* A relatively addressed source changes meaning when the
		 * address register is rewritten */
		s->SourceClobbered = 1;
	}
}
static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
struct peephole_state s;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
return;
memset(&s, 0, sizeof(s));
s.C = c;
s.Mov = inst_mov;
s.MovMask = inst_mov->U.I.DstReg.WriteMask;
s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
for(unsigned int chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
}
/* 1st pass: Check whether all subsequent readers can be changed */
for(struct rc_instruction * inst = inst_mov->Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
if (s.Conflict)
return;
if (s.BranchDepth >= 0) {
if (inst->U.I.Opcode == RC_OPCODE_IF) {
s.BranchDepth++;
} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
s.BranchDepth--;
if (s.BranchDepth < 0) {
s.DefinedMask &= ~s.MovMask;
s.MovMask = 0;
}
}
}
}
if (s.Conflict)
return;
/* 2nd pass: We can satisfy all readers, so switch them over all at once */
s.MovMask = inst_mov->U.I.DstReg.WriteMask;
s.BranchDepth = 0;
for(struct rc_instruction * inst = inst_mov->Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
unsigned int refmask = 0;
for(unsigned int chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
refmask |= (1 << swz) & RC_MASK_XYZW;
}
if ((refmask & s.MovMask) == refmask)
inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
}
}
if (opcode->HasDstReg) {
if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
s.MovMask &= ~inst->U.I.DstReg.WriteMask;
}
}
if (s.BranchDepth >= 0) {
if (inst->U.I.Opcode == RC_OPCODE_IF) {
s.BranchDepth++;
} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
s.BranchDepth--;
if (s.BranchDepth < 0)
break; /* no more readers after this point */
}
}
}
/* Finally, remove the original MOV instruction */
rc_remove_instruction(inst_mov);
}
/**
 * Run the peephole optimizer over the whole program: every MOV instruction
 * is handed to peephole(), which removes it if all of its readers can be
 * redirected to the MOV's source.
 */
void rc_optimize(struct radeon_compiler * c)
{
	struct rc_instruction * const sentinel = &c->Program.Instructions;
	struct rc_instruction * next;

	for(struct rc_instruction * inst = sentinel->Next; inst != sentinel; inst = next) {
		/* Fetch the successor first: peephole() may unlink inst */
		next = inst->Next;

		if (inst->U.I.Opcode != RC_OPCODE_MOV)
			continue;

		peephole(c, inst);
	}
}

View file

@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s,
}
static void scan_callback(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan)
rc_register_file file, unsigned int index, unsigned int mask)
{
struct regalloc_state * s = data;
struct register_info * reg;
@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
rc_for_all_reads(inst, scan_callback, s);
rc_for_all_writes(inst, scan_callback, s);
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
}
}

View file

@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
* counter-intuitive, to account for the case where an
* instruction writes to the same register as it reads
* from. */
rc_for_all_writes(inst, &scan_write, &s);
rc_for_all_reads(inst, &scan_read, &s);
rc_for_all_writes_chan(inst, &scan_write, &s);
rc_for_all_reads_chan(inst, &scan_read, &s);
DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);