mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
i965/fs: Copy propagate partial reads.
This commit reworks copy propagation a bit to support propagating the copying of partial registers. This comes up every time we have pull constants because we do a pull constant read immediately followed by a move to splat the one component of the out to 8 or 16-wide. This allows us to eliminate the copy and simply use the one component of the register. Shader DB results: total instructions in shared programs: 5044937 -> 5044428 (-0.01%) instructions in affected programs: 66112 -> 65603 (-0.77%) GAINED: 0 LOST: 0 Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
4d5f0eb048
commit
1728e74957
2 changed files with 64 additions and 20 deletions
|
|
@ -337,6 +337,7 @@ public:
|
|||
bool opt_cse_local(bblock_t *block);
|
||||
bool opt_copy_propagate();
|
||||
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
|
||||
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
|
||||
bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
|
||||
exec_list *acp);
|
||||
void opt_drop_redundant_mov_to_flags();
|
||||
|
|
|
|||
|
|
@ -277,24 +277,30 @@ is_logic_op(enum opcode opcode)
|
|||
bool
|
||||
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
{
|
||||
if (inst->src[arg].file != GRF)
|
||||
return false;
|
||||
|
||||
if (entry->src.file == IMM)
|
||||
return false;
|
||||
assert(entry->src.file == GRF || entry->src.file == UNIFORM);
|
||||
|
||||
if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
|
||||
inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD)
|
||||
return false;
|
||||
|
||||
/* Bail if inst is reading more than entry is writing. */
|
||||
if ((inst->regs_read(this, arg) * inst->src[arg].stride *
|
||||
type_sz(inst->src[arg].type)) > type_sz(entry->dst.type))
|
||||
assert(entry->dst.file == GRF);
|
||||
if (inst->src[arg].reg != entry->dst.reg)
|
||||
return false;
|
||||
|
||||
if (inst->src[arg].file != entry->dst.file ||
|
||||
inst->src[arg].reg != entry->dst.reg ||
|
||||
inst->src[arg].reg_offset != entry->dst.reg_offset ||
|
||||
inst->src[arg].subreg_offset != entry->dst.subreg_offset) {
|
||||
/* Bail if inst is reading a range that isn't contained in the range
|
||||
* that entry is writing.
|
||||
*/
|
||||
int reg_size = dispatch_width * sizeof(float);
|
||||
if (inst->src[arg].reg_offset < entry->dst.reg_offset ||
|
||||
(inst->src[arg].reg_offset * reg_size + inst->src[arg].subreg_offset +
|
||||
inst->regs_read(this, arg) * inst->src[arg].stride * reg_size) >
|
||||
(entry->dst.reg_offset + 1) * reg_size)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
|
||||
if (inst->conditional_mod &&
|
||||
|
|
@ -361,11 +367,39 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
|||
|
||||
inst->src[arg].file = entry->src.file;
|
||||
inst->src[arg].reg = entry->src.reg;
|
||||
inst->src[arg].reg_offset = entry->src.reg_offset;
|
||||
inst->src[arg].subreg_offset = entry->src.subreg_offset;
|
||||
inst->src[arg].stride *= entry->src.stride;
|
||||
inst->saturate = inst->saturate || entry->saturate;
|
||||
|
||||
switch (entry->src.file) {
|
||||
case BAD_FILE:
|
||||
case HW_REG:
|
||||
case UNIFORM:
|
||||
inst->src[arg].reg_offset = entry->src.reg_offset;
|
||||
inst->src[arg].subreg_offset = entry->src.subreg_offset;
|
||||
break;
|
||||
case GRF:
|
||||
{
|
||||
/* In this case, we have to deal with mapping parts of vgrfs to
|
||||
* other parts of vgrfs so we have to do some reg_offset magic.
|
||||
*/
|
||||
|
||||
/* Compute the offset of inst->src[arg] relative to inst->dst */
|
||||
assert(entry->dst.subreg_offset == 0);
|
||||
int rel_offset = inst->src[arg].reg_offset - entry->dst.reg_offset;
|
||||
int rel_suboffset = inst->src[arg].subreg_offset;
|
||||
|
||||
/* Compute the final register offset (in bytes) */
|
||||
int offset = entry->src.reg_offset * reg_size + entry->src.subreg_offset;
|
||||
offset += rel_offset * reg_size + rel_suboffset;
|
||||
inst->src[arg].reg_offset = offset / reg_size;
|
||||
inst->src[arg].subreg_offset = offset % reg_size;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid register file");
|
||||
break;
|
||||
}
|
||||
|
||||
if (!inst->src[arg].abs) {
|
||||
inst->src[arg].abs = entry->src.abs;
|
||||
inst->src[arg].negate ^= entry->src.negate;
|
||||
|
|
@ -375,9 +409,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
|||
}
|
||||
|
||||
|
||||
static bool
|
||||
try_constant_propagate(struct brw_context *brw, fs_inst *inst,
|
||||
acp_entry *entry)
|
||||
bool
|
||||
fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
|
|
@ -385,12 +418,22 @@ try_constant_propagate(struct brw_context *brw, fs_inst *inst,
|
|||
return false;
|
||||
|
||||
for (int i = inst->sources - 1; i >= 0; i--) {
|
||||
if (inst->src[i].file != entry->dst.file ||
|
||||
inst->src[i].reg != entry->dst.reg ||
|
||||
inst->src[i].reg_offset != entry->dst.reg_offset ||
|
||||
inst->src[i].subreg_offset != entry->dst.subreg_offset ||
|
||||
inst->src[i].type != entry->dst.type ||
|
||||
inst->src[i].stride > 1)
|
||||
if (inst->src[i].file != GRF)
|
||||
continue;
|
||||
|
||||
assert(entry->dst.file == GRF);
|
||||
if (inst->src[i].reg != entry->dst.reg ||
|
||||
inst->src[i].type != entry->dst.type)
|
||||
continue;
|
||||
|
||||
/* Bail if inst is reading a range that isn't contained in the range
|
||||
* that entry is writing.
|
||||
*/
|
||||
int reg_size = dispatch_width * sizeof(float);
|
||||
if (inst->src[i].reg_offset < entry->dst.reg_offset ||
|
||||
(inst->src[i].reg_offset * reg_size + inst->src[i].subreg_offset +
|
||||
inst->regs_read(this, i) * inst->src[i].stride * reg_size) >
|
||||
(entry->dst.reg_offset + 1) * reg_size)
|
||||
continue;
|
||||
|
||||
/* Don't bother with cases that should have been taken care of by the
|
||||
|
|
@ -546,7 +589,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
|
|||
continue;
|
||||
|
||||
foreach_in_list(acp_entry, entry, &acp[inst->src[i].reg % ACP_HASH_SIZE]) {
|
||||
if (try_constant_propagate(brw, inst, entry))
|
||||
if (try_constant_propagate(inst, entry))
|
||||
progress = true;
|
||||
|
||||
if (try_copy_propagate(inst, i, entry))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue