i965/fs: Perform CSE on load_payload instructions that are not copies.

Since CSE creates instructions, letting it generate ones that register
coalescing can't remove will cause bad things to happen. Only let CSE
combine non-copy load_payloads.

E.g., allow CSE to handle this

   load_payload vgrf4+0, vgrf5, vgrf6

but not this

   load_payload vgrf4+0, vgrf5+0, vgrf5+1
This commit is contained in:
Matt Turner 2014-03-30 12:41:55 -07:00
parent 8f4e324be2
commit 31ae9c25ff

View file

@ -42,6 +42,22 @@ struct aeb_entry : public exec_node {
};
}
/**
 * Report whether a load_payload instruction is a plain "copy" payload.
 *
 * The instruction counts as a copy when its first source has a register
 * offset of zero and every following source i names the same register as
 * the first source with a register offset equal to i (e.g. sources of the
 * form vgrf5+0, vgrf5+1, ...).
 *
 * \param inst  instruction whose sources are examined
 * \return true if all sources match the pattern above, false otherwise
 */
static bool
is_copy_payload(const fs_inst *inst)
{
   /* The sequence has to begin at offset zero of the first source. */
   if (inst->src[0].reg_offset != 0)
      return false;

   const int base_reg = inst->src[0].reg;

   /* Every later source must stay in the same register and advance the
    * offset by exactly one per source.
    */
   int idx = 1;
   while (idx < inst->sources) {
      const bool same_reg = inst->src[idx].reg == base_reg;
      const bool in_sequence = inst->src[idx].reg_offset == idx;

      if (!(same_reg && in_sequence))
         return false;

      idx++;
   }

   return true;
}
static bool
is_expression(const fs_inst *const inst)
{
@ -84,6 +100,8 @@ is_expression(const fs_inst *const inst)
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return inst->mlen == 0;
case SHADER_OPCODE_LOAD_PAYLOAD:
return !is_copy_payload(inst);
default:
return false;
}