i965: Add a pass to the FS to split virtual GRFs to float channels.

Improves nexuiz performance 0.91% (+/- 0.54%, n=8)
This commit is contained in:
Eric Anholt 2010-10-13 20:17:15 -07:00
parent b8613d70da
commit 4f88550ba0
2 changed files with 116 additions and 2 deletions

View file

@ -2111,6 +2111,7 @@ static void
assign_reg(int *reg_hw_locations, fs_reg *reg)
{
if (reg->file == GRF && reg->reg != 0) {
assert(reg->reg_offset >= 0);
reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
reg->reg = 0;
}
@ -2302,7 +2303,7 @@ fs_visitor::assign_regs()
}
}
assert(hw_reg != -1);
assert(hw_reg >= 0);
hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
last_grf = MAX2(last_grf,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
@ -2322,6 +2323,92 @@ fs_visitor::assign_regs()
talloc_free(regs);
}
/**
* Split large virtual GRFs into separate components if we can.
*
* This is mostly duplicated with what brw_fs_vector_splitting does,
* but that's really conservative because it's afraid of doing
* splitting that doesn't result in real progress after the rest of
* the optimization phases, which would cause infinite looping in
* optimization. We can do it once here, safely. This also has the
* opportunity to split interpolated values, or maybe even uniforms,
* which we don't have at the IR level.
*
* We want to split, because virtual GRFs are what we register
* allocate and spill (due to contiguousness requirements for some
* instructions), and they're what we naturally generate in the
* codegen process, but most virtual GRFs don't actually need to be
* contiguous sets of GRFs. If we split, we'll end up with reduced
* live intervals and better dead code elimination and coalescing.
*/
void
fs_visitor::split_virtual_grfs()
{
int num_vars = this->virtual_grf_next;
bool split_grf[num_vars];
int new_virtual_grf[num_vars];
/* Try to split anything > 0 sized. */
for (int i = 0; i < num_vars; i++) {
if (this->virtual_grf_sizes[i] != 1)
split_grf[i] = true;
else
split_grf[i] = false;
}
if (brw->has_pln) {
/* PLN opcodes rely on the delta_xy being contiguous. */
split_grf[this->delta_x.reg] = false;
}
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
/* Texturing produces 4 contiguous registers, so no splitting. */
if ((inst->opcode == FS_OPCODE_TEX ||
inst->opcode == FS_OPCODE_TXB ||
inst->opcode == FS_OPCODE_TXL) &&
inst->dst.file == GRF) {
split_grf[inst->dst.reg] = false;
}
}
/* Allocate new space for split regs. Note that the virtual
* numbers will be contiguous.
*/
for (int i = 0; i < num_vars; i++) {
if (split_grf[i]) {
new_virtual_grf[i] = virtual_grf_alloc(1);
for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
int reg = virtual_grf_alloc(1);
assert(reg == new_virtual_grf[i] + j - 1);
}
this->virtual_grf_sizes[i] = 1;
}
}
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
if (inst->dst.file == GRF &&
split_grf[inst->dst.reg] &&
inst->dst.reg_offset != 0) {
inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
inst->dst.reg_offset - 1);
inst->dst.reg_offset = 0;
}
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
split_grf[inst->src[i].reg] &&
inst->src[i].reg_offset != 0) {
inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
inst->src[i].reg_offset - 1);
inst->src[i].reg_offset = 0;
}
}
}
}
void
fs_visitor::calculate_live_intervals()
{
@ -3054,13 +3141,15 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
}
v.emit_fb_writes();
v.split_virtual_grfs();
v.assign_curb_setup();
v.assign_urb_setup();
bool progress;
do {
progress = false;
v.calculate_live_intervals();
progress = v.propagate_constants() || progress;
progress = v.register_coalesce() || progress;

View file

@ -213,6 +213,9 @@ public:
init();
this->opcode = opcode;
this->dst = dst;
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
}
fs_inst(int opcode, fs_reg dst, fs_reg src0)
@ -221,6 +224,11 @@ public:
this->opcode = opcode;
this->dst = dst;
this->src[0] = src0;
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
if (src[0].file == GRF)
assert(src[0].reg_offset >= 0);
}
fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
@ -230,6 +238,13 @@ public:
this->dst = dst;
this->src[0] = src0;
this->src[1] = src1;
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
if (src[0].file == GRF)
assert(src[0].reg_offset >= 0);
if (src[1].file == GRF)
assert(src[1].reg_offset >= 0);
}
fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
@ -240,6 +255,15 @@ public:
this->src[0] = src0;
this->src[1] = src1;
this->src[2] = src2;
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
if (src[0].file == GRF)
assert(src[0].reg_offset >= 0);
if (src[1].file == GRF)
assert(src[1].reg_offset >= 0);
if (src[2].file == GRF)
assert(src[2].reg_offset >= 0);
}
int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
@ -336,6 +360,7 @@ public:
void assign_urb_setup();
void assign_regs();
void assign_regs_trivial();
void split_virtual_grfs();
void calculate_live_intervals();
bool propagate_constants();
bool register_coalesce();