mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
i965/fs: Add support for generating MADs.
Improves nexuiz performance 0.65% +/- .10% (n=5) on my gen6, and .39%
+/- .11% (n=10) on gen7. No statistically significant performance
difference on warsow (n=5, but only one shader has MADs).
v2: Add support for MADs in 16-wide by using compression control.
v3: Don't generate MADs when doing so would force an immediate to be moved to a
temp. (It's not clear whether this is a win or not, but it should result in
fewer questionable changes to codegen compared to v2.)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v2)
This commit is contained in:
parent
a1bfafc5a9
commit
7d55f37b0e
3 changed files with 56 additions and 0 deletions
|
|
@ -529,6 +529,7 @@ public:
|
|||
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
|
||||
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
|
||||
bool try_emit_saturate(ir_expression *ir);
|
||||
/* Tries to emit an add expression as a single MAD, treating operand mul_arg
 * (0 or 1) of ir as the multiply.  Returns true if the MAD was emitted, false
 * if nothing was emitted and the caller must handle ir itself.
 */
bool try_emit_mad(ir_expression *ir, int mul_arg);
|
||||
void emit_bool_to_cond_code(ir_rvalue *condition);
|
||||
void emit_if_gen6(ir_if *ir);
|
||||
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
|
||||
|
|
|
|||
|
|
@ -725,6 +725,20 @@ fs_visitor::generate_code()
|
|||
brw_set_acc_write_control(p, 0);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MAD:
|
||||
brw_set_access_mode(p, BRW_ALIGN_16);
|
||||
if (c->dispatch_width == 16) {
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_MAD(p, dst, src[0], src[1], src[2]);
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||
brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
||||
} else {
|
||||
brw_MAD(p, dst, src[0], src[1], src[2]);
|
||||
}
|
||||
brw_set_access_mode(p, BRW_ALIGN_1);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FRC:
|
||||
brw_FRC(p, dst, src[0]);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -182,6 +182,43 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
|
||||
{
|
||||
/* 3-src instructions were introduced in gen6. */
|
||||
if (intel->gen < 6)
|
||||
return false;
|
||||
|
||||
/* MAD can only handle floating-point data. */
|
||||
if (ir->type != glsl_type::float_type)
|
||||
return false;
|
||||
|
||||
ir_rvalue *nonmul = ir->operands[1 - mul_arg];
|
||||
ir_expression *mul = ir->operands[mul_arg]->as_expression();
|
||||
|
||||
if (!mul || mul->operation != ir_binop_mul)
|
||||
return false;
|
||||
|
||||
if (nonmul->as_constant() ||
|
||||
mul->operands[0]->as_constant() ||
|
||||
mul->operands[1]->as_constant())
|
||||
return false;
|
||||
|
||||
nonmul->accept(this);
|
||||
fs_reg src0 = this->result;
|
||||
|
||||
mul->operands[0]->accept(this);
|
||||
fs_reg src1 = this->result;
|
||||
|
||||
mul->operands[1]->accept(this);
|
||||
fs_reg src2 = this->result;
|
||||
|
||||
this->result = fs_reg(this, ir->type);
|
||||
emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::visit(ir_expression *ir)
|
||||
{
|
||||
|
|
@ -193,6 +230,10 @@ fs_visitor::visit(ir_expression *ir)
|
|||
|
||||
if (try_emit_saturate(ir))
|
||||
return;
|
||||
if (ir->operation == ir_binop_add) {
|
||||
if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
|
||||
return;
|
||||
}
|
||||
|
||||
for (operand = 0; operand < ir->get_num_operands(); operand++) {
|
||||
ir->operands[operand]->accept(this);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue