mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 04:30:10 +01:00
i965/fs: Emit better b2f of an expression on GEN4 and GEN5
On platforms that do not natively generate 0u and ~0u for Boolean
results, b2f expressions that look like
f = b2f(expr cmp 0)
will generate better code by pretending the expression is
f = ir_triop_csel(0.0, 1.0, expr cmp 0)
This is because the last instruction of "expr" can generate the
condition code for the "cmp 0". This avoids having to do the "-(b & 1)"
trick to generate 0u or ~0u for the Boolean result. This means code like
mov(16) g16<1>F 1F
mul.ge.f0(16) null g6<8,8,1>F g14<8,8,1>F
(+f0) sel(16) m6<1>F g16<8,8,1>F 0F
will be generated instead of
mul(16) g2<1>F g12<8,8,1>F g4<8,8,1>F
cmp.ge.f0(16) g2<1>D g4<8,8,1>F 0F
and(16) g4<1>D g2<8,8,1>D 1D
and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD
v2: When the comparison is either == 0.0 or != 0.0, using the knowledge
that the true (or false) case already results in zero allows better
code generation by possibly avoiding a load-immediate instruction.
v3: Apply the optimization even when neither operand of the comparison is zero.
Shader-db results:
GM45 (0x2A42):
total instructions in shared programs: 3551002 -> 3550829 (-0.00%)
instructions in affected programs: 33269 -> 33096 (-0.52%)
helped: 121
Iron Lake (0x0046):
total instructions in shared programs: 4993327 -> 4993146 (-0.00%)
instructions in affected programs: 34199 -> 34018 (-0.53%)
helped: 129
No change on other platforms.
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Tapani Palli <tapani.palli@intel.com>
This commit is contained in:
parent
036e347f3c
commit
b616164c95
2 changed files with 99 additions and 4 deletions
|
|
@ -307,6 +307,7 @@ public:
|
|||
const fs_reg &a);
|
||||
void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
|
||||
const fs_reg &src0, const fs_reg &src1);
|
||||
bool try_emit_b2f_of_comparison(ir_expression *ir);
|
||||
bool try_emit_saturate(ir_expression *ir);
|
||||
bool try_emit_line(ir_expression *ir);
|
||||
bool try_emit_mad(ir_expression *ir);
|
||||
|
|
@ -317,6 +318,7 @@ public:
|
|||
bool opt_saturate_propagation();
|
||||
bool opt_cmod_propagation();
|
||||
void emit_bool_to_cond_code(ir_rvalue *condition);
|
||||
void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]);
|
||||
void emit_if_gen6(ir_if *ir);
|
||||
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
|
||||
uint32_t spill_offset, int count);
|
||||
|
|
|
|||
|
|
@ -475,6 +475,87 @@ fs_visitor::try_emit_mad(ir_expression *ir)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir)
|
||||
{
|
||||
/* On platforms that do not natively generate 0u and ~0u for Boolean
|
||||
* results, b2f expressions that look like
|
||||
*
|
||||
* f = b2f(expr cmp 0)
|
||||
*
|
||||
* will generate better code by pretending the expression is
|
||||
*
|
||||
* f = ir_triop_csel(0.0, 1.0, expr cmp 0)
|
||||
*
|
||||
* This is because the last instruction of "expr" can generate the
|
||||
* condition code for the "cmp 0". This avoids having to do the "-(b & 1)"
|
||||
* trick to generate 0u or ~0u for the Boolean result. This means code like
|
||||
*
|
||||
* mov(16) g16<1>F 1F
|
||||
* mul.ge.f0(16) null g6<8,8,1>F g14<8,8,1>F
|
||||
* (+f0) sel(16) m6<1>F g16<8,8,1>F 0F
|
||||
*
|
||||
* will be generated instead of
|
||||
*
|
||||
* mul(16) g2<1>F g12<8,8,1>F g4<8,8,1>F
|
||||
* cmp.ge.f0(16) g2<1>D g4<8,8,1>F 0F
|
||||
* and(16) g4<1>D g2<8,8,1>D 1D
|
||||
* and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD
|
||||
*
|
||||
* When the comparison is either == 0.0 or != 0.0 using the knowledge that
|
||||
* the true (or false) case already results in zero would allow better code
|
||||
* generation by possibly avoiding a load-immediate instruction.
|
||||
*/
|
||||
ir_expression *cmp = ir->operands[0]->as_expression();
|
||||
if (cmp == NULL)
|
||||
return false;
|
||||
|
||||
if (cmp->operation == ir_binop_equal || cmp->operation == ir_binop_nequal) {
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
ir_constant *c = cmp->operands[i]->as_constant();
|
||||
if (c == NULL || !c->is_zero())
|
||||
continue;
|
||||
|
||||
ir_expression *expr = cmp->operands[i ^ 1]->as_expression();
|
||||
if (expr != NULL) {
|
||||
fs_reg op[2];
|
||||
|
||||
for (unsigned j = 0; j < 2; j++) {
|
||||
cmp->operands[j]->accept(this);
|
||||
op[j] = this->result;
|
||||
|
||||
resolve_ud_negate(&op[j]);
|
||||
}
|
||||
|
||||
emit_bool_to_cond_code_of_reg(cmp, op);
|
||||
|
||||
/* In this case we know when the condition is true, op[i ^ 1]
|
||||
* contains zero. Invert the predicate, use op[i ^ 1] as src0,
|
||||
* and immediate 1.0f as src1.
|
||||
*/
|
||||
this->result = vgrf(ir->type);
|
||||
op[i ^ 1].type = BRW_REGISTER_TYPE_F;
|
||||
|
||||
fs_inst *inst = emit(SEL(this->result, op[i ^ 1], fs_reg(1.0f)));
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->predicate_inverse = cmp->operation == ir_binop_equal;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
emit_bool_to_cond_code(cmp);
|
||||
|
||||
fs_reg temp = vgrf(ir->type);
|
||||
emit(MOV(temp, fs_reg(1.0f)));
|
||||
|
||||
this->result = vgrf(ir->type);
|
||||
fs_inst *inst = emit(SEL(this->result, temp, fs_reg(0.0f)));
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
pack_pixel_offset(float x)
|
||||
{
|
||||
|
|
@ -639,6 +720,11 @@ fs_visitor::visit(ir_expression *ir)
|
|||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
return;
|
||||
|
||||
case ir_unop_b2f:
|
||||
if (brw->gen <= 5 && try_emit_b2f_of_comparison(ir))
|
||||
return;
|
||||
break;
|
||||
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
case ir_binop_interpolate_at_offset:
|
||||
case ir_binop_interpolate_at_sample:
|
||||
|
|
@ -2508,7 +2594,6 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
|
|||
}
|
||||
|
||||
fs_reg op[3];
|
||||
fs_inst *inst;
|
||||
|
||||
assert(expr->get_num_operands() <= 3);
|
||||
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
|
||||
|
|
@ -2520,6 +2605,14 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
|
|||
resolve_ud_negate(&op[i]);
|
||||
}
|
||||
|
||||
emit_bool_to_cond_code_of_reg(expr, op);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3])
|
||||
{
|
||||
fs_inst *inst;
|
||||
|
||||
switch (expr->operation) {
|
||||
case ir_unop_logic_not:
|
||||
inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
|
||||
|
|
@ -2528,7 +2621,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
|
|||
|
||||
case ir_binop_logic_xor:
|
||||
if (brw->gen <= 5) {
|
||||
fs_reg temp = vgrf(ir->type);
|
||||
fs_reg temp = vgrf(expr->type);
|
||||
emit(XOR(temp, op[0], op[1]));
|
||||
inst = emit(AND(reg_null_d, temp, fs_reg(1)));
|
||||
} else {
|
||||
|
|
@ -2539,7 +2632,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
|
|||
|
||||
case ir_binop_logic_or:
|
||||
if (brw->gen <= 5) {
|
||||
fs_reg temp = vgrf(ir->type);
|
||||
fs_reg temp = vgrf(expr->type);
|
||||
emit(OR(temp, op[0], op[1]));
|
||||
inst = emit(AND(reg_null_d, temp, fs_reg(1)));
|
||||
} else {
|
||||
|
|
@ -2550,7 +2643,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
|
|||
|
||||
case ir_binop_logic_and:
|
||||
if (brw->gen <= 5) {
|
||||
fs_reg temp = vgrf(ir->type);
|
||||
fs_reg temp = vgrf(expr->type);
|
||||
emit(AND(temp, op[0], op[1]));
|
||||
inst = emit(AND(reg_null_d, temp, fs_reg(1)));
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue