mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 05:00:09 +01:00
intel/fs: Emit better code for b2f(inot(a)) and b2i(inot(a))
Since Boolean values are either -1 (true) or 0 (false), b2f(inot(a))
maps -1 => 0.0 and 0 => 1.0. This is equivalent to 1.0 +
float(boolBitsToInt(a)). On Intel GPUs, ADD is one of the few
instructions that can type-convert during write to destination, so we
can achieve this in a single instruction:
add g47F, g26D, 1D
v2: Fix swizzles.
v3: Fix typos in comments. Noticed by Ken.
All Gen6+ platforms had similar results. (Skylake shown)
Skylake
total instructions in shared programs: 15185583 -> 15184683 (<.01%)
instructions in affected programs: 239389 -> 238489 (-0.38%)
helped: 899
HURT: 1
helped stats (abs) min: 1 max: 2 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.15% max: 1.85% x̄: 0.49% x̃: 0.44%
HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel) min: 0.09% max: 0.09% x̄: 0.09% x̃: 0.09%
95% mean confidence interval for instructions value: -1.01 -0.99
95% mean confidence interval for instructions %-change: -0.51% -0.48%
Instructions are helped.
total cycles in shared programs: 370964249 -> 370961508 (<.01%)
cycles in affected programs: 1487586 -> 1484845 (-0.18%)
helped: 420
HURT: 268
helped stats (abs) min: 1 max: 232 x̄: 22.41 x̃: 6
helped stats (rel) min: 0.05% max: 22.60% x̄: 1.30% x̃: 0.41%
HURT stats (abs) min: 1 max: 230 x̄: 24.90 x̃: 10
HURT stats (rel) min: <.01% max: 21.60% x̄: 1.45% x̃: 0.52%
95% mean confidence interval for cycles value: -7.61 -0.36
95% mean confidence interval for cycles %-change: -0.44% -0.02%
Cycles are helped.
No changes on Iron Lake or GM45.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
cb3e21cd19
commit
7725d60938
2 changed files with 40 additions and 0 deletions
|
|
@ -205,6 +205,8 @@ public:
|
|||
void nir_emit_block(nir_block *block);
|
||||
void nir_emit_instr(nir_instr *instr);
|
||||
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
|
||||
bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,
|
||||
nir_alu_instr *instr);
|
||||
void nir_emit_load_const(const brw::fs_builder &bld,
|
||||
nir_load_const_instr *instr);
|
||||
void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
|
||||
|
|
|
|||
|
|
@ -753,6 +753,42 @@ fs_visitor::resolve_inot_sources(const fs_builder &bld, nir_alu_instr *instr,
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::try_emit_b2fi_of_inot(const fs_builder &bld,
|
||||
fs_reg result,
|
||||
nir_alu_instr *instr)
|
||||
{
|
||||
if (devinfo->gen < 6 || devinfo->gen >= 12)
|
||||
return false;
|
||||
|
||||
nir_alu_instr *const inot_instr = nir_src_as_alu_instr(&instr->src[0].src);
|
||||
|
||||
if (inot_instr == NULL || inot_instr->op != nir_op_inot)
|
||||
return false;
|
||||
|
||||
/* HF is also possible as a destination on BDW+. For nir_op_b2i, the set
|
||||
* of valid size-changing combinations is a bit more complex.
|
||||
*
|
||||
* The source restriction is just because I was lazy about generating the
|
||||
* constant below.
|
||||
*/
|
||||
if (nir_dest_bit_size(instr->dest.dest) != 32 ||
|
||||
nir_src_bit_size(inot_instr->src[0].src) != 32)
|
||||
return false;
|
||||
|
||||
/* b2[fi](inot(a)) maps a=0 => 1, a=-1 => 0. Since a can only be 0 or -1,
|
||||
* this is float(1 + a).
|
||||
*/
|
||||
fs_reg op;
|
||||
|
||||
prepare_alu_destination_and_sources(bld, inot_instr, &op, false);
|
||||
|
||||
bld.ADD(result, op, brw_imm_d(1));
|
||||
assert(!instr->dest.saturate);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||
{
|
||||
|
|
@ -844,6 +880,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
|||
case nir_op_b2f16:
|
||||
case nir_op_b2f32:
|
||||
case nir_op_b2f64:
|
||||
if (try_emit_b2fi_of_inot(bld, result, instr))
|
||||
break;
|
||||
op[0].type = BRW_REGISTER_TYPE_D;
|
||||
op[0].negate = !op[0].negate;
|
||||
/* fallthrough */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue