mesa/src/compiler/nir/nir_search_helpers.h
Ian Romanick 09705747d7 nir/algebraic: Reassociate fadd into fmul in DPH-like pattern
Moving the add to the other end of the sequence allows it to be fused
into an FMA.

Ice Lake
total instructions in shared programs: 17173074 -> 16933147 (-1.40%)
instructions in affected programs: 7938745 -> 7698818 (-3.02%)
helped: 35583
HURT: 90
helped stats (abs) min: 1 max: 716 x̄: 6.75 x̃: 6
helped stats (rel) min: 0.10% max: 53.04% x̄: 5.29% x̃: 3.45%
HURT stats (abs)   min: 1 max: 41 x̄: 2.46 x̃: 1
HURT stats (rel)   min: 0.32% max: 8.33% x̄: 1.41% x̃: 0.77%
95% mean confidence interval for instructions value: -6.80 -6.65
95% mean confidence interval for instructions %-change: -5.32% -5.22%
Instructions are helped.

total cycles in shared programs: 360881386 -> 359533568 (-0.37%)
cycles in affected programs: 189489144 -> 188141326 (-0.71%)
helped: 27250
HURT: 6707
helped stats (abs) min: 1 max: 21997 x̄: 62.15 x̃: 16
helped stats (rel) min: <.01% max: 70.69% x̄: 4.04% x̃: 2.35%
HURT stats (abs)   min: 1 max: 3507 x̄: 51.56 x̃: 14
HURT stats (rel)   min: <.01% max: 77.26% x̄: 2.72% x̃: 1.27%
95% mean confidence interval for cycles value: -44.70 -34.68
95% mean confidence interval for cycles %-change: -2.75% -2.65%
Cycles are helped.

total spills in shared programs: 8943 -> 8829 (-1.27%)
spills in affected programs: 625 -> 511 (-18.24%)
helped: 6
HURT: 3

total fills in shared programs: 21815 -> 21719 (-0.44%)
fills in affected programs: 1653 -> 1557 (-5.81%)
helped: 7
HURT: 10

LOST:   11
GAINED: 3

Skylake and Broadwell had similar results. (Skylake shown)
total instructions in shared programs: 15271996 -> 15040882 (-1.51%)
instructions in affected programs: 7193699 -> 6962585 (-3.21%)
helped: 33985
HURT: 30
helped stats (abs) min: 1 max: 260 x̄: 6.80 x̃: 6
helped stats (rel) min: 0.10% max: 30.00% x̄: 5.54% x̃: 3.85%
HURT stats (abs)   min: 1 max: 41 x̄: 4.00 x̃: 3
HURT stats (rel)   min: 0.20% max: 2.16% x̄: 1.46% x̃: 1.72%
95% mean confidence interval for instructions value: -6.87 -6.72
95% mean confidence interval for instructions %-change: -5.59% -5.48%
Instructions are helped.

total cycles in shared programs: 355520785 -> 354253799 (-0.36%)
cycles in affected programs: 185869148 -> 184602162 (-0.68%)
helped: 25824
HURT: 6287
helped stats (abs) min: 1 max: 21997 x̄: 61.66 x̃: 16
helped stats (rel) min: <.01% max: 42.05% x̄: 4.18% x̃: 2.41%
HURT stats (abs)   min: 1 max: 3327 x̄: 51.76 x̃: 14
HURT stats (rel)   min: <.01% max: 101.62% x̄: 2.80% x̃: 1.28%
95% mean confidence interval for cycles value: -44.70 -34.21
95% mean confidence interval for cycles %-change: -2.87% -2.76%
Cycles are helped.

total spills in shared programs: 8835 -> 8818 (-0.19%)
spills in affected programs: 613 -> 596 (-2.77%)
helped: 5
HURT: 2

total fills in shared programs: 21738 -> 21744 (0.03%)
fills in affected programs: 1348 -> 1354 (0.45%)
helped: 5
HURT: 11

LOST:   0
GAINED: 12

Haswell
total instructions in shared programs: 13447102 -> 13381508 (-0.49%)
instructions in affected programs: 3770735 -> 3705141 (-1.74%)
helped: 11999
HURT: 29
helped stats (abs) min: 1 max: 409 x̄: 5.60 x̃: 3
helped stats (rel) min: 0.10% max: 20.00% x̄: 2.38% x̃: 1.87%
HURT stats (abs)   min: 3 max: 750 x̄: 54.90 x̃: 3
HURT stats (rel)   min: 0.12% max: 125.30% x̄: 9.96% x̃: 1.82%
95% mean confidence interval for instructions value: -5.71 -5.19
95% mean confidence interval for instructions %-change: -2.39% -2.30%
Instructions are helped.

total cycles in shared programs: 376342236 -> 375690458 (-0.17%)
cycles in affected programs: 155699021 -> 155047243 (-0.42%)
helped: 8397
HURT: 2876
helped stats (abs) min: 1 max: 20248 x̄: 109.87 x̃: 18
helped stats (rel) min: <.01% max: 40.71% x̄: 2.23% x̃: 1.49%
HURT stats (abs)   min: 1 max: 15414 x̄: 94.15 x̃: 22
HURT stats (rel)   min: <.01% max: 432.49% x̄: 3.15% x̃: 1.41%
95% mean confidence interval for cycles value: -67.64 -48.00
95% mean confidence interval for cycles %-change: -0.99% -0.74%
Cycles are helped.

total spills in shared programs: 23134 -> 23184 (0.22%)
spills in affected programs: 1675 -> 1725 (2.99%)
helped: 13
HURT: 11

total fills in shared programs: 34550 -> 34686 (0.39%)
fills in affected programs: 1421 -> 1557 (9.57%)
helped: 13
HURT: 11

LOST:   0
GAINED: 11

Ivy Bridge
total instructions in shared programs: 12019642 -> 11987285 (-0.27%)
instructions in affected programs: 1532236 -> 1499879 (-2.11%)
helped: 5522
HURT: 110
helped stats (abs) min: 1 max: 312 x̄: 6.22 x̃: 3
helped stats (rel) min: 0.16% max: 20.00% x̄: 2.46% x̃: 1.88%
HURT stats (abs)   min: 1 max: 750 x̄: 18.07 x̃: 3
HURT stats (rel)   min: 0.09% max: 125.30% x̄: 3.42% x̃: 1.15%
95% mean confidence interval for instructions value: -6.25 -5.24
95% mean confidence interval for instructions %-change: -2.43% -2.26%
Instructions are helped.

total cycles in shared programs: 180214667 -> 179761900 (-0.25%)
cycles in affected programs: 31448723 -> 30995956 (-1.44%)
helped: 7191
HURT: 2838
helped stats (abs) min: 1 max: 17680 x̄: 88.47 x̃: 17
helped stats (rel) min: <.01% max: 50.45% x̄: 2.16% x̃: 1.40%
HURT stats (abs)   min: 1 max: 15540 x̄: 64.63 x̃: 24
HURT stats (rel)   min: 0.02% max: 435.17% x̄: 3.10% x̃: 1.51%
95% mean confidence interval for cycles value: -53.34 -36.95
95% mean confidence interval for cycles %-change: -0.81% -0.53%
Cycles are helped.

total spills in shared programs: 3599 -> 3642 (1.19%)
spills in affected programs: 1180 -> 1223 (3.64%)
helped: 12
HURT: 2

total fills in shared programs: 4031 -> 4162 (3.25%)
fills in affected programs: 876 -> 1007 (14.95%)
helped: 12
HURT: 2

LOST:   6
GAINED: 5

Sandy Bridge
total instructions in shared programs: 10850686 -> 10822890 (-0.26%)
instructions in affected programs: 1247986 -> 1220190 (-2.23%)
helped: 4699
HURT: 102
helped stats (abs) min: 1 max: 104 x̄: 6.02 x̃: 3
helped stats (rel) min: 0.15% max: 17.65% x̄: 2.44% x̃: 1.88%
HURT stats (abs)   min: 1 max: 16 x̄: 4.70 x̃: 3
HURT stats (rel)   min: 0.09% max: 3.85% x̄: 1.11% x̃: 1.10%
95% mean confidence interval for instructions value: -6.10 -5.47
95% mean confidence interval for instructions %-change: -2.42% -2.30%
Instructions are helped.

total cycles in shared programs: 154044149 -> 153920095 (-0.08%)
cycles in affected programs: 26037392 -> 25913338 (-0.48%)
helped: 5974
HURT: 2521
helped stats (abs) min: 1 max: 1802 x̄: 35.42 x̃: 16
helped stats (rel) min: <.01% max: 35.80% x̄: 1.43% x̃: 0.84%
HURT stats (abs)   min: 1 max: 862 x̄: 34.73 x̃: 20
HURT stats (rel)   min: 0.01% max: 36.33% x̄: 1.67% x̃: 0.85%
95% mean confidence interval for cycles value: -16.31 -12.90
95% mean confidence interval for cycles %-change: -0.56% -0.45%
Cycles are helped.

total spills in shared programs: 2876 -> 2957 (2.82%)
spills in affected programs: 592 -> 673 (13.68%)
helped: 6
HURT: 35

total fills in shared programs: 3157 -> 3134 (-0.73%)
fills in affected programs: 402 -> 379 (-5.72%)
helped: 6
HURT: 0

LOST:   5
GAINED: 11

Reviewed-by: Matt Turner <mattst88@gmail.com>
2019-07-11 10:20:03 -07:00

327 lines
8.9 KiB
C

/*
* Copyright © 2016 Red Hat
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#ifndef _NIR_SEARCH_HELPERS_
#define _NIR_SEARCH_HELPERS_
#include "nir.h"
#include "util/bitscan.h"
#include <math.h>
static inline bool
is_pos_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_int: {
int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
if (val <= 0 || !util_is_power_of_two_or_zero64(val))
return false;
break;
}
case nir_type_uint: {
uint64_t val = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
if (val == 0 || !util_is_power_of_two_or_zero64(val))
return false;
break;
}
default:
return false;
}
}
return true;
}
static inline bool
is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_int: {
int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
if (val >= 0 || !util_is_power_of_two_or_zero64(-val))
return false;
break;
}
default:
return false;
}
}
return true;
}
static inline bool
is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_float: {
double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
if (isnan(val) || val < 0.0f || val > 1.0f)
return false;
break;
}
default:
return false;
}
}
return true;
}
/**
* Exclusive compare with (0, 1).
*
* This differs from \c is_zero_to_one because that function tests 0 <= src <=
* 1 while this function tests 0 < src < 1.
*/
static inline bool
is_gt_0_and_lt_1(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_float: {
double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
if (isnan(val) || val <= 0.0f || val >= 1.0f)
return false;
break;
}
default:
return false;
}
}
return true;
}
static inline bool
is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return true;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_float:
if (nir_src_comp_as_float(instr->src[src].src, swizzle[i]) == 0.0)
return false;
break;
case nir_type_bool:
case nir_type_int:
case nir_type_uint:
if (nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) == 0)
return false;
break;
default:
return false;
}
}
return true;
}
static inline bool
is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,
UNUSED const uint8_t *swizzle)
{
return !nir_src_is_const(instr->src[src].src);
}
static inline bool
is_not_fmul(nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
nir_alu_instr *src_alu =
nir_src_as_alu_instr(instr->src[src].src);
if (src_alu == NULL)
return true;
if (src_alu->op == nir_op_fneg)
return is_not_fmul(src_alu, 0, 0, NULL);
return src_alu->op != nir_op_fmul;
}
static inline bool
is_fsign(nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
nir_alu_instr *src_alu =
nir_src_as_alu_instr(instr->src[src].src);
if (src_alu == NULL)
return false;
if (src_alu->op == nir_op_fneg)
src_alu = nir_src_as_alu_instr(src_alu->src[0].src);
return src_alu->op == nir_op_fsign;
}
static inline bool
is_not_const_and_not_fsign(nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
return is_not_const(instr, src, num_components, swizzle) &&
!is_fsign(instr, src, num_components, swizzle);
}
static inline bool
is_used_once(nir_alu_instr *instr)
{
bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses);
bool zero_use = list_empty(&instr->dest.dest.ssa.uses);
if (zero_if_use && zero_use)
return false;
if (!zero_if_use && list_is_singular(&instr->dest.dest.ssa.uses))
return false;
if (!zero_use && list_is_singular(&instr->dest.dest.ssa.if_uses))
return false;
if (!list_is_singular(&instr->dest.dest.ssa.if_uses) &&
!list_is_singular(&instr->dest.dest.ssa.uses))
return false;
return true;
}
static inline bool
is_used_by_if(nir_alu_instr *instr)
{
return !list_empty(&instr->dest.dest.ssa.if_uses);
}
static inline bool
is_not_used_by_if(nir_alu_instr *instr)
{
return list_empty(&instr->dest.dest.ssa.if_uses);
}
static inline bool
is_used_by_non_fsat(nir_alu_instr *instr)
{
nir_foreach_use(src, &instr->dest.dest.ssa) {
const nir_instr *const user_instr = src->parent_instr;
if (user_instr->type != nir_instr_type_alu)
return true;
const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr);
assert(instr != user_alu);
if (user_alu->op != nir_op_fsat)
return true;
}
return false;
}
/**
* Returns true if a NIR ALU src represents a constant integer
* of either 32 or 64 bits, and the higher word (bit-size / 2)
* of all its components is zero.
*/
static inline bool
is_upper_half_zero(nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return false;
for (unsigned i = 0; i < num_components; i++) {
unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size;
if ((nir_src_comp_as_uint(instr->src[src].src,
swizzle[i]) & high_bits) != 0) {
return false;
}
}
return true;
}
/**
* Returns true if a NIR ALU src represents a constant integer
* of either 32 or 64 bits, and the lower word (bit-size / 2)
* of all its components is zero.
*/
static inline bool
is_lower_half_zero(nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return false;
for (unsigned i = 0; i < num_components; i++) {
uint32_t low_bits =
(1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1;
if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0)
return false;
}
return true;
}
static inline bool
no_signed_wrap(nir_alu_instr *instr)
{
return instr->no_signed_wrap;
}
static inline bool
no_unsigned_wrap(nir_alu_instr *instr)
{
return instr->no_unsigned_wrap;
}
#endif /* _NIR_SEARCH_ */