mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
Moving the add to the other end of the sequence allows it to be fused
into an FMA.
Ice Lake
total instructions in shared programs: 17173074 -> 16933147 (-1.40%)
instructions in affected programs: 7938745 -> 7698818 (-3.02%)
helped: 35583
HURT: 90
helped stats (abs) min: 1 max: 716 x̄: 6.75 x̃: 6
helped stats (rel) min: 0.10% max: 53.04% x̄: 5.29% x̃: 3.45%
HURT stats (abs) min: 1 max: 41 x̄: 2.46 x̃: 1
HURT stats (rel) min: 0.32% max: 8.33% x̄: 1.41% x̃: 0.77%
95% mean confidence interval for instructions value: -6.80 -6.65
95% mean confidence interval for instructions %-change: -5.32% -5.22%
Instructions are helped.
total cycles in shared programs: 360881386 -> 359533568 (-0.37%)
cycles in affected programs: 189489144 -> 188141326 (-0.71%)
helped: 27250
HURT: 6707
helped stats (abs) min: 1 max: 21997 x̄: 62.15 x̃: 16
helped stats (rel) min: <.01% max: 70.69% x̄: 4.04% x̃: 2.35%
HURT stats (abs) min: 1 max: 3507 x̄: 51.56 x̃: 14
HURT stats (rel) min: <.01% max: 77.26% x̄: 2.72% x̃: 1.27%
95% mean confidence interval for cycles value: -44.70 -34.68
95% mean confidence interval for cycles %-change: -2.75% -2.65%
Cycles are helped.
total spills in shared programs: 8943 -> 8829 (-1.27%)
spills in affected programs: 625 -> 511 (-18.24%)
helped: 6
HURT: 3
total fills in shared programs: 21815 -> 21719 (-0.44%)
fills in affected programs: 1653 -> 1557 (-5.81%)
helped: 7
HURT: 10
LOST: 11
GAINED: 3
Skylake and Broadwell had similar results. (Skylake shown)
total instructions in shared programs: 15271996 -> 15040882 (-1.51%)
instructions in affected programs: 7193699 -> 6962585 (-3.21%)
helped: 33985
HURT: 30
helped stats (abs) min: 1 max: 260 x̄: 6.80 x̃: 6
helped stats (rel) min: 0.10% max: 30.00% x̄: 5.54% x̃: 3.85%
HURT stats (abs) min: 1 max: 41 x̄: 4.00 x̃: 3
HURT stats (rel) min: 0.20% max: 2.16% x̄: 1.46% x̃: 1.72%
95% mean confidence interval for instructions value: -6.87 -6.72
95% mean confidence interval for instructions %-change: -5.59% -5.48%
Instructions are helped.
total cycles in shared programs: 355520785 -> 354253799 (-0.36%)
cycles in affected programs: 185869148 -> 184602162 (-0.68%)
helped: 25824
HURT: 6287
helped stats (abs) min: 1 max: 21997 x̄: 61.66 x̃: 16
helped stats (rel) min: <.01% max: 42.05% x̄: 4.18% x̃: 2.41%
HURT stats (abs) min: 1 max: 3327 x̄: 51.76 x̃: 14
HURT stats (rel) min: <.01% max: 101.62% x̄: 2.80% x̃: 1.28%
95% mean confidence interval for cycles value: -44.70 -34.21
95% mean confidence interval for cycles %-change: -2.87% -2.76%
Cycles are helped.
total spills in shared programs: 8835 -> 8818 (-0.19%)
spills in affected programs: 613 -> 596 (-2.77%)
helped: 5
HURT: 2
total fills in shared programs: 21738 -> 21744 (0.03%)
fills in affected programs: 1348 -> 1354 (0.45%)
helped: 5
HURT: 11
LOST: 0
GAINED: 12
Haswell
total instructions in shared programs: 13447102 -> 13381508 (-0.49%)
instructions in affected programs: 3770735 -> 3705141 (-1.74%)
helped: 11999
HURT: 29
helped stats (abs) min: 1 max: 409 x̄: 5.60 x̃: 3
helped stats (rel) min: 0.10% max: 20.00% x̄: 2.38% x̃: 1.87%
HURT stats (abs) min: 3 max: 750 x̄: 54.90 x̃: 3
HURT stats (rel) min: 0.12% max: 125.30% x̄: 9.96% x̃: 1.82%
95% mean confidence interval for instructions value: -5.71 -5.19
95% mean confidence interval for instructions %-change: -2.39% -2.30%
Instructions are helped.
total cycles in shared programs: 376342236 -> 375690458 (-0.17%)
cycles in affected programs: 155699021 -> 155047243 (-0.42%)
helped: 8397
HURT: 2876
helped stats (abs) min: 1 max: 20248 x̄: 109.87 x̃: 18
helped stats (rel) min: <.01% max: 40.71% x̄: 2.23% x̃: 1.49%
HURT stats (abs) min: 1 max: 15414 x̄: 94.15 x̃: 22
HURT stats (rel) min: <.01% max: 432.49% x̄: 3.15% x̃: 1.41%
95% mean confidence interval for cycles value: -67.64 -48.00
95% mean confidence interval for cycles %-change: -0.99% -0.74%
Cycles are helped.
total spills in shared programs: 23134 -> 23184 (0.22%)
spills in affected programs: 1675 -> 1725 (2.99%)
helped: 13
HURT: 11
total fills in shared programs: 34550 -> 34686 (0.39%)
fills in affected programs: 1421 -> 1557 (9.57%)
helped: 13
HURT: 11
LOST: 0
GAINED: 11
Ivy Bridge
total instructions in shared programs: 12019642 -> 11987285 (-0.27%)
instructions in affected programs: 1532236 -> 1499879 (-2.11%)
helped: 5522
HURT: 110
helped stats (abs) min: 1 max: 312 x̄: 6.22 x̃: 3
helped stats (rel) min: 0.16% max: 20.00% x̄: 2.46% x̃: 1.88%
HURT stats (abs) min: 1 max: 750 x̄: 18.07 x̃: 3
HURT stats (rel) min: 0.09% max: 125.30% x̄: 3.42% x̃: 1.15%
95% mean confidence interval for instructions value: -6.25 -5.24
95% mean confidence interval for instructions %-change: -2.43% -2.26%
Instructions are helped.
total cycles in shared programs: 180214667 -> 179761900 (-0.25%)
cycles in affected programs: 31448723 -> 30995956 (-1.44%)
helped: 7191
HURT: 2838
helped stats (abs) min: 1 max: 17680 x̄: 88.47 x̃: 17
helped stats (rel) min: <.01% max: 50.45% x̄: 2.16% x̃: 1.40%
HURT stats (abs) min: 1 max: 15540 x̄: 64.63 x̃: 24
HURT stats (rel) min: 0.02% max: 435.17% x̄: 3.10% x̃: 1.51%
95% mean confidence interval for cycles value: -53.34 -36.95
95% mean confidence interval for cycles %-change: -0.81% -0.53%
Cycles are helped.
total spills in shared programs: 3599 -> 3642 (1.19%)
spills in affected programs: 1180 -> 1223 (3.64%)
helped: 12
HURT: 2
total fills in shared programs: 4031 -> 4162 (3.25%)
fills in affected programs: 876 -> 1007 (14.95%)
helped: 12
HURT: 2
LOST: 6
GAINED: 5
Sandy Bridge
total instructions in shared programs: 10850686 -> 10822890 (-0.26%)
instructions in affected programs: 1247986 -> 1220190 (-2.23%)
helped: 4699
HURT: 102
helped stats (abs) min: 1 max: 104 x̄: 6.02 x̃: 3
helped stats (rel) min: 0.15% max: 17.65% x̄: 2.44% x̃: 1.88%
HURT stats (abs) min: 1 max: 16 x̄: 4.70 x̃: 3
HURT stats (rel) min: 0.09% max: 3.85% x̄: 1.11% x̃: 1.10%
95% mean confidence interval for instructions value: -6.10 -5.47
95% mean confidence interval for instructions %-change: -2.42% -2.30%
Instructions are helped.
total cycles in shared programs: 154044149 -> 153920095 (-0.08%)
cycles in affected programs: 26037392 -> 25913338 (-0.48%)
helped: 5974
HURT: 2521
helped stats (abs) min: 1 max: 1802 x̄: 35.42 x̃: 16
helped stats (rel) min: <.01% max: 35.80% x̄: 1.43% x̃: 0.84%
HURT stats (abs) min: 1 max: 862 x̄: 34.73 x̃: 20
HURT stats (rel) min: 0.01% max: 36.33% x̄: 1.67% x̃: 0.85%
95% mean confidence interval for cycles value: -16.31 -12.90
95% mean confidence interval for cycles %-change: -0.56% -0.45%
Cycles are helped.
total spills in shared programs: 2876 -> 2957 (2.82%)
spills in affected programs: 592 -> 673 (13.68%)
helped: 6
HURT: 35
total fills in shared programs: 3157 -> 3134 (-0.73%)
fills in affected programs: 402 -> 379 (-5.72%)
helped: 6
HURT: 0
LOST: 5
GAINED: 11
Reviewed-by: Matt Turner <mattst88@gmail.com>
327 lines
8.9 KiB
C
327 lines
8.9 KiB
C
/*
|
|
* Copyright © 2016 Red Hat
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*
|
|
* Authors:
|
|
* Rob Clark <robclark@freedesktop.org>
|
|
*/
|
|
|
|
#ifndef _NIR_SEARCH_HELPERS_
|
|
#define _NIR_SEARCH_HELPERS_
|
|
|
|
#include "nir.h"
|
|
#include "util/bitscan.h"
|
|
#include <math.h>
|
|
|
|
static inline bool
|
|
is_pos_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
|
const uint8_t *swizzle)
|
|
{
|
|
/* only constant srcs: */
|
|
if (!nir_src_is_const(instr->src[src].src))
|
|
return false;
|
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
switch (nir_op_infos[instr->op].input_types[src]) {
|
|
case nir_type_int: {
|
|
int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
|
|
if (val <= 0 || !util_is_power_of_two_or_zero64(val))
|
|
return false;
|
|
break;
|
|
}
|
|
case nir_type_uint: {
|
|
uint64_t val = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
|
|
if (val == 0 || !util_is_power_of_two_or_zero64(val))
|
|
return false;
|
|
break;
|
|
}
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline bool
|
|
is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
|
const uint8_t *swizzle)
|
|
{
|
|
/* only constant srcs: */
|
|
if (!nir_src_is_const(instr->src[src].src))
|
|
return false;
|
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
switch (nir_op_infos[instr->op].input_types[src]) {
|
|
case nir_type_int: {
|
|
int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
|
|
if (val >= 0 || !util_is_power_of_two_or_zero64(-val))
|
|
return false;
|
|
break;
|
|
}
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline bool
|
|
is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
|
const uint8_t *swizzle)
|
|
{
|
|
/* only constant srcs: */
|
|
if (!nir_src_is_const(instr->src[src].src))
|
|
return false;
|
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
switch (nir_op_infos[instr->op].input_types[src]) {
|
|
case nir_type_float: {
|
|
double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
|
|
if (isnan(val) || val < 0.0f || val > 1.0f)
|
|
return false;
|
|
break;
|
|
}
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Exclusive compare with (0, 1).
|
|
*
|
|
* This differs from \c is_zero_to_one because that function tests 0 <= src <=
|
|
* 1 while this function tests 0 < src < 1.
|
|
*/
|
|
static inline bool
|
|
is_gt_0_and_lt_1(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
|
const uint8_t *swizzle)
|
|
{
|
|
/* only constant srcs: */
|
|
if (!nir_src_is_const(instr->src[src].src))
|
|
return false;
|
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
switch (nir_op_infos[instr->op].input_types[src]) {
|
|
case nir_type_float: {
|
|
double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
|
|
if (isnan(val) || val <= 0.0f || val >= 1.0f)
|
|
return false;
|
|
break;
|
|
}
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline bool
|
|
is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
|
const uint8_t *swizzle)
|
|
{
|
|
if (nir_src_as_const_value(instr->src[src].src) == NULL)
|
|
return true;
|
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
switch (nir_op_infos[instr->op].input_types[src]) {
|
|
case nir_type_float:
|
|
if (nir_src_comp_as_float(instr->src[src].src, swizzle[i]) == 0.0)
|
|
return false;
|
|
break;
|
|
case nir_type_bool:
|
|
case nir_type_int:
|
|
case nir_type_uint:
|
|
if (nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) == 0)
|
|
return false;
|
|
break;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline bool
|
|
is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,
|
|
UNUSED const uint8_t *swizzle)
|
|
{
|
|
return !nir_src_is_const(instr->src[src].src);
|
|
}
|
|
|
|
static inline bool
|
|
is_not_fmul(nir_alu_instr *instr, unsigned src,
|
|
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
|
|
{
|
|
nir_alu_instr *src_alu =
|
|
nir_src_as_alu_instr(instr->src[src].src);
|
|
|
|
if (src_alu == NULL)
|
|
return true;
|
|
|
|
if (src_alu->op == nir_op_fneg)
|
|
return is_not_fmul(src_alu, 0, 0, NULL);
|
|
|
|
return src_alu->op != nir_op_fmul;
|
|
}
|
|
|
|
static inline bool
|
|
is_fsign(nir_alu_instr *instr, unsigned src,
|
|
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
|
|
{
|
|
nir_alu_instr *src_alu =
|
|
nir_src_as_alu_instr(instr->src[src].src);
|
|
|
|
if (src_alu == NULL)
|
|
return false;
|
|
|
|
if (src_alu->op == nir_op_fneg)
|
|
src_alu = nir_src_as_alu_instr(src_alu->src[0].src);
|
|
|
|
return src_alu->op == nir_op_fsign;
|
|
}
|
|
|
|
static inline bool
|
|
is_not_const_and_not_fsign(nir_alu_instr *instr, unsigned src,
|
|
unsigned num_components, const uint8_t *swizzle)
|
|
{
|
|
return is_not_const(instr, src, num_components, swizzle) &&
|
|
!is_fsign(instr, src, num_components, swizzle);
|
|
}
|
|
|
|
static inline bool
|
|
is_used_once(nir_alu_instr *instr)
|
|
{
|
|
bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses);
|
|
bool zero_use = list_empty(&instr->dest.dest.ssa.uses);
|
|
|
|
if (zero_if_use && zero_use)
|
|
return false;
|
|
|
|
if (!zero_if_use && list_is_singular(&instr->dest.dest.ssa.uses))
|
|
return false;
|
|
|
|
if (!zero_use && list_is_singular(&instr->dest.dest.ssa.if_uses))
|
|
return false;
|
|
|
|
if (!list_is_singular(&instr->dest.dest.ssa.if_uses) &&
|
|
!list_is_singular(&instr->dest.dest.ssa.uses))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline bool
|
|
is_used_by_if(nir_alu_instr *instr)
|
|
{
|
|
return !list_empty(&instr->dest.dest.ssa.if_uses);
|
|
}
|
|
|
|
static inline bool
|
|
is_not_used_by_if(nir_alu_instr *instr)
|
|
{
|
|
return list_empty(&instr->dest.dest.ssa.if_uses);
|
|
}
|
|
|
|
static inline bool
|
|
is_used_by_non_fsat(nir_alu_instr *instr)
|
|
{
|
|
nir_foreach_use(src, &instr->dest.dest.ssa) {
|
|
const nir_instr *const user_instr = src->parent_instr;
|
|
|
|
if (user_instr->type != nir_instr_type_alu)
|
|
return true;
|
|
|
|
const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr);
|
|
|
|
assert(instr != user_alu);
|
|
if (user_alu->op != nir_op_fsat)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Returns true if a NIR ALU src represents a constant integer
|
|
* of either 32 or 64 bits, and the higher word (bit-size / 2)
|
|
* of all its components is zero.
|
|
*/
|
|
static inline bool
|
|
is_upper_half_zero(nir_alu_instr *instr, unsigned src,
|
|
unsigned num_components, const uint8_t *swizzle)
|
|
{
|
|
if (nir_src_as_const_value(instr->src[src].src) == NULL)
|
|
return false;
|
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
|
|
uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size;
|
|
if ((nir_src_comp_as_uint(instr->src[src].src,
|
|
swizzle[i]) & high_bits) != 0) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Returns true if a NIR ALU src represents a constant integer
|
|
* of either 32 or 64 bits, and the lower word (bit-size / 2)
|
|
* of all its components is zero.
|
|
*/
|
|
static inline bool
|
|
is_lower_half_zero(nir_alu_instr *instr, unsigned src,
|
|
unsigned num_components, const uint8_t *swizzle)
|
|
{
|
|
if (nir_src_as_const_value(instr->src[src].src) == NULL)
|
|
return false;
|
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
uint32_t low_bits =
|
|
(1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1;
|
|
if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline bool
|
|
no_signed_wrap(nir_alu_instr *instr)
|
|
{
|
|
return instr->no_signed_wrap;
|
|
}
|
|
|
|
static inline bool
|
|
no_unsigned_wrap(nir_alu_instr *instr)
|
|
{
|
|
return instr->no_unsigned_wrap;
|
|
}
|
|
|
|
#endif /* _NIR_SEARCH_HELPERS_ */
|