/*
 * Copyright © 2016 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef _NIR_SEARCH_HELPERS_
#define _NIR_SEARCH_HELPERS_

#include <math.h>
#include "util/bitscan.h"
#include "util/u_math.h"
#include "nir.h"
#include "nir_range_analysis.h"
#include "nir_search.h"

static inline bool
is_pos_power_of_two(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                    unsigned src, unsigned num_components,
                    const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_type type = nir_op_infos[instr->op].input_types[src];
      switch (nir_alu_type_get_base_type(type)) {
      case nir_type_int: {
         int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
         if (val <= 0 || !util_is_power_of_two_or_zero64(val))
            return false;
         break;
      }
      case nir_type_uint: {
         uint64_t val = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
         if (val == 0 || !util_is_power_of_two_or_zero64(val))
            return false;
         break;
      }
      default:
         return false;
      }
   }

   return true;
}
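
/* e.g. a constant uvec2 of (4, 8) satisfies is_pos_power_of_two, while
 * (4, 6), 0, or any negative signed component does not.
 */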

static inline bool
is_neg_power_of_two(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                    unsigned src, unsigned num_components,
                    const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   int64_t int_min = u_intN_min(instr->src[src].src.ssa->bit_size);

   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_type type = nir_op_infos[instr->op].input_types[src];
      switch (nir_alu_type_get_base_type(type)) {
      case nir_type_int: {
         int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
         /* "int_min" is a power-of-two, but negation can cause overflow. */
         if (val == int_min || val >= 0 || !util_is_power_of_two_or_zero64(-val))
            return false;
         break;
      }
      default:
         return false;
      }
   }

   return true;
}
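
/* e.g. -4 satisfies is_neg_power_of_two, while +4, 0, and the type's
 * minimum (such as -128 for 8-bit sources) do not.
 */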

static inline bool
is_bitcount2(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
             unsigned src, unsigned num_components,
             const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      uint64_t val = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
      if (util_bitcount64(val) != 2)
         return false;
   }

   return true;
}

static inline bool
is_nan(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
       unsigned src, unsigned num_components, const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      if (!isnan(nir_src_comp_as_float(instr->src[src].src, swizzle[i])))
         return false;
   }

   return true;
}

static inline bool
is_negative_zero(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                 unsigned src, unsigned num_components, const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      union di tmp;
      tmp.d = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
      if (tmp.ui != 0x8000000000000000ull)
         return false;
   }

   return true;
}

static inline bool
is_any_comp_nan(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                unsigned src, unsigned num_components, const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      if (isnan(nir_src_comp_as_float(instr->src[src].src, swizzle[i])))
         return true;
   }

   return false;
}

#define MULTIPLE(test) \
   static inline bool \
   is_unsigned_multiple_of_##test(const nir_search_state *state, \
                                  const nir_alu_instr *instr, \
                                  unsigned src, unsigned num_components, \
                                  const uint8_t *swizzle) \
   { \
      unsigned num = ffs(test) - 1; \
      for (unsigned i = 0; i < num_components; i++) { \
         nir_scalar s = nir_get_scalar(instr->src[src].src.ssa, swizzle[i]); \
         if (nir_def_num_lsb_zero(state->numlsb_ht, s) < num) \
            return false; \
      } \
 \
      return true; \
   }

MULTIPLE(2)
MULTIPLE(4)
MULTIPLE(8)
MULTIPLE(16)
MULTIPLE(32)
MULTIPLE(64)
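
/* Each MULTIPLE(n) expansion above defines is_unsigned_multiple_of_n(),
 * which returns true when every swizzled component has at least
 * ffs(n) - 1 low bits known to be zero, i.e. is an unsigned multiple of n.
 */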

static inline bool
is_zero_to_one(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
               unsigned src, unsigned num_components,
               const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_type type = nir_op_infos[instr->op].input_types[src];
      switch (nir_alu_type_get_base_type(type)) {
      case nir_type_float: {
         double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
         if (isnan(val) || val < 0.0f || val > 1.0f)
            return false;
         break;
      }
      default:
         return false;
      }
   }

   return true;
}

/**
 * Exclusive compare with (0, 1).
 *
 * This differs from \c is_zero_to_one because that function tests 0 <= src <=
 * 1 while this function tests 0 < src < 1.
 */
static inline bool
is_gt_0_and_lt_1(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                 unsigned src, unsigned num_components,
                 const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_type type = nir_op_infos[instr->op].input_types[src];
      switch (nir_alu_type_get_base_type(type)) {
      case nir_type_float: {
         double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
         if (isnan(val) || val <= 0.0f || val >= 1.0f)
            return false;
         break;
      }
      default:
         return false;
      }
   }

   return true;
}

/**
 * (x & 1) != 0
 */
static inline bool
is_odd(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
       unsigned src, unsigned num_components,
       const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_type type = nir_op_infos[instr->op].input_types[src];
      switch (nir_alu_type_get_base_type(type)) {
      case nir_type_int:
      case nir_type_uint: {
         if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & 1) == 0)
            return false;
         break;
      }
      default:
         return false;
      }
   }

   return true;
}

static inline bool
is_not_const_zero(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                  unsigned src, unsigned num_components,
                  const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return true;

   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_type type = nir_op_infos[instr->op].input_types[src];
      switch (nir_alu_type_get_base_type(type)) {
      case nir_type_float:
         if (nir_src_comp_as_float(instr->src[src].src, swizzle[i]) == 0.0)
            return false;
         break;
      case nir_type_bool:
      case nir_type_int:
      case nir_type_uint:
         if (nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) == 0)
            return false;
         break;
      default:
         return false;
      }
   }

   return true;
}

/** Is the value unsigned less than the limit? */
static inline bool
is_ult(const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle,
       uint64_t limit)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      const uint64_t val =
         nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);

      if (val >= limit)
         return false;
   }

   return true;
}

/** Is the value unsigned less than 32? */
static inline bool
is_ult_32(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
          unsigned src, unsigned num_components,
          const uint8_t *swizzle)
{
   return is_ult(instr, src, num_components, swizzle, 32);
}

/** Is the value unsigned less than 0xfffc07fc? */
static inline bool
is_ult_0xfffc07fc(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                  unsigned src, unsigned num_components,
                  const uint8_t *swizzle)
{
   return is_ult(instr, src, num_components, swizzle, 0xfffc07fcU);
}

/** Are the low 5 bits of the value unsigned greater than or equal to 2? */
static inline bool
is_first_5_bits_uge_2(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                      unsigned src, unsigned num_components,
                      const uint8_t *swizzle)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      const unsigned val =
         nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);

      if ((val & 0x1f) < 2)
         return false;
   }

   return true;
}

/**
 * Is this a constant that could be either int16_t or uint16_t after applying
 * a scale factor?
 */
static inline bool
is_16_bits_with_scale(const nir_alu_instr *instr,
                      unsigned src, unsigned num_components,
                      const uint8_t *swizzle, int scale)
{
   /* only constant srcs: */
   if (!nir_src_is_const(instr->src[src].src))
      return false;

   /* All elements must be representable as int16_t or uint16_t. */
   bool must_be_signed = false;
   bool must_be_unsigned = false;

   for (unsigned i = 0; i < num_components; i++) {
      const int64_t val =
         scale * nir_src_comp_as_int(instr->src[src].src, swizzle[i]);

      if (val > 0xffff || val < -0x8000)
         return false;

      if (val < 0) {
         if (must_be_unsigned)
            return false;

         must_be_signed = true;
      }

      if (val > 0x7fff) {
         if (must_be_signed)
            return false;

         must_be_unsigned = true;
      }
   }

   return true;
}
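
/* Illustration: with scale == 2, a constant of 0x6000 scales to 0xc000,
 * which only fits as uint16_t, so must_be_unsigned is set; a component
 * that scales to a negative value would then make the helper return false.
 */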

/** Is this a constant that could be either int16_t or uint16_t? */
static inline bool
is_16_bits(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
           unsigned src, unsigned num_components,
           const uint8_t *swizzle)
{
   return is_16_bits_with_scale(instr, src, num_components, swizzle, 1);
}

/** Like is_16_bits, but could 2 times the constant fit in 16 bits? */
static inline bool
is_2x_16_bits(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
              unsigned src, unsigned num_components,
              const uint8_t *swizzle)
{
   return is_16_bits_with_scale(instr, src, num_components, swizzle, 2);
}

/** Like is_16_bits, but could -2 times the constant fit in 16 bits? */
static inline bool
is_neg2x_16_bits(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                 unsigned src, unsigned num_components,
                 const uint8_t *swizzle)
{
   return is_16_bits_with_scale(instr, src, num_components, swizzle, -2);
}

static inline bool
is_not_const(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
             unsigned src, UNUSED unsigned num_components,
             UNUSED const uint8_t *swizzle)
{
   return !nir_src_is_const(instr->src[src].src);
}

static inline bool
is_not_fmul(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
            UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   nir_alu_instr *src_alu =
      nir_src_as_alu_instr(instr->src[src].src);

   if (src_alu == NULL)
      return true;

   if (src_alu->op == nir_op_fneg)
      return is_not_fmul(state, src_alu, 0, 0, NULL);

   return src_alu->op != nir_op_fmul && src_alu->op != nir_op_fmulz;
}

static inline bool
is_fmul(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
        UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   nir_alu_instr *src_alu =
      nir_src_as_alu_instr(instr->src[src].src);

   if (src_alu == NULL)
      return false;

   if (src_alu->op == nir_op_fneg)
      return is_fmul(state, src_alu, 0, 0, NULL);

   return src_alu->op == nir_op_fmul || src_alu->op == nir_op_fmulz;
}

static inline bool
is_fsign(const nir_alu_instr *instr, unsigned src,
         UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   nir_alu_instr *src_alu =
      nir_src_as_alu_instr(instr->src[src].src);

   if (src_alu == NULL)
      return false;

   if (src_alu->op == nir_op_fneg)
      src_alu = nir_src_as_alu_instr(src_alu->src[0].src);

   return src_alu != NULL && src_alu->op == nir_op_fsign;
}

static inline bool
is_not_const_and_not_fsign(const nir_search_state *state, const nir_alu_instr *instr,
                           unsigned src, unsigned num_components,
                           const uint8_t *swizzle)
{
   return is_not_const(state, instr, src, num_components, swizzle) &&
          !is_fsign(instr, src, num_components, swizzle);
}

static inline bool
has_multiple_uses(const nir_search_state *state, const nir_alu_instr *instr,
                  unsigned src, unsigned num_components,
                  const uint8_t *swizzle)
{
   return !list_is_empty(&instr->def.uses) &&
          !list_is_singular(&instr->def.uses);
}

static inline bool
is_used_once(const nir_alu_instr *instr)
{
   return list_is_singular(&instr->def.uses);
}

static inline bool
is_used_by_if(const nir_alu_instr *instr)
{
   return nir_def_used_by_if(&instr->def);
}

static inline bool
is_not_used_by_if(const nir_alu_instr *instr)
{
   return !is_used_by_if(instr);
}

static inline bool
is_only_used_by_if(const nir_alu_instr *instr)
{
   return nir_def_only_used_by_if(&instr->def);
}

static inline bool
is_used_by_non_fsat(const nir_alu_instr *instr)
{
   nir_foreach_use(src, &instr->def) {
      const nir_instr *const user_instr = nir_src_parent_instr(src);

      if (user_instr->type != nir_instr_type_alu)
         return true;

      const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr);

      assert(instr != user_alu);
      if (user_alu->op != nir_op_fsat)
         return true;
   }

   return false;
}

static inline bool
is_used_by_non_ldc_nv(const nir_alu_instr *instr)
{
   nir_foreach_use(src, &instr->def) {
      const nir_instr *const user_instr = nir_src_parent_instr(src);

      if (user_instr->type != nir_instr_type_intrinsic)
         return true;

      const nir_intrinsic_instr *const user_intrin = nir_instr_as_intrinsic(user_instr);

      if (user_intrin->intrinsic != nir_intrinsic_ldc_nv)
         return true;
   }

   return false;
}

static inline bool
is_only_used_as_float_impl(const nir_alu_instr *instr, unsigned depth)
{
   nir_foreach_use(src, &instr->def) {
      const nir_instr *const user_instr = nir_src_parent_instr(src);

      if (user_instr->type != nir_instr_type_alu) {
         if (user_instr->type == nir_instr_type_intrinsic) {
            switch (nir_instr_as_intrinsic(user_instr)->intrinsic) {
            case nir_intrinsic_ddx:
            case nir_intrinsic_ddy:
            case nir_intrinsic_ddx_fine:
            case nir_intrinsic_ddy_fine:
            case nir_intrinsic_ddx_coarse:
            case nir_intrinsic_ddy_coarse:
               continue;
            default:
               break;
            }
         } else if (user_instr->type == nir_instr_type_tex) {
            const nir_tex_instr *tex = nir_instr_as_tex(user_instr);
            const nir_tex_src *tex_src = container_of(src, nir_tex_src, src);

            /* These have unknown type. */
            if (tex_src->src_type == nir_tex_src_backend1 ||
                tex_src->src_type == nir_tex_src_backend2)
               return false;

            unsigned idx = tex_src - tex->src;
            if (nir_tex_instr_src_type(tex, idx) == nir_type_float)
               continue;
         }
         return false;
      }

      const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr);
      assert(instr != user_alu);

      unsigned index = (nir_alu_src *)container_of(src, nir_alu_src, src) - user_alu->src;

      /* bcsel acts like a move: if the bcsel is only used by float
       * instructions, then the original value is (transitively) only used by
       * float too. The bcsel condition (source 0) is excluded because it is
       * consumed as a boolean, not as a float.
       *
       * The unbounded recursion would terminate because use chains are acyclic
       * in SSA. However, we limit the search depth regardless to avoid stack
       * overflows in pathological shaders and to reduce the worst-case time.
       */
      bool is_mov = (user_alu->op == nir_op_bcsel && index != 0) ||
                    nir_op_is_vec_or_mov(user_alu->op);
      if (is_mov && depth < 8) {
         if (is_only_used_as_float_impl(user_alu, depth + 1))
            continue;
      }

      nir_alu_type type = nir_op_infos[user_alu->op].input_types[index];
      if (nir_alu_type_get_base_type(type) != nir_type_float)
         return false;
   }

   return true;
}

static inline bool
is_only_used_as_float(const nir_alu_instr *instr)
{
   return is_only_used_as_float_impl(instr, 0);
}

static inline bool
is_only_used_by_fadd(const nir_alu_instr *instr)
{
   nir_foreach_use(src, &instr->def) {
      const nir_instr *const user_instr = nir_src_parent_instr(src);
      if (user_instr->type != nir_instr_type_alu)
         return false;

      const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr);
      assert(instr != user_alu);

      if (user_alu->op == nir_op_fneg || user_alu->op == nir_op_fabs) {
         if (!is_only_used_by_fadd(user_alu))
            return false;
      } else if (user_alu->op != nir_op_fadd) {
         return false;
      }
   }

   return true;
}

static inline bool
is_only_used_by_alu_op(const nir_alu_instr *instr, nir_op op)
{
   nir_foreach_use(src, &instr->def) {
      const nir_instr *const user_instr = nir_src_parent_instr(src);
      if (user_instr->type != nir_instr_type_alu)
         return false;

      const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr);
      assert(instr != user_alu);

      if (user_alu->op != op)
         return false;
   }

   return true;
}

static inline bool
is_only_used_by_iadd(const nir_alu_instr *instr)
{
   return is_only_used_by_alu_op(instr, nir_op_iadd);
}

static inline bool
is_only_used_by_iand(const nir_alu_instr *instr)
{
   return is_only_used_by_alu_op(instr, nir_op_iand);
}

static inline bool
is_only_used_by_ior(const nir_alu_instr *instr)
{
   return is_only_used_by_alu_op(instr, nir_op_ior);
}

static inline bool
only_lower_8_bits_used(const nir_alu_instr *instr)
{
   return (nir_def_bits_used(&instr->def) & ~0xffull) == 0;
}

static inline bool
only_lower_16_bits_used(const nir_alu_instr *instr)
{
   return (nir_def_bits_used(&instr->def) & ~0xffffull) == 0;
}

/**
 * Returns true if a NIR ALU src represents a constant integer
 * of either 32 or 64 bits, and the higher word (bit-size / 2)
 * of all its components is zero.
 */
static inline bool
is_upper_half_zero(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                   unsigned src, unsigned num_components,
                   const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
      uint64_t high_bits = u_bit_consecutive64(half_bit_size, half_bit_size);
      if ((nir_src_comp_as_uint(instr->src[src].src,
                                swizzle[i]) &
           high_bits) != 0) {
         return false;
      }
   }

   return true;
}
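
/* e.g. for a 32-bit source, high_bits is 0xffff0000, so a constant of
 * 0x0000abcd passes is_upper_half_zero and 0x1234abcd does not.
 */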

/**
 * Returns true if a NIR ALU src represents a constant integer
 * of either 32 or 64 bits, and the lower word (bit-size / 2)
 * of all its components is zero.
 */
static inline bool
is_lower_half_zero(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                   unsigned src, unsigned num_components,
                   const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      uint64_t low_bits = BITFIELD64_MASK(nir_src_bit_size(instr->src[src].src) / 2);
      if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != 0)
         return false;
   }

   return true;
}

static inline bool
is_upper_half_negative_one(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                           unsigned src, unsigned num_components,
                           const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
      uint64_t high_bits = u_bit_consecutive64(half_bit_size, half_bit_size);
      if ((nir_src_comp_as_uint(instr->src[src].src,
                                swizzle[i]) &
           high_bits) != high_bits) {
         return false;
      }
   }

   return true;
}

static inline bool
is_lower_half_negative_one(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                           unsigned src, unsigned num_components,
                           const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      uint64_t low_bits = BITFIELD64_MASK(nir_src_bit_size(instr->src[src].src) / 2);
      if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != low_bits)
         return false;
   }

   return true;
}

/**
 * Returns whether an operand is a constant bit-mask, meaning that it
 * only has consecutive 1 bits starting from the LSB.
 * Numbers whose MSB is 1 are excluded because they are not useful
 * for the optimizations where this function is used.
 */
static inline bool
is_const_bitmask(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                 unsigned src, unsigned num_components,
                 const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      const unsigned bit_size = instr->src[src].src.ssa->bit_size;
      const uint64_t c = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
      const unsigned num_bits = util_bitcount64(c);
      if (c != BITFIELD64_MASK(num_bits) || num_bits == bit_size)
         return false;
   }

   return true;
}
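
/* e.g. for a 32-bit source, 0x0000ffff passes is_const_bitmask, while
 * 0x0000ff0f (non-consecutive bits) and 0xffffffff (MSB set) do not.
 */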

/**
 * Returns whether an operand is a non-zero constant
 * that can be created by nir_op_bfm.
 */
static inline bool
is_const_bfm(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
             unsigned src, unsigned num_components,
             const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      const unsigned bit_size = instr->src[src].src.ssa->bit_size;
      const uint64_t c = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
      const unsigned num_bits = util_bitcount64(c);
      const unsigned offset = ffsll(c) - 1;
      if (c == 0 || c != (BITFIELD64_MASK(num_bits) << offset) || num_bits == bit_size)
         return false;
   }

   return true;
}
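
/* e.g. 0x38 (0b111000) passes is_const_bfm because it equals
 * BITFIELD64_MASK(3) << 3, i.e. the result of bfm(3, 3), while
 * 0x28 (0b101000) does not because its set bits are not consecutive.
 */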

/**
 * Returns whether the 5 LSBs of an operand are non-zero.
 */
static inline bool
is_5lsb_not_zero(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                 unsigned src, unsigned num_components,
                 const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      const uint64_t c = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
      if ((c & 0x1f) == 0)
         return false;
   }

   return true;
}

/**
 * Returns whether at least one bit is 0.
 */
static inline bool
is_not_uint_max(UNUSED const nir_search_state *state, const nir_alu_instr *instr,
                unsigned src, unsigned num_components,
                const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      const int64_t c = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
      if (c == -1)
         return false;
   }

   return true;
}

static inline bool
no_signed_wrap(const nir_alu_instr *instr)
{
   return instr->no_signed_wrap;
}

static inline bool
no_unsigned_wrap(const nir_alu_instr *instr)
{
   return instr->no_unsigned_wrap;
}

static inline bool
xz_components_unused(const nir_alu_instr *instr)
{
   return (nir_def_components_read(&instr->def) & 0x5) == 0;
}

static inline bool
is_integral(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
            UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range r = nir_analyze_range(state->range_ht, instr, src);

   return r.is_integral;
}

/**
 * Is the value finite?
 */
static inline bool
is_finite(const nir_search_state *state, const nir_alu_instr *instr,
          unsigned src, UNUSED unsigned num_components,
          UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);

   return v.is_finite;
}

static inline bool
is_finite_not_zero(const nir_search_state *state, const nir_alu_instr *instr,
                   unsigned src, UNUSED unsigned num_components,
                   UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);

   return v.is_finite &&
          (v.range == lt_zero || v.range == gt_zero || v.range == ne_zero);
}

#define RELATION(r) \
   static inline bool \
   is_##r(const nir_search_state *state, const nir_alu_instr *instr, \
          unsigned src, UNUSED unsigned num_components, \
          UNUSED const uint8_t *swizzle) \
   { \
      const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src); \
      return v.range == r; \
   } \
 \
   static inline bool \
   is_a_number_##r(const nir_search_state *state, const nir_alu_instr *instr, \
                   unsigned src, UNUSED unsigned num_components, \
                   UNUSED const uint8_t *swizzle) \
   { \
      const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src); \
      return v.is_a_number && v.range == r; \
   }

RELATION(lt_zero)
RELATION(le_zero)
RELATION(gt_zero)
RELATION(ge_zero)
RELATION(ne_zero)
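
/* Each RELATION(r) expansion above defines is_r() and is_a_number_r(),
 * e.g. is_gt_zero() and is_a_number_gt_zero(); the latter additionally
 * requires that range analysis proves the value is not NaN.
 */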

static inline bool
is_not_negative(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
                UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);
   return v.range == ge_zero || v.range == gt_zero || v.range == eq_zero;
}

static inline bool
is_a_number_not_negative(const nir_search_state *state, const nir_alu_instr *instr,
                         unsigned src, UNUSED unsigned num_components,
                         UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);
   return v.is_a_number &&
          (v.range == ge_zero || v.range == gt_zero || v.range == eq_zero);
}

static inline bool
is_not_positive(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
                UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);
   return v.range == le_zero || v.range == lt_zero || v.range == eq_zero;
}

static inline bool
is_a_number_not_positive(const nir_search_state *state, const nir_alu_instr *instr,
                         unsigned src, UNUSED unsigned num_components,
                         UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);
   return v.is_a_number &&
          (v.range == le_zero || v.range == lt_zero || v.range == eq_zero);
}

static inline bool
is_not_zero(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
            UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);
   return v.range == lt_zero || v.range == gt_zero || v.range == ne_zero;
}

static inline bool
is_a_number_not_zero(const nir_search_state *state, const nir_alu_instr *instr,
                     unsigned src, UNUSED unsigned num_components,
                     UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);
   return v.is_a_number &&
          (v.range == lt_zero || v.range == gt_zero || v.range == ne_zero);
}

static inline bool
is_a_number(const nir_search_state *state, const nir_alu_instr *instr, unsigned src,
            UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
   const struct ssa_result_range v = nir_analyze_range(state->range_ht, instr, src);
   return v.is_a_number;
}

#endif /* _NIR_SEARCH_HELPERS_ */