nir: Keep the range analysis hash table (HT) around intra-pass until we make a change.

This lets us memoize range analysis work across instructions.  Reduces
runtime of shader-db on Intel by 30.0288% +/- 2.1693% (n=3).

Fixes: 405de7ccb6 ("nir/range-analysis: Rudimentary value range analysis pass")
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
Eric Anholt 2019-09-23 15:40:46 -07:00
parent 7025dbe794
commit c23db0df18
7 changed files with 52 additions and 38 deletions

View file

@ -1155,6 +1155,7 @@ ${pass_name}_pre_block(nir_block *block, uint16_t *states)
static bool
${pass_name}_block(nir_builder *build, nir_block *block,
struct hash_table *range_ht,
const uint16_t *states, const bool *condition_flags)
{
bool progress = false;
@ -1181,7 +1182,8 @@ ${pass_name}_block(nir_builder *build, nir_block *block,
const struct transform *xform = &${pass_name}_state${i}_xforms[i];
if (condition_flags[xform->condition_offset] &&
!(xform->search->inexact && ignore_inexact) &&
nir_replace_instr(build, alu, xform->search, xform->replace)) {
nir_replace_instr(build, alu, range_ht, xform->search, xform->replace)) {
_mesa_hash_table_clear(range_ht, NULL);
progress = true;
break;
}
@ -1210,14 +1212,17 @@ ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
*/
uint16_t *states = calloc(impl->ssa_alloc, sizeof(*states));
struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
nir_foreach_block(block, impl) {
${pass_name}_pre_block(block, states);
}
nir_foreach_block_reverse(block, impl) {
progress |= ${pass_name}_block(&build, block, states, condition_flags);
progress |= ${pass_name}_block(&build, block, range_ht, states, condition_flags);
}
ralloc_free(range_ht);
free(states);
if (progress) {

View file

@ -325,8 +325,8 @@ comparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld)
* and neither operand is immediate value 0, add it to the set.
*/
if (is_used_by_if(alu) &&
is_not_const_zero(alu, 0, 1, swizzle) &&
is_not_const_zero(alu, 1, 1, swizzle))
is_not_const_zero(NULL, alu, 0, 1, swizzle) &&
is_not_const_zero(NULL, alu, 1, 1, swizzle))
add_instruction_for_block(bi, alu);
break;

View file

@ -1028,14 +1028,9 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
#undef _______
struct ssa_result_range
nir_analyze_range(const nir_alu_instr *instr, unsigned src)
nir_analyze_range(struct hash_table *range_ht,
const nir_alu_instr *instr, unsigned src)
{
struct hash_table *ht = _mesa_pointer_hash_table_create(NULL);
const struct ssa_result_range r =
analyze_expression(instr, src, ht, nir_alu_src_type(instr, src));
_mesa_hash_table_destroy(ht, NULL);
return r;
return analyze_expression(instr, src, range_ht,
nir_alu_src_type(instr, src));
}

View file

@ -42,6 +42,7 @@ struct ssa_result_range {
};
extern struct ssa_result_range
nir_analyze_range(const nir_alu_instr *instr, unsigned src);
nir_analyze_range(struct hash_table *range_ht,
const nir_alu_instr *instr, unsigned src);
#endif /* _NIR_RANGE_ANALYSIS_H_ */

View file

@ -39,6 +39,7 @@ struct match_state {
uint8_t comm_op_direction;
unsigned variables_seen;
nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES];
struct hash_table *range_ht;
};
static bool
@ -297,7 +298,8 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
return false;
if (var->cond && !var->cond(instr, src, num_components, new_swizzle))
if (var->cond && !var->cond(state->range_ht, instr,
src, num_components, new_swizzle))
return false;
if (var->type != nir_type_invalid &&
@ -621,6 +623,7 @@ UNUSED static void dump_value(const nir_search_value *val)
nir_ssa_def *
nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
struct hash_table *range_ht,
const nir_search_expression *search,
const nir_search_value *replace)
{
@ -634,6 +637,7 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
struct match_state state;
state.inexact_match = false;
state.has_exact_alu = false;
state.range_ht = range_ht;
STATIC_ASSERT(sizeof(state.comm_op_direction) * 8 >= NIR_SEARCH_MAX_COMM_OPS);

View file

@ -93,7 +93,7 @@ typedef struct {
* variables to require, for example, power-of-two in order for the search
* to match.
*/
bool (*cond)(nir_alu_instr *instr, unsigned src,
bool (*cond)(struct hash_table *range_ht, nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle);
/** Swizzle (for replace only) */
@ -175,6 +175,7 @@ NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
nir_ssa_def *
nir_replace_instr(struct nir_builder *b, nir_alu_instr *instr,
struct hash_table *range_ht,
const nir_search_expression *search,
const nir_search_value *replace);

View file

@ -33,7 +33,8 @@
#include <math.h>
static inline bool
is_pos_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
is_pos_power_of_two(UNUSED struct hash_table *ht, nir_alu_instr *instr,
unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
@ -63,7 +64,8 @@ is_pos_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
}
static inline bool
is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
is_neg_power_of_two(UNUSED struct hash_table *ht, nir_alu_instr *instr,
unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
@ -87,7 +89,8 @@ is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
}
static inline bool
is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
is_zero_to_one(UNUSED struct hash_table *ht, nir_alu_instr *instr, unsigned src,
unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
@ -117,7 +120,8 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
* 1 while this function tests 0 < src < 1.
*/
static inline bool
is_gt_0_and_lt_1(nir_alu_instr *instr, unsigned src, unsigned num_components,
is_gt_0_and_lt_1(UNUSED struct hash_table *ht, nir_alu_instr *instr,
unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
@ -141,7 +145,8 @@ is_gt_0_and_lt_1(nir_alu_instr *instr, unsigned src, unsigned num_components,
}
static inline bool
is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned num_components,
is_not_const_zero(UNUSED struct hash_table *ht, nir_alu_instr *instr,
unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
@ -168,14 +173,15 @@ is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned num_components,
}
static inline bool
is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,
is_not_const(UNUSED struct hash_table *ht, nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components,
UNUSED const uint8_t *swizzle)
{
return !nir_src_is_const(instr->src[src].src);
}
static inline bool
is_not_fmul(nir_alu_instr *instr, unsigned src,
is_not_fmul(struct hash_table *ht, nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
nir_alu_instr *src_alu =
@ -185,7 +191,7 @@ is_not_fmul(nir_alu_instr *instr, unsigned src,
return true;
if (src_alu->op == nir_op_fneg)
return is_not_fmul(src_alu, 0, 0, NULL);
return is_not_fmul(ht, src_alu, 0, 0, NULL);
return src_alu->op != nir_op_fmul;
}
@ -207,10 +213,10 @@ is_fsign(nir_alu_instr *instr, unsigned src,
}
static inline bool
is_not_const_and_not_fsign(nir_alu_instr *instr, unsigned src,
is_not_const_and_not_fsign(struct hash_table *ht, nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
return is_not_const(instr, src, num_components, swizzle) &&
return is_not_const(ht, instr, src, num_components, swizzle) &&
!is_fsign(instr, src, num_components, swizzle);
}
@ -273,7 +279,8 @@ is_used_by_non_fsat(nir_alu_instr *instr)
* of all its components is zero.
*/
static inline bool
is_upper_half_zero(nir_alu_instr *instr, unsigned src,
is_upper_half_zero(UNUSED struct hash_table *ht,
nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
@ -297,7 +304,8 @@ is_upper_half_zero(nir_alu_instr *instr, unsigned src,
* of all its components is zero.
*/
static inline bool
is_lower_half_zero(nir_alu_instr *instr, unsigned src,
is_lower_half_zero(UNUSED struct hash_table *ht,
nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
@ -326,20 +334,20 @@ no_unsigned_wrap(nir_alu_instr *instr)
}
static inline bool
is_integral(nir_alu_instr *instr, unsigned src,
is_integral(struct hash_table *ht, nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
const struct ssa_result_range r = nir_analyze_range(instr, src);
const struct ssa_result_range r = nir_analyze_range(ht, instr, src);
return r.is_integral;
}
#define RELATION(r) \
static inline bool \
is_ ## r (nir_alu_instr *instr, unsigned src, \
is_ ## r (struct hash_table *ht, nir_alu_instr *instr, unsigned src, \
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) \
{ \
const struct ssa_result_range v = nir_analyze_range(instr, src); \
const struct ssa_result_range v = nir_analyze_range(ht, instr, src); \
return v.range == r; \
}
@ -350,26 +358,26 @@ RELATION(ge_zero)
RELATION(ne_zero)
static inline bool
is_not_negative(nir_alu_instr *instr, unsigned src,
is_not_negative(struct hash_table *ht, nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
const struct ssa_result_range v = nir_analyze_range(instr, src);
const struct ssa_result_range v = nir_analyze_range(ht, instr, src);
return v.range == ge_zero || v.range == gt_zero || v.range == eq_zero;
}
static inline bool
is_not_positive(nir_alu_instr *instr, unsigned src,
is_not_positive(struct hash_table *ht, nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
const struct ssa_result_range v = nir_analyze_range(instr, src);
const struct ssa_result_range v = nir_analyze_range(ht, instr, src);
return v.range == le_zero || v.range == lt_zero || v.range == eq_zero;
}
static inline bool
is_not_zero(nir_alu_instr *instr, unsigned src,
is_not_zero(struct hash_table *ht, nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
const struct ssa_result_range v = nir_analyze_range(instr, src);
const struct ssa_result_range v = nir_analyze_range(ht, instr, src);
return v.range == lt_zero || v.range == gt_zero || v.range == ne_zero;
}