mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 13:30:12 +01:00
nir_algebraic: Add basic optimizations for umul_low and imadsh_mix16
For umul_low (al * bl), zero is returned if the low 16-bit word of either source is zero. For imadsh_mix16 ((ah * bl) << 16 + c), c is returned if either 'ah' or 'bl' is zero. A couple of nir_search_helpers are added: is_upper_half_zero() returns true if the highest word of every component of an integer NIR ALU src is zero. is_lower_half_zero() returns true if the lowest word of every component of an integer NIR ALU src is zero. Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
e45de3a6c3
commit
3addd7c8d9
2 changed files with 55 additions and 0 deletions
|
|
@ -1105,6 +1105,15 @@ for op in ['fddx', 'fddx_fine', 'fddx_coarse',
|
||||||
((op, 'a'), 0.0, 'info->stage == MESA_SHADER_COMPUTE && info->cs.derivative_group == DERIVATIVE_GROUP_NONE')
|
((op, 'a'), 0.0, 'info->stage == MESA_SHADER_COMPUTE && info->cs.derivative_group == DERIVATIVE_GROUP_NONE')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Some optimizations for ir3-specific instructions.
|
||||||
|
optimizations += [
|
||||||
|
# 'al * bl': when constant 'a' has a zero lower half, replace the product with 0.
# NOTE(review): only 'a' is constrained here; presumably commutative matching
# in the search engine also covers a constant 'b' -- confirm.
|
||||||
|
(('umul_low', '#a(is_lower_half_zero)', 'b'), (0)),
|
||||||
|
# '(ah * bl) << 16 + c': return 'c' when constant 'a' has a zero upper half
# (first rule) or constant 'b' has a zero lower half (second rule).
|
||||||
|
(('imadsh_mix16', '#a@32(is_upper_half_zero)', 'b@32', 'c@32'), ('c')),
|
||||||
|
(('imadsh_mix16', 'a@32', '#b@32(is_lower_half_zero)', 'c@32'), ('c')),
|
||||||
|
]
|
||||||
|
|
||||||
# This section contains "late" optimizations that should be run before
|
# This section contains "late" optimizations that should be run before
|
||||||
# creating ffmas and calling regular optimizations for the final time.
|
# creating ffmas and calling regular optimizations for the final time.
|
||||||
# Optimizations should go here if they help code generation and conflict
|
# Optimizations should go here if they help code generation and conflict
|
||||||
|
|
|
||||||
|
|
@ -242,4 +242,50 @@ is_used_by_non_fsat(nir_alu_instr *instr)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if a NIR ALU src represents a constant integer
|
||||||
|
* of either 32 or 64 bits, and the higher word (bit-size / 2)
|
||||||
|
* of all its components is zero.
|
||||||
|
*/
|
||||||
|
static inline bool
|
||||||
|
is_upper_half_zero(nir_alu_instr *instr, unsigned src,
|
||||||
|
unsigned num_components, const uint8_t *swizzle)
|
||||||
|
{
|
||||||
|
if (nir_src_as_const_value(instr->src[src].src) == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
|
unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
|
||||||
|
uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size;
|
||||||
|
if ((nir_src_comp_as_uint(instr->src[src].src,
|
||||||
|
swizzle[i]) & high_bits) != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if a NIR ALU src represents a constant integer
|
||||||
|
* of either 32 or 64 bits, and the lower word (bit-size / 2)
|
||||||
|
* of all its components is zero.
|
||||||
|
*/
|
||||||
|
static inline bool
|
||||||
|
is_lower_half_zero(nir_alu_instr *instr, unsigned src,
|
||||||
|
unsigned num_components, const uint8_t *swizzle)
|
||||||
|
{
|
||||||
|
if (nir_src_as_const_value(instr->src[src].src) == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
|
uint32_t low_bits =
|
||||||
|
(1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1;
|
||||||
|
if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* _NIR_SEARCH_ */
|
#endif /* _NIR_SEARCH_ */
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue