mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 19:20:08 +01:00
ac/nir: move aco_nir_op_supports_packed_math_16bit here
aco_nir_op_supports_packed_math_16bit currently can't be used by amd/common because the tests don't link with ACO (linking would fail), but we want to move the nir_opt_vectorize callback that uses it here.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38603>
parent d95b43e07b
commit 2c9995a94f
7 changed files with 56 additions and 52 deletions
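The nir_opt_vectorize callback that the commit message refers to has the same shape as the opt_vectorize_callback and si_vectorize_callback hunks further down: given an instruction, it returns the vector width NIR should target. Below is a minimal sketch of such a callback built on the moved helper; the callback name, the includes, and the pass invocation in the trailing comment are illustrative, not part of this commit.

#include "nir.h"
#include "ac_nir.h"   /* assumed home of ac_nir_op_supports_packed_math_16bit() */

/* Return the vector width nir_opt_vectorize should aim for: 2 for 16-bit ALU
 * ops that map to packed (v_pk_*) instructions, 1 (scalar) otherwise. */
static uint8_t
example_vectorize_cb(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   if (alu->def.bit_size != 16)
      return 1;

   return ac_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
}

/* Typical use in a driver's NIR optimization loop (sketch):
 *    NIR_PASS(progress, nir, nir_opt_vectorize, example_vectorize_cb, NULL);
 */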
@@ -921,3 +921,52 @@ ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data)
    default: return false;
    }
 }
+
+/* This only applies to ACO, not LLVM, but it's not part of ACO because it's used by this shared
+ * code that doesn't always link with ACO like tests.
+ */
+bool
+ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
+{
+   switch (alu->op) {
+   case nir_op_f2f16: {
+      nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
+      unsigned execution_mode = shader->info.float_controls_execution_mode;
+      return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
+             nir_is_rounding_mode_rtz(execution_mode, 16);
+   }
+   case nir_op_fadd:
+   case nir_op_fsub:
+   case nir_op_fmul:
+   case nir_op_ffma:
+   case nir_op_fdiv:
+   case nir_op_flrp:
+   case nir_op_fabs:
+   case nir_op_fneg:
+   case nir_op_fsat:
+   case nir_op_fmin:
+   case nir_op_fmax:
+   case nir_op_f2f16_rtz:
+   case nir_op_iabs:
+   case nir_op_iadd:
+   case nir_op_iadd_sat:
+   case nir_op_uadd_sat:
+   case nir_op_isub:
+   case nir_op_isub_sat:
+   case nir_op_usub_sat:
+   case nir_op_ineg:
+   case nir_op_imul:
+   case nir_op_imin:
+   case nir_op_imax:
+   case nir_op_umin:
+   case nir_op_umax:
+   case nir_op_extract_u8:
+   case nir_op_extract_i8:
+   case nir_op_ishl:
+   case nir_op_ishr:
+   case nir_op_ushr: return true;
+   case nir_op_u2u16:
+   case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8;
+   default: return false;
+   }
+}

@@ -444,6 +444,9 @@ ac_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_);
 bool
 ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data);
 
+bool
+ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
+
 #ifdef __cplusplus
 }
 #endif

@@ -440,52 +440,6 @@ aco_is_gpu_supported(const struct radeon_info* info)
    }
 }
 
-bool
-aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
-{
-   switch (alu->op) {
-   case nir_op_f2f16: {
-      nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
-      unsigned execution_mode = shader->info.float_controls_execution_mode;
-      return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
-             nir_is_rounding_mode_rtz(execution_mode, 16);
-   }
-   case nir_op_fadd:
-   case nir_op_fsub:
-   case nir_op_fmul:
-   case nir_op_ffma:
-   case nir_op_fdiv:
-   case nir_op_flrp:
-   case nir_op_fabs:
-   case nir_op_fneg:
-   case nir_op_fsat:
-   case nir_op_fmin:
-   case nir_op_fmax:
-   case nir_op_f2f16_rtz:
-   case nir_op_iabs:
-   case nir_op_iadd:
-   case nir_op_iadd_sat:
-   case nir_op_uadd_sat:
-   case nir_op_isub:
-   case nir_op_isub_sat:
-   case nir_op_usub_sat:
-   case nir_op_ineg:
-   case nir_op_imul:
-   case nir_op_imin:
-   case nir_op_imax:
-   case nir_op_umin:
-   case nir_op_umax:
-   case nir_op_extract_u8:
-   case nir_op_extract_i8:
-   case nir_op_ishl:
-   case nir_op_ishr:
-   case nir_op_ushr: return true;
-   case nir_op_u2u16:
-   case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8;
-   default: return false;
-   }
-}
-
 void
 aco_print_asm(const struct radeon_info *info, unsigned wave_size,
               uint32_t *binary, unsigned num_dw)

@@ -74,8 +74,6 @@ uint64_t aco_get_codegen_flags();
 
 bool aco_is_gpu_supported(const struct radeon_info* info);
 
-bool aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
-
 void aco_print_asm(const struct radeon_info *info, unsigned wave_size,
                    uint32_t *binary, unsigned num_dw);
 

@@ -406,7 +406,7 @@ init_context(isel_context* ctx, nir_shader* shader)
 
             /* Packed 16-bit instructions have to be VGPR. */
             if (alu_instr->def.num_components == 2 &&
-                aco_nir_op_supports_packed_math_16bit(alu_instr))
+                ac_nir_op_supports_packed_math_16bit(alu_instr))
                type = RegType::vgpr;
 
             switch (alu_instr->op) {

@@ -280,7 +280,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
    }
 
    const unsigned bit_size = alu->def.bit_size;
-   if (bit_size == 16 && aco_nir_op_supports_packed_math_16bit(alu))
+   if (bit_size == 16 && ac_nir_op_supports_packed_math_16bit(alu))
       return 2;
 
    if (bit_size != 8 && bit_size != 16)

@@ -17,7 +17,7 @@ bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void *dat
    nir_alu_instr *alu = nir_instr_as_alu(instr);
 
    if (alu->def.bit_size == 16 && alu->def.num_components == 2 &&
-       aco_nir_op_supports_packed_math_16bit(alu)) {
+       ac_nir_op_supports_packed_math_16bit(alu)) {
       /* ACO requires that all but the first bit of swizzle must be equal. */
       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
          if ((alu->src[i].swizzle[0] >> 1) != (alu->src[i].swizzle[1] >> 1))

@@ -39,7 +39,7 @@ static uint8_t si_vectorize_callback(const nir_instr *instr, const void *data)
    if (alu->def.bit_size != 16)
       return 1;
 
-   return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
+   return ac_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
 }
 
 void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_array_temps)