ac/nir: move aco_nir_op_supports_packed_math_16bit here

aco_nir_op_supports_packed_math_16bit currently can't be used by amd/common
because the tests there don't link with ACO, so linking would fail. However,
we want to move the nir_opt_vectorize callback, which uses this function,
into amd/common.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38603>
This commit is contained in:
Marek Olšák 2025-11-26 21:08:51 -05:00 committed by Marge Bot
parent d95b43e07b
commit 2c9995a94f
7 changed files with 56 additions and 52 deletions

View file

@ -921,3 +921,52 @@ ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data)
default: return false;
}
}
/* This only applies to ACO, not LLVM. It's not part of ACO because it's used by shared
 * code (such as tests) that doesn't always link with ACO.
 */
/* Return whether ACO can execute this 16-bit ALU op as a packed (2x16) instruction.
 * Used as a filter by vectorization/scalarization callbacks that decide whether two
 * 16-bit components may share one instruction.
 */
bool
ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
{
switch (alu->op) {
case nir_op_f2f16: {
/* Plain f2f16 is only treated as packable when 16-bit rounding is RTZ:
 * either the shader's float controls request RTZ, or the compiler forces
 * f2f16 to RTZ (force_f2f16_rtz) and RTNE is not explicitly required.
 * NOTE(review): presumably because the packed form rounds toward zero —
 * confirm against the hardware ISA docs.
 */
nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
unsigned execution_mode = shader->info.float_controls_execution_mode;
return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
nir_is_rounding_mode_rtz(execution_mode, 16);
}
/* Float and integer ops below are always packable at 16 bits. */
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_fdiv:
case nir_op_flrp:
case nir_op_fabs:
case nir_op_fneg:
case nir_op_fsat:
case nir_op_fmin:
case nir_op_fmax:
case nir_op_f2f16_rtz:
case nir_op_iabs:
case nir_op_iadd:
case nir_op_iadd_sat:
case nir_op_uadd_sat:
case nir_op_isub:
case nir_op_isub_sat:
case nir_op_usub_sat:
case nir_op_ineg:
case nir_op_imul:
case nir_op_imin:
case nir_op_imax:
case nir_op_umin:
case nir_op_umax:
case nir_op_extract_u8:
case nir_op_extract_i8:
case nir_op_ishl:
case nir_op_ishr:
case nir_op_ushr: return true;
/* Integer widening to 16 bits is only packable when the source is 8-bit. */
case nir_op_u2u16:
case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8;
default: return false;
}
}

View file

@ -444,6 +444,9 @@ ac_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_);
bool
ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data);
bool
ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
#ifdef __cplusplus
}
#endif

View file

@ -440,52 +440,6 @@ aco_is_gpu_supported(const struct radeon_info* info)
}
}
/* Return whether ACO can execute this 16-bit ALU op as a packed (2x16) instruction.
 * (Original ACO-side copy of this predicate.)
 */
bool
aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
{
switch (alu->op) {
case nir_op_f2f16: {
/* Plain f2f16 is only treated as packable when 16-bit rounding is RTZ:
 * either the shader's float controls request RTZ, or the compiler forces
 * f2f16 to RTZ (force_f2f16_rtz) and RTNE is not explicitly required.
 * NOTE(review): presumably because the packed form rounds toward zero —
 * confirm against the hardware ISA docs.
 */
nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
unsigned execution_mode = shader->info.float_controls_execution_mode;
return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
nir_is_rounding_mode_rtz(execution_mode, 16);
}
/* Float and integer ops below are always packable at 16 bits. */
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_fdiv:
case nir_op_flrp:
case nir_op_fabs:
case nir_op_fneg:
case nir_op_fsat:
case nir_op_fmin:
case nir_op_fmax:
case nir_op_f2f16_rtz:
case nir_op_iabs:
case nir_op_iadd:
case nir_op_iadd_sat:
case nir_op_uadd_sat:
case nir_op_isub:
case nir_op_isub_sat:
case nir_op_usub_sat:
case nir_op_ineg:
case nir_op_imul:
case nir_op_imin:
case nir_op_imax:
case nir_op_umin:
case nir_op_umax:
case nir_op_extract_u8:
case nir_op_extract_i8:
case nir_op_ishl:
case nir_op_ishr:
case nir_op_ushr: return true;
/* Integer widening to 16 bits is only packable when the source is 8-bit. */
case nir_op_u2u16:
case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8;
default: return false;
}
}
void
aco_print_asm(const struct radeon_info *info, unsigned wave_size,
uint32_t *binary, unsigned num_dw)

View file

@ -74,8 +74,6 @@ uint64_t aco_get_codegen_flags();
bool aco_is_gpu_supported(const struct radeon_info* info);
bool aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
void aco_print_asm(const struct radeon_info *info, unsigned wave_size,
uint32_t *binary, unsigned num_dw);

View file

@ -406,7 +406,7 @@ init_context(isel_context* ctx, nir_shader* shader)
/* Packed 16-bit instructions have to be VGPR. */
if (alu_instr->def.num_components == 2 &&
aco_nir_op_supports_packed_math_16bit(alu_instr))
ac_nir_op_supports_packed_math_16bit(alu_instr))
type = RegType::vgpr;
switch (alu_instr->op) {

View file

@ -280,7 +280,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
}
const unsigned bit_size = alu->def.bit_size;
if (bit_size == 16 && aco_nir_op_supports_packed_math_16bit(alu))
if (bit_size == 16 && ac_nir_op_supports_packed_math_16bit(alu))
return 2;
if (bit_size != 8 && bit_size != 16)

View file

@ -17,7 +17,7 @@ bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void *dat
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (alu->def.bit_size == 16 && alu->def.num_components == 2 &&
aco_nir_op_supports_packed_math_16bit(alu)) {
ac_nir_op_supports_packed_math_16bit(alu)) {
/* ACO requires that all but the first bit of swizzle must be equal. */
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
if ((alu->src[i].swizzle[0] >> 1) != (alu->src[i].swizzle[1] >> 1))
@ -39,7 +39,7 @@ static uint8_t si_vectorize_callback(const nir_instr *instr, const void *data)
if (alu->def.bit_size != 16)
return 1;
return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
return ac_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
}
void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_array_temps)