diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index 87ee4391e45..8eee8230da1 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -921,3 +921,52 @@ ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data) default: return false; } } + +/* This only applies to ACO, not LLVM, but it's not part of ACO because it's used by this shared + * code that doesn't always link with ACO like tests. + */ +bool +ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu) +{ + switch (alu->op) { + case nir_op_f2f16: { + nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader; + unsigned execution_mode = shader->info.float_controls_execution_mode; + return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) || + nir_is_rounding_mode_rtz(execution_mode, 16); + } + case nir_op_fadd: + case nir_op_fsub: + case nir_op_fmul: + case nir_op_ffma: + case nir_op_fdiv: + case nir_op_flrp: + case nir_op_fabs: + case nir_op_fneg: + case nir_op_fsat: + case nir_op_fmin: + case nir_op_fmax: + case nir_op_f2f16_rtz: + case nir_op_iabs: + case nir_op_iadd: + case nir_op_iadd_sat: + case nir_op_uadd_sat: + case nir_op_isub: + case nir_op_isub_sat: + case nir_op_usub_sat: + case nir_op_ineg: + case nir_op_imul: + case nir_op_imin: + case nir_op_imax: + case nir_op_umin: + case nir_op_umax: + case nir_op_extract_u8: + case nir_op_extract_i8: + case nir_op_ishl: + case nir_op_ishr: + case nir_op_ushr: return true; + case nir_op_u2u16: + case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8; + default: return false; + } +} diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index 4f1672df330..0d4f78b0ac0 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -444,6 +444,9 @@ ac_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_); bool ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data); +bool +ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu); + #ifdef __cplusplus } #endif diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp index 0fb17a0714d..f7cb834acf2 100644 --- a/src/amd/compiler/aco_interface.cpp +++ b/src/amd/compiler/aco_interface.cpp @@ -440,52 +440,6 @@ aco_is_gpu_supported(const struct radeon_info* info) } } -bool -aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu) -{ - switch (alu->op) { - case nir_op_f2f16: { - nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader; - unsigned execution_mode = shader->info.float_controls_execution_mode; - return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) || - nir_is_rounding_mode_rtz(execution_mode, 16); - } - case nir_op_fadd: - case nir_op_fsub: - case nir_op_fmul: - case nir_op_ffma: - case nir_op_fdiv: - case nir_op_flrp: - case nir_op_fabs: - case nir_op_fneg: - case nir_op_fsat: - case nir_op_fmin: - case nir_op_fmax: - case nir_op_f2f16_rtz: - case nir_op_iabs: - case nir_op_iadd: - case nir_op_iadd_sat: - case nir_op_uadd_sat: - case nir_op_isub: - case nir_op_isub_sat: - case nir_op_usub_sat: - case nir_op_ineg: - case nir_op_imul: - case nir_op_imin: - case nir_op_imax: - case nir_op_umin: - case nir_op_umax: - case nir_op_extract_u8: - case nir_op_extract_i8: - case nir_op_ishl: - case nir_op_ishr: - case nir_op_ushr: return true; - case nir_op_u2u16: - case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8; - default: return false; - } -} - void aco_print_asm(const struct radeon_info *info, unsigned wave_size, uint32_t *binary, unsigned num_dw) diff --git a/src/amd/compiler/aco_interface.h b/src/amd/compiler/aco_interface.h index 30bd8f1b67f..57c75306bf4 100644 --- a/src/amd/compiler/aco_interface.h +++ b/src/amd/compiler/aco_interface.h @@ -74,8 +74,6 @@ uint64_t aco_get_codegen_flags(); bool aco_is_gpu_supported(const struct radeon_info* info); -bool aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu); - void aco_print_asm(const struct radeon_info *info, unsigned wave_size, uint32_t *binary, unsigned num_dw); diff --git a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp index 3b105dd50c1..4d559b15833 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp @@ -406,7 +406,7 @@ init_context(isel_context* ctx, nir_shader* shader) /* Packed 16-bit instructions have to be VGPR. */ if (alu_instr->def.num_components == 2 && - aco_nir_op_supports_packed_math_16bit(alu_instr)) + ac_nir_op_supports_packed_math_16bit(alu_instr)) type = RegType::vgpr; switch (alu_instr->op) { diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 1fa194bf1fe..f4ed903698a 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -280,7 +280,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_) } const unsigned bit_size = alu->def.bit_size; - if (bit_size == 16 && aco_nir_op_supports_packed_math_16bit(alu)) + if (bit_size == 16 && ac_nir_op_supports_packed_math_16bit(alu)) return 2; if (bit_size != 8 && bit_size != 16) diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 80a0a8ad51d..ec7304f8de2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -17,7 +17,7 @@ bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void *dat nir_alu_instr *alu = nir_instr_as_alu(instr); if (alu->def.bit_size == 16 && alu->def.num_components == 2 && - aco_nir_op_supports_packed_math_16bit(alu)) { + ac_nir_op_supports_packed_math_16bit(alu)) { /* ACO requires that all but the first bit of swizzle must be equal. */ for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { if ((alu->src[i].swizzle[0] >> 1) != (alu->src[i].swizzle[1] >> 1)) @@ -39,7 +39,7 @@ static uint8_t si_vectorize_callback(const nir_instr *instr, const void *data) if (alu->def.bit_size != 16) return 1; - return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1; + return ac_nir_op_supports_packed_math_16bit(alu) ? 2 : 1; } void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_array_temps)