ac/nir: move aco_nir_op_supports_packed_math_16bit here

aco_nir_op_supports_packed_math_16bit currently can't be used by amd/common
because the tests there don't link with ACO, so linking would fail. However,
we want to move the nir_opt_vectorize callback, which uses this function,
into amd/common.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38603>
This commit is contained in:
Marek Olšák 2025-11-26 21:08:51 -05:00 committed by Marge Bot
parent d95b43e07b
commit 2c9995a94f
7 changed files with 56 additions and 52 deletions

View file

@ -921,3 +921,52 @@ ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data)
default: return false;
}
}
/* This only applies to ACO, not LLVM. It's not part of ACO because it's used by shared
 * code (such as tests) that doesn't always link with ACO.
 */
/* Return whether ACO can execute this 16-bit ALU op as a packed (2x16) instruction.
 * Used as a filter by vectorization/scalarization callbacks that decide whether two
 * 16-bit components may share one instruction.
 */
bool
ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
{
switch (alu->op) {
case nir_op_f2f16: {
/* Plain f2f16 is only treated as packable when 16-bit rounding is RTZ:
 * either the shader's float controls request RTZ, or the compiler forces
 * f2f16 to RTZ (force_f2f16_rtz) and RTNE is not explicitly required.
 * NOTE(review): presumably because the packed form rounds toward zero —
 * confirm against the hardware ISA docs.
 */
nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
unsigned execution_mode = shader->info.float_controls_execution_mode;
return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
nir_is_rounding_mode_rtz(execution_mode, 16);
}
/* Float and integer ops below are always packable at 16 bits. */
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_fdiv:
case nir_op_flrp:
case nir_op_fabs:
case nir_op_fneg:
case nir_op_fsat:
case nir_op_fmin:
case nir_op_fmax:
case nir_op_f2f16_rtz:
case nir_op_iabs:
case nir_op_iadd:
case nir_op_iadd_sat:
case nir_op_uadd_sat:
case nir_op_isub:
case nir_op_isub_sat:
case nir_op_usub_sat:
case nir_op_ineg:
case nir_op_imul:
case nir_op_imin:
case nir_op_imax:
case nir_op_umin:
case nir_op_umax:
case nir_op_extract_u8:
case nir_op_extract_i8:
case nir_op_ishl:
case nir_op_ishr:
case nir_op_ushr: return true;
/* Integer widening to 16 bits is only packable when the source is 8-bit. */
case nir_op_u2u16:
case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8;
default: return false;
}
}

View file

@ -444,6 +444,9 @@ ac_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_);
bool
ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data);
bool
ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
#ifdef __cplusplus
}
#endif

View file

@ -440,52 +440,6 @@ aco_is_gpu_supported(const struct radeon_info* info)
}
}
/* Return whether ACO can execute this 16-bit ALU op as a packed (2x16) instruction.
 * (Original ACO-side copy of this predicate.)
 */
bool
aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
{
switch (alu->op) {
case nir_op_f2f16: {
/* Plain f2f16 is only treated as packable when 16-bit rounding is RTZ:
 * either the shader's float controls request RTZ, or the compiler forces
 * f2f16 to RTZ (force_f2f16_rtz) and RTNE is not explicitly required.
 * NOTE(review): presumably because the packed form rounds toward zero —
 * confirm against the hardware ISA docs.
 */
nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
unsigned execution_mode = shader->info.float_controls_execution_mode;
return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
nir_is_rounding_mode_rtz(execution_mode, 16);
}
/* Float and integer ops below are always packable at 16 bits. */
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmul:
case nir_op_ffma:
case nir_op_fdiv:
case nir_op_flrp:
case nir_op_fabs:
case nir_op_fneg:
case nir_op_fsat:
case nir_op_fmin:
case nir_op_fmax:
case nir_op_f2f16_rtz:
case nir_op_iabs:
case nir_op_iadd:
case nir_op_iadd_sat:
case nir_op_uadd_sat:
case nir_op_isub:
case nir_op_isub_sat:
case nir_op_usub_sat:
case nir_op_ineg:
case nir_op_imul:
case nir_op_imin:
case nir_op_imax:
case nir_op_umin:
case nir_op_umax:
case nir_op_extract_u8:
case nir_op_extract_i8:
case nir_op_ishl:
case nir_op_ishr:
case nir_op_ushr: return true;
/* Integer widening to 16 bits is only packable when the source is 8-bit. */
case nir_op_u2u16:
case nir_op_i2i16: return alu->src[0].src.ssa->bit_size == 8;
default: return false;
}
}
void
aco_print_asm(const struct radeon_info *info, unsigned wave_size,
uint32_t *binary, unsigned num_dw)

View file

@ -74,8 +74,6 @@ uint64_t aco_get_codegen_flags();
bool aco_is_gpu_supported(const struct radeon_info* info);
bool aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
void aco_print_asm(const struct radeon_info *info, unsigned wave_size,
uint32_t *binary, unsigned num_dw);

View file

@ -406,7 +406,7 @@ init_context(isel_context* ctx, nir_shader* shader)
/* Packed 16-bit instructions have to be VGPR. */
if (alu_instr->def.num_components == 2 &&
aco_nir_op_supports_packed_math_16bit(alu_instr))
ac_nir_op_supports_packed_math_16bit(alu_instr))
type = RegType::vgpr;
switch (alu_instr->op) {

View file

@ -280,7 +280,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
}
const unsigned bit_size = alu->def.bit_size;
if (bit_size == 16 && aco_nir_op_supports_packed_math_16bit(alu))
if (bit_size == 16 && ac_nir_op_supports_packed_math_16bit(alu))
return 2;
if (bit_size != 8 && bit_size != 16)

View file

@ -17,7 +17,7 @@ bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void *dat
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (alu->def.bit_size == 16 && alu->def.num_components == 2 &&
aco_nir_op_supports_packed_math_16bit(alu)) {
ac_nir_op_supports_packed_math_16bit(alu)) {
/* ACO requires that all but the first bit of swizzle must be equal. */
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
if ((alu->src[i].swizzle[0] >> 1) != (alu->src[i].swizzle[1] >> 1))
@ -39,7 +39,7 @@ static uint8_t si_vectorize_callback(const nir_instr *instr, const void *data)
if (alu->def.bit_size != 16)
return 1;
return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
return ac_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
}
void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_array_temps)