nir/opt_16bit_tex_image: optimize packed conversions too

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28730>
2025-12-24 17:30:12 +01:00 · 2024-04-13 22:16:15 +02:00 · 2024-04-13 22:16:15 +02:00 · 603982ea80
commit 603982ea80
parent eeed928111
4 changed files with 106 additions and 70 deletions
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -694,8 +694,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
      };
      struct nir_opt_16bit_tex_image_options opt_16bit_options = {
         .rounding_mode = nir_rounding_mode_undef,
-         .opt_tex_dest_types = nir_type_float,
-         .opt_image_dest_types = nir_type_float,
+         .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
+         .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
+         .integer_dest_saturates = true,
         .opt_image_store_data = true,
         .opt_image_srcs = true,
         .opt_srcs_options_count = separate_g16 ? 2 : 1,
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -6315,6 +6315,7 @@ struct nir_opt_16bit_tex_image_options {
   nir_rounding_mode rounding_mode;
   nir_alu_type opt_tex_dest_types;
   nir_alu_type opt_image_dest_types;
+   bool integer_dest_saturates;
   bool opt_image_store_data;
   bool opt_image_srcs;
   unsigned opt_srcs_options_count;
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -616,49 +616,6 @@ nir_lower_mediump_vars(nir_shader *shader, nir_variable_mode modes)
   return progress;
 }

-static bool
-is_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m)
-{
-   if (instr->type != nir_instr_type_alu)
-      return false;
-
-   nir_alu_instr *alu = nir_instr_as_alu(instr);
-   return alu->op == m && alu->src[0].src.ssa->bit_size == n;
-}
-
-static bool
-is_f16_to_f32_conversion(nir_instr *instr)
-{
-   return is_n_to_m_conversion(instr, 16, nir_op_f2f32);
-}
-
-static bool
-is_f32_to_f16_conversion(nir_instr *instr)
-{
-   return is_n_to_m_conversion(instr, 32, nir_op_f2f16) ||
-          is_n_to_m_conversion(instr, 32, nir_op_f2fmp);
-}
-
-static bool
-is_i16_to_i32_conversion(nir_instr *instr)
-{
-   return is_n_to_m_conversion(instr, 16, nir_op_i2i32);
-}
-
-static bool
-is_u16_to_u32_conversion(nir_instr *instr)
-{
-   return is_n_to_m_conversion(instr, 16, nir_op_u2u32);
-}
-
-static bool
-is_i32_to_i16_conversion(nir_instr *instr)
-{
-   return is_n_to_m_conversion(instr, 32, nir_op_i2i16) ||
-          is_n_to_m_conversion(instr, 32, nir_op_u2u16) ||
-          is_n_to_m_conversion(instr, 32, nir_op_i2imp);
-}
-
 /**
 * Fix types of source operands of texture opcodes according to
 * the constraints by inserting the appropriate conversion opcodes.
@@ -788,16 +745,21 @@ can_opt_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters)
            can_opt &= const_is_i16(comp);
         else if (opt_i16_u16)
            can_opt &= (const_is_u16(comp) || const_is_i16(comp));
+      } else if (nir_scalar_is_alu(comp)) {
+         nir_alu_instr *alu = nir_instr_as_alu(comp.def->parent_instr);
+         if (alu->src[0].src.ssa->bit_size != 16)
+            return false;
+
+         if (alu->op == nir_op_f2f32)
+            can_opt &= opt_f16;
+         else if (alu->op == nir_op_i2i32)
+            can_opt &= opt_i16 || opt_i16_u16;
+         else if (alu->op == nir_op_u2u32)
+            can_opt &= opt_u16 || opt_i16_u16;
+         else
+            return false;
      } else {
-         if (opt_f16)
-            can_opt &= is_f16_to_f32_conversion(comp.def->parent_instr);
-         else if (opt_u16)
-            can_opt &= is_u16_to_u32_conversion(comp.def->parent_instr);
-         else if (opt_i16)
-            can_opt &= is_i16_to_i32_conversion(comp.def->parent_instr);
-         else if (opt_i16_u16)
-            can_opt &= (is_i16_to_i32_conversion(comp.def->parent_instr) ||
-                        is_u16_to_u32_conversion(comp.def->parent_instr));
+         return false;
      }
   }

@@ -855,31 +817,102 @@ static bool
 opt_16bit_destination(nir_def *ssa, nir_alu_type dest_type, unsigned exec_mode,
                      struct nir_opt_16bit_tex_image_options *options)
 {
-   bool is_f32_to_f16 = dest_type == nir_type_float32;
-   bool is_i32_to_i16 = dest_type == nir_type_int32 || dest_type == nir_type_uint32;
+   bool opt_f2f16 = dest_type == nir_type_float32;
+   bool opt_i2i16 = (dest_type == nir_type_int32 || dest_type == nir_type_uint32) &&
+                    !options->integer_dest_saturates;
+   bool opt_i2i16_sat = dest_type == nir_type_int32 && options->integer_dest_saturates;
+   bool opt_u2u16_sat = dest_type == nir_type_uint32 && options->integer_dest_saturates;

   nir_rounding_mode rdm = options->rounding_mode;
   nir_rounding_mode src_rdm =
      nir_get_rounding_mode_from_float_controls(exec_mode, nir_type_float16);
-   bool allow_standard = (src_rdm == rdm || src_rdm == nir_rounding_mode_undef);
-   bool allow_rtz = rdm == nir_rounding_mode_rtz;
-   bool allow_rtne = rdm == nir_rounding_mode_rtne;

   nir_foreach_use(use, ssa) {
      nir_instr *instr = nir_src_parent_instr(use);
-      is_f32_to_f16 &= (allow_standard && is_f32_to_f16_conversion(instr)) ||
-                       (allow_rtz && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtz)) ||
-                       (allow_rtne && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne));
-      is_i32_to_i16 &= is_i32_to_i16_conversion(instr);
-   }
+      if (instr->type != nir_instr_type_alu)
+         return false;

-   if (!is_f32_to_f16 && !is_i32_to_i16)
-      return false;
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+      switch (alu->op) {
+      case nir_op_pack_half_2x16_split:
+         if (alu->src[0].src.ssa != alu->src[1].src.ssa)
+            return false;
+         FALLTHROUGH;
+      case nir_op_pack_half_2x16:
+         /* pack_half rounding is undefined */
+         if (!opt_f2f16)
+            return false;
+         break;
+      case nir_op_pack_half_2x16_rtz_split:
+         if (alu->src[0].src.ssa != alu->src[1].src.ssa)
+            return false;
+         FALLTHROUGH;
+      case nir_op_f2f16_rtz:
+         if (rdm != nir_rounding_mode_rtz || !opt_f2f16)
+            return false;
+         break;
+      case nir_op_f2f16_rtne:
+         if (rdm != nir_rounding_mode_rtne || !opt_f2f16)
+            return false;
+         break;
+      case nir_op_f2f16:
+      case nir_op_f2fmp:
+         if (src_rdm != rdm && src_rdm != nir_rounding_mode_undef)
+            return false;
+         if (!opt_f2f16)
+            return false;
+         break;
+      case nir_op_i2i16:
+      case nir_op_i2imp:
+      case nir_op_u2u16:
+         if (!opt_i2i16)
+            return false;
+         break;
+      case nir_op_pack_sint_2x16:
+         if (!opt_i2i16_sat)
+            return false;
+         break;
+      case nir_op_pack_uint_2x16:
+         if (!opt_u2u16_sat)
+            return false;
+         break;
+      default:
+         return false;
+      }
+   }

   /* All uses are the same conversions. Replace them with mov. */
   nir_foreach_use(use, ssa) {
-      nir_alu_instr *conv = nir_instr_as_alu(nir_src_parent_instr(use));
-      conv->op = nir_op_mov;
+      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
+      switch (alu->op) {
+      case nir_op_f2f16_rtne:
+      case nir_op_f2f16_rtz:
+      case nir_op_f2f16:
+      case nir_op_f2fmp:
+      case nir_op_i2i16:
+      case nir_op_i2imp:
+      case nir_op_u2u16:
+         alu->op = nir_op_mov;
+         break;
+      case nir_op_pack_half_2x16_rtz_split:
+      case nir_op_pack_half_2x16_split:
+         alu->op = nir_op_pack_32_2x16_split;
+         break;
+      case nir_op_pack_32_2x16_split:
+         /* Split opcodes have two operands, so the iteration
+          * for the second use will already observe the
+          * updated opcode.
+          */
+         break;
+      case nir_op_pack_half_2x16:
+      case nir_op_pack_sint_2x16:
+      case nir_op_pack_uint_2x16:
+         alu->op = nir_op_pack_32_2x16;
+         break;
+      default:
+         unreachable("unsupported conversion op");
+      };
   }

   ssa->bit_size = 16;
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -209,8 +209,9 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
   };
   struct nir_opt_16bit_tex_image_options opt_16bit_options = {
      .rounding_mode = nir_rounding_mode_undef,
-      .opt_tex_dest_types = nir_type_float,
-      .opt_image_dest_types = nir_type_float,
+      .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
+      .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
+      .integer_dest_saturates = true,
      .opt_image_store_data = true,
      .opt_image_srcs = true,
      .opt_srcs_options_count = has_g16 ? 2 : 1,