r600/sfn: optimize comparison results

* Optimize not(compare(a,b)): nir_opt_algebraic does this only if the comparison result is used only once, but on a vector architecture we still gain from doing it because it reduces dependencies.
* Optimize b2f32(compare(a,b)): this is r600 specific.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37205>
2026-03-17 14:40:43 +01:00 · 2025-09-03 16:13:10 +02:00 · 2025-09-03 16:13:10 +02:00 · 51d8ca2dff
commit 51d8ca2dff
parent 82dffae611
3 changed files with 123 additions and 0 deletions
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@ -653,6 +653,7 @@ optimize_once(nir_shader *shader)
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   NIR_PASS(progress, shader, nir_opt_loop_unroll);
+   NIR_PASS(progress, shader, r600_nir_opt_compare_results);
   return progress;
 }

--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp
@ -148,6 +148,119 @@ nir_def *FixKcacheIndirectRead::lower(nir_instr *instr)
   return result;
 }

+/* Peephole pass over ALU instructions that consume a comparison result:
+ *   - inot(cmp(a, b)) is replaced by the complementary comparison,
+ *   - b2f32(fcmp(a, b)) is replaced by sge/slt/seq/sne.
+ * Despite the name, the b2f32 case is handled here as well as inot. */
+class OptNotFromComparison : public NirLowerInstruction {
+private:
+   /* Accepts inot / b2f32 instructions whose first source is produced by
+    * one of the supported comparison opcodes. */
+   bool filter(const nir_instr *instr) const override;
+   /* Builds the replacement value for an accepted instruction; returns 0
+    * when no replacement is produced. */
+   nir_def *lower(nir_instr *instr) override;
+};
+
+/* Select the instructions this pass rewrites:
+ *   - inot(cmp(a, b)) where cmp has an exact logical complement,
+ *   - b2f32(fcmp(a, b)) for the float comparisons flt/fge/feq/fneu.
+ * Returns true only for scalar instructions whose first source is the
+ * scalar result of such a comparison. */
+bool
+OptNotFromComparison::filter(const nir_instr *instr) const
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   auto alu = nir_instr_as_alu(instr);
+
+   /* Look at the opcode first so that src[0] is only inspected for the
+    * two one-source opcodes handled here. */
+   if (alu->op != nir_op_inot && alu->op != nir_op_b2f32)
+      return false;
+
+   /* lower() rebuilds the comparison from swizzle[0] of each operand only,
+    * so a multi-component instruction must not be accepted. */
+   if (alu->def.num_components != 1)
+      return false;
+
+   if (alu->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
+      return false;
+
+   auto p = nir_def_as_alu(alu->src[0].src.ssa);
+
+   /* Same reason as above: only a scalar comparison result is supported. */
+   if (p->def.num_components != 1)
+      return false;
+
+   switch (alu->op) {
+   case nir_op_inot:
+      switch (p->op) {
+      /* flt and fge are deliberately absent: if either operand is NaN both
+       * comparisons are false, so !(a < b) is true while (a >= b) is false.
+       * feq/fneu and the integer comparisons are exact complements. */
+      case nir_op_feq:
+      case nir_op_fneu:
+      case nir_op_ilt:
+      case nir_op_ult:
+      case nir_op_ige:
+      case nir_op_uge:
+      case nir_op_ieq:
+      case nir_op_ine:
+         return true;
+      default:
+         return false;
+      }
+   case nir_op_b2f32:
+      switch (p->op) {
+      case nir_op_fge:
+      case nir_op_flt:
+      case nir_op_feq:
+      case nir_op_fneu:
+         return true;
+      default:
+         return false;
+      }
+   default:
+      return false;
+   }
+}
+
+/* Build the replacement for an inot / b2f32 instruction whose first source
+ * is a comparison:
+ *   - inot(cmp(a, b))   -> the complementary comparison of a and b,
+ *   - b2f32(fcmp(a, b)) -> sge / slt / seq / sne of a and b.
+ * Returns nullptr when no replacement is produced. */
+nir_def *
+OptNotFromComparison::lower(nir_instr *instr)
+{
+   auto alu = nir_instr_as_alu(instr);
+
+   auto p = nir_def_as_alu(alu->src[0].src.ssa);
+
+   /* b2f32 is only replaced when the comparison operands are 32 bit wide.
+    * Decide this before calling into the builder so that nothing is
+    * emitted for an instruction that is left untouched. */
+   if (alu->op == nir_op_b2f32 && p->src[0].src.ssa->bit_size != 32)
+      return nullptr;
+
+   /* Only the first swizzle component of each comparison operand is used. */
+   auto src0 = nir_channel(b, p->src[0].src.ssa, p->src[0].swizzle[0]);
+   auto src1 = nir_channel(b, p->src[1].src.ssa, p->src[1].swizzle[0]);
+
+   switch (alu->op) {
+   case nir_op_inot:
+      switch (p->op) {
+      /* NOTE(review): negating flt/fge this way is only equivalent when
+       * neither operand is NaN (both comparisons are false for NaN). */
+      case nir_op_flt:
+         return nir_fge(b, src0, src1);
+      case nir_op_fge:
+         return nir_flt(b, src0, src1);
+
+      case nir_op_feq:
+         return nir_fneu(b, src0, src1);
+      case nir_op_fneu:
+         return nir_feq(b, src0, src1);
+
+      case nir_op_ilt:
+         return nir_ige(b, src0, src1);
+      case nir_op_ult:
+         return nir_uge(b, src0, src1);
+
+      case nir_op_ige:
+         return nir_ilt(b, src0, src1);
+      case nir_op_uge:
+         return nir_ult(b, src0, src1);
+
+      case nir_op_ieq:
+         return nir_ine(b, src0, src1);
+      case nir_op_ine:
+         return nir_ieq(b, src0, src1);
+      default:
+         return nullptr;
+      }
+   case nir_op_b2f32:
+      switch (p->op) {
+      case nir_op_fge:
+         return nir_sge(b, src0, src1);
+      case nir_op_flt:
+         return nir_slt(b, src0, src1);
+      case nir_op_feq:
+         return nir_seq(b, src0, src1);
+      case nir_op_fneu:
+         return nir_sne(b, src0, src1);
+      default:
+         return nullptr;
+      }
+   default:
+      return nullptr;
+   }
+}
+
 } // namespace r600

 bool
@ -168,3 +281,9 @@ r600_nir_fix_kcache_indirect_access(nir_shader *shader)
   return shader->info.num_ubos > 14 ?
 	    r600::FixKcacheIndirectRead().run(shader) : false;
 }
+
+/* Runs the OptNotFromComparison pass over the shader and hands back
+ * whatever its run() reports. */
+bool
+r600_nir_opt_compare_results(nir_shader *shader)
+{
+   r600::OptNotFromComparison pass;
+   return pass.run(shader);
+}
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.h
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.h
@ -13,6 +13,9 @@
 bool
 r600_nir_lower_pack_unpack_2x16(nir_shader *shader);

+/* Rewrites inot(compare(a, b)) and b2f32(compare(a, b)) in the shader.
+ * Presumably returns true when the shader was changed (it is used with
+ * NIR_PASS) - confirm against NirLowerInstruction::run. */
+bool
+r600_nir_opt_compare_results(nir_shader *shader);
+
 bool
 r600_nir_lower_trigen(nir_shader *shader, enum amd_gfx_level gfx_level);