intel/compiler: Add support for ternary add instruction on XeHP

v2: - Re-arrange opcode in correct order (Matt Turner) - Move ADD3 case closer to LRP (Jason) Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11596>
2026-05-06 15:58:05 +02:00 · 2020-06-05 22:40:26 -07:00 · 2020-06-05 22:40:26 -07:00 · 705285b9f4
commit 705285b9f4
parent e8dff256c0
9 changed files with 18 additions and 0 deletions
--- a/src/intel/compiler/brw_eu.cpp
+++ b/src/intel/compiler/brw_eu.cpp
@ -684,6 +684,7 @@ static const struct opcode_desc opcode_descs[] = {
   { BRW_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_SAD2,     80,  "sad2",    2,    1,    GFX_ALL },
   { BRW_OPCODE_SADA2,    81,  "sada2",   2,    1,    GFX_ALL },
+   { BRW_OPCODE_ADD3,     82,  "add3",    3,    1,    GFX_GE(GFX125) },
   { BRW_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@ -246,6 +246,7 @@ ALU3(CSEL)
 ALU1(F32TO16)
 ALU1(F16TO32)
 ALU2(ADD)
+ALU3(ADD3)
 ALU2(AVG)
 ALU2(MUL)
 ALU1(FRC)
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@ -270,6 +270,7 @@ enum opcode {
   BRW_OPCODE_SUBB, /**< Gfx7+ */
   BRW_OPCODE_SAD2,
   BRW_OPCODE_SADA2,
+   BRW_OPCODE_ADD3, /* Gfx12.5+ only */
   BRW_OPCODE_DP4,
   BRW_OPCODE_DPH,
   BRW_OPCODE_DP3,
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@ -1117,6 +1117,7 @@ ALU1(FBL)
 ALU1(CBIT)
 ALU2(ADDC)
 ALU2(SUBB)
+ALU3(ADD3)

 brw_inst *
 brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@ -601,6 +601,7 @@ namespace brw {
      }

      ALU2(ADD)
+      ALU3(ADD3)
      ALU2_ACC(ADDC)
      ALU2(AND)
      ALU2(ASR)
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@ -2090,6 +2090,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
         brw_LRP(p, dst, src[0], src[1], src[2]);
 	 break;

+      case BRW_OPCODE_ADD3:
+         assert(devinfo->verx10 >= 125);
+         brw_ADD3(p, dst, src[0], src[1], src[2]);
+         break;
+
      case BRW_OPCODE_FRC:
 	 brw_FRC(p, dst, src[0]);
 	 break;
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@ -1185,6 +1185,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
      inst = bld.ADD(result, op[0], op[1]);
      break;

+   case nir_op_iadd3:
+      inst = bld.ADD3(result, op[0], op[1], op[2]);
+      break;
+
   case nir_op_iadd_sat:
   case nir_op_uadd_sat:
      inst = bld.ADD(result, op[0], op[1]);
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@ -378,6 +378,7 @@ namespace {
      case BRW_OPCODE_MOV:
      case BRW_OPCODE_CMP:
      case BRW_OPCODE_ADD:
+      case BRW_OPCODE_ADD3:
      case BRW_OPCODE_MUL:
      case SHADER_OPCODE_MOV_RELOC_IMM:
      case VEC4_OPCODE_MOV_FOR_SCRATCH:
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@ -866,6 +866,7 @@ backend_instruction::is_commutative() const
   case BRW_OPCODE_OR:
   case BRW_OPCODE_XOR:
   case BRW_OPCODE_ADD:
+   case BRW_OPCODE_ADD3:
   case BRW_OPCODE_MUL:
   case SHADER_OPCODE_MULH:
      return true;
@ -983,6 +984,7 @@ backend_instruction::can_do_saturate() const
 {
   switch (opcode) {
   case BRW_OPCODE_ADD:
+   case BRW_OPCODE_ADD3:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_CSEL:
@ -1028,6 +1030,7 @@ backend_instruction::can_do_cmod() const
 {
   switch (opcode) {
   case BRW_OPCODE_ADD:
+   case BRW_OPCODE_ADD3:
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_AND:
   case BRW_OPCODE_ASR: