i965/vs: Avoid the MUL/MACH/MOV sequence for small integer multiplies.

We do a lot of multiplies by 3 or 4 for skinning shaders, and we can avoid the sequence if we just move them into the right argument of the MUL. On pre-IVB, this means reliably putting a constant in a position where it can't be constant folded, but that's still better than MUL/MACH/MOV. Improves GLB 2.7 trex performance by 0.788648% +/- 0.23865% (n=29/30) v2: Fix test for pre-sandybridge. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Matt Turner <mattst88@gmail.com> (v1)
2026-01-02 00:50:07 +01:00 · 2013-06-07 18:29:50 -07:00 · 2013-06-07 18:29:50 -07:00 · 9a0bd682f9
commit 9a0bd682f9
parent d28e285d41
1 changed files with 36 additions and 12 deletions
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@ -1313,6 +1313,20 @@ vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
   }
 }

+static bool
+is_16bit_constant(ir_rvalue *rvalue)
+{
+   ir_constant *constant = rvalue->as_constant();
+   if (!constant)
+      return false;
+
+   if (constant->type != glsl_type::int_type &&
+       constant->type != glsl_type::uint_type)
+      return false;
+
+   return constant->value.u[0] < (1 << 16);
+}
+
 void
 vec4_visitor::visit(ir_expression *ir)
 {
@ -1472,19 +1486,29 @@ vec4_visitor::visit(ir_expression *ir)

   case ir_binop_mul:
      if (ir->type->is_integer()) {
-	 /* For integer multiplication, the MUL uses the low 16 bits
-	  * of one of the operands (src0 on gen6, src1 on gen7).  The
-	  * MACH accumulates in the contribution of the upper 16 bits
-	  * of that operand.
-	  *
-	  * FINISHME: Emit just the MUL if we know an operand is small
-	  * enough.
-	  */
-	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+	 /* For integer multiplication, the MUL uses the low 16 bits of one of
+	  * the operands (src0 through SNB, src1 on IVB and later).  The MACH
+	  * accumulates in the contribution of the upper 16 bits of that
+	  * operand.  If we can determine that one of the args is in the low
+	  * 16 bits, though, we can just emit a single MUL.
+          */
+         if (is_16bit_constant(ir->operands[0])) {
+            if (intel->gen < 7)
+               emit(MUL(result_dst, op[0], op[1]));
+            else
+               emit(MUL(result_dst, op[1], op[0]));
+         } else if (is_16bit_constant(ir->operands[1])) {
+            if (intel->gen < 7)
+               emit(MUL(result_dst, op[1], op[0]));
+            else
+               emit(MUL(result_dst, op[0], op[1]));
+         } else {
+            struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

-	 emit(MUL(acc, op[0], op[1]));
-	 emit(MACH(dst_null_d(), op[0], op[1]));
-	 emit(MOV(result_dst, src_reg(acc)));
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(dst_null_d(), op[0], op[1]));
+            emit(MOV(result_dst, src_reg(acc)));
+         }
      } else {
 	 emit(MUL(result_dst, op[0], op[1]));
      }