Revert "i965/vec4: Change vec4_visitor::emit_lrp to use MAC for gen<6"

This reverts commit 2dfbbeca50, with the comment about MAC and the implicit accumulator removed. Why this code didn't work in all circumstances is unknown, and without a working Ironlake simulator (which uses a different AUB format) we'll probably never know, short of a lot of experimentation. Spending a bunch of time trying to optimize a few instructions on Ironlake is not time well spent. Moreover, for mix(vec4, vec4, vec4), using the accumulator introduces a dependence between the otherwise independent per-component calculations. Not using the accumulator, even if it means an extra instruction per component, might be preferable. We don't know, we don't have data, and we don't have the necessary register on Ironlake for shader_time to tell us.

Cc: "10.2" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77703
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2026-05-06 05:08:08 +02:00 · 2014-05-22 09:38:24 -07:00 · 2014-05-22 09:38:24 -07:00 · db42dd8952
commit db42dd8952
parent 492af22fb4
1 changed file with 9 additions and 5 deletions
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1171,13 +1171,17 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
      /* Earlier generations don't support three source operations, so we
       * need to emit x*(1-a) + y*a.
       */
-      dst_reg one_minus_a   = dst_reg(this, glsl_type::vec4_type);
-      one_minus_a.writemask = dst.writemask;
+      dst_reg y_times_a           = dst_reg(this, glsl_type::vec4_type);
+      dst_reg one_minus_a         = dst_reg(this, glsl_type::vec4_type);
+      dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
+      y_times_a.writemask           = dst.writemask;
+      one_minus_a.writemask         = dst.writemask;
+      x_times_one_minus_a.writemask = dst.writemask;

+      emit(MUL(y_times_a, y, a));
      emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
-      vec4_instruction *mul = emit(MUL(dst_null_f(), y, a));
-      mul->writes_accumulator = true;
-      emit(MAC(dst, x, src_reg(one_minus_a)));
+      emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
+      emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
   }
 }