llvmpipe: Immediate multiplication.

2026-05-02 14:28:05 +02:00 · 2009-10-25 11:48:17 +00:00 · 2009-10-25 11:48:17 +00:00 · e4c5e01c10
commit e4c5e01c10
parent e1342f871b
4 changed files with 64 additions and 32 deletions
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@ -47,6 +47,7 @@

 #include "util/u_memory.h"
 #include "util/u_debug.h"
+#include "util/u_math.h"
 #include "util/u_string.h"
 #include "util/u_cpu_detect.h"

@ -424,6 +425,59 @@ lp_build_mul(struct lp_build_context *bld,
 }


+/**
+ * Multiply a by the immediate integer b, special-casing cheap values of b (0, 1, -1, 2, powers of two).
+ */
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,   /* build context; its builder, type and zero are used */
+                 LLVMValueRef a,                 /* value to be multiplied */
+                 int b)                          /* immediate multiplier */
+{
+   LLVMValueRef factor;   /* constant operand: either the shift count or the multiplier b */
+
+   if(b == 0)
+      return bld->zero;   /* a is ignored and no instruction is emitted */
+
+   if(b == 1)
+      return a;   /* identity: hand back the operand unchanged */
+
+   if(b == -1)
+      return LLVMBuildNeg(bld->builder, a, "");   /* multiplication by -1 is a negation */
+
+   if(b == 2 && bld->type.floating)   /* floating types only; other types reach the checks below */
+      return lp_build_add(bld, a, a);
+
+   if(util_is_pot(b)) {   /* NOTE(review): b may be negative here (b < -1); confirm util_is_pot rejects it */
+      unsigned shift = ffs(b) - 1;   /* ffs() is 1-based, so this is the index of the lowest set bit of b */
+
+      if(bld->type.floating) {   /* body is compiled out by #if 0, so floats fall through to the generic multiply */
+#if 0
+         /*
+          * Power of two multiplication by directly manipulating the mantissa.
+          *
+          * XXX: This might not be always faster, it will introduce a small error
+          * for multiplication by zero, and it will produce wrong results
+          * for Inf and NaN.
+          */
+         unsigned mantissa = lp_mantissa(bld->type);
+         factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
+         a = LLVMBuildAdd(bld->builder, a, factor, "");
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
+         return a;
+#endif
+      }
+      else {   /* non-floating type: replace the multiply with a left shift by `shift` bits */
+         factor = lp_build_const_scalar(bld->type, shift);   /* NOTE(review): confirm this yields the raw shift count for every non-floating type */
+         return LLVMBuildShl(bld->builder, a, factor, "");
+      }
+   }
+
+   factor = lp_build_const_scalar(bld->type, (double)b);   /* generic case: b as a constant of bld->type */
+   return lp_build_mul(bld, a, factor);
+}
+
+
 /**
 * Generate a / b
 */
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@ -66,6 +66,11 @@ lp_build_mul(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b);

+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 int b);
+
 LLVMValueRef
 lp_build_div(struct lp_build_context *bld,
             LLVMValueRef a,
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@ -108,32 +108,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
 }


-/**
- * Small vector x scale multiplication optimization.
- *
- * TODO: Should be elsewhere.
- */
-static LLVMValueRef
-coeff_multiply(struct lp_build_interp_soa_context *bld,
-               LLVMValueRef coeff,
-               int step)
-{
-   LLVMValueRef factor;
-
-   switch(step) {
-   case 0:
-      return bld->base.zero;
-   case 1:
-      return coeff;
-   case 2:
-      return lp_build_add(&bld->base, coeff, coeff);
-   default:
-      factor = lp_build_const_scalar(bld->base.type, (double)step);
-      return lp_build_mul(&bld->base, coeff, factor);
-   }
-}
-
-
 /**
 * Multiply the dadx and dady with the xstep and ystep respectively.
 */
@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld)
      if (mode != TGSI_INTERPOLATE_CONSTANT) {
         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
            if(mask & (1 << chan)) {
-               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
-               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+               bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep);
+               bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep);
            }
         }
      }
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@ -309,7 +309,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
-   LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255;
+   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
   LLVMValueRef x0, x1;
@ -328,9 +328,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

-   f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0);
-   s = lp_build_mul(&bld->coord_bld, s, f32_c256);
-   t = lp_build_mul(&bld->coord_bld, t, f32_c256);
+   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+   t = lp_build_mul_imm(&bld->coord_bld, t, 256);

   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");