llvmpipe: Bootstrap type conversions.

2026-02-15 00:10:25 +01:00 · 2009-08-07 09:51:48 +01:00 · 2009-08-07 09:51:48 +01:00 · 8988424ee8
commit 8988424ee8
parent b19cb0080c
4 changed files with 642 additions and 0 deletions
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@ -12,6 +12,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_fs_llvm.c',
 		'lp_bld_arit.c',
 		'lp_bld_const.c',
+		'lp_bld_conv.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
@ -67,4 +68,9 @@ env.Program(
    source = ['lp_test_blend.c', 'lp_test_main.c'],
 )

+env.Program(
+    target = 'lp_test_conv',
+    source = ['lp_test_conv.c', 'lp_test_main.c'],
+)
+
 Export('llvmpipe')
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for type conversions.
+ *
+ * LLVM IR doesn't support all basic arithmetic operations we care about (most
+ * notably min/max and saturated operations), and it is often necessary to
+ * resort to machine-specific intrinsics directly. The functions here hide all
+ * these implementation details from the other modules.
+ *
+ * We also do simple expression simplification here. Reasons are:
+ * - it is very easy given we have all necessary information readily available
+ * - LLVM optimization passes fail to simplify several vector expressions
+ * - We often know value constraints which the optimization passes have no way
+ *   of knowing, such as when source arguments are known to be in [0, 1] range.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_conv.h"
+
+/**
+ * Narrow the elements of several source vectors and pack the result into a
+ * single vector.
+ *
+ * The sources are first copied into a local array, so the caller's array is
+ * not written to. Each pass of the loop halves the element width, doubles
+ * the vector length, and merges adjacent pairs of vectors with one of the
+ * "llvm.x86.sse2.pack*" intrinsics, so the number of live vectors halves on
+ * every pass.
+ *
+ * Only 32-bit and 16-bit source widths are handled; any other width reaches
+ * assert(0). With assertions compiled out, 'packed' would still be NULL when
+ * it is handed to LLVMBuildBitCast.
+ *
+ * NOTE(review): the type passed to lp_build_intrinsic_binary is the vector
+ * type of the current (wider) src_type, and the result is then bitcast to
+ * the narrower type -- confirm this is what that helper expects.
+ *
+ * @param builder   builder used to emit the instructions
+ * @param src_type  type of each source vector
+ * @param dst_type  type of the returned vector
+ * @param src       array of num_srcs source vectors
+ * @param num_srcs  number of source vectors
+ * @return the single vector left in tmp[0] once the loop has finished
+ */
+static LLVMValueRef
+lp_build_trunc(LLVMBuilderRef builder,
+               union lp_type src_type,
+               union lp_type dst_type,
+               LLVMValueRef *src, unsigned num_srcs)
+{
+   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length);
+
+   for(i = 0; i < num_srcs; ++i)
+      tmp[i] = src[i];
+
+   while(src_type.width > dst_type.width) {
+      LLVMTypeRef tmp_vec_type = lp_build_vec_type(src_type);
+      union lp_type new_type = src_type;
+      LLVMTypeRef new_vec_type;
+
+      new_type.width /= 2;
+      new_type.length *= 2;
+      new_vec_type = lp_build_vec_type(new_type);
+
+      for(i = 0; i < num_srcs/2; ++i) { /* merge adjacent pairs of vectors */
+         LLVMValueRef lo = tmp[2*i + 0];
+         LLVMValueRef hi = tmp[2*i + 1];
+         LLVMValueRef packed = NULL;
+
+         if(src_type.width == 32) {
+            /* FIXME: we only have a packed signed intrinsic, so
+             * dst_type.sign is not consulted for this step */
+            packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", tmp_vec_type, lo, hi);
+         }
+         else if(src_type.width == 16) {
+            if(dst_type.sign)
+               packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", tmp_vec_type, lo, hi);
+            else
+               packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", tmp_vec_type, lo, hi);
+         }
+         else
+            assert(0);
+
+         tmp[i] = LLVMBuildBitCast(builder, packed, new_vec_type, "");
+      }
+
+      src_type = new_type;
+
+      num_srcs /= 2;
+   }
+
+   assert(num_srcs == 1);
+
+   return tmp[0];
+}
+
+
+/**
+ * Convert between two SIMD types.
+ *
+ * Converting between SIMD types of different element width poses a problem:
+ * SIMD registers have a fixed number of bits, so different element widths
+ * imply different vector lengths. Therefore we must multiplex the multiple
+ * incoming sources into a single destination vector, or demux a single incoming
+ * vector into multiple vectors.
+ *
+ * Only the narrowing direction (src_type.width > dst_type.width, a single
+ * destination vector) is implemented so far; any other combination reaches
+ * assert(0) and leaves dst unwritten.
+ *
+ * @param builder   builder used to emit the instructions
+ * @param src_type  type of each source vector
+ * @param dst_type  type of each destination vector
+ * @param src       array of num_srcs source vectors; it is only read
+ * @param num_srcs  number of source vectors, at most LP_MAX_VECTOR_LENGTH
+ * @param dst       array receiving num_dsts destination vectors
+ * @param num_dsts  number of destination vectors
+ */
+void
+lp_build_conv(LLVMBuilderRef builder,
+              union lp_type src_type,
+              union lp_type dst_type,
+              LLVMValueRef *src, unsigned num_srcs,
+              LLVMValueRef *dst, unsigned num_dsts)
+{
+   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+   /* Work on a private copy so that the caller's source array is not
+    * overwritten with intermediate values. */
+   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+   for(i = 0; i < num_srcs; ++i)
+      tmp[i] = src[i];
+
+   if(!src_type.norm && dst_type.norm) {
+      /* FIXME: clamp */
+   }
+
+   if(src_type.floating && !dst_type.floating) {
+      double dscale;
+      LLVMTypeRef int_vec_type;
+
+      /* Rescale by the destination scale factor, unless it is 1.0 */
+      dscale = lp_const_scale(dst_type);
+      if (dscale != 1.0) {
+         LLVMValueRef scale = lp_build_const_uni(src_type, dscale);
+         for(i = 0; i < num_srcs; ++i)
+            tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
+      }
+
+      /* Use an equally sized integer for intermediate computations */
+      src_type.floating = FALSE;
+      int_vec_type = lp_build_vec_type(src_type);
+      for(i = 0; i < num_srcs; ++i) {
+#if 0
+         if(dst_type.sign)
+            tmp[i] = LLVMBuildFPToSI(builder, tmp[i], int_vec_type, "");
+         else
+            tmp[i] = LLVMBuildFPToUI(builder, tmp[i], int_vec_type, "");
+#else
+         /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
+         tmp[i] = LLVMBuildFPToSI(builder, tmp[i], int_vec_type, "");
+#endif
+      }
+   }
+   else {
+      unsigned src_shift = lp_const_shift(src_type);
+      unsigned dst_shift = lp_const_shift(dst_type);
+
+      /* Shift right when the source shift exceeds the destination shift:
+       * arithmetic for signed destinations, logical otherwise */
+      if(src_shift > dst_shift) {
+         LLVMValueRef shift = lp_build_int_const_uni(src_type, src_shift - dst_shift);
+         for(i = 0; i < num_srcs; ++i)
+            if(dst_type.sign)
+               tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
+            else
+               tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
+      }
+   }
+
+   if(src_type.width > dst_type.width) {
+      assert(num_dsts == 1);
+      dst[0] = lp_build_trunc(builder, src_type, dst_type, tmp, num_srcs);
+   }
+   else
+      assert(0);
+}
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for type conversions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_CONV_H
+#define LP_BLD_CONV_H
+
+
+#include <llvm-c/Core.h>  
+
+
+/* Forward declaration only; the complete type is presumably provided by
+ * lp_bld_type.h, which callers include before using it. */
+union lp_type;
+
+
+/**
+ * Convert between two SIMD types.
+ *
+ * @param builder   builder used to emit the instructions
+ * @param src_type  type of each source vector
+ * @param dst_type  type of each destination vector
+ * @param srcs      array of num_srcs source vectors
+ * @param num_srcs  number of source vectors
+ * @param dsts      array receiving num_dsts destination vectors
+ * @param num_dsts  number of destination vectors
+ */
+void
+lp_build_conv(LLVMBuilderRef builder,
+              union lp_type src_type,
+              union lp_type dst_type,
+              LLVMValueRef *srcs, unsigned num_srcs,
+              LLVMValueRef *dsts, unsigned num_dsts);
+
+
+#endif /* !LP_BLD_CONV_H */
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@ -0,0 +1,392 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for type conversion.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_conv.h"
+#include "lp_test.h"
+
+/**
+ * Signature through which test_one() calls the JIT-compiled "test" function.
+ * Both arguments are raw byte buffers. The generated code stores its results
+ * through dst even though the parameter is declared const here.
+ */
+typedef void (*conv_test_ptr_t)(const void *src, const void *dst);
+
+
+/**
+ * Write the header line of the tab-separated results file and flush it.
+ *
+ * One column name is emitted for each field that write_tsv_row() writes:
+ * result, cycles, source type and destination type.
+ *
+ * @param fp  stream to write to
+ */
+void
+write_tsv_header(FILE *fp)
+{
+   fprintf(fp,
+           "result\t"
+           "cycles\t"
+           "src_type\t"
+           "dst_type\n");
+
+   fflush(fp);
+}
+
+
+/**
+ * Write one record of the tab-separated results file and flush it.
+ *
+ * The fields are, in order: "pass"/"fail", the cycle count, the source type
+ * and the destination type. The record is terminated with a newline so that
+ * every call produces exactly one line.
+ *
+ * @param fp        stream to write to
+ * @param src_type  source type of the conversion that was tested
+ * @param dst_type  destination type of the conversion that was tested
+ * @param cycles    measured cycle count
+ * @param success   whether the conversion produced the expected result
+ */
+static void
+write_tsv_row(FILE *fp,
+              union lp_type src_type,
+              union lp_type dst_type,
+              double cycles,
+              boolean success)
+{
+   fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+   /* NOTE(review): the 0.5 offset looks like a leftover from rounding to an
+    * integer; with "%.1f" it biases the reported value -- confirm intent. */
+   fprintf(fp, "%.1f\t", cycles + 0.5);
+
+   dump_type(fp, src_type);
+   fprintf(fp, "\t");
+
+   dump_type(fp, dst_type);
+   fprintf(fp, "\n");
+
+   fflush(fp);
+}
+
+
+/**
+ * Print "src_type=<src> dst_type=<dst>" to fp, without a trailing newline,
+ * and flush the stream.
+ */
+static void
+dump_conv_types(FILE *fp,
+               union lp_type src_type,
+               union lp_type dst_type)
+{
+   fputs("src_type=", fp);
+   dump_type(fp, src_type);
+   fputs(" dst_type=", fp);
+   dump_type(fp, dst_type);
+   fflush(fp);
+}
+
+
+/**
+ * Add a function named "test" to the module that exercises lp_build_conv().
+ *
+ * The generated function takes a pointer to source vectors and a pointer to
+ * destination vectors. It loads num_srcs vectors from the first pointer,
+ * converts them with lp_build_conv(), and stores num_dsts vectors through
+ * the second pointer.
+ *
+ * @return the newly added function
+ */
+static LLVMValueRef
+add_conv_test(LLVMModuleRef module,
+              union lp_type src_type, unsigned num_srcs,
+              union lp_type dst_type, unsigned num_dsts)
+{
+   LLVMValueRef src[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef dst[LP_MAX_VECTOR_LENGTH];
+   LLVMTypeRef param_types[2];
+   LLVMTypeRef func_type;
+   LLVMValueRef func;
+   LLVMValueRef src_ptr;
+   LLVMValueRef dst_ptr;
+   LLVMBuilderRef builder;
+   unsigned i;
+
+   /* void test(<src vector> *src, <dst vector> *dst) */
+   param_types[0] = LLVMPointerType(lp_build_vec_type(src_type), 0);
+   param_types[1] = LLVMPointerType(lp_build_vec_type(dst_type), 0);
+   func_type = LLVMFunctionType(LLVMVoidType(), param_types, 2, 0);
+
+   func = LLVMAddFunction(module, "test", func_type);
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+   src_ptr = LLVMGetParam(func, 0);
+   dst_ptr = LLVMGetParam(func, 1);
+
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, LLVMAppendBasicBlock(func, "entry"));
+
+   /* Load every source vector */
+   for(i = 0; i < num_srcs; ++i) {
+      LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef elem_ptr = LLVMBuildGEP(builder, src_ptr, &idx, 1, "");
+      src[i] = LLVMBuildLoad(builder, elem_ptr, "");
+   }
+
+   lp_build_conv(builder, src_type, dst_type, src, num_srcs, dst, num_dsts);
+
+   /* Store every destination vector */
+   for(i = 0; i < num_dsts; ++i) {
+      LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef elem_ptr = LLVMBuildGEP(builder, dst_ptr, &idx, 1, "");
+      LLVMBuildStore(builder, dst[i], elem_ptr);
+   }
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);
+   return func;
+}
+
+
+/**
+ * Build, JIT-compile and run the conversion from src_type to dst_type.
+ *
+ * The generated code is run on up to 32 sets of random input vectors. Each
+ * result is compared against a reference computed by reading the sources
+ * into doubles and writing them back as dst_type. Sampling stops at the
+ * first mismatch. Cycle counts are averaged after discarding samples more
+ * than four standard deviations from the mean.
+ *
+ * On a mismatch the module is dumped, written to "conv.bc", and the process
+ * aborts.
+ *
+ * @param verbose   1 prints types, cycles and result; 2 also dumps the module
+ * @param fp        optional TSV stream; a row is written when non-NULL
+ * @param src_type  source vector type
+ * @param dst_type  destination vector type
+ * @return TRUE when every sample matched the reference
+ */
+static boolean
+test_one(unsigned verbose,
+         FILE *fp,
+         union lp_type src_type,
+         union lp_type dst_type)
+{
+   LLVMModuleRef module = NULL;
+   LLVMValueRef func = NULL;
+   LLVMExecutionEngineRef engine = NULL;
+   LLVMModuleProviderRef provider = NULL;
+   LLVMPassManagerRef pass = NULL;
+   char *error = NULL;
+   conv_test_ptr_t conv_test_ptr;
+   boolean success;
+   const unsigned n = 32;
+   int64_t cycles[n];
+   double cycles_avg = 0.0;
+   unsigned num_srcs;
+   unsigned num_dsts;
+   unsigned i, j;
+
+   if(verbose >= 1)
+      dump_conv_types(stdout, src_type, dst_type);
+
+   /* The side with the shorter vectors needs several of them to carry the
+    * same number of channels as one vector of the other side */
+   if(src_type.length > dst_type.length) {
+      num_srcs = 1;
+      num_dsts = src_type.length/dst_type.length;
+   }
+   else  {
+      num_dsts = 1;
+      num_srcs = dst_type.length/src_type.length;
+   }
+
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+
+   module = LLVMModuleCreateWithName("test");
+
+   func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts);
+
+   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+      LLVMDumpModule(module);
+      abort();
+   }
+   LLVMDisposeMessage(error);
+
+   provider = LLVMCreateModuleProviderForExistingModule(module);
+   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+      dump_conv_types(stderr, src_type, dst_type);
+      fprintf(stderr, "\n");
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+#if 0
+   pass = LLVMCreatePassManager();
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(pass);
+   LLVMAddInstructionCombiningPass(pass);
+   LLVMAddPromoteMemoryToRegisterPass(pass);
+   LLVMAddGVNPass(pass);
+   LLVMAddCFGSimplificationPass(pass);
+   LLVMRunPassManager(pass, module);
+#else
+   (void)pass;
+#endif
+
+   if(verbose >= 2)
+      LLVMDumpModule(module);
+
+   conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
+
+   success = TRUE;
+   for(i = 0; i < n && success; ++i) {
+      unsigned src_stride = src_type.length*src_type.width/8;
+      unsigned dst_stride = dst_type.length*dst_type.width/8;
+      /* NOTE(review): these buffers are accessed as whole vectors by the
+       * generated code but carry no explicit alignment -- confirm the
+       * emitted loads/stores tolerate that. */
+      uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      int64_t start_counter = 0;
+      int64_t end_counter = 0;
+
+      /* Random inputs, also read back as doubles for the reference */
+      for(j = 0; j < num_srcs; ++j) {
+         random_vec(src_type, src + j*src_stride);
+         read_vec(src_type, src + j*src_stride, fref + j*src_type.length);
+      }
+
+      /* Expected output: the double values written as dst_type */
+      for(j = 0; j < num_dsts; ++j) {
+         write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length);
+      }
+
+      start_counter = rdtsc();
+      conv_test_ptr(src, dst);
+      end_counter = rdtsc();
+
+      cycles[i] = end_counter - start_counter;
+
+      for(j = 0; j < num_dsts; ++j) {
+         if(!compare_vec(dst_type, dst + j*dst_stride, ref + j*dst_stride))
+            success = FALSE;
+      }
+
+      if (!success) {
+         dump_conv_types(stderr, src_type, dst_type);
+         fprintf(stderr, "\n");
+         fprintf(stderr, "MISMATCH\n");
+
+         for(j = 0; j < num_srcs; ++j) {
+            fprintf(stderr, "  Src%u: ", j);
+            dump_vec(stderr, src_type, src + j*src_stride);
+            fprintf(stderr, "\n");
+         }
+
+         for(j = 0; j < src_type.length*num_srcs; ++j)
+            fprintf(stderr, " %f", fref[j]);
+         fprintf(stderr, "\n");
+
+         for(j = 0; j < num_dsts; ++j) {
+            fprintf(stderr, "  Dst%u: ", j);
+            dump_vec(stderr, dst_type, dst + j*dst_stride);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, "  Ref%u: ", j);
+            dump_vec(stderr, dst_type, ref + j*dst_stride);
+            fprintf(stderr, "\n");
+         }
+      }
+   }
+
+   /*
+    * Unfortunately the output of cycle counter is not very reliable as it comes
+    * -- sometimes we get outliers (due IRQs perhaps?) which are
+    * better removed to avoid random or biased data.
+    */
+   {
+      /* The sampling loop stops at the first mismatch, so only the first
+       * num_samples entries of cycles[] hold measurements. At least one
+       * sample is always taken. */
+      const unsigned num_samples = i;
+      double sum = 0.0, sum2 = 0.0;
+      double avg, var, std;
+      unsigned m;
+
+      for(i = 0; i < num_samples; ++i) {
+         /* Square in floating point; an int64_t product could overflow */
+         const double c = (double)cycles[i];
+         sum += c;
+         sum2 += c*c;
+      }
+
+      avg = sum/num_samples;
+      var = (sum2 - num_samples*avg*avg)/num_samples;
+      /* Rounding can push the variance slightly below zero */
+      std = var > 0.0 ? sqrt(var) : 0.0;
+
+      m = 0;
+      sum = 0.0;
+      for(i = 0; i < num_samples; ++i) {
+         if(fabs(cycles[i] - avg) <= 4.0*std) {
+            sum += cycles[i];
+            ++m;
+         }
+      }
+
+      /* Fall back to the plain mean if every sample was filtered out */
+      cycles_avg = m ? sum/m : avg;
+
+   }
+
+   if(verbose >= 1) {
+      fprintf(stdout, " cycles=%.1f", cycles_avg);
+   }
+
+   if(verbose >= 1) {
+      fprintf(stdout, " result=%s\n", success ? "pass" : "fail");
+      fflush(stdout);
+   }
+
+   if(fp)
+      write_tsv_row(fp, src_type, dst_type, cycles_avg, success);
+
+   if (!success) {
+      LLVMDumpModule(module);
+      LLVMWriteBitcodeToFile(module, "conv.bc");
+      fprintf(stderr, "conv.bc written\n");
+      abort();
+   }
+
+   LLVMFreeMachineCodeForFunction(engine, func);
+
+   LLVMDisposeExecutionEngine(engine);
+   if(pass)
+      LLVMDisposePassManager(pass);
+
+   return success;
+}
+
+/**
+ * Vector types exercised by test_all() and test_some(). The column comment
+ * below names the fields in initializer order.
+ */
+const union lp_type conv_types[] = {
+   /* float, fixed,  sign,  norm, width, len */
+   {{  TRUE, FALSE,  TRUE,  TRUE,    32,   4 }}, /* f32 x 4 */
+   {{ FALSE, FALSE, FALSE,  TRUE,     8,  16 }}, /* u8n x 16 */
+};
+
+
+const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); /* number of entries in conv_types */
+
+
+/**
+ * Run test_one() for every pair of distinct source and destination types.
+ *
+ * Only the first entry of conv_types is used as a source for now: the outer
+ * loop bound is hard-coded to 1 instead of num_types. Every entry is used
+ * as a destination.
+ *
+ * @param verbose  verbosity level forwarded to test_one()
+ * @param fp       optional TSV stream forwarded to test_one()
+ * @return TRUE when every conversion passed
+ */
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+   const union lp_type *src_type;
+   const union lp_type *dst_type;
+   boolean success = TRUE;
+
+   for(src_type = conv_types; src_type < &conv_types[1 /* num_types */]; ++src_type) {
+      for(dst_type = conv_types; dst_type < &conv_types[num_types]; ++dst_type) {
+
+         /* Converting a type to itself is not tested */
+         if(src_type == dst_type)
+            continue;
+
+         if(!test_one(verbose, fp, *src_type, *dst_type))
+           success = FALSE;
+
+      }
+   }
+
+   return success;
+}
+
+
+/**
+ * Run test_one() n times with a randomly chosen destination type.
+ *
+ * The source is always the first entry of conv_types for now; random source
+ * selection is commented out. The destination is redrawn until it differs
+ * from the source, which requires conv_types to hold at least two entries.
+ *
+ * @param verbose  verbosity level forwarded to test_one()
+ * @param fp       optional TSV stream forwarded to test_one()
+ * @param n        number of conversions to run
+ * @return TRUE when every conversion passed
+ */
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+   const union lp_type *src_type;
+   const union lp_type *dst_type;
+   unsigned long i;
+   boolean success = TRUE;
+
+   for(i = 0; i < n; ++i) {
+      src_type = &conv_types[0 /* random() % num_types */];
+
+      do {
+         dst_type = &conv_types[random() % num_types];
+      } while (src_type == dst_type);
+
+      if(!test_one(verbose, fp, *src_type, *dst_type))
+        success = FALSE;
+   }
+
+   return success;
+}