llvmpipe: Bootstrap type conversions.

This commit is contained in:
José Fonseca 2009-08-07 09:51:48 +01:00
parent b19cb0080c
commit 8988424ee8
4 changed files with 642 additions and 0 deletions

View file

@ -12,6 +12,7 @@ llvmpipe = env.ConvenienceLibrary(
'lp_fs_llvm.c',
'lp_bld_arit.c',
'lp_bld_const.c',
'lp_bld_conv.c',
'lp_bld_intr.c',
'lp_bld_pack.c',
'lp_bld_unpack.c',
@ -67,4 +68,9 @@ env.Program(
source = ['lp_test_blend.c', 'lp_test_main.c'],
)
# Program built from the type conversion unit test (lp_test_conv.c) and the
# shared test driver (lp_test_main.c).
env.Program(
target = 'lp_test_conv',
source = ['lp_test_conv.c', 'lp_test_main.c'],
)
Export('llvmpipe')

View file

@ -0,0 +1,190 @@
/**************************************************************************
*
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* @file
* Helper functions for type conversions.
*
* LLVM IR doesn't support all basic arithmetic operations we care about (most
* notably min/max and saturated operations), and it is often necessary to
* resort to machine-specific intrinsics directly. The functions here hide all
* these implementation details from the other modules.
*
* We also do simple expression simplification here. Reasons are:
* - it is very easy given we have all necessary information readily available
* - LLVM optimization passes fail to simplify several vector expressions
* - We often know value constraints which the optimization passes have no way
* of knowing, such as when source arguments are known to be in [0, 1] range.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
#include "util/u_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_conv.h"
/**
 * Pack several vectors of wide elements into one vector of narrower elements.
 *
 * Every pass of the outer loop halves the element width and doubles the
 * vector length: adjacent pairs of vectors are merged with an x86 SSE2 pack
 * intrinsic, which halves the number of live vectors. The loop ends once the
 * element width has reached dst_type.width.
 *
 * Only 32 -> 16 bit and 16 -> 8 bit steps are implemented; any other source
 * width trips assert(0).
 *
 * @param builder   builder the pack and bitcast instructions are emitted with
 * @param src_type  type of each vector in @p src
 * @param dst_type  type whose element width the result must reach
 * @param src       source vectors; read only, never written
 * @param num_srcs  number of vectors in @p src
 * @return the single remaining packed vector
 */
static LLVMValueRef
lp_build_trunc(LLVMBuilderRef builder,
               union lp_type src_type,
               union lp_type dst_type,
               LLVMValueRef *src, unsigned num_srcs)
{
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   /* Register width must remain constant */
   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length);

   /* The working copy below lives in a fixed-size array */
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);

   for(i = 0; i < num_srcs; ++i)
      tmp[i] = src[i];

   while(src_type.width > dst_type.width) {
      LLVMTypeRef tmp_vec_type = lp_build_vec_type(src_type);
      union lp_type new_type = src_type;
      LLVMTypeRef new_vec_type;

      /* Same register size, twice as many elements of half the width */
      new_type.width /= 2;
      new_type.length *= 2;
      new_vec_type = lp_build_vec_type(new_type);

      for(i = 0; i < num_srcs/2; ++i) {
         LLVMValueRef lo = tmp[2*i + 0];
         LLVMValueRef hi = tmp[2*i + 1];
         LLVMValueRef packed = NULL;

         if(src_type.width == 32) {
            /* FIXME: we only have a packed signed intrinsic */
            packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", tmp_vec_type, lo, hi);
         }
         else if(src_type.width == 16) {
            if(dst_type.sign)
               packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", tmp_vec_type, lo, hi);
            else
               packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", tmp_vec_type, lo, hi);
         }
         else
            assert(0);

         /* The intrinsic is declared with the wide vector type; reinterpret
          * its result as the narrower one. */
         tmp[i] = LLVMBuildBitCast(builder, packed, new_vec_type, "");
      }

      src_type = new_type;
      num_srcs /= 2;
   }

   assert(num_srcs == 1);

   return tmp[0];
}
/**
 * Convert between two SIMD types.
 *
 * Converting between SIMD types of different element width poses a problem:
 * SIMD registers have a fixed number of bits, so different element widths
 * imply different vector lengths. Therefore we must multiplex the multiple
 * incoming sources into a single destination vector, or demux a single incoming
 * vector into multiple vectors.
 *
 * Only conversions that narrow the element width are implemented so far; any
 * other combination trips assert(0).
 *
 * @param builder   builder the conversion instructions are emitted with
 * @param src_type  type of each vector in @p src
 * @param dst_type  type of each vector in @p dst
 * @param src       source vectors; the entries are overwritten with
 *                  intermediate values while converting
 * @param num_srcs  number of vectors in @p src
 * @param dst       receives the converted vector(s); only dst[0] is written
 * @param num_dsts  number of vectors in @p dst, must be 1 for now
 */
void
lp_build_conv(LLVMBuilderRef builder,
              union lp_type src_type,
              union lp_type dst_type,
              LLVMValueRef *src, unsigned num_srcs,
              LLVMValueRef *dst, unsigned num_dsts)
{
   unsigned i;

   /* Register width must remain constant */
   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   if(!src_type.norm && dst_type.norm) {
      /* FIXME: clamp */
   }

   if(src_type.floating && !dst_type.floating) {
      double dscale;
      LLVMTypeRef tmp;

      /* Rescale */
      dscale = lp_const_scale(dst_type);
      if (dscale != 1.0) {
         LLVMValueRef scale = lp_build_const_uni(src_type, dscale);
         for(i = 0; i < num_srcs; ++i)
            src[i] = LLVMBuildMul(builder, src[i], scale, "");
      }

      /* Use an equally sized integer for intermediate computations */
      src_type.floating = FALSE;
      tmp = lp_build_vec_type(src_type);
      for(i = 0; i < num_srcs; ++i) {
#if 0
         if(dst_type.sign)
            src[i] = LLVMBuildFPToSI(builder, src[i], tmp, "");
         else
            src[i] = LLVMBuildFPToUI(builder, src[i], tmp, "");
#else
         /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
         src[i] = LLVMBuildFPToSI(builder, src[i], tmp, "");
#endif
      }
   }
   else {
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);

      if(src_shift > dst_shift) {
         LLVMValueRef shift = lp_build_int_const_uni(src_type, src_shift - dst_shift);

         /* The kind of right shift must follow the signedness of the value
          * being shifted, i.e. the source: an arithmetic shift on an
          * unsigned source would smear its top bit into the result. */
         for(i = 0; i < num_srcs; ++i) {
            if(src_type.sign)
               src[i] = LLVMBuildAShr(builder, src[i], shift, "");
            else
               src[i] = LLVMBuildLShr(builder, src[i], shift, "");
         }
      }
   }

   if(src_type.width > dst_type.width) {
      assert(num_dsts == 1);
      dst[0] = lp_build_trunc(builder, src_type, dst_type, src, num_srcs);
   }
   else
      assert(0);
}

View file

@ -0,0 +1,54 @@
/**************************************************************************
*
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* @file
* Helper functions for type conversions.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
#ifndef LP_BLD_CONV_H
#define LP_BLD_CONV_H
#include <llvm-c/Core.h>
/* Forward declaration only; the full definition is not needed to declare
 * the prototype below. */
union lp_type;

/**
 * Convert between two SIMD types.
 *
 * @param builder   builder the conversion instructions are emitted with
 * @param src_type  type of each source vector
 * @param dst_type  type of each destination vector
 * @param srcs      source vectors; the implementation overwrites the entries
 *                  with intermediate values
 * @param num_srcs  number of vectors in @p srcs
 * @param dsts      receives the converted vector(s)
 * @param num_dsts  number of vectors in @p dsts
 */
void
lp_build_conv(LLVMBuilderRef builder,
              union lp_type src_type,
              union lp_type dst_type,
              LLVMValueRef *srcs, unsigned num_srcs,
              LLVMValueRef *dsts, unsigned num_dsts);
#endif /* !LP_BLD_CONV_H */

View file

@ -0,0 +1,392 @@
/**************************************************************************
*
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* @file
* Unit tests for type conversion.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
#include "lp_bld_type.h"
#include "lp_bld_conv.h"
#include "lp_test.h"
/**
 * Signature through which the JIT-compiled conversion function is called:
 * a pointer to the source vectors and a pointer to the destination vectors.
 *
 * NOTE(review): dst is const-qualified here although the generated function
 * stores its results through that pointer.
 */
typedef void (*conv_test_ptr_t)(const void *src, const void *dst);
/**
 * Write the column header line of the tab-separated results file.
 *
 * The columns are, in order, the four fields write_tsv_row() emits for every
 * tested conversion.
 *
 * @param fp  stream the header is written to; flushed before returning
 */
void
write_tsv_header(FILE *fp)
{
   fprintf(fp,
           "result\t"
           "cycles\t"
           "src_type\t"
           "dst_type\n");

   fflush(fp);
}
/**
 * Write one result line of the tab-separated results file.
 *
 * Fields, in order: pass/fail, cycle count, source type, destination type.
 * The line is terminated with a newline so every conversion gets its own row.
 *
 * @param fp        stream the row is written to; flushed before returning
 * @param src_type  source type of the tested conversion
 * @param dst_type  destination type of the tested conversion
 * @param cycles    measured cycle count
 * @param success   whether the conversion produced the expected result
 */
static void
write_tsv_row(FILE *fp,
              union lp_type src_type,
              union lp_type dst_type,
              double cycles,
              boolean success)
{
   fprintf(fp, "%s\t", success ? "pass" : "fail");

   /* NOTE(review): the 0.5 offset looks like a leftover from rounding to an
    * integer; with a %.1f format it only biases the figure -- confirm. */
   fprintf(fp, "%.1f\t", cycles + 0.5);

   dump_type(fp, src_type);
   fprintf(fp, "\t");

   dump_type(fp, dst_type);
   fprintf(fp, "\n");

   fflush(fp);
}
/**
 * Print "src_type=<type> dst_type=<type>" for a conversion, with no trailing
 * newline, and flush the stream.
 */
static void
dump_conv_types(FILE *fp,
                union lp_type src_type,
                union lp_type dst_type)
{
   fputs("src_type=", fp);
   dump_type(fp, src_type);

   fputs(" dst_type=", fp);
   dump_type(fp, dst_type);

   fflush(fp);
}
/**
 * Add to the module a function "test(src_ptr, dst_ptr)" that loads num_srcs
 * vectors from src_ptr, converts them with lp_build_conv() and stores the
 * num_dsts resulting vectors to dst_ptr.
 *
 * @return the newly added function
 */
static LLVMValueRef
add_conv_test(LLVMModuleRef module,
              union lp_type src_type, unsigned num_srcs,
              union lp_type dst_type, unsigned num_dsts)
{
   LLVMValueRef in_vecs[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef out_vecs[LP_MAX_VECTOR_LENGTH];
   LLVMTypeRef param_types[2];
   LLVMTypeRef func_type;
   LLVMValueRef test_func;
   LLVMValueRef in_base;
   LLVMValueRef out_base;
   LLVMBasicBlockRef entry;
   LLVMBuilderRef bld;
   unsigned k;

   /* void test(<src vector> *, <dst vector> *) with the C calling convention */
   param_types[0] = LLVMPointerType(lp_build_vec_type(src_type), 0);
   param_types[1] = LLVMPointerType(lp_build_vec_type(dst_type), 0);
   func_type = LLVMFunctionType(LLVMVoidType(), param_types, 2, 0);

   test_func = LLVMAddFunction(module, "test", func_type);
   LLVMSetFunctionCallConv(test_func, LLVMCCallConv);

   in_base = LLVMGetParam(test_func, 0);
   out_base = LLVMGetParam(test_func, 1);

   entry = LLVMAppendBasicBlock(test_func, "entry");
   bld = LLVMCreateBuilder();
   LLVMPositionBuilderAtEnd(bld, entry);

   /* Fetch every source vector */
   for(k = 0; k < num_srcs; ++k) {
      LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), k, 0);
      LLVMValueRef elem_ptr = LLVMBuildGEP(bld, in_base, &idx, 1, "");

      in_vecs[k] = LLVMBuildLoad(bld, elem_ptr, "");
   }

   lp_build_conv(bld, src_type, dst_type, in_vecs, num_srcs, out_vecs, num_dsts);

   /* Write every converted vector back */
   for(k = 0; k < num_dsts; ++k) {
      LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), k, 0);
      LLVMValueRef elem_ptr = LLVMBuildGEP(bld, out_base, &idx, 1, "");

      LLVMBuildStore(bld, out_vecs[k], elem_ptr);
   }

   LLVMBuildRetVoid(bld);

   LLVMDisposeBuilder(bld);

   return test_func;
}
/**
 * Build, JIT-compile and run the conversion from src_type to dst_type on
 * random input, comparing the result against a reference computed on the CPU.
 *
 * The generated function is run up to 32 times, stopping at the first
 * mismatch, and each run is timed with rdtsc(). The average cycle count, with
 * outliers discarded, is printed when verbose >= 1 and written to @p fp when
 * it is non-NULL.
 *
 * @param verbose   >= 1 prints types, cycles and result; >= 2 also dumps the
 *                  LLVM module
 * @param fp        optional TSV results file, may be NULL
 * @param src_type  type to convert from
 * @param dst_type  type to convert to
 * @return TRUE when every run matched the reference. A mismatch dumps the
 *         module, writes it to conv.bc and aborts instead of returning.
 */
static boolean
test_one(unsigned verbose,
         FILE *fp,
         union lp_type src_type,
         union lp_type dst_type)
{
   LLVMModuleRef module = NULL;
   LLVMValueRef func = NULL;
   LLVMExecutionEngineRef engine = NULL;
   LLVMModuleProviderRef provider = NULL;
   LLVMPassManagerRef pass = NULL;
   char *error = NULL;
   conv_test_ptr_t conv_test_ptr;
   boolean success;
   const unsigned n = 32;
   int64_t cycles[n];
   unsigned num_samples;
   double cycles_avg = 0.0;
   unsigned num_srcs;
   unsigned num_dsts;
   unsigned i, j;

   if(verbose >= 1)
      dump_conv_types(stdout, src_type, dst_type);

   /* The side with the shorter vectors needs several of them to carry the
    * same number of channels as one vector of the other side. */
   if(src_type.length > dst_type.length) {
      num_srcs = 1;
      num_dsts = src_type.length/dst_type.length;
   }
   else {
      num_dsts = 1;
      num_srcs = dst_type.length/src_type.length;
   }

   /* Register width must remain constant */
   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   module = LLVMModuleCreateWithName("test");

   func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts);

   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
      LLVMDumpModule(module);
      abort();
   }
   LLVMDisposeMessage(error);

   provider = LLVMCreateModuleProviderForExistingModule(module);
   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
      dump_conv_types(stderr, src_type, dst_type);
      fprintf(stderr, "\n");
      fprintf(stderr, "%s\n", error);
      LLVMDisposeMessage(error);
      abort();
   }

#if 0
   pass = LLVMCreatePassManager();
   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
    * but there are more on SVN. */
   LLVMAddConstantPropagationPass(pass);
   LLVMAddInstructionCombiningPass(pass);
   LLVMAddPromoteMemoryToRegisterPass(pass);
   LLVMAddGVNPass(pass);
   LLVMAddCFGSimplificationPass(pass);
   LLVMRunPassManager(pass, module);
#else
   (void)pass;
#endif

   if(verbose >= 2)
      LLVMDumpModule(module);

   conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func);

   success = TRUE;
   for(i = 0; i < n && success; ++i) {
      unsigned src_stride = src_type.length*src_type.width/8;
      unsigned dst_stride = dst_type.length*dst_type.width/8;
      /* NOTE(review): these byte buffers carry no explicit alignment while
       * the generated code loads and stores whole vectors through them --
       * confirm that is safe on the target. */
      uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      int64_t start_counter = 0;
      int64_t end_counter = 0;

      /* Random input, read back as doubles to serve as reference values */
      for(j = 0; j < num_srcs; ++j) {
         random_vec(src_type, src + j*src_stride);
         read_vec(src_type, src + j*src_stride, fref + j*src_type.length);
      }

      /* Expected output: the reference values written in the destination type */
      for(j = 0; j < num_dsts; ++j) {
         write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length);
      }

      start_counter = rdtsc();
      conv_test_ptr(src, dst);
      end_counter = rdtsc();

      cycles[i] = end_counter - start_counter;

      for(j = 0; j < num_dsts; ++j) {
         if(!compare_vec(dst_type, dst + j*dst_stride, ref + j*dst_stride))
            success = FALSE;
      }

      if (!success) {
         dump_conv_types(stderr, src_type, dst_type);
         fprintf(stderr, "\n");

         fprintf(stderr, "MISMATCH\n");

         for(j = 0; j < num_srcs; ++j) {
            fprintf(stderr, " Src%u: ", j);
            dump_vec(stderr, src_type, src + j*src_stride);
            fprintf(stderr, "\n");
         }

         for(j = 0; j < src_type.length*num_srcs; ++j)
            fprintf(stderr, " %f", fref[j]);
         fprintf(stderr, "\n");

         for(j = 0; j < num_dsts; ++j) {
            fprintf(stderr, " Dst%u: ", j);
            dump_vec(stderr, dst_type, dst + j*dst_stride);
            fprintf(stderr, "\n");

            fprintf(stderr, " Ref%u: ", j);
            dump_vec(stderr, dst_type, ref + j*dst_stride);
            fprintf(stderr, "\n");
         }
      }
   }

   /* The loop above stops at the first mismatch, so only the first i entries
    * of cycles[] hold measurements. At least one run always takes place. */
   num_samples = i;

   /*
    * Unfortunately the output of the cycle counter is not very reliable --
    * sometimes we get outliers (due to IRQs perhaps?) which are better
    * removed to avoid random or biased data.
    */
   {
      double sum = 0.0, sum2 = 0.0;
      double avg, var, std;
      unsigned m;

      for(i = 0; i < num_samples; ++i) {
         double c = (double)cycles[i];
         sum += c;
         sum2 += c*c;
      }

      avg = sum/num_samples;
      var = (sum2 - num_samples*avg*avg)/num_samples;
      /* Rounding can push the variance of near-identical samples below zero */
      std = var > 0.0 ? sqrt(var) : 0.0;

      /* Average again, keeping only samples within four standard deviations */
      m = 0;
      sum = 0.0;
      for(i = 0; i < num_samples; ++i) {
         if(fabs(cycles[i] - avg) <= 4.0*std) {
            sum += cycles[i];
            ++m;
         }
      }

      cycles_avg = m ? sum/m : avg;
   }

   if(verbose >= 1) {
      fprintf(stdout, " cycles=%.1f", cycles_avg);
   }

   if(verbose >= 1) {
      fprintf(stdout, " result=%s\n", success ? "pass" : "fail");
      fflush(stdout);
   }

   if(fp)
      write_tsv_row(fp, src_type, dst_type, cycles_avg, success);

   if (!success) {
      LLVMDumpModule(module);
      LLVMWriteBitcodeToFile(module, "conv.bc");
      fprintf(stderr, "conv.bc written\n");
      abort();
   }

   LLVMFreeMachineCodeForFunction(engine, func);

   LLVMDisposeExecutionEngine(engine);
   if(pass)
      LLVMDisposePassManager(pass);

   return success;
}
/**
 * Vector types the conversion tests pick their source and destination
 * types from.
 */
const union lp_type conv_types[] = {
/* float, fixed, sign, norm, width, len */
{{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, /* f32 x 4 */
{{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */
};
/** Number of entries in conv_types. */
const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
/**
 * Run test_one() for the enabled source types against every other entry of
 * conv_types.
 *
 * @param verbose  verbosity level forwarded to test_one()
 * @param fp       optional TSV results file forwarded to test_one()
 * @return TRUE when every tested conversion passed
 */
boolean
test_all(unsigned verbose, FILE *fp)
{
   const union lp_type *src_type;
   const union lp_type *dst_type;
   boolean success = TRUE;

   /* Only the first table entry is used as a source type for now; the
    * commented-out bound is the one that would cover the whole table. */
   for(src_type = conv_types; src_type < &conv_types[1 /* num_types */]; ++src_type) {
      for(dst_type = conv_types; dst_type < &conv_types[num_types]; ++dst_type) {

         /* Converting a type to itself is not tested */
         if(src_type == dst_type)
            continue;

         if(!test_one(verbose, fp, *src_type, *dst_type))
            success = FALSE;
      }
   }

   return success;
}
/**
 * Run test_one() on n conversions with a randomly chosen destination type.
 *
 * @param verbose  verbosity level forwarded to test_one()
 * @param fp       optional TSV results file forwarded to test_one()
 * @param n        number of conversions to test
 * @return TRUE when every tested conversion passed
 */
boolean
test_some(unsigned verbose, FILE *fp, unsigned long n)
{
   const union lp_type *src_type;
   const union lp_type *dst_type;
   unsigned long i;
   boolean success = TRUE;

   /* Picking a destination that differs from the source can only terminate
    * when the table holds at least two types. */
   assert(num_types > 1);

   for(i = 0; i < n; ++i) {
      /* The source type is pinned to the first table entry for now */
      src_type = &conv_types[0 /* random() % num_types */];

      do {
         dst_type = &conv_types[random() % num_types];
      } while (src_type == dst_type);

      if(!test_one(verbose, fp, *src_type, *dst_type))
         success = FALSE;
   }

   return success;
}