llvmpipe: Immediate multiplication.

This commit is contained in:
José Fonseca 2009-10-25 11:48:17 +00:00
parent e1342f871b
commit e4c5e01c10
4 changed files with 64 additions and 32 deletions

View file

@ -47,6 +47,7 @@
#include "util/u_memory.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_string.h"
#include "util/u_cpu_detect.h"
@ -424,6 +425,59 @@ lp_build_mul(struct lp_build_context *bld,
}
/**
 * Multiply a by the immediate integer b, avoiding a full multiply when a
 * cheaper instruction sequence gives the same value.
 *
 * - b == 0 returns the context's zero value.
 * - b == 1 returns a itself.
 * - b == -1 emits a negation.
 * - b == 2 on a floating point type emits a + a.
 * - a power of two b on a non floating point type emits a left shift by
 *   ffs(b) - 1 bits.
 * - everything else is a regular lp_build_mul() by a constant built from b.
 *
 * @param bld  build context supplying the builder, the type and the zero value
 * @param a    value to multiply
 * @param b    immediate multiplier
 * @return     the LLVM value holding the product
 */
LLVMValueRef
lp_build_mul_imm(struct lp_build_context *bld,
                 LLVMValueRef a,
                 int b)
{
   /* Trivial multipliers that need at most one cheap instruction. */
   switch(b) {
   case 0:
      return bld->zero;
   case 1:
      return a;
   case -1:
      return LLVMBuildNeg(bld->builder, a, "");
   case 2:
      /* Only floating point takes the add; integers use the shift below. */
      if(bld->type.floating)
         return lp_build_add(bld, a, a);
      break;
   default:
      break;
   }

   if(util_is_pot(b)) {
      /* ffs() is one-based, so this is the index of the single set bit. */
      unsigned log2_b = ffs(b) - 1;

      if(!bld->type.floating) {
         /*
          * NOTE(review): the shift count goes through lp_build_const_scalar()
          * and is not checked against the lane width -- confirm both are
          * fine for every non floating point type callers pass in.
          */
         LLVMValueRef count = lp_build_const_scalar(bld->type, log2_b);
         return LLVMBuildShl(bld->builder, a, count, "");
      }

#if 0
      /*
       * Disabled experiment: floating point power of two multiplication by
       * adding (log2_b << mantissa width) to the integer bit pattern of a.
       *
       * XXX: This might not be always faster, it will introduce a small error
       * for multiplication by zero, and it will produce wrong results
       * for Inf and NaN.
       */
      {
         unsigned mantissa = lp_mantissa(bld->type);
         LLVMValueRef bias = lp_build_int_const_scalar(bld->type, (unsigned long long)log2_b << mantissa);
         a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
         a = LLVMBuildAdd(bld->builder, a, bias, "");
         a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
         return a;
      }
#endif
   }

   /* General case: multiply by a constant holding b. */
   return lp_build_mul(bld, a, lp_build_const_scalar(bld->type, (double)b));
}
/**
* Generate a / b
*/

View file

@ -66,6 +66,11 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
/**
 * Generate a * b, where b is an integer known when the code is generated.
 *
 * The implementation replaces the multiply with something cheaper for some
 * values of b: 0 gives the context's zero value, 1 gives a unchanged, -1 a
 * negation, 2 an add for floating point types, and other powers of two a
 * left shift for non floating point types.
 */
LLVMValueRef
lp_build_mul_imm(struct lp_build_context *bld,
LLVMValueRef a,
int b);
LLVMValueRef
lp_build_div(struct lp_build_context *bld,
LLVMValueRef a,

View file

@ -108,32 +108,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
}
/**
 * Scale a coefficient by a small integer step.
 *
 * Steps of 0, 1 and 2 avoid the multiply: 0 yields the base context's zero
 * value, 1 yields the coefficient unchanged and 2 is emitted as the
 * coefficient added to itself.  Any other step is multiplied by a constant
 * built from it.
 *
 * TODO: Should be elsewhere.
 */
static LLVMValueRef
coeff_multiply(struct lp_build_interp_soa_context *bld,
               LLVMValueRef coeff,
               int step)
{
   struct lp_build_context *base = &bld->base;

   if(step == 0) {
      return base->zero;
   }

   if(step == 1) {
      return coeff;
   }

   if(step == 2) {
      return lp_build_add(base, coeff, coeff);
   }

   /* General case: multiply by a constant holding the step. */
   return lp_build_mul(base, coeff,
                       lp_build_const_scalar(base->type, (double)step));
}
/**
* Multiply the dadx and dady with the xstep and ystep respectively.
*/
@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld)
if (mode != TGSI_INTERPOLATE_CONSTANT) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
if(mask & (1 << chan)) {
bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep);
bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep);
}
}
}

View file

@ -309,7 +309,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255;
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
LLVMValueRef x0, x1;
@ -328,9 +328,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);
f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0);
s = lp_build_mul(&bld->coord_bld, s, f32_c256);
t = lp_build_mul(&bld->coord_bld, t, f32_c256);
s = lp_build_mul_imm(&bld->coord_bld, s, 256);
t = lp_build_mul_imm(&bld->coord_bld, t, 256);
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");