gallivm: Support extended swizzles in lp_build_swizzle1_aos().

And rename to lp_build_swizzle_aos().
This commit is contained in:
José Fonseca 2010-07-01 12:16:09 +01:00
parent e277d5c1f6
commit a70ec096aa
3 changed files with 169 additions and 23 deletions

View file

@ -61,8 +61,8 @@ LLVMValueRef
lp_build_ddx(struct lp_build_context *bld,
LLVMValueRef a)
{
LLVMValueRef a_left = lp_build_swizzle1_aos(bld, a, swizzle_left);
LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right);
LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left);
LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
return lp_build_sub(bld, a_right, a_left);
}
@ -71,8 +71,8 @@ LLVMValueRef
lp_build_ddy(struct lp_build_context *bld,
LLVMValueRef a)
{
LLVMValueRef a_top = lp_build_swizzle1_aos(bld, a, swizzle_top);
LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom);
LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top);
LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
return lp_build_sub(bld, a_bottom, a_top);
}

View file

@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
* the use of shuffles here actually causes worse results. More investigation
* is needed. */
if (n <= 4) {
if (type.width >= 16) {
/*
* Shuffle.
*/
@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
* YY00 YY00 .... YY00
* YYYY YYYY .... YYYY <= output
*/
struct lp_type type4 = type;
struct lp_type type4;
const char shifts[4][2] = {
{ 1, 2},
{-1, 2},
@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
/*
* Build a type where each element is an integer that covers the four
* channels.
*/
type4 = type;
type4.floating = FALSE;
type4.width *= 4;
type4.length /= 4;
@ -176,31 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
const unsigned char swizzle[4])
lp_build_swizzle_aos(struct lp_build_context *bld,
LLVMValueRef a,
const unsigned char swizzles[4])
{
const unsigned n = bld->type.length;
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
if(a == bld->undef || a == bld->zero || a == bld->one)
if (swizzles[0] == PIPE_SWIZZLE_RED &&
swizzles[1] == PIPE_SWIZZLE_GREEN &&
swizzles[2] == PIPE_SWIZZLE_BLUE &&
swizzles[3] == PIPE_SWIZZLE_ALPHA) {
return a;
}
if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
return lp_build_broadcast_aos(bld, a, swizzle[0]);
if (swizzles[0] == swizzles[1] &&
swizzles[1] == swizzles[2] &&
swizzles[2] == swizzles[3]) {
switch (swizzles[0]) {
case PIPE_SWIZZLE_RED:
case PIPE_SWIZZLE_GREEN:
case PIPE_SWIZZLE_BLUE:
case PIPE_SWIZZLE_ALPHA:
return lp_build_broadcast_aos(bld, a, swizzles[0]);
case PIPE_SWIZZLE_ZERO:
return bld->zero;
case PIPE_SWIZZLE_ONE:
return bld->one;
default:
assert(0);
return bld->undef;
}
}
{
if (type.width >= 16) {
/*
* Shuffle.
*/
LLVMTypeRef elem_type = LLVMInt32Type();
LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
for(j = 0; j < n; j += 4)
for(i = 0; i < 4; ++i)
shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
memset(aux, 0, sizeof aux);
return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
for(j = 0; j < n; j += 4) {
for(i = 0; i < 4; ++i) {
unsigned shuffle;
switch (swizzles[i]) {
default:
assert(0);
/* fall through */
case PIPE_SWIZZLE_RED:
case PIPE_SWIZZLE_GREEN:
case PIPE_SWIZZLE_BLUE:
case PIPE_SWIZZLE_ALPHA:
shuffle = j + swizzles[i];
break;
case PIPE_SWIZZLE_ZERO:
shuffle = type.length + 0;
if (!aux[0]) {
aux[0] = lp_build_const_elem(type, 0.0);
}
break;
case PIPE_SWIZZLE_ONE:
shuffle = type.length + 1;
if (!aux[1]) {
aux[1] = lp_build_const_elem(type, 1.0);
}
break;
}
shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
}
}
for (i = 0; i < n; ++i) {
if (!aux[i]) {
aux[i] = undef;
}
}
return LLVMBuildShuffleVector(bld->builder, a,
LLVMConstVector(aux, n),
LLVMConstVector(shuffles, n), "");
} else {
/*
* Bit mask and shifts.
*
* For example, this will convert BGRA to RGBA by doing
*
* rgba = (bgra & 0x00ff0000) >> 16
* | (bgra & 0xff00ff00)
* | (bgra & 0x000000ff) << 16
*
* This is necessary not only because it is faster, but also because the
* X86 backend will refuse shuffles of <4 x i8> vectors.
*/
LLVMValueRef res;
struct lp_type type4;
boolean cond[4];
unsigned chan;
int shift;
/*
* Start with a mixture of 1 and 0.
*/
for (chan = 0; chan < 4; ++chan) {
cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE;
}
res = lp_build_select_aos(bld, bld->one, bld->zero, cond);
/*
* Build a type where each element is an integer that covers the four
* channels.
*/
type4 = type;
type4.floating = FALSE;
type4.width *= 4;
type4.length /= 4;
a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
/*
* Mask and shift the channels, trying to group as many channels in the
* same shift as possible
*/
for (shift = -3; shift <= 3; ++shift) {
unsigned long long mask = 0;
assert(type4.width <= sizeof(mask)*8);
for (chan = 0; chan < 4; ++chan) {
/* FIXME: big endian */
if (swizzles[chan] < 4 &&
chan - swizzles[chan] == shift) {
mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
}
}
if (mask) {
LLVMValueRef masked;
LLVMValueRef shifted;
if (0)
debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
masked = LLVMBuildAnd(bld->builder, a,
lp_build_const_int_vec(type4, mask), "");
if (shift > 0) {
shifted = LLVMBuildShl(bld->builder, masked,
lp_build_const_int_vec(type4, shift*type.width), "");
} else if (shift < 0) {
shifted = LLVMBuildLShr(bld->builder, masked,
lp_build_const_int_vec(type4, -shift*type.width), "");
} else {
shifted = masked;
}
res = LLVMBuildOr(bld->builder, res, shifted, "");
}
}
return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
}
}

View file

@ -68,12 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/**
* Swizzle a vector consisting of an array of XYZW structs.
*
* @param swizzle is in the [0,4[ range.
* @param swizzles holds one PIPE_SWIZZLE_x selector per channel
* (RED, GREEN, BLUE, ALPHA, ZERO or ONE).
*/
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
const unsigned char swizzle[4]);
lp_build_swizzle_aos(struct lp_build_context *bld,
LLVMValueRef a,
const unsigned char swizzles[4]);
LLVMValueRef