mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 08:10:09 +01:00
gallivm: Support extended swizzles in lp_build_swizzle1_aos().
And rename to lp_build_swizzle_aos().
This commit is contained in:
parent
e277d5c1f6
commit
a70ec096aa
3 changed files with 169 additions and 23 deletions
|
|
@ -61,8 +61,8 @@ LLVMValueRef
|
|||
lp_build_ddx(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
LLVMValueRef a_left = lp_build_swizzle1_aos(bld, a, swizzle_left);
|
||||
LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right);
|
||||
LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left);
|
||||
LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
|
||||
return lp_build_sub(bld, a_right, a_left);
|
||||
}
|
||||
|
||||
|
|
@ -71,8 +71,8 @@ LLVMValueRef
|
|||
lp_build_ddy(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
LLVMValueRef a_top = lp_build_swizzle1_aos(bld, a, swizzle_top);
|
||||
LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom);
|
||||
LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top);
|
||||
LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
|
||||
return lp_build_sub(bld, a_bottom, a_top);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
/* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing
|
||||
* using shuffles here actually causes worse results. More investigation is
|
||||
* needed. */
|
||||
if (n <= 4) {
|
||||
if (type.width >= 16) {
|
||||
/*
|
||||
* Shuffle.
|
||||
*/
|
||||
|
|
@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
* YY00 YY00 .... YY00
|
||||
* YYYY YYYY .... YYYY <= output
|
||||
*/
|
||||
struct lp_type type4 = type;
|
||||
struct lp_type type4;
|
||||
const char shifts[4][2] = {
|
||||
{ 1, 2},
|
||||
{-1, 2},
|
||||
|
|
@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
|
||||
a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
|
||||
|
||||
/*
|
||||
* Build a type where each element is an integer that covers the four
|
||||
* channels.
|
||||
*/
|
||||
|
||||
type4 = type;
|
||||
type4.floating = FALSE;
|
||||
type4.width *= 4;
|
||||
type4.length /= 4;
|
||||
|
||||
|
|
@ -176,31 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_swizzle1_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
const unsigned char swizzle[4])
|
||||
lp_build_swizzle_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
const unsigned char swizzles[4])
|
||||
{
|
||||
const unsigned n = bld->type.length;
|
||||
const struct lp_type type = bld->type;
|
||||
const unsigned n = type.length;
|
||||
unsigned i, j;
|
||||
|
||||
if(a == bld->undef || a == bld->zero || a == bld->one)
|
||||
if (swizzles[0] == PIPE_SWIZZLE_RED &&
|
||||
swizzles[1] == PIPE_SWIZZLE_GREEN &&
|
||||
swizzles[2] == PIPE_SWIZZLE_BLUE &&
|
||||
swizzles[3] == PIPE_SWIZZLE_ALPHA) {
|
||||
return a;
|
||||
}
|
||||
|
||||
if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
|
||||
return lp_build_broadcast_aos(bld, a, swizzle[0]);
|
||||
if (swizzles[0] == swizzles[1] &&
|
||||
swizzles[1] == swizzles[2] &&
|
||||
swizzles[2] == swizzles[3]) {
|
||||
switch (swizzles[0]) {
|
||||
case PIPE_SWIZZLE_RED:
|
||||
case PIPE_SWIZZLE_GREEN:
|
||||
case PIPE_SWIZZLE_BLUE:
|
||||
case PIPE_SWIZZLE_ALPHA:
|
||||
return lp_build_broadcast_aos(bld, a, swizzles[0]);
|
||||
case PIPE_SWIZZLE_ZERO:
|
||||
return bld->zero;
|
||||
case PIPE_SWIZZLE_ONE:
|
||||
return bld->one;
|
||||
default:
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
if (type.width >= 16) {
|
||||
/*
|
||||
* Shuffle.
|
||||
*/
|
||||
LLVMTypeRef elem_type = LLVMInt32Type();
|
||||
LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
|
||||
LLVMTypeRef i32t = LLVMInt32Type();
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
|
||||
memset(aux, 0, sizeof aux);
|
||||
|
||||
return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
|
||||
for(j = 0; j < n; j += 4) {
|
||||
for(i = 0; i < 4; ++i) {
|
||||
unsigned shuffle;
|
||||
switch (swizzles[i]) {
|
||||
default:
|
||||
assert(0);
|
||||
/* fall through */
|
||||
case PIPE_SWIZZLE_RED:
|
||||
case PIPE_SWIZZLE_GREEN:
|
||||
case PIPE_SWIZZLE_BLUE:
|
||||
case PIPE_SWIZZLE_ALPHA:
|
||||
shuffle = j + swizzles[i];
|
||||
break;
|
||||
case PIPE_SWIZZLE_ZERO:
|
||||
shuffle = type.length + 0;
|
||||
if (!aux[0]) {
|
||||
aux[0] = lp_build_const_elem(type, 0.0);
|
||||
}
|
||||
break;
|
||||
case PIPE_SWIZZLE_ONE:
|
||||
shuffle = type.length + 1;
|
||||
if (!aux[1]) {
|
||||
aux[1] = lp_build_const_elem(type, 1.0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (!aux[i]) {
|
||||
aux[i] = undef;
|
||||
}
|
||||
}
|
||||
|
||||
return LLVMBuildShuffleVector(bld->builder, a,
|
||||
LLVMConstVector(aux, n),
|
||||
LLVMConstVector(shuffles, n), "");
|
||||
} else {
|
||||
/*
|
||||
* Bit mask and shifts.
|
||||
*
|
||||
* For example, this will convert BGRA to RGBA by doing
|
||||
*
|
||||
* rgba = (bgra & 0x00ff0000) >> 16
|
||||
* | (bgra & 0xff00ff00)
|
||||
* | (bgra & 0x000000ff) << 16
|
||||
*
|
||||
* This is necessary not only because it is faster, but also because the X86 backend
|
||||
* will refuse shuffles of <4 x i8> vectors
|
||||
*/
|
||||
LLVMValueRef res;
|
||||
struct lp_type type4;
|
||||
boolean cond[4];
|
||||
unsigned chan;
|
||||
int shift;
|
||||
|
||||
/*
|
||||
* Start with a mixture of 1 and 0.
|
||||
*/
|
||||
for (chan = 0; chan < 4; ++chan) {
|
||||
cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE;
|
||||
}
|
||||
res = lp_build_select_aos(bld, bld->one, bld->zero, cond);
|
||||
|
||||
/*
|
||||
* Build a type where each element is an integer that covers the four
|
||||
* channels.
|
||||
*/
|
||||
type4 = type;
|
||||
type4.floating = FALSE;
|
||||
type4.width *= 4;
|
||||
type4.length /= 4;
|
||||
|
||||
a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
|
||||
res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
|
||||
|
||||
/*
|
||||
* Mask and shift the channels, trying to group as many channels in the
|
||||
* same shift as possible
|
||||
*/
|
||||
for (shift = -3; shift <= 3; ++shift) {
|
||||
unsigned long long mask = 0;
|
||||
|
||||
assert(type4.width <= sizeof(mask)*8);
|
||||
|
||||
for (chan = 0; chan < 4; ++chan) {
|
||||
/* FIXME: big endian */
|
||||
if (swizzles[chan] < 4 &&
|
||||
chan - swizzles[chan] == shift) {
|
||||
mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
|
||||
}
|
||||
}
|
||||
|
||||
if (mask) {
|
||||
LLVMValueRef masked;
|
||||
LLVMValueRef shifted;
|
||||
|
||||
if (0)
|
||||
debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
|
||||
|
||||
masked = LLVMBuildAnd(bld->builder, a,
|
||||
lp_build_const_int_vec(type4, mask), "");
|
||||
if (shift > 0) {
|
||||
shifted = LLVMBuildShl(bld->builder, masked,
|
||||
lp_build_const_int_vec(type4, shift*type.width), "");
|
||||
} else if (shift < 0) {
|
||||
shifted = LLVMBuildLShr(bld->builder, masked,
|
||||
lp_build_const_int_vec(type4, -shift*type.width), "");
|
||||
} else {
|
||||
shifted = masked;
|
||||
}
|
||||
|
||||
res = LLVMBuildOr(bld->builder, res, shifted, "");
|
||||
}
|
||||
}
|
||||
|
||||
return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -68,12 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
/**
|
||||
* Swizzle a vector consisting of an array of XYZW structs.
|
||||
*
|
||||
* @param swizzle is the in [0,4[ range.
|
||||
* @param swizzles holds one PIPE_SWIZZLE_* selector per channel: RED, GREEN, BLUE, ALPHA, ZERO or ONE.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_swizzle1_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
const unsigned char swizzle[4]);
|
||||
lp_build_swizzle_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
const unsigned char swizzles[4]);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue