mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-15 04:30:29 +01:00
llvmpipe: Optimize blend swizzles by using bitmasks instead of shuffles for ubytes.
This commit is contained in:
parent
a77084ea4b
commit
1dd7bb17c7
6 changed files with 416 additions and 55 deletions
|
|
@ -19,6 +19,7 @@ llvmpipe = env.ConvenienceLibrary(
|
|||
'lp_bld_loop.c',
|
||||
'lp_bld_logicop.c',
|
||||
'lp_bld_blend.c',
|
||||
'lp_bld_swizzle.c',
|
||||
'lp_bld_type.c',
|
||||
'lp_clear.c',
|
||||
'lp_context.c',
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@
|
|||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_arit.h"
|
||||
#include "lp_bld_swizzle.h"
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -179,67 +180,30 @@ lp_build_blend_swizzle(struct lp_build_blend_context *bld,
|
|||
enum lp_build_blend_swizzle rgb_swizzle,
|
||||
unsigned alpha_swizzle)
|
||||
{
|
||||
const unsigned n = bld->base.type.length;
|
||||
LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i, j;
|
||||
|
||||
if(rgb == alpha) {
|
||||
if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
|
||||
return rgb;
|
||||
|
||||
alpha = bld->base.undef;
|
||||
if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
|
||||
return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
|
||||
}
|
||||
|
||||
if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA &&
|
||||
!bld->base.type.floating) {
|
||||
#if 0
|
||||
/* Use a select */
|
||||
/* FIXME: Unfortunately select of vectors does not work */
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
swizzles[j + i] = LLVMConstInt(LLVMInt1Type(), i == alpha_swizzle ? 0 : 1, 0);
|
||||
|
||||
return LLVMBuildSelect(bld->base.builder, LLVMConstVector(swizzles, n), rgb, alpha, "");
|
||||
#else
|
||||
/* XXX: Use a bitmask, as byte shuffles often end up being translated
|
||||
* into many PEXTRB. Ideally LLVM X86 code generation should pick this
|
||||
* automatically for us. */
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
swizzles[j + i] = LLVMConstInt(LLVMIntType(bld->base.type.width), i == alpha_swizzle ? 0 : ~0, 0);
|
||||
|
||||
/* TODO: Unfortunately constant propagation prevents from using PANDN. And
|
||||
* on SSE4 we have even better -- PBLENDVB */
|
||||
return LLVMBuildOr(bld->base.builder,
|
||||
LLVMBuildAnd(bld->base.builder, rgb, LLVMConstVector(swizzles, n), ""),
|
||||
LLVMBuildAnd(bld->base.builder, alpha, LLVMBuildNot(bld->base.builder, LLVMConstVector(swizzles, n), ""), ""),
|
||||
"");
|
||||
#endif
|
||||
}
|
||||
|
||||
for(j = 0; j < n; j += 4) {
|
||||
for(i = 0; i < 4; ++i) {
|
||||
unsigned swizzle;
|
||||
|
||||
if(i == alpha_swizzle && alpha != bld->base.undef) {
|
||||
/* Take the alpha from the second shuffle argument */
|
||||
swizzle = n + j + alpha_swizzle;
|
||||
}
|
||||
else if (rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
|
||||
/* Take the alpha from the first shuffle argument */
|
||||
swizzle = j + alpha_swizzle;
|
||||
}
|
||||
else {
|
||||
swizzle = j + i;
|
||||
}
|
||||
|
||||
swizzles[j + i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
|
||||
else {
|
||||
if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
|
||||
boolean cond[4] = {0, 0, 0, 0};
|
||||
cond[alpha_swizzle] = 1;
|
||||
return lp_build_select_aos(&bld->base, alpha, rgb, cond);
|
||||
}
|
||||
if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
|
||||
unsigned char swizzle[4];
|
||||
swizzle[0] = alpha_swizzle;
|
||||
swizzle[1] = alpha_swizzle;
|
||||
swizzle[2] = alpha_swizzle;
|
||||
swizzle[3] = alpha_swizzle;
|
||||
swizzle[alpha_swizzle] += 4;
|
||||
return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
|
||||
}
|
||||
}
|
||||
|
||||
return LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
|
||||
assert(0);
|
||||
return bld->base.undef;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -143,3 +143,38 @@ lp_build_const_aos(union lp_type type,
|
|||
|
||||
return LLVMConstVector(elems, type.length);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_shift(union lp_type type,
|
||||
int c)
|
||||
{
|
||||
LLVMTypeRef elem_type = LLVMIntType(type.width);
|
||||
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i;
|
||||
|
||||
assert(type.length <= LP_MAX_VECTOR_LENGTH);
|
||||
|
||||
for(i = 0; i < type.length; ++i)
|
||||
elems[i] = LLVMConstInt(elem_type, c, 0);
|
||||
|
||||
return LLVMConstVector(elems, type.length);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_mask_aos(union lp_type type,
|
||||
boolean cond[4])
|
||||
{
|
||||
LLVMTypeRef elem_type = LLVMIntType(type.width);
|
||||
LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i, j;
|
||||
|
||||
assert(type.length <= LP_MAX_VECTOR_LENGTH);
|
||||
|
||||
for(j = 0; j < type.length; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~0 : 0, 0);
|
||||
|
||||
return LLVMConstVector(masks, type.length);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,4 +61,14 @@ lp_build_const_aos(union lp_type type,
|
|||
const unsigned char *swizzle);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_shift(union lp_type type,
|
||||
int c);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_mask_aos(union lp_type type,
|
||||
boolean cond[4]);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_CONST_H */
|
||||
|
|
|
|||
264
src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
Normal file
264
src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2009 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#include "util/u_debug.h"
|
||||
|
||||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_swizzle.h"
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_broadcast_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned channel)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const unsigned n = type.length;
|
||||
unsigned i, j;
|
||||
|
||||
if(a == bld->undef || a == bld->zero || a == bld->one)
|
||||
return a;
|
||||
|
||||
if (n <= 4) {
|
||||
/*
|
||||
* Shuffle.
|
||||
*/
|
||||
LLVMTypeRef elem_type = LLVMInt32Type();
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
|
||||
|
||||
return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* Bit mask and recursive shifts
|
||||
*
|
||||
* XYZW XYZW .... XYZW
|
||||
* _Y__ _Y__ .... _Y__
|
||||
* YY_ YY__ .... YY__
|
||||
* YYYY YYYY .... YYYY
|
||||
*/
|
||||
union lp_type type4 = type;
|
||||
const char shifts[4][2] = {
|
||||
{ 1, 2},
|
||||
{-1, 2},
|
||||
{ 1, -2},
|
||||
{-1, -2}
|
||||
};
|
||||
boolean cond[4];
|
||||
unsigned i;
|
||||
|
||||
memset(cond, 0, sizeof cond);
|
||||
cond[channel] = 1;
|
||||
|
||||
a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
|
||||
|
||||
type4.width *= 4;
|
||||
type4.length /= 4;
|
||||
|
||||
a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
|
||||
|
||||
for(i = 0; i < 2; ++i) {
|
||||
LLVMValueRef tmp = NULL;
|
||||
int shift = shifts[channel][i];
|
||||
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
shift = -shift;
|
||||
#endif
|
||||
|
||||
if(shift > 0)
|
||||
tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_shift(type4, shift*type.width), "");
|
||||
if(shift < 0)
|
||||
tmp = LLVMBuildShl(bld->builder, a, lp_build_const_shift(type4, -shift*type.width), "");
|
||||
|
||||
assert(tmp);
|
||||
if(tmp)
|
||||
a = LLVMBuildOr(bld->builder, a, tmp, "");
|
||||
}
|
||||
|
||||
return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), "");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_select_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
boolean cond[4])
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const unsigned n = type.length;
|
||||
unsigned i, j;
|
||||
|
||||
if(a == b)
|
||||
return a;
|
||||
if(cond[0] && cond[1] && cond[2] && cond[3])
|
||||
return a;
|
||||
if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
|
||||
return b;
|
||||
if(a == bld->undef || b == bld->undef)
|
||||
return bld->undef;
|
||||
|
||||
/*
|
||||
* There are three major ways of accomplishing this:
|
||||
* - with a shuffle,
|
||||
* - with a select,
|
||||
* - or with a bit mask.
|
||||
*
|
||||
* Select isn't supported for vector types yet.
|
||||
* The flip between these is empirical and might need to be.
|
||||
*/
|
||||
if (n <= 4) {
|
||||
/*
|
||||
* Shuffle.
|
||||
*/
|
||||
LLVMTypeRef elem_type = LLVMInt32Type();
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
|
||||
|
||||
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
|
||||
}
|
||||
#if 0
|
||||
else if(0) {
|
||||
/* FIXME: Unfortunately select of vectors do not work */
|
||||
/* Use a select */
|
||||
LLVMTypeRef elem_type = LLVMInt1Type();
|
||||
LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
|
||||
|
||||
return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
|
||||
|
||||
/* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
|
||||
|
||||
a = LLVMBuildAnd(bld->builder, a, mask, "");
|
||||
|
||||
/* This often gets translated to PANDN, but sometimes the NOT is
|
||||
* pre-computed and stored in another constant. The best strategy depends
|
||||
* on available registers, so it is not a big deal -- hopefully LLVM does
|
||||
* the right decision attending the rest of the program.
|
||||
*/
|
||||
b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
|
||||
|
||||
return LLVMBuildOr(bld->builder, a, b, "");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_swizzle1_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned char swizzle[4])
|
||||
{
|
||||
const unsigned n = bld->type.length;
|
||||
unsigned i, j;
|
||||
|
||||
if(a == bld->undef || a == bld->zero || a == bld->one)
|
||||
return a;
|
||||
|
||||
if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
|
||||
return lp_build_broadcast_aos(bld, a, swizzle[0]);
|
||||
|
||||
{
|
||||
/*
|
||||
* Shuffle.
|
||||
*/
|
||||
LLVMTypeRef elem_type = LLVMInt32Type();
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
|
||||
|
||||
return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_swizzle2_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
unsigned char swizzle[4])
|
||||
{
|
||||
const unsigned n = bld->type.length;
|
||||
unsigned i, j;
|
||||
|
||||
if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
|
||||
return lp_build_swizzle1_aos(bld, a, swizzle);
|
||||
|
||||
if(a == b) {
|
||||
swizzle[0] %= 4;
|
||||
swizzle[1] %= 4;
|
||||
swizzle[2] %= 4;
|
||||
swizzle[3] %= 4;
|
||||
return lp_build_swizzle1_aos(bld, a, swizzle);
|
||||
}
|
||||
|
||||
if(swizzle[0] % 4 == 0 &&
|
||||
swizzle[1] % 4 == 1 &&
|
||||
swizzle[2] % 4 == 2 &&
|
||||
swizzle[3] % 4 == 3) {
|
||||
boolean cond[4];
|
||||
cond[0] = swizzle[0] / 4;
|
||||
cond[1] = swizzle[1] / 4;
|
||||
cond[2] = swizzle[2] / 4;
|
||||
cond[3] = swizzle[3] / 4;
|
||||
return lp_build_select_aos(bld, a, b, cond);
|
||||
}
|
||||
|
||||
{
|
||||
/*
|
||||
* Shuffle.
|
||||
*/
|
||||
LLVMTypeRef elem_type = LLVMInt32Type();
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
|
||||
|
||||
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
87
src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
Normal file
87
src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2009 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Helper functions for swizzling/shuffling.
|
||||
*
|
||||
* @author Jose Fonseca <jfonseca@vmware.com>
|
||||
*/
|
||||
|
||||
|
||||
#ifndef LP_BLD_SWIZZLE_H
|
||||
#define LP_BLD_SWIZZLE_H
|
||||
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
|
||||
/* Forward declaration of the tag only; no object is declared here. */
union lp_type;
|
||||
struct lp_build_context;
|
||||
|
||||
|
||||
/**
|
||||
* Broadcast one channel of a vector composed of arrays of XYZW structures into
|
||||
* all four channels.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_broadcast_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned channel);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_select_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
boolean cond[4]);
|
||||
|
||||
|
||||
/**
|
||||
* Swizzle a vector consisting of an array of XYZW structs.
|
||||
*
|
||||
* @param swizzle  source channel for each destination channel; every entry is in the [0,4[ range.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_swizzle1_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned char swizzle[4]);
|
||||
|
||||
|
||||
/**
|
||||
* Swizzle two vectors, each consisting of an array of XYZW structs.
|
||||
*
|
||||
* @param swizzle  source channel for each destination channel; every entry is in the [0,8[ range. Values in the [4,8[ range refer to b.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_swizzle2_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
unsigned char swizzle[4]);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_SWIZZLE_H */
|
||||
Loading…
Add table
Reference in a new issue