gallivm: Use SSE4.1's BLENDV instructions for lp_build_select().

This commit is contained in:
José Fonseca 2010-06-14 16:23:13 +01:00
parent b3d4e5bd26
commit 88b6abfba5

View file

@@ -34,6 +34,7 @@
#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_debug.h"
#include "lp_bld_type.h"
@@ -382,6 +383,46 @@ lp_build_select(struct lp_build_context *bld,
mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
res = LLVMBuildSelect(bld->builder, mask, a, b, "");
}
else if (util_cpu_caps.has_sse4_1 &&
type.width * type.length == 128 &&
!LLVMIsConstant(a) &&
!LLVMIsConstant(b) &&
!LLVMIsConstant(mask)) {
const char *intrinsic;
LLVMTypeRef arg_type;
LLVMValueRef args[3];
if (type.width == 64) {
intrinsic = "llvm.x86.sse41.blendvpd";
arg_type = LLVMVectorType(LLVMDoubleType(), 2);
} else if (type.width == 32) {
intrinsic = "llvm.x86.sse41.blendvps";
arg_type = LLVMVectorType(LLVMFloatType(), 4);
} else {
intrinsic = "llvm.x86.sse41.pblendvb";
arg_type = LLVMVectorType(LLVMInt8Type(), 16);
}
if (arg_type != bld->int_vec_type) {
mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
}
if (arg_type != bld->vec_type) {
a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
}
args[0] = b;
args[1] = a;
args[2] = mask;
res = lp_build_intrinsic(bld->builder, intrinsic,
arg_type, args, Elements(args));
if (arg_type != bld->vec_type) {
res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
}
}
else {
if(type.floating) {
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);