mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 10:50:16 +01:00
gallivm: Use SSE4.1's ROUNDSS/ROUNDSD for scalar rounding.
This commit is contained in:
parent
21f392c971
commit
e3ccfd4e03
1 changed file with 71 additions and 21 deletions
|
|
@@ -989,28 +989,67 @@ lp_build_round_sse41(struct lp_build_context *bld,
|
|||
enum lp_build_round_sse41_mode mode)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMTypeRef i32t = LLVMInt32Type();
|
||||
const char *intrinsic;
|
||||
LLVMValueRef res;
|
||||
|
||||
assert(type.floating);
|
||||
assert(type.width*type.length == 128);
|
||||
|
||||
assert(lp_check_value(type, a));
|
||||
assert(util_cpu_caps.has_sse4_1);
|
||||
|
||||
switch(type.width) {
|
||||
case 32:
|
||||
intrinsic = "llvm.x86.sse41.round.ps";
|
||||
break;
|
||||
case 64:
|
||||
intrinsic = "llvm.x86.sse41.round.pd";
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
if (type.length == 1) {
|
||||
LLVMTypeRef vec_type;
|
||||
LLVMValueRef undef;
|
||||
LLVMValueRef args[3];
|
||||
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
|
||||
|
||||
switch(type.width) {
|
||||
case 32:
|
||||
intrinsic = "llvm.x86.sse41.round.ss";
|
||||
break;
|
||||
case 64:
|
||||
intrinsic = "llvm.x86.sse41.round.sd";
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
|
||||
vec_type = LLVMVectorType(bld->elem_type, 4);
|
||||
|
||||
undef = LLVMGetUndef(vec_type);
|
||||
|
||||
args[0] = undef;
|
||||
args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
|
||||
args[2] = LLVMConstInt(i32t, mode, 0);
|
||||
|
||||
res = lp_build_intrinsic(bld->builder, intrinsic,
|
||||
vec_type, args, Elements(args));
|
||||
|
||||
res = LLVMBuildExtractElement(bld->builder, res, index0, "");
|
||||
}
|
||||
else {
|
||||
assert(type.width*type.length == 128);
|
||||
|
||||
switch(type.width) {
|
||||
case 32:
|
||||
intrinsic = "llvm.x86.sse41.round.ps";
|
||||
break;
|
||||
case 64:
|
||||
intrinsic = "llvm.x86.sse41.round.pd";
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
|
||||
res = lp_build_intrinsic_binary(bld->builder, intrinsic,
|
||||
bld->vec_type, a,
|
||||
LLVMConstInt(i32t, mode, 0));
|
||||
}
|
||||
|
||||
return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
|
||||
LLVMConstInt(LLVMInt32Type(), mode, 0));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
|
@@ -1028,8 +1067,10 @@ lp_build_trunc(struct lp_build_context *bld,
|
|||
assert(type.floating);
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
|
||||
if (util_cpu_caps.has_sse4_1 &&
|
||||
(type.length == 1 || type.width*type.length == 128)) {
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
|
||||
}
|
||||
else {
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
|
||||
|
|
@@ -1056,8 +1097,10 @@ lp_build_round(struct lp_build_context *bld,
|
|||
assert(type.floating);
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
|
||||
if (util_cpu_caps.has_sse4_1 &&
|
||||
(type.length == 1 || type.width*type.length == 128)) {
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
|
||||
}
|
||||
else {
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMValueRef res;
|
||||
|
|
@@ -1082,8 +1125,10 @@ lp_build_floor(struct lp_build_context *bld,
|
|||
assert(type.floating);
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
|
||||
if (util_cpu_caps.has_sse4_1 &&
|
||||
(type.length == 1 || type.width*type.length == 128)) {
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
|
||||
}
|
||||
else {
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMValueRef res;
|
||||
|
|
@@ -1108,8 +1153,10 @@ lp_build_ceil(struct lp_build_context *bld,
|
|||
assert(type.floating);
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
|
||||
if (util_cpu_caps.has_sse4_1 &&
|
||||
(type.length == 1 || type.width*type.length == 128)) {
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
|
||||
}
|
||||
else {
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMValueRef res;
|
||||
|
|
@@ -1170,7 +1217,8 @@ lp_build_iround(struct lp_build_context *bld,
|
|||
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
|
||||
if (util_cpu_caps.has_sse4_1 &&
|
||||
(type.length == 1 || type.width*type.length == 128)) {
|
||||
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
|
||||
}
|
||||
else {
|
||||
|
|
@@ -1214,7 +1262,8 @@ lp_build_ifloor(struct lp_build_context *bld,
|
|||
assert(type.floating);
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
|
||||
if (util_cpu_caps.has_sse4_1 &&
|
||||
(type.length == 1 || type.width*type.length == 128)) {
|
||||
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
|
||||
}
|
||||
else {
|
||||
|
|
@@ -1264,7 +1313,8 @@ lp_build_iceil(struct lp_build_context *bld,
|
|||
assert(type.floating);
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
|
||||
if (util_cpu_caps.has_sse4_1 &&
|
||||
(type.length == 1 || type.width*type.length == 128)) {
|
||||
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue