gallivm: handle nan's in min/max

Both D3D10 and OpenCL say that if one of the inputs is NaN then
the other should be returned. To preserve that behavior
the patch fixes both the sse and the non-sse paths in both
functions and adds helper code for handling nans.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
Zack Rusin 2013-07-16 13:06:24 -04:00
parent 719000bd7d
commit ab47bbecd6
6 changed files with 482 additions and 52 deletions

View file

@ -73,11 +73,14 @@
/**
* Generate min(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
* NaN's are handled according to the behavior specified by the
* nan_behavior argument.
*/
static LLVMValueRef
lp_build_min_simple(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
LLVMValueRef b,
enum gallivm_nan_behavior nan_behavior)
{
const struct lp_type type = bld->type;
const char *intrinsic = NULL;
@ -120,6 +123,8 @@ lp_build_min_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
debug_printf("%s: altivec doesn't support nan behavior modes\n",
__FUNCTION__);
if (type.width == 32 && type.length == 4) {
intrinsic = "llvm.ppc.altivec.vminfp";
intr_size = 128;
@ -131,7 +136,7 @@ lp_build_min_simple(struct lp_build_context *bld,
(gallivm_debug & GALLIVM_DEBUG_PERF)) {
debug_printf("%s: inefficient code, bogus shuffle due to packing\n",
__FUNCTION__);
}
}
if (type.width == 8 && !type.sign) {
intrinsic = "llvm.x86.sse2.pminu.b";
}
@ -147,53 +152,111 @@ lp_build_min_simple(struct lp_build_context *bld,
}
if (type.width == 32 && !type.sign) {
intrinsic = "llvm.x86.sse41.pminud";
}
}
if (type.width == 32 && type.sign) {
intrinsic = "llvm.x86.sse41.pminsd";
}
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminub";
} else {
intrinsic = "llvm.ppc.altivec.vminsb";
}
} else if (type.width == 16) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminuh";
} else {
intrinsic = "llvm.ppc.altivec.vminsh";
}
} else if (type.width == 32) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminuw";
} else {
intrinsic = "llvm.ppc.altivec.vminsw";
}
}
intr_size = 128;
debug_printf("%s: altivec doesn't support nan behavior modes\n",
__FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminub";
} else {
intrinsic = "llvm.ppc.altivec.vminsb";
}
} else if (type.width == 16) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminuh";
} else {
intrinsic = "llvm.ppc.altivec.vminsh";
}
} else if (type.width == 32) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminuw";
} else {
intrinsic = "llvm.ppc.altivec.vminsw";
}
}
}
if(intrinsic) {
return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
/* We need to handle nan's for floating point numbers. If one of the
* inputs is nan the other should be returned (required by both D3D10+
* and OpenCL).
* The sse intrinsics return the second operand in case of nan by
* default so we need special code to handle those.
*/
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
LLVMValueRef isnan, max;
max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
isnan = lp_build_isnan(bld, b);
return lp_build_select(bld, isnan, a, max);
} else {
assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
isnan = lp_build_isnan(bld, a);
return lp_build_select(bld, isnan, a, max);
}
} else {
return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
}
}
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
if (type.floating) {
switch (nan_behavior) {
case GALLIVM_NAN_RETURN_NAN: {
LLVMValueRef isnan = lp_build_isnan(bld, b);
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
return lp_build_select(bld, cond, a, b);
}
break;
case GALLIVM_NAN_RETURN_OTHER: {
LLVMValueRef isnan = lp_build_isnan(bld, a);
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
return lp_build_select(bld, cond, a, b);
}
break;
case GALLIVM_NAN_RETURN_SECOND:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
break;
default:
assert(0);
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
}
} else {
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
}
}
/**
* Generate max(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
* NaN's are handled according to the behavior specified by the
* nan_behavior argument.
*/
static LLVMValueRef
lp_build_max_simple(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
LLVMValueRef b,
enum gallivm_nan_behavior nan_behavior)
{
const struct lp_type type = bld->type;
const char *intrinsic = NULL;
@ -236,6 +299,8 @@ lp_build_max_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
debug_printf("%s: altivec doesn't support nan behavior modes\n",
__FUNCTION__);
if (type.width == 32 || type.length == 4) {
intrinsic = "llvm.ppc.altivec.vmaxfp";
intr_size = 128;
@ -271,6 +336,8 @@ lp_build_max_simple(struct lp_build_context *bld,
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
debug_printf("%s: altivec doesn't support nan behavior modes\n",
__FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vmaxub";
@ -293,13 +360,60 @@ lp_build_max_simple(struct lp_build_context *bld,
}
if(intrinsic) {
return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
LLVMValueRef isnan, min;
min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
isnan = lp_build_isnan(bld, b);
return lp_build_select(bld, isnan, a, min);
} else {
assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
isnan = lp_build_isnan(bld, a);
return lp_build_select(bld, isnan, a, min);
}
} else {
return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
}
}
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
if (type.floating) {
switch (nan_behavior) {
case GALLIVM_NAN_RETURN_NAN: {
LLVMValueRef isnan = lp_build_isnan(bld, b);
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
return lp_build_select(bld, cond, a, b);
}
break;
case GALLIVM_NAN_RETURN_OTHER: {
LLVMValueRef isnan = lp_build_isnan(bld, a);
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
return lp_build_select(bld, cond, a, b);
}
break;
case GALLIVM_NAN_RETURN_SECOND:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
break;
default:
assert(0);
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
}
} else {
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
}
}
@ -389,7 +503,7 @@ lp_build_add(struct lp_build_context *bld,
/* TODO: handle signed case */
if(type.norm && !type.floating && !type.fixed && !type.sign)
a = lp_build_min_simple(bld, a, lp_build_comp(bld, b));
a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
@ -404,7 +518,7 @@ lp_build_add(struct lp_build_context *bld,
/* clamp to ceiling of 1.0 */
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
res = lp_build_min_simple(bld, res, bld->one);
res = lp_build_min_simple(bld, res, bld->one, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
/* XXX clamp to floor of -1 or 0??? */
@ -670,7 +784,7 @@ lp_build_sub(struct lp_build_context *bld,
/* TODO: handle signed case */
if(type.norm && !type.floating && !type.fixed && !type.sign)
a = lp_build_max_simple(bld, a, b);
a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
@ -684,7 +798,7 @@ lp_build_sub(struct lp_build_context *bld,
res = LLVMBuildSub(builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
res = lp_build_max_simple(bld, res, bld->zero);
res = lp_build_max_simple(bld, res, bld->zero, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
return res;
}
@ -1144,7 +1258,7 @@ lp_build_lerp_3d(struct lp_build_context *bld,
/**
* Generate min(a, b)
* Do checks for special cases.
* Do checks for special cases but not for nans.
*/
LLVMValueRef
lp_build_min(struct lp_build_context *bld,
@ -1172,13 +1286,48 @@ lp_build_min(struct lp_build_context *bld,
return a;
}
return lp_build_min_simple(bld, a, b);
return lp_build_min_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
}
/**
 * Generate min(a, b)
 * Checks for special cases.
 * NaN's are handled according to the behavior specified by the
 * nan_behavior argument.
 *
 * \param bld           build context; bld->type describes a and b
 * \param a             first operand
 * \param b             second operand
 * \param nan_behavior  how NaN operands must be treated
 * \return bld->undef if either operand is undef, otherwise min(a, b)
 *
 * The constant shortcuts for normalized types (min(x, 1) = x,
 * min(x, 0) = 0 for unsigned) return without looking at the other
 * operand, so they are only taken when a NaN cannot occur (non-float
 * types) or when the caller declared NaN results undefined. Otherwise
 * a NaN operand could slip past the requested nan_behavior.
 */
LLVMValueRef
lp_build_min_ext(struct lp_build_context *bld,
                 LLVMValueRef a,
                 LLVMValueRef b,
                 enum gallivm_nan_behavior nan_behavior)
{
   /* Non-zero when skipping the NaN-aware code path is harmless. */
   const int can_shortcut =
      !bld->type.floating ||
      nan_behavior == GALLIVM_NAN_BEHAVIOR_UNDEFINED;

   assert(lp_check_value(bld->type, a));
   assert(lp_check_value(bld->type, b));

   if (a == bld->undef || b == bld->undef)
      return bld->undef;

   /* Identical operands: the result is that operand, NaN or not. */
   if (a == b)
      return a;

   if (bld->type.norm && can_shortcut) {
      if (!bld->type.sign) {
         if (a == bld->zero || b == bld->zero) {
            return bld->zero;
         }
      }
      if (a == bld->one)
         return b;
      if (b == bld->one)
         return a;
   }

   return lp_build_min_simple(bld, a, b, nan_behavior);
}
/**
* Generate max(a, b)
* Do checks for special cases.
* Do checks for special cases, but NaN behavior is undefined.
*/
LLVMValueRef
lp_build_max(struct lp_build_context *bld,
@ -1207,10 +1356,47 @@ lp_build_max(struct lp_build_context *bld,
}
}
return lp_build_max_simple(bld, a, b);
return lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
}
/**
 * Generate max(a, b)
 * Checks for special cases.
 * NaN's are handled according to the behavior specified by the
 * nan_behavior argument.
 *
 * \param bld           build context; bld->type describes a and b
 * \param a             first operand
 * \param b             second operand
 * \param nan_behavior  how NaN operands must be treated
 * \return bld->undef if either operand is undef, otherwise max(a, b)
 *
 * The constant shortcuts for normalized types (max(x, 1) = 1,
 * max(x, 0) = x for unsigned) return without looking at the other
 * operand, so they are only taken when a NaN cannot occur (non-float
 * types) or when the caller declared NaN results undefined. Otherwise
 * a NaN operand could slip past the requested nan_behavior.
 */
LLVMValueRef
lp_build_max_ext(struct lp_build_context *bld,
                 LLVMValueRef a,
                 LLVMValueRef b,
                 enum gallivm_nan_behavior nan_behavior)
{
   /* Non-zero when skipping the NaN-aware code path is harmless. */
   const int can_shortcut =
      !bld->type.floating ||
      nan_behavior == GALLIVM_NAN_BEHAVIOR_UNDEFINED;

   assert(lp_check_value(bld->type, a));
   assert(lp_check_value(bld->type, b));

   if (a == bld->undef || b == bld->undef)
      return bld->undef;

   /* Identical operands: the result is that operand, NaN or not. */
   if (a == b)
      return a;

   if (bld->type.norm && can_shortcut) {
      if (a == bld->one || b == bld->one)
         return bld->one;
      if (!bld->type.sign) {
         if (a == bld->zero) {
            return b;
         }
         if (b == bld->zero) {
            return a;
         }
      }
   }

   return lp_build_max_simple(bld, a, b, nan_behavior);
}
/**
* Generate clamp(a, min, max)
* Do checks for special cases.
@ -3343,3 +3529,26 @@ lp_build_mod(struct lp_build_context *bld,
res = LLVMBuildURem(builder, x, y, "");
return res;
}
/*
 * Build a NaN mask for the floating point value x.
 *
 * Every channel of the result is all 1's where the matching channel
 * of x is NaN and 0 where it is not. The build context type must be
 * a floating point type.
 */
LLVMValueRef
lp_build_isnan(struct lp_build_context *bld,
               LLVMValueRef x)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, bld->type);
   LLVMValueRef not_nan;
   LLVMValueRef is_nan;

   assert(bld->type.floating);
   assert(lp_check_value(bld->type, x));

   /* An ordered self-comparison is true only for non-NaN channels. */
   not_nan = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, x, x,
                           "isnotnan");
   /* Flip it so that NaN channels are the ones that are set. */
   is_nan = LLVMBuildNot(bld->gallivm->builder, not_nan, "");
   /* Widen the boolean to a full-width 0 / ~0 mask. */
   return LLVMBuildSExt(bld->gallivm->builder, is_nan, int_vec_type, "isnan");
}

View file

@ -131,17 +131,43 @@ lp_build_lerp_3d(struct lp_build_context *bld,
LLVMValueRef v111,
unsigned flags);
/**
* Specifies floating point NaN behavior.
*
* Passed to the min/max builders that take a nan_behavior argument to
* select what they produce when an operand is NaN.
*/
enum gallivm_nan_behavior {
/* Results are undefined with NaN. Results in fastest code */
GALLIVM_NAN_BEHAVIOR_UNDEFINED,
/* If either of the inputs is NaN, NaN is returned */
GALLIVM_NAN_RETURN_NAN,
/* If one of the inputs is NaN, the other operand is returned */
GALLIVM_NAN_RETURN_OTHER,
/* If one of the inputs is NaN, the second operand is returned.
* In min/max it will be as fast as undefined with sse opcodes */
GALLIVM_NAN_RETURN_SECOND
};
LLVMValueRef
lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef
lp_build_min_ext(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
enum gallivm_nan_behavior nan_behavior);
LLVMValueRef
lp_build_max(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef
lp_build_max_ext(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
enum gallivm_nan_behavior nan_behavior);
LLVMValueRef
lp_build_clamp(struct lp_build_context *bld,
LLVMValueRef a,
@ -309,4 +335,8 @@ lp_build_mod(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef y);
LLVMValueRef
lp_build_isnan(struct lp_build_context *bld,
LLVMValueRef x);
#endif /* !LP_BLD_ARIT_H */

View file

@ -241,8 +241,6 @@ lp_build_compare(struct gallivm_state *gallivm,
#endif
#endif /* HAVE_LLVM < 0x0207 */
/* XXX: It is not clear if we should use the ordered or unordered operators */
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
@ -368,11 +366,189 @@ lp_build_compare(struct gallivm_state *gallivm,
return res;
}
/**
* Build code to compare two values 'a' and 'b' using the given func.
* \param bld build context; bld->type selects float or integer handling
* \param func one of PIPE_FUNC_x
* \param a first operand
* \param b second operand
* If the operands are floating point numbers, the function will use
* ordered comparison which means that it will return true if both
* operands are not a NaN and the specified condition evaluates to true.
* Integer operands are compared with signed or unsigned predicates
* depending on type.sign.
* The result values will be 0 for false or ~0 for true, in the integer
* type that lp_build_int_vec_type() yields for bld->type.
*/
LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context *bld,
unsigned func,
LLVMValueRef a,
LLVMValueRef b)
{
struct gallivm_state *gallivm = bld->gallivm;
const struct lp_type type = bld->type;
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
LLVMValueRef cond;
LLVMValueRef res;
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
assert(lp_check_value(type, a));
assert(lp_check_value(type, b));
/* Constant results: no comparison instruction is emitted for these. */
if(func == PIPE_FUNC_NEVER)
return zeros;
if(func == PIPE_FUNC_ALWAYS)
return ones;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/*
* There are no unsigned integer comparison instructions in SSE.
* This only reports the situation when GALLIVM_DEBUG_PERF is set;
* it does not change the code that gets generated below.
*/
if (!type.floating && !type.sign &&
type.width * type.length == 128 &&
util_cpu_caps.has_sse2 &&
(func == PIPE_FUNC_LESS ||
func == PIPE_FUNC_LEQUAL ||
func == PIPE_FUNC_GREATER ||
func == PIPE_FUNC_GEQUAL) &&
(gallivm_debug & GALLIVM_DEBUG_PERF)) {
debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
__FUNCTION__, type.length, type.width);
}
#endif
if(type.floating) {
/* Map the PIPE_FUNC_x code to the ordered (LLVMRealO*) predicate. */
LLVMRealPredicate op;
switch(func) {
/* NEVER and ALWAYS already returned above, so these two cases are
* not reached from here. */
case PIPE_FUNC_NEVER:
op = LLVMRealPredicateFalse;
break;
case PIPE_FUNC_ALWAYS:
op = LLVMRealPredicateTrue;
break;
case PIPE_FUNC_EQUAL:
op = LLVMRealOEQ;
break;
case PIPE_FUNC_NOTEQUAL:
op = LLVMRealONE;
break;
case PIPE_FUNC_LESS:
op = LLVMRealOLT;
break;
case PIPE_FUNC_LEQUAL:
op = LLVMRealOLE;
break;
case PIPE_FUNC_GREATER:
op = LLVMRealOGT;
break;
case PIPE_FUNC_GEQUAL:
op = LLVMRealOGE;
break;
default:
assert(0);
/* NOTE(review): this undef is built from 'type' while every other
* return path yields an int_vec_type value - confirm this is intended. */
return lp_build_undef(gallivm, type);
}
#if HAVE_LLVM >= 0x0207
/* Compare the whole value at once and sign-extend the boolean result
* to the 0 / ~0 mask. */
cond = LLVMBuildFCmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
if (type.length == 1) {
cond = LLVMBuildFCmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
}
else {
/* Fallback: compare element by element and assemble the mask by
* inserting the per-element 0 / ~0 results into 'res'.
* Note this warning is printed unconditionally, unlike the integer
* path below which checks GALLIVM_DEBUG_PERF first. */
unsigned i;
res = LLVMGetUndef(int_vec_type);
debug_printf("%s: warning: using slow element-wise float"
" vector comparison\n", __FUNCTION__);
for (i = 0; i < type.length; ++i) {
LLVMValueRef index = lp_build_const_int32(gallivm, i);
cond = LLVMBuildFCmp(builder, op,
LLVMBuildExtractElement(builder, a, index, ""),
LLVMBuildExtractElement(builder, b, index, ""),
"");
cond = LLVMBuildSelect(builder, cond,
LLVMConstExtractElement(ones, index),
LLVMConstExtractElement(zeros, index),
"");
res = LLVMBuildInsertElement(builder, res, cond, index, "");
}
}
#endif
}
else {
/* Integer operands: pick the signed or unsigned predicate from
* type.sign. NEVER and ALWAYS were handled by the early returns. */
LLVMIntPredicate op;
switch(func) {
case PIPE_FUNC_EQUAL:
op = LLVMIntEQ;
break;
case PIPE_FUNC_NOTEQUAL:
op = LLVMIntNE;
break;
case PIPE_FUNC_LESS:
op = type.sign ? LLVMIntSLT : LLVMIntULT;
break;
case PIPE_FUNC_LEQUAL:
op = type.sign ? LLVMIntSLE : LLVMIntULE;
break;
case PIPE_FUNC_GREATER:
op = type.sign ? LLVMIntSGT : LLVMIntUGT;
break;
case PIPE_FUNC_GEQUAL:
op = type.sign ? LLVMIntSGE : LLVMIntUGE;
break;
default:
assert(0);
/* NOTE(review): same remark as in the float branch - the undef uses
* 'type', not int_vec_type. */
return lp_build_undef(gallivm, type);
}
#if HAVE_LLVM >= 0x0207
cond = LLVMBuildICmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
if (type.length == 1) {
cond = LLVMBuildICmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
}
else {
/* Fallback: element-wise comparison, mirroring the float branch. */
unsigned i;
res = LLVMGetUndef(int_vec_type);
if (gallivm_debug & GALLIVM_DEBUG_PERF) {
debug_printf("%s: using slow element-wise int"
" vector comparison\n", __FUNCTION__);
}
for(i = 0; i < type.length; ++i) {
LLVMValueRef index = lp_build_const_int32(gallivm, i);
cond = LLVMBuildICmp(builder, op,
LLVMBuildExtractElement(builder, a, index, ""),
LLVMBuildExtractElement(builder, b, index, ""),
"");
cond = LLVMBuildSelect(builder, cond,
LLVMConstExtractElement(ones, index),
LLVMConstExtractElement(zeros, index),
"");
res = LLVMBuildInsertElement(builder, res, cond, index, "");
}
}
#endif
}
return res;
}
/**
* Build code to compare two values 'a' and 'b' using the given func.
* \param func one of PIPE_FUNC_x
* If the operands are floating point numbers, the function will use
* unordered comparison which means that it will return true if either
* operand is a NaN or the specified condition evaluates to true.
* The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef

View file

@ -63,6 +63,12 @@ lp_build_cmp(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context *bld,
unsigned func,
LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef
lp_build_select_bitwise(struct lp_build_context *bld,
LLVMValueRef mask,

View file

@ -1274,8 +1274,10 @@ max_emit_cpu(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
emit_data->output[emit_data->chan] = lp_build_max(&bld_base->base,
emit_data->args[0], emit_data->args[1]);
emit_data->output[emit_data->chan] =
lp_build_max_ext(&bld_base->base,
emit_data->args[0], emit_data->args[1],
GALLIVM_NAN_RETURN_OTHER);
}
/* TGSI_OPCODE_MIN (CPU Only) */
@ -1285,8 +1287,10 @@ min_emit_cpu(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
emit_data->output[emit_data->chan] = lp_build_min(&bld_base->base,
emit_data->args[0], emit_data->args[1]);
emit_data->output[emit_data->chan] =
lp_build_min_ext(&bld_base->base,
emit_data->args[0], emit_data->args[1],
GALLIVM_NAN_RETURN_OTHER);
}
/* TGSI_OPCODE_MOD (CPU Only) */

View file

@ -1396,16 +1396,21 @@ emit_store_chan(
assert(dtype == TGSI_TYPE_FLOAT ||
dtype == TGSI_TYPE_UNTYPED);
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
value = lp_build_max(float_bld, value, float_bld->zero);
value = lp_build_min(float_bld, value, float_bld->one);
value = lp_build_max_ext(float_bld, value, float_bld->zero,
GALLIVM_NAN_RETURN_SECOND);
value = lp_build_min_ext(float_bld, value, float_bld->one,
GALLIVM_NAN_BEHAVIOR_UNDEFINED);
break;
case TGSI_SAT_MINUS_PLUS_ONE:
assert(dtype == TGSI_TYPE_FLOAT ||
dtype == TGSI_TYPE_UNTYPED);
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
value = lp_build_max(float_bld, value, lp_build_const_vec(gallivm, float_bld->type, -1.0));
value = lp_build_min(float_bld, value, float_bld->one);
value = lp_build_max_ext(float_bld, value,
lp_build_const_vec(gallivm, float_bld->type, -1.0),
GALLIVM_NAN_RETURN_SECOND);
value = lp_build_min_ext(float_bld, value, float_bld->one,
GALLIVM_NAN_BEHAVIOR_UNDEFINED);
break;
default: