llvmpipe: use half-even rounding in lerp

This fixes a bias in linear texture sampling,
which can be very noticeable when strong lighting
is applied to mipmapped textures generated at
runtime using successive linear blits.

More bits aren't actually needed for the lerp, and
the pmulhrsw intrinsic's rounding is wrong, so the
intrinsic path is removed in favour of correct,
uniform codegen across architectures.

Reviewed-by: Jose Fonseca <jose.fonseca@broadcom.com>
Reviewed-by: Roland Scheidegger <roland.scheidegger@broadcom.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37986>
commit c200b18e87
parent 34b34c2462
Author: Aleksi Sapon
Date:   2025-11-04 15:47:51 -05:00
Committer: Marge Bot
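
The bias is easy to reproduce outside llvmpipe: the old non-SSE fallback
computed (x * delta) >> n, which truncates toward negative infinity, so
every linear blit in a runtime-generated mip chain loses up to half a
unit and the chain drifts darker. Below is a minimal standalone C sketch
of that drift, assuming 8-bit texels and a weight already rescaled to
[0, 256]; lerp_trunc, the random texture, and the mip loop are
illustrative, not llvmpipe code:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Truncating 8-bit lerp, like the old non-SSE fallback: (x * delta) >> 8. */
static uint8_t lerp_trunc(uint16_t x, uint8_t v0, uint8_t v1)
{
   uint16_t m = (uint16_t)(x * (uint16_t)(v1 - v0));
   return (uint8_t)(v0 + (m >> 8));
}

/* Round-half-even 8-bit lerp, as in the comment this commit adds. */
static uint8_t lerp_round_half_even(uint16_t x, uint8_t v0, uint8_t v1)
{
   uint16_t m = (uint16_t)(x * (uint16_t)(v1 - v0));
   uint16_t is_odd = (m & 0x100) >> 8;
   m = (uint16_t)(m + 0x7F + is_odd); /* + 0.5 for odd, + ~0.498 for even */
   return (uint8_t)(v0 + (m >> 8));
}

int main(void)
{
   uint8_t a[256], b[256];
   srand(1);
   for (int i = 0; i < 256; i++)
      a[i] = b[i] = (uint8_t)(rand() & 0xFF);

   /* Build a 1D mip chain: each level averages adjacent texels,
    * i.e. x = 128 once the weight is rescaled to [0, 256]. */
   for (int n = 128; n >= 1; n /= 2) {
      double mean_a = 0.0, mean_b = 0.0;
      for (int i = 0; i < n; i++) {
         a[i] = lerp_trunc(128, a[2 * i], a[2 * i + 1]);
         b[i] = lerp_round_half_even(128, b[2 * i], b[2 * i + 1]);
         mean_a += a[i];
         mean_b += b[i];
      }
      printf("%3d texels: trunc mean %7.3f, half-even mean %7.3f\n",
             n, mean_a / n, mean_b / n);
   }
   return 0;
}

On random data the truncating mean falls by roughly 0.25 per level on the
well-populated levels, while the half-even mean stays flat.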


@@ -1171,68 +1171,66 @@ lp_build_lerp_simple(struct lp_build_context *bld,
       return lp_build_mad(bld, x, delta, v0);
    }
 
-   if (flags & LP_BLD_LERP_WIDE_NORMALIZED) {
-      if (!bld->type.sign) {
+   if ((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) {
       if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) {
          /*
           * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
           * most-significant-bit to the lowest-significant-bit, so that
           * later we can just divide by 2**n instead of 2**n - 1.
           */
          x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
       }
 
-         /* (x * delta) >> n */
       /*
-       * For this multiply, higher internal precision is required to pass
-       * CTS, the most efficient path to that is pmulhrsw on ssse3 and
-       * above. This could be opencoded on other arches if conformance was
-       * required.
+       * To have correct rounding, we must implement (example for 8 bits):
+       * uint16_t lerp_round_half_even(uint16_t x, uint16_t v0, uint16_t v1)
+       * {
+       *    uint16_t delta = v1 - v0;
+       *    uint16_t m = x * delta;
+       *    uint16_t is_odd = (m & 0x100) >> 8;
+       *    m += 0x7F + is_odd; // + 0.5 for odd, + ~0.498 for even
+       *    m >>= 8;
+       *    return (uint8_t)v0 + (uint8_t)m;
+       * }
        */
-         if (bld->type.width == 16 && bld->type.length == 8 && util_get_cpu_caps()->has_ssse3) {
-            res = lp_build_intrinsic_binary(builder, "llvm.x86.ssse3.pmul.hr.sw.128", bld->vec_type, x, lp_build_shl_imm(bld, delta, 7));
-            res = lp_build_and(bld, res, lp_build_const_int_vec(bld->gallivm, bld->type, 0xff));
-         } else if (bld->type.width == 16 && bld->type.length == 16 && util_get_cpu_caps()->has_avx2) {
-            res = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.pmul.hr.sw", bld->vec_type, x, lp_build_shl_imm(bld, delta, 7));
-            res = lp_build_and(bld, res, lp_build_const_int_vec(bld->gallivm, bld->type, 0xff));
-         } else {
       res = lp_build_mul(bld, x, delta);
+      LLVMValueRef is_odd = lp_build_shr_imm(bld, lp_build_and(bld, res,
+         lp_build_const_int_vec(bld->gallivm, bld->type, 1ll << half_width)), half_width);
+      res = lp_build_add(bld, res, lp_build_const_int_vec(bld->gallivm, bld->type, (1ll << (half_width - 1)) - 1));
+      res = lp_build_add(bld, res, is_odd);
       res = lp_build_shr_imm(bld, res, half_width);
-         }
-      } else {
-         /*
-          * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
-          * most-significant-bit to the lowest-significant-bit, so that
-          * later we can just divide by 2**n instead of 2**n - 1.
-          */
-         assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
-         res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
-      }
-   } else {
-      assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
-      res = lp_build_mul(bld, x, delta);
-   }
 
-   if ((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) {
       /*
        * At this point both res and v0 only use the lower half of the bits,
        * the rest is zero. Instead of add / mask, do add with half wide type.
        */
       struct lp_type narrow_type;
-      struct lp_build_context narrow_bld;
       memset(&narrow_type, 0, sizeof narrow_type);
       narrow_type.sign = bld->type.sign;
       narrow_type.width = bld->type.width/2;
       narrow_type.length = bld->type.length*2;
+      struct lp_build_context narrow_bld;
       lp_build_context_init(&narrow_bld, bld->gallivm, narrow_type);
       res = LLVMBuildBitCast(builder, res, narrow_bld.vec_type, "");
       v0 = LLVMBuildBitCast(builder, v0, narrow_bld.vec_type, "");
       res = lp_build_add(&narrow_bld, v0, res);
       res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
+      return res;
+   }
+
+   assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
+   if (flags & LP_BLD_LERP_WIDE_NORMALIZED) {
+      /*
+       * The rescaling trick above doesn't work for signed numbers, so
+       * use the 2**n - 1 division approximation in lp_build_mul_norm
+       * instead.
+       */
+      res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
    } else {
+      res = lp_build_mul(bld, x, delta);
+   }
+
    res = lp_build_add(bld, v0, res);
 
    if (bld->type.fixed) {
@@ -1246,10 +1244,9 @@ lp_build_lerp_simple(struct lp_build_context *bld,
        * distinguishing the values interpretation from the value storage.
        */
       LLVMValueRef low_bits;
-      low_bits = lp_build_const_int_vec(bld->gallivm, bld->type, (1 << half_width) - 1);
+      low_bits = lp_build_const_int_vec(bld->gallivm, bld->type, (1ll << half_width) - 1);
       res = LLVMBuildAnd(builder, res, low_bits, "");
    }
-   }
 
    return res;
 }
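
The 16-bit trick in the new comment is worth a sanity check: adding 0x7F
plus bit n of the product before the shift is exactly round-half-to-even
of x * delta / 2**n, even when the product wraps the 16-bit lane. Below
is a brute-force comparison against a straightforward 32-bit reference;
lerp_ref and the test harness are illustrative, not part of the commit:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The routine from the comment in the diff above. */
static uint8_t lerp_round_half_even(uint16_t x, uint16_t v0, uint16_t v1)
{
   uint16_t delta = (uint16_t)(v1 - v0);
   uint16_t m = (uint16_t)(x * delta);
   uint16_t is_odd = (m & 0x100) >> 8;
   m = (uint16_t)(m + 0x7F + is_odd);
   m >>= 8;
   return (uint8_t)((uint8_t)v0 + (uint8_t)m);
}

/* Reference: round x * delta / 256 half-to-even in 32-bit signed math.
 * Assumes arithmetic right shift of negative values, true in practice. */
static uint8_t lerp_ref(uint16_t x, uint8_t v0, uint8_t v1)
{
   int32_t p = (int32_t)x * ((int32_t)v1 - (int32_t)v0);
   int32_t q = p >> 8;   /* floor(p / 256) */
   int32_t r = p & 0xFF; /* non-negative remainder */
   if (r > 0x80 || (r == 0x80 && (q & 1)))
      q++;               /* round up, breaking exact ties toward even */
   return (uint8_t)(v0 + q);
}

int main(void)
{
   for (uint32_t x = 0; x <= 256; x++)
      for (uint32_t v0 = 0; v0 < 256; v0++)
         for (uint32_t v1 = 0; v1 < 256; v1++)
            assert(lerp_round_half_even((uint16_t)x, (uint16_t)v0, (uint16_t)v1) ==
                   lerp_ref((uint16_t)x, (uint8_t)v0, (uint8_t)v1));
   puts("16-bit half-even lerp matches the 32-bit reference for all inputs");
   return 0;
}

Note that the rounding applies to the product term, not to v0 + m: on an
exact tie the two can differ by one when v0 is odd, and the product is
what the generated code rounds.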