llvmpipe: use half-even rounding in lerp
This fixes a bias in texture linear sampling, which can be very noticeable when strong lighting is applied to mipmapped textures generated at runtime by successive linear blits. More bits aren't actually needed for the lerp, and the intrinsic's rounding is wrong, so it is removed in favour of correct, uniform codegen.

Reviewed-by: Jose Fonseca <jose.fonseca@broadcom.com>
Reviewed-by: Roland Scheidegger <roland.scheidegger@broadcom.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37986>
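For reference, the round-half-even scheme the new codegen follows is spelled out in a comment in the diff below; here is a minimal standalone C sketch of it for 8-bit channels, next to a round-half-up variant that roughly matches what the removed pmulhrsw path computed. The helper names, the main() driver and the assumption that the weight x has already been rescaled to [0, 256] are illustrative only, not part of the commit.

#include <stdint.h>
#include <stdio.h>

/* Round-half-even lerp for 8-bit channels, following the reference given in
 * the comment the patch adds below.  The weight x is assumed to be already
 * rescaled to [0, 256] so that dividing by 256 (instead of 255) is exact. */
static uint8_t lerp_round_half_even(uint16_t x, uint8_t v0, uint8_t v1)
{
   uint16_t delta = (uint16_t)(v1 - v0);
   uint16_t m = (uint16_t)(x * delta);
   uint16_t is_odd = (m & 0x100) >> 8;   /* parity of the integer part of m/256 */
   m = (uint16_t)(m + 0x7F + is_odd);    /* + 0.5 for odd, + ~0.498 for even */
   m >>= 8;
   return (uint8_t)(v0 + (uint8_t)m);
}

/* Roughly what the removed pmulhrsw-based path computed: round half up. */
static uint8_t lerp_round_half_up(uint16_t x, uint8_t v0, uint8_t v1)
{
   uint16_t delta = (uint16_t)(v1 - v0);
   uint16_t m = (uint16_t)((uint16_t)(x * delta) + 0x80) >> 8;
   return (uint8_t)(v0 + (uint8_t)m);
}

int main(void)
{
   /* Exact result of the blend is 10.5: half-even keeps 10, half-up gives 11. */
   printf("%u %u\n", (unsigned)lerp_round_half_even(128, 10, 11),
                     (unsigned)lerp_round_half_up(128, 10, 11));
   return 0;
}

Built with any C compiler this should print "10 11": half-even rounding keeps the tie at 10 while half-up rounding drifts to 11, and it is this kind of systematic upward drift that becomes visible as the bias after successive linear blits.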
This commit is contained in:
parent 34b34c2462
commit c200b18e87
1 changed file with 53 additions and 56 deletions
@@ -1171,68 +1171,66 @@ lp_build_lerp_simple(struct lp_build_context *bld,
       return lp_build_mad(bld, x, delta, v0);
    }
 
-   if (flags & LP_BLD_LERP_WIDE_NORMALIZED) {
-      if (!bld->type.sign) {
+   if ((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) {
       if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) {
          /*
           * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
           * most-significant-bit to the lowest-significant-bit, so that
           * later we can just divide by 2**n instead of 2**n - 1.
           */
          x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
       }
 
-         /* (x * delta) >> n */
       /*
-          * For this multiply, higher internal precision is required to pass
-          * CTS, the most efficient path to that is pmulhrsw on ssse3 and
-          * above. This could be opencoded on other arches if conformance was
-          * required.
+       * To have correct rounding, we must implement (example for 8 bits):
+       * uint16_t lerp_round_half_even(uint16_t x, uint16_t v0, uint16_t v1)
+       * {
+       *    uint16_t delta = v1 - v0;
+       *    uint16_t m = x * delta;
+       *    uint16_t is_odd = (m & 0x100) >> 8;
+       *    m += 0x7F + is_odd; // + 0.5 for odd, + ~0.498 for even
+       *    m >>= 8;
+       *    return (uint8_t)v0 + (uint8_t)m;
+       * }
        */
-         if (bld->type.width == 16 && bld->type.length == 8 && util_get_cpu_caps()->has_ssse3) {
-            res = lp_build_intrinsic_binary(builder, "llvm.x86.ssse3.pmul.hr.sw.128", bld->vec_type, x, lp_build_shl_imm(bld, delta, 7));
-            res = lp_build_and(bld, res, lp_build_const_int_vec(bld->gallivm, bld->type, 0xff));
-         } else if (bld->type.width == 16 && bld->type.length == 16 && util_get_cpu_caps()->has_avx2) {
-            res = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.pmul.hr.sw", bld->vec_type, x, lp_build_shl_imm(bld, delta, 7));
-            res = lp_build_and(bld, res, lp_build_const_int_vec(bld->gallivm, bld->type, 0xff));
-         } else {
       res = lp_build_mul(bld, x, delta);
+      LLVMValueRef is_odd = lp_build_shr_imm(bld, lp_build_and(bld, res,
+         lp_build_const_int_vec(bld->gallivm, bld->type, 1ll << half_width)), half_width);
+      res = lp_build_add(bld, res, lp_build_const_int_vec(bld->gallivm, bld->type, (1ll << (half_width - 1)) - 1));
+      res = lp_build_add(bld, res, is_odd);
       res = lp_build_shr_imm(bld, res, half_width);
-         }
-      } else {
-         /*
-          * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
-          * most-significant-bit to the lowest-significant-bit, so that
-          * later we can just divide by 2**n instead of 2**n - 1.
-          */
-         assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
-         res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
-      }
-   } else {
-      assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
-      res = lp_build_mul(bld, x, delta);
-   }
 
-   if ((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) {
       /*
        * At this point both res and v0 only use the lower half of the bits,
        * the rest is zero. Instead of add / mask, do add with half wide type.
       */
       struct lp_type narrow_type;
-      struct lp_build_context narrow_bld;
 
       memset(&narrow_type, 0, sizeof narrow_type);
       narrow_type.sign = bld->type.sign;
       narrow_type.width = bld->type.width/2;
       narrow_type.length = bld->type.length*2;
+      struct lp_build_context narrow_bld;
       lp_build_context_init(&narrow_bld, bld->gallivm, narrow_type);
       res = LLVMBuildBitCast(builder, res, narrow_bld.vec_type, "");
       v0 = LLVMBuildBitCast(builder, v0, narrow_bld.vec_type, "");
       res = lp_build_add(&narrow_bld, v0, res);
       res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
+      return res;
+   }
 
+   assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
+   if (flags & LP_BLD_LERP_WIDE_NORMALIZED) {
+      /*
+       * The rescaling trick above doesn't work for signed numbers, so
+       * use the 2**n - 1 division approximation in lp_build_mul_norm
+       * instead.
+       */
+      res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
    } else {
+      res = lp_build_mul(bld, x, delta);
+   }
 
    res = lp_build_add(bld, v0, res);
 
    if (bld->type.fixed) {
@@ -1246,10 +1244,9 @@ lp_build_lerp_simple(struct lp_build_context *bld,
        * distinguishing the values interpretation from the value storage.
        */
       LLVMValueRef low_bits;
-         low_bits = lp_build_const_int_vec(bld->gallivm, bld->type, (1 << half_width) - 1);
+      low_bits = lp_build_const_int_vec(bld->gallivm, bld->type, (1ll << half_width) - 1);
       res = LLVMBuildAnd(builder, res, low_bits, "");
    }
-   }
 
    return res;
 }