mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-20 16:30:50 +02:00
pan/lib: Rewrite npot divisor algorithm
It was based on reverse engineering and inaccurate.
This also get ride of fp64 usage and will allow us to use it in OpenCL C
shaders for indirect/indexed draw.
On the full 32-bit range, this fixes ~10 millions inaccurate results.
Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Fixes: 467ae0d39d ("panfrost: Factor out panfrost_compute_magic_divisor")
Acked-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Olivia Lee <olivia.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35490>
This commit is contained in:
parent
01fae0c5c2
commit
b53d98eb41
2 changed files with 14 additions and 37 deletions
|
|
@ -87,44 +87,21 @@ pan_padded_vertex_count(unsigned vertex_count)
|
|||
return pan_large_padded_vertex_count(vertex_count);
|
||||
}
|
||||
|
||||
/* The much, much more irritating case -- instancing is enabled. See
|
||||
* pan_job.h for notes on how this works */
|
||||
|
||||
unsigned
|
||||
pan_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift,
|
||||
unsigned *extra_flags)
|
||||
pan_compute_magic_divisor(unsigned hw_divisor, unsigned *divisor_r,
|
||||
unsigned *divisor_e)
|
||||
{
|
||||
/* We have a NPOT divisor. Here's the fun one (multipling by
|
||||
* the inverse and shifting) */
|
||||
unsigned r = util_logbase2(hw_divisor);
|
||||
|
||||
/* floor(log2(d)) */
|
||||
unsigned shift = util_logbase2(hw_divisor);
|
||||
uint64_t shift_hi = 32 + r;
|
||||
uint64_t t = (uint64_t)1 << shift_hi;
|
||||
uint64_t f0 = t + hw_divisor / 2;
|
||||
uint64_t fi = f0 / hw_divisor;
|
||||
uint64_t ff = f0 - fi * hw_divisor;
|
||||
|
||||
/* m = ceil(2^(32 + shift) / d) */
|
||||
uint64_t shift_hi = 32 + shift;
|
||||
uint64_t t = 1ll << shift_hi;
|
||||
double t_f = t;
|
||||
double hw_divisor_d = hw_divisor;
|
||||
double m_f = ceil(t_f / hw_divisor_d);
|
||||
unsigned m = m_f;
|
||||
uint64_t d = fi - (1ul << 31);
|
||||
*divisor_r = r;
|
||||
*divisor_e = ff > hw_divisor / 2 ? 1 : 0;
|
||||
|
||||
/* Default case */
|
||||
uint32_t magic_divisor = m;
|
||||
|
||||
/* e = 2^(shift + 32) % d */
|
||||
uint64_t e = t % hw_divisor;
|
||||
|
||||
/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
|
||||
* seems to use a different condition */
|
||||
if (e <= (1ll << shift)) {
|
||||
magic_divisor = m - 1;
|
||||
*extra_flags = 1;
|
||||
}
|
||||
|
||||
/* Top flag implicitly set */
|
||||
assert(magic_divisor & (1u << 31));
|
||||
magic_divisor &= ~(1u << 31);
|
||||
*o_shift = shift;
|
||||
|
||||
return magic_divisor;
|
||||
return d;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,8 +78,8 @@ unsigned pan_get_total_stack_size(unsigned thread_size,
|
|||
|
||||
unsigned pan_padded_vertex_count(unsigned vertex_count);
|
||||
|
||||
unsigned pan_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift,
|
||||
unsigned *extra_flags);
|
||||
unsigned pan_compute_magic_divisor(unsigned hw_divisor, unsigned *divisor_r,
|
||||
unsigned *divisor_e);
|
||||
|
||||
#ifdef PAN_ARCH
|
||||
/* Records for gl_VertexID and gl_InstanceID use special encodings on Midgard */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue