mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
util: implement table-based + linear interpolation linear-to-srgb conversion
Should be much faster; seems to work in softpipe. While here, fix up the pow factor (even though that code path is now disabled) - the former value is what is in GL core, but it is not actually accurate to fp32 precision (the exact exponent is 1.0/2.4), and if someone were to do all the accurate math there would be no reason to waste 8 mantissa bits or so... v2: use a real table-generating function instead of just printing the values (might take a bit longer as it does calculations on some 3+ million floats, but it is obviously much more descriptive). Also fix up another inaccurate pow factor (this time in the Python code) - I had been wondering where the couple of one-bit errors came from :-(. Reviewed-by: Jose Fonseca <jfonseca@vmware.com> Reviewed-by: Zack Rusin <zackr@vmware.com>
This commit is contained in:
parent
2d9fea95e8
commit
e3b5e2db1b
2 changed files with 102 additions and 11 deletions
|
|
@ -39,6 +39,7 @@
|
|||
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "u_pack_color.h"
|
||||
#include "u_math.h"
|
||||
|
||||
|
||||
|
|
@ -51,23 +52,58 @@ util_format_srgb_to_linear_8unorm_table[256];
|
|||
extern const uint8_t
|
||||
util_format_linear_to_srgb_8unorm_table[256];
|
||||
|
||||
extern const unsigned
|
||||
util_format_linear_to_srgb_helper_table[104];
|
||||
|
||||
|
||||
/**
|
||||
* Convert an unclamped linear float to an sRGB value in the range [0,255].
|
||||
* XXX this hasn't been tested (render to srgb surface).
|
||||
* XXX this needs optimization.
|
||||
*/
|
||||
static INLINE uint8_t
|
||||
util_format_linear_float_to_srgb_8unorm(float x)
|
||||
{
|
||||
if (x >= 1.0f)
|
||||
return 255;
|
||||
else if (x >= 0.0031308f)
|
||||
return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f);
|
||||
else if (x > 0.0f)
|
||||
return float_to_ubyte(12.92f * x);
|
||||
else
|
||||
return 0;
|
||||
/* this would be exact but (probably much) slower */
|
||||
if (0) {
|
||||
if (x >= 1.0f)
|
||||
return 255;
|
||||
else if (x >= 0.0031308f)
|
||||
return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f);
|
||||
else if (x > 0.0f)
|
||||
return float_to_ubyte(12.92f * x);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* This is taken from https://gist.github.com/rygorous/2203834
|
||||
* Use LUT and do linear interpolation.
|
||||
*/
|
||||
union fi almostone, minval, f;
|
||||
unsigned tab, bias, scale, t;
|
||||
|
||||
almostone.ui = 0x3f7fffff;
|
||||
minval.ui = (127-13) << 23;
|
||||
|
||||
/*
|
||||
* Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
|
||||
* The tests are carefully written so that NaNs map to 0, same as in the
|
||||
* reference implementation.
|
||||
*/
|
||||
if (!(x > minval.f))
|
||||
x = minval.f;
|
||||
if (x > almostone.f)
|
||||
x = almostone.f;
|
||||
|
||||
/* Do the table lookup and unpack bias, scale */
|
||||
f.f = x;
|
||||
tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> 20];
|
||||
bias = (tab >> 16) << 9;
|
||||
scale = tab & 0xffff;
|
||||
|
||||
/* Grab next-highest mantissa bits and perform linear interpolation */
|
||||
t = (f.ui >> 12) & 0xff;
|
||||
return (uint8_t) ((bias + scale*t) >> 16);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ CopyRight = '''
|
|||
|
||||
|
||||
import math
|
||||
import struct
|
||||
|
||||
|
||||
def srgb_to_linear(x):
|
||||
|
|
@ -51,10 +52,11 @@ def srgb_to_linear(x):
|
|||
|
||||
def linear_to_srgb(x):
|
||||
if x >= 0.0031308:
|
||||
return 1.055 * math.pow(x, 0.41666) - 0.055
|
||||
return 1.055 * math.pow(x, 0.41666666) - 0.055
|
||||
else:
|
||||
return 12.92 * x
|
||||
|
||||
|
||||
def generate_srgb_tables():
|
||||
print 'const float'
|
||||
print 'util_format_srgb_8unorm_to_linear_float_table[256] = {'
|
||||
|
|
@ -84,6 +86,59 @@ def generate_srgb_tables():
|
|||
print '};'
|
||||
print
|
||||
|
||||
# calculate the table interpolation values used in float linear to unorm8 srgb
|
||||
numexp = 13
|
||||
mantissa_msb = 3
|
||||
# stepshift is only used to sample every (1 << stepshift)-th float to make things faster,
|
||||
# 5 is the largest value which still gives exactly the same table as 0
|
||||
stepshift = 5
|
||||
nbuckets = numexp << mantissa_msb
|
||||
bucketsize = (1 << (23 - mantissa_msb)) >> stepshift
|
||||
mantshift = 12
|
||||
valtable = []
|
||||
sum_aa = float(bucketsize)
|
||||
sum_ab = 0.0
|
||||
sum_bb = 0.0
|
||||
for i in range(0, bucketsize):
|
||||
j = (i << stepshift) >> mantshift
|
||||
sum_ab += j
|
||||
sum_bb += j*j
|
||||
inv_det = 1.0 / (sum_aa * sum_bb - sum_ab * sum_ab)
|
||||
|
||||
for bucket in range(0, nbuckets):
|
||||
start = ((127 - numexp) << 23) + bucket*(bucketsize << stepshift)
|
||||
sum_a = 0.0
|
||||
sum_b = 0.0
|
||||
|
||||
for i in range(0, bucketsize):
|
||||
j = (i << stepshift) >> mantshift
|
||||
fint = start + (i << stepshift)
|
||||
ffloat = struct.unpack('f', struct.pack('I', fint))[0]
|
||||
val = linear_to_srgb(ffloat) * 255.0 + 0.5
|
||||
sum_a += val
|
||||
sum_b += j*val
|
||||
|
||||
solved_a = inv_det * (sum_bb*sum_a - sum_ab*sum_b)
|
||||
solved_b = inv_det * (sum_aa*sum_b - sum_ab*sum_a)
|
||||
|
||||
scaled_a = solved_a * 65536.0 / 512.0
|
||||
scaled_b = solved_b * 65536.0
|
||||
|
||||
int_a = int(scaled_a + 0.5)
|
||||
int_b = int(scaled_b + 0.5)
|
||||
|
||||
valtable.append((int_a << 16) + int_b)
|
||||
|
||||
print 'const unsigned'
|
||||
print 'util_format_linear_to_srgb_helper_table[104] = {'
|
||||
|
||||
for j in range(0, nbuckets, 4):
|
||||
print ' ',
|
||||
for i in range(j, j + 4):
|
||||
print '0x%08x,' % (valtable[i],),
|
||||
print
|
||||
print '};'
|
||||
print
|
||||
|
||||
def main():
|
||||
print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */'
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue