mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-24 07:10:36 +02:00
util: treat denorm'ed floats like zero
The D3D10 spec is very explicit about the treatment of denormalized floats: their behavior is exactly the same as it would be for -0 or +0. This change makes our shading code match that behavior, since OpenGL doesn't care and on a few CPUs it's faster (worst case the same). Float16 conversions will likely break, but we'll fix them in a follow-up commit. Signed-off-by: Zack Rusin <zackr@vmware.com> Reviewed-by: Jose Fonseca <jfonseca@vmware.com> Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
parent
80bc14370a
commit
63386b2f66
6 changed files with 83 additions and 0 deletions
|
|
@ -459,8 +459,14 @@ draw_vbo(struct draw_context *draw,
|
|||
unsigned instance;
|
||||
unsigned index_limit;
|
||||
unsigned count;
|
||||
unsigned fpstate = util_fpstate_get();
|
||||
struct pipe_draw_info resolved_info;
|
||||
|
||||
/* Make sure that denorms are treated like zeros. This is
|
||||
* the behavior required by D3D10. OpenGL doesn't care.
|
||||
*/
|
||||
util_fpstate_set_denorms_to_zero(fpstate);
|
||||
|
||||
resolve_draw_info(info, &resolved_info);
|
||||
info = &resolved_info;
|
||||
|
||||
|
|
@ -518,6 +524,7 @@ draw_vbo(struct draw_context *draw,
|
|||
if (index_limit == 0) {
|
||||
/* one of the buffers is too small to do any valid drawing */
|
||||
debug_warning("draw: VBO too small to draw anything\n");
|
||||
util_fpstate_set(fpstate);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -558,4 +565,5 @@ draw_vbo(struct draw_context *draw,
|
|||
if (draw->collect_statistics) {
|
||||
draw->render->pipeline_statistics(draw->render, &draw->statistics);
|
||||
}
|
||||
util_fpstate_set(fpstate);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@
|
|||
#include "lp_bld_debug.h"
|
||||
#include "lp_bld_bitarit.h"
|
||||
#include "lp_bld_arit.h"
|
||||
#include "lp_bld_flow.h"
|
||||
|
||||
|
||||
#define EXP_POLY_DEGREE 5
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
|
||||
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
|
||||
/** 2^x, for x in [-1.0, 1.0) */
|
||||
|
|
@ -70,4 +71,59 @@ util_init_math(void)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Read the current floating-point state word (MXCSR on x86).
 *
 * The register is only queried on x86 / x86-64 builds, and only when the
 * detected CPU caps report SSE.  In every other case there is nothing to
 * read and 0 is returned.
 */
unsigned
util_fpstate_get(void)
{
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if (util_cpu_caps.has_sse) {
      return __builtin_ia32_stmxcsr();
   }
#endif

   /* No fp state register available on this build/CPU. */
   return 0;
}
|
||||
|
||||
/**
 * Make sure that the fp unit treats denormalized floating point numbers
 * as zero.
 *
 * This is the behavior required by D3D10. OpenGL doesn't care.
 *
 * On x86 / x86-64 builds with SSE the flush-to-zero bit is OR'ed into
 * \p current_mxcsr; the denormals-are-zero bit is added as well, but only
 * when the CPU caps report SSE3.  The resulting value is then loaded through
 * util_fpstate_set().  On other builds, or without SSE, nothing is written
 * and \p current_mxcsr is handed back untouched.
 *
 * \param current_mxcsr  fp state to start from, normally the value returned
 *                       by util_fpstate_get().
 * \return the fp state value that is now in effect (the modified value on
 *         SSE-capable x86, otherwise \p current_mxcsr unchanged).
 */
unsigned
util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
{
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /* MXCSR control bits.  Block-scoped enum constants rather than #defines
    * so the names do not leak into the rest of the translation unit.
    */
   enum {
      MXCSR_DAZ = 1 << 6,  /* Enable denormals are zero mode */
      MXCSR_FTZ = 1 << 15  /* Enable flush to zero mode */
   };

   if (util_cpu_caps.has_sse) {
      current_mxcsr |= MXCSR_FTZ;

      /* NOTE(review): SSE3 is presumably used as a conservative proxy for
       * DAZ support -- confirm against the CPU detection code.
       */
      if (util_cpu_caps.has_sse3) {
         current_mxcsr |= MXCSR_DAZ;
      }

      util_fpstate_set(current_mxcsr);
   }
#endif

   return current_mxcsr;
}
|
||||
|
||||
/**
 * Load a new value into the floating-point state word (MXCSR on x86).
 *
 * The write only happens on x86 / x86-64 builds when the detected CPU caps
 * report SSE.  On platforms without support for it, this is a no-op.
 *
 * \param mxcsr  value to load, e.g. one previously obtained from
 *               util_fpstate_get().
 */
void
util_fpstate_set(unsigned mxcsr)
{
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if (!util_cpu_caps.has_sse) {
      return;
   }

   __builtin_ia32_ldmxcsr(mxcsr);
#else
   /* Nothing to program on this architecture. */
   (void) mxcsr;
#endif
}
|
||||
|
|
|
|||
|
|
@ -763,6 +763,13 @@ static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits)
|
|||
return (int32_t)(value * (1<<frac_bits));
|
||||
}
|
||||
|
||||
unsigned
|
||||
util_fpstate_get(void);
|
||||
unsigned
|
||||
util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
|
||||
void
|
||||
util_fpstate_set(unsigned fpstate);
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -751,6 +751,12 @@ static PIPE_THREAD_ROUTINE( thread_function, init_data )
|
|||
struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
|
||||
struct lp_rasterizer *rast = task->rast;
|
||||
boolean debug = false;
|
||||
unsigned fpstate = util_fpstate_get();
|
||||
|
||||
/* Make sure that denorms are treated like zeros. This is
|
||||
* the behavior required by D3D10. OpenGL doesn't care.
|
||||
*/
|
||||
util_fpstate_set_denorms_to_zero(fpstate);
|
||||
|
||||
while (1) {
|
||||
/* wait for work */
|
||||
|
|
|
|||
|
|
@ -370,6 +370,11 @@ int main(int argc, char **argv)
|
|||
unsigned i;
|
||||
boolean success;
|
||||
boolean single = FALSE;
|
||||
unsigned fpstate;
|
||||
|
||||
util_cpu_detect();
|
||||
fpstate = util_fpstate_get();
|
||||
util_fpstate_set_denorms_to_zero(fpstate);
|
||||
|
||||
for(i = 1; i < argc; ++i) {
|
||||
if(strcmp(argv[i], "-v") == 0)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue