mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-27 20:58:11 +02:00
util: implement f16c - fast half<->float conversions
This also happens to fix bptc-float-modes on llvmpipe. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6774>
This commit is contained in:
parent
7dbb1f7462
commit
87900afe5b
7 changed files with 82 additions and 19 deletions
|
|
@ -738,7 +738,6 @@ spec/arb_sparse_buffer/commit: skip
|
|||
spec/arb_sparse_buffer/minmax: skip
|
||||
spec/arb_tessellation_shader/arb_tessellation_shader-immediate-mode-draw-patches: skip
|
||||
spec/arb_texture_buffer_object/negative-unsupported: skip
|
||||
spec/arb_texture_compression_bptc/bptc-float-modes: fail
|
||||
spec/arb_texture_cube_map/copyteximage cube samples=16: skip
|
||||
spec/arb_texture_cube_map/copyteximage cube samples=2: skip
|
||||
spec/arb_texture_cube_map/copyteximage cube samples=32: skip
|
||||
|
|
@ -1656,8 +1655,8 @@ wgl/wgl-sanity: skip
|
|||
summary:
|
||||
name: results
|
||||
---- --------
|
||||
pass: 23074
|
||||
fail: 198
|
||||
pass: 23075
|
||||
fail: 197
|
||||
crash: 0
|
||||
skip: 1433
|
||||
timeout: 0
|
||||
|
|
|
|||
13
meson.build
13
meson.build
|
|
@ -1111,6 +1111,19 @@ else
|
|||
sse41_args = []
|
||||
endif
|
||||
|
||||
if cc.has_argument('-mf16c') and cpp.has_argument('-mf16c')
|
||||
pre_args += '-DUSE_F16C'
|
||||
c_args += '-mf16c'
|
||||
cpp_args += '-mf16c'
|
||||
|
||||
# GCC on x86 (not x86_64) with -msse* assumes a 16 byte aligned stack, but
|
||||
# that's not guaranteed (not sure if this also applies to -mf16c)
|
||||
if host_machine.cpu_family() == 'x86'
|
||||
c_args += '-mstackrealign'
|
||||
cpp_args += '-mstackrealign'
|
||||
endif
|
||||
endif
|
||||
|
||||
# Check for GCC style atomics
|
||||
dep_atomic = null_dep
|
||||
|
||||
|
|
|
|||
|
|
@ -4,9 +4,10 @@
|
|||
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_half.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
static void
|
||||
test(void)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned roundtrip_fails = 0;
|
||||
|
|
@ -28,9 +29,21 @@ main(int argc, char **argv)
|
|||
|
||||
if(roundtrip_fails) {
|
||||
printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails);
|
||||
return 1;
|
||||
} else {
|
||||
printf("Success!\n");
|
||||
return 0;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
assert(!util_cpu_caps.has_f16c);
|
||||
test();
|
||||
|
||||
/* Test f16c. */
|
||||
util_cpu_detect();
|
||||
if (util_cpu_caps.has_f16c)
|
||||
test();
|
||||
|
||||
printf("Success!\n");
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ typedef union { float f; int32_t i; uint32_t u; } fi_type;
|
|||
* result in the same value as if the expression were executed on the GPU.
|
||||
*/
|
||||
uint16_t
|
||||
_mesa_float_to_half(float val)
|
||||
_mesa_float_to_half_slow(float val)
|
||||
{
|
||||
const fi_type fi = {val};
|
||||
const int flt_m = fi.i & 0x7fffff;
|
||||
|
|
@ -129,9 +129,9 @@ _mesa_float_to_half(float val)
|
|||
}
|
||||
|
||||
uint16_t
|
||||
_mesa_float_to_float16_rtz(float val)
|
||||
_mesa_float_to_float16_rtz_slow(float val)
|
||||
{
|
||||
return _mesa_float_to_half_rtz(val);
|
||||
return _mesa_float_to_half_rtz_slow(val);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -140,7 +140,7 @@ _mesa_float_to_float16_rtz(float val)
|
|||
* http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
|
||||
*/
|
||||
float
|
||||
_mesa_half_to_float(uint16_t val)
|
||||
_mesa_half_to_float_slow(uint16_t val)
|
||||
{
|
||||
return util_half_to_float(val);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,14 @@
|
|||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
#ifdef USE_F16C
|
||||
#include <immintrin.h>
|
||||
#define F16C_NEAREST 0
|
||||
#define F16C_TRUNCATE 3
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -36,18 +44,48 @@ extern "C" {
|
|||
#define FP16_ONE ((uint16_t) 0x3c00)
|
||||
#define FP16_ZERO ((uint16_t) 0)
|
||||
|
||||
uint16_t _mesa_float_to_half(float val);
|
||||
float _mesa_half_to_float(uint16_t val);
|
||||
uint16_t _mesa_float_to_half_slow(float val);
|
||||
float _mesa_half_to_float_slow(uint16_t val);
|
||||
uint8_t _mesa_half_to_unorm8(uint16_t v);
|
||||
uint16_t _mesa_uint16_div_64k_to_half(uint16_t v);
|
||||
|
||||
/*
|
||||
* _mesa_float_to_float16_rtz is no more than a wrapper to the counterpart
|
||||
* _mesa_float_to_float16_rtz_slow is no more than a wrapper to the counterpart
|
||||
* softfloat.h call. Still, softfloat.h conversion API is meant to be kept
|
||||
* private. In other words, only use the API published here, instead of
|
||||
* calling directly the softfloat.h one.
|
||||
*/
|
||||
uint16_t _mesa_float_to_float16_rtz(float val);
|
||||
uint16_t _mesa_float_to_float16_rtz_slow(float val);
|
||||
|
||||
static inline uint16_t
|
||||
_mesa_float_to_half(float val)
|
||||
{
|
||||
#ifdef USE_F16C
|
||||
if (util_cpu_caps.has_f16c)
|
||||
return _cvtss_sh(val, F16C_NEAREST);
|
||||
#endif
|
||||
return _mesa_float_to_half_slow(val);
|
||||
}
|
||||
|
||||
static inline float
|
||||
_mesa_half_to_float(uint16_t val)
|
||||
{
|
||||
#ifdef USE_F16C
|
||||
if (util_cpu_caps.has_f16c)
|
||||
return _cvtsh_ss(val);
|
||||
#endif
|
||||
return _mesa_half_to_float_slow(val);
|
||||
}
|
||||
|
||||
static inline uint16_t
|
||||
_mesa_float_to_float16_rtz(float val)
|
||||
{
|
||||
#ifdef USE_F16C
|
||||
if (util_cpu_caps.has_f16c)
|
||||
return _cvtss_sh(val, F16C_TRUNCATE);
|
||||
#endif
|
||||
return _mesa_float_to_float16_rtz_slow(val);
|
||||
}
|
||||
|
||||
static inline uint16_t
|
||||
_mesa_float_to_float16_rtne(float val)
|
||||
|
|
|
|||
|
|
@ -1435,7 +1435,7 @@ _mesa_double_to_f32(double val, bool rtz)
|
|||
* From f32_to_f16()
|
||||
*/
|
||||
uint16_t
|
||||
_mesa_float_to_half_rtz(float val)
|
||||
_mesa_float_to_half_rtz_slow(float val)
|
||||
{
|
||||
const fi_type fi = {val};
|
||||
const uint32_t flt_m = fi.u & 0x7fffff;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ double _mesa_double_mul_rtz(double a, double b);
|
|||
double _mesa_double_fma_rtz(double a, double b, double c);
|
||||
float _mesa_float_fma_rtz(float a, float b, float c);
|
||||
float _mesa_double_to_f32(double x, bool rtz);
|
||||
uint16_t _mesa_float_to_half_rtz(float x);
|
||||
uint16_t _mesa_float_to_half_rtz_slow(float x);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue