util: implement F16C using inline assembly on x86_64

F16C: https://en.wikipedia.org/wiki/F16C

This also happens to fix bptc-float-modes on llvmpipe.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6987>
This commit is contained in:
Marek Olšák 2020-09-18 05:21:09 -04:00
parent 4c54f05915
commit ffcdf76799
6 changed files with 84 additions and 19 deletions

View file

@ -738,7 +738,6 @@ spec/arb_sparse_buffer/commit: skip
spec/arb_sparse_buffer/minmax: skip
spec/arb_tessellation_shader/arb_tessellation_shader-immediate-mode-draw-patches: skip
spec/arb_texture_buffer_object/negative-unsupported: skip
spec/arb_texture_compression_bptc/bptc-float-modes: fail
spec/arb_texture_cube_map/copyteximage cube samples=16: skip
spec/arb_texture_cube_map/copyteximage cube samples=2: skip
spec/arb_texture_cube_map/copyteximage cube samples=32: skip
@ -1656,8 +1655,8 @@ wgl/wgl-sanity: skip
summary:
name: results
---- --------
pass: 23074
fail: 198
pass: 23075
fail: 197
crash: 0
skip: 1433
timeout: 0

View file

@ -4,9 +4,10 @@
#include "util/u_math.h"
#include "util/u_half.h"
#include "util/u_cpu_detect.h"
int
main(int argc, char **argv)
static void
test(void)
{
unsigned i;
unsigned roundtrip_fails = 0;
@ -28,9 +29,21 @@ main(int argc, char **argv)
if(roundtrip_fails) {
printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails);
return 1;
} else {
printf("Success!\n");
return 0;
exit(1);
}
}
int
main(int argc, char **argv)
{
assert(!util_cpu_caps.has_f16c);
test();
/* Test f16c. */
util_cpu_detect();
if (util_cpu_caps.has_f16c)
test();
printf("Success!\n");
return 0;
}

View file

@ -54,7 +54,7 @@ typedef union { float f; int32_t i; uint32_t u; } fi_type;
* result in the same value as if the expression were executed on the GPU.
*/
uint16_t
_mesa_float_to_half(float val)
_mesa_float_to_half_slow(float val)
{
const fi_type fi = {val};
const int flt_m = fi.i & 0x7fffff;
@ -129,9 +129,9 @@ _mesa_float_to_half(float val)
}
uint16_t
_mesa_float_to_float16_rtz(float val)
_mesa_float_to_float16_rtz_slow(float val)
{
return _mesa_float_to_half_rtz(val);
return _mesa_float_to_half_rtz_slow(val);
}
/**
@ -140,7 +140,7 @@ _mesa_float_to_float16_rtz(float val)
* http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
*/
float
_mesa_half_to_float(uint16_t val)
_mesa_half_to_float_slow(uint16_t val)
{
return util_half_to_float(val);
}

View file

@ -28,6 +28,12 @@
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include "util/u_cpu_detect.h"
#if defined(USE_X86_64_ASM)
#include <immintrin.h>
#endif
#ifdef __cplusplus
extern "C" {
@ -36,18 +42,65 @@ extern "C" {
#define FP16_ONE ((uint16_t) 0x3c00)
#define FP16_ZERO ((uint16_t) 0)
uint16_t _mesa_float_to_half(float val);
float _mesa_half_to_float(uint16_t val);
uint16_t _mesa_float_to_half_slow(float val);
float _mesa_half_to_float_slow(uint16_t val);
uint8_t _mesa_half_to_unorm8(uint16_t v);
uint16_t _mesa_uint16_div_64k_to_half(uint16_t v);
/*
* _mesa_float_to_float16_rtz is no more than a wrapper to the counterpart
* _mesa_float_to_float16_rtz_slow is no more than a wrapper to the counterpart
* softfloat.h call. Still, softfloat.h conversion API is meant to be kept
* private. In other words, only use the API published here, instead of
* calling directly the softfloat.h one.
*/
uint16_t _mesa_float_to_float16_rtz(float val);
uint16_t _mesa_float_to_float16_rtz_slow(float val);
static inline uint16_t
_mesa_float_to_half(float val)
{
#if defined(USE_X86_64_ASM)
if (util_cpu_caps.has_f16c) {
__m128 in = {val};
__m128i out;
/* $0 = round to nearest */
__asm volatile("vcvtps2ph $0, %1, %0" : "=v"(out) : "v"(in));
return out[0];
}
#endif
return _mesa_float_to_half_slow(val);
}
static inline float
_mesa_half_to_float(uint16_t val)
{
#if defined(USE_X86_64_ASM)
if (util_cpu_caps.has_f16c) {
__m128i in = {val};
__m128 out;
__asm volatile("vcvtph2ps %1, %0" : "=v"(out) : "v"(in));
return out[0];
}
#endif
return _mesa_half_to_float_slow(val);
}
static inline uint16_t
_mesa_float_to_float16_rtz(float val)
{
#if defined(USE_X86_64_ASM)
if (util_cpu_caps.has_f16c) {
__m128 in = {val};
__m128i out;
/* $3 = round towards zero (truncate) */
__asm volatile("vcvtps2ph $3, %1, %0" : "=v"(out) : "v"(in));
return out[0];
}
#endif
return _mesa_float_to_float16_rtz_slow(val);
}
static inline uint16_t
_mesa_float_to_float16_rtne(float val)

View file

@ -1435,7 +1435,7 @@ _mesa_double_to_f32(double val, bool rtz)
* From f32_to_f16()
*/
uint16_t
_mesa_float_to_half_rtz(float val)
_mesa_float_to_half_rtz_slow(float val)
{
const fi_type fi = {val};
const uint32_t flt_m = fi.u & 0x7fffff;

View file

@ -56,7 +56,7 @@ double _mesa_double_mul_rtz(double a, double b);
double _mesa_double_fma_rtz(double a, double b, double c);
float _mesa_float_fma_rtz(float a, float b, float c);
float _mesa_double_to_f32(double x, bool rtz);
uint16_t _mesa_float_to_half_rtz(float x);
uint16_t _mesa_float_to_half_rtz_slow(float x);
#ifdef __cplusplus
} /* extern C */