diff --git a/src/asahi/libagx/geometry.cl b/src/asahi/libagx/geometry.cl
index ba884d651c7..b9f85854e51 100644
--- a/src/asahi/libagx/geometry.cl
+++ b/src/asahi/libagx/geometry.cl
@@ -5,6 +5,7 @@
  */
 
 #include "compiler/libcl/libcl_vk.h"
+#include "util/u_math.h"
 #include "geometry.h"
 #include "libagx_intrinsics.h"
 #include "query.h"
diff --git a/src/asahi/libagx/geometry.h b/src/asahi/libagx/geometry.h
index 7deee8f3093..70bfcf6c29e 100644
--- a/src/asahi/libagx/geometry.h
+++ b/src/asahi/libagx/geometry.h
@@ -8,8 +8,10 @@
 #include "compiler/libcl/libcl.h"
 #include "compiler/shader_enums.h"
 
-#ifndef __OPENCL_VERSION__
 #include "util/bitscan.h"
+#include "util/u_math.h"
+
+#ifndef __OPENCL_VERSION__
 #define libagx_popcount(x)   util_bitcount64(x)
 #define libagx_sub_sat(x, y) ((x >= y) ? (x - y) : 0)
 #else
diff --git a/src/asahi/libagx/meson.build b/src/asahi/libagx/meson.build
index f4a504ff3bf..b772415dbbc 100644
--- a/src/asahi/libagx/meson.build
+++ b/src/asahi/libagx/meson.build
@@ -19,6 +19,8 @@ libagx_spv = custom_target(
   command : [
     prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@',
     libagx_shader_files, '--',
+    '-I' + join_paths(meson.project_source_root(), 'include'),
+    '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'),
     '-I' + join_paths(meson.current_source_dir(), '.'),
     '-I' + join_paths(meson.current_source_dir(), '../../'),
     '-I' + join_paths(meson.current_source_dir(), 'shaders'),
diff --git a/src/asahi/libagx/tessellator.cl b/src/asahi/libagx/tessellator.cl
index a93c77e5bd1..bfbe862d7b1 100644
--- a/src/asahi/libagx/tessellator.cl
+++ b/src/asahi/libagx/tessellator.cl
@@ -20,6 +20,7 @@
    SOFTWARE.
 */
 
+#include "util/u_math.h"
 #include "geometry.h"
 #include "tessellator.h"
 
diff --git a/src/compiler/libcl/assert.h b/src/compiler/libcl/assert.h
new file mode 100644
index 00000000000..fcbfb3fb9d3
--- /dev/null
+++ b/src/compiler/libcl/assert.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2023 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#ifndef __OPENCL_VERSION__
+#error "should only be included from OpenCL"
+#endif
+
+#include <stdlib.h>
+
+/* OpenCL C lacks static_assert, a part of C11. This makes static_assert
+ * available on both host and device. It is defined as variadic to handle also
+ * no-message static_asserts (standardized in C23).
+ */
+#define _S(x) #x
+#define _PASTE_(x, y) x##y
+#define _PASTE(x, y) _PASTE_(x, y)
+#define static_assert(_COND, ...)                                              \
+   typedef char _PASTE(static_assertion, __LINE__)[(_COND) ? 1 : -1]
+
+/* OpenCL C lacks a standard assert. We implement one on top of abort, using a
+ * single printf so the lines don't get split up if multiple threads assert in
+ * parallel. Wrapped in do/while (0) so assert(x); is exactly one statement.
+ */
+#ifndef NDEBUG
+#define _ASSERT_STRING(x) _ASSERT_STRING_INNER(x)
+#define _ASSERT_STRING_INNER(x) #x
+#define assert(x) do { if (!(x)) { \
+   printf("Shader assertion fail at " __FILE__ ":" \
+          _ASSERT_STRING(__LINE__) "\nExpected " #x "\n\n"); \
+   nir_printf_abort(); \
+} } while (0)
+#else
+#define assert(x) do { } while (0)
+#endif
diff --git a/src/compiler/libcl/errno.h b/src/compiler/libcl/errno.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/compiler/libcl/float.h b/src/compiler/libcl/float.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/compiler/libcl/libcl.h b/src/compiler/libcl/libcl.h
index 07f1818e7e2..cf0d210cb76 100644
--- a/src/compiler/libcl/libcl.h
+++ b/src/compiler/libcl/libcl.h
@@ -12,15 +12,11 @@
  * OpenCL.
  */
 
-#ifndef __OPENCL_VERSION__
-
-/* The OpenCL version of this header defines many OpenCL versions of stdint.h
- * and util/macros.h functions. #include both here for consistency in shared
- * headers.
- */
 #include <stdint.h>
 #include "util/macros.h"
 
+#ifndef __OPENCL_VERSION__
+
 /* Structures defined in common host/device headers that include device pointers
  * need to resolve to a real pointer in OpenCL but an opaque 64-bit address on
  * the host. The DEVICE macro facilitates that.
@@ -54,20 +50,6 @@
 #define GLOBAL global
 #define CONST constant
 
-/* OpenCL lacks explicitly sized integer types, but we know the sizes of
- * particular integer types. These typedefs allow defining common headers with
- * explicit integer types (and therefore compatible data layouts).
- */
-typedef ulong uint64_t;
-typedef uint uint32_t;
-typedef ushort uint16_t;
-typedef uchar uint8_t;
-
-typedef long int64_t;
-typedef int int32_t;
-typedef short int16_t;
-typedef char int8_t;
-
 /* OpenCL C defines work-item functions to return a scalar for a particular
  * dimension. This is a really annoying papercut, and is not what you want for
  * either 1D or 3D dispatches.  In both cases, it's nicer to get vectors. For
@@ -91,16 +73,6 @@ typedef char int8_t;
 #define cl_group_id            _CL_WORKITEM3(get_group_id)
 #define cl_global_offset       _CL_WORKITEM3(get_global_offset)
 
-/* OpenCL C lacks static_assert, a part of C11. This makes static_assert
- * available on both host and device. It is defined as variadic to handle also
- * no-message static_asserts (standardized in C23).
- */
-#define _S(x) #x
-#define _PASTE_(x, y) x##y
-#define _PASTE(x, y) _PASTE_(x, y)
-#define static_assert(_COND, ...)                                              \
-   typedef char _PASTE(static_assertion, __LINE__)[(_COND) ? 1 : -1]
-
 /* NIR's precompilation infrastructure requires specifying a workgroup size with
  * the kernel, via reqd_work_group_size. Unfortunately, reqd_work_group_size has
  * terrible ergonomics, so we provide these aliases instead.
@@ -119,47 +91,10 @@ typedef char int8_t;
 /* This is not an exact match for the util/macros.h version but without the
  * aligned(4) we get garbage code gen and in practice this is what you want.
  */
-#define PACKED __attribute__((packed, aligned(4)))
-
-/* OpenCL C doesn't seem to have an equivalent for this but it doesn't matter.
- * Compare util/macros.h
- */
-#define ENUM_PACKED
-
-/* FILE * pointers can be useful in function signatures shared across
- * host/device, but are meaningless in OpenCL. Turn them into void* to allow
- * consistent prototype across host/device even though there won't be an actual
- * file pointer on the device side.
- */
-#define FILE void
-
-/* OpenCL C lacks a standard memcpy, but clang has one that will be plumbed into
- * a NIR memcpy intrinsic. This is not a competent implementation of memcpy for
- * large amounts of data, since it's necessarily single threaded, but memcpy is
- * too useful for shared CPU/GPU code that it's worth making the standard
- * library function work.
- */
-#define memcpy __builtin_memcpy
-
-/* OpenCL C lacks a standard abort, so we plumb through the NIR intrinsic. */
-void nir_printf_abort(void);
-static inline void abort(void) { nir_printf_abort(); }
-
-/* OpenCL C lacks a standard assert. We implement one on top of abort. We are
- * careful to use a single printf so the lines don't get split up if multiple
- * threads assert in parallel.
- */
-#ifndef NDEBUG
-#define _ASSERT_STRING(x) _ASSERT_STRING_INNER(x)
-#define _ASSERT_STRING_INNER(x) #x
-#define assert(x) if (!(x)) { \
-   printf("Shader assertion fail at " __FILE__ ":" \
-          _ASSERT_STRING(__LINE__) "\nExpected " #x "\n\n"); \
-   nir_printf_abort(); \
-}
-#else
-#define assert(x)
+#ifdef PACKED
+#undef PACKED
 #endif
+#define PACKED __attribute__((packed, aligned(4)))
 
 /* This is the unreachable macro from macros.h that uses __builtin_unreachable,
  * which is a clang builtin available in OpenCL C.
@@ -170,103 +105,6 @@ static inline void abort(void) { nir_printf_abort(); }
       __builtin_unreachable();                                                 \
    } while (0)
 
-/* Core OpenCL C like likely/unlikely. We might be able to map to a clang built
- * in though...
- */
-#define likely(x) (x)
-#define unlikely(x) (x)
-
-/* These duplicate the C standard library and are required for the
- * u_intN_min/max implementations.
- */
-#define UINT64_MAX 18446744073709551615ul
-#define INT64_MAX 9223372036854775807l
-
-/* These duplicate util/macros.h. This could maybe be cleaned up */
-#define BITFIELD_BIT(b)  (1u << b)
-#define BITFIELD_MASK(m) (((m) == 32) ? 0xffffffff : ((1u << (m)) - 1))
-#define ASSERTED
-#define ALWAYS_INLINE
-#define UNUSED
-
-static inline int64_t
-u_intN_max(unsigned bit_size)
-{
-   assert(bit_size <= 64 && bit_size > 0);
-   return INT64_MAX >> (64 - bit_size);
-}
-
-static inline int64_t
-u_intN_min(unsigned bit_size)
-{
-   return (-u_intN_max(bit_size)) - 1;
-}
-
-static inline uint64_t
-u_uintN_max(unsigned bit_size)
-{
-   assert(bit_size <= 64 && bit_size > 0);
-   return UINT64_MAX >> (64 - bit_size);
-}
-
-static inline uint
-align(uint x, uint y)
-{
-   return (x + y - 1) & ~(y - 1);
-}
-
-static inline uint32_t
-util_logbase2(uint32_t n)
-{
-   return (31 - clz(n | 1));
-}
-
-static inline uint32_t
-util_logbase2_ceil(uint32_t n)
-{
-   return (n <= 1) ? 0 : 32 - clz(n - 1);
-}
-
-#define BITFIELD64_MASK(x) ((x == 64) ? ~0ul : ((1ul << x) - 1))
-#define IS_POT(v)          (((v) & ((v) - 1)) == 0)
-#define IS_POT_NONZERO(v)  ((v) != 0 && IS_POT(v))
-#define DIV_ROUND_UP(A, B)      (((A) + (B) - 1) / (B))
-#define CLAMP(X, MIN, MAX)      ((X) > (MIN) ? ((X) > (MAX) ? (MAX) : (X)) : (MIN))
-#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
-
-/* TODO: Should we define with OpenCL min/max? Do we want to match the host? */
-#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
-#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
-
-/* Less worried about these matching */
-#define MIN3(a, b, c)           min(min(a, b), c)
-#define MAX3(a, b, c)           max(max(a, b), c)
-
-static inline uint32_t
-fui(float f)
-{
-   return as_uint(f);
-}
-
-static inline float
-uif(uint32_t ui)
-{
-   return as_float(ui);
-}
-
-#define CL_FLT_EPSILON 1.1920928955078125e-7f
-
-/* OpenCL C lacks roundf and llroundf, we can emulate it */
-static inline float roundf(float x)
-{
-   return trunc(x + copysign(0.5f - 0.25f * CL_FLT_EPSILON, x));
-}
-
-static inline long long llroundf(float x)
-{
-   return roundf(x);
-}
-
 static inline uint16_t
 _mesa_float_to_half(float f)
 {
@@ -279,22 +117,4 @@ _mesa_half_to_float(uint16_t w)
    return convert_float(as_half(w));
 }
 
-/* Duplicates u_math.h. We should make that header CL safe at some point...
- */
-static inline int64_t
-util_sign_extend(uint64_t val, unsigned width)
-{
-   unsigned shift = 64 - width;
-   return (int64_t)(val << shift) >> shift;
-}
-
-/* To make u_foreach_bit work. TODO: Use clz? */
-#define ffs __builtin_ffs
-
-/* Duplicates bitscan.h... */
-#define u_foreach_bit(b, dword)                          \
-   for (uint32_t __dword = (dword), b;                     \
-        ((b) = ffs(__dword) - 1, __dword);      \
-        __dword &= ~(1 << (b)))
-
 #endif
diff --git a/src/compiler/libcl/limits.h b/src/compiler/libcl/limits.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/compiler/libcl/math.h b/src/compiler/libcl/math.h
new file mode 100644
index 00000000000..99452f85e01
--- /dev/null
+++ b/src/compiler/libcl/math.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2023 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#ifndef __OPENCL_VERSION__
+#error "should only be included from OpenCL"
+#endif
+
+#define CL_FLT_EPSILON 1.1920928955078125e-7f
+
+/* OpenCL C lacks roundf and llroundf, we can emulate it */
+static inline float roundf(float x)
+{
+   return trunc(x + copysign(0.5f - 0.25f * CL_FLT_EPSILON, x));
+}
+
+static inline long long llroundf(float x)
+{
+   return roundf(x);
+}
+
+/* C's lrintf rounds in the current rounding mode, round-to-nearest-even by
+ * default, so halfway cases must not go through roundf() (which rounds them
+ * away from zero). OpenCL's rint() rounds to nearest even.
+ */
+static inline long lrintf(float x)
+{
+   return (long)rint(x);
+}
+
+/* OpenCL C spells this as the type-generic fabs. */
+static inline float fabsf(float x)
+{
+   return fabs(x);
+}
diff --git a/src/compiler/libcl/stdalign.h b/src/compiler/libcl/stdalign.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/compiler/libcl/stdarg.h b/src/compiler/libcl/stdarg.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/compiler/libcl/stdbool.h b/src/compiler/libcl/stdbool.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/compiler/libcl/stddef.h b/src/compiler/libcl/stddef.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/compiler/libcl/stdint.h b/src/compiler/libcl/stdint.h
new file mode 100644
index 00000000000..677b10d3833
--- /dev/null
+++ b/src/compiler/libcl/stdint.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2023 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#ifndef __OPENCL_VERSION__
+#error "should only be included from OpenCL"
+#endif
+
+/* OpenCL lacks explicitly sized integer types, but we know the sizes of
+ * particular integer types. These typedefs allow defining common headers with
+ * explicit integer types (and therefore compatible data layouts).
+ */
+typedef ulong uint64_t;
+typedef uint uint32_t;
+typedef ushort uint16_t;
+typedef uchar uint8_t;
+
+typedef long int64_t;
+typedef int int32_t;
+typedef short int16_t;
+typedef char int8_t;
+
+typedef int64_t intmax_t;
+typedef uint64_t uintmax_t;
+
+/* These duplicate the C standard library. UINT64_MAX and INT64_MAX are
+ * required for the u_intN_min/max implementations.
+ */
+#define UINT64_MAX 18446744073709551615ul
+#define UINT64_C(c)	c##UL
+
+/* (-MAX - 1) form: a bare 2147483648 literal is a long, not an int. */
+#define INT8_MIN (-128)
+#define INT16_MIN (-32768)
+#define INT32_MIN (-2147483647 - 1)
+#define INT64_MIN (-9223372036854775807l - 1)
+
+#define INT8_MAX 127
+#define INT16_MAX 32767
+#define INT32_MAX 2147483647
+#define INT64_MAX 9223372036854775807l
diff --git a/src/compiler/libcl/stdio.h b/src/compiler/libcl/stdio.h
new file mode 100644
index 00000000000..711dce91c3d
--- /dev/null
+++ b/src/compiler/libcl/stdio.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2023 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#ifndef __OPENCL_VERSION__
+#error "should only be included from OpenCL"
+#endif
+
+/* FILE * pointers can be useful in function signatures shared across
+ * host/device, but are meaningless in OpenCL. Turn them into void* to allow
+ * consistent prototype across host/device even though there won't be an actual
+ * file pointer on the device side.
+ */
+#define FILE void
diff --git a/src/compiler/libcl/stdlib.h b/src/compiler/libcl/stdlib.h
new file mode 100644
index 00000000000..3eab3f61bc4
--- /dev/null
+++ b/src/compiler/libcl/stdlib.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2023 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#ifndef __OPENCL_VERSION__
+#error "should only be included from OpenCL"
+#endif
+
+/* OpenCL C lacks a standard abort, so we plumb through the NIR intrinsic. */
+void nir_printf_abort(void);
+static inline void abort(void) { nir_printf_abort(); }
diff --git a/src/compiler/libcl/string.h b/src/compiler/libcl/string.h
new file mode 100644
index 00000000000..60b5881b6f0
--- /dev/null
+++ b/src/compiler/libcl/string.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2023 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#ifndef __OPENCL_VERSION__
+#error "should only be included from OpenCL"
+#endif
+
+/* OpenCL C lacks a standard memcpy, but clang has one that will be plumbed into
+ * a NIR memcpy intrinsic. This is not a competent implementation of memcpy for
+ * large amounts of data, since it's necessarily single threaded, but memcpy is
+ * too useful for shared CPU/GPU code that it's worth making the standard
+ * library function work.
+ */
+#define memcpy __builtin_memcpy
diff --git a/src/compiler/nir/nir_defines.h b/src/compiler/nir/nir_defines.h
index ee1f1ee4121..b3e80006d8c 100644
--- a/src/compiler/nir/nir_defines.h
+++ b/src/compiler/nir/nir_defines.h
@@ -18,13 +18,9 @@
 #ifndef NIR_DEFINES_H
 #define NIR_DEFINES_H
 
-#ifndef __OPENCL_VERSION__
 #include <stdbool.h>
 #include <stdint.h>
 #include "util/macros.h"
-#else
-#include "compiler/libcl/libcl.h"
-#endif
 #include "util/enum_operators.h"
 
 #ifdef __cplusplus
diff --git a/src/intel/vulkan/grl/meson.build b/src/intel/vulkan/grl/meson.build
index f85afaede1c..0bac5f8e460 100644
--- a/src/intel/vulkan/grl/meson.build
+++ b/src/intel/vulkan/grl/meson.build
@@ -118,6 +118,7 @@ foreach t : [['125', 'gfx125', 'dg2'], ['200', 'gfx20', 'lnl'],
         '-e', entrypoint, prepended_input_args, '-o', '@OUTPUT@', '--',
         '-cl-std=cl2.0', '-D__OPENCL_VERSION__=200',
         '-DMAX_HW_SIMD_WIDTH=16', '-DMAX_WORKGROUP_SIZE=16',
+        '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'),
         '-I' + join_paths(meson.current_source_dir(), 'gpu'),
         '-I' + join_paths(meson.current_source_dir(), 'include'),
       ],
diff --git a/src/nouveau/vulkan/meson.build b/src/nouveau/vulkan/meson.build
index a04501a1b61..11603734d8c 100644
--- a/src/nouveau/vulkan/meson.build
+++ b/src/nouveau/vulkan/meson.build
@@ -98,6 +98,7 @@ nvkcl_spv = custom_target(
   output : 'nvkcl.spv',
   command : [
     prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', nvkcl_files, '--',
+    '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'),
     '-I' + join_paths(meson.current_source_dir(), '.'),
     '-I' + join_paths(meson.project_source_root(), 'src'),
     cl_args,
diff --git a/src/panfrost/libpan/meson.build b/src/panfrost/libpan/meson.build
index e4bf3b0876c..186ffa131a9 100644
--- a/src/panfrost/libpan/meson.build
+++ b/src/panfrost/libpan/meson.build
@@ -16,6 +16,8 @@ foreach ver : ['4', '5', '6', '7', '9', '10']
         prog_mesa_clc, '-o', '@OUTPUT@',  '--depfile', '@DEPFILE@',
         libpan_shader_files, '--',
         '-DPAN_ARCH=@0@'.format(ver),
+        '-I' + join_paths(meson.project_source_root(), 'include'),
+        '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'),
         '-I' + join_paths(meson.current_source_dir(), '.'),
         '-I' + join_paths(meson.current_source_dir(), '../../'),
         '-I' + join_paths(meson.current_source_dir(), '../lib/'),
diff --git a/src/util/bitpack_helpers.h b/src/util/bitpack_helpers.h
index bb1a094da47..a55eae9ec75 100644
--- a/src/util/bitpack_helpers.h
+++ b/src/util/bitpack_helpers.h
@@ -24,7 +24,6 @@
 #ifndef UTIL_BITPACK_HELPERS_H
 #define UTIL_BITPACK_HELPERS_H
 
-#ifndef __OPENCL_VERSION__
 #include <math.h>
 #include <stdbool.h>
 
@@ -39,9 +38,6 @@
    VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
 #endif
 #endif
-#else
-#include "compiler/libcl/libcl.h"
-#endif
 
 #ifndef util_bitpack_validate_value
 #define util_bitpack_validate_value(x)
diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index e233ee257f6..541bf79d076 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -219,7 +219,7 @@ u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
 static inline void
 u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
 {
-   if (*mask == ~0ull) {
+   if (*mask == UINT64_MAX) {
       *start = 0;
       *count = 64;
       *mask = 0;
diff --git a/src/util/macros.h b/src/util/macros.h
index 88b30299d85..bb8f9b58ffa 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -403,10 +403,10 @@ do {                       \
    (BITFIELD_MASK((b) + (count)) & ~BITFIELD_MASK(b))
 
 /** Set a single bit */
-#define BITFIELD64_BIT(b)      (1ull << (b))
+#define BITFIELD64_BIT(b)      (UINT64_C(1) << (b))
 /** Set all bits up to excluding bit b */
 #define BITFIELD64_MASK(b)      \
-   ((b) == 64 ? (~0ull) : BITFIELD64_BIT((b) & 63) - 1)
+   ((b) == 64 ? (~UINT64_C(0)) : BITFIELD64_BIT((b) & 63) - 1)
 /** Set count bits starting from bit b  */
 #define BITFIELD64_RANGE(b, count) \
    (BITFIELD64_MASK((b) + (count)) & ~BITFIELD64_MASK(b))
diff --git a/src/util/simple_mtx.h b/src/util/simple_mtx.h
index 3cfb9ed05c2..ab206cebcd8 100644
--- a/src/util/simple_mtx.h
+++ b/src/util/simple_mtx.h
@@ -24,28 +24,7 @@
 #ifndef _SIMPLE_MTX_H
 #define _SIMPLE_MTX_H
 
-#include "util/futex.h"
-#include "util/macros.h"
-#include "util/u_call_once.h"
-#include "u_atomic.h"
-
-#if UTIL_FUTEX_SUPPORTED
-#if defined(HAVE_VALGRIND) && !defined(NDEBUG)
-#  include <valgrind.h>
-#  include <helgrind.h>
-#  define HG(x) x
-#else
-#  define HG(x)
-#endif
-#else /* !UTIL_FUTEX_SUPPORTED */
-#  include "c11/threads.h"
-#endif /* UTIL_FUTEX_SUPPORTED */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if UTIL_FUTEX_SUPPORTED
+#include <stdint.h>
 
 /* mtx_t - Fast, simple mutex
  *
@@ -75,6 +54,31 @@ typedef struct {
    uint32_t val;
 } simple_mtx_t;
 
+#ifndef __OPENCL_VERSION__
+
+#include "util/futex.h"
+#include "util/macros.h"
+#include "util/u_call_once.h"
+#include "u_atomic.h"
+
+#if UTIL_FUTEX_SUPPORTED
+#if defined(HAVE_VALGRIND) && !defined(NDEBUG)
+#  include <valgrind.h>
+#  include <helgrind.h>
+#  define HG(x) x
+#else
+#  define HG(x)
+#endif
+#else /* !UTIL_FUTEX_SUPPORTED */
+#  include "c11/threads.h"
+#endif /* UTIL_FUTEX_SUPPORTED */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if UTIL_FUTEX_SUPPORTED
+
 #define SIMPLE_MTX_INITIALIZER { 0 }
 
 #define _SIMPLE_MTX_INVALID_VALUE 0xd0d0d0d0
@@ -203,4 +207,8 @@ simple_mtx_assert_locked(simple_mtx_t *mtx)
 }
 #endif
 
+#else
+
+#endif
+
 #endif /* _SIMPLE_MTX_H */
diff --git a/src/util/u_endian.h b/src/util/u_endian.h
index b01f73017ae..fd786423d1f 100644
--- a/src/util/u_endian.h
+++ b/src/util/u_endian.h
@@ -87,6 +87,11 @@
 #define UTIL_ARCH_LITTLE_ENDIAN 1
 #define UTIL_ARCH_BIG_ENDIAN 0
 
+#elif defined(__OPENCL_VERSION__)
+
+#define UTIL_ARCH_LITTLE_ENDIAN 1
+#define UTIL_ARCH_BIG_ENDIAN 0
+
 #endif
 
 #if !defined(UTIL_ARCH_LITTLE_ENDIAN) || !defined(UTIL_ARCH_BIG_ENDIAN)
diff --git a/src/util/u_math.h b/src/util/u_math.h
index 9bfe395e113..e5277f79e30 100644
--- a/src/util/u_math.h
+++ b/src/util/u_math.h
@@ -284,7 +284,7 @@ util_half_inf_sign(int16_t x)
    return (x < 0) ? -1 : 1;
 }
 
-
+#ifndef __OPENCL_VERSION__
 /**
  * Return float bits.
  */
@@ -296,14 +296,6 @@ fui( float f )
    return fi.ui;
 }
 
-static inline uint64_t
-dui( double f )
-{
-   union di di;
-   di.d = f;
-   return di.ui;
-}
-
 static inline float
 uif(uint32_t ui)
 {
@@ -312,6 +304,28 @@ uif(uint32_t ui)
    return fi.f;
 }
 
+#else
+static inline uint32_t
+fui(float f)
+{
+   return as_uint(f);
+}
+
+static inline float
+uif(uint32_t ui)
+{
+   return as_float(ui);
+}
+#endif
+
+static inline uint64_t
+dui( double f )
+{
+   union di di;
+   di.d = f;
+   return di.ui;
+}
+
 static inline double
 uid(uint64_t ui)
 {