compiler: use libcl.h for CL

instead of redefining BITFIELD_BIT.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32529>
This commit is contained in:
Alyssa Rosenzweig 2024-12-04 07:57:49 -05:00 committed by Marge Bot
parent d695c84829
commit e4f61771d8
8 changed files with 60 additions and 63 deletions

View file

@ -22,7 +22,7 @@
_mesa_half_to_float(__gen_unpack_uint(x, y, z))
static inline uint64_t
__gen_unpack_uint(constant uint32_t *restrict cl, uint32_t start, uint32_t end)
__gen_unpack_uint(CONST uint32_t *restrict cl, uint32_t start, uint32_t end)
{
uint64_t val = 0;
const int width = end - start + 1;
@ -48,13 +48,13 @@ __gen_pack_lod(float f, uint32_t start, uint32_t end)
}
/* Unpack a LOD field from cl: read the field delimited by start and end as an
 * unsigned integer via __gen_unpack_uint(), then divide by 64 (1 << 6) to get
 * the float value, i.e. the raw integer carries 6 fractional bits.
 */
static inline float
__gen_unpack_lod(constant uint32_t *restrict cl, uint32_t start, uint32_t end)
__gen_unpack_lod(CONST uint32_t *restrict cl, uint32_t start, uint32_t end)
{
return ((float)__gen_unpack_uint(cl, start, end)) / (1 << 6);
}
static inline uint64_t
__gen_unpack_sint(constant uint32_t *restrict cl, uint32_t start, uint32_t end)
__gen_unpack_sint(CONST uint32_t *restrict cl, uint32_t start, uint32_t end)
{
int size = end - start + 1;
int64_t val = __gen_unpack_uint(cl, start, end);
@ -90,16 +90,16 @@ __gen_from_groups(uint32_t value, uint32_t group_size, uint32_t length)
/* Scoped packing helper, used as `agx_pack(dst, T, name) { name.field = ...; }`.
 *
 * Expands to a for loop that runs the attached statement exactly once:
 *  - the init clause declares `name` as a struct AGX_<T> initialised from
 *    AGX_<T>_header, plus `_loop_count`, a pointer declared in the same
 *    declaration that is used purely as a 0/1 iteration counter;
 *  - the increment clause is a statement expression that calls AGX_<T>_pack()
 *    to write `name` to dst (cast to a uint32_t pointer) and then bumps
 *    `_loop_count` to 1 so the loop terminates.
 *
 * The pack therefore happens after the attached statement has run.
 */
#define agx_pack(dst, T, name) \
for (struct AGX_##T name = {AGX_##T##_header}, \
*_loop_count = (global void *)((uintptr_t)0); \
*_loop_count = (GLOBAL void *)((uintptr_t)0); \
(uintptr_t)_loop_count < 1; ( \
{ \
AGX_##T##_pack((global uint32_t *)(dst), &name); \
_loop_count = (global void *)(((uintptr_t)_loop_count) + 1); \
AGX_##T##_pack((GLOBAL uint32_t *)(dst), &name); \
_loop_count = (GLOBAL void *)(((uintptr_t)_loop_count) + 1); \
}))
/* Declare `name` as a struct AGX_<T> in the current scope and fill it by
 * calling AGX_<T>_unpack(fp, src, &name), with src cast to a uint8_t pointer.
 *
 * Expands to a declaration followed by a call, so it cannot be used as the
 * sole body of an unbraced if/else/loop.
 */
#define agx_unpack(fp, src, T, name) \
struct AGX_##T name; \
AGX_##T##_unpack(fp, (constant uint8_t *)(src), &name)
AGX_##T##_unpack(fp, (CONST uint8_t *)(src), &name)
/* Forward to AGX_<T>_print(), passing fp, the address of var, and indent. */
#define agx_print(fp, T, var, indent) AGX_##T##_print(fp, &(var), indent)

View file

@ -395,7 +395,7 @@ class Group(object):
convert = None
args = []
args.append('(constant uint32_t *) cl')
args.append('(CONST uint32_t *) cl')
args.append(str(fieldref.start))
args.append(str(fieldref.end))
@ -573,7 +573,7 @@ class Parser(object):
print("};\n")
def emit_pack_function(self, name, group):
print("static inline void\n%s_pack(global uint32_t * restrict cl,\n%sconst struct %s * restrict values)\n{" %
print("static inline void\n%s_pack(GLOBAL uint32_t * restrict cl,\n%sconst struct %s * restrict values)\n{" %
(name, ' ' * (len(name) + 6), name))
group.emit_pack_function()
@ -590,7 +590,7 @@ class Parser(object):
def emit_unpack_function(self, name, group):
print("static inline bool")
print("%s_unpack(FILE *fp, constant uint8_t * restrict cl,\n%sstruct %s * restrict values)\n{" %
print("%s_unpack(FILE *fp, CONST uint8_t * restrict cl,\n%sstruct %s * restrict values)\n{" %
(name.upper(), ' ' * (len(name) + 8), name))
group.emit_unpack_function()

View file

@ -9,11 +9,9 @@
/* Shims so the same header can be built both as host C and as OpenCL C.
 * The branch is selected on __OPENCL_VERSION__ (absent => host build).
 */
#ifndef __OPENCL_VERSION__
/* Host build: pointer-typed fields are represented as plain 64-bit integers
 * and the integer helpers map onto util/ routines or open-coded expressions.
 */
#include "util/bitscan.h"
#define CONST(type_) uint64_t
#define libagx_popcount(x) util_bitcount64(x)
/* NOTE(review): x and y are not parenthesised and x/y may each be evaluated
 * twice, so arguments with side effects or low-precedence operators are unsafe.
 */
#define libagx_sub_sat(x, y) ((x >= y) ? (x - y) : 0)
#else
/* OpenCL build: use the real pointer type and the OpenCL built-ins. */
#define CONST(type_) constant type_ *
#define libagx_popcount(x) popcount(x)
#define libagx_sub_sat(x, y) sub_sat(x, y)
#endif
@ -27,7 +25,7 @@
/* Packed geometry state buffer */
struct agx_geometry_state {
/* Heap to allocate from. */
GLOBAL(uchar) heap;
DEVICE(uchar) heap;
/* NOTE(review): presumably the current allocation offset into the heap and
 * the heap's total size; units are not visible here -- confirm.
 */
uint32_t heap_bottom, heap_size;
} PACKED;
/* Layout check: the struct must be exactly 16 bytes (4 * 4). */
static_assert(sizeof(struct agx_geometry_state) == 4 * 4);
@ -64,35 +62,35 @@ libagx_index_buffer_range_el(uint size_el, uint offset_el)
struct agx_geometry_params {
/* Persistent (cross-draw) geometry state */
GLOBAL(struct agx_geometry_state) state;
DEVICE(struct agx_geometry_state) state;
/* Address of associated indirect draw buffer */
GLOBAL(uint) indirect_desc;
DEVICE(uint) indirect_desc;
/* Address of count buffer. For an indirect draw, this will be written by the
* indirect setup kernel.
*/
GLOBAL(uint) count_buffer;
DEVICE(uint) count_buffer;
/* Address of the primitives generated counters */
GLOBAL(uint) prims_generated_counter[MAX_VERTEX_STREAMS];
GLOBAL(uint) xfb_prims_generated_counter[MAX_VERTEX_STREAMS];
GLOBAL(uint) xfb_overflow[MAX_VERTEX_STREAMS];
GLOBAL(uint) xfb_any_overflow;
DEVICE(uint) prims_generated_counter[MAX_VERTEX_STREAMS];
DEVICE(uint) xfb_prims_generated_counter[MAX_VERTEX_STREAMS];
DEVICE(uint) xfb_overflow[MAX_VERTEX_STREAMS];
DEVICE(uint) xfb_any_overflow;
/* Pointers to transform feedback buffer offsets in bytes */
GLOBAL(uint) xfb_offs_ptrs[MAX_SO_BUFFERS];
DEVICE(uint) xfb_offs_ptrs[MAX_SO_BUFFERS];
/* Output index buffer, allocated by pre-GS. */
GLOBAL(uint) output_index_buffer;
DEVICE(uint) output_index_buffer;
/* Address of transform feedback buffer in general, supplied by the CPU. */
GLOBAL(uchar) xfb_base_original[MAX_SO_BUFFERS];
DEVICE(uchar) xfb_base_original[MAX_SO_BUFFERS];
/* Address of transform feedback for the current primitive. Written by pre-GS
* program.
*/
GLOBAL(uchar) xfb_base[MAX_SO_BUFFERS];
DEVICE(uchar) xfb_base[MAX_SO_BUFFERS];
/* Address and present mask for the input to the geometry shader. These will
* reflect the vertex shader for VS->GS or instead the tessellation

View file

@ -29,7 +29,7 @@ struct agx_helper_block {
static_assert(sizeof(struct agx_helper_block) == 16);
struct agx_helper_core {
GLOBAL(struct agx_helper_block) blocklist;
DEVICE(struct agx_helper_block) blocklist;
uint32_t alloc_cur;
uint32_t alloc_max;
uint32_t alloc_failed;

View file

@ -10,13 +10,13 @@
/* Scoped helper, used as `agx_push(ptr, T, cfg) { cfg.field = ...; }`.
 *
 * Wraps agx_pack(ptr, T, cfg) in a single-iteration for loop whose increment
 * clause advances ptr by AGX_<T>_LENGTH (added to the pointer's integer value)
 * once the inner agx_pack has completed. ptr must be an assignable lvalue and
 * is evaluated more than once.
 */
#define agx_push(ptr, T, cfg) \
for (unsigned _loop = 0; _loop < 1; \
++_loop, ptr = (global void *)(((uintptr_t)ptr) + AGX_##T##_LENGTH)) \
++_loop, ptr = (GLOBAL void *)(((uintptr_t)ptr) + AGX_##T##_LENGTH)) \
agx_pack(ptr, T, cfg)
/* Append an already-packed value: statically check that sizeof(src) equals
 * AGX_<T>_LENGTH, memcpy src to ptr, then advance ptr by sizeof(src).
 *
 * Expands to several statements and already ends in a semicolon, so it must
 * not be used as the sole body of an unbraced if/else/loop. ptr must be an
 * assignable lvalue.
 */
#define agx_push_packed(ptr, src, T) \
static_assert(sizeof(src) == AGX_##T##_LENGTH); \
memcpy(ptr, &src, sizeof(src)); \
ptr = (global void *)(((uintptr_t)ptr) + sizeof(src));
ptr = (GLOBAL void *)(((uintptr_t)ptr) + sizeof(src));
struct agx_workgroup {
uint32_t x, y, z;
@ -79,8 +79,8 @@ enum agx_chip {
AGX_CHIP_G14X,
};
static inline global uint32_t *
agx_cdm_launch(global uint32_t *out, enum agx_chip chip, struct agx_grid grid,
static inline GLOBAL uint32_t *
agx_cdm_launch(GLOBAL uint32_t *out, enum agx_chip chip, struct agx_grid grid,
struct agx_workgroup wg,
struct agx_cdm_launch_word_0_packed launch, uint32_t usc)
{
@ -128,8 +128,8 @@ agx_cdm_launch(global uint32_t *out, enum agx_chip chip, struct agx_grid grid,
return out;
}
static inline global uint32_t *
agx_cdm_barrier(global uint32_t *out, enum agx_chip chip)
static inline GLOBAL uint32_t *
agx_cdm_barrier(GLOBAL uint32_t *out, enum agx_chip chip)
{
agx_push(out, CDM_BARRIER, cfg) {
cfg.unk_5 = true;
@ -175,8 +175,8 @@ agx_cdm_barrier(global uint32_t *out, enum agx_chip chip)
return out;
}
static inline global uint32_t *
agx_cdm_return(global uint32_t *out)
static inline GLOBAL uint32_t *
agx_cdm_return(GLOBAL uint32_t *out)
{
agx_push(out, CDM_STREAM_RETURN, cfg)
;
@ -184,8 +184,8 @@ agx_cdm_return(global uint32_t *out)
return out;
}
static inline global uint32_t *
agx_cdm_terminate(global uint32_t *out)
static inline GLOBAL uint32_t *
agx_cdm_terminate(GLOBAL uint32_t *out)
{
agx_push(out, CDM_STREAM_TERMINATE, _)
;
@ -193,8 +193,8 @@ agx_cdm_terminate(global uint32_t *out)
return out;
}
static inline global uint32_t *
agx_vdm_terminate(global uint32_t *out)
static inline GLOBAL uint32_t *
agx_vdm_terminate(GLOBAL uint32_t *out)
{
agx_push(out, VDM_STREAM_TERMINATE, _)
;
@ -202,8 +202,8 @@ agx_vdm_terminate(global uint32_t *out)
return out;
}
static inline global uint32_t *
agx_cdm_jump(global uint32_t *out, uint64_t target)
static inline GLOBAL uint32_t *
agx_cdm_jump(GLOBAL uint32_t *out, uint64_t target)
{
agx_push(out, CDM_STREAM_LINK, cfg) {
cfg.target_lo = target & BITFIELD_MASK(32);
@ -213,8 +213,8 @@ agx_cdm_jump(global uint32_t *out, uint64_t target)
return out;
}
static inline global uint32_t *
agx_vdm_jump(global uint32_t *out, uint64_t target)
static inline GLOBAL uint32_t *
agx_vdm_jump(GLOBAL uint32_t *out, uint64_t target)
{
agx_push(out, VDM_STREAM_LINK, cfg) {
cfg.target_lo = target & BITFIELD_MASK(32);
@ -224,14 +224,14 @@ agx_vdm_jump(global uint32_t *out, uint64_t target)
return out;
}
/* Emit a jump to target at out, choosing the encoder by stream type:
 * agx_vdm_jump() when vdm is true, agx_cdm_jump() otherwise.
 *
 * Returns whatever the selected helper returns.
 */
static inline global uint32_t *
agx_cs_jump(global uint32_t *out, uint64_t target, bool vdm)
static inline GLOBAL uint32_t *
agx_cs_jump(GLOBAL uint32_t *out, uint64_t target, bool vdm)
{
return vdm ? agx_vdm_jump(out, target) : agx_cdm_jump(out, target);
}
static inline global uint32_t *
agx_cdm_call(global uint32_t *out, uint64_t target)
static inline GLOBAL uint32_t *
agx_cdm_call(GLOBAL uint32_t *out, uint64_t target)
{
agx_push(out, CDM_STREAM_LINK, cfg) {
cfg.target_lo = target & BITFIELD_MASK(32);
@ -242,8 +242,8 @@ agx_cdm_call(global uint32_t *out, uint64_t target)
return out;
}
static inline global uint32_t *
agx_vdm_call(global uint32_t *out, uint64_t target)
static inline GLOBAL uint32_t *
agx_vdm_call(GLOBAL uint32_t *out, uint64_t target)
{
agx_push(out, VDM_STREAM_LINK, cfg) {
cfg.target_lo = target & BITFIELD_MASK(32);
@ -279,7 +279,7 @@ struct agx_shader {
/* Opaque structure representing a USC program being constructed */
struct agx_usc_builder {
global uint8_t *head;
GLOBAL uint8_t *head;
#ifndef NDEBUG
uint8_t *begin;
@ -292,7 +292,7 @@ static_assert(sizeof(struct agx_usc_builder) == 8);
#endif
static struct agx_usc_builder
agx_usc_builder(global void *out, ASSERTED size_t size)
agx_usc_builder(GLOBAL void *out, ASSERTED size_t size)
{
return (struct agx_usc_builder){
.head = out,
@ -352,7 +352,7 @@ agx_usc_uniform(struct agx_usc_builder *b, unsigned start_halfs,
}
static inline void
agx_usc_words_precomp(global uint32_t *out, constant struct agx_shader *s,
agx_usc_words_precomp(GLOBAL uint32_t *out, CONST struct agx_shader *s,
uint64_t data, unsigned data_size)
{
/* Map the data directly as uniforms starting at u0 */

View file

@ -9,12 +9,12 @@
#pragma once
/* Argument block holding four destination and four source uint32_t addresses.
 * NOTE(review): presumably dest[i] receives the counter read from src[i];
 * the consumer is not visible here -- confirm.
 */
struct libagx_xfb_counter_copy {
GLOBAL(uint32_t) dest[4];
GLOBAL(uint32_t) src[4];
DEVICE(uint32_t) dest[4];
DEVICE(uint32_t) src[4];
};
/* Pairs a uint32_t address with a 32-bit value.
 * NOTE(review): presumably describes writing `value` to `address`; the
 * consumer is not visible here -- confirm.
 */
struct libagx_imm_write {
GLOBAL(uint32_t) address;
DEVICE(uint32_t) address;
uint32_t value;
};

View file

@ -29,35 +29,35 @@ static_assert(sizeof(struct libagx_tess_point) == 8);
struct libagx_tess_args {
/* Heap to allocate tessellator outputs in */
GLOBAL(struct agx_geometry_state) heap;
DEVICE(struct agx_geometry_state) heap;
/* Patch coordinate buffer, indexed as:
*
* coord_allocs[patch_ID] + vertex_in_patch
*/
GLOBAL(struct libagx_tess_point) patch_coord_buffer;
DEVICE(struct libagx_tess_point) patch_coord_buffer;
/* Per-patch index within the heap for the tess coords, written by the
* tessellator based on the allocated memory.
*/
GLOBAL(uint32_t) coord_allocs;
DEVICE(uint32_t) coord_allocs;
/* Space for output draws from the tessellator. API draw calls. */
GLOBAL(uint32_t) out_draws;
DEVICE(uint32_t) out_draws;
/* Tessellation control shader output buffer. */
GLOBAL(float) tcs_buffer;
DEVICE(float) tcs_buffer;
/* Count buffer. # of indices per patch written here, then prefix summed. */
GLOBAL(uint32_t) counts;
DEVICE(uint32_t) counts;
/* Allocated index buffer for all patches, if we're prefix summing counts */
GLOBAL(uint32_t) index_buffer;
DEVICE(uint32_t) index_buffer;
/* Address of the tess eval invocation counter for implementing pipeline
* statistics, if active. Zero if inactive. Incremented by tessellator.
*/
GLOBAL(uint32_t) statistic;
DEVICE(uint32_t) statistic;
/* When geom+tess used together, the buffer containing TES outputs (executed
* as a hardware compute shader).

View file

@ -31,8 +31,7 @@
#include "util/macros.h"
#include "util/u_debug.h"
#else
#define ENUM_PACKED
#define BITFIELD_BIT(b) (1u << (b))
#include "libcl/libcl.h"
#define debug_printf(x, ...)
#endif