libagx: make index buffer fetch robust

For use by hk (the Honeykrisp Vulkan driver), which requires robust index buffer access.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29742>
This commit is contained in:
Alyssa Rosenzweig 2024-06-11 09:42:23 -04:00
parent 87f9fe3c58
commit 4a71456a1a
4 changed files with 103 additions and 34 deletions

View file

@ -40,11 +40,8 @@ load_vertex_id(nir_builder *b, struct state *state)
if (state->index_size) {
nir_def *ia = nir_load_input_assembly_buffer_agx(b);
nir_def *address =
libagx_index_buffer(b, ia, id, nir_imm_int(b, state->index_size));
nir_def *index = nir_load_global_constant(b, address, state->index_size,
1, state->index_size * 8);
nir_def *index =
libagx_load_index_buffer(b, ia, id, nir_imm_int(b, state->index_size));
id = nir_u2uN(b, index, id->bit_size);
}

View file

@ -227,6 +227,45 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first,
}
}
/*
 * Robustly fetch one index from an index buffer.
 *
 * An out-of-bounds id is redirected to element 0 for the load (element 0 is
 * assumed mappable: with robustness2 and an empty application index buffer,
 * index_buffer points at a zero sink whose first element is valid), and the
 * fetched value is then forced to zero, giving D3D robustness semantics.
 */
static uint
load_index_buffer(uintptr_t index_buffer, uint32_t index_buffer_range_el,
                  uint id, uint index_size)
{
   bool oob = id >= index_buffer_range_el;

   /* Clamp the element we actually dereference so the load itself is safe. */
   uint fetch_id = oob ? 0 : id;

   uint el;
   switch (index_size) {
   case 1:
      el = ((constant uint8_t *)index_buffer)[fetch_id];
      break;
   case 2:
      el = ((constant uint16_t *)index_buffer)[fetch_id];
      break;
   default:
      el = ((constant uint32_t *)index_buffer)[fetch_id];
      break;
   }

   /* D3D robustness semantics: out-of-bounds reads return 0.
    * TODO: Optimize?
    */
   return oob ? 0 : el;
}
/*
 * Fetch an index from the bound index buffer described by the input assembly
 * state, with robust out-of-bounds handling (see load_index_buffer).
 */
uint
libagx_load_index_buffer(constant struct agx_ia_state *p, uint id,
                         uint index_size)
{
   uintptr_t buffer = p->index_buffer;
   uint32_t range_el = p->index_buffer_range_el;

   return load_index_buffer(buffer, range_el, id, index_size);
}
/*
* Return the ID of the first thread in the workgroup where cond is true, or
* 1024 if cond is false across the workgroup.
@ -303,19 +342,19 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
sizeof(INDEX)); \
\
/* Accessed thru local mem because NIR deref is too aggressive */ \
in_ptr = (uintptr_t)(p->index_buffer + sizeof(INDEX) * in_draw[2]); \
in_ptr = (uintptr_t)(libagx_index_buffer( \
p->index_buffer, p->index_buffer_size_el, in_draw[2], \
sizeof(INDEX), p->zero_sink)); \
} \
\
barrier(CLK_LOCAL_MEM_FENCE); \
global INDEX *out = (global INDEX *)out_ptr; \
constant INDEX *in = (constant INDEX *)in_ptr; \
\
local uint scratch[32]; \
\
uint out_prims = 0; \
INDEX restart_idx = p->restart_index; \
bool flatshade_first = p->flatshade_first; \
uint in_size_el = p->index_buffer_size_B / sizeof(INDEX); \
\
uint needle = 0; \
uint per_prim = mesa_vertices_per_prim(mode); \
@ -323,10 +362,11 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
/* Search for next restart or the end. Lanes load in parallel. */ \
uint next_restart = needle; \
for (;;) { \
/* Relies on shortcircuiting */ \
uint idx = next_restart + tid; \
/* XXX: robustness here */ \
bool restart = idx >= count || in[idx] == restart_idx; \
bool restart = \
idx >= count || \
load_index_buffer(in_ptr, p->index_buffer_size_el, idx, \
sizeof(INDEX)) == restart_idx; \
\
uint next_offs = first_true_thread_in_workgroup(restart, scratch); \
\
@ -346,7 +386,8 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
uint offset = needle + id; \
\
out[((out_prims_base + i) * per_prim) + vtx] = \
offset < in_size_el ? in[offset] : 0; \
load_index_buffer(in_ptr, p->index_buffer_size_el, offset, \
sizeof(INDEX)); \
} \
} \
\
@ -362,13 +403,6 @@ UNROLL(uchar, u8)
UNROLL(ushort, u16)
UNROLL(uint, u32)
/*
 * Compute the address of element `id` (of `index_size` bytes) in the bound
 * index buffer.
 *
 * NOTE(review): no bounds checking is performed here — `id` is trusted, and
 * an out-of-range id yields an out-of-bounds address.
 */
uintptr_t
libagx_index_buffer(constant struct agx_ia_state *p, uint id,
                    uint index_size)
{
   return (uintptr_t)&p->index_buffer[id * index_size];
}
uint
libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i)
{
@ -479,8 +513,13 @@ libagx_gs_setup_indirect(global struct agx_gs_setup_indirect_params *gsi,
* indirect draw, the hardware would do this for us, but for software input
* assembly we need to do it ourselves.
*/
if (gsi->index_buffer) {
ia->index_buffer = gsi->index_buffer + gsi->draw[2] * gsi->index_size_B;
if (gsi->index_size_B) {
ia->index_buffer =
libagx_index_buffer(gsi->index_buffer, gsi->index_buffer_range_el,
gsi->draw[2], gsi->index_size_B, gsi->zero_sink);
ia->index_buffer_range_el =
libagx_index_buffer_range_el(gsi->index_buffer_range_el, gsi->draw[2]);
}
/* We need to allocate VS and GS count buffers, do so now */

View file

@ -9,11 +9,13 @@
#ifndef __OPENCL_VERSION__
#include "util/bitscan.h"
#define CONST(type_) uint64_t
#define libagx_popcount(x) util_bitcount64(x)
#define CONST(type_) uint64_t
#define libagx_popcount(x) util_bitcount64(x)
#define libagx_sub_sat(x, y) ((x >= y) ? (x - y) : 0)
#else
#define CONST(type_) constant type_ *
#define libagx_popcount(x) popcount(x)
#define CONST(type_) constant type_ *
#define libagx_popcount(x) popcount(x)
#define libagx_sub_sat(x, y) sub_sat(x, y)
#endif
#ifndef LIBAGX_GEOMETRY_H
@ -35,7 +37,7 @@ struct agx_restart_unroll_params {
GLOBAL(struct agx_geometry_state) heap;
/* Input: index buffer if present. */
CONST(uchar) index_buffer;
uint64_t index_buffer;
/* Input: draw count */
CONST(uint) count;
@ -46,14 +48,17 @@ struct agx_restart_unroll_params {
/* Output draw descriptors */
GLOBAL(uint) out_draws;
/* Pointer to zero */
uint64_t zero_sink;
/* Input: maximum draw count, count is clamped to this */
uint32_t max_draws;
/* Primitive restart index */
uint32_t restart_index;
/* Input index buffer size in bytes */
uint32_t index_buffer_size_B;
/* Input index buffer size in elements */
uint32_t index_buffer_size_el;
/* Stride for the draw descriptor array */
uint32_t draw_stride;
@ -64,11 +69,11 @@ struct agx_restart_unroll_params {
*/
uint32_t flatshade_first;
} PACKED;
AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 15 * 4);
AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 17 * 4);
struct agx_gs_setup_indirect_params {
/* Index buffer if present. */
CONST(uchar) index_buffer;
uint64_t index_buffer;
/* Indirect draw descriptor. */
CONST(uint) draw;
@ -82,24 +87,49 @@ struct agx_gs_setup_indirect_params {
/* Output geometry parameters */
GLOBAL(struct agx_geometry_params) geom;
/* Pointer to zero */
uint64_t zero_sink;
/* Vertex (TES) output mask for sizing the allocated buffer */
uint64_t vs_outputs;
/* The index size (1, 2, 4) or 0 if drawing without an index buffer. */
uint32_t index_size_B;
/* Size of the index buffer */
uint32_t index_buffer_range_el;
} PACKED;
AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 13 * 4);
AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 16 * 4);
struct agx_ia_state {
/* Index buffer if present. */
CONST(uchar) index_buffer;
uint64_t index_buffer;
/* Size of the bound index buffer for bounds checking */
uint32_t index_buffer_range_el;
/* Number of vertices per instance. Written by CPU for direct draw, indirect
* setup kernel for indirect. This is used for VS->GS and VS->TCS indexing.
*/
uint32_t verts_per_instance;
} PACKED;
AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 3 * 4);
AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 4 * 4);
/*
 * Robustly compute the address of element `offset_el` (of `elsize_B` bytes)
 * in an index buffer of `size_el` elements. Out-of-bounds offsets return
 * `zero_sink` instead, a pointer to zeroes, so the eventual load is harmless.
 */
static inline uint64_t
libagx_index_buffer(uint64_t index_buffer, uint size_el, uint offset_el,
                    uint elsize_B, uint64_t zero_sink)
{
   if (offset_el < size_el) {
      /* Widen before multiplying: a 32-bit offset_el * elsize_B product can
       * wrap for index buffers whose byte offset exceeds 4 GiB.
       */
      return index_buffer + ((uint64_t)offset_el * elsize_B);
   } else {
      return zero_sink;
   }
}
/*
 * Number of index buffer elements remaining at `offset_el`, saturating at
 * zero when the offset is past the end. Used to bounds-check later fetches.
 */
static inline uint
libagx_index_buffer_range_el(uint size_el, uint offset_el)
{
   return (size_el >= offset_el) ? (size_el - offset_el) : 0;
}
struct agx_geometry_params {
/* Persistent (cross-draw) geometry state */

View file

@ -3949,6 +3949,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
{
struct agx_ia_state ia = {
.index_buffer = input_index_buffer,
.index_buffer_range_el = index_buffer_size_B / info->index_size,
.verts_per_instance = draw ? draw->count : 0,
};
@ -4111,6 +4112,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
struct agx_gs_setup_indirect_params gsi = {
.index_buffer = ib,
.index_buffer_range_el = ib_extent / info->index_size,
.draw = rsrc->bo->ptr.gpu + indirect->offset,
.vertex_buffer = batch->uniforms.vertex_output_buffer_ptr,
.ia = batch->uniforms.input_assembly,
@ -4239,7 +4241,7 @@ agx_draw_without_restart(struct agx_batch *batch,
.index_buffer = ib,
.out_draws = out_draws.gpu,
.restart_index = info->restart_index,
.index_buffer_size_B = ib_extent,
.index_buffer_size_el = ib_extent / info->index_size,
.flatshade_first = batch->ctx->rast->base.flatshade_first,
.draws = indirect_rsrc->bo->ptr.gpu + indirect->offset,
};
@ -4559,6 +4561,7 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
struct agx_ia_state ia = {
.index_buffer = ib,
.index_buffer_range_el = ib_extent,
.verts_per_instance = draws ? draws->count : 0,
};