mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
libagx: make index buffer fetch robust for hk

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29742>
This commit is contained in:
parent
87f9fe3c58
commit
4a71456a1a
4 changed files with 103 additions and 34 deletions
|
|
@ -40,11 +40,8 @@ load_vertex_id(nir_builder *b, struct state *state)
|
|||
if (state->index_size) {
|
||||
nir_def *ia = nir_load_input_assembly_buffer_agx(b);
|
||||
|
||||
nir_def *address =
|
||||
libagx_index_buffer(b, ia, id, nir_imm_int(b, state->index_size));
|
||||
|
||||
nir_def *index = nir_load_global_constant(b, address, state->index_size,
|
||||
1, state->index_size * 8);
|
||||
nir_def *index =
|
||||
libagx_load_index_buffer(b, ia, id, nir_imm_int(b, state->index_size));
|
||||
|
||||
id = nir_u2uN(b, index, id->bit_size);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -227,6 +227,45 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first,
|
|||
}
|
||||
}
|
||||
|
||||
static uint
|
||||
load_index_buffer(uintptr_t index_buffer, uint32_t index_buffer_range_el,
|
||||
uint id, uint index_size)
|
||||
{
|
||||
bool oob = id >= index_buffer_range_el;
|
||||
|
||||
/* If the load would be out-of-bounds, load the first element which is
|
||||
* assumed valid. If the application index buffer is empty with robustness2,
|
||||
* index_buffer will point to a zero sink where only the first is valid.
|
||||
*/
|
||||
if (oob) {
|
||||
id = 0;
|
||||
}
|
||||
|
||||
uint el;
|
||||
if (index_size == 1) {
|
||||
el = ((constant uint8_t *)index_buffer)[id];
|
||||
} else if (index_size == 2) {
|
||||
el = ((constant uint16_t *)index_buffer)[id];
|
||||
} else {
|
||||
el = ((constant uint32_t *)index_buffer)[id];
|
||||
}
|
||||
|
||||
/* D3D robustness semantics. TODO: Optimize? */
|
||||
if (oob) {
|
||||
el = 0;
|
||||
}
|
||||
|
||||
return el;
|
||||
}
|
||||
|
||||
uint
|
||||
libagx_load_index_buffer(constant struct agx_ia_state *p, uint id,
|
||||
uint index_size)
|
||||
{
|
||||
return load_index_buffer(p->index_buffer, p->index_buffer_range_el, id,
|
||||
index_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the ID of the first thread in the workgroup where cond is true, or
|
||||
* 1024 if cond is false across the workgroup.
|
||||
|
|
@ -303,19 +342,19 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
|
|||
sizeof(INDEX)); \
|
||||
\
|
||||
/* Accessed thru local mem because NIR deref is too aggressive */ \
|
||||
in_ptr = (uintptr_t)(p->index_buffer + sizeof(INDEX) * in_draw[2]); \
|
||||
in_ptr = (uintptr_t)(libagx_index_buffer( \
|
||||
p->index_buffer, p->index_buffer_size_el, in_draw[2], \
|
||||
sizeof(INDEX), p->zero_sink)); \
|
||||
} \
|
||||
\
|
||||
barrier(CLK_LOCAL_MEM_FENCE); \
|
||||
global INDEX *out = (global INDEX *)out_ptr; \
|
||||
constant INDEX *in = (constant INDEX *)in_ptr; \
|
||||
\
|
||||
local uint scratch[32]; \
|
||||
\
|
||||
uint out_prims = 0; \
|
||||
INDEX restart_idx = p->restart_index; \
|
||||
bool flatshade_first = p->flatshade_first; \
|
||||
uint in_size_el = p->index_buffer_size_B / sizeof(INDEX); \
|
||||
\
|
||||
uint needle = 0; \
|
||||
uint per_prim = mesa_vertices_per_prim(mode); \
|
||||
|
|
@ -323,10 +362,11 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
|
|||
/* Search for next restart or the end. Lanes load in parallel. */ \
|
||||
uint next_restart = needle; \
|
||||
for (;;) { \
|
||||
/* Relies on shortcircuiting */ \
|
||||
uint idx = next_restart + tid; \
|
||||
/* XXX: robustness here */ \
|
||||
bool restart = idx >= count || in[idx] == restart_idx; \
|
||||
bool restart = \
|
||||
idx >= count || \
|
||||
load_index_buffer(in_ptr, p->index_buffer_size_el, idx, \
|
||||
sizeof(INDEX)) == restart_idx; \
|
||||
\
|
||||
uint next_offs = first_true_thread_in_workgroup(restart, scratch); \
|
||||
\
|
||||
|
|
@ -346,7 +386,8 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p,
|
|||
uint offset = needle + id; \
|
||||
\
|
||||
out[((out_prims_base + i) * per_prim) + vtx] = \
|
||||
offset < in_size_el ? in[offset] : 0; \
|
||||
load_index_buffer(in_ptr, p->index_buffer_size_el, offset, \
|
||||
sizeof(INDEX)); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
|
|
@ -362,13 +403,6 @@ UNROLL(uchar, u8)
|
|||
UNROLL(ushort, u16)
|
||||
UNROLL(uint, u32)
|
||||
|
||||
uintptr_t
|
||||
libagx_index_buffer(constant struct agx_ia_state *p, uint id,
|
||||
uint index_size)
|
||||
{
|
||||
return (uintptr_t)&p->index_buffer[id * index_size];
|
||||
}
|
||||
|
||||
uint
|
||||
libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i)
|
||||
{
|
||||
|
|
@ -479,8 +513,13 @@ libagx_gs_setup_indirect(global struct agx_gs_setup_indirect_params *gsi,
|
|||
* indirect draw, the hardware would do this for us, but for software input
|
||||
* assembly we need to do it ourselves.
|
||||
*/
|
||||
if (gsi->index_buffer) {
|
||||
ia->index_buffer = gsi->index_buffer + gsi->draw[2] * gsi->index_size_B;
|
||||
if (gsi->index_size_B) {
|
||||
ia->index_buffer =
|
||||
libagx_index_buffer(gsi->index_buffer, gsi->index_buffer_range_el,
|
||||
gsi->draw[2], gsi->index_size_B, gsi->zero_sink);
|
||||
|
||||
ia->index_buffer_range_el =
|
||||
libagx_index_buffer_range_el(gsi->index_buffer_range_el, gsi->draw[2]);
|
||||
}
|
||||
|
||||
/* We need to allocate VS and GS count buffers, do so now */
|
||||
|
|
|
|||
|
|
@ -9,11 +9,13 @@
|
|||
|
||||
#ifndef __OPENCL_VERSION__
|
||||
#include "util/bitscan.h"
|
||||
#define CONST(type_) uint64_t
|
||||
#define libagx_popcount(x) util_bitcount64(x)
|
||||
#define CONST(type_) uint64_t
|
||||
#define libagx_popcount(x) util_bitcount64(x)
|
||||
#define libagx_sub_sat(x, y) ((x >= y) ? (x - y) : 0)
|
||||
#else
|
||||
#define CONST(type_) constant type_ *
|
||||
#define libagx_popcount(x) popcount(x)
|
||||
#define CONST(type_) constant type_ *
|
||||
#define libagx_popcount(x) popcount(x)
|
||||
#define libagx_sub_sat(x, y) sub_sat(x, y)
|
||||
#endif
|
||||
|
||||
#ifndef LIBAGX_GEOMETRY_H
|
||||
|
|
@ -35,7 +37,7 @@ struct agx_restart_unroll_params {
|
|||
GLOBAL(struct agx_geometry_state) heap;
|
||||
|
||||
/* Input: index buffer if present. */
|
||||
CONST(uchar) index_buffer;
|
||||
uint64_t index_buffer;
|
||||
|
||||
/* Input: draw count */
|
||||
CONST(uint) count;
|
||||
|
|
@ -46,14 +48,17 @@ struct agx_restart_unroll_params {
|
|||
/* Output draw descriptors */
|
||||
GLOBAL(uint) out_draws;
|
||||
|
||||
/* Pointer to zero */
|
||||
uint64_t zero_sink;
|
||||
|
||||
/* Input: maximum draw count, count is clamped to this */
|
||||
uint32_t max_draws;
|
||||
|
||||
/* Primitive restart index */
|
||||
uint32_t restart_index;
|
||||
|
||||
/* Input index buffer size in bytes */
|
||||
uint32_t index_buffer_size_B;
|
||||
/* Input index buffer size in elements */
|
||||
uint32_t index_buffer_size_el;
|
||||
|
||||
/* Stride for the draw descriptor array */
|
||||
uint32_t draw_stride;
|
||||
|
|
@ -64,11 +69,11 @@ struct agx_restart_unroll_params {
|
|||
*/
|
||||
uint32_t flatshade_first;
|
||||
} PACKED;
|
||||
AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 15 * 4);
|
||||
AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 17 * 4);
|
||||
|
||||
struct agx_gs_setup_indirect_params {
|
||||
/* Index buffer if present. */
|
||||
CONST(uchar) index_buffer;
|
||||
uint64_t index_buffer;
|
||||
|
||||
/* Indirect draw descriptor. */
|
||||
CONST(uint) draw;
|
||||
|
|
@ -82,24 +87,49 @@ struct agx_gs_setup_indirect_params {
|
|||
/* Output geometry parameters */
|
||||
GLOBAL(struct agx_geometry_params) geom;
|
||||
|
||||
/* Pointer to zero */
|
||||
uint64_t zero_sink;
|
||||
|
||||
/* Vertex (TES) output mask for sizing the allocated buffer */
|
||||
uint64_t vs_outputs;
|
||||
|
||||
/* The index size (1, 2, 4) or 0 if drawing without an index buffer. */
|
||||
uint32_t index_size_B;
|
||||
|
||||
/* Size of the index buffer */
|
||||
uint32_t index_buffer_range_el;
|
||||
} PACKED;
|
||||
AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 13 * 4);
|
||||
AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 16 * 4);
|
||||
|
||||
struct agx_ia_state {
|
||||
/* Index buffer if present. */
|
||||
CONST(uchar) index_buffer;
|
||||
uint64_t index_buffer;
|
||||
|
||||
/* Size of the bound index buffer for bounds checking */
|
||||
uint32_t index_buffer_range_el;
|
||||
|
||||
/* Number of vertices per instance. Written by CPU for direct draw, indirect
|
||||
* setup kernel for indirect. This is used for VS->GS and VS->TCS indexing.
|
||||
*/
|
||||
uint32_t verts_per_instance;
|
||||
} PACKED;
|
||||
AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 3 * 4);
|
||||
AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 4 * 4);
|
||||
|
||||
static inline uint64_t
|
||||
libagx_index_buffer(uint64_t index_buffer, uint size_el, uint offset_el,
|
||||
uint elsize_B, uint64_t zero_sink)
|
||||
{
|
||||
if (offset_el < size_el)
|
||||
return index_buffer + (offset_el * elsize_B);
|
||||
else
|
||||
return zero_sink;
|
||||
}
|
||||
|
||||
static inline uint
|
||||
libagx_index_buffer_range_el(uint size_el, uint offset_el)
|
||||
{
|
||||
return libagx_sub_sat(size_el, offset_el);
|
||||
}
|
||||
|
||||
struct agx_geometry_params {
|
||||
/* Persistent (cross-draw) geometry state */
|
||||
|
|
|
|||
|
|
@ -3949,6 +3949,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
|||
{
|
||||
struct agx_ia_state ia = {
|
||||
.index_buffer = input_index_buffer,
|
||||
.index_buffer_range_el = index_buffer_size_B / info->index_size,
|
||||
.verts_per_instance = draw ? draw->count : 0,
|
||||
};
|
||||
|
||||
|
|
@ -4111,6 +4112,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
|
|||
|
||||
struct agx_gs_setup_indirect_params gsi = {
|
||||
.index_buffer = ib,
|
||||
.index_buffer_range_el = ib_extent / info->index_size,
|
||||
.draw = rsrc->bo->ptr.gpu + indirect->offset,
|
||||
.vertex_buffer = batch->uniforms.vertex_output_buffer_ptr,
|
||||
.ia = batch->uniforms.input_assembly,
|
||||
|
|
@ -4239,7 +4241,7 @@ agx_draw_without_restart(struct agx_batch *batch,
|
|||
.index_buffer = ib,
|
||||
.out_draws = out_draws.gpu,
|
||||
.restart_index = info->restart_index,
|
||||
.index_buffer_size_B = ib_extent,
|
||||
.index_buffer_size_el = ib_extent / info->index_size,
|
||||
.flatshade_first = batch->ctx->rast->base.flatshade_first,
|
||||
.draws = indirect_rsrc->bo->ptr.gpu + indirect->offset,
|
||||
};
|
||||
|
|
@ -4559,6 +4561,7 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
|
|||
|
||||
struct agx_ia_state ia = {
|
||||
.index_buffer = ib,
|
||||
.index_buffer_range_el = ib_extent,
|
||||
.verts_per_instance = draws ? draws->count : 0,
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue