From 4a71456a1a5a12a415321c5046236bc8f7b0f970 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 11 Jun 2024 09:42:23 -0400 Subject: [PATCH] libagx: make index buffer fetch robust for hk Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/agx_nir_lower_ia.c | 7 +-- src/asahi/lib/shaders/geometry.cl | 71 +++++++++++++++++++++------ src/asahi/lib/shaders/geometry.h | 54 +++++++++++++++----- src/gallium/drivers/asahi/agx_state.c | 5 +- 4 files changed, 103 insertions(+), 34 deletions(-) diff --git a/src/asahi/lib/agx_nir_lower_ia.c b/src/asahi/lib/agx_nir_lower_ia.c index 0d846a55006..28c692d1997 100644 --- a/src/asahi/lib/agx_nir_lower_ia.c +++ b/src/asahi/lib/agx_nir_lower_ia.c @@ -40,11 +40,8 @@ load_vertex_id(nir_builder *b, struct state *state) if (state->index_size) { nir_def *ia = nir_load_input_assembly_buffer_agx(b); - nir_def *address = - libagx_index_buffer(b, ia, id, nir_imm_int(b, state->index_size)); - - nir_def *index = nir_load_global_constant(b, address, state->index_size, - 1, state->index_size * 8); + nir_def *index = + libagx_load_index_buffer(b, ia, id, nir_imm_int(b, state->index_size)); id = nir_u2uN(b, index, id->bit_size); } diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl index a8ed851596d..96e8618e177 100644 --- a/src/asahi/lib/shaders/geometry.cl +++ b/src/asahi/lib/shaders/geometry.cl @@ -227,6 +227,45 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, } } +static uint +load_index_buffer(uintptr_t index_buffer, uint32_t index_buffer_range_el, + uint id, uint index_size) +{ + bool oob = id >= index_buffer_range_el; + + /* If the load would be out-of-bounds, load the first element which is + * assumed valid. If the application index buffer is empty with robustness2, + * index_buffer will point to a zero sink where only the first is valid. 
+ */ + if (oob) { + id = 0; + } + + uint el; + if (index_size == 1) { + el = ((constant uint8_t *)index_buffer)[id]; + } else if (index_size == 2) { + el = ((constant uint16_t *)index_buffer)[id]; + } else { + el = ((constant uint32_t *)index_buffer)[id]; + } + + /* D3D robustness semantics. TODO: Optimize? */ + if (oob) { + el = 0; + } + + return el; +} + +uint +libagx_load_index_buffer(constant struct agx_ia_state *p, uint id, + uint index_size) +{ + return load_index_buffer(p->index_buffer, p->index_buffer_range_el, id, + index_size); +} + /* * Return the ID of the first thread in the workgroup where cond is true, or * 1024 if cond is false across the workgroup. @@ -303,19 +342,19 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p, sizeof(INDEX)); \ \ /* Accessed thru local mem because NIR deref is too aggressive */ \ - in_ptr = (uintptr_t)(p->index_buffer + sizeof(INDEX) * in_draw[2]); \ + in_ptr = (uintptr_t)(libagx_index_buffer( \ + p->index_buffer, p->index_buffer_size_el, in_draw[2], \ + sizeof(INDEX), p->zero_sink)); \ } \ \ barrier(CLK_LOCAL_MEM_FENCE); \ global INDEX *out = (global INDEX *)out_ptr; \ - constant INDEX *in = (constant INDEX *)in_ptr; \ \ local uint scratch[32]; \ \ uint out_prims = 0; \ INDEX restart_idx = p->restart_index; \ bool flatshade_first = p->flatshade_first; \ - uint in_size_el = p->index_buffer_size_B / sizeof(INDEX); \ \ uint needle = 0; \ uint per_prim = mesa_vertices_per_prim(mode); \ @@ -323,10 +362,11 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p, /* Search for next restart or the end. Lanes load in parallel. 
*/ \ uint next_restart = needle; \ for (;;) { \ - /* Relies on shortcircuiting */ \ uint idx = next_restart + tid; \ - /* XXX: robustness here */ \ - bool restart = idx >= count || in[idx] == restart_idx; \ + bool restart = \ + idx >= count || \ + load_index_buffer(in_ptr, p->index_buffer_size_el, idx, \ + sizeof(INDEX)) == restart_idx; \ \ uint next_offs = first_true_thread_in_workgroup(restart, scratch); \ \ @@ -346,7 +386,8 @@ setup_unroll_for_draw(global struct agx_restart_unroll_params *p, uint offset = needle + id; \ \ out[((out_prims_base + i) * per_prim) + vtx] = \ - offset < in_size_el ? in[offset] : 0; \ + load_index_buffer(in_ptr, p->index_buffer_size_el, offset, \ + sizeof(INDEX)); \ } \ } \ \ @@ -362,13 +403,6 @@ UNROLL(uchar, u8) UNROLL(ushort, u16) UNROLL(uint, u32) -uintptr_t -libagx_index_buffer(constant struct agx_ia_state *p, uint id, - uint index_size) -{ - return (uintptr_t)&p->index_buffer[id * index_size]; -} - uint libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i) { @@ -479,8 +513,13 @@ libagx_gs_setup_indirect(global struct agx_gs_setup_indirect_params *gsi, * indirect draw, the hardware would do this for us, but for software input * assembly we need to do it ourselves. 
*/ - if (gsi->index_buffer) { - ia->index_buffer = gsi->index_buffer + gsi->draw[2] * gsi->index_size_B; + if (gsi->index_size_B) { + ia->index_buffer = + libagx_index_buffer(gsi->index_buffer, gsi->index_buffer_range_el, + gsi->draw[2], gsi->index_size_B, gsi->zero_sink); + + ia->index_buffer_range_el = + libagx_index_buffer_range_el(gsi->index_buffer_range_el, gsi->draw[2]); } /* We need to allocate VS and GS count buffers, do so now */ diff --git a/src/asahi/lib/shaders/geometry.h b/src/asahi/lib/shaders/geometry.h index 27ecff6b865..745d17d6b26 100644 --- a/src/asahi/lib/shaders/geometry.h +++ b/src/asahi/lib/shaders/geometry.h @@ -9,11 +9,13 @@ #ifndef __OPENCL_VERSION__ #include "util/bitscan.h" -#define CONST(type_) uint64_t -#define libagx_popcount(x) util_bitcount64(x) +#define CONST(type_) uint64_t +#define libagx_popcount(x) util_bitcount64(x) +#define libagx_sub_sat(x, y) ((x >= y) ? (x - y) : 0) #else -#define CONST(type_) constant type_ * -#define libagx_popcount(x) popcount(x) +#define CONST(type_) constant type_ * +#define libagx_popcount(x) popcount(x) +#define libagx_sub_sat(x, y) sub_sat(x, y) #endif #ifndef LIBAGX_GEOMETRY_H @@ -35,7 +37,7 @@ struct agx_restart_unroll_params { GLOBAL(struct agx_geometry_state) heap; /* Input: index buffer if present. 
*/ - CONST(uchar) index_buffer; + uint64_t index_buffer; /* Input: draw count */ CONST(uint) count; @@ -46,14 +48,17 @@ struct agx_restart_unroll_params { /* Output draw descriptors */ GLOBAL(uint) out_draws; + /* Pointer to zero */ + uint64_t zero_sink; + /* Input: maximum draw count, count is clamped to this */ uint32_t max_draws; /* Primitive restart index */ uint32_t restart_index; - /* Input index buffer size in bytes */ - uint32_t index_buffer_size_B; + /* Input index buffer size in elements */ + uint32_t index_buffer_size_el; /* Stride for the draw descriptor array */ uint32_t draw_stride; @@ -64,11 +69,11 @@ struct agx_restart_unroll_params { */ uint32_t flatshade_first; } PACKED; -AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 15 * 4); +AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 17 * 4); struct agx_gs_setup_indirect_params { /* Index buffer if present. */ - CONST(uchar) index_buffer; + uint64_t index_buffer; /* Indirect draw descriptor. */ CONST(uint) draw; @@ -82,24 +87,49 @@ struct agx_gs_setup_indirect_params { /* Output geometry parameters */ GLOBAL(struct agx_geometry_params) geom; + /* Pointer to zero */ + uint64_t zero_sink; + /* Vertex (TES) output mask for sizing the allocated buffer */ uint64_t vs_outputs; /* The index size (1, 2, 4) or 0 if drawing without an index buffer. */ uint32_t index_size_B; + + /* Size of the index buffer */ + uint32_t index_buffer_range_el; } PACKED; -AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 13 * 4); +AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 16 * 4); struct agx_ia_state { /* Index buffer if present. */ - CONST(uchar) index_buffer; + uint64_t index_buffer; + + /* Size of the bound index buffer for bounds checking */ + uint32_t index_buffer_range_el; /* Number of vertices per instance. Written by CPU for direct draw, indirect * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing. 
*/ uint32_t verts_per_instance; } PACKED; -AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 3 * 4); +AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 4 * 4); + +static inline uint64_t +libagx_index_buffer(uint64_t index_buffer, uint size_el, uint offset_el, + uint elsize_B, uint64_t zero_sink) +{ + if (offset_el < size_el) + return index_buffer + (offset_el * elsize_B); + else + return zero_sink; +} + +static inline uint +libagx_index_buffer_range_el(uint size_el, uint offset_el) +{ + return libagx_sub_sat(size_el, offset_el); +} struct agx_geometry_params { /* Persistent (cross-draw) geometry state */ diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index c9efc3fa77b..fcd5a742af8 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -3949,6 +3949,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer, { struct agx_ia_state ia = { .index_buffer = input_index_buffer, + .index_buffer_range_el = index_buffer_size_B / info->index_size, .verts_per_instance = draw ? 
draw->count : 0,
    };
 
@@ -4111,6 +4112,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
 
    struct agx_gs_setup_indirect_params gsi = {
       .index_buffer = ib,
+      .index_buffer_range_el = ib_extent / info->index_size,
       .draw = rsrc->bo->ptr.gpu + indirect->offset,
       .vertex_buffer = batch->uniforms.vertex_output_buffer_ptr,
       .ia = batch->uniforms.input_assembly,
@@ -4239,7 +4241,7 @@ agx_draw_without_restart(struct agx_batch *batch,
       .index_buffer = ib,
       .out_draws = out_draws.gpu,
       .restart_index = info->restart_index,
-      .index_buffer_size_B = ib_extent,
+      .index_buffer_size_el = ib_extent / info->index_size,
       .flatshade_first = batch->ctx->rast->base.flatshade_first,
       .draws = indirect_rsrc->bo->ptr.gpu + indirect->offset,
    };
@@ -4559,6 +4561,7 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
 
    struct agx_ia_state ia = {
       .index_buffer = ib,
+      .index_buffer_range_el = ib_extent / info->index_size,
       .verts_per_instance = draws ? draws->count : 0,
    };