diff --git a/src/asahi/lib/agx_linker.h b/src/asahi/lib/agx_linker.h index 1de9cce15e2..39ad6d26fa4 100644 --- a/src/asahi/lib/agx_linker.h +++ b/src/asahi/lib/agx_linker.h @@ -62,6 +62,9 @@ struct agx_vs_prolog_key { /* If !hw and the draw call is indexed, the index size */ uint8_t sw_index_size_B; + /* Adjacency primitive to emulate */ + enum mesa_prim adjacency; + /* Robustness settings for the vertex fetch */ struct agx_robustness robustness; }; diff --git a/src/asahi/lib/agx_nir_lower_gs.h b/src/asahi/lib/agx_nir_lower_gs.h index 6489aaace6b..d17f44d8b43 100644 --- a/src/asahi/lib/agx_nir_lower_gs.h +++ b/src/asahi/lib/agx_nir_lower_gs.h @@ -23,6 +23,9 @@ struct nir_def *agx_load_per_vertex_input(struct nir_builder *b, nir_intrinsic_instr *intr, struct nir_def *vertex); +nir_def *agx_nir_load_vertex_id(struct nir_builder *b, nir_def *id, + unsigned index_size_B); + bool agx_nir_lower_sw_vs(struct nir_shader *s, unsigned index_size_B); bool agx_nir_lower_vs_before_gs(struct nir_shader *vs, diff --git a/src/asahi/lib/agx_nir_lower_ia.c b/src/asahi/lib/agx_nir_lower_ia.c index 7b346499149..2077db8c702 100644 --- a/src/asahi/lib/agx_nir_lower_ia.c +++ b/src/asahi/lib/agx_nir_lower_ia.c @@ -14,11 +14,9 @@ * vertex shaders, as part of geometry/tessellation lowering. It does not apply * the topology, which happens in the geometry shader. */ -static nir_def * -load_vertex_id(nir_builder *b, unsigned index_size_B) +nir_def * +agx_nir_load_vertex_id(nir_builder *b, nir_def *id, unsigned index_size_B) { - nir_def *id = nir_channel(b, nir_load_global_invocation_id(b, 32), 0); - /* If drawing with an index buffer, pull the vertex ID. Otherwise, the * vertex ID is just the index as-is. 
*/ @@ -40,7 +38,8 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data) b->cursor = nir_before_instr(&intr->instr); if (intr->intrinsic == nir_intrinsic_load_vertex_id) { - nir_def_replace(&intr->def, load_vertex_id(b, *index_size_B)); + nir_def *id = nir_channel(b, nir_load_global_invocation_id(b, 32), 0); + nir_def_replace(&intr->def, agx_nir_load_vertex_id(b, id, *index_size_B)); return true; } else if (intr->intrinsic == nir_intrinsic_load_instance_id) { nir_def_replace(&intr->def, diff --git a/src/asahi/lib/agx_nir_prolog_epilog.c b/src/asahi/lib/agx_nir_prolog_epilog.c index c2f18288971..946d564400d 100644 --- a/src/asahi/lib/agx_nir_prolog_epilog.c +++ b/src/asahi/lib/agx_nir_prolog_epilog.c @@ -13,6 +13,7 @@ #include "agx_nir_lower_vbo.h" #include "agx_pack.h" #include "agx_tilebuffer.h" +#include "libagx.h" #include "nir.h" #include "nir_builder.h" #include "nir_builder_opcodes.h" @@ -138,6 +139,36 @@ lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr, } } +static bool +lower_adjacency(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + const struct agx_vs_prolog_key *key = data; + b->cursor = nir_before_instr(&intr->instr); + + if (intr->intrinsic != nir_intrinsic_load_vertex_id) + return false; + + nir_def *id = nir_load_vertex_id(b); + + if (key->adjacency == MESA_PRIM_LINES_ADJACENCY) { + id = libagx_map_to_line_adj(b, id); + } else if (key->adjacency == MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) { + id = libagx_map_to_tri_strip_adj(b, id); + } else if (key->adjacency == MESA_PRIM_LINE_STRIP_ADJACENCY) { + id = libagx_map_to_line_strip_adj(b, id); + } else if (key->adjacency == MESA_PRIM_TRIANGLES_ADJACENCY) { + /* Sequence (0, 2, 4), (6, 8, 10), ... 
*/ + id = nir_imul_imm(b, id, 2); + } else { + unreachable("unknown"); + } + + id = agx_nir_load_vertex_id(b, id, key->sw_index_size_B); + + nir_def_replace(&intr->def, id); + return true; +} + void agx_nir_vs_prolog(nir_builder *b, const void *key_) { @@ -169,8 +200,17 @@ agx_nir_vs_prolog(nir_builder *b, const void *key_) /* Now lower the resulting program using the key */ lower_vbo(b->shader, key->attribs, key->robustness); + /* Clean up redundant vertex ID loads */ + if (!key->hw || key->adjacency) { + NIR_PASS(_, b->shader, nir_opt_cse); + NIR_PASS(_, b->shader, nir_opt_dce); + } + if (!key->hw) { agx_nir_lower_sw_vs(b->shader, key->sw_index_size_B); + } else if (key->adjacency) { + nir_shader_intrinsics_pass(b->shader, lower_adjacency, + nir_metadata_control_flow, (void *)key); } /* Finally, lower uniforms according to our ABI */ diff --git a/src/asahi/libagx/draws.cl b/src/asahi/libagx/draws.cl index 3964622fe41..4663a229578 100644 --- a/src/asahi/libagx/draws.cl +++ b/src/asahi/libagx/draws.cl @@ -28,6 +28,34 @@ libagx_predicate_indirect(global uint32_t *out, constant uint32_t *in, } } +/* + * Indexing/offsetting is done in software if necessary, so we strip all + * indexing/offset information. 
+ */ +KERNEL(1) +libagx_draw_without_adj(global VkDrawIndirectCommand *out, + global VkDrawIndirectCommand *in, + global struct agx_ia_state *ia, uint64_t index_buffer, + uint64_t index_buffer_range_el, int index_size_B, + enum mesa_prim prim) +{ + *out = (VkDrawIndirectCommand){ + .vertexCount = libagx_remap_adj_count(in->vertexCount, prim), + .instanceCount = in->instanceCount, + }; + + /* TODO: Deduplicate */ + if (index_size_B) { + uint offs = in->firstVertex; + + ia->index_buffer = libagx_index_buffer( + index_buffer, index_buffer_range_el, offs, index_size_B, 0); + + ia->index_buffer_range_el = + libagx_index_buffer_range_el(index_buffer_range_el, offs); + } +} + /* Precondition: len must be < the group size */ static void libagx_memcpy_small(global uchar *dst, constant uchar *src, uint len, uint tid) diff --git a/src/asahi/libagx/geometry.cl b/src/asahi/libagx/geometry.cl index 34871e14322..931ad4c3073 100644 --- a/src/asahi/libagx/geometry.cl +++ b/src/asahi/libagx/geometry.cl @@ -220,6 +220,50 @@ vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, uint prim, } } +uint +libagx_map_to_line_adj(uint id) +{ + /* Sequence (1, 2), (5, 6), (9, 10), ... */ + return ((id & ~1) * 2) + (id & 1) + 1; +} + +uint +libagx_map_to_line_strip_adj(uint id) +{ + /* Sequence (1, 2), (2, 3), (3, 4), ... */ + uint prim = id / 2; + uint vert = id & 1; + return prim + vert + 1; +} + +uint +libagx_map_to_tri_strip_adj(uint id) +{ + /* Sequence (0, 2, 4), (2, 6, 4), (4, 6, 8), (6, 10, 8) + * + * Although tri strips with adjacency have 6 cases in general, after + * disregarding the vertices only available in a geometry shader, there are + * only even/odd cases. In other words, it's just a triangle strip subject to + * extra padding. 
+ * + * Dividing through by two, the sequence is: + * + * (0, 1, 2), (1, 3, 2), (2, 3, 4), (3, 5, 4) + */ + uint prim = id / 3; + uint vtx = id % 3; + + /* Flip the winding order of odd triangles */ + if ((prim % 2) == 1) { + if (vtx == 1) + vtx = 2; + else if (vtx == 2) + vtx = 1; + } + + return 2 * (prim + vtx); +} + static void store_index(uintptr_t index_buffer, uint index_size_B, uint id, uint value) { diff --git a/src/asahi/libagx/libagx_dgc.h b/src/asahi/libagx/libagx_dgc.h index 456554e3857..dbe4f9653c8 100644 --- a/src/asahi/libagx/libagx_dgc.h +++ b/src/asahi/libagx/libagx_dgc.h @@ -6,6 +6,7 @@ #pragma once #include "compiler/libcl/libcl.h" +#include "compiler/shader_enums.h" #include "agx_pack.h" #define agx_push(ptr, T, cfg) \ @@ -496,10 +497,6 @@ struct agx_usc_builder { #endif } PACKED; -#ifdef __OPENCL_VERSION__ -static_assert(sizeof(struct agx_usc_builder) == 8); -#endif - static struct agx_usc_builder agx_usc_builder(GLOBAL void *out, ASSERTED size_t size) { @@ -577,3 +574,18 @@ libagx_draw_robust_index_vdm_size() struct agx_draw draw = agx_draw_indexed(0, 0, 0, 0, 0, 0, 0, 0, 0); return agx_vdm_draw_size(0, draw); } + +static inline unsigned +libagx_remap_adj_count(unsigned count, enum mesa_prim prim) +{ + if (prim == MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) { + /* Spec gives formula for # of primitives in a tri strip adj */ + unsigned c4 = count >= 4 ? count - 4 : 0; + return 3 * (c4 / 2); + } else if (prim == MESA_PRIM_LINE_STRIP_ADJACENCY) { + return 2 * (count >= 3 ? count - 3 : 0); + } else { + /* Adjacency lists just drop half the vertices. 
*/ + return count / 2; + } +} diff --git a/src/asahi/vulkan/hk_cmd_buffer.c b/src/asahi/vulkan/hk_cmd_buffer.c index b15dbe790ac..7f6ef6978d2 100644 --- a/src/asahi/vulkan/hk_cmd_buffer.c +++ b/src/asahi/vulkan/hk_cmd_buffer.c @@ -637,7 +637,6 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s, struct hk_device *dev = hk_cmd_buffer_device(cmd); enum pipe_shader_type sw_stage = s->info.stage; - enum pipe_shader_type hw_stage = s->b.info.stage; unsigned constant_push_ranges = DIV_ROUND_UP(s->b.info.rodata.size_16, 64); unsigned push_ranges = 2; @@ -683,7 +682,7 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s, if (cmd->state.gfx.draw_id_ptr) agx_usc_uniform(&b, (6 * count) + 4, 1, cmd->state.gfx.draw_id_ptr); - if (hw_stage == MESA_SHADER_COMPUTE) { + if (linked->sw_indexing) { agx_usc_uniform( &b, (6 * count) + 8, 4, root_ptr + hk_root_descriptor_offset(draw.input_assembly)); diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index 8d2a708c108..9f1e824dd8a 100644 --- a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -1098,11 +1098,22 @@ hk_rast_prim(struct hk_cmd_buffer *cmd) { struct hk_graphics_state *gfx = &cmd->state.gfx; struct hk_api_shader *gs = gfx->shaders[MESA_SHADER_GEOMETRY]; + struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; - if (gs != NULL) + if (gs != NULL) { return gs->variants[HK_GS_VARIANT_RAST].info.gs.out_prim; - else - return hk_gs_in_prim(cmd); + } else { + switch (dyn->ia.primitive_topology) { + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: + return MESA_PRIM_LINES; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: + return MESA_PRIM_TRIANGLES; + default: + return hk_gs_in_prim(cmd); + } + } } static uint64_t @@ -1721,7 +1732,8 @@ hk_get_fast_linked_locked_vs(struct hk_device *dev, struct hk_shader 
*shader, ralloc_memdup(shader->linked.ht, key, sizeof(*key)); /* XXX: Fix this higher up the stack */ - linked->b.uses_base_param |= !key->prolog.hw; + linked->sw_indexing = !key->prolog.hw || key->prolog.adjacency; + linked->b.uses_base_param |= linked->sw_indexing; _mesa_hash_table_insert(shader->linked.ht, key_clone, linked); return linked; @@ -2545,7 +2557,17 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, .prolog.robustness.soft_fault = agx_has_soft_fault(&dev->dev), }; - if (!key.prolog.hw) { + enum mesa_prim prim = vk_conv_topology(dyn->ia.primitive_topology); + + if (mesa_prim_has_adjacency(prim)) { + if (draw.restart) { + prim = u_decomposed_prim(prim); + } + + key.prolog.adjacency = prim; + } + + if (key.prolog.adjacency || !key.prolog.hw) { key.prolog.sw_index_size_B = draw.indexed ? agx_index_size_to_B(draw.index_size) : 0; } @@ -2886,6 +2908,17 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, gfx->dirty |= HK_DIRTY_VARYINGS; } + if (gfx->shaders[MESA_SHADER_TESS_EVAL] || + gfx->shaders[MESA_SHADER_GEOMETRY] || linked_vs->sw_indexing) { + /* XXX: We should deduplicate this logic */ + bool indirect = agx_is_indirect(draw.b) || draw.restart; + + desc->root.draw.input_assembly = + indirect ? hk_pool_alloc(cmd, sizeof(struct agx_ia_state), 4).gpu + : hk_upload_ia_params(cmd, draw); + desc->root_dirty = true; + } + if (gfx->shaders[MESA_SHADER_TESS_EVAL] || gfx->shaders[MESA_SHADER_GEOMETRY]) { @@ -2895,10 +2928,6 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, /* XXX: We should deduplicate this logic */ bool indirect = agx_is_indirect(draw.b) || draw.restart; - desc->root.draw.input_assembly = - indirect ? 
hk_pool_alloc(cmd, sizeof(struct agx_ia_state), 4).gpu - : hk_upload_ia_params(cmd, draw); - if (!indirect) { uint32_t verts = draw.b.count[0], instances = draw.b.count[1]; unsigned vb_size = @@ -3075,18 +3104,9 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw) return; /* Determine if we need a geometry shader to emulate XFB or adjacency */ - struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; struct hk_shader *last_sw = hk_bound_sw_vs_before_gs(gfx); uint32_t xfb_outputs = last_sw->info.xfb_info.output_count; - - VkPrimitiveTopology topology = dyn->ia.primitive_topology; - bool adjacency = - (topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY) || - (topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY) || - (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY) || - (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY); - - bool needs_gs = xfb_outputs || adjacency; + bool needs_gs = xfb_outputs; /* Various pipeline statistics are implemented in the pre-GS shader. TODO: * This could easily be optimized. @@ -3137,9 +3157,8 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw) } struct hk_device *dev = hk_cmd_buffer_device(cmd); - perf_debug(dev, "Binding passthrough GS for%s%s%s\n", - xfb_outputs ? " XFB" : "", adjacency ? " adjacency" : "", - ia_stats ? " statistics" : ""); + perf_debug(dev, "Binding passthrough GS for%s%s\n", + xfb_outputs ? " XFB" : "", ia_stats ? 
" statistics" : ""); gs = hk_meta_shader(dev, hk_nir_passthrough_gs, key, key_size); gs->is_passthrough = true; @@ -3376,9 +3395,15 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) if (!cs) return; + struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; bool geom = cmd->state.gfx.shaders[MESA_SHADER_GEOMETRY]; bool tess = cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL]; bool needs_idx_robust = hk_needs_index_robustness(cmd, &draw); + bool adj = + mesa_prim_has_adjacency(vk_conv_topology(dyn->ia.primitive_topology)); + adj &= !geom; + needs_idx_robust &= !adj; + struct hk_cs *ccs = NULL; uint8_t *out = cs->current; assert(cs->current + 0x1000 < cs->end); @@ -3388,7 +3413,9 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) cs->stats.calls++; - if (geom || tess || ia_stats || needs_idx_robust) { + if (geom || tess || ia_stats || needs_idx_robust || + (adj && (agx_is_indirect(draw.b) || draw.restart))) { + ccs = hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.pre_gfx, true); if (!ccs) @@ -3415,9 +3442,42 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) continue; } + if (adj) { + assert(!geom && "geometry shaders handle adj directly"); + enum mesa_prim prim = vk_conv_topology(dyn->ia.primitive_topology); + + if (draw.restart) { + draw = hk_draw_without_restart(cmd, ccs, draw, 1); + prim = u_decomposed_prim(prim); + } + + if (agx_is_indirect(draw.b)) { + const size_t size = sizeof(VkDrawIndexedIndirectCommand); + static_assert(size > sizeof(VkDrawIndirectCommand), + "allocation size is conservative"); + + uint64_t out_draw = hk_pool_alloc(cmd, size, 4).gpu; + struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors; + + libagx_draw_without_adj( + ccs, agx_1d(1), out_draw, draw.b.ptr, + desc->root.draw.input_assembly, draw.index_buffer, + draw.indexed ? agx_draw_index_range_el(draw) : 0, + draw.indexed ? 
agx_index_size_to_B(draw.index_size) : 0, prim); + + draw = agx_draw_indirect(out_draw); + } else { + unsigned count = libagx_remap_adj_count(draw.b.count[0], prim); + + draw = (struct agx_draw){ + .b = agx_3d(count, draw.b.count[1], 1), + }; + } + } + enum agx_primitive topology = cmd->state.gfx.topology; if (needs_idx_robust) { - assert(!geom && !tess); + assert(!geom && !tess && !adj); perf_debug(dev, "lowering robust index buffer"); cs->current = out; diff --git a/src/asahi/vulkan/hk_shader.h b/src/asahi/vulkan/hk_shader.h index da2181f0104..9865ddd4fdc 100644 --- a/src/asahi/vulkan/hk_shader.h +++ b/src/asahi/vulkan/hk_shader.h @@ -313,6 +313,11 @@ hk_pre_gs_variant(struct hk_api_shader *obj, bool rast_disc) struct hk_linked_shader { struct agx_linked_shader b; + /* True if the VS prolog uses software indexing, either for geom/tess or + * adjacency primitives. + */ + bool sw_indexing; + /* Distinct from hk_shader::counts due to addition of cf_binding_count, which * is delayed since it depends on cull distance. */