diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c
index 73e733f5c3f..77709e13d55 100644
--- a/src/asahi/lib/agx_nir_lower_gs.c
+++ b/src/asahi/lib/agx_nir_lower_gs.c
@@ -615,45 +615,45 @@ agx_nir_create_gs_rast_shader(const nir_shader *gs, bool *side_effects_for_rast,
    if (shader->info.gs.output_primitive != MESA_PRIM_POINTS)
       shader->info.outputs_written &= ~VARYING_BIT_PSIZ;
 
-   nir_def *output_id, *unrolled;
-   if (state->info->instanced) {
-      /* vertex ID = ID within the primitive, instance ID = unrolled prim ID */
-      output_id = nir_load_vertex_id(b);
-      unrolled = nir_load_instance_id(b);
-   } else {
-      /* vertex ID = unrolled (see calc_unrolled_index_id), no instancing */
-      nir_def *raw_id = nir_load_vertex_id(b);
-      unsigned stride = state->info->indexed
-                           ? output_vertex_id_pot_stride(gs)
-                           : MAX2(state->info->max_indices, 1);
+   nir_def *raw_vertex_id = nir_load_vertex_id(b);
+   struct lower_gs_rast_state rs = {.raw_instance_id = nir_load_instance_id(b)};
 
-      output_id = nir_umod_imm(b, raw_id, stride);
-      unrolled = nir_udiv_imm(b, raw_id, stride);
-   }
+   switch (state->info->shape) {
+   case AGX_GS_SHAPE_DYNAMIC_INDEXED: {
+      unsigned stride = output_vertex_id_pot_stride(gs);
 
-   /* If we are indexed, we know indices are sparse and rounded up to powers of
-    * two, so we can just shift & mask to pick apart. Otherwise, we fall back on
-    * a slower integer division.
-    */
-   nir_def *instance_id, *primitive_id;
-   if (state->info->indexed) {
+      nir_def *unrolled = nir_udiv_imm(b, raw_vertex_id, stride);
       nir_def *primitives_log2 = load_geometry_param(b, primitives_log2);
-      instance_id = nir_ushr(b, unrolled, primitives_log2);
-      primitive_id = nir_iand(
-         b, unrolled,
-         nir_iadd_imm(b, nir_ishl(b, nir_imm_int(b, 1), primitives_log2), -1));
-   } else {
-      nir_def *primitives = load_geometry_param(b, gs_grid[0]);
-      instance_id = nir_udiv(b, unrolled, primitives);
-      primitive_id = nir_umod(b, unrolled, primitives);
+      nir_def *bit = nir_ishl(b, nir_imm_int(b, 1), primitives_log2);
+
+      rs.output_id = nir_umod_imm(b, raw_vertex_id, stride);
+      rs.instance_id = nir_ushr(b, unrolled, primitives_log2);
+      rs.primitive_id = nir_iand(b, unrolled, nir_iadd_imm(b, bit, -1));
+      break;
    }
 
-   struct lower_gs_rast_state rast_state = {
-      .raw_instance_id = unrolled,
-      .instance_id = instance_id,
-      .primitive_id = primitive_id,
-      .output_id = output_id,
-   };
+   case AGX_GS_SHAPE_STATIC_INDEXED:
+   case AGX_GS_SHAPE_STATIC_PER_PRIM: {
+      nir_def *stride = load_geometry_param(b, gs_grid[0]);
+
+      rs.output_id = raw_vertex_id;
+      rs.instance_id = nir_udiv(b, rs.raw_instance_id, stride);
+      rs.primitive_id = nir_umod(b, rs.raw_instance_id, stride);
+      break;
+   }
+
+   case AGX_GS_SHAPE_STATIC_PER_INSTANCE: {
+      unsigned stride = MAX2(state->info->max_indices, 1);
+
+      rs.output_id = nir_umod_imm(b, raw_vertex_id, stride);
+      rs.primitive_id = nir_udiv_imm(b, raw_vertex_id, stride);
+      rs.instance_id = rs.raw_instance_id;
+      break;
+   }
+
+   default:
+      unreachable("invalid shape");
+   }
 
    u_foreach_bit64(slot, shader->info.outputs_written) {
       const char *slot_name =
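For exposition, the ID decode that the AGX_GS_SHAPE_DYNAMIC_INDEXED case above
assembles with the NIR builder is equivalent to the scalar C below. This is a
sketch, not driver code; pot_stride and primitives_log2 stand in for
output_vertex_id_pot_stride(gs) and the primitives_log2 geometry param.

   #include <stdint.h>

   /* Sketch: scalar equivalent of the DYNAMIC_INDEXED decode. Both the
    * per-primitive vertex stride and the primitive count are rounded up to
    * powers of two, so the packed vertex ID splits apart with a shift and a
    * mask rather than real integer division. */
   static void
   decode_dynamic_indexed(uint32_t raw_vertex_id, uint32_t pot_stride,
                          uint32_t primitives_log2, uint32_t *output_id,
                          uint32_t *primitive_id, uint32_t *instance_id)
   {
      uint32_t unrolled = raw_vertex_id / pot_stride; /* POT: just a shift */

      *output_id = raw_vertex_id % pot_stride;
      *instance_id = unrolled >> primitives_log2;
      *primitive_id = unrolled & ((1u << primitives_log2) - 1);
   }

The static shapes instead recover the primitive/instance IDs from the hardware
instance ID with a true divide/modulo, as the next two cases show.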
@@ -664,24 +664,24 @@ agx_nir_create_gs_rast_shader(const nir_shader *gs, bool *side_effects_for_rast,
          (slot == VARYING_SLOT_VIEWPORT);
       unsigned comps = scalar ? 1 : 4;
 
-      rast_state.outputs.outputs[slot] = nir_variable_create(
+      rs.outputs.outputs[slot] = nir_variable_create(
          shader, nir_var_shader_temp, glsl_vector_type(GLSL_TYPE_UINT, comps),
          ralloc_asprintf(shader, "%s-temp", slot_name));
 
-      rast_state.selected.outputs[slot] = nir_variable_create(
+      rs.selected.outputs[slot] = nir_variable_create(
          shader, nir_var_shader_temp, glsl_vector_type(GLSL_TYPE_UINT, comps),
          ralloc_asprintf(shader, "%s-selected", slot_name));
    }
 
    nir_shader_intrinsics_pass(shader, lower_to_gs_rast,
-                              nir_metadata_control_flow, &rast_state);
+                              nir_metadata_control_flow, &rs);
 
    b->cursor = nir_after_impl(b->impl);
 
    /* Forward each selected output to the rasterizer */
    u_foreach_bit64(slot, shader->info.outputs_written) {
-      assert(rast_state.selected.outputs[slot] != NULL);
-      nir_def *value = nir_load_var(b, rast_state.selected.outputs[slot]);
+      assert(rs.selected.outputs[slot] != NULL);
+      nir_def *value = nir_load_var(b, rs.selected.outputs[slot]);
 
       /* We set NIR_COMPACT_ARRAYS so clip/cull distance needs to come all in
        * DIST0. Undo the offset if we need to.
@@ -909,7 +909,7 @@ lower_gs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *state)
 
    switch (intr->intrinsic) {
    case nir_intrinsic_set_vertex_and_primitive_count: {
-      if (!state_->info->dynamic_topology)
+      if (state_->info->shape != AGX_GS_SHAPE_DYNAMIC_INDEXED)
          break;
 
       /* Points write their index buffer here, other primitives write on end. We
@@ -930,8 +930,7 @@ lower_gs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *state)
    }
 
    case nir_intrinsic_end_primitive_with_counter: {
-      /* If the topology is static, we use the static index buffer instead. */
-      if (!state_->info->dynamic_topology)
+      if (state_->info->shape != AGX_GS_SHAPE_DYNAMIC_INDEXED)
          break;
 
       unsigned min = nir_verts_in_output_prim(b->shader);
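To make the dynamic path concrete, this is roughly what one output primitive
contributes to the dynamically allocated index buffer: its vertex IDs followed
by a restart index. A sketch under the assumptions visible in this patch
(32-bit indices, restart_index = ~0 in the rasterization draws); the real
writes happen in the lowered intrinsics above, not in a helper like this.

   #include <stdint.h>

   /* Sketch only: one output primitive's slice of the dynamic index buffer.
    * Vertex IDs within a primitive are consecutive (each primitive owns a
    * power-of-two-strided block of IDs), and ~0 terminates the strip via
    * primitive restart. */
   static void
   write_output_prim(uint32_t *out, uint32_t first_vertex_id, unsigned verts)
   {
      for (unsigned i = 0; i < verts; ++i)
         out[i] = first_vertex_id + i;

      out[verts] = ~0u; /* primitive restart */
   }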
@@ -1242,7 +1241,7 @@ evaluate_topology(nir_builder *b, nir_intrinsic_instr *intr, void *data)
     * other stuff).
     */
    if (intr->instr.block != nir_start_block(b->impl)) {
-      info->dynamic_topology = true;
+      info->shape = AGX_GS_SHAPE_DYNAMIC_INDEXED;
       return false;
    }
 
@@ -1250,7 +1249,7 @@ evaluate_topology(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 
    if (!nir_src_is_const(intr->src[0]) || !nir_src_is_const(intr->src[1]) ||
        !nir_src_is_const(intr->src[2])) {
-      info->dynamic_topology = true;
+      info->shape = AGX_GS_SHAPE_DYNAMIC_INDEXED;
       return false;
    }
 
@@ -1289,7 +1288,7 @@ match_list_topology(struct agx_gs_info *info, uint32_t count)
    }
 
    /* If we match, rewrite the topology and drop indexing */
-   info->indexed = false;
+   info->shape = AGX_GS_SHAPE_STATIC_PER_INSTANCE;
    info->mode = u_decomposed_prim(info->mode);
    info->max_indices = (info->max_indices / count_with_restart) * count;
    return true;
@@ -1327,12 +1326,12 @@ static void
 optimize_static_topology(struct agx_gs_info *info, nir_shader *gs)
 {
    nir_shader_intrinsics_pass(gs, evaluate_topology, nir_metadata_all, info);
-   if (info->dynamic_topology)
+   if (info->shape == AGX_GS_SHAPE_DYNAMIC_INDEXED)
       return;
 
-   /* Points are always lists, we never have restarts/instancing */
+   /* Points are always lists */
    if (gs->info.gs.output_primitive == MESA_PRIM_POINTS) {
-      info->indexed = false;
+      info->shape = AGX_GS_SHAPE_STATIC_PER_INSTANCE;
       return;
    }
 
@@ -1341,14 +1340,14 @@ optimize_static_topology(struct agx_gs_info *info, nir_shader *gs)
    if (match_list_topology(info, count))
       return;
 
-   /* Because we're instancing, we can always drop the trailing restart index */
-   info->instanced = true;
+   /* Instancing means we can always drop the trailing restart index */
    info->max_indices--;
 
    /* Try to pattern match a strip topology */
    if (is_strip_topology(info->topology, info->max_indices)) {
-      info->indexed = false;
-      return;
+      info->shape = AGX_GS_SHAPE_STATIC_PER_PRIM;
+   } else {
+      info->shape = AGX_GS_SHAPE_STATIC_INDEXED;
    }
 }
 
@@ -1433,7 +1432,7 @@ agx_nir_lower_gs(nir_shader *gs, bool rasterizer_discard, nir_shader **gs_count,
    *info = (struct agx_gs_info){
       .mode = gs->info.gs.output_primitive,
       .xfb = gs->xfb_info != NULL,
-      .indexed = true,
+      .shape = -1,
    };
 
    int static_vertices[4] = {0}, static_primitives[4] = {0};
@@ -1458,7 +1457,7 @@ agx_nir_lower_gs(nir_shader *gs, bool rasterizer_discard, nir_shader **gs_count,
    if (static_vertices[0] >= 0 && static_primitives[0] >= 0) {
       optimize_static_topology(info, gs);
    } else {
-      info->dynamic_topology = true;
+      info->shape = AGX_GS_SHAPE_DYNAMIC_INDEXED;
    }
 
    bool side_effects_for_rast = false;
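Summarizing the selection spread across evaluate_topology,
match_list_topology, and optimize_static_topology above, the shape choice
reduces to the decision tree below. This is a restatement for readers only; it
omits the rewriting of mode/max_indices that the real passes also perform.

   /* Exposition only: how the passes above choose a shape. */
   static enum agx_gs_shape
   choose_shape(bool dynamic_topology, bool points, bool list_match,
                bool strip_match)
   {
      /* Counts set in a loop, or from non-constant sources */
      if (dynamic_topology)
         return AGX_GS_SHAPE_DYNAMIC_INDEXED;

      /* Points, or a restart-free list pattern: no index buffer at all */
      if (points || list_match)
         return AGX_GS_SHAPE_STATIC_PER_INSTANCE;

      /* One strip per input primitive: hardware instancing, still no indices */
      if (strip_match)
         return AGX_GS_SHAPE_STATIC_PER_PRIM;

      /* Otherwise keep the static index buffer */
      return AGX_GS_SHAPE_STATIC_INDEXED;
   }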
diff --git a/src/asahi/lib/agx_nir_lower_gs.h b/src/asahi/lib/agx_nir_lower_gs.h
index d6e12988c94..4a1c4ac92e1 100644
--- a/src/asahi/lib/agx_nir_lower_gs.h
+++ b/src/asahi/lib/agx_nir_lower_gs.h
@@ -7,6 +7,7 @@
 #include <stdbool.h>
 #include <stdint.h>
+#include "libagx/geometry.h"
 #include "nir.h"
 #include "shader_enums.h"
 
@@ -37,16 +38,10 @@ struct agx_gs_info {
    /* Whether a prefix sum is required on the count outputs. Implies xfb */
    bool prefix_sum;
 
-   /* Whether we need to dynamically allocate an index buffer. */
-   bool dynamic_topology;
+   /* Shape of the rasterization draw, named by the instance ID */
+   enum agx_gs_shape shape;
 
-   /* Whether the topology requires an index buffer */
-   bool indexed;
-
-   /* Whether the topology requires hardware instancing */
-   bool instanced;
-
-   /* Static topology used if dynamic_topology is false. */
+   /* Static topology used if shape = AGX_GS_SHAPE_STATIC_INDEXED */
    uint32_t topology[384];
 };
diff --git a/src/asahi/libagx/geometry.cl b/src/asahi/libagx/geometry.cl
index 9d26d46e253..cc7dcbbee69 100644
--- a/src/asahi/libagx/geometry.cl
+++ b/src/asahi/libagx/geometry.cl
@@ -583,8 +583,7 @@ libagx_gs_setup_indirect(
    uint32_t index_size_B /* 0 if no index buffer */,
    uint32_t index_buffer_range_el,
    uint32_t prim /* Input primitive type, enum mesa_prim */,
-   int is_prefix_summing, uint indices_per_in_prim, int dynamic_topology,
-   int instanced)
+   int is_prefix_summing, uint max_indices, enum agx_gs_shape shape)
 {
    /* Determine the (primitives, instances) grid size. */
    uint vertex_count = draw[0];
@@ -637,21 +636,21 @@ libagx_gs_setup_indirect(
 
    /* Allocate the index buffer and write the draw consuming it */
    global VkDrawIndexedIndirectCommand *cmd = (global void *)p->indirect_desc;
-   uint count = (instanced ? 1 : p->input_primitives) * indices_per_in_prim;
-   uint index_buffer_offset_B = 0;
-
-   if (dynamic_topology) {
-      index_buffer_offset_B = agx_heap_alloc_nonatomic_offs(state, count * 4);
-
-      p->output_index_buffer =
-         (global uint *)(state->heap + index_buffer_offset_B);
-   }
 
    *cmd = (VkDrawIndexedIndirectCommand){
-      .indexCount = count,
-      .instanceCount = instanced ? p->input_primitives : 1,
-      .firstIndex = index_buffer_offset_B / 4,
+      .indexCount = agx_gs_rast_vertices(shape, max_indices, prim_per_instance,
+                                         instance_count),
+      .instanceCount = agx_gs_rast_instances(shape, max_indices,
+                                             prim_per_instance, instance_count),
    };
+
+   if (shape == AGX_GS_SHAPE_DYNAMIC_INDEXED) {
+      cmd->firstIndex =
+         agx_heap_alloc_nonatomic_offs(state, cmd->indexCount * 4) / 4;
+
+      p->output_index_buffer =
+         (global uint *)(state->heap + (cmd->firstIndex * 4));
+   }
 }
 
 /*
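The DYNAMIC_INDEXED branch above has a CPU-side counterpart later in this
patch: the Vulkan driver binds the entire heap as the index buffer, so the
element offset the kernel stores in firstIndex always lands inside the bound
range. A standalone restatement of the offset arithmetic (sketch only;
heap_alloc_offs_B stands in for the result of agx_heap_alloc_nonatomic_offs):

   #include <stdint.h>

   /* Sketch of the dynamic index-buffer handshake; illustrative only. */
   static uint32_t *
   alloc_output_indices(uint8_t *heap, uint32_t heap_alloc_offs_B,
                        uint32_t *first_index_el)
   {
      /* The kernel allocates indexCount 32-bit indices from the heap and
       * publishes the offset in elements, as VkDrawIndexedIndirectCommand's
       * firstIndex expects. */
      *first_index_el = heap_alloc_offs_B / 4;

      /* The GS then writes its indices into this slice of the heap. */
      return (uint32_t *)(heap + heap_alloc_offs_B);
   }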
diff --git a/src/asahi/libagx/geometry.h b/src/asahi/libagx/geometry.h
index 2dfbd366eb8..73e0d278ca9 100644
--- a/src/asahi/libagx/geometry.h
+++ b/src/asahi/libagx/geometry.h
@@ -16,6 +16,81 @@
 #define MAX_SO_BUFFERS 4
 #define MAX_VERTEX_STREAMS 4
 
+enum agx_gs_shape {
+   /* Indexed, where indices are encoded as:
+    *
+    *    round_to_pot(max_indices) * round_to_pot(input_primitives) *
+    *       instance_count
+    *
+    * invoked for max_indices * input_primitives * instance_count indices.
+    *
+    * This is used with any dynamic topology. No hardware instancing used.
+    */
+   AGX_GS_SHAPE_DYNAMIC_INDEXED,
+
+   /* Indexed with a static index buffer. Indices range up to max_indices.
+    * Hardware instance count = input_primitives * software instance count.
+    */
+   AGX_GS_SHAPE_STATIC_INDEXED,
+
+   /* Non-indexed. Dispatched as:
+    *
+    *    (max_indices, input_primitives * instance count).
+    */
+   AGX_GS_SHAPE_STATIC_PER_PRIM,
+
+   /* Non-indexed. Dispatched as:
+    *
+    *    (max_indices * input_primitives, instance count).
+    */
+   AGX_GS_SHAPE_STATIC_PER_INSTANCE,
+};
+
+static inline unsigned
+agx_gs_rast_vertices(enum agx_gs_shape shape, unsigned max_indices,
+                     unsigned input_primitives, unsigned instance_count)
+{
+   switch (shape) {
+   case AGX_GS_SHAPE_DYNAMIC_INDEXED:
+      return max_indices * input_primitives * instance_count;
+
+   case AGX_GS_SHAPE_STATIC_INDEXED:
+   case AGX_GS_SHAPE_STATIC_PER_PRIM:
+      return max_indices;
+
+   case AGX_GS_SHAPE_STATIC_PER_INSTANCE:
+      return max_indices * input_primitives;
+   }
+
+   unreachable("invalid shape");
+}
+
+static inline unsigned
+agx_gs_rast_instances(enum agx_gs_shape shape, unsigned max_indices,
+                      unsigned input_primitives, unsigned instance_count)
+{
+   switch (shape) {
+   case AGX_GS_SHAPE_DYNAMIC_INDEXED:
+      return 1;
+
+   case AGX_GS_SHAPE_STATIC_INDEXED:
+   case AGX_GS_SHAPE_STATIC_PER_PRIM:
+      return input_primitives * instance_count;
+
+   case AGX_GS_SHAPE_STATIC_PER_INSTANCE:
+      return instance_count;
+   }
+
+   unreachable("invalid shape");
+}
+
+static inline bool
+agx_gs_indexed(enum agx_gs_shape shape)
+{
+   return shape == AGX_GS_SHAPE_DYNAMIC_INDEXED ||
+          shape == AGX_GS_SHAPE_STATIC_INDEXED;
+}
+
 /* Packed geometry state buffer */
 struct agx_geometry_state {
    /* Heap to allocate from. */
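A worked example of the two helpers above, with numbers invented for
illustration: a GS that emits at most 6 indices per input primitive, drawn
with 100 input primitives and 2 instances.

   #include <assert.h>

   /* Illustration only; exercises the helpers defined above. */
   void
   example_gs_shapes(void)
   {
      unsigned n = 6, prims = 100, insts = 2;

      /* Dynamic: everything unrolled into one non-instanced indexed draw. */
      assert(agx_gs_rast_vertices(AGX_GS_SHAPE_DYNAMIC_INDEXED, n, prims,
                                  insts) == 1200);
      assert(agx_gs_rast_instances(AGX_GS_SHAPE_DYNAMIC_INDEXED, n, prims,
                                   insts) == 1);

      /* Static per-prim: a 6-vertex draw, hardware-instanced 200 times. */
      assert(agx_gs_rast_vertices(AGX_GS_SHAPE_STATIC_PER_PRIM, n, prims,
                                  insts) == 6);
      assert(agx_gs_rast_instances(AGX_GS_SHAPE_STATIC_PER_PRIM, n, prims,
                                   insts) == 200);

      /* Static per-instance: 600 vertices per instance, 2 hardware instances. */
      assert(agx_gs_rast_vertices(AGX_GS_SHAPE_STATIC_PER_INSTANCE, n, prims,
                                  insts) == 600);
      assert(agx_gs_rast_instances(AGX_GS_SHAPE_STATIC_PER_INSTANCE, n, prims,
                                   insts) == 2);
   }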
diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c
index ac75ff6d33d..1256f491dee 100644
--- a/src/asahi/vulkan/hk_cmd_draw.c
+++ b/src/asahi/vulkan/hk_cmd_draw.c
@@ -1156,14 +1156,19 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
    params.vs_grid[4] = params.gs_grid[4] = 1;
    params.vs_grid[5] = params.gs_grid[5] = 1;
 
+   struct agx_gs_info *gsi = &count->info.gs;
+
    if (indirect) {
       /* TODO: size */
       cmd->geom_indirect = hk_pool_alloc(cmd, 64, 4).gpu;
       params.indirect_desc = cmd->geom_indirect;
       params.vs_grid[2] = params.gs_grid[2] = 1;
-      cmd->geom_index_buffer = dev->heap->va->addr;
-      cmd->geom_index_count = dev->heap->size;
+
+      if (gsi->shape == AGX_GS_SHAPE_DYNAMIC_INDEXED) {
+         cmd->geom_index_buffer = dev->heap->va->addr;
+         cmd->geom_index_count = dev->heap->size;
+      }
    } else {
       uint32_t verts = draw.b.count[0], instances = draw.b.count[1];
 
@@ -1178,17 +1183,13 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
          params.count_buffer = hk_pool_alloc(cmd, size, 4).gpu;
       }
 
-      if (count->info.gs.instanced) {
-         cmd->geom_index_count = count->info.gs.max_indices;
-         cmd->geom_instance_count = params.input_primitives;
-      } else {
-         cmd->geom_index_count =
-            params.input_primitives * count->info.gs.max_indices;
+      cmd->geom_index_count = agx_gs_rast_vertices(
+         gsi->shape, gsi->max_indices, params.gs_grid[0], instances);
 
-         cmd->geom_instance_count = 1;
-      }
+      cmd->geom_instance_count = agx_gs_rast_instances(
+         gsi->shape, gsi->max_indices, params.gs_grid[0], instances);
 
-      if (count->info.gs.dynamic_topology) {
+      if (gsi->shape == AGX_GS_SHAPE_DYNAMIC_INDEXED) {
         params.output_index_buffer =
            hk_pool_alloc(cmd, cmd->geom_index_count * 4, 4).gpu;
 
@@ -1196,9 +1197,9 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
       }
    }
 
-   if (count->info.gs.indexed && !count->info.gs.dynamic_topology) {
-      cmd->geom_index_buffer = hk_pool_upload(
-         cmd, count->info.gs.topology, count->info.gs.max_indices * 4, 4);
+   if (gsi->shape == AGX_GS_SHAPE_STATIC_INDEXED) {
+      cmd->geom_index_buffer =
+         hk_pool_upload(cmd, count->info.gs.topology, gsi->max_indices * 4, 4);
    }
 
    desc->root_dirty = true;
@@ -1456,9 +1457,8 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
       .vs_outputs = vs->b.info.outputs,
       .prim = mode,
       .is_prefix_summing = count->info.gs.prefix_sum,
-      .indices_per_in_prim = count->info.gs.max_indices,
-      .dynamic_topology = count->info.gs.dynamic_topology,
-      .instanced = count->info.gs.instanced,
+      .max_indices = count->info.gs.max_indices,
+      .shape = count->info.gs.shape,
    };
 
    if (cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL]) {
@@ -1533,7 +1533,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
    hk_dispatch_with_local_size(cmd, cs, main, grid_gs, wg);
 
    if (agx_is_indirect(draw.b)) {
-      if (count->info.gs.indexed) {
+      if (agx_gs_indexed(count->info.gs.shape)) {
         return agx_draw_indexed_indirect(
            cmd->geom_indirect, cmd->geom_index_buffer, cmd->geom_index_count,
            AGX_INDEX_SIZE_U32, true);
@@ -1541,7 +1541,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
 
       return agx_draw_indirect(cmd->geom_indirect);
      }
   } else {
-      if (count->info.gs.indexed) {
+      if (agx_gs_indexed(count->info.gs.shape)) {
        return agx_draw_indexed(
           cmd->geom_index_count, cmd->geom_instance_count, 0, 0, 0,
           cmd->geom_index_buffer, cmd->geom_index_count * 4,
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 4b0ae802175..3645bf3d5de 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -60,6 +60,7 @@
 #include "agx_nir_lower_gs.h"
 #include "agx_nir_lower_vbo.h"
 #include "agx_tilebuffer.h"
+#include "geometry.h"
 #include "libagx.h"
 #include "libagx_dgc.h"
 #include "libagx_shaders.h"
@@ -4078,9 +4079,9 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
       params.input_buffer = addr;
    }
 
-   if (batch->ctx->gs->gs.dynamic_topology) {
-      unsigned idx_size =
-         params.input_primitives * batch->ctx->gs->gs.max_indices;
+   struct agx_gs_info *gsi = &batch->ctx->gs->gs;
+   if (gsi->shape == AGX_GS_SHAPE_DYNAMIC_INDEXED) {
+      unsigned idx_size = params.input_primitives * gsi->max_indices;
 
       params.output_index_buffer =
          agx_pool_alloc_aligned_with_bo(&batch->pool, idx_size * 4, 4,
@@ -4161,9 +4162,8 @@ agx_launch_gs_prerast(struct agx_batch *batch,
       .index_size_B = info->index_size,
       .prim = info->mode,
       .is_prefix_summing = gs->gs.prefix_sum,
-      .indices_per_in_prim = gs->gs.max_indices,
-      .instanced = gs->gs.instanced,
-      .dynamic_topology = gs->gs.dynamic_topology,
+      .max_indices = gs->gs.max_indices,
+      .shape = gs->gs.shape,
    };
 
    libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi);
@@ -5152,16 +5152,16 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
          return;
 
       /* Setup to rasterize the GS results */
+      struct agx_gs_info *gsi = &ctx->gs->gs;
       info_gs = (struct pipe_draw_info){
-         .mode = ctx->gs->gs.mode,
-         .index_size = ctx->gs->gs.indexed ? 4 : 0,
-         .primitive_restart = ctx->gs->gs.indexed,
+         .mode = gsi->mode,
+         .index_size = agx_gs_indexed(gsi->shape) ? 4 : 0,
+         .primitive_restart = agx_gs_indexed(gsi->shape),
          .restart_index = ~0,
          .index.resource = &index_rsrc.base,
          .instance_count = 1,
       };
 
-      unsigned unrolled_prims = 0;
       if (indirect) {
          indirect_gs = (struct pipe_draw_indirect_info){
            .draw_count = 1,
@@ -5171,20 +5171,18 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
 
          indirect = &indirect_gs;
       } else {
-         bool instanced = ctx->gs->gs.instanced;
-         unrolled_prims =
-            u_decomposed_prims_for_vertices(info->mode, draws->count) *
-            info->instance_count;
+         unsigned prims =
+            u_decomposed_prims_for_vertices(info->mode, draws->count);
 
          draw_gs = (struct pipe_draw_start_count_bias){
-            .count = ctx->gs->gs.max_indices * (instanced ? 1 : unrolled_prims),
+            .count = agx_gs_rast_vertices(gsi->shape, gsi->max_indices, prims,
+                                          info->instance_count),
         };
 
-         draws = &draw_gs;
+         info_gs.instance_count = agx_gs_rast_instances(
+            gsi->shape, gsi->max_indices, prims, info->instance_count);
 
-         if (ctx->gs->gs.instanced) {
-            info_gs.instance_count = unrolled_prims;
-         }
+         draws = &draw_gs;
      }
 
      info = &info_gs;
@@ -5193,12 +5191,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
       batch->reduced_prim = u_reduced_prim(info->mode);
       ctx->dirty |= AGX_DIRTY_PRIM;
 
-      if (ctx->gs->gs.dynamic_topology) {
+      if (gsi->shape == AGX_GS_SHAPE_DYNAMIC_INDEXED) {
         ib = batch->geom_index;
         ib_extent = index_rsrc.bo->size - (batch->geom_index - ib);
-      } else if (ctx->gs->gs.indexed) {
-         ib_extent = ctx->gs->gs.max_indices * 4;
-         ib = agx_pool_upload(&batch->pool, ctx->gs->gs.topology, ib_extent);
+      } else if (gsi->shape == AGX_GS_SHAPE_STATIC_INDEXED) {
+         ib_extent = gsi->max_indices * 4;
+         ib = agx_pool_upload(&batch->pool, gsi->topology, ib_extent);
      }
 
      /* We need to reemit geometry descriptors since the txf sampler may change