diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c index 57a4144d55a..c38a7b45c61 100644 --- a/src/asahi/lib/agx_nir_lower_gs.c +++ b/src/asahi/lib/agx_nir_lower_gs.c @@ -965,8 +965,7 @@ collect_components(nir_builder *b, nir_intrinsic_instr *intr, void *data) static nir_shader * agx_nir_create_pre_gs(struct lower_gs_state *state, bool indexed, bool restart, struct nir_xfb_info *xfb, unsigned vertices_per_prim, - uint8_t streams, unsigned invocations, - unsigned index_buffer_allocation) + uint8_t streams, unsigned invocations) { nir_builder b_ = nir_builder_init_simple_shader( MESA_SHADER_COMPUTE, &agx_nir_options, "Pre-GS patch up"); @@ -975,13 +974,6 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, bool indexed, bool restart, /* Load the number of primitives input to the GS */ nir_def *unrolled_in_prims = load_geometry_param(b, input_primitives); - /* Setup the draw from the rasterization stream (0). */ - if (!state->rasterizer_discard) { - libagx_build_gs_draw( - b, nir_load_geometry_param_buffer_agx(b), - nir_imul_imm(b, unrolled_in_prims, index_buffer_allocation)); - } - /* Determine the number of primitives generated in each stream */ nir_def *in_prims[MAX_VERTEX_STREAMS], *prims[MAX_VERTEX_STREAMS]; @@ -1400,13 +1392,14 @@ agx_nir_lower_gs(nir_shader *gs, bool rasterizer_discard, nir_shader **gs_count, *pre_gs = agx_nir_create_pre_gs( &gs_state, true, gs->info.gs.output_primitive != MESA_PRIM_POINTS, gs->xfb_info, verts_in_output_prim(gs), gs->info.gs.active_stream_mask, - gs->info.gs.invocations, gs_state.max_indices); + gs->info.gs.invocations); /* Signal what primitive we want to draw the GS Copy VS with */ *info = (struct agx_gs_info){ .mode = gs->info.gs.output_primitive, .count_words = gs_state.count_stride_el, .prefix_sum = gs_state.prefix_summing, + .max_indices = gs_state.max_indices, }; return true; diff --git a/src/asahi/lib/agx_nir_lower_gs.h b/src/asahi/lib/agx_nir_lower_gs.h index 0e6b38cc245..fc901bb35ec 
100644 --- a/src/asahi/lib/agx_nir_lower_gs.h +++ b/src/asahi/lib/agx_nir_lower_gs.h @@ -37,6 +37,9 @@ struct agx_gs_info { /* Number of words per primitive in the count buffer */ unsigned count_words; + /* Per-input primitive stride of the output index buffer */ + unsigned max_indices; + /* Whether a prefix sum is required on the count outputs */ bool prefix_sum; }; diff --git a/src/asahi/libagx/geometry.cl b/src/asahi/libagx/geometry.cl index 2476e6e829b..17ac33bf01c 100644 --- a/src/asahi/libagx/geometry.cl +++ b/src/asahi/libagx/geometry.cl @@ -633,7 +633,7 @@ libagx_gs_setup_indirect( uint32_t index_size_B /* 0 if no index bffer */, uint32_t index_buffer_range_el, uint32_t prim /* Input primitive type, enum mesa_prim */, - int is_prefix_summing) + int is_prefix_summing, uint indices_per_in_prim) { /* Determine the (primitives, instances) grid size. */ uint vertex_count = draw[0]; @@ -685,6 +685,8 @@ libagx_gs_setup_indirect( assert(state->heap_bottom < state->heap_size); p->input_mask = vs_outputs; + + libagx_build_gs_draw(p, p->input_primitives * indices_per_in_prim); } /* diff --git a/src/asahi/vulkan/hk_cmd_buffer.h b/src/asahi/vulkan/hk_cmd_buffer.h index 969e31ae7c7..132f7fc2368 100644 --- a/src/asahi/vulkan/hk_cmd_buffer.h +++ b/src/asahi/vulkan/hk_cmd_buffer.h @@ -458,9 +458,11 @@ struct hk_cmd_buffer { /* XXX: move me? * - * Indirect draw generated by the pre-GS for the geometry shader. + * Indirect draw generated by the indirect GS translator. */ uint64_t geom_indirect; + uint64_t geom_index_buffer; + uint32_t geom_index_count; /* Does the command buffer use the geometry heap? 
*/ bool uses_heap; diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index d6053ea490f..2f930b4ff20 100644 --- a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -1132,7 +1132,6 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw) struct agx_geometry_params params = { .state = hk_geometry_state(cmd), - .indirect_desc = cmd->geom_indirect, .flat_outputs = fs ? fs->info.fs.interp.flat : 0, .input_topology = mode, @@ -1172,6 +1171,10 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw) } if (indirect) { + /* TODO: size */ + cmd->geom_indirect = hk_pool_alloc(cmd, 64, 4).gpu; + + params.indirect_desc = cmd->geom_indirect; params.vs_grid[2] = params.gs_grid[2] = 1; } else { uint32_t verts = draw.b.count[0], instances = draw.b.count[1]; @@ -1186,6 +1189,14 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw) if (count->info.gs.prefix_sum && size) { params.count_buffer = hk_pool_alloc(cmd, size, 4).gpu; } + + cmd->geom_index_count = + params.input_primitives * count->info.gs.max_indices; + + params.output_index_buffer = + hk_pool_alloc(cmd, cmd->geom_index_count * 4, 4).gpu; + + cmd->geom_index_buffer = params.output_index_buffer; } desc->root_dirty = true; @@ -1440,6 +1451,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs, .vs_outputs = vs->b.info.outputs, .prim = mode, .is_prefix_summing = count->info.gs.prefix_sum, + .indices_per_in_prim = count->info.gs.max_indices, }; if (cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL]) { @@ -1496,8 +1508,15 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs, /* Pre-rast geometry shader */ hk_dispatch_with_local_size(cmd, cs, main, grid_gs, agx_workgroup(1, 1, 1)); - return agx_draw_indexed_indirect(cmd->geom_indirect, dev->heap->va->addr, - dev->heap->size, AGX_INDEX_SIZE_U32, true); + if (agx_is_indirect(draw.b)) { + return agx_draw_indexed_indirect(cmd->geom_indirect, 
dev->heap->va->addr, + dev->heap->size, AGX_INDEX_SIZE_U32, + true); + } else { + return agx_draw_indexed(cmd->geom_index_count, 1, 0, 0, 0, + cmd->geom_index_buffer, cmd->geom_index_count * 4, + AGX_INDEX_SIZE_U32, true); + } } static struct agx_draw @@ -2957,9 +2976,6 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, } if (gfx->shaders[MESA_SHADER_GEOMETRY]) { - /* TODO: size */ - cmd->geom_indirect = hk_pool_alloc(cmd, 64, 4).gpu; - gfx->descriptors.root.draw.geometry_params = hk_upload_geometry_params(cmd, draw); diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 669c317002f..5111305925e 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -4051,6 +4051,9 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer, agx_pool_alloc_aligned(&batch->pool, 8, 8).gpu; params.vs_grid[2] = params.gs_grid[2] = 1; + + batch->geom_index_bo = agx_resource(batch->ctx->heap)->bo; + batch->geom_index = batch->geom_index_bo->va->addr; } else { params.vs_grid[0] = draw->count; params.gs_grid[0] = @@ -4076,6 +4079,15 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer, params.input_buffer = addr; } + + unsigned idx_size = + params.input_primitives * batch->ctx->gs->gs.max_indices; + + params.output_index_buffer = + agx_pool_alloc_aligned_with_bo(&batch->pool, idx_size * 4, 4, + &batch->geom_index_bo) + .gpu; + batch->geom_index = params.output_index_buffer; } return agx_pool_upload_aligned_with_bo(&batch->pool, &params, sizeof(params), @@ -4149,6 +4161,7 @@ agx_launch_gs_prerast(struct agx_batch *batch, .index_size_B = info->index_size, .prim = info->mode, .is_prefix_summing = gs->gs.prefix_sum, + .indices_per_in_prim = gs->gs.max_indices, }; libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi); @@ -5069,9 +5082,11 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, struct 
pipe_draw_info info_gs; struct pipe_draw_indirect_info indirect_gs; + struct pipe_draw_start_count_bias draw_gs; /* Wrap the pool allocation in a fake resource for meta-Gallium use */ struct agx_resource indirect_rsrc = {.bo = batch->geom_indirect_bo}; + struct agx_resource index_rsrc = {.bo = batch->geom_index_bo}; if (ctx->gs) { /* Launch the pre-rasterization parts of the geometry shader */ @@ -5086,30 +5101,38 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, .index_size = 4, .primitive_restart = true, .restart_index = ~0, - .index.resource = ctx->heap, + .index.resource = &index_rsrc.base, .instance_count = 1, }; - indirect_gs = (struct pipe_draw_indirect_info){ - .draw_count = 1, - .buffer = &indirect_rsrc.base, - .offset = batch->geom_indirect - indirect_rsrc.bo->va->addr, - }; + if (indirect) { + indirect_gs = (struct pipe_draw_indirect_info){ + .draw_count = 1, + .buffer = &indirect_rsrc.base, + .offset = batch->geom_indirect - indirect_rsrc.bo->va->addr, + }; + + indirect = &indirect_gs; + } else { + unsigned unrolled_prims = + u_decomposed_prims_for_vertices(info->mode, draws->count) * + info->instance_count; + + draw_gs = (struct pipe_draw_start_count_bias){ + .count = ctx->gs->gs.max_indices * unrolled_prims, + }; + + draws = &draw_gs; + } info = &info_gs; - indirect = &indirect_gs; /* TODO: Deduplicate? */ batch->reduced_prim = u_reduced_prim(info->mode); ctx->dirty |= AGX_DIRTY_PRIM; - if (info_gs.index_size) { - ib = agx_resource(ctx->heap)->bo->va->addr; - ib_extent = agx_resource(ctx->heap)->bo->size; - } else { - ib = 0; - ib_extent = 0; - } + ib = batch->geom_index; + ib_extent = index_rsrc.bo->size - (ib - index_rsrc.bo->va->addr); /* We need to reemit geometry descriptors since the txf sampler may change * between the GS prepass and the GS rast program. 
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 1e1878e1406..294a941f754 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -459,7 +459,8 @@ struct agx_batch { size_t result_off; /* Actual pointer in a uniform */ - struct agx_bo *geom_params_bo; + struct agx_bo *geom_params_bo, *geom_index_bo; + uint64_t geom_index; /* Whether each stage uses scratch */ bool vs_scratch;