mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-01 09:00:30 +01:00
libagx,asahi: hoist GS draw generation
For indirect GS, do it in the indirect kernel (not the pre-GS); for direct, do it on the host (not the pre-GS). We don't want pre-GS. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33901>
This commit is contained in:
parent
40aa260209
commit
dc96093494
7 changed files with 73 additions and 33 deletions
|
|
@ -965,8 +965,7 @@ collect_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
static nir_shader *
|
||||
agx_nir_create_pre_gs(struct lower_gs_state *state, bool indexed, bool restart,
|
||||
struct nir_xfb_info *xfb, unsigned vertices_per_prim,
|
||||
uint8_t streams, unsigned invocations,
|
||||
unsigned index_buffer_allocation)
|
||||
uint8_t streams, unsigned invocations)
|
||||
{
|
||||
nir_builder b_ = nir_builder_init_simple_shader(
|
||||
MESA_SHADER_COMPUTE, &agx_nir_options, "Pre-GS patch up");
|
||||
|
|
@ -975,13 +974,6 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, bool indexed, bool restart,
|
|||
/* Load the number of primitives input to the GS */
|
||||
nir_def *unrolled_in_prims = load_geometry_param(b, input_primitives);
|
||||
|
||||
/* Setup the draw from the rasterization stream (0). */
|
||||
if (!state->rasterizer_discard) {
|
||||
libagx_build_gs_draw(
|
||||
b, nir_load_geometry_param_buffer_agx(b),
|
||||
nir_imul_imm(b, unrolled_in_prims, index_buffer_allocation));
|
||||
}
|
||||
|
||||
/* Determine the number of primitives generated in each stream */
|
||||
nir_def *in_prims[MAX_VERTEX_STREAMS], *prims[MAX_VERTEX_STREAMS];
|
||||
|
||||
|
|
@ -1400,13 +1392,14 @@ agx_nir_lower_gs(nir_shader *gs, bool rasterizer_discard, nir_shader **gs_count,
|
|||
*pre_gs = agx_nir_create_pre_gs(
|
||||
&gs_state, true, gs->info.gs.output_primitive != MESA_PRIM_POINTS,
|
||||
gs->xfb_info, verts_in_output_prim(gs), gs->info.gs.active_stream_mask,
|
||||
gs->info.gs.invocations, gs_state.max_indices);
|
||||
gs->info.gs.invocations);
|
||||
|
||||
/* Signal what primitive we want to draw the GS Copy VS with */
|
||||
*info = (struct agx_gs_info){
|
||||
.mode = gs->info.gs.output_primitive,
|
||||
.count_words = gs_state.count_stride_el,
|
||||
.prefix_sum = gs_state.prefix_summing,
|
||||
.max_indices = gs_state.max_indices,
|
||||
};
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -37,6 +37,9 @@ struct agx_gs_info {
|
|||
/* Number of words per primitive in the count buffer */
|
||||
unsigned count_words;
|
||||
|
||||
/* Per-input primitive stride of the output index buffer */
|
||||
unsigned max_indices;
|
||||
|
||||
/* Whether a prefix sum is required on the count outputs */
|
||||
bool prefix_sum;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -633,7 +633,7 @@ libagx_gs_setup_indirect(
|
|||
uint32_t index_size_B /* 0 if no index bffer */,
|
||||
uint32_t index_buffer_range_el,
|
||||
uint32_t prim /* Input primitive type, enum mesa_prim */,
|
||||
int is_prefix_summing)
|
||||
int is_prefix_summing, uint indices_per_in_prim)
|
||||
{
|
||||
/* Determine the (primitives, instances) grid size. */
|
||||
uint vertex_count = draw[0];
|
||||
|
|
@ -685,6 +685,8 @@ libagx_gs_setup_indirect(
|
|||
assert(state->heap_bottom < state->heap_size);
|
||||
|
||||
p->input_mask = vs_outputs;
|
||||
|
||||
libagx_build_gs_draw(p, p->input_primitives * indices_per_in_prim);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -458,9 +458,11 @@ struct hk_cmd_buffer {
|
|||
|
||||
/* XXX: move me?
|
||||
*
|
||||
* Indirect draw generated by the pre-GS for the geometry shader.
|
||||
* Indirect draw generated by the indirect GS translator.
|
||||
*/
|
||||
uint64_t geom_indirect;
|
||||
uint64_t geom_index_buffer;
|
||||
uint32_t geom_index_count;
|
||||
|
||||
/* Does the command buffer use the geometry heap? */
|
||||
bool uses_heap;
|
||||
|
|
|
|||
|
|
@ -1132,7 +1132,6 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
|
||||
struct agx_geometry_params params = {
|
||||
.state = hk_geometry_state(cmd),
|
||||
.indirect_desc = cmd->geom_indirect,
|
||||
.flat_outputs = fs ? fs->info.fs.interp.flat : 0,
|
||||
.input_topology = mode,
|
||||
|
||||
|
|
@ -1172,6 +1171,10 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
}
|
||||
|
||||
if (indirect) {
|
||||
/* TODO: size */
|
||||
cmd->geom_indirect = hk_pool_alloc(cmd, 64, 4).gpu;
|
||||
|
||||
params.indirect_desc = cmd->geom_indirect;
|
||||
params.vs_grid[2] = params.gs_grid[2] = 1;
|
||||
} else {
|
||||
uint32_t verts = draw.b.count[0], instances = draw.b.count[1];
|
||||
|
|
@ -1186,6 +1189,14 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
if (count->info.gs.prefix_sum && size) {
|
||||
params.count_buffer = hk_pool_alloc(cmd, size, 4).gpu;
|
||||
}
|
||||
|
||||
cmd->geom_index_count =
|
||||
params.input_primitives * count->info.gs.max_indices;
|
||||
|
||||
params.output_index_buffer =
|
||||
hk_pool_alloc(cmd, cmd->geom_index_count * 4, 4).gpu;
|
||||
|
||||
cmd->geom_index_buffer = params.output_index_buffer;
|
||||
}
|
||||
|
||||
desc->root_dirty = true;
|
||||
|
|
@ -1440,6 +1451,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
.vs_outputs = vs->b.info.outputs,
|
||||
.prim = mode,
|
||||
.is_prefix_summing = count->info.gs.prefix_sum,
|
||||
.indices_per_in_prim = count->info.gs.max_indices,
|
||||
};
|
||||
|
||||
if (cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL]) {
|
||||
|
|
@ -1496,8 +1508,15 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
/* Pre-rast geometry shader */
|
||||
hk_dispatch_with_local_size(cmd, cs, main, grid_gs, agx_workgroup(1, 1, 1));
|
||||
|
||||
return agx_draw_indexed_indirect(cmd->geom_indirect, dev->heap->va->addr,
|
||||
dev->heap->size, AGX_INDEX_SIZE_U32, true);
|
||||
if (agx_is_indirect(draw.b)) {
|
||||
return agx_draw_indexed_indirect(cmd->geom_indirect, dev->heap->va->addr,
|
||||
dev->heap->size, AGX_INDEX_SIZE_U32,
|
||||
true);
|
||||
} else {
|
||||
return agx_draw_indexed(cmd->geom_index_count, 1, 0, 0, 0,
|
||||
cmd->geom_index_buffer, cmd->geom_index_count * 4,
|
||||
AGX_INDEX_SIZE_U32, true);
|
||||
}
|
||||
}
|
||||
|
||||
static struct agx_draw
|
||||
|
|
@ -2957,9 +2976,6 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
}
|
||||
|
||||
if (gfx->shaders[MESA_SHADER_GEOMETRY]) {
|
||||
/* TODO: size */
|
||||
cmd->geom_indirect = hk_pool_alloc(cmd, 64, 4).gpu;
|
||||
|
||||
gfx->descriptors.root.draw.geometry_params =
|
||||
hk_upload_geometry_params(cmd, draw);
|
||||
|
||||
|
|
|
|||
|
|
@ -4051,6 +4051,9 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
|||
agx_pool_alloc_aligned(&batch->pool, 8, 8).gpu;
|
||||
|
||||
params.vs_grid[2] = params.gs_grid[2] = 1;
|
||||
|
||||
batch->geom_index_bo = agx_resource(batch->ctx->heap)->bo;
|
||||
batch->geom_index = batch->geom_index_bo->va->addr;
|
||||
} else {
|
||||
params.vs_grid[0] = draw->count;
|
||||
params.gs_grid[0] =
|
||||
|
|
@ -4076,6 +4079,15 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
|||
|
||||
params.input_buffer = addr;
|
||||
}
|
||||
|
||||
unsigned idx_size =
|
||||
params.input_primitives * batch->ctx->gs->gs.max_indices;
|
||||
|
||||
params.output_index_buffer =
|
||||
agx_pool_alloc_aligned_with_bo(&batch->pool, idx_size * 4, 4,
|
||||
&batch->geom_index_bo)
|
||||
.gpu;
|
||||
batch->geom_index = params.output_index_buffer;
|
||||
}
|
||||
|
||||
return agx_pool_upload_aligned_with_bo(&batch->pool, ¶ms, sizeof(params),
|
||||
|
|
@ -4149,6 +4161,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
|
|||
.index_size_B = info->index_size,
|
||||
.prim = info->mode,
|
||||
.is_prefix_summing = gs->gs.prefix_sum,
|
||||
.indices_per_in_prim = gs->gs.max_indices,
|
||||
};
|
||||
|
||||
libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi);
|
||||
|
|
@ -5069,9 +5082,11 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
|
||||
struct pipe_draw_info info_gs;
|
||||
struct pipe_draw_indirect_info indirect_gs;
|
||||
struct pipe_draw_start_count_bias draw_gs;
|
||||
|
||||
/* Wrap the pool allocation in a fake resource for meta-Gallium use */
|
||||
struct agx_resource indirect_rsrc = {.bo = batch->geom_indirect_bo};
|
||||
struct agx_resource index_rsrc = {.bo = batch->geom_index_bo};
|
||||
|
||||
if (ctx->gs) {
|
||||
/* Launch the pre-rasterization parts of the geometry shader */
|
||||
|
|
@ -5086,30 +5101,38 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
.index_size = 4,
|
||||
.primitive_restart = true,
|
||||
.restart_index = ~0,
|
||||
.index.resource = ctx->heap,
|
||||
.index.resource = &index_rsrc.base,
|
||||
.instance_count = 1,
|
||||
};
|
||||
|
||||
indirect_gs = (struct pipe_draw_indirect_info){
|
||||
.draw_count = 1,
|
||||
.buffer = &indirect_rsrc.base,
|
||||
.offset = batch->geom_indirect - indirect_rsrc.bo->va->addr,
|
||||
};
|
||||
if (indirect) {
|
||||
indirect_gs = (struct pipe_draw_indirect_info){
|
||||
.draw_count = 1,
|
||||
.buffer = &indirect_rsrc.base,
|
||||
.offset = batch->geom_indirect - indirect_rsrc.bo->va->addr,
|
||||
};
|
||||
|
||||
indirect = &indirect_gs;
|
||||
} else {
|
||||
unsigned unrolled_prims =
|
||||
u_decomposed_prims_for_vertices(info->mode, draws->count) *
|
||||
info->instance_count;
|
||||
|
||||
draw_gs = (struct pipe_draw_start_count_bias){
|
||||
.count = ctx->gs->gs.max_indices * unrolled_prims,
|
||||
};
|
||||
|
||||
draws = &draw_gs;
|
||||
}
|
||||
|
||||
info = &info_gs;
|
||||
indirect = &indirect_gs;
|
||||
|
||||
/* TODO: Deduplicate? */
|
||||
batch->reduced_prim = u_reduced_prim(info->mode);
|
||||
ctx->dirty |= AGX_DIRTY_PRIM;
|
||||
|
||||
if (info_gs.index_size) {
|
||||
ib = agx_resource(ctx->heap)->bo->va->addr;
|
||||
ib_extent = agx_resource(ctx->heap)->bo->size;
|
||||
} else {
|
||||
ib = 0;
|
||||
ib_extent = 0;
|
||||
}
|
||||
ib = batch->geom_index;
|
||||
ib_extent = index_rsrc.bo->size - (batch->geom_index - ib);
|
||||
|
||||
/* We need to reemit geometry descriptors since the txf sampler may change
|
||||
* between the GS prepass and the GS rast program.
|
||||
|
|
|
|||
|
|
@ -459,7 +459,8 @@ struct agx_batch {
|
|||
size_t result_off;
|
||||
|
||||
/* Actual pointer in a uniform */
|
||||
struct agx_bo *geom_params_bo;
|
||||
struct agx_bo *geom_params_bo, *geom_index_bo;
|
||||
uint64_t geom_index;
|
||||
|
||||
/* Whether each stage uses scratch */
|
||||
bool vs_scratch;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue