asahi,hk: use indirect-local dispatches for GS

This gets us good workgroup sizes even for indirect draws with GS.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34638>
This commit is contained in:
Alyssa Rosenzweig 2025-04-21 12:35:34 -04:00 committed by Marge Bot
parent 70c805d863
commit 753e3ba55b
3 changed files with 25 additions and 18 deletions

View file

@ -140,10 +140,13 @@ struct agx_geometry_params {
uint32_t xfb_prims[MAX_VERTEX_STREAMS];
/* Within an indirect GS draw, the grids used to dispatch the VS/GS written
* out by the GS indirect setup kernel or the CPU for a direct draw.
* out by the GS indirect setup kernel or the CPU for a direct draw. This is
* the "indirect local" format: the first 3 elements are in threads, the
* second 3 are in grid blocks. This lets us use nontrivial workgroups with
* indirect draws without needing any predication.
*/
uint32_t vs_grid[3];
uint32_t gs_grid[3];
uint32_t vs_grid[6];
uint32_t gs_grid[6];
/* Number of input primitives across all instances, calculated by the CPU for
* a direct draw or the GS indirect setup kernel for an indirect draw.
@ -167,7 +170,7 @@ struct agx_geometry_params {
*/
uint32_t input_topology;
} PACKED;
static_assert(sizeof(struct agx_geometry_params) == 82 * 4);
static_assert(sizeof(struct agx_geometry_params) == 88 * 4);
/* TCS shared memory layout:
*

View file

@ -1151,6 +1151,11 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
params.count_buffer = T.gpu;
}
/* Workgroup size */
params.vs_grid[3] = params.gs_grid[3] = 64;
params.vs_grid[4] = params.gs_grid[4] = 1;
params.vs_grid[5] = params.gs_grid[5] = 1;
if (indirect) {
/* TODO: size */
cmd->geom_indirect = hk_pool_alloc(cmd, 64, 4).gpu;
@ -1425,7 +1430,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
uint64_t geometry_params = desc->root.draw.geometry_params;
unsigned count_words = count->info.gs.count_words;
struct agx_workgroup wg;
struct agx_workgroup wg = agx_workgroup(64, 1, 1);
if (false /* TODO */)
perf_debug(cmd, "Transform feedbck");
@ -1473,18 +1478,14 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
libagx_gs_setup_indirect_struct(cmd, agx_1d(1),
AGX_BARRIER_ALL | AGX_PREGFX, gsi);
grid_vs = agx_grid_indirect(
grid_vs = agx_grid_indirect_local(
geometry_params + offsetof(struct agx_geometry_params, vs_grid));
grid_gs = agx_grid_indirect(
grid_gs = agx_grid_indirect_local(
geometry_params + offsetof(struct agx_geometry_params, gs_grid));
/* TODO: Optimize */
wg = agx_workgroup(1, 1, 1);
} else {
grid_vs = grid_gs = draw.b;
grid_gs.count[0] = u_decomposed_prims_for_vertices(mode, draw.b.count[0]);
wg = agx_workgroup(64, 1, 1);
}
/* Launch the vertex shader first */

View file

@ -4039,6 +4039,11 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
params.count_buffer = T.gpu;
}
/* Workgroup size */
params.vs_grid[3] = params.gs_grid[3] = 64;
params.vs_grid[4] = params.gs_grid[4] = 1;
params.vs_grid[5] = params.gs_grid[5] = 1;
if (indirect) {
batch->uniforms.vertex_output_buffer_ptr =
agx_pool_alloc_aligned(&batch->pool, 8, 8).gpu;
@ -4133,7 +4138,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
uint64_t gp = batch->uniforms.geometry_params;
struct agx_grid grid_vs, grid_gs;
struct agx_workgroup wg;
struct agx_workgroup wg = agx_workgroup(64, 1, 1);
/* Setup grids */
if (indirect) {
@ -4163,14 +4168,12 @@ agx_launch_gs_prerast(struct agx_batch *batch,
libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi);
wg = agx_workgroup(1, 1, 1);
grid_vs =
agx_grid_indirect(gp + offsetof(struct agx_geometry_params, vs_grid));
grid_vs = agx_grid_indirect_local(
gp + offsetof(struct agx_geometry_params, vs_grid));
grid_gs =
agx_grid_indirect(gp + offsetof(struct agx_geometry_params, gs_grid));
grid_gs = agx_grid_indirect_local(
gp + offsetof(struct agx_geometry_params, gs_grid));
} else {
wg = agx_workgroup(64, 1, 1);
grid_vs = agx_3d(draws->count, info->instance_count, 1);
grid_gs =