mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 00:30:11 +01:00
asahi/gs: only prefix sum with XFB
otherwise, an atomic suffices for the count shader.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33901>
This commit is contained in:
parent
184416a5e8
commit
40aa260209
5 changed files with 57 additions and 12 deletions
|
|
@ -44,6 +44,7 @@ struct lower_gs_state {
|
|||
int count_index[MAX_VERTEX_STREAMS];
|
||||
|
||||
bool rasterizer_discard;
|
||||
bool prefix_summing;
|
||||
};
|
||||
|
||||
/* Helpers for loading from the geometry state buffer */
|
||||
|
|
@ -304,11 +305,20 @@ write_xfb_counts(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
struct lower_gs_state *state)
|
||||
{
|
||||
/* Store each required counter */
|
||||
nir_def *addr = load_xfb_count_address(b, state, calc_unrolled_id(b),
|
||||
nir_intrinsic_stream_id(intr));
|
||||
nir_def *id =
|
||||
state->prefix_summing ? calc_unrolled_id(b) : nir_imm_int(b, 0);
|
||||
|
||||
if (addr)
|
||||
nir_def *addr =
|
||||
load_xfb_count_address(b, state, id, nir_intrinsic_stream_id(intr));
|
||||
if (!addr)
|
||||
return;
|
||||
|
||||
if (state->prefix_summing) {
|
||||
nir_store_global(b, addr, 4, intr->src[2].ssa, nir_component_mask(1));
|
||||
} else {
|
||||
nir_global_atomic(b, 32, addr, intr->src[2].ssa,
|
||||
.atomic_op = nir_atomic_op_iadd);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -700,7 +710,7 @@ previous_xfb_primitives(nir_builder *b, struct lower_gs_state *state,
|
|||
* we can calculate the base.
|
||||
*/
|
||||
return nir_imul_imm(b, unrolled_id, static_count);
|
||||
} else {
|
||||
} else if (state->prefix_summing) {
|
||||
/* Otherwise, we need to load from the prefix sum buffer. Note that the
|
||||
* sums are inclusive, so index 0 is nonzero. This requires a little
|
||||
* fixup here. We use a saturating unsigned subtraction so we don't read
|
||||
|
|
@ -713,6 +723,12 @@ previous_xfb_primitives(nir_builder *b, struct lower_gs_state *state,
|
|||
|
||||
return nir_bcsel(b, nir_ieq_imm(b, unrolled_id, 0), nir_imm_int(b, 0),
|
||||
nir_load_global_constant(b, addr, 4, 1, 32));
|
||||
} else {
|
||||
/* If we aren't prefix summing, the count is the only element */
|
||||
nir_def *addr =
|
||||
load_xfb_count_address(b, state, nir_imm_int(b, 0), stream);
|
||||
|
||||
return nir_load_global_constant(b, addr, 4, 1, 32);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1282,6 +1298,9 @@ agx_nir_lower_gs(nir_shader *gs, bool rasterizer_discard, nir_shader **gs_count,
|
|||
gs->info.gs.output_primitive, gs->info.gs.vertices_out,
|
||||
static_vertices[0], static_primitives[0]);
|
||||
|
||||
gs_state.prefix_summing =
|
||||
gs_state.count_stride_el > 0 && gs->xfb_info != NULL;
|
||||
|
||||
bool side_effects_for_rast = false;
|
||||
*gs_copy = agx_nir_create_gs_rast_shader(gs, &side_effects_for_rast);
|
||||
|
||||
|
|
@ -1387,6 +1406,7 @@ agx_nir_lower_gs(nir_shader *gs, bool rasterizer_discard, nir_shader **gs_count,
|
|||
*info = (struct agx_gs_info){
|
||||
.mode = gs->info.gs.output_primitive,
|
||||
.count_words = gs_state.count_stride_el,
|
||||
.prefix_sum = gs_state.prefix_summing,
|
||||
};
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -36,6 +36,9 @@ struct agx_gs_info {
|
|||
|
||||
/* Number of words per primitive in the count buffer */
|
||||
unsigned count_words;
|
||||
|
||||
/* Whether a prefix sum is required on the count outputs */
|
||||
bool prefix_sum;
|
||||
};
|
||||
|
||||
bool agx_nir_lower_gs(struct nir_shader *gs, bool rasterizer_discard,
|
||||
|
|
|
|||
|
|
@ -632,7 +632,8 @@ libagx_gs_setup_indirect(
|
|||
uint64_t vs_outputs /* Vertex (TES) output mask */,
|
||||
uint32_t index_size_B /* 0 if no index buffer */,
|
||||
uint32_t index_buffer_range_el,
|
||||
uint32_t prim /* Input primitive type, enum mesa_prim */)
|
||||
uint32_t prim /* Input primitive type, enum mesa_prim */,
|
||||
int is_prefix_summing)
|
||||
{
|
||||
/* Determine the (primitives, instances) grid size. */
|
||||
uint vertex_count = draw[0];
|
||||
|
|
@ -672,9 +673,11 @@ libagx_gs_setup_indirect(
|
|||
uint vertex_buffer_size =
|
||||
libagx_tcs_in_size(vertex_count * instance_count, vs_outputs);
|
||||
|
||||
p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
|
||||
state->heap_bottom +=
|
||||
align(p->input_primitives * p->count_buffer_stride, 16);
|
||||
if (is_prefix_summing) {
|
||||
p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
|
||||
state->heap_bottom +=
|
||||
align(p->input_primitives * p->count_buffer_stride, 16);
|
||||
}
|
||||
|
||||
p->input_buffer = (uintptr_t)(state->heap + state->heap_bottom);
|
||||
*vertex_buffer = p->input_buffer;
|
||||
|
|
|
|||
|
|
@ -1165,6 +1165,12 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
*/
|
||||
params.count_buffer_stride = count->info.gs.count_words * 4;
|
||||
|
||||
if (!count->info.gs.prefix_sum && params.count_buffer_stride) {
|
||||
struct agx_ptr T = hk_pool_alloc(cmd, 16, 4);
|
||||
memset(T.cpu, 0, 16);
|
||||
params.count_buffer = T.gpu;
|
||||
}
|
||||
|
||||
if (indirect) {
|
||||
params.vs_grid[2] = params.gs_grid[2] = 1;
|
||||
} else {
|
||||
|
|
@ -1177,7 +1183,7 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
params.input_primitives = params.gs_grid[0] * instances;
|
||||
|
||||
unsigned size = params.input_primitives * params.count_buffer_stride;
|
||||
if (size) {
|
||||
if (count->info.gs.prefix_sum && size) {
|
||||
params.count_buffer = hk_pool_alloc(cmd, size, 4).gpu;
|
||||
}
|
||||
}
|
||||
|
|
@ -1433,6 +1439,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
.p = desc->root.draw.geometry_params,
|
||||
.vs_outputs = vs->b.info.outputs,
|
||||
.prim = mode,
|
||||
.is_prefix_summing = count->info.gs.prefix_sum,
|
||||
};
|
||||
|
||||
if (cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL]) {
|
||||
|
|
@ -1476,8 +1483,10 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
hk_dispatch_with_local_size(cmd, cs, count, grid_gs,
|
||||
agx_workgroup(1, 1, 1));
|
||||
|
||||
libagx_prefix_sum_geom(cmd, agx_1d(1024 * count_words),
|
||||
AGX_BARRIER_ALL | AGX_PREGFX, geometry_params);
|
||||
if (count->info.gs.prefix_sum) {
|
||||
libagx_prefix_sum_geom(cmd, agx_1d(1024 * count_words),
|
||||
AGX_BARRIER_ALL | AGX_PREGFX, geometry_params);
|
||||
}
|
||||
}
|
||||
|
||||
/* Pre-GS shader */
|
||||
|
|
|
|||
|
|
@ -4039,6 +4039,13 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
|||
params.input_mask = batch->uniforms.vertex_outputs;
|
||||
params.count_buffer_stride = batch->ctx->gs->gs.count_words * 4;
|
||||
|
||||
bool prefix_sum = batch->ctx->gs->gs.prefix_sum;
|
||||
if (!prefix_sum && params.count_buffer_stride) {
|
||||
struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, 16, 4);
|
||||
memset(T.cpu, 0, 16);
|
||||
params.count_buffer = T.gpu;
|
||||
}
|
||||
|
||||
if (indirect) {
|
||||
batch->uniforms.vertex_output_buffer_ptr =
|
||||
agx_pool_alloc_aligned(&batch->pool, 8, 8).gpu;
|
||||
|
|
@ -4057,7 +4064,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
|||
batch->uniforms.vertex_outputs);
|
||||
unsigned size = params.input_primitives * params.count_buffer_stride;
|
||||
|
||||
if (size) {
|
||||
if (size && prefix_sum) {
|
||||
params.count_buffer =
|
||||
agx_pool_alloc_aligned(&batch->pool, size, 4).gpu;
|
||||
}
|
||||
|
|
@ -4141,6 +4148,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
|
|||
.vs_outputs = batch->uniforms.vertex_outputs,
|
||||
.index_size_B = info->index_size,
|
||||
.prim = info->mode,
|
||||
.is_prefix_summing = gs->gs.prefix_sum,
|
||||
};
|
||||
|
||||
libagx_gs_setup_indirect_struct(batch, agx_1d(1), AGX_BARRIER_ALL, gsi);
|
||||
|
|
@ -4168,7 +4176,9 @@ agx_launch_gs_prerast(struct agx_batch *batch,
|
|||
perf_debug(dev, "Geometry shader count");
|
||||
agx_launch(batch, grid_gs, wg, gs->gs_count, NULL, PIPE_SHADER_GEOMETRY,
|
||||
0);
|
||||
}
|
||||
|
||||
if (gs->gs.prefix_sum) {
|
||||
libagx_prefix_sum_geom(batch, agx_1d(1024 * gs->gs.count_words),
|
||||
AGX_BARRIER_ALL, gp);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue