asahi: implement xfb overflow queries

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26963>
This commit is contained in:
Alyssa Rosenzweig 2023-12-04 17:14:26 -04:00
parent c4fd1abc8b
commit 1006e27627
8 changed files with 59 additions and 3 deletions

View file

@ -236,7 +236,7 @@ GL 4.6, GLSL 4.60 -- all DONE: radeonsi, virgl, zink, iris, crocus/gen7+, d3d12
GL_ARB_shader_group_vote DONE (freedreno/a6xx, nvc0, llvmpipe, crocus)
GL_ARB_spirv_extensions DONE (freedreno, llvmpipe)
GL_ARB_texture_filter_anisotropic DONE (etnaviv/HALTI0, freedreno, nv50, nvc0, r600, softpipe, llvmpipe, v3d, panfrost/g72+, asahi, crocus)
GL_ARB_transform_feedback_overflow_query DONE (freedreno/a6xx+, nvc0, llvmpipe, softpipe, crocus/gen6+)
GL_ARB_transform_feedback_overflow_query DONE (freedreno/a6xx+, nvc0, llvmpipe, softpipe, crocus/gen6+, asahi)
GL_KHR_no_error DONE (all drivers)
These are the extensions cherry-picked to make GLES 3.1

View file

@ -16,6 +16,7 @@ GL_ARB_indirect_parameters on Asahi
GL_ARB_viewport_array on Asahi
GL_ARB_fragment_layer_viewport on Asahi
GL_ARB_cull_distance on Asahi
GL_ARB_transform_feedback_overflow_query on Asahi
VK_KHR_calibrated_timestamps on RADV
VK_KHR_vertex_attribute_divisor on RADV
VK_KHR_maintenance6 on RADV

View file

@ -832,9 +832,11 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, const nir_shader *libagx,
}
/* Determine the number of primitives generated in each stream */
nir_def *prims[MAX_VERTEX_STREAMS];
nir_def *in_prims[MAX_VERTEX_STREAMS], *prims[MAX_VERTEX_STREAMS];
u_foreach_bit(i, streams) {
prims[i] = previous_xfb_primitives(b, state, i, unrolled_in_prims);
in_prims[i] = previous_xfb_primitives(b, state, i, unrolled_in_prims);
prims[i] = in_prims[i];
add_counter(b, load_geometry_param(b, prims_generated_counter[i]),
prims[i]);
@ -879,13 +881,24 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, const nir_shader *libagx,
prims[stream] = nir_umin(b, prims[stream], max_prims);
}
nir_def *any_overflow = nir_imm_false(b);
u_foreach_bit(i, streams) {
nir_def *overflow = nir_ult(b, prims[i], in_prims[i]);
any_overflow = nir_ior(b, any_overflow, overflow);
store_geometry_param(b, xfb_prims[i], prims[i]);
add_counter(b, load_geometry_param(b, xfb_overflow[i]),
nir_b2i32(b, overflow));
add_counter(b, load_geometry_param(b, xfb_prims_generated_counter[i]),
prims[i]);
}
add_counter(b, load_geometry_param(b, xfb_any_overflow),
nir_b2i32(b, any_overflow));
/* Update XFB counters */
u_foreach_bit(i, xfb->buffers_written) {
uint32_t prim_stride_B = xfb->buffers[i].stride * vertices_per_prim;

View file

@ -110,6 +110,8 @@ struct agx_geometry_params {
/* Address of the primitives generated counters */
GLOBAL(uint) prims_generated_counter[MAX_VERTEX_STREAMS];
GLOBAL(uint) xfb_prims_generated_counter[MAX_VERTEX_STREAMS];
GLOBAL(uint) xfb_overflow[MAX_VERTEX_STREAMS];
GLOBAL(uint) xfb_any_overflow;
/* Pointers to transform feedback buffer offsets in bytes */
GLOBAL(uint) xfb_offs_ptrs[MAX_SO_BUFFERS];

View file

@ -1568,6 +1568,7 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
case PIPE_CAP_ANISOTROPIC_FILTER:

View file

@ -108,6 +108,14 @@ agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
ctx->tf_prims_generated[query->index] = query;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
ctx->tf_overflow[query->index] = query;
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
ctx->tf_any_overflow = query;
break;
case PIPE_QUERY_TIME_ELAPSED:
ctx->time_elapsed = query;
query->timestamp_begin = UINT64_MAX;
@ -157,6 +165,12 @@ agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
case PIPE_QUERY_PRIMITIVES_EMITTED:
ctx->tf_prims_generated[query->index] = NULL;
return true;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
ctx->tf_overflow[query->index] = NULL;
return true;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
ctx->tf_any_overflow = NULL;
return true;
case PIPE_QUERY_TIME_ELAPSED:
ctx->time_elapsed = NULL;
return true;
@ -213,6 +227,11 @@ agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
vresult->b = query->value;
return true;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
vresult->b = query->value > 0;
return true;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
@ -259,6 +278,10 @@ agx_get_query_result_resource(struct pipe_context *pipe, struct pipe_query *q,
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
result.u32 = result.b;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
result.u32 = (bool)(result.u32 > 0);
break;
default:
break;
}

View file

@ -3628,6 +3628,20 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
}
}
if (batch->ctx->active_queries && batch->ctx->streamout.num_targets > 0) {
for (unsigned i = 0; i < ARRAY_SIZE(batch->ctx->tf_overflow); ++i) {
if (batch->ctx->tf_overflow[i]) {
params.xfb_overflow[i] =
agx_get_query_address(batch, batch->ctx->tf_overflow[i]);
}
}
if (batch->ctx->tf_any_overflow) {
params.xfb_any_overflow =
agx_get_query_address(batch, batch->ctx->tf_any_overflow);
}
}
/* Calculate input primitive count for direct draws, and allocate the count
* buffer. GPU calculates and allocates for indirect draws.
*/

View file

@ -537,6 +537,8 @@ struct agx_context {
struct agx_query *occlusion_query;
struct agx_query *prims_generated[4];
struct agx_query *tf_prims_generated[4];
struct agx_query *tf_overflow[4];
struct agx_query *tf_any_overflow;
struct agx_query *time_elapsed;
bool active_queries;