mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 02:20:11 +01:00
libagx,hk: handle pipeline stats queries without a GS
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32759>
This commit is contained in:
parent
5bc89aa991
commit
a3b796ba19
4 changed files with 115 additions and 60 deletions
|
|
@ -1077,15 +1077,6 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, const nir_shader *libagx,
|
|||
}
|
||||
}
|
||||
|
||||
/* The geometry shader receives a number of input primitives. The driver
|
||||
* should disable this counter when tessellation is active TODO and count
|
||||
* patches separately.
|
||||
*/
|
||||
add_counter(
|
||||
b,
|
||||
nir_load_stat_query_address_agx(b, .base = PIPE_STAT_QUERY_IA_PRIMITIVES),
|
||||
unrolled_in_prims);
|
||||
|
||||
/* The geometry shader is invoked once per primitive (after unrolling
|
||||
* primitive restart). From the spec:
|
||||
*
|
||||
|
|
|
|||
|
|
@ -318,31 +318,40 @@ libagx_load_index_buffer(constant struct agx_ia_state *p, uint id,
|
|||
}
|
||||
|
||||
static void
|
||||
increment_ia_counters(global uint32_t *ia_vertices,
|
||||
global uint32_t *vs_invocations, uint count)
|
||||
increment_counters(global uint32_t *a, global uint32_t *b, global uint32_t *c,
|
||||
uint count)
|
||||
{
|
||||
if (ia_vertices) {
|
||||
*ia_vertices += count;
|
||||
}
|
||||
global uint32_t *ptr[] = {a, b, c};
|
||||
|
||||
if (vs_invocations) {
|
||||
*vs_invocations += count;
|
||||
for (uint i = 0; i < 3; ++i) {
|
||||
if (ptr[i]) {
|
||||
*(ptr[i]) += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL(1)
|
||||
libagx_increment_ia(global uint32_t *ia_vertices,
|
||||
global uint32_t *vs_invocations, constant uint32_t *draw)
|
||||
global uint32_t *ia_primitives,
|
||||
global uint32_t *vs_invocations, global uint32_t *c_prims,
|
||||
global uint32_t *c_invs, constant uint32_t *draw,
|
||||
enum mesa_prim prim)
|
||||
{
|
||||
increment_ia_counters(ia_vertices, vs_invocations, draw[0] * draw[1]);
|
||||
increment_counters(ia_vertices, vs_invocations, NULL, draw[0] * draw[1]);
|
||||
|
||||
uint prims = u_decomposed_prims_for_vertices(prim, draw[0]) * draw[1];
|
||||
increment_counters(ia_primitives, c_prims, c_invs, prims);
|
||||
}
|
||||
|
||||
KERNEL(1024)
|
||||
libagx_increment_ia_restart(global uint32_t *ia_vertices,
|
||||
global uint32_t *ia_primitives,
|
||||
global uint32_t *vs_invocations,
|
||||
global uint32_t *c_prims, global uint32_t *c_invs,
|
||||
constant uint32_t *draw, uint64_t index_buffer,
|
||||
uint32_t index_buffer_range_el,
|
||||
uint32_t restart_index, uint32_t index_size_B)
|
||||
uint32_t restart_index, uint32_t index_size_B,
|
||||
enum mesa_prim prim)
|
||||
{
|
||||
uint tid = get_global_id(0);
|
||||
unsigned count = draw[0];
|
||||
|
|
@ -368,7 +377,30 @@ libagx_increment_ia_restart(global uint32_t *ia_vertices,
|
|||
|
||||
/* Elect a single thread from the workgroup to increment the counters */
|
||||
if (tid == 0) {
|
||||
increment_ia_counters(ia_vertices, vs_invocations, scratch * draw[1]);
|
||||
increment_counters(ia_vertices, vs_invocations, NULL, scratch * draw[1]);
|
||||
}
|
||||
|
||||
/* TODO: We should vectorize this */
|
||||
if ((ia_primitives || c_prims || c_invs) && tid == 0) {
|
||||
uint accum = 0;
|
||||
int last_restart = -1;
|
||||
for (uint i = 0; i < count; ++i) {
|
||||
uint index = load_index(index_buffer, index_buffer_range_el, start + i,
|
||||
index_size_B);
|
||||
|
||||
if (index == restart_index) {
|
||||
accum +=
|
||||
u_decomposed_prims_for_vertices(prim, i - last_restart - 1);
|
||||
last_restart = i;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
accum +=
|
||||
u_decomposed_prims_for_vertices(prim, count - last_restart - 1);
|
||||
}
|
||||
|
||||
increment_counters(ia_primitives, c_prims, c_invs, accum * draw[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@
|
|||
#include "util/format/u_formats.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
#include "layout.h"
|
||||
#include "libagx_dgc.h"
|
||||
|
|
@ -3108,23 +3109,6 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
uint32_t xfb_outputs = last_sw->info.xfb_info.output_count;
|
||||
bool needs_gs = xfb_outputs;
|
||||
|
||||
/* Various pipeline statistics are implemented in the pre-GS shader. TODO:
|
||||
* This could easily be optimized.
|
||||
*/
|
||||
VkQueryPipelineStatisticFlagBits ia_statistics[] = {
|
||||
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
|
||||
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
|
||||
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
|
||||
};
|
||||
|
||||
bool ia_stats = false;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(ia_statistics); ++i) {
|
||||
ia_stats |= hk_pipeline_stat_addr(cmd, ia_statistics[i]) != 0;
|
||||
}
|
||||
|
||||
needs_gs |= ia_stats;
|
||||
|
||||
/* If we already have a matching GS configuration, we're done */
|
||||
if ((gs != NULL) == needs_gs)
|
||||
return;
|
||||
|
|
@ -3157,8 +3141,7 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
}
|
||||
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
perf_debug(dev, "Binding passthrough GS for%s%s\n",
|
||||
xfb_outputs ? " XFB" : "", ia_stats ? " statistics" : "");
|
||||
perf_debug(dev, "Binding passthrough GS for%s\n", xfb_outputs ? " XFB" : "");
|
||||
|
||||
gs = hk_meta_shader(dev, hk_nir_passthrough_gs, key, key_size);
|
||||
gs->is_passthrough = true;
|
||||
|
|
@ -3337,7 +3320,8 @@ hk_set_view_index(struct hk_cmd_buffer *cmd, uint32_t view_idx)
|
|||
|
||||
static void
|
||||
hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
|
||||
uint64_t ia_vertices, uint64_t vs_invocations)
|
||||
uint64_t ia_vertices, uint64_t ia_prims, uint64_t vs_invocations,
|
||||
uint64_t c_prims, uint64_t c_inv)
|
||||
{
|
||||
/* XXX: stream link needed? */
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
|
|
@ -3351,15 +3335,33 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
|
|||
draw_ptr = hk_pool_upload(cmd, &desc, sizeof(desc), 4);
|
||||
}
|
||||
|
||||
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||||
enum mesa_prim prim = vk_conv_topology(dyn->ia.primitive_topology);
|
||||
|
||||
bool geom = cmd->state.gfx.shaders[MESA_SHADER_GEOMETRY];
|
||||
bool tess = cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL];
|
||||
|
||||
/* Clipper counters depend on geom/tess outputs and must be written with the
|
||||
* geom/tess output. They are updated as IA counters only when geom/tess is
|
||||
* not used.
|
||||
*
|
||||
* TODO: Tessellation clipper counters not actually wired up, pending CTS.
|
||||
*/
|
||||
if (geom || tess) {
|
||||
c_prims = 0;
|
||||
c_inv = 0;
|
||||
}
|
||||
|
||||
if (draw.restart) {
|
||||
uint32_t index_size_B = agx_index_size_to_B(draw.index_size);
|
||||
|
||||
libagx_increment_ia_restart(cs, agx_1d(1024), ia_vertices, vs_invocations,
|
||||
draw_ptr, draw.index_buffer,
|
||||
agx_draw_index_range_el(draw),
|
||||
cmd->state.gfx.index.restart, index_size_B);
|
||||
libagx_increment_ia_restart(
|
||||
cs, agx_1d(1024), ia_vertices, ia_prims, vs_invocations, c_prims,
|
||||
c_inv, draw_ptr, draw.index_buffer, agx_draw_index_range_el(draw),
|
||||
cmd->state.gfx.index.restart, index_size_B, prim);
|
||||
} else {
|
||||
libagx_increment_ia(cs, agx_1d(1), ia_vertices, vs_invocations, draw_ptr);
|
||||
libagx_increment_ia(cs, agx_1d(1), ia_vertices, ia_prims, vs_invocations,
|
||||
c_prims, c_inv, draw_ptr, prim);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3380,10 +3382,20 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
|||
uint64_t stat_ia_verts = hk_pipeline_stat_addr(
|
||||
cmd, VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT);
|
||||
|
||||
uint64_t stat_ia_prims = hk_pipeline_stat_addr(
|
||||
cmd, VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT);
|
||||
|
||||
uint64_t stat_vs_inv = hk_pipeline_stat_addr(
|
||||
cmd, VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT);
|
||||
|
||||
bool ia_stats = stat_ia_verts || stat_vs_inv;
|
||||
uint64_t stat_c_inv = hk_pipeline_stat_addr(
|
||||
cmd, VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT);
|
||||
|
||||
uint64_t stat_c_prims = hk_pipeline_stat_addr(
|
||||
cmd, VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT);
|
||||
|
||||
bool ia_stats = stat_ia_verts || stat_ia_prims || stat_vs_inv ||
|
||||
stat_c_inv || stat_c_prims;
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
|
||||
hk_foreach_view(cmd) {
|
||||
|
|
@ -3423,7 +3435,8 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
|||
}
|
||||
|
||||
if (ia_stats) {
|
||||
hk_ia_update(cmd, ccs, draw, stat_ia_verts, stat_vs_inv);
|
||||
hk_ia_update(cmd, ccs, draw, stat_ia_verts, stat_ia_prims, stat_vs_inv,
|
||||
stat_c_inv, stat_c_prims);
|
||||
}
|
||||
|
||||
if (tess) {
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@
|
|||
#include "util/u_inlines.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "util/u_transfer.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "agx_bg_eot.h"
|
||||
|
|
@ -3890,19 +3891,41 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info,
|
|||
uint64_t ia_vertices = agx_get_query_address(
|
||||
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_VERTICES]);
|
||||
|
||||
uint64_t ia_primitives = agx_get_query_address(
|
||||
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_PRIMITIVES]);
|
||||
|
||||
uint64_t vs_invocations = agx_get_query_address(
|
||||
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_VS_INVOCATIONS]);
|
||||
|
||||
uint64_t c_prims = agx_get_query_address(
|
||||
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_C_PRIMITIVES]);
|
||||
|
||||
uint64_t c_invs = agx_get_query_address(
|
||||
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_C_INVOCATIONS]);
|
||||
|
||||
/* With a geometry shader, clipper counters are written by the pre-GS kernel
|
||||
* since they depend on the output on the geometry shader. Without a geometry
|
||||
* shader, they are written along with IA.
|
||||
*
|
||||
* TODO: Broken tessellation interaction, but nobody cares.
|
||||
*/
|
||||
if (ctx->stage[PIPE_SHADER_GEOMETRY].shader) {
|
||||
c_prims = 0;
|
||||
c_invs = 0;
|
||||
}
|
||||
|
||||
if (info->primitive_restart) {
|
||||
perf_debug(dev, "Input assembly counters with primitive restart");
|
||||
|
||||
libagx_increment_ia_restart(batch, agx_1d(1024), ia_vertices,
|
||||
vs_invocations, draw, ib, ib_range_el,
|
||||
info->restart_index, info->index_size);
|
||||
libagx_increment_ia_restart(
|
||||
batch, agx_1d(1024), ia_vertices, ia_primitives, vs_invocations,
|
||||
c_prims, c_invs, draw, ib, ib_range_el, info->restart_index,
|
||||
info->index_size, info->mode);
|
||||
} else {
|
||||
perf_debug(dev, "Input assembly counters");
|
||||
|
||||
libagx_increment_ia(batch, agx_1d(1), ia_vertices, vs_invocations, draw);
|
||||
libagx_increment_ia(batch, agx_1d(1), ia_vertices, ia_primitives,
|
||||
vs_invocations, c_prims, c_invs, draw, info->mode);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4269,14 +4292,6 @@ agx_needs_passthrough_gs(struct agx_context *ctx,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Various pipeline statistics are implemented in the pre-GS shader. */
|
||||
if (ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_PRIMITIVES] ||
|
||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_C_PRIMITIVES] ||
|
||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_C_INVOCATIONS]) {
|
||||
perf_debug_ctx(ctx, "Using passthrough GS due to pipeline statistics");
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Transform feedback is layered on geometry shaders, so if transform
|
||||
* feedback is used, we need a GS.
|
||||
*/
|
||||
|
|
@ -4870,7 +4885,11 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
|
||||
if (ctx->active_queries && !ctx->active_draw_without_restart &&
|
||||
(ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_VERTICES] ||
|
||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_VS_INVOCATIONS])) {
|
||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_PRIMITIVES] ||
|
||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_VS_INVOCATIONS] ||
|
||||
((ctx->pipeline_statistics[PIPE_STAT_QUERY_C_PRIMITIVES] ||
|
||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_C_INVOCATIONS]) &&
|
||||
!ctx->stage[PIPE_SHADER_GEOMETRY].shader))) {
|
||||
|
||||
uint64_t ptr;
|
||||
if (indirect) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue