mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
libagx,hk: handle pipeline stats queries without a GS
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32759>
This commit is contained in:
parent
5bc89aa991
commit
a3b796ba19
4 changed files with 115 additions and 60 deletions
|
|
@ -1077,15 +1077,6 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, const nir_shader *libagx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The geometry shader receives a number of input primitives. The driver
|
|
||||||
* should disable this counter when tessellation is active TODO and count
|
|
||||||
* patches separately.
|
|
||||||
*/
|
|
||||||
add_counter(
|
|
||||||
b,
|
|
||||||
nir_load_stat_query_address_agx(b, .base = PIPE_STAT_QUERY_IA_PRIMITIVES),
|
|
||||||
unrolled_in_prims);
|
|
||||||
|
|
||||||
/* The geometry shader is invoked once per primitive (after unrolling
|
/* The geometry shader is invoked once per primitive (after unrolling
|
||||||
* primitive restart). From the spec:
|
* primitive restart). From the spec:
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -318,31 +318,40 @@ libagx_load_index_buffer(constant struct agx_ia_state *p, uint id,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
increment_ia_counters(global uint32_t *ia_vertices,
|
increment_counters(global uint32_t *a, global uint32_t *b, global uint32_t *c,
|
||||||
global uint32_t *vs_invocations, uint count)
|
uint count)
|
||||||
{
|
{
|
||||||
if (ia_vertices) {
|
global uint32_t *ptr[] = {a, b, c};
|
||||||
*ia_vertices += count;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vs_invocations) {
|
for (uint i = 0; i < 3; ++i) {
|
||||||
*vs_invocations += count;
|
if (ptr[i]) {
|
||||||
|
*(ptr[i]) += count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
KERNEL(1)
|
KERNEL(1)
|
||||||
libagx_increment_ia(global uint32_t *ia_vertices,
|
libagx_increment_ia(global uint32_t *ia_vertices,
|
||||||
global uint32_t *vs_invocations, constant uint32_t *draw)
|
global uint32_t *ia_primitives,
|
||||||
|
global uint32_t *vs_invocations, global uint32_t *c_prims,
|
||||||
|
global uint32_t *c_invs, constant uint32_t *draw,
|
||||||
|
enum mesa_prim prim)
|
||||||
{
|
{
|
||||||
increment_ia_counters(ia_vertices, vs_invocations, draw[0] * draw[1]);
|
increment_counters(ia_vertices, vs_invocations, NULL, draw[0] * draw[1]);
|
||||||
|
|
||||||
|
uint prims = u_decomposed_prims_for_vertices(prim, draw[0]) * draw[1];
|
||||||
|
increment_counters(ia_primitives, c_prims, c_invs, prims);
|
||||||
}
|
}
|
||||||
|
|
||||||
KERNEL(1024)
|
KERNEL(1024)
|
||||||
libagx_increment_ia_restart(global uint32_t *ia_vertices,
|
libagx_increment_ia_restart(global uint32_t *ia_vertices,
|
||||||
|
global uint32_t *ia_primitives,
|
||||||
global uint32_t *vs_invocations,
|
global uint32_t *vs_invocations,
|
||||||
|
global uint32_t *c_prims, global uint32_t *c_invs,
|
||||||
constant uint32_t *draw, uint64_t index_buffer,
|
constant uint32_t *draw, uint64_t index_buffer,
|
||||||
uint32_t index_buffer_range_el,
|
uint32_t index_buffer_range_el,
|
||||||
uint32_t restart_index, uint32_t index_size_B)
|
uint32_t restart_index, uint32_t index_size_B,
|
||||||
|
enum mesa_prim prim)
|
||||||
{
|
{
|
||||||
uint tid = get_global_id(0);
|
uint tid = get_global_id(0);
|
||||||
unsigned count = draw[0];
|
unsigned count = draw[0];
|
||||||
|
|
@ -368,7 +377,30 @@ libagx_increment_ia_restart(global uint32_t *ia_vertices,
|
||||||
|
|
||||||
/* Elect a single thread from the workgroup to increment the counters */
|
/* Elect a single thread from the workgroup to increment the counters */
|
||||||
if (tid == 0) {
|
if (tid == 0) {
|
||||||
increment_ia_counters(ia_vertices, vs_invocations, scratch * draw[1]);
|
increment_counters(ia_vertices, vs_invocations, NULL, scratch * draw[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: We should vectorize this */
|
||||||
|
if ((ia_primitives || c_prims || c_invs) && tid == 0) {
|
||||||
|
uint accum = 0;
|
||||||
|
int last_restart = -1;
|
||||||
|
for (uint i = 0; i < count; ++i) {
|
||||||
|
uint index = load_index(index_buffer, index_buffer_range_el, start + i,
|
||||||
|
index_size_B);
|
||||||
|
|
||||||
|
if (index == restart_index) {
|
||||||
|
accum +=
|
||||||
|
u_decomposed_prims_for_vertices(prim, i - last_restart - 1);
|
||||||
|
last_restart = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
accum +=
|
||||||
|
u_decomposed_prims_for_vertices(prim, count - last_restart - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
increment_counters(ia_primitives, c_prims, c_invs, accum * draw[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@
|
||||||
#include "util/format/u_formats.h"
|
#include "util/format/u_formats.h"
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
#include "util/ralloc.h"
|
#include "util/ralloc.h"
|
||||||
|
#include "util/u_prim.h"
|
||||||
#include "vulkan/vulkan_core.h"
|
#include "vulkan/vulkan_core.h"
|
||||||
#include "layout.h"
|
#include "layout.h"
|
||||||
#include "libagx_dgc.h"
|
#include "libagx_dgc.h"
|
||||||
|
|
@ -3108,23 +3109,6 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
||||||
uint32_t xfb_outputs = last_sw->info.xfb_info.output_count;
|
uint32_t xfb_outputs = last_sw->info.xfb_info.output_count;
|
||||||
bool needs_gs = xfb_outputs;
|
bool needs_gs = xfb_outputs;
|
||||||
|
|
||||||
/* Various pipeline statistics are implemented in the pre-GS shader. TODO:
|
|
||||||
* This could easily be optimized.
|
|
||||||
*/
|
|
||||||
VkQueryPipelineStatisticFlagBits ia_statistics[] = {
|
|
||||||
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
|
|
||||||
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
|
|
||||||
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
|
|
||||||
};
|
|
||||||
|
|
||||||
bool ia_stats = false;
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(ia_statistics); ++i) {
|
|
||||||
ia_stats |= hk_pipeline_stat_addr(cmd, ia_statistics[i]) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
needs_gs |= ia_stats;
|
|
||||||
|
|
||||||
/* If we already have a matching GS configuration, we're done */
|
/* If we already have a matching GS configuration, we're done */
|
||||||
if ((gs != NULL) == needs_gs)
|
if ((gs != NULL) == needs_gs)
|
||||||
return;
|
return;
|
||||||
|
|
@ -3157,8 +3141,7 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||||
perf_debug(dev, "Binding passthrough GS for%s%s\n",
|
perf_debug(dev, "Binding passthrough GS for%s\n", xfb_outputs ? " XFB" : "");
|
||||||
xfb_outputs ? " XFB" : "", ia_stats ? " statistics" : "");
|
|
||||||
|
|
||||||
gs = hk_meta_shader(dev, hk_nir_passthrough_gs, key, key_size);
|
gs = hk_meta_shader(dev, hk_nir_passthrough_gs, key, key_size);
|
||||||
gs->is_passthrough = true;
|
gs->is_passthrough = true;
|
||||||
|
|
@ -3337,7 +3320,8 @@ hk_set_view_index(struct hk_cmd_buffer *cmd, uint32_t view_idx)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
|
hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
|
||||||
uint64_t ia_vertices, uint64_t vs_invocations)
|
uint64_t ia_vertices, uint64_t ia_prims, uint64_t vs_invocations,
|
||||||
|
uint64_t c_prims, uint64_t c_inv)
|
||||||
{
|
{
|
||||||
/* XXX: stream link needed? */
|
/* XXX: stream link needed? */
|
||||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||||
|
|
@ -3351,15 +3335,33 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw,
|
||||||
draw_ptr = hk_pool_upload(cmd, &desc, sizeof(desc), 4);
|
draw_ptr = hk_pool_upload(cmd, &desc, sizeof(desc), 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||||||
|
enum mesa_prim prim = vk_conv_topology(dyn->ia.primitive_topology);
|
||||||
|
|
||||||
|
bool geom = cmd->state.gfx.shaders[MESA_SHADER_GEOMETRY];
|
||||||
|
bool tess = cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL];
|
||||||
|
|
||||||
|
/* Clipper counters depend on geom/tess outputs and must be written with the
|
||||||
|
* geom/tess output. They are updated as IA counters only when geom/tess is
|
||||||
|
* not used.
|
||||||
|
*
|
||||||
|
* TODO: Tessellation clipper counters not actually wired up, pending CTS.
|
||||||
|
*/
|
||||||
|
if (geom || tess) {
|
||||||
|
c_prims = 0;
|
||||||
|
c_inv = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (draw.restart) {
|
if (draw.restart) {
|
||||||
uint32_t index_size_B = agx_index_size_to_B(draw.index_size);
|
uint32_t index_size_B = agx_index_size_to_B(draw.index_size);
|
||||||
|
|
||||||
libagx_increment_ia_restart(cs, agx_1d(1024), ia_vertices, vs_invocations,
|
libagx_increment_ia_restart(
|
||||||
draw_ptr, draw.index_buffer,
|
cs, agx_1d(1024), ia_vertices, ia_prims, vs_invocations, c_prims,
|
||||||
agx_draw_index_range_el(draw),
|
c_inv, draw_ptr, draw.index_buffer, agx_draw_index_range_el(draw),
|
||||||
cmd->state.gfx.index.restart, index_size_B);
|
cmd->state.gfx.index.restart, index_size_B, prim);
|
||||||
} else {
|
} else {
|
||||||
libagx_increment_ia(cs, agx_1d(1), ia_vertices, vs_invocations, draw_ptr);
|
libagx_increment_ia(cs, agx_1d(1), ia_vertices, ia_prims, vs_invocations,
|
||||||
|
c_prims, c_inv, draw_ptr, prim);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3380,10 +3382,20 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
||||||
uint64_t stat_ia_verts = hk_pipeline_stat_addr(
|
uint64_t stat_ia_verts = hk_pipeline_stat_addr(
|
||||||
cmd, VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT);
|
cmd, VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT);
|
||||||
|
|
||||||
|
uint64_t stat_ia_prims = hk_pipeline_stat_addr(
|
||||||
|
cmd, VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT);
|
||||||
|
|
||||||
uint64_t stat_vs_inv = hk_pipeline_stat_addr(
|
uint64_t stat_vs_inv = hk_pipeline_stat_addr(
|
||||||
cmd, VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT);
|
cmd, VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT);
|
||||||
|
|
||||||
bool ia_stats = stat_ia_verts || stat_vs_inv;
|
uint64_t stat_c_inv = hk_pipeline_stat_addr(
|
||||||
|
cmd, VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT);
|
||||||
|
|
||||||
|
uint64_t stat_c_prims = hk_pipeline_stat_addr(
|
||||||
|
cmd, VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT);
|
||||||
|
|
||||||
|
bool ia_stats = stat_ia_verts || stat_ia_prims || stat_vs_inv ||
|
||||||
|
stat_c_inv || stat_c_prims;
|
||||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||||
|
|
||||||
hk_foreach_view(cmd) {
|
hk_foreach_view(cmd) {
|
||||||
|
|
@ -3423,7 +3435,8 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ia_stats) {
|
if (ia_stats) {
|
||||||
hk_ia_update(cmd, ccs, draw, stat_ia_verts, stat_vs_inv);
|
hk_ia_update(cmd, ccs, draw, stat_ia_verts, stat_ia_prims, stat_vs_inv,
|
||||||
|
stat_c_inv, stat_c_prims);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tess) {
|
if (tess) {
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@
|
||||||
#include "util/u_inlines.h"
|
#include "util/u_inlines.h"
|
||||||
#include "util/u_math.h"
|
#include "util/u_math.h"
|
||||||
#include "util/u_memory.h"
|
#include "util/u_memory.h"
|
||||||
|
#include "util/u_prim.h"
|
||||||
#include "util/u_transfer.h"
|
#include "util/u_transfer.h"
|
||||||
#include "util/u_upload_mgr.h"
|
#include "util/u_upload_mgr.h"
|
||||||
#include "agx_bg_eot.h"
|
#include "agx_bg_eot.h"
|
||||||
|
|
@ -3890,19 +3891,41 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info,
|
||||||
uint64_t ia_vertices = agx_get_query_address(
|
uint64_t ia_vertices = agx_get_query_address(
|
||||||
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_VERTICES]);
|
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_VERTICES]);
|
||||||
|
|
||||||
|
uint64_t ia_primitives = agx_get_query_address(
|
||||||
|
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_PRIMITIVES]);
|
||||||
|
|
||||||
uint64_t vs_invocations = agx_get_query_address(
|
uint64_t vs_invocations = agx_get_query_address(
|
||||||
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_VS_INVOCATIONS]);
|
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_VS_INVOCATIONS]);
|
||||||
|
|
||||||
|
uint64_t c_prims = agx_get_query_address(
|
||||||
|
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_C_PRIMITIVES]);
|
||||||
|
|
||||||
|
uint64_t c_invs = agx_get_query_address(
|
||||||
|
batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_C_INVOCATIONS]);
|
||||||
|
|
||||||
|
/* With a geometry shader, clipper counters are written by the pre-GS kernel
|
||||||
|
* since they depend on the output on the geometry shader. Without a geometry
|
||||||
|
* shader, they are written along with IA.
|
||||||
|
*
|
||||||
|
* TODO: Broken tessellation interaction, but nobody cares.
|
||||||
|
*/
|
||||||
|
if (ctx->stage[PIPE_SHADER_GEOMETRY].shader) {
|
||||||
|
c_prims = 0;
|
||||||
|
c_invs = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (info->primitive_restart) {
|
if (info->primitive_restart) {
|
||||||
perf_debug(dev, "Input assembly counters with primitive restart");
|
perf_debug(dev, "Input assembly counters with primitive restart");
|
||||||
|
|
||||||
libagx_increment_ia_restart(batch, agx_1d(1024), ia_vertices,
|
libagx_increment_ia_restart(
|
||||||
vs_invocations, draw, ib, ib_range_el,
|
batch, agx_1d(1024), ia_vertices, ia_primitives, vs_invocations,
|
||||||
info->restart_index, info->index_size);
|
c_prims, c_invs, draw, ib, ib_range_el, info->restart_index,
|
||||||
|
info->index_size, info->mode);
|
||||||
} else {
|
} else {
|
||||||
perf_debug(dev, "Input assembly counters");
|
perf_debug(dev, "Input assembly counters");
|
||||||
|
|
||||||
libagx_increment_ia(batch, agx_1d(1), ia_vertices, vs_invocations, draw);
|
libagx_increment_ia(batch, agx_1d(1), ia_vertices, ia_primitives,
|
||||||
|
vs_invocations, c_prims, c_invs, draw, info->mode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4269,14 +4292,6 @@ agx_needs_passthrough_gs(struct agx_context *ctx,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Various pipeline statistics are implemented in the pre-GS shader. */
|
|
||||||
if (ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_PRIMITIVES] ||
|
|
||||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_C_PRIMITIVES] ||
|
|
||||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_C_INVOCATIONS]) {
|
|
||||||
perf_debug_ctx(ctx, "Using passthrough GS due to pipeline statistics");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Transform feedback is layered on geometry shaders, so if transform
|
/* Transform feedback is layered on geometry shaders, so if transform
|
||||||
* feedback is used, we need a GS.
|
* feedback is used, we need a GS.
|
||||||
*/
|
*/
|
||||||
|
|
@ -4870,7 +4885,11 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
||||||
|
|
||||||
if (ctx->active_queries && !ctx->active_draw_without_restart &&
|
if (ctx->active_queries && !ctx->active_draw_without_restart &&
|
||||||
(ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_VERTICES] ||
|
(ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_VERTICES] ||
|
||||||
ctx->pipeline_statistics[PIPE_STAT_QUERY_VS_INVOCATIONS])) {
|
ctx->pipeline_statistics[PIPE_STAT_QUERY_IA_PRIMITIVES] ||
|
||||||
|
ctx->pipeline_statistics[PIPE_STAT_QUERY_VS_INVOCATIONS] ||
|
||||||
|
((ctx->pipeline_statistics[PIPE_STAT_QUERY_C_PRIMITIVES] ||
|
||||||
|
ctx->pipeline_statistics[PIPE_STAT_QUERY_C_INVOCATIONS]) &&
|
||||||
|
!ctx->stage[PIPE_SHADER_GEOMETRY].shader))) {
|
||||||
|
|
||||||
uint64_t ptr;
|
uint64_t ptr;
|
||||||
if (indirect) {
|
if (indirect) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue