radeonsi: add primitive culling stats to the HUD

Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2019-02-19 23:27:16 -05:00
parent c9b7a37b8f
commit 0252fb92b8
4 changed files with 44 additions and 4 deletions

View file

@ -1105,6 +1105,9 @@ struct si_context {
unsigned num_resident_handles;
uint64_t num_alloc_tex_transfer_bytes;
unsigned last_tex_ps_draw_ratio; /* for query */
unsigned compute_num_verts_accepted;
unsigned compute_num_verts_rejected;
unsigned compute_num_verts_ineligible; /* vertex count not above prim_discard_vertex_count_threshold */
unsigned context_roll;
/* Queries. */

View file

@ -255,6 +255,15 @@ static bool si_query_sw_begin(struct si_context *sctx,
query->begin_result =
p_atomic_read(&sctx->screen->num_shader_cache_hits);
break;
case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
query->begin_result = sctx->compute_num_verts_accepted;
break;
case SI_QUERY_PD_NUM_PRIMS_REJECTED:
query->begin_result = sctx->compute_num_verts_rejected;
break;
case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
query->begin_result = sctx->compute_num_verts_ineligible;
break;
case SI_QUERY_GPIN_ASIC_ID:
case SI_QUERY_GPIN_NUM_SIMD:
case SI_QUERY_GPIN_NUM_RB:
@ -420,6 +429,15 @@ static bool si_query_sw_end(struct si_context *sctx,
query->end_result =
p_atomic_read(&sctx->screen->num_shader_cache_hits);
break;
case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
query->end_result = sctx->compute_num_verts_accepted;
break;
case SI_QUERY_PD_NUM_PRIMS_REJECTED:
query->end_result = sctx->compute_num_verts_rejected;
break;
case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
query->end_result = sctx->compute_num_verts_ineligible;
break;
case SI_QUERY_GPIN_ASIC_ID:
case SI_QUERY_GPIN_NUM_SIMD:
case SI_QUERY_GPIN_NUM_RB:
@ -465,6 +483,12 @@ static bool si_query_sw_get_result(struct si_context *sctx,
result->u64 = (query->end_result - query->begin_result) * 100 /
(query->end_time - query->begin_time);
return true;
case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
case SI_QUERY_PD_NUM_PRIMS_REJECTED:
case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
result->u64 = ((unsigned)query->end_result -
(unsigned)query->begin_result) / 3;
return true;
case SI_QUERY_GPIN_ASIC_ID:
result->u32 = 0;
return true;
@ -1782,6 +1806,10 @@ static struct pipe_driver_query_info si_driver_query_list[] = {
X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
X("pd-num-prims-accepted", PD_NUM_PRIMS_ACCEPTED, UINT64, AVERAGE),
X("pd-num-prims-rejected", PD_NUM_PRIMS_REJECTED, UINT64, AVERAGE),
X("pd-num-prims-ineligible", PD_NUM_PRIMS_INELIGIBLE,UINT64, AVERAGE),
};
#undef X

View file

@ -112,6 +112,9 @@ enum {
SI_QUERY_GPIN_NUM_SE,
SI_QUERY_TIME_ELAPSED_SDMA,
SI_QUERY_TIME_ELAPSED_SDMA_SI, /* emulated, measured on the CPU */
SI_QUERY_PD_NUM_PRIMS_ACCEPTED,
SI_QUERY_PD_NUM_PRIMS_REJECTED,
SI_QUERY_PD_NUM_PRIMS_INELIGIBLE,
SI_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
};

View file

@ -1591,14 +1591,17 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
}
}
} else {
direct_count = info->count * instance_count;
/* Multiply by 3 for strips and fans to approximate the vertex count
 * of the equivalent triangle list. */
direct_count = info->count * instance_count *
(prim == PIPE_PRIM_TRIANGLES ? 1 : 3);
}
/* Determine if we can use the primitive discard compute shader. */
if (si_compute_prim_discard_enabled(sctx) &&
/* Multiply by 3 for strips and fans to get the vertex count as triangles. */
direct_count * (prim == PIPE_PRIM_TRIANGLES ? 1 : 3) >
sctx->prim_discard_vertex_count_threshold &&
(direct_count > sctx->prim_discard_vertex_count_threshold ?
(sctx->compute_num_verts_rejected += direct_count, true) : /* Add, then return true. */
(sctx->compute_num_verts_ineligible += direct_count, false)) && /* Add, then return false. */
(!info->count_from_stream_output || pd_msg("draw_opaque")) &&
(primitive_restart ?
/* Supported prim types with primitive restart: */
@ -1648,10 +1651,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
index_size = 4;
instance_count = 1;
primitive_restart = false;
sctx->compute_num_verts_rejected -= direct_count;
sctx->compute_num_verts_accepted += direct_count;
break;
case SI_PRIM_DISCARD_DISABLED:
break;
case SI_PRIM_DISCARD_DRAW_SPLIT:
sctx->compute_num_verts_rejected -= direct_count;
goto return_cleanup;
}
}