freedreno/perfcntrs: Use helper for derived counters

Use helper to assign/reserve counters for derived counters.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
This commit is contained in:
Rob Clark 2026-05-01 14:14:05 -07:00
parent b5461ca40b
commit f86f48ee96
4 changed files with 101 additions and 82 deletions

View file

@ -97,104 +97,104 @@ enum {
static_assert(DERIVED_COUNTER_PERFCNTR_MAX_VALUE <= FD_DERIVED_COUNTER_COLLECTION_MAX_ENABLED_PERFCNTRS, "");
#define DERIVED_COUNTER_PERFCNTR(_enum, _counter) \
[DERIVED_COUNTER_PERFCNTR_##_enum] = { .counter = _counter, .countable = A7XX_PERF_##_enum }
#define DERIVED_COUNTER_PERFCNTR_BV(_enum, _counter) \
[DERIVED_COUNTER_PERFCNTR_BV_##_enum] = { .counter = _counter, .countable = A7XX_PERF_##_enum }
#define DERIVED_COUNTER_PERFCNTR(_countable, _group) \
[DERIVED_COUNTER_PERFCNTR_##_countable] = { .countable = "PERF_" #_countable, .group = #_group }
#define DERIVED_COUNTER_PERFCNTR_BV(_countable, _group) \
[DERIVED_COUNTER_PERFCNTR_BV_##_countable] = { .countable = "PERF_" #_countable, .group = "BV_" #_group }
const struct fd_derived_counter_perfcntr a7xx_derived_counter_perfcntrs[] = {
/* CP: 3/14 counters */
DERIVED_COUNTER_PERFCNTR(CP_ALWAYS_COUNT, &cp_counters[0]),
DERIVED_COUNTER_PERFCNTR(CP_NUM_PREEMPTIONS, &cp_counters[1]),
DERIVED_COUNTER_PERFCNTR(CP_PREEMPTION_REACTION_DELAY, &cp_counters[2]),
DERIVED_COUNTER_PERFCNTR(CP_ALWAYS_COUNT, CP),
DERIVED_COUNTER_PERFCNTR(CP_NUM_PREEMPTIONS, CP),
DERIVED_COUNTER_PERFCNTR(CP_PREEMPTION_REACTION_DELAY, CP),
/* RBBM: 1/4 counters */
DERIVED_COUNTER_PERFCNTR(RBBM_STATUS_MASKED, &rbbm_counters[0]),
DERIVED_COUNTER_PERFCNTR(RBBM_STATUS_MASKED, RBBM),
/* PC: 3/8 counters */
DERIVED_COUNTER_PERFCNTR(PC_STALL_CYCLES_VFD, &pc_counters[0]),
DERIVED_COUNTER_PERFCNTR(PC_VERTEX_HITS, &pc_counters[1]),
DERIVED_COUNTER_PERFCNTR(PC_VS_INVOCATIONS, &pc_counters[2]),
DERIVED_COUNTER_PERFCNTR(PC_STALL_CYCLES_VFD, PC),
DERIVED_COUNTER_PERFCNTR(PC_VERTEX_HITS, PC),
DERIVED_COUNTER_PERFCNTR(PC_VS_INVOCATIONS, PC),
/* TSE: 4/4 counters */
DERIVED_COUNTER_PERFCNTR(TSE_INPUT_PRIM, &tse_counters[0]),
DERIVED_COUNTER_PERFCNTR(TSE_TRIVAL_REJ_PRIM, &tse_counters[1]),
DERIVED_COUNTER_PERFCNTR(TSE_CLIPPED_PRIM, &tse_counters[2]),
DERIVED_COUNTER_PERFCNTR(TSE_OUTPUT_VISIBLE_PRIM, &tse_counters[3]),
DERIVED_COUNTER_PERFCNTR(TSE_INPUT_PRIM, TSE),
DERIVED_COUNTER_PERFCNTR(TSE_TRIVAL_REJ_PRIM, TSE),
DERIVED_COUNTER_PERFCNTR(TSE_CLIPPED_PRIM, TSE),
DERIVED_COUNTER_PERFCNTR(TSE_OUTPUT_VISIBLE_PRIM, TSE),
/* UCHE: 5/12 counters */
DERIVED_COUNTER_PERFCNTR(UCHE_STALL_CYCLES_ARBITER, &uche_counters[0]),
DERIVED_COUNTER_PERFCNTR(UCHE_VBIF_READ_BEATS_TP, &uche_counters[1]),
DERIVED_COUNTER_PERFCNTR(UCHE_VBIF_READ_BEATS_VFD, &uche_counters[2]),
DERIVED_COUNTER_PERFCNTR(UCHE_VBIF_READ_BEATS_SP, &uche_counters[3]),
DERIVED_COUNTER_PERFCNTR(UCHE_READ_REQUESTS_TP, &uche_counters[4]),
DERIVED_COUNTER_PERFCNTR(UCHE_STALL_CYCLES_ARBITER, UCHE),
DERIVED_COUNTER_PERFCNTR(UCHE_VBIF_READ_BEATS_TP, UCHE),
DERIVED_COUNTER_PERFCNTR(UCHE_VBIF_READ_BEATS_VFD, UCHE),
DERIVED_COUNTER_PERFCNTR(UCHE_VBIF_READ_BEATS_SP, UCHE),
DERIVED_COUNTER_PERFCNTR(UCHE_READ_REQUESTS_TP, UCHE),
/* TP: 7/12 counters */
DERIVED_COUNTER_PERFCNTR(TP_BUSY_CYCLES, &tp_counters[0]),
DERIVED_COUNTER_PERFCNTR(TP_L1_CACHELINE_REQUESTS, &tp_counters[1]),
DERIVED_COUNTER_PERFCNTR(TP_L1_CACHELINE_MISSES, &tp_counters[2]),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS, &tp_counters[3]),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS_POINT, &tp_counters[4]),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS_BILINEAR, &tp_counters[5]),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS_ANISO, &tp_counters[6]),
DERIVED_COUNTER_PERFCNTR(TP_BUSY_CYCLES, TP),
DERIVED_COUNTER_PERFCNTR(TP_L1_CACHELINE_REQUESTS, TP),
DERIVED_COUNTER_PERFCNTR(TP_L1_CACHELINE_MISSES, TP),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS, TP),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS_POINT, TP),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS_BILINEAR, TP),
DERIVED_COUNTER_PERFCNTR(TP_OUTPUT_PIXELS_ANISO, TP),
/* SP: 24/24 counters */
DERIVED_COUNTER_PERFCNTR(SP_BUSY_CYCLES, &sp_counters[ 0]),
DERIVED_COUNTER_PERFCNTR(SP_ALU_WORKING_CYCLES, &sp_counters[ 1]),
DERIVED_COUNTER_PERFCNTR(SP_EFU_WORKING_CYCLES, &sp_counters[ 2]),
DERIVED_COUNTER_PERFCNTR(SP_STALL_CYCLES_TP, &sp_counters[ 3]),
DERIVED_COUNTER_PERFCNTR(SP_NON_EXECUTION_CYCLES, &sp_counters[ 4]),
DERIVED_COUNTER_PERFCNTR(SP_VS_STAGE_TEX_INSTRUCTIONS, &sp_counters[ 5]),
DERIVED_COUNTER_PERFCNTR(SP_VS_STAGE_EFU_INSTRUCTIONS, &sp_counters[ 6]),
DERIVED_COUNTER_PERFCNTR(SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, &sp_counters[ 7]),
DERIVED_COUNTER_PERFCNTR(SP_FS_STAGE_EFU_INSTRUCTIONS, &sp_counters[ 8]),
DERIVED_COUNTER_PERFCNTR(SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, &sp_counters[ 9]),
DERIVED_COUNTER_PERFCNTR(SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, &sp_counters[10]),
DERIVED_COUNTER_PERFCNTR(SP_ICL1_REQUESTS, &sp_counters[11]),
DERIVED_COUNTER_PERFCNTR(SP_ICL1_MISSES, &sp_counters[12]),
DERIVED_COUNTER_PERFCNTR(SP_ANY_EU_WORKING_FS_STAGE, &sp_counters[13]),
DERIVED_COUNTER_PERFCNTR(SP_ANY_EU_WORKING_VS_STAGE, &sp_counters[14]),
DERIVED_COUNTER_PERFCNTR(SP_ANY_EU_WORKING_CS_STAGE, &sp_counters[15]),
DERIVED_COUNTER_PERFCNTR(SP_PIXELS, &sp_counters[16]),
DERIVED_COUNTER_PERFCNTR(SP_RAY_QUERY_INSTRUCTIONS, &sp_counters[17]),
DERIVED_COUNTER_PERFCNTR(SP_RTU_BUSY_CYCLES, &sp_counters[18]),
DERIVED_COUNTER_PERFCNTR(SP_RTU_BVH_FETCH_LATENCY_CYCLES, &sp_counters[19]),
DERIVED_COUNTER_PERFCNTR(SP_RTU_BVH_FETCH_LATENCY_SAMPLES, &sp_counters[20]),
DERIVED_COUNTER_PERFCNTR(SP_RTU_RAY_BOX_INTERSECTIONS, &sp_counters[21]),
DERIVED_COUNTER_PERFCNTR(SP_RTU_RAY_TRIANGLE_INTERSECTIONS, &sp_counters[22]),
DERIVED_COUNTER_PERFCNTR(SP_SCH_STALL_CYCLES_RTU, &sp_counters[23]),
DERIVED_COUNTER_PERFCNTR(SP_BUSY_CYCLES, SP),
DERIVED_COUNTER_PERFCNTR(SP_ALU_WORKING_CYCLES, SP),
DERIVED_COUNTER_PERFCNTR(SP_EFU_WORKING_CYCLES, SP),
DERIVED_COUNTER_PERFCNTR(SP_STALL_CYCLES_TP, SP),
DERIVED_COUNTER_PERFCNTR(SP_NON_EXECUTION_CYCLES, SP),
DERIVED_COUNTER_PERFCNTR(SP_VS_STAGE_TEX_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_VS_STAGE_EFU_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_FS_STAGE_EFU_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_ICL1_REQUESTS, SP),
DERIVED_COUNTER_PERFCNTR(SP_ICL1_MISSES, SP),
DERIVED_COUNTER_PERFCNTR(SP_ANY_EU_WORKING_FS_STAGE, SP),
DERIVED_COUNTER_PERFCNTR(SP_ANY_EU_WORKING_VS_STAGE, SP),
DERIVED_COUNTER_PERFCNTR(SP_ANY_EU_WORKING_CS_STAGE, SP),
DERIVED_COUNTER_PERFCNTR(SP_PIXELS, SP),
DERIVED_COUNTER_PERFCNTR(SP_RAY_QUERY_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_RTU_BUSY_CYCLES, SP),
DERIVED_COUNTER_PERFCNTR(SP_RTU_BVH_FETCH_LATENCY_CYCLES, SP),
DERIVED_COUNTER_PERFCNTR(SP_RTU_BVH_FETCH_LATENCY_SAMPLES, SP),
DERIVED_COUNTER_PERFCNTR(SP_RTU_RAY_BOX_INTERSECTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_RTU_RAY_TRIANGLE_INTERSECTIONS, SP),
DERIVED_COUNTER_PERFCNTR(SP_SCH_STALL_CYCLES_RTU, SP),
/* CMP: 1/4 counters */
DERIVED_COUNTER_PERFCNTR(CMPDECMP_VBIF_READ_DATA, &cmp_counters[0]),
DERIVED_COUNTER_PERFCNTR(CMPDECMP_VBIF_READ_DATA, CMP),
/* BV_PC: 3/8 counters */
DERIVED_COUNTER_PERFCNTR_BV(PC_STALL_CYCLES_VFD, &bv_pc_counters[0]),
DERIVED_COUNTER_PERFCNTR_BV(PC_VERTEX_HITS, &bv_pc_counters[1]),
DERIVED_COUNTER_PERFCNTR_BV(PC_VS_INVOCATIONS, &bv_pc_counters[2]),
DERIVED_COUNTER_PERFCNTR_BV(PC_STALL_CYCLES_VFD, PC),
DERIVED_COUNTER_PERFCNTR_BV(PC_VERTEX_HITS, PC),
DERIVED_COUNTER_PERFCNTR_BV(PC_VS_INVOCATIONS, PC),
/* BV_TP: 6/6 counters */
DERIVED_COUNTER_PERFCNTR_BV(TP_L1_CACHELINE_REQUESTS, &bv_tp_counters[0]),
DERIVED_COUNTER_PERFCNTR_BV(TP_L1_CACHELINE_MISSES, &bv_tp_counters[1]),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS, &bv_tp_counters[2]),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS_POINT, &bv_tp_counters[3]),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS_BILINEAR, &bv_tp_counters[4]),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS_ANISO, &bv_tp_counters[5]),
DERIVED_COUNTER_PERFCNTR_BV(TP_L1_CACHELINE_REQUESTS, TP),
DERIVED_COUNTER_PERFCNTR_BV(TP_L1_CACHELINE_MISSES, TP),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS, TP),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS_POINT, TP),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS_BILINEAR, TP),
DERIVED_COUNTER_PERFCNTR_BV(TP_OUTPUT_PIXELS_ANISO, TP),
/* BV_SP: 8/12 counters */
DERIVED_COUNTER_PERFCNTR_BV(SP_STALL_CYCLES_TP, &bv_sp_counters[0]),
DERIVED_COUNTER_PERFCNTR_BV(SP_VS_STAGE_TEX_INSTRUCTIONS, &bv_sp_counters[1]),
DERIVED_COUNTER_PERFCNTR_BV(SP_VS_STAGE_EFU_INSTRUCTIONS, &bv_sp_counters[2]),
DERIVED_COUNTER_PERFCNTR_BV(SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, &bv_sp_counters[3]),
DERIVED_COUNTER_PERFCNTR_BV(SP_ICL1_REQUESTS, &bv_sp_counters[4]),
DERIVED_COUNTER_PERFCNTR_BV(SP_ICL1_MISSES, &bv_sp_counters[5]),
DERIVED_COUNTER_PERFCNTR_BV(SP_ANY_EU_WORKING_FS_STAGE, &bv_sp_counters[6]),
DERIVED_COUNTER_PERFCNTR_BV(SP_ANY_EU_WORKING_VS_STAGE, &bv_sp_counters[7]),
DERIVED_COUNTER_PERFCNTR_BV(SP_STALL_CYCLES_TP, SP),
DERIVED_COUNTER_PERFCNTR_BV(SP_VS_STAGE_TEX_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR_BV(SP_VS_STAGE_EFU_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR_BV(SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, SP),
DERIVED_COUNTER_PERFCNTR_BV(SP_ICL1_REQUESTS, SP),
DERIVED_COUNTER_PERFCNTR_BV(SP_ICL1_MISSES, SP),
DERIVED_COUNTER_PERFCNTR_BV(SP_ANY_EU_WORKING_FS_STAGE, SP),
DERIVED_COUNTER_PERFCNTR_BV(SP_ANY_EU_WORKING_VS_STAGE, SP),
/* LRZ: 4/4 counters */
DERIVED_COUNTER_PERFCNTR(LRZ_TOTAL_PIXEL, &lrz_counters[0]),
DERIVED_COUNTER_PERFCNTR(LRZ_VISIBLE_PIXEL_AFTER_LRZ, &lrz_counters[1]),
DERIVED_COUNTER_PERFCNTR(LRZ_TILE_KILLED, &lrz_counters[2]),
DERIVED_COUNTER_PERFCNTR(LRZ_PRIM_KILLED_BY_LRZ, &lrz_counters[3]),
DERIVED_COUNTER_PERFCNTR(LRZ_TOTAL_PIXEL, LRZ),
DERIVED_COUNTER_PERFCNTR(LRZ_VISIBLE_PIXEL_AFTER_LRZ, LRZ),
DERIVED_COUNTER_PERFCNTR(LRZ_TILE_KILLED, LRZ),
DERIVED_COUNTER_PERFCNTR(LRZ_PRIM_KILLED_BY_LRZ, LRZ),
};
static uint64_t

View file

@ -320,9 +320,10 @@ fd_derived_counters(const struct fd_dev_id *id, unsigned *count)
}
void
fd_generate_derived_counter_collection(const struct fd_dev_id *id, struct fd_derived_counter_collection *collection)
fd_reserve_derived_counter_collection(struct fd_perfcntr_state *perfcntrs, struct fd_derived_counter_collection *collection)
{
const struct fd_derived_counter_perfcntr *derived_counter_perfcntrs = NULL;
const struct fd_dev_id *id = perfcntrs->id;
switch (fd_dev_gen(id)) {
case 7:
@ -360,10 +361,15 @@ fd_generate_derived_counter_collection(const struct fd_dev_id *id, struct fd_der
uint8_t enabled_perfcntr_index = collection->num_enabled_perfcntrs++;
collection->enabled_perfcntrs_map[i] = enabled_perfcntr_index;
collection->enabled_perfcntrs[enabled_perfcntr_index].counter =
derived_counter_perfcntrs[i].counter;
collection->enabled_perfcntrs[enabled_perfcntr_index].countable =
derived_counter_perfcntrs[i].countable;
const struct fd_perfcntr_group *group =
fd_perfcntrs_group(perfcntrs->id, derived_counter_perfcntrs[i].group);
const struct fd_perfcntr_countable *countable =
fd_perfcntrs_countable(group, derived_counter_perfcntrs[i].countable);
const struct fd_perfcntr_counter *counter =
fd_perfcntr_reserve(perfcntrs, group, countable);
collection->enabled_perfcntrs[enabled_perfcntr_index].counter = counter;
collection->enabled_perfcntrs[enabled_perfcntr_index].countable = countable->selector;
}
const struct fd_dev_info *info = fd_dev_info_raw(id);
@ -376,3 +382,10 @@ fd_generate_derived_counter_collection(const struct fd_dev_id *id, struct fd_der
break;
}
}
/**
 * Hand back every perfcntr recorded in a derived counter collection.
 *
 * Walks collection->enabled_perfcntrs[0 .. num_enabled_perfcntrs) and passes
 * each entry's counter to fd_perfcntr_release(). The collection itself is
 * left unmodified.
 *
 * @param perfcntrs   perfcntr state the counters are released against
 * @param collection  collection whose enabled perfcntrs are released
 */
void
fd_release_derived_counter_collection(struct fd_perfcntr_state *perfcntrs, struct fd_derived_counter_collection *collection)
{
   unsigned slot = 0;

   while (slot < collection->num_enabled_perfcntrs) {
      fd_perfcntr_release(perfcntrs, collection->enabled_perfcntrs[slot].counter);
      slot++;
   }
}

View file

@ -153,8 +153,8 @@ struct fd_derived_counter {
};
/* Describes one hardware perfcntr used as an input for derived counters.
 * The countable and its group are identified by name;
 * fd_reserve_derived_counter_collection() looks them up with
 * fd_perfcntrs_group() and fd_perfcntrs_countable().
 */
struct fd_derived_counter_perfcntr {
const struct fd_perfcntr_counter *counter;
unsigned countable;
/* Countable name, e.g. "PERF_CP_ALWAYS_COUNT". */
const char *countable;
/* Group name, e.g. "CP", or "BV_PC" for the BV variant of a group. */
const char *group;
};
const struct fd_derived_counter **fd_derived_counters(const struct fd_dev_id *id, unsigned *count);
@ -177,7 +177,8 @@ struct fd_derived_counter_collection {
struct fd_derivation_context derivation_context;
};
void fd_generate_derived_counter_collection(const struct fd_dev_id *id, struct fd_derived_counter_collection *collection);
void fd_reserve_derived_counter_collection(struct fd_perfcntr_state *perfcntrs, struct fd_derived_counter_collection *collection);
void fd_release_derived_counter_collection(struct fd_perfcntr_state *perfcntrs, struct fd_derived_counter_collection *collection);
#ifdef __cplusplus
} /* end of extern "C" */

View file

@ -266,6 +266,11 @@ tu_query_pool_destroy(struct tu_device *device, struct tu_query_pool *pool,
for (uint32_t i = 0; i < perf_query->counter_index_count; i++)
fd_perfcntr_release(device->perfcntrs, perf_query->data[i].counter);
} else if (is_perf_query_derived(pool)) {
   /* Derived pools get their counters from
    * fd_reserve_derived_counter_collection() in tu_CreateQueryPool(), so
    * they must be handed back here. This branch reads the .derived payload
    * and therefore has to be guarded by the derived check, not the raw one.
    * NOTE(review): assumes the preceding branch is the is_perf_query_raw()
    * case, as its raw-counter release loop suggests - confirm.
    */
   struct tu_perf_query_derived *perf_query = &pool->perf_query.derived;
   struct fd_derived_counter_collection *collection = perf_query->collection;
   fd_release_derived_counter_collection(device->perfcntrs, collection);
}
if (pool->bo)
@ -400,7 +405,7 @@ tu_CreateQueryPool(VkDevice _device,
collection->counters[i] = perf_query->derived_counters[counter_index];
}
fd_generate_derived_counter_collection(&device->physical_device->dev_id, collection);
fd_reserve_derived_counter_collection(device->perfcntrs, collection);
slot_size += sizeof(struct perfcntr_query_slot) * collection->num_enabled_perfcntrs;
}