From 59dc20262c493be363c7e6d0e852a4c141868159 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 2 Jan 2026 17:42:15 +0100 Subject: [PATCH] ac/perfcounter: rename ac_pc_block::num_instances to num_scoped_instances Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_perfcounter.c | 40 +++++++++---------- src/amd/common/ac_perfcounter.h | 4 +- src/amd/common/ac_spm.c | 12 +++--- src/amd/vulkan/radv_perfcounter.c | 24 +++++------ src/gallium/drivers/radeonsi/si_perfcounter.c | 12 +++--- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/src/amd/common/ac_perfcounter.c b/src/amd/common/ac_perfcounter.c index 9aea2d7af8c..21c455c6438 100644 --- a/src/amd/common/ac_perfcounter.c +++ b/src/amd/common/ac_perfcounter.c @@ -624,7 +624,7 @@ bool ac_init_block_names(const struct radeon_info *info, char *p; if (per_instance_groups) - groups_instance = block->num_instances; + groups_instance = block->num_scoped_instances; if (per_se_groups) groups_se = info->max_se; if (block->b->b->flags & AC_PC_BLOCK_SHADER) @@ -747,7 +747,7 @@ bool ac_init_perfcounters(const struct radeon_info *info, struct ac_pc_block *block = &pc->blocks[i]; block->b = &blocks[i]; - block->num_instances = MAX2(1, block->b->instances); + block->num_scoped_instances = MAX2(1, block->b->instances); if (info->gfx_level >= GFX10) { /* TODO: Generalize this to older generations. */ @@ -763,27 +763,27 @@ bool ac_init_perfcounters(const struct radeon_info *info, switch (block->b->b->gpu_block) { case CB: case DB: - block->num_instances = rb_per_sa; + block->num_scoped_instances = rb_per_sa; break; case GL2C: case GCEA: case GCEA_SE: - block->num_instances = info->num_tcc_blocks; + block->num_scoped_instances = info->num_tcc_blocks; break; case GL2A: if (info->family == CHIP_NAVI21 || info->family == CHIP_NAVI31) { - block->num_instances = 4; + block->num_scoped_instances = 4; } else if (info->family == CHIP_NAVI14 || info->family == CHIP_NAVI32 || info->family == CHIP_NAVI33) { - block->num_instances = 2; + block->num_scoped_instances = 2; } break; case TA: case TD: case TCP: - block->num_instances = MAX2(1, info->max_good_cu_per_sa); + block->num_scoped_instances = MAX2(1, info->max_good_cu_per_sa); break; default: break; @@ -791,13 +791,13 @@ bool ac_init_perfcounters(const struct radeon_info *info, switch (block->b->b->distribution) { case AC_PC_PER_SHADER_ARRAY: - block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se; + block->num_global_instances = block->num_scoped_instances * info->num_se * info->max_sa_per_se; break; case AC_PC_PER_SHADER_ENGINE: - block->num_global_instances = block->num_instances * info->num_se; + block->num_global_instances = block->num_scoped_instances * info->num_se; break; case AC_PC_GLOBAL_BLOCK: - block->num_global_instances = block->num_instances; + block->num_global_instances = block->num_scoped_instances; break; default: UNREACHABLE("Invalid perf block distribution mode."); @@ -806,39 +806,39 @@ bool ac_init_perfcounters(const struct radeon_info *info, if (!strcmp(block->b->b->name, "CB") || !strcmp(block->b->b->name, "DB") || !strcmp(block->b->b->name, "RMI")) - block->num_instances = info->max_se; + block->num_scoped_instances = info->max_se; else if (!strcmp(block->b->b->name, "TCC")) - block->num_instances = info->max_tcc_blocks; + block->num_scoped_instances = info->max_tcc_blocks; else if (!strcmp(block->b->b->name, "IA")) - block->num_instances = MAX2(1, info->max_se / 2); + block->num_scoped_instances = MAX2(1, info->max_se / 2); else if (!strcmp(block->b->b->name, "TA") || !strcmp(block->b->b->name, "TCP") || !strcmp(block->b->b->name, "TD")) { - block->num_instances = MAX2(1, info->max_good_cu_per_sa); + block->num_scoped_instances = MAX2(1, info->max_good_cu_per_sa); } if (info->gfx_level >= GFX10) { if (!strcmp(block->b->b->name, "TCP")) { block->num_global_instances = MAX2(1, info->num_cu_per_sh) * info->num_se * info->max_sa_per_se; } else if (!strcmp(block->b->b->name, "SQ")) { - block->num_global_instances = block->num_instances * info->num_se; + block->num_global_instances = block->num_scoped_instances * info->num_se; } else if (!strcmp(block->b->b->name, "GL1C") || !strcmp(block->b->b->name, "SQ_WGP")) { - block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se; + block->num_global_instances = block->num_scoped_instances * info->num_se * info->max_sa_per_se; } else if (!strcmp(block->b->b->name, "GL2C") || !strcmp(block->b->b->name, "GCEA")) { - block->num_instances = block->num_global_instances = info->num_tcc_blocks; + block->num_scoped_instances = block->num_global_instances = info->num_tcc_blocks; } else if (!strcmp(block->b->b->name, "CPF")) { - block->num_instances = block->num_global_instances = 1; + block->num_scoped_instances = block->num_global_instances = 1; } else if (!strcmp(block->b->b->name, "TA") || !strcmp(block->b->b->name, "TD")) { - block->num_global_instances = block->num_instances; + block->num_global_instances = block->num_scoped_instances; } } } if (ac_pc_block_has_per_instance_groups(pc, block)) { - block->num_groups = block->num_instances; + block->num_groups = block->num_scoped_instances; } else { block->num_groups = 1; } diff --git a/src/amd/common/ac_perfcounter.h b/src/amd/common/ac_perfcounter.h index 50faf57b3dd..c5f57e85a07 100644 --- a/src/amd/common/ac_perfcounter.h +++ b/src/amd/common/ac_perfcounter.h @@ -137,7 +137,7 @@ struct ac_pc_block_gfxdescr { struct ac_pc_block { const struct ac_pc_block_gfxdescr *b; - unsigned num_instances; + unsigned num_scoped_instances; unsigned num_global_instances; unsigned num_groups; @@ -187,7 +187,7 @@ ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc, const struct ac_pc_block *block) { return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS || - (block->num_instances > 1 && pc->separate_instance); + (block->num_scoped_instances > 1 && pc->separate_instance); } struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc, diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c index 3d425f14c86..696a6bc33d6 100644 --- a/src/amd/common/ac_spm.c +++ b/src/amd/common/ac_spm.c @@ -324,16 +324,16 @@ ac_spm_init_instance_mapping(const struct radeon_info *info, /* We want the SE index to be the outer index and the local instance to * be the inner index. */ - se_index = counter->instance / block->num_instances; - instance_index = counter->instance % block->num_instances; + se_index = counter->instance / block->num_scoped_instances; + instance_index = counter->instance % block->num_scoped_instances; break; case AC_PC_PER_SHADER_ARRAY: /* From the outermost to the innermost, the internal indices are in the * order: SE, SA, local instance. */ - se_index = (counter->instance / block->num_instances) / info->max_sa_per_se; - sa_index = (counter->instance / block->num_instances) % info->max_sa_per_se; - instance_index = counter->instance % block->num_instances; + se_index = (counter->instance / block->num_scoped_instances) / info->max_sa_per_se; + sa_index = (counter->instance / block->num_scoped_instances) % info->max_sa_per_se; + instance_index = counter->instance % block->num_scoped_instances; break; default: UNREACHABLE("Invalid perf block distribution mode."); @@ -341,7 +341,7 @@ ac_spm_init_instance_mapping(const struct radeon_info *info, if (se_index >= info->num_se || sa_index >= info->max_sa_per_se || - instance_index >= block->num_instances) + instance_index >= block->num_scoped_instances) return false; mapping->se_index = se_index; diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 290eaa556be..583207432e2 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -341,9 +341,9 @@ radv_get_counter_registers(const struct radv_physical_device *pdev, uint32_t num } static unsigned -radv_pc_get_num_instances(const struct radv_physical_device *pdev, struct ac_pc_block *ac_block) +radv_pc_get_num_scoped_instances(const struct radv_physical_device *pdev, struct ac_pc_block *ac_block) { - return ac_block->num_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdev->info.max_se : 1); + return ac_block->num_scoped_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdev->info.max_se : 1); } static unsigned @@ -404,10 +404,10 @@ radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCrea for (unsigned i = 0; i < pool->num_pc_regs; ++i) { enum ac_pc_gpu_block block = pool->pc_regs[i] >> 16; struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block); - unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block); + unsigned num_scoped_instances = radv_pc_get_num_scoped_instances(pdev, ac_block); - pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_instances); - offset += sizeof(uint64_t) * 2 * num_instances; + pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_scoped_instances); + offset += sizeof(uint64_t) * 2 * num_scoped_instances; } /* allow an uint32_t per pass to signal completion. */ @@ -513,7 +513,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p ac_emit_cp_copy_data(cs->b, COPY_DATA_PERF, COPY_DATA_TC_L2, reg >> 2, va, AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL, false); - va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block); + va += sizeof(uint64_t) * 2 * radv_pc_get_num_scoped_instances(pdev, block); reg += reg_delta; } } @@ -528,7 +528,7 @@ radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *blo se_end = pdev->info.max_se; for (unsigned se = 0; se < se_end; ++se) { - for (unsigned instance = 0; instance < block->num_instances; ++instance) { + for (unsigned instance = 0; instance < block->num_scoped_instances; ++instance) { radv_emit_instance(cmd_buffer, se, instance); radv_pc_emit_block_instance_read(cmd_buffer, block, count, va); va += sizeof(uint64_t) * 2; @@ -582,8 +582,8 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query for (unsigned i = 0; i < pool->num_pc_regs;) { enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]); struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block); - unsigned offset = ac_block->num_instances * pass; - unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block); + unsigned offset = ac_block->num_scoped_instances * pass; + unsigned num_scoped_instances = radv_pc_get_num_scoped_instances(pdev, ac_block); unsigned cnt = 1; while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt])) @@ -592,11 +592,11 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query if (offset < cnt) { unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters); radv_pc_sample_block(cmd_buffer, ac_block, pass_reg_cnt, - reg_va + offset * num_instances * sizeof(uint64_t)); + reg_va + offset * num_scoped_instances * sizeof(uint64_t)); } i += cnt; - reg_va += num_instances * sizeof(uint64_t) * 2 * cnt; + reg_va += num_scoped_instances * sizeof(uint64_t) * 2 * cnt; } if (end) { @@ -648,7 +648,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo for (unsigned i = 0; i < pool->num_pc_regs;) { enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]); struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block); - unsigned offset = ac_block->num_instances * pass; + unsigned offset = ac_block->num_scoped_instances * pass; unsigned cnt = 1; while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt])) diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 74df42a600f..13fd05be41c 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -298,7 +298,7 @@ static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery si_pc_emit_instance(sctx, se, instance); si_pc_emit_read(sctx, block, group->num_counters, va); va += sizeof(uint64_t) * group->num_counters; - } while (group->instance < 0 && ++instance < block->num_instances); + } while (group->instance < 0 && ++instance < block->num_scoped_instances); } while (++se < se_end); } @@ -408,7 +408,7 @@ static struct si_query_group *get_group_state(struct si_screen *screen, struct s group->sub_gid = sub_gid; if (block->b->b->flags & AC_PC_BLOCK_SHADER) { - unsigned sub_gids = block->num_instances; + unsigned sub_gids = block->num_scoped_instances; unsigned shader_id; unsigned shaders; unsigned query_shaders; @@ -436,8 +436,8 @@ static struct si_query_group *get_group_state(struct si_screen *screen, struct s } if (ac_pc_block_has_per_se_groups(&pc->base, block)) { - group->se = sub_gid / block->num_instances; - sub_gid = sub_gid % block->num_instances; + group->se = sub_gid / block->num_scoped_instances; + sub_gid = sub_gid % block->num_scoped_instances; } else { group->se = -1; } @@ -516,7 +516,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_ if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0) instances = screen->info.max_se; if (group->instance < 0) - instances *= block->num_instances; + instances *= block->num_scoped_instances; group->result_base = i; query->result_size += sizeof(uint64_t) * instances * group->num_counters; @@ -559,7 +559,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_ if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0) counter->qwords = screen->info.max_se; if (group->instance < 0) - counter->qwords *= block->num_instances; + counter->qwords *= block->num_scoped_instances; } return (struct pipe_query *)query;