ac/perfcounter: rename ac_pc_block::num_instances to num_scoped_instances

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39155>
This commit is contained in:
Samuel Pitoiset 2026-01-02 17:42:15 +01:00 committed by Marge Bot
parent 3658d9588f
commit 59dc20262c
5 changed files with 46 additions and 46 deletions

View file

@ -624,7 +624,7 @@ bool ac_init_block_names(const struct radeon_info *info,
char *p;
if (per_instance_groups)
groups_instance = block->num_instances;
groups_instance = block->num_scoped_instances;
if (per_se_groups)
groups_se = info->max_se;
if (block->b->b->flags & AC_PC_BLOCK_SHADER)
@ -747,7 +747,7 @@ bool ac_init_perfcounters(const struct radeon_info *info,
struct ac_pc_block *block = &pc->blocks[i];
block->b = &blocks[i];
block->num_instances = MAX2(1, block->b->instances);
block->num_scoped_instances = MAX2(1, block->b->instances);
if (info->gfx_level >= GFX10) {
/* TODO: Generalize this to older generations. */
@ -763,27 +763,27 @@ bool ac_init_perfcounters(const struct radeon_info *info,
switch (block->b->b->gpu_block) {
case CB:
case DB:
block->num_instances = rb_per_sa;
block->num_scoped_instances = rb_per_sa;
break;
case GL2C:
case GCEA:
case GCEA_SE:
block->num_instances = info->num_tcc_blocks;
block->num_scoped_instances = info->num_tcc_blocks;
break;
case GL2A:
if (info->family == CHIP_NAVI21 ||
info->family == CHIP_NAVI31) {
block->num_instances = 4;
block->num_scoped_instances = 4;
} else if (info->family == CHIP_NAVI14 ||
info->family == CHIP_NAVI32 ||
info->family == CHIP_NAVI33) {
block->num_instances = 2;
block->num_scoped_instances = 2;
}
break;
case TA:
case TD:
case TCP:
block->num_instances = MAX2(1, info->max_good_cu_per_sa);
block->num_scoped_instances = MAX2(1, info->max_good_cu_per_sa);
break;
default:
break;
@ -791,13 +791,13 @@ bool ac_init_perfcounters(const struct radeon_info *info,
switch (block->b->b->distribution) {
case AC_PC_PER_SHADER_ARRAY:
block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se;
block->num_global_instances = block->num_scoped_instances * info->num_se * info->max_sa_per_se;
break;
case AC_PC_PER_SHADER_ENGINE:
block->num_global_instances = block->num_instances * info->num_se;
block->num_global_instances = block->num_scoped_instances * info->num_se;
break;
case AC_PC_GLOBAL_BLOCK:
block->num_global_instances = block->num_instances;
block->num_global_instances = block->num_scoped_instances;
break;
default:
UNREACHABLE("Invalid perf block distribution mode.");
@ -806,39 +806,39 @@ bool ac_init_perfcounters(const struct radeon_info *info,
if (!strcmp(block->b->b->name, "CB") ||
!strcmp(block->b->b->name, "DB") ||
!strcmp(block->b->b->name, "RMI"))
block->num_instances = info->max_se;
block->num_scoped_instances = info->max_se;
else if (!strcmp(block->b->b->name, "TCC"))
block->num_instances = info->max_tcc_blocks;
block->num_scoped_instances = info->max_tcc_blocks;
else if (!strcmp(block->b->b->name, "IA"))
block->num_instances = MAX2(1, info->max_se / 2);
block->num_scoped_instances = MAX2(1, info->max_se / 2);
else if (!strcmp(block->b->b->name, "TA") ||
!strcmp(block->b->b->name, "TCP") ||
!strcmp(block->b->b->name, "TD")) {
block->num_instances = MAX2(1, info->max_good_cu_per_sa);
block->num_scoped_instances = MAX2(1, info->max_good_cu_per_sa);
}
if (info->gfx_level >= GFX10) {
if (!strcmp(block->b->b->name, "TCP")) {
block->num_global_instances = MAX2(1, info->num_cu_per_sh) * info->num_se * info->max_sa_per_se;
} else if (!strcmp(block->b->b->name, "SQ")) {
block->num_global_instances = block->num_instances * info->num_se;
block->num_global_instances = block->num_scoped_instances * info->num_se;
} else if (!strcmp(block->b->b->name, "GL1C") ||
!strcmp(block->b->b->name, "SQ_WGP")) {
block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se;
block->num_global_instances = block->num_scoped_instances * info->num_se * info->max_sa_per_se;
} else if (!strcmp(block->b->b->name, "GL2C") ||
!strcmp(block->b->b->name, "GCEA")) {
block->num_instances = block->num_global_instances = info->num_tcc_blocks;
block->num_scoped_instances = block->num_global_instances = info->num_tcc_blocks;
} else if (!strcmp(block->b->b->name, "CPF")) {
block->num_instances = block->num_global_instances = 1;
block->num_scoped_instances = block->num_global_instances = 1;
} else if (!strcmp(block->b->b->name, "TA") ||
!strcmp(block->b->b->name, "TD")) {
block->num_global_instances = block->num_instances;
block->num_global_instances = block->num_scoped_instances;
}
}
}
if (ac_pc_block_has_per_instance_groups(pc, block)) {
block->num_groups = block->num_instances;
block->num_groups = block->num_scoped_instances;
} else {
block->num_groups = 1;
}

View file

@ -137,7 +137,7 @@ struct ac_pc_block_gfxdescr {
struct ac_pc_block {
const struct ac_pc_block_gfxdescr *b;
unsigned num_instances;
unsigned num_scoped_instances;
unsigned num_global_instances;
unsigned num_groups;
@ -187,7 +187,7 @@ ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
const struct ac_pc_block *block)
{
return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
(block->num_instances > 1 && pc->separate_instance);
(block->num_scoped_instances > 1 && pc->separate_instance);
}
struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,

View file

@ -324,16 +324,16 @@ ac_spm_init_instance_mapping(const struct radeon_info *info,
/* We want the SE index to be the outer index and the local instance to
* be the inner index.
*/
se_index = counter->instance / block->num_instances;
instance_index = counter->instance % block->num_instances;
se_index = counter->instance / block->num_scoped_instances;
instance_index = counter->instance % block->num_scoped_instances;
break;
case AC_PC_PER_SHADER_ARRAY:
/* From the outermost to the innermost, the internal indices are in the
* order: SE, SA, local instance.
*/
se_index = (counter->instance / block->num_instances) / info->max_sa_per_se;
sa_index = (counter->instance / block->num_instances) % info->max_sa_per_se;
instance_index = counter->instance % block->num_instances;
se_index = (counter->instance / block->num_scoped_instances) / info->max_sa_per_se;
sa_index = (counter->instance / block->num_scoped_instances) % info->max_sa_per_se;
instance_index = counter->instance % block->num_scoped_instances;
break;
default:
UNREACHABLE("Invalid perf block distribution mode.");
@ -341,7 +341,7 @@ ac_spm_init_instance_mapping(const struct radeon_info *info,
if (se_index >= info->num_se ||
sa_index >= info->max_sa_per_se ||
instance_index >= block->num_instances)
instance_index >= block->num_scoped_instances)
return false;
mapping->se_index = se_index;

View file

@ -341,9 +341,9 @@ radv_get_counter_registers(const struct radv_physical_device *pdev, uint32_t num
}
/* Computes the total instance count for a perf-counter block: the block's
 * per-scope instance count, multiplied by pdev->info.max_se when the block
 * has the AC_PC_BLOCK_SE flag set (and by 1 otherwise).
 *
 * Both the pre-rename and post-rename spellings of the signature and of the
 * return statement are shown below; they differ only in identifier names
 * (num_instances -> num_scoped_instances).
 */
static unsigned
radv_pc_get_num_instances(const struct radv_physical_device *pdev, struct ac_pc_block *ac_block)
radv_pc_get_num_scoped_instances(const struct radv_physical_device *pdev, struct ac_pc_block *ac_block)
{
return ac_block->num_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdev->info.max_se : 1);
return ac_block->num_scoped_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdev->info.max_se : 1);
}
static unsigned
@ -404,10 +404,10 @@ radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCrea
for (unsigned i = 0; i < pool->num_pc_regs; ++i) {
enum ac_pc_gpu_block block = pool->pc_regs[i] >> 16;
struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block);
unsigned num_scoped_instances = radv_pc_get_num_scoped_instances(pdev, ac_block);
pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_instances);
offset += sizeof(uint64_t) * 2 * num_instances;
pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_scoped_instances);
offset += sizeof(uint64_t) * 2 * num_scoped_instances;
}
/* Allow a uint32_t per pass to signal completion. */
@ -513,7 +513,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
ac_emit_cp_copy_data(cs->b, COPY_DATA_PERF, COPY_DATA_TC_L2, reg >> 2, va,
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL, false);
va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
va += sizeof(uint64_t) * 2 * radv_pc_get_num_scoped_instances(pdev, block);
reg += reg_delta;
}
}
@ -528,7 +528,7 @@ radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *blo
se_end = pdev->info.max_se;
for (unsigned se = 0; se < se_end; ++se) {
for (unsigned instance = 0; instance < block->num_instances; ++instance) {
for (unsigned instance = 0; instance < block->num_scoped_instances; ++instance) {
radv_emit_instance(cmd_buffer, se, instance);
radv_pc_emit_block_instance_read(cmd_buffer, block, count, va);
va += sizeof(uint64_t) * 2;
@ -582,8 +582,8 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
for (unsigned i = 0; i < pool->num_pc_regs;) {
enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
unsigned offset = ac_block->num_instances * pass;
unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block);
unsigned offset = ac_block->num_scoped_instances * pass;
unsigned num_scoped_instances = radv_pc_get_num_scoped_instances(pdev, ac_block);
unsigned cnt = 1;
while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))
@ -592,11 +592,11 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
if (offset < cnt) {
unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters);
radv_pc_sample_block(cmd_buffer, ac_block, pass_reg_cnt,
reg_va + offset * num_instances * sizeof(uint64_t));
reg_va + offset * num_scoped_instances * sizeof(uint64_t));
}
i += cnt;
reg_va += num_instances * sizeof(uint64_t) * 2 * cnt;
reg_va += num_scoped_instances * sizeof(uint64_t) * 2 * cnt;
}
if (end) {
@ -648,7 +648,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
for (unsigned i = 0; i < pool->num_pc_regs;) {
enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
unsigned offset = ac_block->num_instances * pass;
unsigned offset = ac_block->num_scoped_instances * pass;
unsigned cnt = 1;
while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))

View file

@ -298,7 +298,7 @@ static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery
si_pc_emit_instance(sctx, se, instance);
si_pc_emit_read(sctx, block, group->num_counters, va);
va += sizeof(uint64_t) * group->num_counters;
} while (group->instance < 0 && ++instance < block->num_instances);
} while (group->instance < 0 && ++instance < block->num_scoped_instances);
} while (++se < se_end);
}
@ -408,7 +408,7 @@ static struct si_query_group *get_group_state(struct si_screen *screen, struct s
group->sub_gid = sub_gid;
if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
unsigned sub_gids = block->num_instances;
unsigned sub_gids = block->num_scoped_instances;
unsigned shader_id;
unsigned shaders;
unsigned query_shaders;
@ -436,8 +436,8 @@ static struct si_query_group *get_group_state(struct si_screen *screen, struct s
}
if (ac_pc_block_has_per_se_groups(&pc->base, block)) {
group->se = sub_gid / block->num_instances;
sub_gid = sub_gid % block->num_instances;
group->se = sub_gid / block->num_scoped_instances;
sub_gid = sub_gid % block->num_scoped_instances;
} else {
group->se = -1;
}
@ -516,7 +516,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_
if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
instances = screen->info.max_se;
if (group->instance < 0)
instances *= block->num_instances;
instances *= block->num_scoped_instances;
group->result_base = i;
query->result_size += sizeof(uint64_t) * instances * group->num_counters;
@ -559,7 +559,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_
if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
counter->qwords = screen->info.max_se;
if (group->instance < 0)
counter->qwords *= block->num_instances;
counter->qwords *= block->num_scoped_instances;
}
return (struct pipe_query *)query;