mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 13:28:06 +02:00
ac/perfcounter: rename ac_pc_block::num_instances to num_scoped_instances
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39155>
This commit is contained in:
parent
3658d9588f
commit
59dc20262c
5 changed files with 46 additions and 46 deletions
|
|
@ -624,7 +624,7 @@ bool ac_init_block_names(const struct radeon_info *info,
|
|||
char *p;
|
||||
|
||||
if (per_instance_groups)
|
||||
groups_instance = block->num_instances;
|
||||
groups_instance = block->num_scoped_instances;
|
||||
if (per_se_groups)
|
||||
groups_se = info->max_se;
|
||||
if (block->b->b->flags & AC_PC_BLOCK_SHADER)
|
||||
|
|
@ -747,7 +747,7 @@ bool ac_init_perfcounters(const struct radeon_info *info,
|
|||
struct ac_pc_block *block = &pc->blocks[i];
|
||||
|
||||
block->b = &blocks[i];
|
||||
block->num_instances = MAX2(1, block->b->instances);
|
||||
block->num_scoped_instances = MAX2(1, block->b->instances);
|
||||
|
||||
if (info->gfx_level >= GFX10) {
|
||||
/* TODO: Generalize this to older generations. */
|
||||
|
|
@ -763,27 +763,27 @@ bool ac_init_perfcounters(const struct radeon_info *info,
|
|||
switch (block->b->b->gpu_block) {
|
||||
case CB:
|
||||
case DB:
|
||||
block->num_instances = rb_per_sa;
|
||||
block->num_scoped_instances = rb_per_sa;
|
||||
break;
|
||||
case GL2C:
|
||||
case GCEA:
|
||||
case GCEA_SE:
|
||||
block->num_instances = info->num_tcc_blocks;
|
||||
block->num_scoped_instances = info->num_tcc_blocks;
|
||||
break;
|
||||
case GL2A:
|
||||
if (info->family == CHIP_NAVI21 ||
|
||||
info->family == CHIP_NAVI31) {
|
||||
block->num_instances = 4;
|
||||
block->num_scoped_instances = 4;
|
||||
} else if (info->family == CHIP_NAVI14 ||
|
||||
info->family == CHIP_NAVI32 ||
|
||||
info->family == CHIP_NAVI33) {
|
||||
block->num_instances = 2;
|
||||
block->num_scoped_instances = 2;
|
||||
}
|
||||
break;
|
||||
case TA:
|
||||
case TD:
|
||||
case TCP:
|
||||
block->num_instances = MAX2(1, info->max_good_cu_per_sa);
|
||||
block->num_scoped_instances = MAX2(1, info->max_good_cu_per_sa);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
@ -791,13 +791,13 @@ bool ac_init_perfcounters(const struct radeon_info *info,
|
|||
|
||||
switch (block->b->b->distribution) {
|
||||
case AC_PC_PER_SHADER_ARRAY:
|
||||
block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se;
|
||||
block->num_global_instances = block->num_scoped_instances * info->num_se * info->max_sa_per_se;
|
||||
break;
|
||||
case AC_PC_PER_SHADER_ENGINE:
|
||||
block->num_global_instances = block->num_instances * info->num_se;
|
||||
block->num_global_instances = block->num_scoped_instances * info->num_se;
|
||||
break;
|
||||
case AC_PC_GLOBAL_BLOCK:
|
||||
block->num_global_instances = block->num_instances;
|
||||
block->num_global_instances = block->num_scoped_instances;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Invalid perf block distribution mode.");
|
||||
|
|
@ -806,39 +806,39 @@ bool ac_init_perfcounters(const struct radeon_info *info,
|
|||
if (!strcmp(block->b->b->name, "CB") ||
|
||||
!strcmp(block->b->b->name, "DB") ||
|
||||
!strcmp(block->b->b->name, "RMI"))
|
||||
block->num_instances = info->max_se;
|
||||
block->num_scoped_instances = info->max_se;
|
||||
else if (!strcmp(block->b->b->name, "TCC"))
|
||||
block->num_instances = info->max_tcc_blocks;
|
||||
block->num_scoped_instances = info->max_tcc_blocks;
|
||||
else if (!strcmp(block->b->b->name, "IA"))
|
||||
block->num_instances = MAX2(1, info->max_se / 2);
|
||||
block->num_scoped_instances = MAX2(1, info->max_se / 2);
|
||||
else if (!strcmp(block->b->b->name, "TA") ||
|
||||
!strcmp(block->b->b->name, "TCP") ||
|
||||
!strcmp(block->b->b->name, "TD")) {
|
||||
block->num_instances = MAX2(1, info->max_good_cu_per_sa);
|
||||
block->num_scoped_instances = MAX2(1, info->max_good_cu_per_sa);
|
||||
}
|
||||
|
||||
if (info->gfx_level >= GFX10) {
|
||||
if (!strcmp(block->b->b->name, "TCP")) {
|
||||
block->num_global_instances = MAX2(1, info->num_cu_per_sh) * info->num_se * info->max_sa_per_se;
|
||||
} else if (!strcmp(block->b->b->name, "SQ")) {
|
||||
block->num_global_instances = block->num_instances * info->num_se;
|
||||
block->num_global_instances = block->num_scoped_instances * info->num_se;
|
||||
} else if (!strcmp(block->b->b->name, "GL1C") ||
|
||||
!strcmp(block->b->b->name, "SQ_WGP")) {
|
||||
block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se;
|
||||
block->num_global_instances = block->num_scoped_instances * info->num_se * info->max_sa_per_se;
|
||||
} else if (!strcmp(block->b->b->name, "GL2C") ||
|
||||
!strcmp(block->b->b->name, "GCEA")) {
|
||||
block->num_instances = block->num_global_instances = info->num_tcc_blocks;
|
||||
block->num_scoped_instances = block->num_global_instances = info->num_tcc_blocks;
|
||||
} else if (!strcmp(block->b->b->name, "CPF")) {
|
||||
block->num_instances = block->num_global_instances = 1;
|
||||
block->num_scoped_instances = block->num_global_instances = 1;
|
||||
} else if (!strcmp(block->b->b->name, "TA") ||
|
||||
!strcmp(block->b->b->name, "TD")) {
|
||||
block->num_global_instances = block->num_instances;
|
||||
block->num_global_instances = block->num_scoped_instances;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ac_pc_block_has_per_instance_groups(pc, block)) {
|
||||
block->num_groups = block->num_instances;
|
||||
block->num_groups = block->num_scoped_instances;
|
||||
} else {
|
||||
block->num_groups = 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ struct ac_pc_block_gfxdescr {
|
|||
|
||||
struct ac_pc_block {
|
||||
const struct ac_pc_block_gfxdescr *b;
|
||||
unsigned num_instances;
|
||||
unsigned num_scoped_instances;
|
||||
unsigned num_global_instances;
|
||||
|
||||
unsigned num_groups;
|
||||
|
|
@ -187,7 +187,7 @@ ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
|
|||
const struct ac_pc_block *block)
|
||||
{
|
||||
return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
|
||||
(block->num_instances > 1 && pc->separate_instance);
|
||||
(block->num_scoped_instances > 1 && pc->separate_instance);
|
||||
}
|
||||
|
||||
struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
|
||||
|
|
|
|||
|
|
@ -324,16 +324,16 @@ ac_spm_init_instance_mapping(const struct radeon_info *info,
|
|||
/* We want the SE index to be the outer index and the local instance to
|
||||
* be the inner index.
|
||||
*/
|
||||
se_index = counter->instance / block->num_instances;
|
||||
instance_index = counter->instance % block->num_instances;
|
||||
se_index = counter->instance / block->num_scoped_instances;
|
||||
instance_index = counter->instance % block->num_scoped_instances;
|
||||
break;
|
||||
case AC_PC_PER_SHADER_ARRAY:
|
||||
/* From the outermost to the innermost, the internal indices are in the
|
||||
* order: SE, SA, local instance.
|
||||
*/
|
||||
se_index = (counter->instance / block->num_instances) / info->max_sa_per_se;
|
||||
sa_index = (counter->instance / block->num_instances) % info->max_sa_per_se;
|
||||
instance_index = counter->instance % block->num_instances;
|
||||
se_index = (counter->instance / block->num_scoped_instances) / info->max_sa_per_se;
|
||||
sa_index = (counter->instance / block->num_scoped_instances) % info->max_sa_per_se;
|
||||
instance_index = counter->instance % block->num_scoped_instances;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Invalid perf block distribution mode.");
|
||||
|
|
@ -341,7 +341,7 @@ ac_spm_init_instance_mapping(const struct radeon_info *info,
|
|||
|
||||
if (se_index >= info->num_se ||
|
||||
sa_index >= info->max_sa_per_se ||
|
||||
instance_index >= block->num_instances)
|
||||
instance_index >= block->num_scoped_instances)
|
||||
return false;
|
||||
|
||||
mapping->se_index = se_index;
|
||||
|
|
|
|||
|
|
@ -341,9 +341,9 @@ radv_get_counter_registers(const struct radv_physical_device *pdev, uint32_t num
|
|||
}
|
||||
|
||||
static unsigned
|
||||
radv_pc_get_num_instances(const struct radv_physical_device *pdev, struct ac_pc_block *ac_block)
|
||||
radv_pc_get_num_scoped_instances(const struct radv_physical_device *pdev, struct ac_pc_block *ac_block)
|
||||
{
|
||||
return ac_block->num_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdev->info.max_se : 1);
|
||||
return ac_block->num_scoped_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdev->info.max_se : 1);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
|
@ -404,10 +404,10 @@ radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCrea
|
|||
for (unsigned i = 0; i < pool->num_pc_regs; ++i) {
|
||||
enum ac_pc_gpu_block block = pool->pc_regs[i] >> 16;
|
||||
struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
|
||||
unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block);
|
||||
unsigned num_scoped_instances = radv_pc_get_num_scoped_instances(pdev, ac_block);
|
||||
|
||||
pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_instances);
|
||||
offset += sizeof(uint64_t) * 2 * num_instances;
|
||||
pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_scoped_instances);
|
||||
offset += sizeof(uint64_t) * 2 * num_scoped_instances;
|
||||
}
|
||||
|
||||
/* allow an uint32_t per pass to signal completion. */
|
||||
|
|
@ -513,7 +513,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
|
|||
ac_emit_cp_copy_data(cs->b, COPY_DATA_PERF, COPY_DATA_TC_L2, reg >> 2, va,
|
||||
AC_CP_COPY_DATA_WR_CONFIRM | AC_CP_COPY_DATA_COUNT_SEL, false);
|
||||
|
||||
va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
|
||||
va += sizeof(uint64_t) * 2 * radv_pc_get_num_scoped_instances(pdev, block);
|
||||
reg += reg_delta;
|
||||
}
|
||||
}
|
||||
|
|
@ -528,7 +528,7 @@ radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *blo
|
|||
se_end = pdev->info.max_se;
|
||||
|
||||
for (unsigned se = 0; se < se_end; ++se) {
|
||||
for (unsigned instance = 0; instance < block->num_instances; ++instance) {
|
||||
for (unsigned instance = 0; instance < block->num_scoped_instances; ++instance) {
|
||||
radv_emit_instance(cmd_buffer, se, instance);
|
||||
radv_pc_emit_block_instance_read(cmd_buffer, block, count, va);
|
||||
va += sizeof(uint64_t) * 2;
|
||||
|
|
@ -582,8 +582,8 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
|
|||
for (unsigned i = 0; i < pool->num_pc_regs;) {
|
||||
enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
|
||||
struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
|
||||
unsigned offset = ac_block->num_instances * pass;
|
||||
unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block);
|
||||
unsigned offset = ac_block->num_scoped_instances * pass;
|
||||
unsigned num_scoped_instances = radv_pc_get_num_scoped_instances(pdev, ac_block);
|
||||
|
||||
unsigned cnt = 1;
|
||||
while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))
|
||||
|
|
@ -592,11 +592,11 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
|
|||
if (offset < cnt) {
|
||||
unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters);
|
||||
radv_pc_sample_block(cmd_buffer, ac_block, pass_reg_cnt,
|
||||
reg_va + offset * num_instances * sizeof(uint64_t));
|
||||
reg_va + offset * num_scoped_instances * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
i += cnt;
|
||||
reg_va += num_instances * sizeof(uint64_t) * 2 * cnt;
|
||||
reg_va += num_scoped_instances * sizeof(uint64_t) * 2 * cnt;
|
||||
}
|
||||
|
||||
if (end) {
|
||||
|
|
@ -648,7 +648,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
|
|||
for (unsigned i = 0; i < pool->num_pc_regs;) {
|
||||
enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
|
||||
struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
|
||||
unsigned offset = ac_block->num_instances * pass;
|
||||
unsigned offset = ac_block->num_scoped_instances * pass;
|
||||
|
||||
unsigned cnt = 1;
|
||||
while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))
|
||||
|
|
|
|||
|
|
@ -298,7 +298,7 @@ static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery
|
|||
si_pc_emit_instance(sctx, se, instance);
|
||||
si_pc_emit_read(sctx, block, group->num_counters, va);
|
||||
va += sizeof(uint64_t) * group->num_counters;
|
||||
} while (group->instance < 0 && ++instance < block->num_instances);
|
||||
} while (group->instance < 0 && ++instance < block->num_scoped_instances);
|
||||
} while (++se < se_end);
|
||||
}
|
||||
|
||||
|
|
@ -408,7 +408,7 @@ static struct si_query_group *get_group_state(struct si_screen *screen, struct s
|
|||
group->sub_gid = sub_gid;
|
||||
|
||||
if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
|
||||
unsigned sub_gids = block->num_instances;
|
||||
unsigned sub_gids = block->num_scoped_instances;
|
||||
unsigned shader_id;
|
||||
unsigned shaders;
|
||||
unsigned query_shaders;
|
||||
|
|
@ -436,8 +436,8 @@ static struct si_query_group *get_group_state(struct si_screen *screen, struct s
|
|||
}
|
||||
|
||||
if (ac_pc_block_has_per_se_groups(&pc->base, block)) {
|
||||
group->se = sub_gid / block->num_instances;
|
||||
sub_gid = sub_gid % block->num_instances;
|
||||
group->se = sub_gid / block->num_scoped_instances;
|
||||
sub_gid = sub_gid % block->num_scoped_instances;
|
||||
} else {
|
||||
group->se = -1;
|
||||
}
|
||||
|
|
@ -516,7 +516,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_
|
|||
if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
|
||||
instances = screen->info.max_se;
|
||||
if (group->instance < 0)
|
||||
instances *= block->num_instances;
|
||||
instances *= block->num_scoped_instances;
|
||||
|
||||
group->result_base = i;
|
||||
query->result_size += sizeof(uint64_t) * instances * group->num_counters;
|
||||
|
|
@ -559,7 +559,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_
|
|||
if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
|
||||
counter->qwords = screen->info.max_se;
|
||||
if (group->instance < 0)
|
||||
counter->qwords *= block->num_instances;
|
||||
counter->qwords *= block->num_scoped_instances;
|
||||
}
|
||||
|
||||
return (struct pipe_query *)query;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue