diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c
index e7c32c3bcc4..448dcca9436 100644
--- a/src/amd/common/ac_spm.c
+++ b/src/amd/common/ac_spm.c
@@ -125,15 +125,14 @@ ac_spm_get_block_select(struct ac_spm *spm, const struct ac_pc_block *block)
    memset(new_block_sel, 0, sizeof(*new_block_sel));
 
    new_block_sel->b = block;
-   new_block_sel->num_counters = block->b->b->num_spm_counters;
+   new_block_sel->instances =
+      calloc(block->num_global_instances, sizeof(*new_block_sel->instances));
+   if (!new_block_sel->instances)
+      return NULL; /* NOTE(review): confirm no stale block_sel entry is left in spm. */
+   new_block_sel->num_instances = block->num_global_instances; /* one per global instance */
 
-   /* Broadcast global block writes to SEs and SAs */
-   if (!(block->b->b->flags & (AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER)))
-      new_block_sel->grbm_gfx_index = S_030800_SE_BROADCAST_WRITES(1) |
-                                      S_030800_SH_BROADCAST_WRITES(1);
-   /* Broadcast per SE block writes to SAs */
-   else if (block->b->b->flags & AC_PC_BLOCK_SE)
-      new_block_sel->grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1);
+   for (unsigned i = 0; i < new_block_sel->num_instances; i++)
+      new_block_sel->instances[i].num_counters = block->b->b->num_spm_counters;
 
    return new_block_sel;
 }
@@ -197,9 +196,37 @@ ac_spm_init_muxsel(const struct ac_pc_block *block,
    muxsel->instance = mapping->instance_index;
 }
 
+static uint32_t
+ac_spm_init_grbm_gfx_index(const struct ac_pc_block *block,
+                           const struct ac_spm_instance_mapping *mapping)
+{
+   uint32_t grbm_gfx_index = 0; /* Value later written to GRBM_GFX_INDEX. */
+
+   grbm_gfx_index |= S_030800_SE_INDEX(mapping->se_index) |
+                     S_030800_SH_INDEX(mapping->sa_index) |
+                     S_030800_INSTANCE_INDEX(mapping->instance_index);
+
+   switch (block->b->b->gpu_block) {
+   case GL2C:
+      /* Global blocks: broadcast writes to all SEs. */
+      grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
+      break;
+   case SQ:
+      /* Per-SE blocks: broadcast writes to all SAs. */
+      grbm_gfx_index |= S_030800_SH_BROADCAST_WRITES(1);
+      break;
+   default:
+      /* Other blocks shouldn't broadcast; only the indices set above are used. */
+      break;
+   }
+
+   return grbm_gfx_index;
+}
+
 static bool
 ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
                    struct ac_spm_counter_info *counter,
+                   const struct ac_spm_instance_mapping *mapping,
                    uint32_t *spm_wire)
 {
    uint32_t instance = counter->instance;
@@ -228,8 +255,16 @@ ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
       }
    } else {
       /* Generic blocks. */
-      for (unsigned i = 0; i < block_sel->num_counters; i++) {
-         struct ac_spm_counter_select *cntr_sel = &block_sel->counters[i];
+      struct ac_spm_block_instance *block_instance =
+         &block_sel->instances[instance]; /* NOTE(review): assumes instance < num_instances. */
+
+      if (!block_instance->grbm_gfx_index) { /* 0 is treated as "not initialized yet". */
+         block_instance->grbm_gfx_index =
+            ac_spm_init_grbm_gfx_index(block_sel->b, mapping);
+      }
+
+      for (unsigned i = 0; i < block_instance->num_counters; i++) {
+         struct ac_spm_counter_select *cntr_sel = &block_instance->counters[i];
          int index = ffs(~cntr_sel->active) - 1;
 
          switch (index) {
@@ -320,7 +355,7 @@ ac_spm_add_counter(const struct radeon_info *info,
    }
 
    /* Map the counter to the select block. */
-   if (!ac_spm_map_counter(spm, block_sel, counter, &spm_wire)) {
+   if (!ac_spm_map_counter(spm, block_sel, counter, &instance_mapping, &spm_wire)) {
       fprintf(stderr, "ac/spm: No free slots available!\n");
       return false;
    }
@@ -462,6 +497,11 @@ void ac_destroy_spm(struct ac_spm *spm)
    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
       FREE(spm->muxsel_lines[s]);
    }
+
+   for (unsigned i = 0; i < spm->num_block_sel; i++) { /* Free each block's instance array. */
+      FREE(spm->block_sel[i].instances);
+   }
+
    FREE(spm->block_sel);
    FREE(spm->counters);
 }
diff --git a/src/amd/common/ac_spm.h b/src/amd/common/ac_spm.h
index eaeb2689d7b..5f051291ca2 100644
--- a/src/amd/common/ac_spm.h
+++ b/src/amd/common/ac_spm.h
@@ -122,14 +122,20 @@ struct ac_spm_counter_select {
    uint32_t sel1;
 };
 
-struct ac_spm_block_select {
-   const struct ac_pc_block *b;
+struct ac_spm_block_instance { /* Counter selection state of one block instance. */
    uint32_t grbm_gfx_index;
 
    uint32_t num_counters;
    struct ac_spm_counter_select counters[AC_SPM_MAX_COUNTER_PER_BLOCK];
 };
 
+struct ac_spm_block_select {
+   const struct ac_pc_block *b;
+
+   uint32_t num_instances;
+   struct ac_spm_block_instance *instances; /* Heap array of num_instances entries. */
+};
+
 struct ac_spm {
    /* struct radeon_winsys_bo or struct pb_buffer */
    void *bo;
diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c
index 4a2ed63d19b..a1d635743f3 100644
--- a/src/amd/vulkan/radv_spm.c
+++ b/src/amd/vulkan/radv_spm.c
@@ -90,21 +90,25 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
       struct ac_spm_block_select *block_sel = &spm->block_sel[b];
       struct ac_pc_block_base *regs = block_sel->b->b->b;
 
-      radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
+      for (unsigned i = 0; i < block_sel->num_instances; i++) { /* Emit per block instance. */
+         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
 
-      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
+         radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
 
-      for (unsigned c = 0; c < block_sel->num_counters; c++) {
-         const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c];
+         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
 
-         if (!cntr_sel->active)
-            continue;
+         for (unsigned c = 0; c < block_instance->num_counters; c++) {
+            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
 
-         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select0[c], 1);
-         radeon_emit(cs, cntr_sel->sel0);
+            if (!cntr_sel->active)
+               continue; /* Skip counter slots that were never mapped. */
 
-         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select1[c], 1);
-         radeon_emit(cs, cntr_sel->sel1);
+            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select0[c], 1);
+            radeon_emit(cs, cntr_sel->sel0);
+
+            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select1[c], 1);
+            radeon_emit(cs, cntr_sel->sel1);
+         }
       }
    }
 
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 139ce15c771..0a68d7efc8a 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -757,19 +757,23 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
       struct ac_spm_block_select *block_sel = &spm->block_sel[b];
       struct ac_pc_block_base *regs = block_sel->b->b->b;
 
-      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
+      for (unsigned i = 0; i < block_sel->num_instances; i++) { /* Emit per block instance. */
+         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
 
-      for (unsigned c = 0; c < block_sel->num_counters; c++) {
-         const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c];
+         radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
 
-         if (!cntr_sel->active)
-            continue;
+         for (unsigned c = 0; c < block_instance->num_counters; c++) {
+            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
 
-         radeon_set_uconfig_reg_seq(regs->select0[c], 1, false);
-         radeon_emit(cntr_sel->sel0);
+            if (!cntr_sel->active)
+               continue; /* Skip counter slots that were never mapped. */
 
-         radeon_set_uconfig_reg_seq(regs->select1[c], 1, false);
-         radeon_emit(cntr_sel->sel1);
+            radeon_set_uconfig_reg_seq(regs->select0[c], 1, false);
+            radeon_emit(cntr_sel->sel0);
+
+            radeon_set_uconfig_reg_seq(regs->select1[c], 1, false);
+            radeon_emit(cntr_sel->sel1);
+         }
       }
    }
 