From 70a4290e287b0a9f0d57ca2e793ac1699dee2dfe Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 12 Sep 2023 17:12:04 +0200 Subject: [PATCH] radv: add SPM support for GFX11 Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_perfcounter.c | 14 +++++--- src/amd/vulkan/radv_private.h | 2 +- src/amd/vulkan/radv_spm.c | 59 ++++++++++++++++++++++++------- src/amd/vulkan/radv_sqtt.c | 2 +- 4 files changed, 57 insertions(+), 20 deletions(-) diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index c8746aef451..884fba6dc25 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -30,11 +30,15 @@ #include "sid.h" void -radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders) +radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders) { - radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2); - radeon_emit(cs, shaders & 0x7f); - radeon_emit(cs, 0xffffffff); + if (device->physical_device->rad_info.gfx_level >= GFX11) { + radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f); + } else { + radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2); + radeon_emit(cs, shaders & 0x7f); + radeon_emit(cs, 0xffffffff); + } } static void @@ -644,7 +648,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo radv_emit_inhibit_clockgating(cmd_buffer->device, cs, true); radv_emit_spi_config_cntl(cmd_buffer->device, cs, true); - radv_perfcounter_emit_shaders(cs, 0x7f); + radv_perfcounter_emit_shaders(cmd_buffer->device, cs, 0x7f); for (unsigned pass = 0; pass < pool->num_passes; ++pass) { uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index fd96218259e..6266fe37877 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -3685,7 +3685,7 @@ radv_has_pops(const struct radv_physical_device *pdevice) } /* radv_perfcounter.c */ -void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders); +void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders); void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs); void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family); void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family); diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index a1d635743f3..ba8b7057fc5 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -65,6 +65,27 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; struct ac_spm *spm = &device->spm; + if (gfx_level >= GFX11) { + for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) { + uint32_t num_counters = spm->sq_wgp[instance].num_counters; + + if (!num_counters) + continue; + + radeon_check_space(device->ws, cs, 3 + num_counters * 3); + + radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index); + + for (uint32_t b = 0; b < num_counters; b++) { + const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b]; + uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; + + radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1); + radeon_emit(cs, cntr_sel->sel0); + } + } + } + for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) { uint32_t num_counters = spm->sqg[instance].num_counters; @@ -148,15 +169,25 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r } radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0); - radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0); - radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, - S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) | - S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) | - S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) | - S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3])); - radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, - S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | - S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL])); + + if (device->physical_device->rad_info.gfx_level >= GFX11) { + radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE, + S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) | + S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) | + S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines)); + + radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_RING_WRPTR, 0); + } else { + radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0); + radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, + S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) | + S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) | + S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) | + S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3])); + radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, + S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | + S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL])); + } /* Upload each muxsel ram to the RLC. */ for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { @@ -169,13 +200,15 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) { grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1); - rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR; - rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA; + rlc_muxsel_addr = + gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR; + rlc_muxsel_data = + gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA; } else { grbm_gfx_index |= S_030800_SE_INDEX(s); - rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR; - rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA; + rlc_muxsel_addr = gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR; + rlc_muxsel_data = gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA; } radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE)); diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 4e72271e4b1..bdb9ec46544 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -685,7 +685,7 @@ radv_begin_sqtt(struct radv_queue *queue) if (device->spm.bo) { /* Enable all shader stages by default. */ - radv_perfcounter_emit_shaders(cs, ac_sqtt_get_shader_mask(&device->physical_device->rad_info)); + radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&device->physical_device->rad_info)); radv_emit_spm_setup(device, cs, family); }