diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c
index e24da87fb69..01149ff0a85 100644
--- a/src/amd/common/ac_spm.c
+++ b/src/amd/common/ac_spm.c
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: MIT
  */
 
+#include "ac_cmdbuf.h"
 #include "ac_spm.h"
 
 #include "util/bitscan.h"
@@ -658,3 +659,229 @@ bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace)
 
    return ac_spm_get_num_samples(spm, &trace->num_samples);
 }
+
+/**
+ * Upload the muxsel lines of every non-empty SPM segment to the RLC.
+ *
+ * For each segment, GRBM_GFX_INDEX is pointed at the matching SE (or at all
+ * SEs for the global segment) and every line is written through the
+ * MUXSEL_ADDR/MUXSEL_DATA register pair of that segment type.
+ * GRBM_GFX_INDEX is not restored here.
+ */
+static void
+ac_emit_spm_muxsel(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                   enum amd_ip_type ip_type, const struct ac_spm *spm)
+{
+   /* Upload each muxsel ram to the RLC. */
+   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
+      unsigned rlc_muxsel_addr, rlc_muxsel_data;
+      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
+                                S_030800_INSTANCE_BROADCAST_WRITES(1);
+
+      if (!spm->num_muxsel_lines[s])
+         continue;
+
+      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
+         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
+
+         rlc_muxsel_addr = gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR
+                                              : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
+         rlc_muxsel_data = gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA
+                                              : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
+      } else {
+         grbm_gfx_index |= S_030800_SE_INDEX(s);
+
+         rlc_muxsel_addr = gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR
+                                              : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
+         rlc_muxsel_data = gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA
+                                              : R_037220_RLC_SPM_SE_MUXSEL_DATA;
+      }
+
+      ac_cmdbuf_begin(cs);
+
+      ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
+
+      for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
+         const uint32_t *data = (const uint32_t *)spm->muxsel_lines[s][l].muxsel;
+
+         /* Select MUXSEL_ADDR to point to the next muxsel. */
+         ac_cmdbuf_set_uconfig_perfctr_reg(gfx_level, ip_type, rlc_muxsel_addr,
+                                           l * AC_SPM_MUXSEL_LINE_SIZE);
+
+         /* Write the muxsel line configuration with MUXSEL_DATA. */
+         ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
+         ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
+                        S_370_WR_CONFIRM(1) |
+                        S_370_ENGINE_SEL(V_370_ME) |
+                        S_370_WR_ONE_ADDR(1));
+         ac_cmdbuf_emit(rlc_muxsel_data >> 2);
+         ac_cmdbuf_emit(0);
+         ac_cmdbuf_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
+      }
+
+      ac_cmdbuf_end();
+   }
+}
+
+/**
+ * Program the performance counter select registers used by SPM.
+ *
+ * Emits the SQ WGP selects (GFX11+ only), the SQG selects and the selects of
+ * every generic block instance, then restores GRBM_GFX_INDEX to broadcast to
+ * all SEs, SHs and instances.
+ */
+static void
+ac_emit_spm_counters(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                     enum amd_ip_type ip_type,
+                     const struct ac_spm *spm)
+{
+   if (gfx_level >= GFX11) {
+      for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
+         uint32_t num_counters = spm->sq_wgp[instance].num_counters;
+
+         if (!num_counters)
+            continue;
+
+         ac_cmdbuf_begin(cs);
+         ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
+
+         for (uint32_t b = 0; b < num_counters; b++) {
+            const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
+            uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+
+            ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type,
+                                                  reg_base + b * 4, 1);
+            ac_cmdbuf_emit(cntr_sel->sel0);
+         }
+
+         ac_cmdbuf_end();
+      }
+   }
+
+   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
+      uint32_t num_counters = spm->sqg[instance].num_counters;
+
+      if (!num_counters)
+         continue;
+
+      ac_cmdbuf_begin(cs);
+      ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |
+                                S_030800_INSTANCE_BROADCAST_WRITES(1) |
+                                S_030800_SE_INDEX(instance));
+
+      for (uint32_t b = 0; b < num_counters; b++) {
+         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
+         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+
+         ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type,
+                                               reg_base + b * 4, 1);
+         ac_cmdbuf_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
+      }
+
+      ac_cmdbuf_end();
+   }
+
+   for (uint32_t b = 0; b < spm->num_block_sel; b++) {
+      struct ac_spm_block_select *block_sel = &spm->block_sel[b];
+      struct ac_pc_block_base *regs = block_sel->b->b->b;
+
+      for (unsigned i = 0; i < block_sel->num_instances; i++) {
+         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
+
+         ac_cmdbuf_begin(cs);
+         ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
+
+         for (unsigned c = 0; c < block_instance->num_counters; c++) {
+            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
+
+            if (!cntr_sel->active)
+               continue;
+
+            ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, regs->select0[c], 1);
+            ac_cmdbuf_emit(cntr_sel->sel0);
+
+            ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, regs->select1[c], 1);
+            ac_cmdbuf_emit(cntr_sel->sel1);
+         }
+
+         ac_cmdbuf_end();
+      }
+   }
+
+   /* Restore global broadcasting. */
+   ac_cmdbuf_begin(cs);
+   ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) |
+                             S_030800_SH_BROADCAST_WRITES(1) |
+                             S_030800_INSTANCE_BROADCAST_WRITES(1));
+   ac_cmdbuf_end();
+}
+
+/**
+ * Emit the whole SPM configuration: ring buffer, segment sizes, muxsel RAMs
+ * and counter selects.
+ *
+ * \param cs         Command buffer to emit into. No space check is done here,
+ *                   the caller is expected to have reserved enough dwords.
+ * \param gfx_level  Selects the GFX11+ or the pre-GFX11 register layout.
+ * \param ip_type    Forwarded to the perfctr register emission helpers.
+ * \param spm        SPM state (sample interval, buffer size, muxsel lines and
+ *                   counter selects).
+ * \param va         GPU address of the SPM ring buffer, must be aligned to
+ *                   AC_SPM_RING_BASE_ALIGN (as must spm->buffer_size).
+ */
+void
+ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                  enum amd_ip_type ip_type, const struct ac_spm *spm,
+                  uint64_t va)
+{
+   /* It's required that the ring VA and the size are correctly aligned. */
+   assert(!(va & (AC_SPM_RING_BASE_ALIGN - 1)));
+   assert(!(spm->buffer_size & (AC_SPM_RING_BASE_ALIGN - 1)));
+   assert(spm->sample_interval >= 32);
+
+   ac_cmdbuf_begin(cs);
+
+   /* Configure the SPM ring buffer. */
+   ac_cmdbuf_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
+                             S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
+                             S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
+   ac_cmdbuf_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
+   ac_cmdbuf_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
+                             S_037208_RING_BASE_HI(va >> 32));
+   ac_cmdbuf_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, spm->buffer_size);
+
+   /* Configure the muxsel. */
+   uint32_t total_muxsel_lines = 0;
+   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
+      total_muxsel_lines += spm->num_muxsel_lines[s];
+   }
+
+   ac_cmdbuf_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
+
+   if (gfx_level >= GFX11) {
+      ac_cmdbuf_set_uconfig_reg(R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
+                                S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
+                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
+                                S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));
+
+      ac_cmdbuf_set_uconfig_reg(R_037210_RLC_SPM_RING_WRPTR, 0);
+   } else {
+      ac_cmdbuf_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
+      ac_cmdbuf_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
+                                S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
+                                S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
+                                S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
+                                S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
+      ac_cmdbuf_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
+                                S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
+                                S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
+   }
+
+   ac_cmdbuf_end();
+
+   /* Upload each muxsel ram to the RLC. */
+   ac_emit_spm_muxsel(cs, gfx_level, ip_type, spm);
+
+   /* Select SPM counters. */
+   ac_emit_spm_counters(cs, gfx_level, ip_type, spm);
+}
diff --git a/src/amd/common/ac_spm.h b/src/amd/common/ac_spm.h
index 431c8b20b1c..648848b4180 100644
--- a/src/amd/common/ac_spm.h
+++ b/src/amd/common/ac_spm.h
@@ -11,12 +11,16 @@
 
 #include "ac_perfcounter.h"
 
+struct ac_cmdbuf;
+
 #define AC_SPM_MAX_COUNTER_PER_BLOCK 16
 #define AC_SPM_GLOBAL_TIMESTAMP_COUNTERS 4 /* in unit of 16-bit counters*/
 #define AC_SPM_NUM_COUNTER_PER_MUXSEL 16 /* 16 16-bit counters per muxsel */
 #define AC_SPM_MUXSEL_LINE_SIZE ((AC_SPM_NUM_COUNTER_PER_MUXSEL * 2) / 4) /* in dwords */
 #define AC_SPM_NUM_PERF_SEL 4
 
+#define AC_SPM_RING_BASE_ALIGN 32
+
 /* GFX10+ */
 enum ac_spm_global_block {
    AC_SPM_GLOBAL_BLOCK_CPG,
@@ -197,4 +201,13 @@ void ac_destroy_spm(struct ac_spm *spm);
 
 bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
 
+/* Emit the SPM setup (ring buffer, muxsel and counter selects) into cs.
+ * The caller must reserve command buffer space beforehand and pass a ring
+ * buffer VA aligned to AC_SPM_RING_BASE_ALIGN.
+ */
+void
+ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                  enum amd_ip_type ip_type, const struct ac_spm *spm,
+                  uint64_t va);
+
 #endif
diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c
index 1638a1e6e72..0f909995dae 100644
--- a/src/amd/vulkan/radv_spm.c
+++ b/src/amd/vulkan/radv_spm.c
@@ -11,8 +11,6 @@
 #include "radv_spm.h"
 #include "sid.h"
 
-#define SPM_RING_BASE_ALIGN 32
-
 static bool
 radv_spm_init_bo(struct radv_device *device)
 {
@@ -67,209 +65,46 @@ radv_spm_resize_bo(struct radv_device *device)
    return radv_spm_init_bo(device);
 }
 
-static void
-radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs)
-{
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
-   struct ac_spm *spm = &device->spm;
-
-   if (gfx_level >= GFX11) {
-      for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
-         uint32_t num_counters = spm->sq_wgp[instance].num_counters;
-
-         if (!num_counters)
-            continue;
-
-         radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
-         radeon_begin(cs);
-
-         radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
-
-         for (uint32_t b = 0; b < num_counters; b++) {
-            const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
-            uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
-
-            radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
-            radeon_emit(cntr_sel->sel0);
-         }
-
-         radeon_end();
-      }
-   }
-
-   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
-      uint32_t num_counters = spm->sqg[instance].num_counters;
-
-      if (!num_counters)
-         continue;
-
-      radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
-      radeon_begin(cs);
-
-      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |
-                                                         S_030800_INSTANCE_BROADCAST_WRITES(1) |
-                                                         S_030800_SE_INDEX(instance));
-
-      for (uint32_t b = 0; b < num_counters; b++) {
-         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
-         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
-
-         radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
-         radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
-      }
-
-      radeon_end();
-   }
-
-   for (uint32_t b = 0; b < spm->num_block_sel; b++) {
-      struct ac_spm_block_select *block_sel = &spm->block_sel[b];
-      struct ac_pc_block_base *regs = block_sel->b->b->b;
-
-      for (unsigned i = 0; i < block_sel->num_instances; i++) {
-         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
-
-         radeon_check_space(device->ws, cs->b, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
-         radeon_begin(cs);
-
-         radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
-
-         for (unsigned c = 0; c < block_instance->num_counters; c++) {
-            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
-
-            if (!cntr_sel->active)
-               continue;
-
-            radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select0[c], 1);
-            radeon_emit(cntr_sel->sel0);
-
-            radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select1[c], 1);
-            radeon_emit(cntr_sel->sel1);
-         }
-
-         radeon_end();
-      }
-   }
-
-   /* Restore global broadcasting. */
-   radeon_begin(cs);
-   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
-                                                      S_030800_INSTANCE_BROADCAST_WRITES(1));
-   radeon_end();
-}
-
-static void
-radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs)
-{
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-   const struct ac_spm *spm = &device->spm;
-
-   /* Upload each muxsel ram to the RLC. */
-   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
-      unsigned rlc_muxsel_addr, rlc_muxsel_data;
-      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1);
-
-      if (!spm->num_muxsel_lines[s])
-         continue;
-
-      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
-         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
-
-         rlc_muxsel_addr =
-            pdev->info.gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
-         rlc_muxsel_data =
-            pdev->info.gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
-      } else {
-         grbm_gfx_index |= S_030800_SE_INDEX(s);
-
-         rlc_muxsel_addr =
-            pdev->info.gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
-         rlc_muxsel_data =
-            pdev->info.gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
-      }
-
-      radeon_check_space(device->ws, cs->b, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
-      radeon_begin(cs);
-
-      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
-
-      for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
-         uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
-
-         /* Select MUXSEL_ADDR to point to the next muxsel. */
-         radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, cs->hw_ip, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
-
-         /* Write the muxsel line configuration with MUXSEL_DATA. */
-         radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
-         radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
-                     S_370_WR_ONE_ADDR(1));
-         radeon_emit(rlc_muxsel_data >> 2);
-         radeon_emit(0);
-         radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
-      }
-
-      radeon_end();
-   }
-}
-
+/* Upper bound, in dwords, of what ac_emit_spm_setup() emits for this SPM state. */
+static unsigned
+radv_spm_setup_num_dw(const struct ac_spm *spm)
+{
+   /* Ring buffer and segment size registers, plus the final GRBM_GFX_INDEX restore. */
+   unsigned num_dw = 27 + 3;
+
+   /* Muxsel upload: GRBM_GFX_INDEX, then MUXSEL_ADDR and a WRITE_DATA packet per line. */
+   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
+      if (spm->num_muxsel_lines[s])
+         num_dw += 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE);
+   }
+
+   /* SQ WGP and SQG counter selects: GRBM_GFX_INDEX plus one register per counter. */
+   for (unsigned i = 0; i < ARRAY_SIZE(spm->sq_wgp); i++) {
+      if (spm->sq_wgp[i].num_counters)
+         num_dw += 3 + spm->sq_wgp[i].num_counters * 3;
+   }
+
+   for (unsigned i = 0; i < ARRAY_SIZE(spm->sqg); i++) {
+      if (spm->sqg[i].num_counters)
+         num_dw += 3 + spm->sqg[i].num_counters * 3;
+   }
+
+   /* Generic blocks: GRBM_GFX_INDEX plus two select registers per counter. */
+   for (unsigned b = 0; b < spm->num_block_sel; b++)
+      num_dw += spm->block_sel[b].num_instances * (3 + AC_SPM_MAX_COUNTER_PER_BLOCK * 6);
+
+   return num_dw;
+}
+
 void
 radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);
    struct ac_spm *spm = &device->spm;
    uint64_t va = radv_buffer_get_va(spm->bo);
-   uint64_t ring_size = spm->buffer_size;
 
-   /* It's required that the ring VA and the size are correctly aligned. */
-   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
-   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
-   assert(spm->sample_interval >= 32);
-
-   radeon_check_space(device->ws, cs->b, 27);
-   radeon_begin(cs);
-
-   /* Configure the SPM ring buffer. */
-   radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
-                          S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
-                             S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
-   radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
-   radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32));
-   radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
-
-   /* Configure the muxsel. */
-   uint32_t total_muxsel_lines = 0;
-   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
-      total_muxsel_lines += spm->num_muxsel_lines[s];
-   }
-
-   radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
-
-   if (pdev->info.gfx_level >= GFX11) {
-      radeon_set_uconfig_reg(R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
-                             S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
-                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
-                                S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));
-
-      radeon_set_uconfig_reg(R_037210_RLC_SPM_RING_WRPTR, 0);
-   } else {
-      radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
-      radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
-                             S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
-                                S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
-                                S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
-                                S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
-      radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
-                             S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
-                                S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
-   }
-
-   radeon_end();
-
-   /* Upload each muxsel ram to the RLC. */
-   radv_emit_spm_muxsel(device, cs);
-
-   /* Select SPM counters. */
-   radv_emit_spm_counters(device, cs);
+   radeon_check_space(device->ws, cs->b, radv_spm_setup_num_dw(spm));
+   ac_emit_spm_setup(cs->b, pdev->info.gfx_level, cs->hw_ip, spm, va);
 }
 
 bool
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index ae5bbe2d0b1..409c61ff65b 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -732,152 +732,14 @@ si_spm_init_bo(struct si_context *sctx)
    return sctx->spm.bo != NULL;
 }
 
-
-static void
-si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
-{
-   struct ac_spm *spm = &sctx->spm;
-
-   radeon_begin(cs);
-
-   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
-      uint32_t num_counters = spm->sqg[instance].num_counters;
-
-      if (!num_counters)
-         continue;
-
-      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
-                             S_030800_SH_BROADCAST_WRITES(1) |
-                             S_030800_INSTANCE_BROADCAST_WRITES(1) |
-                             S_030800_SE_INDEX(instance));
-
-      for (uint32_t b = 0; b < num_counters; b++) {
-         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
-         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
-
-         radeon_set_uconfig_reg_seq(reg_base + b * 4, 1);
-         radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
-      }
-   }
-
-   for (uint32_t b = 0; b < spm->num_block_sel; b++) {
-      struct ac_spm_block_select *block_sel = &spm->block_sel[b];
-      struct ac_pc_block_base *regs = block_sel->b->b->b;
-
-      for (unsigned i = 0; i < block_sel->num_instances; i++) {
-         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
-
-         radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
-
-         for (unsigned c = 0; c < block_instance->num_counters; c++) {
-            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
-
-            if (!cntr_sel->active)
-               continue;
-
-            radeon_set_uconfig_reg_seq(regs->select0[c], 1);
-            radeon_emit(cntr_sel->sel0);
-
-            radeon_set_uconfig_reg_seq(regs->select1[c], 1);
-            radeon_emit(cntr_sel->sel1);
-         }
-      }
-   }
-
-   /* Restore global broadcasting. */
-   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
-                          S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
-                          S_030800_INSTANCE_BROADCAST_WRITES(1));
-
-   radeon_end();
-}
-
-#define SPM_RING_BASE_ALIGN 32
-
 void
 si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
 {
+   const enum amd_ip_type ip_type = sctx->ws->cs_get_ip_type(cs);
    struct ac_spm *spm = &sctx->spm;
    uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo);
-   uint64_t ring_size = spm->buffer_size;
 
-   /* It's required that the ring VA and the size are correctly aligned. */
-   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
-   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
-   assert(spm->sample_interval >= 32);
-
-   radeon_begin(cs);
-
-   /* Configure the SPM ring buffer. */
-   radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
-                          S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
-                          S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
-   radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
-   radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
-                          S_037208_RING_BASE_HI(va >> 32));
-   radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
-
-   /* Configure the muxsel. */
-   uint32_t total_muxsel_lines = 0;
-   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
-      total_muxsel_lines += spm->num_muxsel_lines[s];
-   }
-
-   radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
-   radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
-   radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
-                          S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
-                          S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
-                          S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
-                          S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
-   radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
-                          S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
-                          S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
-
-   /* Upload each muxsel ram to the RLC. */
-   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
-      unsigned rlc_muxsel_addr, rlc_muxsel_data;
-      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
-                                S_030800_INSTANCE_BROADCAST_WRITES(1);
-
-      if (!spm->num_muxsel_lines[s])
-         continue;
-
-      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
-         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
-
-         rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
-         rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
-      } else {
-         grbm_gfx_index |= S_030800_SE_INDEX(s);
-
-         rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
-         rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA;
-      }
-
-      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
-
-      for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
-         uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
-
-         /* Select MUXSEL_ADDR to point to the next muxsel. */
-         radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
-
-         /* Write the muxsel line configuration with MUXSEL_DATA. */
-         radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
-         radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
-                     S_370_WR_CONFIRM(1) |
-                     S_370_ENGINE_SEL(V_370_ME) |
-                     S_370_WR_ONE_ADDR(1));
-         radeon_emit(rlc_muxsel_data >> 2);
-         radeon_emit(0);
-         radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
-      }
-   }
-   radeon_end();
-
-   /* Select SPM counters. */
-   si_emit_spm_counters(sctx, cs);
+   ac_emit_spm_setup(&cs->current, sctx->gfx_level, ip_type, spm, va);
 }
 
 bool