amd,radv,radeonsi: add ac_emit_spm_setup()

This moves all SPM emit code to common code. This likely also fixes
SPM on GFX11+ for RadeonSI.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37956>
This commit is contained in:
Samuel Pitoiset 2025-10-20 14:06:05 +02:00 committed by Marge Bot
parent 202f8db793
commit 22d73fc077
4 changed files with 212 additions and 338 deletions

View file

@@ -4,6 +4,7 @@
* SPDX-License-Identifier: MIT
*/
#include "ac_cmdbuf.h"
#include "ac_spm.h"
#include "util/bitscan.h"
@@ -658,3 +659,201 @@ bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace)
return ac_spm_get_num_samples(spm, &trace->num_samples);
}
/* Upload the SPM muxsel RAM (counter multiplexer configuration) to the RLC,
 * one segment (global + one per SE) at a time. The ADDR/DATA register pair
 * moved on GFX11, hence the per-generation register selection.
 */
static void
ac_emit_spm_muxsel(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                   enum amd_ip_type ip_type, const struct ac_spm *spm)
{
   for (unsigned seg = 0; seg < AC_SPM_SEGMENT_TYPE_COUNT; seg++) {
      const unsigned num_lines = spm->num_muxsel_lines[seg];
      unsigned addr_reg, data_reg;
      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
                                S_030800_INSTANCE_BROADCAST_WRITES(1);

      if (!num_lines)
         continue;

      if (seg == AC_SPM_SEGMENT_TYPE_GLOBAL) {
         /* Global segment: broadcast to all SEs. */
         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
         if (gfx_level >= GFX11) {
            addr_reg = R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR;
            data_reg = R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA;
         } else {
            addr_reg = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
            data_reg = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
         }
      } else {
         /* Per-SE segment: target exactly one SE. */
         grbm_gfx_index |= S_030800_SE_INDEX(seg);
         if (gfx_level >= GFX11) {
            addr_reg = R_037228_RLC_SPM_SE_MUXSEL_ADDR;
            data_reg = R_03722C_RLC_SPM_SE_MUXSEL_DATA;
         } else {
            addr_reg = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
            data_reg = R_037220_RLC_SPM_SE_MUXSEL_DATA;
         }
      }

      ac_cmdbuf_begin(cs);
      ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);

      for (unsigned line = 0; line < num_lines; line++) {
         uint32_t *muxsels = (uint32_t *)spm->muxsel_lines[seg][line].muxsel;

         /* Point MUXSEL_ADDR at the line about to be programmed. */
         ac_cmdbuf_set_uconfig_perfctr_reg(gfx_level, ip_type, addr_reg,
                                           line * AC_SPM_MUXSEL_LINE_SIZE);

         /* Stream the whole line through MUXSEL_DATA with a single
          * WRITE_DATA packet (WR_ONE_ADDR keeps the destination fixed).
          */
         ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
         ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
                        S_370_WR_CONFIRM(1) |
                        S_370_ENGINE_SEL(V_370_ME) |
                        S_370_WR_ONE_ADDR(1));
         ac_cmdbuf_emit(data_reg >> 2);
         ac_cmdbuf_emit(0);
         ac_cmdbuf_emit_array(muxsels, AC_SPM_MUXSEL_LINE_SIZE);
      }

      ac_cmdbuf_end();
   }
}
static void
ac_emit_spm_counters(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
enum amd_ip_type ip_type,
const struct ac_spm *spm)
{
if (gfx_level >= GFX11) {
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
uint32_t num_counters = spm->sq_wgp[instance].num_counters;
if (!num_counters)
continue;
ac_cmdbuf_begin(cs);
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
for (uint32_t b = 0; b < num_counters; b++) {
const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type,
reg_base + b * 4, 1);
ac_cmdbuf_emit(cntr_sel->sel0);
}
ac_cmdbuf_end();
}
}
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
uint32_t num_counters = spm->sqg[instance].num_counters;
if (!num_counters)
continue;
ac_cmdbuf_begin(cs);
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1) |
S_030800_SE_INDEX(instance));
for (uint32_t b = 0; b < num_counters; b++) {
const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type,
reg_base + b * 4, 1);
ac_cmdbuf_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
}
ac_cmdbuf_end();
}
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
struct ac_pc_block_base *regs = block_sel->b->b->b;
for (unsigned i = 0; i < block_sel->num_instances; i++) {
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
ac_cmdbuf_begin(cs);
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
for (unsigned c = 0; c < block_instance->num_counters; c++) {
const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
if (!cntr_sel->active)
continue;
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, regs->select0[c], 1);
ac_cmdbuf_emit(cntr_sel->sel0);
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, regs->select1[c], 1);
ac_cmdbuf_emit(cntr_sel->sel1);
}
ac_cmdbuf_end();
}
}
/* Restore global broadcasting. */
ac_cmdbuf_begin(cs);
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) |
S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1));
ac_cmdbuf_end();
}
/* Emit the full SPM setup: ring buffer programming, segment sizes, muxsel
 * RAM upload and counter selection.
 *
 * \param cs        destination command buffer
 * \param gfx_level chip generation (register layout differs on GFX11+)
 * \param ip_type   HW IP the commands are submitted to
 * \param spm       configured SPM state to program
 * \param va        GPU VA of the SPM output ring buffer
 */
void
ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                  enum amd_ip_type ip_type, const struct ac_spm *spm,
                  uint64_t va)
{
   uint32_t num_lines = 0;

   /* Both the ring VA and the ring size must be properly aligned. */
   assert(!(va & (AC_SPM_RING_BASE_ALIGN - 1)));
   assert(!(spm->buffer_size & (AC_SPM_RING_BASE_ALIGN - 1)));
   assert(spm->sample_interval >= 32);

   /* Total number of muxsel lines across all segments. */
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++)
      num_lines += spm->num_muxsel_lines[s];

   ac_cmdbuf_begin(cs);

   /* Configure the SPM ring buffer. */
   ac_cmdbuf_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
                             S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
                             S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
   ac_cmdbuf_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
   ac_cmdbuf_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
                             S_037208_RING_BASE_HI(va >> 32));
   ac_cmdbuf_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, spm->buffer_size);

   ac_cmdbuf_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);

   /* Configure the muxsel segment sizes. */
   if (gfx_level >= GFX11) {
      ac_cmdbuf_set_uconfig_reg(R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
                                S_03721C_TOTAL_NUM_SEGMENT(num_lines) |
                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
                                S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));
      ac_cmdbuf_set_uconfig_reg(R_037210_RLC_SPM_RING_WRPTR, 0);
   } else {
      ac_cmdbuf_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
      ac_cmdbuf_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
                                S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
                                S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
                                S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
                                S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
      ac_cmdbuf_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
                                S_037280_PERFMON_SEGMENT_SIZE(num_lines) |
                                S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
   }

   ac_cmdbuf_end();

   /* Upload each muxsel ram to the RLC. */
   ac_emit_spm_muxsel(cs, gfx_level, ip_type, spm);

   /* Select SPM counters. */
   ac_emit_spm_counters(cs, gfx_level, ip_type, spm);
}

View file

@@ -11,12 +11,16 @@
#include "ac_perfcounter.h"
struct ac_cmdbuf;
#define AC_SPM_MAX_COUNTER_PER_BLOCK 16
#define AC_SPM_GLOBAL_TIMESTAMP_COUNTERS 4 /* in unit of 16-bit counters*/
#define AC_SPM_NUM_COUNTER_PER_MUXSEL 16 /* 16 16-bit counters per muxsel */
#define AC_SPM_MUXSEL_LINE_SIZE ((AC_SPM_NUM_COUNTER_PER_MUXSEL * 2) / 4) /* in dwords */
#define AC_SPM_NUM_PERF_SEL 4
#define AC_SPM_RING_BASE_ALIGN 32
/* GFX10+ */
enum ac_spm_global_block {
AC_SPM_GLOBAL_BLOCK_CPG,
@@ -197,4 +201,9 @@ void ac_destroy_spm(struct ac_spm *spm);
bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
void
ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
enum amd_ip_type ip_type, const struct ac_spm *spm,
uint64_t va);
#endif

View file

@@ -11,8 +11,6 @@
#include "radv_spm.h"
#include "sid.h"
#define SPM_RING_BASE_ALIGN 32
static bool
radv_spm_init_bo(struct radv_device *device)
{
@@ -67,209 +65,15 @@ radv_spm_resize_bo(struct radv_device *device)
return radv_spm_init_bo(device);
}
/* Selects all SPM counters: per-WGP SQ counters (GFX11+), per-SE SQG
 * counters and generic per-block counters, then restores GRBM broadcasting.
 * NOTE(review): this page renders a diff without +/- markers; these lines
 * appear to be the RADV copy that the commit removes in favour of the common
 * ac_emit_spm_counters() — confirm against the actual patch.
 */
static void
radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_spm *spm = &device->spm;
/* Per-WGP SQ counter selects (GFX11+ only). */
if (gfx_level >= GFX11) {
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
uint32_t num_counters = spm->sq_wgp[instance].num_counters;
if (!num_counters)
continue;
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
for (uint32_t b = 0; b < num_counters; b++) {
const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
radeon_emit(cntr_sel->sel0);
}
radeon_end();
}
}
/* Per-SE SQG counter selects. */
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
uint32_t num_counters = spm->sqg[instance].num_counters;
if (!num_counters)
continue;
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1) |
S_030800_SE_INDEX(instance));
for (uint32_t b = 0; b < num_counters; b++) {
const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
}
radeon_end();
}
/* Generic per-block counters: two select registers per active counter. */
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
struct ac_pc_block_base *regs = block_sel->b->b->b;
for (unsigned i = 0; i < block_sel->num_instances; i++) {
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
radeon_check_space(device->ws, cs->b, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
for (unsigned c = 0; c < block_instance->num_counters; c++) {
const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
if (!cntr_sel->active)
continue;
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select0[c], 1);
radeon_emit(cntr_sel->sel0);
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select1[c], 1);
radeon_emit(cntr_sel->sel1);
}
radeon_end();
}
}
/* Restore global broadcasting. */
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1));
radeon_end();
}
/* Uploads the SPM muxsel RAM to the RLC, one segment (global + per-SE) at a
 * time; the ADDR/DATA register pair differs between pre-GFX11 and GFX11+.
 * NOTE(review): scraped diff without +/- markers — these lines appear to be
 * the RADV copy removed by the commit in favour of ac_emit_spm_muxsel().
 */
static void
radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct ac_spm *spm = &device->spm;
/* Upload each muxsel ram to the RLC. */
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
unsigned rlc_muxsel_addr, rlc_muxsel_data;
unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1);
if (!spm->num_muxsel_lines[s])
continue;
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
/* Global segment: broadcast to every SE. */
grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
rlc_muxsel_addr =
pdev->info.gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
rlc_muxsel_data =
pdev->info.gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
} else {
/* Per-SE segment: target one SE. */
grbm_gfx_index |= S_030800_SE_INDEX(s);
rlc_muxsel_addr =
pdev->info.gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
rlc_muxsel_data =
pdev->info.gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
}
radeon_check_space(device->ws, cs->b, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
/* Select MUXSEL_ADDR to point to the next muxsel. */
radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, cs->hw_ip, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
/* Write the muxsel line configuration with MUXSEL_DATA. */
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
S_370_WR_ONE_ADDR(1));
radeon_emit(rlc_muxsel_data >> 2);
radeon_emit(0);
radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
}
radeon_end();
}
}
/* Emits the full SPM setup for RADV.
 * NOTE(review): this span interleaves diff-removed and diff-added lines with
 * the +/- markers stripped. The long inline body below is the pre-refactor
 * version; the trailing radeon_check_space(..., 2048) + ac_emit_spm_setup()
 * call is its replacement. As rendered, the setup would be emitted twice —
 * consult the real patch before treating this as compilable code.
 */
void
radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_spm *spm = &device->spm;
uint64_t va = radv_buffer_get_va(spm->bo);
uint64_t ring_size = spm->buffer_size;
/* It's required that the ring VA and the size are correctly aligned. */
assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
assert(spm->sample_interval >= 32);
radeon_check_space(device->ws, cs->b, 27);
radeon_begin(cs);
/* Configure the SPM ring buffer. */
radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32));
radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
/* Configure the muxsel. */
uint32_t total_muxsel_lines = 0;
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
total_muxsel_lines += spm->num_muxsel_lines[s];
}
radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
/* Segment-size register layout differs on GFX11+. */
if (pdev->info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));
radeon_set_uconfig_reg(R_037210_RLC_SPM_RING_WRPTR, 0);
} else {
radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
}
radeon_end();
/* Upload each muxsel ram to the RLC. */
radv_emit_spm_muxsel(device, cs);
/* Select SPM counters. */
radv_emit_spm_counters(device, cs);
/* NOTE(review): the two lines below are the diff-added replacement body. */
radeon_check_space(device->ws, cs->b, 2048);
ac_emit_spm_setup(cs->b, pdev->info.gfx_level, cs->hw_ip, spm, va);
}
bool

View file

@@ -732,152 +732,14 @@ si_spm_init_bo(struct si_context *sctx)
return sctx->spm.bo != NULL;
}
/* Selects the per-SE SQG counters and the generic per-block counters, then
 * restores global GRBM broadcasting. Note this copy has no per-WGP SQ path
 * and no gfx-level-aware perfctr writes, unlike the RADV/common versions.
 * NOTE(review): scraped diff without +/- markers — these lines appear to be
 * the RadeonSI copy removed by the commit (the commit message says this
 * "likely also fixes SPM on GFX11+ for RadeonSI").
 */
static void
si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
{
struct ac_spm *spm = &sctx->spm;
radeon_begin(cs);
/* Per-SE SQG counter selects. */
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
uint32_t num_counters = spm->sqg[instance].num_counters;
if (!num_counters)
continue;
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1) |
S_030800_SE_INDEX(instance));
for (uint32_t b = 0; b < num_counters; b++) {
const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
radeon_set_uconfig_reg_seq(reg_base + b * 4, 1);
radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
}
}
/* Generic per-block counters: two select registers per active counter. */
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
struct ac_pc_block_base *regs = block_sel->b->b->b;
for (unsigned i = 0; i < block_sel->num_instances; i++) {
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
for (unsigned c = 0; c < block_instance->num_counters; c++) {
const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
if (!cntr_sel->active)
continue;
radeon_set_uconfig_reg_seq(regs->select0[c], 1);
radeon_emit(cntr_sel->sel0);
radeon_set_uconfig_reg_seq(regs->select1[c], 1);
radeon_emit(cntr_sel->sel1);
}
}
}
/* Restore global broadcasting. */
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1));
radeon_end();
}
#define SPM_RING_BASE_ALIGN 32
/* Emits the full SPM setup for RadeonSI.
 * NOTE(review): this span interleaves diff-removed and diff-added lines with
 * the +/- markers stripped. The long inline body (pre-GFX11 registers only,
 * which is presumably why the commit says it "likely also fixes SPM on
 * GFX11+ for RadeonSI") is the pre-refactor version; the trailing
 * ac_emit_spm_setup() call is its replacement. As rendered, the setup would
 * be emitted twice — consult the real patch before treating this as
 * compilable code.
 */
void
si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
{
const enum amd_ip_type ip_type = sctx->ws->cs_get_ip_type(cs);
struct ac_spm *spm = &sctx->spm;
uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo);
uint64_t ring_size = spm->buffer_size;
/* It's required that the ring VA and the size are correctly aligned. */
assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
assert(spm->sample_interval >= 32);
radeon_begin(cs);
/* Configure the SPM ring buffer. */
radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
S_037208_RING_BASE_HI(va >> 32));
radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
/* Configure the muxsel. */
uint32_t total_muxsel_lines = 0;
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
total_muxsel_lines += spm->num_muxsel_lines[s];
}
radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
/* Upload each muxsel ram to the RLC. */
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
unsigned rlc_muxsel_addr, rlc_muxsel_data;
unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1);
if (!spm->num_muxsel_lines[s])
continue;
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
/* Global segment: broadcast to every SE. Pre-GFX11 registers only. */
grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
} else {
/* Per-SE segment: target one SE. */
grbm_gfx_index |= S_030800_SE_INDEX(s);
rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA;
}
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
/* Select MUXSEL_ADDR to point to the next muxsel. */
radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
/* Write the muxsel line configuration with MUXSEL_DATA. */
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME) |
S_370_WR_ONE_ADDR(1));
radeon_emit(rlc_muxsel_data >> 2);
radeon_emit(0);
radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
}
}
radeon_end();
/* Select SPM counters. */
si_emit_spm_counters(sctx, cs);
/* NOTE(review): the line below is the diff-added replacement body. */
ac_emit_spm_setup(&cs->current, sctx->gfx_level, ip_type, spm, va);
}
bool