mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 16:00:08 +01:00
amd,radv,radeonsi: add ac_emit_spm_setup()
This moves all SPM emit code to common code. This likely also fixes SPM on GFX11+ for RadeonSI. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37956>
This commit is contained in:
parent
202f8db793
commit
22d73fc077
4 changed files with 212 additions and 338 deletions
|
|
@ -4,6 +4,7 @@
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "ac_cmdbuf.h"
|
||||||
#include "ac_spm.h"
|
#include "ac_spm.h"
|
||||||
|
|
||||||
#include "util/bitscan.h"
|
#include "util/bitscan.h"
|
||||||
|
|
@ -658,3 +659,201 @@ bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace)
|
||||||
|
|
||||||
return ac_spm_get_num_samples(spm, &trace->num_samples);
|
return ac_spm_get_num_samples(spm, &trace->num_samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ac_emit_spm_muxsel(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||||
|
enum amd_ip_type ip_type, const struct ac_spm *spm)
|
||||||
|
{
|
||||||
|
/* Upload each muxsel ram to the RLC. */
|
||||||
|
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||||
|
unsigned rlc_muxsel_addr, rlc_muxsel_data;
|
||||||
|
unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
|
||||||
|
S_030800_INSTANCE_BROADCAST_WRITES(1);
|
||||||
|
|
||||||
|
if (!spm->num_muxsel_lines[s])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
|
||||||
|
grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
|
||||||
|
|
||||||
|
rlc_muxsel_addr = gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR
|
||||||
|
: R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
|
||||||
|
rlc_muxsel_data = gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA
|
||||||
|
: R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
|
||||||
|
} else {
|
||||||
|
grbm_gfx_index |= S_030800_SE_INDEX(s);
|
||||||
|
|
||||||
|
rlc_muxsel_addr = gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR
|
||||||
|
: R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
|
||||||
|
rlc_muxsel_data = gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA
|
||||||
|
: R_037220_RLC_SPM_SE_MUXSEL_DATA;
|
||||||
|
}
|
||||||
|
|
||||||
|
ac_cmdbuf_begin(cs);
|
||||||
|
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
|
||||||
|
|
||||||
|
for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
|
||||||
|
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
|
||||||
|
|
||||||
|
/* Select MUXSEL_ADDR to point to the next muxsel. */
|
||||||
|
ac_cmdbuf_set_uconfig_perfctr_reg(gfx_level, ip_type, rlc_muxsel_addr,
|
||||||
|
l * AC_SPM_MUXSEL_LINE_SIZE);
|
||||||
|
|
||||||
|
/* Write the muxsel line configuration with MUXSEL_DATA. */
|
||||||
|
ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
|
||||||
|
ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
|
||||||
|
S_370_WR_CONFIRM(1) |
|
||||||
|
S_370_ENGINE_SEL(V_370_ME) |
|
||||||
|
S_370_WR_ONE_ADDR(1));
|
||||||
|
ac_cmdbuf_emit(rlc_muxsel_data >> 2);
|
||||||
|
ac_cmdbuf_emit(0);
|
||||||
|
ac_cmdbuf_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ac_cmdbuf_end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ac_emit_spm_counters(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||||
|
enum amd_ip_type ip_type,
|
||||||
|
const struct ac_spm *spm)
|
||||||
|
{
|
||||||
|
if (gfx_level >= GFX11) {
|
||||||
|
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
|
||||||
|
uint32_t num_counters = spm->sq_wgp[instance].num_counters;
|
||||||
|
|
||||||
|
if (!num_counters)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ac_cmdbuf_begin(cs);
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
|
||||||
|
|
||||||
|
for (uint32_t b = 0; b < num_counters; b++) {
|
||||||
|
const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
|
||||||
|
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
|
||||||
|
|
||||||
|
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type,
|
||||||
|
reg_base + b * 4, 1);
|
||||||
|
ac_cmdbuf_emit(cntr_sel->sel0);
|
||||||
|
}
|
||||||
|
|
||||||
|
ac_cmdbuf_end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
|
||||||
|
uint32_t num_counters = spm->sqg[instance].num_counters;
|
||||||
|
|
||||||
|
if (!num_counters)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ac_cmdbuf_begin(cs);
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |
|
||||||
|
S_030800_INSTANCE_BROADCAST_WRITES(1) |
|
||||||
|
S_030800_SE_INDEX(instance));
|
||||||
|
|
||||||
|
for (uint32_t b = 0; b < num_counters; b++) {
|
||||||
|
const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
|
||||||
|
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
|
||||||
|
|
||||||
|
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type,
|
||||||
|
reg_base + b * 4, 1);
|
||||||
|
ac_cmdbuf_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
|
||||||
|
}
|
||||||
|
|
||||||
|
ac_cmdbuf_end();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
|
||||||
|
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
|
||||||
|
struct ac_pc_block_base *regs = block_sel->b->b->b;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < block_sel->num_instances; i++) {
|
||||||
|
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
|
||||||
|
|
||||||
|
ac_cmdbuf_begin(cs);
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
|
||||||
|
|
||||||
|
for (unsigned c = 0; c < block_instance->num_counters; c++) {
|
||||||
|
const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
|
||||||
|
|
||||||
|
if (!cntr_sel->active)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, regs->select0[c], 1);
|
||||||
|
ac_cmdbuf_emit(cntr_sel->sel0);
|
||||||
|
|
||||||
|
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, regs->select1[c], 1);
|
||||||
|
ac_cmdbuf_emit(cntr_sel->sel1);
|
||||||
|
}
|
||||||
|
|
||||||
|
ac_cmdbuf_end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Restore global broadcasting. */
|
||||||
|
ac_cmdbuf_begin(cs);
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) |
|
||||||
|
S_030800_SH_BROADCAST_WRITES(1) |
|
||||||
|
S_030800_INSTANCE_BROADCAST_WRITES(1));
|
||||||
|
ac_cmdbuf_end();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||||
|
enum amd_ip_type ip_type, const struct ac_spm *spm,
|
||||||
|
uint64_t va)
|
||||||
|
{
|
||||||
|
/* It's required that the ring VA and the size are correctly aligned. */
|
||||||
|
assert(!(va & (AC_SPM_RING_BASE_ALIGN - 1)));
|
||||||
|
assert(!(spm->buffer_size & (AC_SPM_RING_BASE_ALIGN - 1)));
|
||||||
|
assert(spm->sample_interval >= 32);
|
||||||
|
|
||||||
|
ac_cmdbuf_begin(cs);
|
||||||
|
|
||||||
|
/* Configure the SPM ring buffer. */
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
|
||||||
|
S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
|
||||||
|
S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
|
||||||
|
S_037208_RING_BASE_HI(va >> 32));
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, spm->buffer_size);
|
||||||
|
|
||||||
|
/* Configure the muxsel. */
|
||||||
|
uint32_t total_muxsel_lines = 0;
|
||||||
|
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||||
|
total_muxsel_lines += spm->num_muxsel_lines[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
|
||||||
|
|
||||||
|
if (gfx_level >= GFX11) {
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
|
||||||
|
S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
|
||||||
|
S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
|
||||||
|
S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));
|
||||||
|
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_037210_RLC_SPM_RING_WRPTR, 0);
|
||||||
|
} else {
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
|
||||||
|
S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
|
||||||
|
S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
|
||||||
|
S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
|
||||||
|
S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
|
||||||
|
ac_cmdbuf_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
|
||||||
|
S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
|
||||||
|
S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
|
||||||
|
}
|
||||||
|
|
||||||
|
ac_cmdbuf_end();
|
||||||
|
|
||||||
|
/* Upload each muxsel ram to the RLC. */
|
||||||
|
ac_emit_spm_muxsel(cs, gfx_level, ip_type, spm);
|
||||||
|
|
||||||
|
/* Select SPM counters. */
|
||||||
|
ac_emit_spm_counters(cs, gfx_level, ip_type, spm);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,12 +11,16 @@
|
||||||
|
|
||||||
#include "ac_perfcounter.h"
|
#include "ac_perfcounter.h"
|
||||||
|
|
||||||
|
struct ac_cmdbuf;
|
||||||
|
|
||||||
#define AC_SPM_MAX_COUNTER_PER_BLOCK 16
|
#define AC_SPM_MAX_COUNTER_PER_BLOCK 16
|
||||||
#define AC_SPM_GLOBAL_TIMESTAMP_COUNTERS 4 /* in unit of 16-bit counters*/
|
#define AC_SPM_GLOBAL_TIMESTAMP_COUNTERS 4 /* in unit of 16-bit counters*/
|
||||||
#define AC_SPM_NUM_COUNTER_PER_MUXSEL 16 /* 16 16-bit counters per muxsel */
|
#define AC_SPM_NUM_COUNTER_PER_MUXSEL 16 /* 16 16-bit counters per muxsel */
|
||||||
#define AC_SPM_MUXSEL_LINE_SIZE ((AC_SPM_NUM_COUNTER_PER_MUXSEL * 2) / 4) /* in dwords */
|
#define AC_SPM_MUXSEL_LINE_SIZE ((AC_SPM_NUM_COUNTER_PER_MUXSEL * 2) / 4) /* in dwords */
|
||||||
#define AC_SPM_NUM_PERF_SEL 4
|
#define AC_SPM_NUM_PERF_SEL 4
|
||||||
|
|
||||||
|
#define AC_SPM_RING_BASE_ALIGN 32
|
||||||
|
|
||||||
/* GFX10+ */
|
/* GFX10+ */
|
||||||
enum ac_spm_global_block {
|
enum ac_spm_global_block {
|
||||||
AC_SPM_GLOBAL_BLOCK_CPG,
|
AC_SPM_GLOBAL_BLOCK_CPG,
|
||||||
|
|
@ -197,4 +201,9 @@ void ac_destroy_spm(struct ac_spm *spm);
|
||||||
|
|
||||||
bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
|
bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
|
||||||
|
|
||||||
|
void
|
||||||
|
ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||||
|
enum amd_ip_type ip_type, const struct ac_spm *spm,
|
||||||
|
uint64_t va);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,6 @@
|
||||||
#include "radv_spm.h"
|
#include "radv_spm.h"
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
|
|
||||||
#define SPM_RING_BASE_ALIGN 32
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
radv_spm_init_bo(struct radv_device *device)
|
radv_spm_init_bo(struct radv_device *device)
|
||||||
{
|
{
|
||||||
|
|
@ -67,209 +65,15 @@ radv_spm_resize_bo(struct radv_device *device)
|
||||||
return radv_spm_init_bo(device);
|
return radv_spm_init_bo(device);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs)
|
|
||||||
{
|
|
||||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
|
||||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
|
||||||
struct ac_spm *spm = &device->spm;
|
|
||||||
|
|
||||||
if (gfx_level >= GFX11) {
|
|
||||||
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
|
|
||||||
uint32_t num_counters = spm->sq_wgp[instance].num_counters;
|
|
||||||
|
|
||||||
if (!num_counters)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
|
|
||||||
radeon_begin(cs);
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
|
|
||||||
|
|
||||||
for (uint32_t b = 0; b < num_counters; b++) {
|
|
||||||
const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
|
|
||||||
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
|
|
||||||
|
|
||||||
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
|
|
||||||
radeon_emit(cntr_sel->sel0);
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_end();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
|
|
||||||
uint32_t num_counters = spm->sqg[instance].num_counters;
|
|
||||||
|
|
||||||
if (!num_counters)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
|
|
||||||
radeon_begin(cs);
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |
|
|
||||||
S_030800_INSTANCE_BROADCAST_WRITES(1) |
|
|
||||||
S_030800_SE_INDEX(instance));
|
|
||||||
|
|
||||||
for (uint32_t b = 0; b < num_counters; b++) {
|
|
||||||
const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
|
|
||||||
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
|
|
||||||
|
|
||||||
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
|
|
||||||
radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_end();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
|
|
||||||
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
|
|
||||||
struct ac_pc_block_base *regs = block_sel->b->b->b;
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < block_sel->num_instances; i++) {
|
|
||||||
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
|
|
||||||
|
|
||||||
radeon_check_space(device->ws, cs->b, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
|
|
||||||
radeon_begin(cs);
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
|
|
||||||
|
|
||||||
for (unsigned c = 0; c < block_instance->num_counters; c++) {
|
|
||||||
const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
|
|
||||||
|
|
||||||
if (!cntr_sel->active)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select0[c], 1);
|
|
||||||
radeon_emit(cntr_sel->sel0);
|
|
||||||
|
|
||||||
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select1[c], 1);
|
|
||||||
radeon_emit(cntr_sel->sel1);
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_end();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Restore global broadcasting. */
|
|
||||||
radeon_begin(cs);
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
|
|
||||||
S_030800_INSTANCE_BROADCAST_WRITES(1));
|
|
||||||
radeon_end();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs)
|
|
||||||
{
|
|
||||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
|
||||||
const struct ac_spm *spm = &device->spm;
|
|
||||||
|
|
||||||
/* Upload each muxsel ram to the RLC. */
|
|
||||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
|
||||||
unsigned rlc_muxsel_addr, rlc_muxsel_data;
|
|
||||||
unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1);
|
|
||||||
|
|
||||||
if (!spm->num_muxsel_lines[s])
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
|
|
||||||
grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
|
|
||||||
|
|
||||||
rlc_muxsel_addr =
|
|
||||||
pdev->info.gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
|
|
||||||
rlc_muxsel_data =
|
|
||||||
pdev->info.gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
|
|
||||||
} else {
|
|
||||||
grbm_gfx_index |= S_030800_SE_INDEX(s);
|
|
||||||
|
|
||||||
rlc_muxsel_addr =
|
|
||||||
pdev->info.gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
|
|
||||||
rlc_muxsel_data =
|
|
||||||
pdev->info.gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_check_space(device->ws, cs->b, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
|
|
||||||
radeon_begin(cs);
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
|
|
||||||
|
|
||||||
for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
|
|
||||||
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
|
|
||||||
|
|
||||||
/* Select MUXSEL_ADDR to point to the next muxsel. */
|
|
||||||
radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, cs->hw_ip, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
|
|
||||||
|
|
||||||
/* Write the muxsel line configuration with MUXSEL_DATA. */
|
|
||||||
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
|
|
||||||
radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
|
|
||||||
S_370_WR_ONE_ADDR(1));
|
|
||||||
radeon_emit(rlc_muxsel_data >> 2);
|
|
||||||
radeon_emit(0);
|
|
||||||
radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_end();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs)
|
radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs)
|
||||||
{
|
{
|
||||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||||
struct ac_spm *spm = &device->spm;
|
struct ac_spm *spm = &device->spm;
|
||||||
uint64_t va = radv_buffer_get_va(spm->bo);
|
uint64_t va = radv_buffer_get_va(spm->bo);
|
||||||
uint64_t ring_size = spm->buffer_size;
|
|
||||||
|
|
||||||
/* It's required that the ring VA and the size are correctly aligned. */
|
radeon_check_space(device->ws, cs->b, 2048);
|
||||||
assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
|
ac_emit_spm_setup(cs->b, pdev->info.gfx_level, cs->hw_ip, spm, va);
|
||||||
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
|
|
||||||
assert(spm->sample_interval >= 32);
|
|
||||||
|
|
||||||
radeon_check_space(device->ws, cs->b, 27);
|
|
||||||
radeon_begin(cs);
|
|
||||||
|
|
||||||
/* Configure the SPM ring buffer. */
|
|
||||||
radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
|
|
||||||
S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
|
|
||||||
S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
|
|
||||||
radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
|
|
||||||
radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32));
|
|
||||||
radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
|
|
||||||
|
|
||||||
/* Configure the muxsel. */
|
|
||||||
uint32_t total_muxsel_lines = 0;
|
|
||||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
|
||||||
total_muxsel_lines += spm->num_muxsel_lines[s];
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
|
|
||||||
|
|
||||||
if (pdev->info.gfx_level >= GFX11) {
|
|
||||||
radeon_set_uconfig_reg(R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
|
|
||||||
S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
|
|
||||||
S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
|
|
||||||
S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_037210_RLC_SPM_RING_WRPTR, 0);
|
|
||||||
} else {
|
|
||||||
radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
|
|
||||||
radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
|
|
||||||
S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
|
|
||||||
S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
|
|
||||||
S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
|
|
||||||
S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
|
|
||||||
radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
|
|
||||||
S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
|
|
||||||
S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_end();
|
|
||||||
|
|
||||||
/* Upload each muxsel ram to the RLC. */
|
|
||||||
radv_emit_spm_muxsel(device, cs);
|
|
||||||
|
|
||||||
/* Select SPM counters. */
|
|
||||||
radv_emit_spm_counters(device, cs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
||||||
|
|
@ -732,152 +732,14 @@ si_spm_init_bo(struct si_context *sctx)
|
||||||
return sctx->spm.bo != NULL;
|
return sctx->spm.bo != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
|
||||||
si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|
||||||
{
|
|
||||||
struct ac_spm *spm = &sctx->spm;
|
|
||||||
|
|
||||||
radeon_begin(cs);
|
|
||||||
|
|
||||||
for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
|
|
||||||
uint32_t num_counters = spm->sqg[instance].num_counters;
|
|
||||||
|
|
||||||
if (!num_counters)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
|
|
||||||
S_030800_SH_BROADCAST_WRITES(1) |
|
|
||||||
S_030800_INSTANCE_BROADCAST_WRITES(1) |
|
|
||||||
S_030800_SE_INDEX(instance));
|
|
||||||
|
|
||||||
for (uint32_t b = 0; b < num_counters; b++) {
|
|
||||||
const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
|
|
||||||
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg_seq(reg_base + b * 4, 1);
|
|
||||||
radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
|
|
||||||
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
|
|
||||||
struct ac_pc_block_base *regs = block_sel->b->b->b;
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < block_sel->num_instances; i++) {
|
|
||||||
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
|
|
||||||
|
|
||||||
for (unsigned c = 0; c < block_instance->num_counters; c++) {
|
|
||||||
const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
|
|
||||||
|
|
||||||
if (!cntr_sel->active)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg_seq(regs->select0[c], 1);
|
|
||||||
radeon_emit(cntr_sel->sel0);
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg_seq(regs->select1[c], 1);
|
|
||||||
radeon_emit(cntr_sel->sel1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Restore global broadcasting. */
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
|
|
||||||
S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
|
|
||||||
S_030800_INSTANCE_BROADCAST_WRITES(1));
|
|
||||||
|
|
||||||
radeon_end();
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SPM_RING_BASE_ALIGN 32
|
|
||||||
|
|
||||||
void
|
void
|
||||||
si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||||
{
|
{
|
||||||
|
const enum amd_ip_type ip_type = sctx->ws->cs_get_ip_type(cs);
|
||||||
struct ac_spm *spm = &sctx->spm;
|
struct ac_spm *spm = &sctx->spm;
|
||||||
uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo);
|
uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo);
|
||||||
uint64_t ring_size = spm->buffer_size;
|
|
||||||
|
|
||||||
/* It's required that the ring VA and the size are correctly aligned. */
|
ac_emit_spm_setup(&cs->current, sctx->gfx_level, ip_type, spm, va);
|
||||||
assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
|
|
||||||
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
|
|
||||||
assert(spm->sample_interval >= 32);
|
|
||||||
|
|
||||||
radeon_begin(cs);
|
|
||||||
|
|
||||||
/* Configure the SPM ring buffer. */
|
|
||||||
radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
|
|
||||||
S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
|
|
||||||
S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
|
|
||||||
radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
|
|
||||||
radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
|
|
||||||
S_037208_RING_BASE_HI(va >> 32));
|
|
||||||
radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
|
|
||||||
|
|
||||||
/* Configure the muxsel. */
|
|
||||||
uint32_t total_muxsel_lines = 0;
|
|
||||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
|
||||||
total_muxsel_lines += spm->num_muxsel_lines[s];
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
|
|
||||||
radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
|
|
||||||
radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
|
|
||||||
S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
|
|
||||||
S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
|
|
||||||
S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
|
|
||||||
S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
|
|
||||||
radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
|
|
||||||
S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
|
|
||||||
S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
|
|
||||||
|
|
||||||
/* Upload each muxsel ram to the RLC. */
|
|
||||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
|
||||||
unsigned rlc_muxsel_addr, rlc_muxsel_data;
|
|
||||||
unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
|
|
||||||
S_030800_INSTANCE_BROADCAST_WRITES(1);
|
|
||||||
|
|
||||||
if (!spm->num_muxsel_lines[s])
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
|
|
||||||
grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
|
|
||||||
|
|
||||||
rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
|
|
||||||
rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
|
|
||||||
} else {
|
|
||||||
grbm_gfx_index |= S_030800_SE_INDEX(s);
|
|
||||||
|
|
||||||
rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
|
|
||||||
rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA;
|
|
||||||
}
|
|
||||||
|
|
||||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
|
|
||||||
|
|
||||||
for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
|
|
||||||
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
|
|
||||||
|
|
||||||
/* Select MUXSEL_ADDR to point to the next muxsel. */
|
|
||||||
radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
|
|
||||||
|
|
||||||
/* Write the muxsel line configuration with MUXSEL_DATA. */
|
|
||||||
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
|
|
||||||
radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
|
|
||||||
S_370_WR_CONFIRM(1) |
|
|
||||||
S_370_ENGINE_SEL(V_370_ME) |
|
|
||||||
S_370_WR_ONE_ADDR(1));
|
|
||||||
radeon_emit(rlc_muxsel_data >> 2);
|
|
||||||
radeon_emit(0);
|
|
||||||
radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
radeon_end();
|
|
||||||
|
|
||||||
/* Select SPM counters. */
|
|
||||||
si_emit_spm_counters(sctx, cs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue