mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 01:20:17 +01:00
ac/sqtt: fix registers programming for GFX12
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34049>
This commit is contained in:
parent
13836575e3
commit
3bf2f95a91
4 changed files with 31 additions and 26 deletions
|
|
@@ -15,13 +15,6 @@
|
|||
|
||||
#include "sid.h"
|
||||
|
||||
uint32_t
|
||||
ac_sqtt_get_buffer_align_shift(const struct radeon_info *info)
|
||||
{
|
||||
/* SQTT buffer VA is 36-bits on GFX8-11.5. */
|
||||
return info->gfx_level >= GFX12 ? 0 : 12;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
ac_sqtt_get_info_offset(unsigned se)
|
||||
{
|
||||
|
|
@@ -31,11 +24,10 @@ ac_sqtt_get_info_offset(unsigned se)
|
|||
uint64_t
|
||||
ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *data, unsigned se)
|
||||
{
|
||||
const uint32_t align_shift = ac_sqtt_get_buffer_align_shift(rad_info);
|
||||
unsigned max_se = rad_info->max_se;
|
||||
uint64_t data_offset;
|
||||
|
||||
data_offset = align64(sizeof(struct ac_sqtt_data_info) * max_se, 1ull << align_shift);
|
||||
data_offset = align64(sizeof(struct ac_sqtt_data_info) * max_se, 1ull << SQTT_BUFFER_ALIGN_SHIFT);
|
||||
data_offset += data->buffer_size * se;
|
||||
|
||||
return data_offset;
|
||||
|
|
@@ -239,7 +231,9 @@ ac_sqtt_get_active_cu(const struct radeon_info *info, unsigned se)
|
|||
{
|
||||
uint32_t cu_index;
|
||||
|
||||
if (info->gfx_level >= GFX11) {
|
||||
if (info->gfx_level >= GFX12) {
|
||||
cu_index = 0;
|
||||
}else if (info->gfx_level >= GFX11) {
|
||||
/* GFX11 seems to operate on the last active CU. */
|
||||
cu_index = util_last_bit(info->cu_mask[se][0]) - 1;
|
||||
} else {
|
||||
|
|
@@ -303,7 +297,7 @@ ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable)
|
|||
|
||||
if (info->gfx_level >= GFX11) {
|
||||
if (info->gfx_level >= GFX12) {
|
||||
ctrl = S_0367B0_UTIL_TIMER_GFX12(1);
|
||||
ctrl = S_0367B0_UTIL_TIMER_GFX12(1) | S_0367B0_LOWATER_OFFSET(4);
|
||||
} else {
|
||||
ctrl = S_0367B0_UTIL_TIMER_GFX11(1) | S_0367B0_RT_FREQ(2); /* 4096 clk */
|
||||
}
|
||||
|
|
@@ -346,14 +340,13 @@ void
|
|||
ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
|
||||
const struct ac_sqtt *sqtt, bool is_compute_queue)
|
||||
{
|
||||
const uint32_t align_shift = ac_sqtt_get_buffer_align_shift(info);
|
||||
const uint32_t shifted_size = sqtt->buffer_size >> align_shift;
|
||||
const uint32_t shifted_size = sqtt->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
|
||||
const unsigned shader_mask = ac_sqtt_get_shader_mask(info);
|
||||
const unsigned max_se = info->max_se;
|
||||
|
||||
for (unsigned se = 0; se < max_se; se++) {
|
||||
uint64_t data_va = ac_sqtt_get_data_va(info, sqtt, se);
|
||||
uint64_t shifted_va = data_va >> align_shift;
|
||||
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
|
||||
int active_cu = ac_sqtt_get_active_cu(info, se);
|
||||
|
||||
if (ac_sqtt_se_is_disabled(info, se))
|
||||
|
|
@@ -371,6 +364,8 @@ ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
|
|||
|
||||
ac_pm4_set_reg(pm4, R_03679C_SQ_THREAD_TRACE_BUF0_BASE_LO, shifted_va);
|
||||
ac_pm4_set_reg(pm4, R_0367A0_SQ_THREAD_TRACE_BUF0_BASE_HI, S_0367A0_BASE_HI(shifted_va >> 32));
|
||||
|
||||
ac_pm4_set_reg(pm4, R_0367BC_SQ_THREAD_TRACE_WPTR, 0);
|
||||
} else {
|
||||
ac_pm4_set_reg(pm4, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
|
||||
S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32));
|
||||
|
|
@@ -387,7 +382,7 @@ ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
|
|||
V_0367B8_REG_INCLUDE_CONTEXT | V_0367B8_REG_INCLUDE_CONFIG);
|
||||
|
||||
/* Performance counters with SQTT are considered deprecated. */
|
||||
uint32_t token_exclude = V_0367B8_TOKEN_EXCLUDE_PERF;
|
||||
uint32_t token_exclude = 0;
|
||||
|
||||
if (!sqtt->instruction_timing_enabled) {
|
||||
/* Reduce SQTT traffic when instruction timing isn't enabled. */
|
||||
|
|
@@ -397,8 +392,14 @@ ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
|
|||
}
|
||||
|
||||
if (info->gfx_level >= GFX12) {
|
||||
sqtt_token_mask |= S_0367B8_TOKEN_EXCLUDE_GFX12(token_exclude) | S_0367B8_BOP_EVENTS_TOKEN_INCLUDE_GFX12(1);
|
||||
sqtt_token_mask |= S_0367B8_TOKEN_EXCLUDE_GFX12(token_exclude) |
|
||||
S_0367B8_BOP_EVENTS_TOKEN_INCLUDE_GFX12(1) |
|
||||
S_0367B8_EXCLUDE_BARRIER_WAIT(1) |
|
||||
S_0367B8_REG_EXCLUDE(2); /* CP_ME_MC_RADDR */
|
||||
} else {
|
||||
/* Performance counters with SQTT are considered deprecated. */
|
||||
token_exclude |= V_0367B8_TOKEN_EXCLUDE_PERF;
|
||||
|
||||
sqtt_token_mask |= S_0367B8_TOKEN_EXCLUDE_GFX11(token_exclude) | S_0367B8_BOP_EVENTS_TOKEN_INCLUDE_GFX11(1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -16,6 +16,8 @@
|
|||
#include "ac_rgp.h"
|
||||
#include "amd_family.h"
|
||||
|
||||
#define SQTT_BUFFER_ALIGN_SHIFT 12
|
||||
|
||||
struct radeon_cmdbuf;
|
||||
struct radeon_info;
|
||||
|
||||
|
|
@@ -543,8 +545,6 @@ bool ac_check_profile_state(const struct radeon_info *info);
|
|||
union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt,
|
||||
enum amd_ip_type ip_type);
|
||||
|
||||
uint32_t ac_sqtt_get_buffer_align_shift(const struct radeon_info *info);
|
||||
|
||||
bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info,
|
||||
struct ac_sqtt_trace *sqtt_trace);
|
||||
|
||||
|
|
|
|||
|
|
@@ -151,7 +151,10 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf
|
|||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_uconfig_reg(cs, R_031120_SPI_SQG_EVENT_CTL,
|
||||
S_031120_ENABLE_SQG_TOP_EVENTS(enable) | S_031120_ENABLE_SQG_BOP_EVENTS(enable));
|
||||
} else if (pdev->info.gfx_level >= GFX9) {
|
||||
uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
|
||||
S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
|
||||
|
||||
|
|
@@ -316,7 +319,6 @@ static bool
|
|||
radv_sqtt_init_bo(struct radv_device *device)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint32_t align_shift = ac_sqtt_get_buffer_align_shift(&pdev->info);
|
||||
unsigned max_se = pdev->info.max_se;
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
VkResult result;
|
||||
|
|
@@ -325,10 +327,10 @@ radv_sqtt_init_bo(struct radv_device *device)
|
|||
/* The buffer size and address need to be aligned in HW regs. Align the
|
||||
* size as early as possible so that we do all the allocation & addressing
|
||||
* correctly. */
|
||||
device->sqtt.buffer_size = align64(device->sqtt.buffer_size, 1ull << align_shift);
|
||||
device->sqtt.buffer_size = align64(device->sqtt.buffer_size, 1ull << SQTT_BUFFER_ALIGN_SHIFT);
|
||||
|
||||
/* Compute total size of the thread trace BO for all SEs. */
|
||||
size = align64(sizeof(struct ac_sqtt_data_info) * max_se, 1ull << align_shift);
|
||||
size = align64(sizeof(struct ac_sqtt_data_info) * max_se, 1ull << SQTT_BUFFER_ALIGN_SHIFT);
|
||||
size += device->sqtt.buffer_size * (uint64_t)max_se;
|
||||
|
||||
struct radeon_winsys_bo *bo = NULL;
|
||||
|
|
|
|||
|
|
@@ -21,7 +21,6 @@ si_emit_spi_config_cntl(struct si_context *sctx,
|
|||
|
||||
static bool si_sqtt_init_bo(struct si_context *sctx)
|
||||
{
|
||||
const uint32_t align_shift = ac_sqtt_get_buffer_align_shift(&sctx->screen->info);
|
||||
unsigned max_se = sctx->screen->info.max_se;
|
||||
struct radeon_winsys *ws = sctx->ws;
|
||||
uint64_t size;
|
||||
|
|
@@ -30,11 +29,11 @@ static bool si_sqtt_init_bo(struct si_context *sctx)
|
|||
* size as early as possible so that we do all the allocation & addressing
|
||||
* correctly. */
|
||||
sctx->sqtt->buffer_size =
|
||||
align64(sctx->sqtt->buffer_size, 1ull << align_shift);
|
||||
align64(sctx->sqtt->buffer_size, 1ull << SQTT_BUFFER_ALIGN_SHIFT);
|
||||
|
||||
/* Compute total size of the thread trace BO for all SEs. */
|
||||
size = align64(sizeof(struct ac_sqtt_data_info) * max_se,
|
||||
1ull << align_shift);
|
||||
1ull << SQTT_BUFFER_ALIGN_SHIFT);
|
||||
size += sctx->sqtt->buffer_size * (uint64_t)max_se;
|
||||
|
||||
sctx->sqtt->bo =
|
||||
|
|
@@ -531,7 +530,10 @@ si_emit_spi_config_cntl(struct si_context *sctx,
|
|||
{
|
||||
radeon_begin(cs);
|
||||
|
||||
if (sctx->gfx_level >= GFX9) {
|
||||
if (sctx->gfx_level >= GFX12) {
|
||||
radeon_set_uconfig_reg(R_031120_SPI_SQG_EVENT_CTL,
|
||||
S_031120_ENABLE_SQG_TOP_EVENTS(enable) | S_031120_ENABLE_SQG_BOP_EVENTS(enable));
|
||||
} else if (sctx->gfx_level >= GFX9) {
|
||||
uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
|
||||
S_031100_EXP_PRIORITY_ORDER(3) |
|
||||
S_031100_ENABLE_SQG_TOP_EVENTS(enable) |
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue