radv: Remove qf from radv_spm/sqtt/perfcounter where applicable
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37775>
This commit is contained in:
Timur Kristóf 2025-10-08 18:40:45 +02:00 committed by Marge Bot
parent 8501c8cd43
commit 772b9ce411
5 changed files with 47 additions and 65 deletions

View file

@ -35,11 +35,11 @@ radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream
}
static void
radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *cs, int family, bool enable)
radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *cs, bool enable)
{
radeon_begin(cs);
if (family == RADV_QUEUE_GENERAL) {
if (cs->hw_ip == AMD_IP_GFX) {
radeon_event_write(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP);
}
@ -101,20 +101,20 @@ radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm)
}
void
radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family)
radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs)
{
/* Start SPM counters. */
radv_perfcounter_emit_start(cs, true);
radv_emit_windowed_counters(device, cs, family, true);
radv_emit_windowed_counters(device, cs, true);
}
void
radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family)
radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
radv_emit_windowed_counters(device, cs, family, false);
radv_emit_windowed_counters(device, cs, false);
/* Stop SPM counters. */
if (pdev->info.never_stop_sq_perf_counters) {
@ -518,7 +518,6 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block,
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_pc_block_base *regs = block->b->b;
struct radv_cmd_stream *cs = cmd_buffer->cs;
@ -533,7 +532,8 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block,
radeon_begin(cs);
for (idx = 0; idx < count; ++idx) {
radeon_set_uconfig_perfctr_reg(gfx_level, ring, regs->select0[idx], G_REG_SEL(selectors[idx]) | regs->select_or);
radeon_set_uconfig_perfctr_reg(gfx_level, cs->hw_ip, regs->select0[idx],
G_REG_SEL(selectors[idx]) | regs->select_or);
}
for (idx = 0; idx < regs->num_spm_counters; idx++) {
@ -627,7 +627,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
radv_perfcounter_emit_sample(cs);
radv_pc_wait_idle(cmd_buffer);
radv_emit_instance(cmd_buffer, -1, -1);
radv_emit_windowed_counters(device, cs, cmd_buffer->qf, false);
radv_emit_windowed_counters(device, cs, false);
radv_perfcounter_emit_stop(cs, false);
for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
@ -742,7 +742,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
radv_pc_stop_and_sample(cmd_buffer, pool, va, false);
radv_perfcounter_emit_start(cs, false);
radv_emit_windowed_counters(device, cs, cmd_buffer->qf, true);
radv_emit_windowed_counters(device, cs, true);
assert(cs->b->cdw <= cdw_max);
}

View file

@ -34,9 +34,9 @@ void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_s
void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs);
void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);

View file

@ -68,10 +68,9 @@ radv_spm_resize_bo(struct radv_device *device)
}
static void
radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_spm *spm = &device->spm;
@ -91,7 +90,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e
const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, reg_base + b * 4, 1);
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
radeon_emit(cntr_sel->sel0);
}
@ -116,7 +115,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e
const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, reg_base + b * 4, 1);
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1);
radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
}
@ -141,10 +140,10 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e
if (!cntr_sel->active)
continue;
radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, regs->select0[c], 1);
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select0[c], 1);
radeon_emit(cntr_sel->sel0);
radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, regs->select1[c], 1);
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select1[c], 1);
radeon_emit(cntr_sel->sel1);
}
@ -160,10 +159,9 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e
}
static void
radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf);
const struct ac_spm *spm = &device->spm;
/* Upload each muxsel ram to the RLC. */
@ -199,7 +197,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enu
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
/* Select MUXSEL_ADDR to point to the next muxsel. */
radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, ring, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, cs->hw_ip, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
/* Write the muxsel line configuration with MUXSEL_DATA. */
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
@ -215,7 +213,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enu
}
void
radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_spm *spm = &device->spm;
@ -268,10 +266,10 @@ radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum
radeon_end();
/* Upload each muxsel ram to the RLC. */
radv_emit_spm_muxsel(device, cs, qf);
radv_emit_spm_muxsel(device, cs);
/* Select SPM counters. */
radv_emit_spm_counters(device, cs, qf);
radv_emit_spm_counters(device, cs);
}
bool

View file

@ -15,7 +15,7 @@
#include "radv_queue.h"
#include "radv_radeon_winsys.h"
void radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf);
void radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs);
bool radv_spm_init(struct radv_device *device);

View file

@ -32,39 +32,24 @@ radv_sqtt_queue_events_enabled(void)
return debug_get_bool_option("RADV_THREAD_TRACE_QUEUE_EVENTS", true);
}
/**
 * Translate a hardware IP type into the matching RADV queue family.
 *
 * Mapping performed by the switch below:
 *   AMD_IP_GFX     -> RADV_QUEUE_GENERAL
 *   AMD_IP_COMPUTE -> RADV_QUEUE_COMPUTE
 *   AMD_IP_SDMA    -> RADV_QUEUE_TRANSFER
 *
 * Any other IP type is treated as a programming error and hits
 * UNREACHABLE("Unknown IP type").
 */
static enum radv_queue_family
radv_ip_to_queue_family(enum amd_ip_type t)
{
switch (t) {
case AMD_IP_GFX:
return RADV_QUEUE_GENERAL;
case AMD_IP_COMPUTE:
return RADV_QUEUE_COMPUTE;
case AMD_IP_SDMA:
return RADV_QUEUE_TRANSFER;
default:
/* No other IP type is expected to reach this helper. */
UNREACHABLE("Unknown IP type");
}
}
static void
radv_emit_wait_for_idle(const struct radv_device *device, struct radv_cmd_stream *cs, int family)
radv_emit_wait_for_idle(const struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
enum rgp_flush_bits sqtt_flush_bits = 0;
radv_cs_emit_cache_flush(
device->ws, cs, pdev->info.gfx_level, NULL, 0,
(family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
: (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
(cs->hw_ip == AMD_IP_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
: (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2,
&sqtt_flush_bits, 0);
}
static void
radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE;
const bool is_compute_queue = cs->hw_ip == AMD_IP_COMPUTE;
struct ac_pm4_state *pm4;
pm4 = ac_pm4_create_sized(&pdev->info, false, 512, is_compute_queue);
@ -81,10 +66,10 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *c
}
static void
radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE;
const bool is_compute_queue = cs->hw_ip == AMD_IP_COMPUTE;
struct ac_pm4_state *pm4;
pm4 = ac_pm4_create_sized(&pdev->info, false, 512, is_compute_queue);
@ -101,7 +86,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs
if (pdev->info.has_sqtt_rb_harvest_bug) {
/* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */
radv_emit_wait_for_idle(device, cs, qf);
radv_emit_wait_for_idle(device, cs);
}
ac_sqtt_emit_wait(&pdev->info, pm4, &device->sqtt, is_compute_queue);
@ -119,7 +104,6 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint32_t *dwords = (uint32_t *)data;
@ -136,7 +120,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
/* Without the perfctr bit the CP might not always pass the
* write on correctly. */
if (pdev->info.gfx_level >= GFX10)
radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
else
radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
radeon_emit_array(dwords, count);
@ -532,25 +516,25 @@ radv_begin_sqtt(struct radv_queue *queue)
radeon_begin(&cs);
switch (family) {
case RADV_QUEUE_GENERAL:
switch (cs.hw_ip) {
case AMD_IP_GFX:
radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
break;
case RADV_QUEUE_COMPUTE:
case AMD_IP_COMPUTE:
radeon_emit(PKT3(PKT3_NOP, 0, 0));
radeon_emit(0);
break;
default:
UNREACHABLE("Incorrect queue family");
UNREACHABLE("Incorrect HW IP type");
break;
}
radeon_end();
/* Make sure to wait-for-idle before starting SQTT. */
radv_emit_wait_for_idle(device, &cs, family);
radv_emit_wait_for_idle(device, &cs);
/* Disable clock gating before starting SQTT. */
radv_emit_inhibit_clockgating(device, &cs, true);
@ -564,15 +548,15 @@ radv_begin_sqtt(struct radv_queue *queue)
/* Enable all shader stages by default. */
radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info));
radv_emit_spm_setup(device, &cs, family);
radv_emit_spm_setup(device, &cs);
}
/* Start SQTT. */
radv_emit_sqtt_start(device, &cs, family);
radv_emit_sqtt_start(device, &cs);
if (device->spm.bo) {
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_start(device, &cs, family);
radv_perfcounter_emit_spm_start(device, &cs);
}
result = ws->cs_finalize(cs.b);
@ -609,33 +593,33 @@ radv_end_sqtt(struct radv_queue *queue)
radeon_begin(&cs);
switch (family) {
case RADV_QUEUE_GENERAL:
switch (cs.hw_ip) {
case AMD_IP_GFX:
radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
break;
case RADV_QUEUE_COMPUTE:
case AMD_IP_COMPUTE:
radeon_emit(PKT3(PKT3_NOP, 0, 0));
radeon_emit(0);
break;
default:
UNREACHABLE("Incorrect queue family");
UNREACHABLE("Incorrect HW IP type");
break;
}
radeon_end();
/* Make sure to wait-for-idle before stopping SQTT. */
radv_emit_wait_for_idle(device, &cs, family);
radv_emit_wait_for_idle(device, &cs);
if (device->spm.bo) {
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_stop(device, &cs, family);
radv_perfcounter_emit_spm_stop(device, &cs);
}
/* Stop SQTT. */
radv_emit_sqtt_stop(device, &cs, family);
radv_emit_sqtt_stop(device, &cs);
radv_perfcounter_emit_reset(&cs, true);