diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index af90f9cf3d8..096243e6518 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -35,11 +35,11 @@ radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream } static void -radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *cs, int family, bool enable) +radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *cs, bool enable) { radeon_begin(cs); - if (family == RADV_QUEUE_GENERAL) { + if (cs->hw_ip == AMD_IP_GFX) { radeon_event_write(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP); } @@ -101,20 +101,20 @@ radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm) } void -radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family) +radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs) { /* Start SPM counters. */ radv_perfcounter_emit_start(cs, true); - radv_emit_windowed_counters(device, cs, family, true); + radv_emit_windowed_counters(device, cs, true); } void -radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family) +radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); - radv_emit_windowed_counters(device, cs, family, false); + radv_emit_windowed_counters(device, cs, false); /* Stop SPM counters. */ if (pdev->info.never_stop_sq_perf_counters) { @@ -518,7 +518,6 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct ac_pc_block_base *regs = block->b->b; struct radv_cmd_stream *cs = cmd_buffer->cs; @@ -533,7 +532,8 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, radeon_begin(cs); for (idx = 0; idx < count; ++idx) { - radeon_set_uconfig_perfctr_reg(gfx_level, ring, regs->select0[idx], G_REG_SEL(selectors[idx]) | regs->select_or); + radeon_set_uconfig_perfctr_reg(gfx_level, cs->hw_ip, regs->select0[idx], + G_REG_SEL(selectors[idx]) | regs->select_or); } for (idx = 0; idx < regs->num_spm_counters; idx++) { @@ -627,7 +627,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query radv_perfcounter_emit_sample(cs); radv_pc_wait_idle(cmd_buffer); radv_emit_instance(cmd_buffer, -1, -1); - radv_emit_windowed_counters(device, cs, cmd_buffer->qf, false); + radv_emit_windowed_counters(device, cs, false); radv_perfcounter_emit_stop(cs, false); for (unsigned pass = 0; pass < pool->num_passes; ++pass) { @@ -742,7 +742,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo radv_pc_stop_and_sample(cmd_buffer, pool, va, false); radv_perfcounter_emit_start(cs, false); - radv_emit_windowed_counters(device, cs, cmd_buffer->qf, true); + radv_emit_windowed_counters(device, cs, true); assert(cs->b->cdw <= cdw_max); } diff --git a/src/amd/vulkan/radv_perfcounter.h b/src/amd/vulkan/radv_perfcounter.h index b44daa5ac8b..0da3d575819 100644 --- a/src/amd/vulkan/radv_perfcounter.h +++ b/src/amd/vulkan/radv_perfcounter.h @@ -34,9 +34,9 @@ void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_s void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm); -void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family); +void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs); -void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family); +void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs); void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool); diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index 379ebd4cfd8..1638a1e6e72 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -68,10 +68,9 @@ radv_spm_resize_bo(struct radv_device *device) } static void -radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) +radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); - const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct ac_spm *spm = &device->spm; @@ -91,7 +90,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b]; uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; - radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, reg_base + b * 4, 1); + radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1); radeon_emit(cntr_sel->sel0); } @@ -116,7 +115,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b]; uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; - radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, reg_base + b * 4, 1); + radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, reg_base + b * 4, 1); radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ } @@ -141,10 +140,10 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e if (!cntr_sel->active) continue; - radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, regs->select0[c], 1); + radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select0[c], 1); radeon_emit(cntr_sel->sel0); - radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, regs->select1[c], 1); + radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, regs->select1[c], 1); radeon_emit(cntr_sel->sel1); } @@ -160,10 +159,9 @@ radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, e } static void -radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) +radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); - const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf); const struct ac_spm *spm = &device->spm; /* Upload each muxsel ram to the RLC. */ @@ -199,7 +197,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enu uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel; /* Select MUXSEL_ADDR to point to the next muxsel. */ - radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, ring, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE); + radeon_set_uconfig_perfctr_reg(pdev->info.gfx_level, cs->hw_ip, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE); /* Write the muxsel line configuration with MUXSEL_DATA. */ radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0)); @@ -215,7 +213,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enu } void -radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) +radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); struct ac_spm *spm = &device->spm; @@ -268,10 +266,10 @@ radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radeon_end(); /* Upload each muxsel ram to the RLC. */ - radv_emit_spm_muxsel(device, cs, qf); + radv_emit_spm_muxsel(device, cs); /* Select SPM counters. */ - radv_emit_spm_counters(device, cs, qf); + radv_emit_spm_counters(device, cs); } bool diff --git a/src/amd/vulkan/radv_spm.h b/src/amd/vulkan/radv_spm.h index 6ade0028c25..2d0a1f1a086 100644 --- a/src/amd/vulkan/radv_spm.h +++ b/src/amd/vulkan/radv_spm.h @@ -15,7 +15,7 @@ #include "radv_queue.h" #include "radv_radeon_winsys.h" -void radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf); +void radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs); bool radv_spm_init(struct radv_device *device); diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index c8f28a93566..88e415e33f4 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -32,39 +32,24 @@ radv_sqtt_queue_events_enabled(void) return debug_get_bool_option("RADV_THREAD_TRACE_QUEUE_EVENTS", true); } -static enum radv_queue_family -radv_ip_to_queue_family(enum amd_ip_type t) -{ - switch (t) { - case AMD_IP_GFX: - return RADV_QUEUE_GENERAL; - case AMD_IP_COMPUTE: - return RADV_QUEUE_COMPUTE; - case AMD_IP_SDMA: - return RADV_QUEUE_TRANSFER; - default: - UNREACHABLE("Unknown IP type"); - } -} - static void -radv_emit_wait_for_idle(const struct radv_device *device, struct radv_cmd_stream *cs, int family) +radv_emit_wait_for_idle(const struct radv_device *device, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); enum rgp_flush_bits sqtt_flush_bits = 0; radv_cs_emit_cache_flush( device->ws, cs, pdev->info.gfx_level, NULL, 0, - (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH - : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | + (cs->hw_ip == AMD_IP_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH + : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2, &sqtt_flush_bits, 0); } static void -radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) +radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); - const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE; + const bool is_compute_queue = cs->hw_ip == AMD_IP_COMPUTE; struct ac_pm4_state *pm4; pm4 = ac_pm4_create_sized(&pdev->info, false, 512, is_compute_queue); @@ -81,10 +66,10 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *c } static void -radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) +radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); - const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE; + const bool is_compute_queue = cs->hw_ip == AMD_IP_COMPUTE; struct ac_pm4_state *pm4; pm4 = ac_pm4_create_sized(&pdev->info, false, 512, is_compute_queue); @@ -101,7 +86,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs if (pdev->info.has_sqtt_rb_harvest_bug) { /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */ - radv_emit_wait_for_idle(device, cs, qf); + radv_emit_wait_for_idle(device, cs); } ac_sqtt_emit_wait(&pdev->info, pm4, &device->sqtt, is_compute_queue); @@ -119,7 +104,6 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; - const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf); struct radv_cmd_stream *cs = cmd_buffer->cs; const uint32_t *dwords = (uint32_t *)data; @@ -136,7 +120,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da /* Without the perfctr bit the CP might not always pass the * write on correctly. */ if (pdev->info.gfx_level >= GFX10) - radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); + radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); else radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); radeon_emit_array(dwords, count); @@ -532,25 +516,25 @@ radv_begin_sqtt(struct radv_queue *queue) radeon_begin(&cs); - switch (family) { - case RADV_QUEUE_GENERAL: + switch (cs.hw_ip) { + case AMD_IP_GFX: radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1)); break; - case RADV_QUEUE_COMPUTE: + case AMD_IP_COMPUTE: radeon_emit(PKT3(PKT3_NOP, 0, 0)); radeon_emit(0); break; default: - UNREACHABLE("Incorrect queue family"); + UNREACHABLE("Incorrect HW IP type"); break; } radeon_end(); /* Make sure to wait-for-idle before starting SQTT. */ - radv_emit_wait_for_idle(device, &cs, family); + radv_emit_wait_for_idle(device, &cs); /* Disable clock gating before starting SQTT. */ radv_emit_inhibit_clockgating(device, &cs, true); @@ -564,15 +548,15 @@ radv_begin_sqtt(struct radv_queue *queue) /* Enable all shader stages by default. */ radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info)); - radv_emit_spm_setup(device, &cs, family); + radv_emit_spm_setup(device, &cs); } /* Start SQTT. */ - radv_emit_sqtt_start(device, &cs, family); + radv_emit_sqtt_start(device, &cs); if (device->spm.bo) { radeon_check_space(ws, cs.b, 8); - radv_perfcounter_emit_spm_start(device, &cs, family); + radv_perfcounter_emit_spm_start(device, &cs); } result = ws->cs_finalize(cs.b); @@ -609,33 +593,33 @@ radv_end_sqtt(struct radv_queue *queue) radeon_begin(&cs); - switch (family) { - case RADV_QUEUE_GENERAL: + switch (cs.hw_ip) { + case AMD_IP_GFX: radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1)); break; - case RADV_QUEUE_COMPUTE: + case AMD_IP_COMPUTE: radeon_emit(PKT3(PKT3_NOP, 0, 0)); radeon_emit(0); break; default: - UNREACHABLE("Incorrect queue family"); + UNREACHABLE("Incorrect HW IP type"); break; } radeon_end(); /* Make sure to wait-for-idle before stopping SQTT. */ - radv_emit_wait_for_idle(device, &cs, family); + radv_emit_wait_for_idle(device, &cs); if (device->spm.bo) { radeon_check_space(ws, cs.b, 8); - radv_perfcounter_emit_spm_stop(device, &cs, family); + radv_perfcounter_emit_spm_stop(device, &cs); } /* Stop SQTT. */ - radv_emit_sqtt_stop(device, &cs, family); + radv_emit_sqtt_stop(device, &cs); radv_perfcounter_emit_reset(&cs, true);