diff --git a/src/amd/common/ac_cmdbuf_cp.c b/src/amd/common/ac_cmdbuf_cp.c
index 2fc87f75a65..c5484fc9648 100644
--- a/src/amd/common/ac_cmdbuf_cp.c
+++ b/src/amd/common/ac_cmdbuf_cp.c
@@ -483,3 +483,21 @@ ac_emit_cp_load_context_reg_index(struct ac_cmdbuf *cs, uint32_t reg,
    ac_cmdbuf_emit(reg_count); /* in DWORDS */
    ac_cmdbuf_end();
 }
+
+void
+ac_emit_cp_inhibit_clockgating(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                               bool inhibit)
+{
+   if (gfx_level >= GFX11)
+      return; /* not needed */
+
+   ac_cmdbuf_begin(cs);
+   if (gfx_level >= GFX10) {
+      ac_cmdbuf_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
+                                S_037390_PERFMON_CLOCK_STATE(inhibit));
+   } else if (gfx_level >= GFX8) {
+      ac_cmdbuf_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
+                                S_0372FC_PERFMON_CLOCK_STATE(inhibit));
+   }
+   ac_cmdbuf_end();
+}
diff --git a/src/amd/common/ac_cmdbuf_cp.h b/src/amd/common/ac_cmdbuf_cp.h
index c748d184777..73ab7425b0d 100644
--- a/src/amd/common/ac_cmdbuf_cp.h
+++ b/src/amd/common/ac_cmdbuf_cp.h
@@ -112,6 +112,10 @@ ac_emit_cp_load_context_reg_index(struct ac_cmdbuf *cs, uint32_t reg,
                                   uint32_t reg_count, uint64_t va,
                                   bool predicate);
 
+void
+ac_emit_cp_inhibit_clockgating(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
+                               bool inhibit);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c
index 2fd7710145c..460942b36c2 100644
--- a/src/amd/vulkan/radv_perfcounter.c
+++ b/src/amd/vulkan/radv_perfcounter.c
@@ -684,7 +684,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    radv_pc_wait_idle(cmd_buffer);
 
    radv_perfcounter_emit_reset(cs, false);
-   radv_emit_inhibit_clockgating(device, cs, true);
+   ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, true);
    radv_emit_spi_config_cntl(device, cs, true);
 
    radv_perfcounter_emit_shaders(device, cs, 0x7f);
@@ -752,7 +752,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
    radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
    radv_perfcounter_emit_reset(cs, false);
    radv_emit_spi_config_cntl(device, cs, false);
-   radv_emit_inhibit_clockgating(device, cs, false);
+   ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, false);
 
    assert(cs->b->cdw <= cdw_max);
 }
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index 0c0e5a775d0..cc8ca3ec5f4 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -159,25 +159,6 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stre
    radeon_end();
 }
 
-void
-radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit)
-{
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
-   if (pdev->info.gfx_level >= GFX11)
-      return; /* not needed */
-
-   radeon_begin(cs);
-
-   if (pdev->info.gfx_level >= GFX10) {
-      radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit));
-   } else if (pdev->info.gfx_level >= GFX8) {
-      radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit));
-   }
-
-   radeon_end();
-}
-
 VkResult
 radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
                                 uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr)
@@ -539,7 +520,7 @@ radv_begin_sqtt(struct radv_queue *queue)
    radv_emit_wait_for_idle(device, &cs);
 
    /* Disable clock gating before starting SQTT. */
-   radv_emit_inhibit_clockgating(device, &cs, true);
+   ac_emit_cp_inhibit_clockgating(cs.b, pdev->info.gfx_level, true);
 
    /* Enable SQG events that collects thread trace data. */
    radv_emit_spi_config_cntl(device, &cs, true);
@@ -576,6 +557,7 @@ static bool
 radv_end_sqtt(struct radv_queue *queue)
 {
    struct radv_device *device = radv_queue_device(queue);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    enum radv_queue_family family = queue->state.qf;
    struct radeon_winsys *ws = device->ws;
    struct radv_cmd_stream cs;
@@ -631,7 +613,7 @@ radv_end_sqtt(struct radv_queue *queue)
    radv_emit_spi_config_cntl(device, &cs, false);
 
    /* Restore previous state by re-enabling clock gating. */
-   radv_emit_inhibit_clockgating(device, &cs, false);
+   ac_emit_cp_inhibit_clockgating(cs.b, pdev->info.gfx_level, false);
 
    result = ws->cs_finalize(cs.b);
    if (result != VK_SUCCESS) {
diff --git a/src/amd/vulkan/radv_sqtt.h b/src/amd/vulkan/radv_sqtt.h
index 49087ad4fb5..20425a77831 100644
--- a/src/amd/vulkan/radv_sqtt.h
+++ b/src/amd/vulkan/radv_sqtt.h
@@ -67,8 +67,6 @@ void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const voi
 
 void radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stream *cs, bool enable);
 
-void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit);
-
 VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
                                          uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);
 
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index e2361ef14db..ae5bbe2d0b1 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -267,19 +267,7 @@ static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery
 
 void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
 {
-   if (sctx->gfx_level >= GFX11)
-      return;
-
-   radeon_begin(&sctx->gfx_cs);
-
-   if (sctx->gfx_level >= GFX10) {
-      radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
-                             S_037390_PERFMON_CLOCK_STATE(inhibit));
-   } else if (sctx->gfx_level >= GFX8) {
-      radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
-                             S_0372FC_PERFMON_CLOCK_STATE(inhibit));
-   }
-   radeon_end();
+   ac_emit_cp_inhibit_clockgating(&sctx->gfx_cs.current, sctx->gfx_level, inhibit);
 }
 
 static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)