From b8338dee3920afe0da29e7b8168abfaeef5abee1 Mon Sep 17 00:00:00 2001 From: Zan Dobersek Date: Mon, 10 Feb 2025 13:53:11 +0100 Subject: [PATCH] tu/a7xx: disable preemption during performance query measurement Use CP_SCOPE_CNTL to disable preemption when beginning performance query and enable it back when that performance query is ended. This way the collected perfcounter measurements will only cover work that's encompassed by the query. Signed-off-by: Zan Dobersek Reviewed-by: Danylo Piliaiev Part-of: --- .../.gitlab-ci/reference/afuc_test_a7xx.asm | 6 ++--- src/freedreno/registers/adreno/adreno_pm4.xml | 12 +++++++++ src/freedreno/vulkan/tu_query_pool.cc | 25 +++++++++++++++++-- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm b/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm index f0e6f188b76..3b63625eb4c 100644 --- a/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm +++ b/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm @@ -243,6 +243,7 @@ CP_REG_WR_NO_CTXT: CP_RESET_CONTEXT_STATE: CP_RESOURCE_LIST: CP_RUN_OPENCL: +CP_SCOPE_CNTL: CP_SCRATCH_TO_REG: CP_SET_AMBLE: CP_SET_BIN_DATA5_OFFSET: @@ -277,7 +278,6 @@ UNKN103: UNKN104: UNKN105: UNKN106: -UNKN108: UNKN109: UNKN110: UNKN112: @@ -409,6 +409,7 @@ CP_REG_WR_NO_CTXT: CP_RESET_CONTEXT_STATE: CP_RESOURCE_LIST: CP_RUN_OPENCL: +CP_SCOPE_CNTL: CP_SCRATCH_TO_REG: CP_SCRATCH_WRITE: CP_SET_AMBLE: @@ -448,7 +449,6 @@ UNKN103: UNKN104: UNKN105: UNKN106: -UNKN108: UNKN109: UNKN110: UNKN112: @@ -572,6 +572,7 @@ CP_REG_WR_NO_CTXT: CP_RESET_CONTEXT_STATE: CP_RESOURCE_LIST: CP_RUN_OPENCL: +CP_SCOPE_CNTL: CP_SCRATCH_TO_REG: CP_SCRATCH_WRITE: CP_SET_AMBLE: @@ -611,7 +612,6 @@ UNKN103: UNKN104: UNKN105: UNKN106: -UNKN108: UNKN109: UNKN110: UNKN112: diff --git a/src/freedreno/registers/adreno/adreno_pm4.xml b/src/freedreno/registers/adreno/adreno_pm4.xml index 63a333cd306..d4179b3dae7 100644 --- a/src/freedreno/registers/adreno/adreno_pm4.xml +++ 
b/src/freedreno/registers/adreno/adreno_pm4.xml @@ -661,6 +661,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> Invalidates the "CCHE" introduced on a740 + + @@ -2372,5 +2374,15 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + + + + diff --git a/src/freedreno/vulkan/tu_query_pool.cc b/src/freedreno/vulkan/tu_query_pool.cc index 3331e0876b4..e237314c2ca 100644 --- a/src/freedreno/vulkan/tu_query_pool.cc +++ b/src/freedreno/vulkan/tu_query_pool.cc @@ -1039,6 +1039,7 @@ emit_perfcntrs_pass_start(struct tu_cs *cs, uint32_t pass) tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); } +template <chip CHIP> static void emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf, struct tu_query_pool *pool, @@ -1070,6 +1071,16 @@ emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_wfi(cs); + /* Keep preemption disabled for the duration of this query. This way + * changes in perfcounter values should only apply to work done during + * this query. + */ + if (CHIP == A7XX) { + tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1); + tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = true, + .scope = INTERRUPTS).value); + } + for (uint32_t i = 0; i < pool->counter_index_count; i++) { struct tu_perf_query_data *data = &pool->perf_query_data[i]; @@ -1200,7 +1211,7 @@ tu_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, emit_begin_prim_generated_query(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: - emit_begin_perf_query(cmdbuf, pool, query); + emit_begin_perf_query<CHIP>(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: emit_begin_stat_query(cmdbuf, pool, query); @@ -1442,6 +1453,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_qw(cs, 0x1); } +template <chip CHIP> static void emit_end_perf_query(struct tu_cmd_buffer *cmdbuf, struct tu_query_pool *pool, @@ -1512,6 +1524,15 @@ emit_end_perf_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); + /* This reverts the preemption 
disablement done at the start + * of the query. + */ + if (CHIP == A7XX) { + tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1); + tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = false, + .scope = INTERRUPTS).value); + } + if (cmdbuf->state.pass) cs = &cmdbuf->draw_epilogue_cs; @@ -1691,7 +1712,7 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, emit_end_prim_generated_query(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: - emit_end_perf_query(cmdbuf, pool, query); + emit_end_perf_query<CHIP>(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: emit_end_stat_query(cmdbuf, pool, query);