diff --git a/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm b/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm index f0e6f188b76..3b63625eb4c 100644 --- a/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm +++ b/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm @@ -243,6 +243,7 @@ CP_REG_WR_NO_CTXT: CP_RESET_CONTEXT_STATE: CP_RESOURCE_LIST: CP_RUN_OPENCL: +CP_SCOPE_CNTL: CP_SCRATCH_TO_REG: CP_SET_AMBLE: CP_SET_BIN_DATA5_OFFSET: @@ -277,7 +278,6 @@ UNKN103: UNKN104: UNKN105: UNKN106: -UNKN108: UNKN109: UNKN110: UNKN112: @@ -409,6 +409,7 @@ CP_REG_WR_NO_CTXT: CP_RESET_CONTEXT_STATE: CP_RESOURCE_LIST: CP_RUN_OPENCL: +CP_SCOPE_CNTL: CP_SCRATCH_TO_REG: CP_SCRATCH_WRITE: CP_SET_AMBLE: @@ -448,7 +449,6 @@ UNKN103: UNKN104: UNKN105: UNKN106: -UNKN108: UNKN109: UNKN110: UNKN112: @@ -572,6 +572,7 @@ CP_REG_WR_NO_CTXT: CP_RESET_CONTEXT_STATE: CP_RESOURCE_LIST: CP_RUN_OPENCL: +CP_SCOPE_CNTL: CP_SCRATCH_TO_REG: CP_SCRATCH_WRITE: CP_SET_AMBLE: @@ -611,7 +612,6 @@ UNKN103: UNKN104: UNKN105: UNKN106: -UNKN108: UNKN109: UNKN110: UNKN112: diff --git a/src/freedreno/registers/adreno/adreno_pm4.xml b/src/freedreno/registers/adreno/adreno_pm4.xml index 63a333cd306..d4179b3dae7 100644 --- a/src/freedreno/registers/adreno/adreno_pm4.xml +++ b/src/freedreno/registers/adreno/adreno_pm4.xml @@ -661,6 +661,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> Invalidates the "CCHE" introduced on a740 + + @@ -2372,5 +2374,15 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + + + + diff --git a/src/freedreno/vulkan/tu_query_pool.cc b/src/freedreno/vulkan/tu_query_pool.cc index 3331e0876b4..e237314c2ca 100644 --- a/src/freedreno/vulkan/tu_query_pool.cc +++ b/src/freedreno/vulkan/tu_query_pool.cc @@ -1039,6 +1039,7 @@ emit_perfcntrs_pass_start(struct tu_cs *cs, uint32_t pass) tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); } +template static void emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf, struct tu_query_pool *pool, @@ -1070,6 +1071,16 @@ emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_wfi(cs); + /* Keep preemption disabled for the duration of this query. This way + * changes in perfcounter values should only apply to work done during + * this query. + */ + if (CHIP == A7XX) { + tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1); + tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = true, + .scope = INTERRUPTS).value); + } + for (uint32_t i = 0; i < pool->counter_index_count; i++) { struct tu_perf_query_data *data = &pool->perf_query_data[i]; @@ -1200,7 +1211,7 @@ tu_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, emit_begin_prim_generated_query(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: - emit_begin_perf_query(cmdbuf, pool, query); + emit_begin_perf_query(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: emit_begin_stat_query(cmdbuf, pool, query); @@ -1442,6 +1453,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_qw(cs, 0x1); } +template static void emit_end_perf_query(struct tu_cmd_buffer *cmdbuf, struct tu_query_pool *pool, @@ -1512,6 +1524,15 @@ emit_end_perf_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); + /* This reverts the preemption disablement done at the start + * of the query. + */ + if (CHIP == A7XX) { + tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1); + tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = false, + .scope = INTERRUPTS).value); + } + if (cmdbuf->state.pass) cs = &cmdbuf->draw_epilogue_cs; @@ -1691,7 +1712,7 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, emit_end_prim_generated_query(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: - emit_end_perf_query(cmdbuf, pool, query); + emit_end_perf_query(cmdbuf, pool, query); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: emit_end_stat_query(cmdbuf, pool, query);