diff --git a/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm b/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm
index f0e6f188b76..3b63625eb4c 100644
--- a/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm
+++ b/src/freedreno/.gitlab-ci/reference/afuc_test_a7xx.asm
@@ -243,6 +243,7 @@ CP_REG_WR_NO_CTXT:
CP_RESET_CONTEXT_STATE:
CP_RESOURCE_LIST:
CP_RUN_OPENCL:
+CP_SCOPE_CNTL:
CP_SCRATCH_TO_REG:
CP_SET_AMBLE:
CP_SET_BIN_DATA5_OFFSET:
@@ -277,7 +278,6 @@ UNKN103:
UNKN104:
UNKN105:
UNKN106:
-UNKN108:
UNKN109:
UNKN110:
UNKN112:
@@ -409,6 +409,7 @@ CP_REG_WR_NO_CTXT:
CP_RESET_CONTEXT_STATE:
CP_RESOURCE_LIST:
CP_RUN_OPENCL:
+CP_SCOPE_CNTL:
CP_SCRATCH_TO_REG:
CP_SCRATCH_WRITE:
CP_SET_AMBLE:
@@ -448,7 +449,6 @@ UNKN103:
UNKN104:
UNKN105:
UNKN106:
-UNKN108:
UNKN109:
UNKN110:
UNKN112:
@@ -572,6 +572,7 @@ CP_REG_WR_NO_CTXT:
CP_RESET_CONTEXT_STATE:
CP_RESOURCE_LIST:
CP_RUN_OPENCL:
+CP_SCOPE_CNTL:
CP_SCRATCH_TO_REG:
CP_SCRATCH_WRITE:
CP_SET_AMBLE:
@@ -611,7 +612,6 @@ UNKN103:
UNKN104:
UNKN105:
UNKN106:
-UNKN108:
UNKN109:
UNKN110:
UNKN112:
diff --git a/src/freedreno/registers/adreno/adreno_pm4.xml b/src/freedreno/registers/adreno/adreno_pm4.xml
index 63a333cd306..d4179b3dae7 100644
--- a/src/freedreno/registers/adreno/adreno_pm4.xml
+++ b/src/freedreno/registers/adreno/adreno_pm4.xml
@@ -661,6 +661,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
Invalidates the "CCHE" introduced on a740
+
+
@@ -2372,5 +2374,15 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
+
+
+
+
+
+
+
+
+
+
diff --git a/src/freedreno/vulkan/tu_query_pool.cc b/src/freedreno/vulkan/tu_query_pool.cc
index 3331e0876b4..e237314c2ca 100644
--- a/src/freedreno/vulkan/tu_query_pool.cc
+++ b/src/freedreno/vulkan/tu_query_pool.cc
@@ -1039,6 +1039,7 @@ emit_perfcntrs_pass_start(struct tu_cs *cs, uint32_t pass)
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
}
+template
static void
emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf,
struct tu_query_pool *pool,
@@ -1070,6 +1071,16 @@ emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_wfi(cs);
+ /* Keep preemption disabled for the duration of this query, so that
+ * any change in the perfcounter values is attributable only to work
+ * done during this query.
+ */
+ if (CHIP == A7XX) {
+ tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1);
+ tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = true,
+ .scope = INTERRUPTS).value);
+ }
+
for (uint32_t i = 0; i < pool->counter_index_count; i++) {
struct tu_perf_query_data *data = &pool->perf_query_data[i];
@@ -1200,7 +1211,7 @@ tu_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer,
emit_begin_prim_generated_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
- emit_begin_perf_query(cmdbuf, pool, query);
+ emit_begin_perf_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
emit_begin_stat_query(cmdbuf, pool, query);
@@ -1442,6 +1453,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_qw(cs, 0x1);
}
+template
static void
emit_end_perf_query(struct tu_cmd_buffer *cmdbuf,
struct tu_query_pool *pool,
@@ -1512,6 +1524,15 @@ emit_end_perf_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
+ /* Re-enable preemption, which was disabled at the start of the
+ * query.
+ */
+ if (CHIP == A7XX) {
+ tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1);
+ tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = false,
+ .scope = INTERRUPTS).value);
+ }
+
if (cmdbuf->state.pass)
cs = &cmdbuf->draw_epilogue_cs;
@@ -1691,7 +1712,7 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
emit_end_prim_generated_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
- emit_end_perf_query(cmdbuf, pool, query);
+ emit_end_perf_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
emit_end_stat_query(cmdbuf, pool, query);