tu/a7xx: disable preemption during performance query measurement

Use CP_SCOPE_CNTL to disable preemption when a performance query begins
and re-enable it when that performance query ends. This way the
collected perfcounter measurements will only cover work that is
encompassed by the query.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33208>
This commit is contained in:
Zan Dobersek 2025-02-10 13:53:11 +01:00 committed by Marge Bot
parent c964a96ab2
commit b8338dee39
3 changed files with 38 additions and 5 deletions

View file

@ -243,6 +243,7 @@ CP_REG_WR_NO_CTXT:
CP_RESET_CONTEXT_STATE:
CP_RESOURCE_LIST:
CP_RUN_OPENCL:
CP_SCOPE_CNTL:
CP_SCRATCH_TO_REG:
CP_SET_AMBLE:
CP_SET_BIN_DATA5_OFFSET:
@ -277,7 +278,6 @@ UNKN103:
UNKN104:
UNKN105:
UNKN106:
UNKN108:
UNKN109:
UNKN110:
UNKN112:
@ -409,6 +409,7 @@ CP_REG_WR_NO_CTXT:
CP_RESET_CONTEXT_STATE:
CP_RESOURCE_LIST:
CP_RUN_OPENCL:
CP_SCOPE_CNTL:
CP_SCRATCH_TO_REG:
CP_SCRATCH_WRITE:
CP_SET_AMBLE:
@ -448,7 +449,6 @@ UNKN103:
UNKN104:
UNKN105:
UNKN106:
UNKN108:
UNKN109:
UNKN110:
UNKN112:
@ -572,6 +572,7 @@ CP_REG_WR_NO_CTXT:
CP_RESET_CONTEXT_STATE:
CP_RESOURCE_LIST:
CP_RUN_OPENCL:
CP_SCOPE_CNTL:
CP_SCRATCH_TO_REG:
CP_SCRATCH_WRITE:
CP_SET_AMBLE:
@ -611,7 +612,6 @@ UNKN103:
UNKN104:
UNKN105:
UNKN106:
UNKN108:
UNKN109:
UNKN110:
UNKN112:

View file

@ -661,6 +661,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<doc>Invalidates the "CCHE" introduced on a740</doc>
<value name="CP_CCHE_INVALIDATE" value="0x3a" variants="A7XX-"/>
<value name="CP_SCOPE_CNTL" value="0x6c" variants="A7XX-"/>
</enum>
@ -2372,5 +2374,15 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</reg32>
</domain>
<!--
Payload layout of the CP_SCOPE_CNTL packet: a single 32-bit dword.
Bit 0 (DISABLE_PREEMPTION) is a boolean flag and bits 28..31 (SCOPE)
select a cp_scope value; INTERRUPTS (0) is the only scope defined here.
-->
<domain name="CP_SCOPE_CNTL" width="32">
<enum name="cp_scope">
<value value="0" name="INTERRUPTS"/>
</enum>
<reg32 offset="0" name="0">
<bitfield name="DISABLE_PREEMPTION" pos="0" type="boolean"/>
<bitfield low="28" high="31" name="SCOPE" type="cp_scope"/>
</reg32>
</domain>
</database>

View file

@ -1039,6 +1039,7 @@ emit_perfcntrs_pass_start(struct tu_cs *cs, uint32_t pass)
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
}
template <chip CHIP>
static void
emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf,
struct tu_query_pool *pool,
@ -1070,6 +1071,16 @@ emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_wfi(cs);
/* Keep preemption disabled for the duration of this query. This way
* changes in perfcounter values should only apply to work done during
* this query.
*/
if (CHIP == A7XX) {
tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1);
tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = true,
.scope = INTERRUPTS).value);
}
for (uint32_t i = 0; i < pool->counter_index_count; i++) {
struct tu_perf_query_data *data = &pool->perf_query_data[i];
@ -1200,7 +1211,7 @@ tu_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer,
emit_begin_prim_generated_query<CHIP>(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
emit_begin_perf_query(cmdbuf, pool, query);
emit_begin_perf_query<CHIP>(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
emit_begin_stat_query<CHIP>(cmdbuf, pool, query);
@ -1442,6 +1453,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_qw(cs, 0x1);
}
template <chip CHIP>
static void
emit_end_perf_query(struct tu_cmd_buffer *cmdbuf,
struct tu_query_pool *pool,
@ -1512,6 +1524,15 @@ emit_end_perf_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
/* This reverts the preemption disablement done at the start
* of the query.
*/
if (CHIP == A7XX) {
tu_cs_emit_pkt7(cs, CP_SCOPE_CNTL, 1);
tu_cs_emit(cs, CP_SCOPE_CNTL_0(.disable_preemption = false,
.scope = INTERRUPTS).value);
}
if (cmdbuf->state.pass)
cs = &cmdbuf->draw_epilogue_cs;
@ -1691,7 +1712,7 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
emit_end_prim_generated_query<CHIP>(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
emit_end_perf_query(cmdbuf, pool, query);
emit_end_perf_query<CHIP>(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
emit_end_stat_query<CHIP>(cmdbuf, pool, query);