mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
tu: Rework pipeline stat queries
Drop the legacy register offset macros. And re-work how we map the vk query to pipeline stat offset, to account for re-ordering of the pipeline stat regs in gen8. Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39029>
This commit is contained in:
parent
d9f1f0e9b9
commit
76cd06426d
1 changed files with 41 additions and 21 deletions
|
|
@ -27,8 +27,19 @@
|
||||||
|
|
||||||
#define NSEC_PER_SEC 1000000000ull
|
#define NSEC_PER_SEC 1000000000ull
|
||||||
#define WAIT_TIMEOUT 5
|
#define WAIT_TIMEOUT 5
|
||||||
#define STAT_COUNT ((REG_A6XX_RBBM_PIPESTAT_CSINVOCATIONS - REG_A6XX_RBBM_PIPESTAT_IAVERTICES) / 2 + 1)
|
#define __COUNTER_REG(CHIP, name) __RBBM_PIPESTAT_ ## name <CHIP>({}).reg
|
||||||
#define COUNTER_REG(name) __RBBM_PIPESTAT_ ## name <CHIP>({}).reg
|
#define COUNTER_REG(name) __COUNTER_REG(CHIP, name)
|
||||||
|
|
||||||
|
/* Note: gen8 changes the order of the pipestat regs, but in either case
|
||||||
|
* they ones we are interested in are consecutive, so for the purposes of
|
||||||
|
* knowning how many values to read we can just use A6XX reg addresses.
|
||||||
|
*
|
||||||
|
* And in both cases, RBBM_PIPESTAT_IAVERTICES is the first one.
|
||||||
|
*
|
||||||
|
* Depending on how/if they shuffle around in the future, we might need
|
||||||
|
* to shift to reading them individually, like gallium does.
|
||||||
|
*/
|
||||||
|
#define STAT_COUNT ((__COUNTER_REG(A6XX, CSINVOCATIONS) - __COUNTER_REG(A6XX, IAVERTICES)) / 2 + 1)
|
||||||
|
|
||||||
struct PACKED query_slot {
|
struct PACKED query_slot {
|
||||||
uint64_t available;
|
uint64_t available;
|
||||||
|
|
@ -464,35 +475,38 @@ get_result_count(struct tu_query_pool *pool)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <chip CHIP>
|
||||||
static uint32_t
|
static uint32_t
|
||||||
statistics_index(uint32_t *statistics)
|
statistics_index(uint32_t *statistics)
|
||||||
{
|
{
|
||||||
uint32_t stat;
|
uint32_t stat;
|
||||||
stat = u_bit_scan(statistics);
|
stat = u_bit_scan(statistics);
|
||||||
|
|
||||||
|
#define COUNTER_OFFSET(name) ((COUNTER_REG(name) - COUNTER_REG(IAVERTICES)) / 2)
|
||||||
|
|
||||||
switch (1 << stat) {
|
switch (1 << stat) {
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT:
|
||||||
return 0;
|
return COUNTER_OFFSET(IAVERTICES);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT:
|
||||||
return 1;
|
return COUNTER_OFFSET(IAPRIMITIVES);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT:
|
||||||
return 2;
|
return COUNTER_OFFSET(VSINVOCATIONS);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT:
|
||||||
return 5;
|
return COUNTER_OFFSET(GSINVOCATIONS);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT:
|
||||||
return 6;
|
return COUNTER_OFFSET(GSPRIMITIVES);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT:
|
||||||
return 7;
|
return COUNTER_OFFSET(CINVOCATIONS);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT:
|
||||||
return 8;
|
return COUNTER_OFFSET(CPRIMITIVES);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT:
|
||||||
return 9;
|
return COUNTER_OFFSET(PSINVOCATIONS);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT:
|
||||||
return 3;
|
return COUNTER_OFFSET(HSINVOCATIONS);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT:
|
||||||
return 4;
|
return COUNTER_OFFSET(DSINVOCATIONS);
|
||||||
case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT:
|
case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT:
|
||||||
return 10;
|
return COUNTER_OFFSET(CSINVOCATIONS);
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -589,6 +603,7 @@ write_performance_query_value_cpu(char *base,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <chip CHIP>
|
||||||
static VkResult
|
static VkResult
|
||||||
get_query_pool_results(struct tu_device *device,
|
get_query_pool_results(struct tu_device *device,
|
||||||
struct tu_query_pool *pool,
|
struct tu_query_pool *pool,
|
||||||
|
|
@ -635,7 +650,7 @@ get_query_pool_results(struct tu_device *device,
|
||||||
uint64_t *result;
|
uint64_t *result;
|
||||||
|
|
||||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||||
uint32_t stat_idx = statistics_index(&statistics);
|
uint32_t stat_idx = statistics_index<CHIP>(&statistics);
|
||||||
result = query_result_addr(pool, query, uint64_t, stat_idx);
|
result = query_result_addr(pool, query, uint64_t, stat_idx);
|
||||||
} else if (is_perf_query_raw(pool)) {
|
} else if (is_perf_query_raw(pool)) {
|
||||||
result = query_result_addr(pool, query, struct perfcntr_query_slot, k);
|
result = query_result_addr(pool, query, struct perfcntr_query_slot, k);
|
||||||
|
|
@ -704,6 +719,7 @@ get_query_pool_results(struct tu_device *device,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <chip CHIP>
|
||||||
VKAPI_ATTR VkResult VKAPI_CALL
|
VKAPI_ATTR VkResult VKAPI_CALL
|
||||||
tu_GetQueryPoolResults(VkDevice _device,
|
tu_GetQueryPoolResults(VkDevice _device,
|
||||||
VkQueryPool queryPool,
|
VkQueryPool queryPool,
|
||||||
|
|
@ -732,13 +748,14 @@ tu_GetQueryPoolResults(VkDevice _device,
|
||||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
return get_query_pool_results(device, pool, firstQuery, queryCount,
|
return get_query_pool_results<CHIP>(device, pool, firstQuery, queryCount,
|
||||||
dataSize, pData, stride, flags);
|
dataSize, pData, stride, flags);
|
||||||
default:
|
default:
|
||||||
assert(!"Invalid query type");
|
assert(!"Invalid query type");
|
||||||
}
|
}
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
TU_GENX(tu_GetQueryPoolResults);
|
||||||
|
|
||||||
/* Copies a query value from one buffer to another from the GPU. */
|
/* Copies a query value from one buffer to another from the GPU. */
|
||||||
static void
|
static void
|
||||||
|
|
@ -809,7 +826,7 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
|
||||||
uint64_t result_iova;
|
uint64_t result_iova;
|
||||||
|
|
||||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||||
uint32_t stat_idx = statistics_index(&statistics);
|
uint32_t stat_idx = statistics_index<CHIP>(&statistics);
|
||||||
result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
|
result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
|
||||||
} else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
} else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||||
result_iova = query_result_iova(pool, query,
|
result_iova = query_result_iova(pool, query,
|
||||||
|
|
@ -896,6 +913,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdCopyQueryPoolResults);
|
TU_GENX(tu_CmdCopyQueryPoolResults);
|
||||||
|
|
||||||
|
template <chip CHIP>
|
||||||
static void
|
static void
|
||||||
emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
||||||
struct tu_query_pool *pool,
|
struct tu_query_pool *pool,
|
||||||
|
|
@ -916,7 +934,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
||||||
uint64_t result_iova;
|
uint64_t result_iova;
|
||||||
|
|
||||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||||
uint32_t stat_idx = statistics_index(&statistics);
|
uint32_t stat_idx = statistics_index<CHIP>(&statistics);
|
||||||
result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
|
result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
|
||||||
} else if (is_perf_query_raw(pool)) {
|
} else if (is_perf_query_raw(pool)) {
|
||||||
result_iova = query_result_iova(pool, query,
|
result_iova = query_result_iova(pool, query,
|
||||||
|
|
@ -950,6 +968,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <chip CHIP>
|
||||||
VKAPI_ATTR void VKAPI_CALL
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
||||||
VkQueryPool queryPool,
|
VkQueryPool queryPool,
|
||||||
|
|
@ -970,12 +989,13 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
||||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
|
emit_reset_query_pool<CHIP>(cmdbuf, pool, firstQuery, queryCount);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(!"Invalid query type");
|
assert(!"Invalid query type");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
TU_GENX(tu_CmdResetQueryPool);
|
||||||
|
|
||||||
VKAPI_ATTR void VKAPI_CALL
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
tu_ResetQueryPool(VkDevice device,
|
tu_ResetQueryPool(VkDevice device,
|
||||||
|
|
@ -1148,7 +1168,7 @@ emit_begin_stat_query(struct tu_cmd_buffer *cmdbuf,
|
||||||
tu_cs_emit_wfi(cs);
|
tu_cs_emit_wfi(cs);
|
||||||
|
|
||||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) |
|
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
|
||||||
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
|
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
|
||||||
CP_REG_TO_MEM_0_64B);
|
CP_REG_TO_MEM_0_64B);
|
||||||
tu_cs_emit_qw(cs, begin_iova);
|
tu_cs_emit_qw(cs, begin_iova);
|
||||||
|
|
@ -1634,7 +1654,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
|
||||||
tu_cs_emit_wfi(cs);
|
tu_cs_emit_wfi(cs);
|
||||||
|
|
||||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) |
|
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
|
||||||
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
|
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
|
||||||
CP_REG_TO_MEM_0_64B);
|
CP_REG_TO_MEM_0_64B);
|
||||||
tu_cs_emit_qw(cs, end_iova);
|
tu_cs_emit_qw(cs, end_iova);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue