diff --git a/src/freedreno/vulkan/tu_query_pool.cc b/src/freedreno/vulkan/tu_query_pool.cc
index a3b2dabd0b7..fc535bf37b5 100644
--- a/src/freedreno/vulkan/tu_query_pool.cc
+++ b/src/freedreno/vulkan/tu_query_pool.cc
@@ -27,8 +27,19 @@
 
 #define NSEC_PER_SEC 1000000000ull
 #define WAIT_TIMEOUT 5
-#define STAT_COUNT ((REG_A6XX_RBBM_PIPESTAT_CSINVOCATIONS - REG_A6XX_RBBM_PIPESTAT_IAVERTICES) / 2 + 1)
-#define COUNTER_REG(name) __RBBM_PIPESTAT_ ## name <CHIP> ({}).reg
+#define __COUNTER_REG(CHIP, name) __RBBM_PIPESTAT_ ## name <CHIP> ({}).reg
+#define COUNTER_REG(name) __COUNTER_REG(CHIP, name)
+
+/* Note: gen8 changes the order of the pipestat regs, but in either case
+ * the ones we are interested in are consecutive, so for the purposes of
+ * knowing how many values to read we can just use A6XX reg addresses.
+ *
+ * And in both cases, RBBM_PIPESTAT_IAVERTICES is the first one.
+ *
+ * Depending on how/if they shuffle around in the future, we might need
+ * to shift to reading them individually, like gallium does.
+ */
+#define STAT_COUNT ((__COUNTER_REG(A6XX, CSINVOCATIONS) - __COUNTER_REG(A6XX, IAVERTICES)) / 2 + 1)
 
 struct PACKED query_slot {
    uint64_t available;
@@ -464,35 +475,38 @@ get_result_count(struct tu_query_pool *pool)
    }
 }
 
+template <chip CHIP>
 static uint32_t
 statistics_index(uint32_t *statistics)
 {
    uint32_t stat;
    stat = u_bit_scan(statistics);
+#define COUNTER_OFFSET(name) ((COUNTER_REG(name) - COUNTER_REG(IAVERTICES)) / 2)
+
    switch (1 << stat) {
    case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT:
-      return 0;
+      return COUNTER_OFFSET(IAVERTICES);
    case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT:
-      return 1;
+      return COUNTER_OFFSET(IAPRIMITIVES);
    case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT:
-      return 2;
+      return COUNTER_OFFSET(VSINVOCATIONS);
    case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT:
-      return 5;
+      return COUNTER_OFFSET(GSINVOCATIONS);
    case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT:
-      return 6;
+      return COUNTER_OFFSET(GSPRIMITIVES);
    case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT:
-      return 7;
+      return COUNTER_OFFSET(CINVOCATIONS);
    case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT:
-      return 8;
+      return COUNTER_OFFSET(CPRIMITIVES);
    case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT:
-      return 9;
+      return COUNTER_OFFSET(PSINVOCATIONS);
    case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT:
-      return 3;
+      return COUNTER_OFFSET(HSINVOCATIONS);
    case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT:
-      return 4;
+      return COUNTER_OFFSET(DSINVOCATIONS);
    case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT:
-      return 10;
+      return COUNTER_OFFSET(CSINVOCATIONS);
    default:
       return 0;
    }
 }
@@ -589,6 +603,7 @@ write_performance_query_value_cpu(char *base,
    }
 }
 
+template <chip CHIP>
 static VkResult
 get_query_pool_results(struct tu_device *device,
                        struct tu_query_pool *pool,
@@ -635,7 +650,7 @@ get_query_pool_results(struct tu_device *device,
          uint64_t *result;
 
          if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
-            uint32_t stat_idx = statistics_index(&statistics);
+            uint32_t stat_idx = statistics_index<CHIP>(&statistics);
             result = query_result_addr(pool, query, uint64_t, stat_idx);
          } else if (is_perf_query_raw(pool)) {
            result = query_result_addr(pool, query, struct perfcntr_query_slot, k);
@@ -704,6 +719,7 @@ get_query_pool_results(struct tu_device *device,
    return result;
 }
 
+template <chip CHIP>
 VKAPI_ATTR VkResult VKAPI_CALL
 tu_GetQueryPoolResults(VkDevice _device,
                        VkQueryPool queryPool,
@@ -732,13 +748,14 @@ tu_GetQueryPoolResults(VkDevice _device,
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
-      return get_query_pool_results(device, pool, firstQuery, queryCount,
-                                    dataSize, pData, stride, flags);
+      return get_query_pool_results<CHIP>(device, pool, firstQuery, queryCount,
+                                          dataSize, pData, stride, flags);
    default:
       assert(!"Invalid query type");
    }
    return VK_SUCCESS;
 }
+TU_GENX(tu_GetQueryPoolResults);
 
 /* Copies a query value from one buffer to another from the GPU. */
 static void
@@ -809,7 +826,7 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
       uint64_t result_iova;
 
       if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
-         uint32_t stat_idx = statistics_index(&statistics);
+         uint32_t stat_idx = statistics_index<CHIP>(&statistics);
          result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
       } else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
          result_iova = query_result_iova(pool, query,
@@ -896,6 +913,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
 }
 TU_GENX(tu_CmdCopyQueryPoolResults);
 
+template <chip CHIP>
 static void
 emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
                       struct tu_query_pool *pool,
@@ -916,7 +934,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
      uint64_t result_iova;
 
      if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
-        uint32_t stat_idx = statistics_index(&statistics);
+        uint32_t stat_idx = statistics_index<CHIP>(&statistics);
         result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
      } else if (is_perf_query_raw(pool)) {
         result_iova = query_result_iova(pool, query,
@@ -950,6 +968,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
 
 }
 
+template <chip CHIP>
 VKAPI_ATTR void VKAPI_CALL
 tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                      VkQueryPool queryPool,
@@ -970,12 +989,13 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
-      emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
+      emit_reset_query_pool<CHIP>(cmdbuf, pool, firstQuery, queryCount);
       break;
    default:
       assert(!"Invalid query type");
    }
 }
+TU_GENX(tu_CmdResetQueryPool);
 
 VKAPI_ATTR void VKAPI_CALL
 tu_ResetQueryPool(VkDevice device,
@@ -1148,7 +1168,7 @@ emit_begin_stat_query(struct tu_cmd_buffer *cmdbuf,
    tu_cs_emit_wfi(cs);
 
    tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
-   tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) |
+   tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
                   CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
                   CP_REG_TO_MEM_0_64B);
    tu_cs_emit_qw(cs, begin_iova);
@@ -1634,7 +1654,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
    tu_cs_emit_wfi(cs);
 
    tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
-   tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) |
+   tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
                   CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
                   CP_REG_TO_MEM_0_64B);
    tu_cs_emit_qw(cs, end_iova);
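
For readers skimming the patch, a standalone sketch follows of how the two-level __COUNTER_REG(CHIP, name) / COUNTER_REG(name) macros resolve against per-chip templated register helpers, and how COUNTER_OFFSET(name) turns a counter's register address into a slot index. This is not part of the change above: the chip enum values used, the __RBBM_PIPESTAT_* stand-in structs, and every register address are invented for illustration only; in the driver these come from the generated register headers and the real turnip chip enum.

/* Standalone sketch, assuming made-up register layouts; not driver code. */
#include <cstdint>
#include <cstdio>

enum chip { A6XX, A7XX };

/* Stand-ins for the generated templated register helpers: constructing one
 * and reading .reg yields the register offset for the given chip.  The A7XX
 * values are deliberately fabricated to place CSINVOCATIONS at a different
 * relative position, mimicking the "gen8 reorders the pipestat regs" case. */
template <chip CHIP> struct __RBBM_PIPESTAT_IAVERTICES {
   uint32_t reg = (CHIP == A6XX) ? 0x0810 : 0x0820;   /* hypothetical */
};
template <chip CHIP> struct __RBBM_PIPESTAT_CSINVOCATIONS {
   uint32_t reg = (CHIP == A6XX) ? 0x0824 : 0x0830;   /* hypothetical */
};

/* Same shape as in the patch: the inner macro takes the chip explicitly, so
 * a constant like STAT_COUNT can be pinned to A6XX, while COUNTER_REG(name)
 * picks up the CHIP template parameter of the enclosing function. */
#define __COUNTER_REG(CHIP, name) __RBBM_PIPESTAT_ ## name <CHIP> ({}).reg
#define COUNTER_REG(name) __COUNTER_REG(CHIP, name)
#define COUNTER_OFFSET(name) ((COUNTER_REG(name) - COUNTER_REG(IAVERTICES)) / 2)

template <chip CHIP>
static uint32_t
cs_invocations_slot()
{
   /* Each counter is a 64-bit LO/HI register pair, hence the divide by two. */
   return COUNTER_OFFSET(CSINVOCATIONS);
}

int main()
{
   printf("A6XX CS slot: %u\n", cs_invocations_slot<A6XX>());   /* 10 */
   printf("A7XX CS slot: %u\n", cs_invocations_slot<A7XX>());   /* 8  */
   return 0;
}

The per-chip COUNTER_OFFSET is what lets statistics_index return the right slot on either generation, while STAT_COUNT can stay pinned to A6XX addresses because, as the note in the patch says, the counters of interest remain consecutive with RBBM_PIPESTAT_IAVERTICES first.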