From 3ed5557baf6e12765f72b722c7dfa60a17d97178 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Wed, 4 Dec 2024 09:43:37 +0100 Subject: [PATCH] panvk: Switch JM copy queries to CLC Signed-off-by: Mary Guillemard Reviewed-by: Alyssa Rosenzweig Part-of: --- src/panfrost/libpan/meson.build | 2 +- src/panfrost/libpan/placeholder.cl | 0 src/panfrost/libpan/query_pool.cl | 57 +++++ src/panfrost/vulkan/jm/panvk_vX_cmd_query.c | 234 ++------------------ src/panfrost/vulkan/panvk_meta.h | 1 - 5 files changed, 71 insertions(+), 223 deletions(-) delete mode 100644 src/panfrost/libpan/placeholder.cl create mode 100644 src/panfrost/libpan/query_pool.cl diff --git a/src/panfrost/libpan/meson.build b/src/panfrost/libpan/meson.build index b84eb539100..af33c03a368 100644 --- a/src/panfrost/libpan/meson.build +++ b/src/panfrost/libpan/meson.build @@ -3,7 +3,7 @@ libpan_shader_files = files( - 'placeholder.cl', + 'query_pool.cl', ) # We need to set -fmacro-prefix-map properly for reproducability. diff --git a/src/panfrost/libpan/placeholder.cl b/src/panfrost/libpan/placeholder.cl deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/panfrost/libpan/query_pool.cl b/src/panfrost/libpan/query_pool.cl new file mode 100644 index 00000000000..9654b943b2b --- /dev/null +++ b/src/panfrost/libpan/query_pool.cl @@ -0,0 +1,57 @@ +/* + * Copyright 2024 Collabora Ltd. + * SPDX-License-Identifier: MIT + */ +#include "compiler/libcl/libcl.h" +#include "compiler/libcl/libcl_vk.h" +#include "genxml/gen_macros.h" + +#if (PAN_ARCH >= 6 && PAN_ARCH < 10) +static inline void +write_occlusion_query_result(uintptr_t dst_addr, int32_t idx, uint32_t flags, + global uint64_t *report_addr, + uint32_t report_count) +{ + uint64_t value = 0; + + for (uint32_t i = 0; i < report_count; i++) + value += report_addr[i]; + + vk_write_query(dst_addr, idx, flags, value); +} + +KERNEL(1) +panlib_copy_query_result(uint64_t pool_addr, global uint32_t *available_addr, + uint32_t query_stride, uint32_t first_query, + uint32_t query_count, uint64_t dst_addr, + uint64_t dst_stride, uint32_t query_type, + uint32_t flags, uint32_t report_count) +{ + uint32_t i = cl_global_id.x; + + if (i >= query_count) + return; + + uint32_t query = first_query + i; + uintptr_t dst = dst_addr + ((uint64_t)i * dst_stride); + global uint64_t *report_addr = + (global uint64_t *)(pool_addr + ((uint64_t)query * query_stride)); + + bool available = available_addr[query]; + + if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) { + switch (query_type) { + case VK_QUERY_TYPE_OCCLUSION: + write_occlusion_query_result(dst, 0, flags, report_addr, report_count); + break; + default: + unreachable("Unsupported query type"); + break; + } + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + vk_write_query(dst, 1, flags, available); + } +} +#endif diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_query.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_query.c index 64a8bcfd1a1..0cde40286cd 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_query.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_query.c @@ -5,6 +5,7 @@ #include "util/os_time.h" +#include "libpan_dgc.h" #include "nir_builder.h" #include "vk_log.h" @@ -16,11 +17,14 @@ #include "panvk_buffer.h" #include "panvk_cmd_buffer.h" #include "panvk_cmd_meta.h" +#include "panvk_cmd_precomp.h" #include "panvk_device.h" #include "panvk_entrypoints.h" #include "panvk_macros.h" #include "panvk_query_pool.h" +#include "libpan.h" + static nir_def * panvk_nir_query_report_dev_addr(nir_builder *b, nir_def *pool_addr, nir_def *query_stride, nir_def *query) @@ -366,168 +370,6 @@ panvk_per_arch(CmdEndQueryIndexedEXT)(VkCommandBuffer commandBuffer, close_batch(cmd, had_batch); } -static void -nir_write_query_result(nir_builder *b, nir_def *dst_addr, nir_def *idx, - nir_def *flags, nir_def *result) -{ - assert(result->num_components == 1); - assert(result->bit_size == 64); - - nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_64_BIT)); - { - nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 8)); - nir_store_global(b, nir_iadd(b, dst_addr, offset), 8, result, 0x1); - } - nir_push_else(b, NULL); - { - nir_def *result32 = nir_u2u32(b, result); - nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 4)); - nir_store_global(b, nir_iadd(b, dst_addr, offset), 4, result32, 0x1); - } - nir_pop_if(b, NULL); -} - -static void -nir_write_occlusion_query_result(nir_builder *b, nir_def *dst_addr, - nir_def *idx, nir_def *flags, - nir_def *report_addr, unsigned core_count) -{ - nir_def *value = nir_imm_int64(b, 0); - - for (unsigned core_idx = 0; core_idx < core_count; core_idx++) { - /* Start values start at the second entry */ - unsigned report_offset = core_idx * sizeof(struct panvk_query_report); - - value = nir_iadd( - b, value, - nir_load_global( - b, nir_iadd(b, report_addr, nir_imm_int64(b, report_offset)), 8, 1, - 64)); - } - - nir_write_query_result(b, dst_addr, idx, flags, value); -} - -struct panvk_copy_query_push { - uint64_t pool_addr; - uint32_t available_addr; - uint32_t query_stride; - uint32_t first_query; - uint32_t query_count; - uint64_t dst_addr; - uint64_t dst_stride; - uint32_t flags; -}; - -static void -panvk_nir_copy_query(nir_builder *b, VkQueryType query_type, - unsigned core_count, nir_def *i) -{ - nir_def *pool_addr = load_info(b, struct panvk_copy_query_push, pool_addr); - nir_def *available_addr = - nir_u2u64(b, load_info(b, struct panvk_copy_query_push, available_addr)); - nir_def *query_stride = - load_info(b, struct panvk_copy_query_push, query_stride); - nir_def *first_query = - load_info(b, struct panvk_copy_query_push, first_query); - nir_def *dst_addr = load_info(b, struct panvk_copy_query_push, dst_addr); - nir_def *dst_stride = load_info(b, struct panvk_copy_query_push, dst_stride); - nir_def *flags = load_info(b, struct panvk_copy_query_push, flags); - - nir_def *query = nir_iadd(b, first_query, i); - - nir_def *avail_addr = panvk_nir_available_dev_addr(b, available_addr, query); - nir_def *available = nir_i2b(b, nir_load_global(b, avail_addr, 4, 1, 32)); - - nir_def *partial = nir_test_mask(b, flags, VK_QUERY_RESULT_PARTIAL_BIT); - nir_def *write_results = nir_ior(b, available, partial); - - nir_def *report_addr = - panvk_nir_query_report_dev_addr(b, pool_addr, query_stride, query); - nir_def *dst_offset = nir_imul(b, nir_u2u64(b, i), dst_stride); - - nir_push_if(b, write_results); - { - switch (query_type) { - case VK_QUERY_TYPE_OCCLUSION: { - nir_write_occlusion_query_result(b, nir_iadd(b, dst_addr, dst_offset), - nir_imm_int(b, 0), flags, report_addr, - core_count); - break; - } - default: - unreachable("Unsupported query type"); - } - } - nir_pop_if(b, NULL); - - nir_push_if(b, - nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); - { - nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset), - nir_imm_int(b, 1), flags, nir_b2i64(b, available)); - } - nir_pop_if(b, NULL); -} - -static nir_shader * -build_copy_queries_shader(VkQueryType query_type, uint32_t max_threads_per_wg, - unsigned core_count) -{ - nir_builder build = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, NULL, - "panvk-meta-copy-queries(query_type=%d,core_count=%u)", query_type, - core_count); - nir_builder *b = &build; - - b->shader->info.workgroup_size[0] = max_threads_per_wg; - nir_def *wg_id = nir_load_workgroup_id(b); - nir_def *i = - nir_iadd(b, nir_load_subgroup_invocation(b), - nir_imul_imm(b, nir_channel(b, wg_id, 0), max_threads_per_wg)); - - nir_def *query_count = - load_info(b, struct panvk_copy_query_push, query_count); - nir_push_if(b, nir_ilt(b, i, query_count)); - { - panvk_nir_copy_query(b, query_type, core_count, i); - } - nir_pop_if(b, NULL); - - return build.shader; -} - -static VkResult -get_copy_queries_pipeline(struct panvk_device *dev, VkQueryType query_type, - const void *key_data, size_t key_size, - VkPipelineLayout layout, VkPipeline *pipeline_out) -{ - const struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - - unsigned core_count; - panfrost_query_core_count(&phys_dev->kmod.props, &core_count); - const VkPipelineShaderStageNirCreateInfoMESA nir_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA, - .nir = build_copy_queries_shader( - query_type, phys_dev->kmod.props.max_threads_per_wg, core_count), - }; - const VkComputePipelineCreateInfo info = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = &nir_info, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .pName = "main", - }, - .layout = layout, - }; - - return vk_meta_create_compute_pipeline(&dev->vk, &dev->meta, &info, key_data, - key_size, pipeline_out); -} - static void panvk_meta_copy_query_pool_results(struct panvk_cmd_buffer *cmd, struct panvk_query_pool *pool, @@ -535,12 +377,7 @@ panvk_meta_copy_query_pool_results(struct panvk_cmd_buffer *cmd, uint64_t dst_addr, uint64_t dst_stride, VkQueryResultFlags flags) { - struct panvk_device *dev = to_panvk_device(cmd->vk.base.device); - const struct panvk_physical_device *phys_dev = - to_panvk_physical_device(dev->vk.physical); - VkResult result; - - const struct panvk_copy_query_push push = { + const struct panlib_copy_query_result_args push = { .pool_addr = panvk_priv_mem_dev_addr(pool->mem), .available_addr = panvk_priv_mem_dev_addr(pool->available_mem), .query_stride = pool->query_stride, @@ -548,62 +385,17 @@ panvk_meta_copy_query_pool_results(struct panvk_cmd_buffer *cmd, .query_count = query_count, .dst_addr = dst_addr, .dst_stride = dst_stride, + .query_type = pool->vk.query_type, .flags = flags, + .report_count = pool->reports_per_query, }; - enum panvk_meta_object_key_type key; - - switch (pool->vk.query_type) { - case VK_QUERY_TYPE_OCCLUSION: { - key = PANVK_META_OBJECT_KEY_COPY_QUERY_POOL_RESULTS_OQ_PIPELINE; - break; - } - default: - unreachable("Unsupported query type"); - } - - const VkPushConstantRange push_range = { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .size = sizeof(push), - }; - VkPipelineLayout layout; - result = vk_meta_get_pipeline_layout(&dev->vk, &dev->meta, NULL, &push_range, - &key, sizeof(key), &layout); - if (result != VK_SUCCESS) { - vk_command_buffer_set_error(&cmd->vk, result); - return; - } - - VkPipeline pipeline = vk_meta_lookup_pipeline(&dev->meta, &key, sizeof(key)); - - if (pipeline == VK_NULL_HANDLE) { - result = get_copy_queries_pipeline(dev, pool->vk.query_type, &key, - sizeof(key), layout, &pipeline); - - if (result != VK_SUCCESS) { - vk_command_buffer_set_error(&cmd->vk, result); - return; - } - } - - /* Save previous cmd state */ - struct panvk_cmd_meta_compute_save_ctx save = {0}; - panvk_per_arch(cmd_meta_compute_start)(cmd, &save); - - dev->vk.dispatch_table.CmdBindPipeline(panvk_cmd_buffer_to_handle(cmd), - VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); - - dev->vk.dispatch_table.CmdPushConstants(panvk_cmd_buffer_to_handle(cmd), - layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(push), &push); - - dev->vk.dispatch_table.CmdDispatchBase( - panvk_cmd_buffer_to_handle(cmd), 0, 0, 0, - DIV_ROUND_UP(query_count, phys_dev->kmod.props.max_threads_per_wg), 1, 1); - - /* Restore previous cmd state */ - panvk_per_arch(cmd_meta_compute_end)(cmd, &save); + bool had_batch; + open_batch(cmd, &had_batch); + struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmd); + panlib_copy_query_result_struct(&precomp_ctx, panlib_1d(query_count), + PANLIB_BARRIER_NONE, push); + close_batch(cmd, had_batch); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/panvk_meta.h b/src/panfrost/vulkan/panvk_meta.h index 4c3ff6936c1..482dab2d3db 100644 --- a/src/panfrost/vulkan/panvk_meta.h +++ b/src/panfrost/vulkan/panvk_meta.h @@ -17,7 +17,6 @@ enum panvk_meta_object_key_type { PANVK_META_OBJECT_KEY_COPY_DESC_SHADER, PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER, PANVK_META_OBJECT_KEY_CLEAR_QUERY_POOL_PIPELINE, - PANVK_META_OBJECT_KEY_COPY_QUERY_POOL_RESULTS_OQ_PIPELINE, }; static inline VkFormat