mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
panvk: Switch JM copy queries to CLC
Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32721>
This commit is contained in:
parent
c51a2e85d8
commit
3ed5557baf
5 changed files with 71 additions and 223 deletions
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
libpan_shader_files = files(
|
||||
'placeholder.cl',
|
||||
'query_pool.cl',
|
||||
)
|
||||
|
||||
# We need to set -fmacro-prefix-map properly for reproducability.
|
||||
|
|
|
|||
57
src/panfrost/libpan/query_pool.cl
Normal file
57
src/panfrost/libpan/query_pool.cl
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright 2024 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "compiler/libcl/libcl.h"
|
||||
#include "compiler/libcl/libcl_vk.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#if (PAN_ARCH >= 6 && PAN_ARCH < 10)
|
||||
static inline void
|
||||
write_occlusion_query_result(uintptr_t dst_addr, int32_t idx, uint32_t flags,
|
||||
global uint64_t *report_addr,
|
||||
uint32_t report_count)
|
||||
{
|
||||
uint64_t value = 0;
|
||||
|
||||
for (uint32_t i = 0; i < report_count; i++)
|
||||
value += report_addr[i];
|
||||
|
||||
vk_write_query(dst_addr, idx, flags, value);
|
||||
}
|
||||
|
||||
KERNEL(1)
|
||||
panlib_copy_query_result(uint64_t pool_addr, global uint32_t *available_addr,
|
||||
uint32_t query_stride, uint32_t first_query,
|
||||
uint32_t query_count, uint64_t dst_addr,
|
||||
uint64_t dst_stride, uint32_t query_type,
|
||||
uint32_t flags, uint32_t report_count)
|
||||
{
|
||||
uint32_t i = cl_global_id.x;
|
||||
|
||||
if (i >= query_count)
|
||||
return;
|
||||
|
||||
uint32_t query = first_query + i;
|
||||
uintptr_t dst = dst_addr + ((uint64_t)i * dst_stride);
|
||||
global uint64_t *report_addr =
|
||||
(global uint64_t *)(pool_addr + ((uint64_t)query * query_stride));
|
||||
|
||||
bool available = available_addr[query];
|
||||
|
||||
if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
|
||||
switch (query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
write_occlusion_query_result(dst, 0, flags, report_addr, report_count);
|
||||
break;
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
|
||||
vk_write_query(dst, 1, flags, available);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "util/os_time.h"
|
||||
|
||||
#include "libpan_dgc.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
#include "vk_log.h"
|
||||
|
|
@ -16,11 +17,14 @@
|
|||
#include "panvk_buffer.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_meta.h"
|
||||
#include "panvk_cmd_precomp.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_macros.h"
|
||||
#include "panvk_query_pool.h"
|
||||
|
||||
#include "libpan.h"
|
||||
|
||||
static nir_def *
|
||||
panvk_nir_query_report_dev_addr(nir_builder *b, nir_def *pool_addr,
|
||||
nir_def *query_stride, nir_def *query)
|
||||
|
|
@ -366,168 +370,6 @@ panvk_per_arch(CmdEndQueryIndexedEXT)(VkCommandBuffer commandBuffer,
|
|||
close_batch(cmd, had_batch);
|
||||
}
|
||||
|
||||
static void
|
||||
nir_write_query_result(nir_builder *b, nir_def *dst_addr, nir_def *idx,
|
||||
nir_def *flags, nir_def *result)
|
||||
{
|
||||
assert(result->num_components == 1);
|
||||
assert(result->bit_size == 64);
|
||||
|
||||
nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_64_BIT));
|
||||
{
|
||||
nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 8));
|
||||
nir_store_global(b, nir_iadd(b, dst_addr, offset), 8, result, 0x1);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_def *result32 = nir_u2u32(b, result);
|
||||
nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 4));
|
||||
nir_store_global(b, nir_iadd(b, dst_addr, offset), 4, result32, 0x1);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
nir_write_occlusion_query_result(nir_builder *b, nir_def *dst_addr,
|
||||
nir_def *idx, nir_def *flags,
|
||||
nir_def *report_addr, unsigned core_count)
|
||||
{
|
||||
nir_def *value = nir_imm_int64(b, 0);
|
||||
|
||||
for (unsigned core_idx = 0; core_idx < core_count; core_idx++) {
|
||||
/* Start values start at the second entry */
|
||||
unsigned report_offset = core_idx * sizeof(struct panvk_query_report);
|
||||
|
||||
value = nir_iadd(
|
||||
b, value,
|
||||
nir_load_global(
|
||||
b, nir_iadd(b, report_addr, nir_imm_int64(b, report_offset)), 8, 1,
|
||||
64));
|
||||
}
|
||||
|
||||
nir_write_query_result(b, dst_addr, idx, flags, value);
|
||||
}
|
||||
|
||||
struct panvk_copy_query_push {
|
||||
uint64_t pool_addr;
|
||||
uint32_t available_addr;
|
||||
uint32_t query_stride;
|
||||
uint32_t first_query;
|
||||
uint32_t query_count;
|
||||
uint64_t dst_addr;
|
||||
uint64_t dst_stride;
|
||||
uint32_t flags;
|
||||
};
|
||||
|
||||
static void
|
||||
panvk_nir_copy_query(nir_builder *b, VkQueryType query_type,
|
||||
unsigned core_count, nir_def *i)
|
||||
{
|
||||
nir_def *pool_addr = load_info(b, struct panvk_copy_query_push, pool_addr);
|
||||
nir_def *available_addr =
|
||||
nir_u2u64(b, load_info(b, struct panvk_copy_query_push, available_addr));
|
||||
nir_def *query_stride =
|
||||
load_info(b, struct panvk_copy_query_push, query_stride);
|
||||
nir_def *first_query =
|
||||
load_info(b, struct panvk_copy_query_push, first_query);
|
||||
nir_def *dst_addr = load_info(b, struct panvk_copy_query_push, dst_addr);
|
||||
nir_def *dst_stride = load_info(b, struct panvk_copy_query_push, dst_stride);
|
||||
nir_def *flags = load_info(b, struct panvk_copy_query_push, flags);
|
||||
|
||||
nir_def *query = nir_iadd(b, first_query, i);
|
||||
|
||||
nir_def *avail_addr = panvk_nir_available_dev_addr(b, available_addr, query);
|
||||
nir_def *available = nir_i2b(b, nir_load_global(b, avail_addr, 4, 1, 32));
|
||||
|
||||
nir_def *partial = nir_test_mask(b, flags, VK_QUERY_RESULT_PARTIAL_BIT);
|
||||
nir_def *write_results = nir_ior(b, available, partial);
|
||||
|
||||
nir_def *report_addr =
|
||||
panvk_nir_query_report_dev_addr(b, pool_addr, query_stride, query);
|
||||
nir_def *dst_offset = nir_imul(b, nir_u2u64(b, i), dst_stride);
|
||||
|
||||
nir_push_if(b, write_results);
|
||||
{
|
||||
switch (query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION: {
|
||||
nir_write_occlusion_query_result(b, nir_iadd(b, dst_addr, dst_offset),
|
||||
nir_imm_int(b, 0), flags, report_addr,
|
||||
core_count);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_push_if(b,
|
||||
nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
|
||||
{
|
||||
nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset),
|
||||
nir_imm_int(b, 1), flags, nir_b2i64(b, available));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
build_copy_queries_shader(VkQueryType query_type, uint32_t max_threads_per_wg,
|
||||
unsigned core_count)
|
||||
{
|
||||
nir_builder build = nir_builder_init_simple_shader(
|
||||
MESA_SHADER_COMPUTE, NULL,
|
||||
"panvk-meta-copy-queries(query_type=%d,core_count=%u)", query_type,
|
||||
core_count);
|
||||
nir_builder *b = &build;
|
||||
|
||||
b->shader->info.workgroup_size[0] = max_threads_per_wg;
|
||||
nir_def *wg_id = nir_load_workgroup_id(b);
|
||||
nir_def *i =
|
||||
nir_iadd(b, nir_load_subgroup_invocation(b),
|
||||
nir_imul_imm(b, nir_channel(b, wg_id, 0), max_threads_per_wg));
|
||||
|
||||
nir_def *query_count =
|
||||
load_info(b, struct panvk_copy_query_push, query_count);
|
||||
nir_push_if(b, nir_ilt(b, i, query_count));
|
||||
{
|
||||
panvk_nir_copy_query(b, query_type, core_count, i);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
return build.shader;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
get_copy_queries_pipeline(struct panvk_device *dev, VkQueryType query_type,
|
||||
const void *key_data, size_t key_size,
|
||||
VkPipelineLayout layout, VkPipeline *pipeline_out)
|
||||
{
|
||||
const struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
|
||||
unsigned core_count;
|
||||
panfrost_query_core_count(&phys_dev->kmod.props, &core_count);
|
||||
const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
|
||||
.nir = build_copy_queries_shader(
|
||||
query_type, phys_dev->kmod.props.max_threads_per_wg, core_count),
|
||||
};
|
||||
const VkComputePipelineCreateInfo info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.stage =
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = &nir_info,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pName = "main",
|
||||
},
|
||||
.layout = layout,
|
||||
};
|
||||
|
||||
return vk_meta_create_compute_pipeline(&dev->vk, &dev->meta, &info, key_data,
|
||||
key_size, pipeline_out);
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_meta_copy_query_pool_results(struct panvk_cmd_buffer *cmd,
|
||||
struct panvk_query_pool *pool,
|
||||
|
|
@ -535,12 +377,7 @@ panvk_meta_copy_query_pool_results(struct panvk_cmd_buffer *cmd,
|
|||
uint64_t dst_addr, uint64_t dst_stride,
|
||||
VkQueryResultFlags flags)
|
||||
{
|
||||
struct panvk_device *dev = to_panvk_device(cmd->vk.base.device);
|
||||
const struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
VkResult result;
|
||||
|
||||
const struct panvk_copy_query_push push = {
|
||||
const struct panlib_copy_query_result_args push = {
|
||||
.pool_addr = panvk_priv_mem_dev_addr(pool->mem),
|
||||
.available_addr = panvk_priv_mem_dev_addr(pool->available_mem),
|
||||
.query_stride = pool->query_stride,
|
||||
|
|
@ -548,62 +385,17 @@ panvk_meta_copy_query_pool_results(struct panvk_cmd_buffer *cmd,
|
|||
.query_count = query_count,
|
||||
.dst_addr = dst_addr,
|
||||
.dst_stride = dst_stride,
|
||||
.query_type = pool->vk.query_type,
|
||||
.flags = flags,
|
||||
.report_count = pool->reports_per_query,
|
||||
};
|
||||
|
||||
enum panvk_meta_object_key_type key;
|
||||
|
||||
switch (pool->vk.query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION: {
|
||||
key = PANVK_META_OBJECT_KEY_COPY_QUERY_POOL_RESULTS_OQ_PIPELINE;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
|
||||
const VkPushConstantRange push_range = {
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.size = sizeof(push),
|
||||
};
|
||||
VkPipelineLayout layout;
|
||||
result = vk_meta_get_pipeline_layout(&dev->vk, &dev->meta, NULL, &push_range,
|
||||
&key, sizeof(key), &layout);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
|
||||
VkPipeline pipeline = vk_meta_lookup_pipeline(&dev->meta, &key, sizeof(key));
|
||||
|
||||
if (pipeline == VK_NULL_HANDLE) {
|
||||
result = get_copy_queries_pipeline(dev, pool->vk.query_type, &key,
|
||||
sizeof(key), layout, &pipeline);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Save previous cmd state */
|
||||
struct panvk_cmd_meta_compute_save_ctx save = {0};
|
||||
panvk_per_arch(cmd_meta_compute_start)(cmd, &save);
|
||||
|
||||
dev->vk.dispatch_table.CmdBindPipeline(panvk_cmd_buffer_to_handle(cmd),
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
pipeline);
|
||||
|
||||
dev->vk.dispatch_table.CmdPushConstants(panvk_cmd_buffer_to_handle(cmd),
|
||||
layout, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(push), &push);
|
||||
|
||||
dev->vk.dispatch_table.CmdDispatchBase(
|
||||
panvk_cmd_buffer_to_handle(cmd), 0, 0, 0,
|
||||
DIV_ROUND_UP(query_count, phys_dev->kmod.props.max_threads_per_wg), 1, 1);
|
||||
|
||||
/* Restore previous cmd state */
|
||||
panvk_per_arch(cmd_meta_compute_end)(cmd, &save);
|
||||
bool had_batch;
|
||||
open_batch(cmd, &had_batch);
|
||||
struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmd);
|
||||
panlib_copy_query_result_struct(&precomp_ctx, panlib_1d(query_count),
|
||||
PANLIB_BARRIER_NONE, push);
|
||||
close_batch(cmd, had_batch);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ enum panvk_meta_object_key_type {
|
|||
PANVK_META_OBJECT_KEY_COPY_DESC_SHADER,
|
||||
PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
|
||||
PANVK_META_OBJECT_KEY_CLEAR_QUERY_POOL_PIPELINE,
|
||||
PANVK_META_OBJECT_KEY_COPY_QUERY_POOL_RESULTS_OQ_PIPELINE,
|
||||
};
|
||||
|
||||
static inline VkFormat
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue