mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-09 07:58:16 +02:00
nvk: VK_KHR_copy_memory_indirect
Reviewed-by: Mary Guillemard <mary@mary.zone> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39869>
This commit is contained in:
parent
6824004a0b
commit
cbec12627b
11 changed files with 243 additions and 4 deletions
|
|
@ -540,6 +540,7 @@ Khronos extensions that are not part of any Vulkan version:
|
|||
VK_KHR_calibrated_timestamps DONE (anv, hk, kk, nvk, panvk/v10+, radv, tu/a750+, vn)
|
||||
VK_KHR_compute_shader_derivatives DONE (anv, lvp, nvk, radv, tu, vn)
|
||||
VK_KHR_cooperative_matrix DONE (anv, nvk/Turing+, radv/gfx11+, vn)
|
||||
VK_KHR_copy_memory_indirect DONE (nvk)
|
||||
VK_KHR_depth_clamp_zero_one DONE (anv, nvk, panvk, radv, tu, vn)
|
||||
VK_KHR_deferred_host_operations DONE (anv, hasvk, lvp, radv, tu, vn)
|
||||
VK_KHR_display DONE (anv, nvk, panvk, pvr, radv, tu, v3dv, vn)
|
||||
|
|
|
|||
|
|
@ -14,3 +14,4 @@ VK_EXT_zero_initialize_device_memory on panvk
|
|||
GL_EXT_shader_image_load_store on panfrost
|
||||
VK_KHR_swapchain_mutable_format on panvk
|
||||
VK_EXT_astc_decode_mode on panvk
|
||||
VK_KHR_copy_memory_indirect on nvk
|
||||
|
|
|
|||
|
|
@ -107,7 +107,7 @@ nv_push_class_dump_h = custom_target(
|
|||
command : [prog_python, '@INPUT0@', '--classes', flat_nv_classes, '--out-h', '@OUTPUT0@'],
|
||||
)
|
||||
|
||||
_libnvidia_headers = static_library(
|
||||
libnvidia_headers = static_library(
|
||||
'nvidia_headers_c',
|
||||
['nv_push.c', cl_generated, nv_push_class_dump_h],
|
||||
include_directories : ['.', 'nvidia/classes', inc_include, inc_src],
|
||||
|
|
@ -118,7 +118,7 @@ _libnvidia_headers = static_library(
|
|||
idep_nvidia_headers = declare_dependency(
|
||||
include_directories : include_directories('.', 'nvidia/classes'),
|
||||
sources : cl_generated,
|
||||
link_with : _libnvidia_headers,
|
||||
link_with : libnvidia_headers,
|
||||
)
|
||||
|
||||
executable(
|
||||
|
|
|
|||
50
src/nouveau/vulkan/cl/nvk_copy_indirect.cl
Normal file
50
src/nouveau/vulkan/cl/nvk_copy_indirect.cl
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright 2026 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "nvk_copy_indirect.h"
|
||||
|
||||
#include "nv_push.h"
|
||||
#include "nv_push_cl90b5.h"
|
||||
|
||||
#include "compiler/libcl/libcl_vk.h"
|
||||
|
||||
void
|
||||
nvk_copy_indirect(const __global void* in, uintptr_t in_stride,
|
||||
__global uint32_t* restrict out, uint32_t count)
|
||||
{
|
||||
uint i = get_sub_group_local_id() + cl_group_id.x * 32;
|
||||
if (i >= count)
|
||||
return;
|
||||
|
||||
in += i * in_stride;
|
||||
VkCopyMemoryIndirectCommandKHR cmd = *((__global VkCopyMemoryIndirectCommandKHR*)in);
|
||||
|
||||
uint32_t push_data[NVK_COPY_INDIRECT_CMD_WORDS];
|
||||
struct nv_push push;
|
||||
struct nv_push *p = &push;
|
||||
nv_push_init(p, push_data, ARRAY_SIZE(push_data),
|
||||
BITFIELD_BIT(SUBC_NV90B5));
|
||||
|
||||
P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
|
||||
P_NV90B5_OFFSET_IN_UPPER(p, cmd.srcAddress >> 32);
|
||||
P_NV90B5_OFFSET_IN_LOWER(p, cmd.srcAddress & 0xffffffff);
|
||||
P_NV90B5_OFFSET_OUT_UPPER(p, cmd.dstAddress >> 32);
|
||||
P_NV90B5_OFFSET_OUT_LOWER(p, cmd.dstAddress & 0xffffffff);
|
||||
|
||||
P_MTHD(p, NV90B5, LINE_LENGTH_IN);
|
||||
P_NV90B5_LINE_LENGTH_IN(p, cmd.size);
|
||||
|
||||
P_IMMD_WORD(p, NV90B5, LAUNCH_DMA, {
|
||||
.data_transfer_type = DATA_TRANSFER_TYPE_PIPELINED,
|
||||
.multi_line_enable = MULTI_LINE_ENABLE_FALSE,
|
||||
.flush_enable = FLUSH_ENABLE_TRUE,
|
||||
.src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
|
||||
.dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
|
||||
.remap_enable = REMAP_ENABLE_TRUE,
|
||||
});
|
||||
|
||||
assert(nv_push_dw_count(p) == NVK_COPY_INDIRECT_CMD_WORDS);
|
||||
memcpy(out + i * NVK_COPY_INDIRECT_CMD_WORDS, push_data, sizeof(push_data));
|
||||
}
|
||||
8
src/nouveau/vulkan/cl/nvk_copy_indirect.h
Normal file
8
src/nouveau/vulkan/cl/nvk_copy_indirect.h
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright 2026 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#define NVK_COPY_INDIRECT_CMD_WORDS 8
|
||||
#define NVK_COPY_INDIRECT_CMD_BYTES (NVK_COPY_INDIRECT_CMD_WORDS * sizeof(uint32_t))
|
||||
|
|
@ -79,6 +79,7 @@ nvk_files = files(
|
|||
|
||||
nvkcl_files = files(
|
||||
'cl/nvk_query.cl',
|
||||
'cl/nvk_copy_indirect.cl',
|
||||
)
|
||||
|
||||
nvk_entrypoints = custom_target(
|
||||
|
|
@ -94,16 +95,19 @@ nvk_entrypoints = custom_target(
|
|||
)
|
||||
|
||||
nvkcl_spv = custom_target(
|
||||
input : nvkcl_files,
|
||||
input : [nvkcl_files, libnvidia_headers],
|
||||
output : 'nvkcl.spv',
|
||||
command : [
|
||||
prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', nvkcl_files, '--',
|
||||
'-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'),
|
||||
'-I' + join_paths(meson.current_source_dir(), '.'),
|
||||
'-I' + join_paths(meson.project_source_root(), 'src'),
|
||||
'-I' + join_paths(meson.project_source_root(), 'src/nouveau/headers'),
|
||||
'-I' + join_paths(meson.project_source_root(), 'src/nouveau/headers/nvidia/classes'),
|
||||
'-I' + join_paths(meson.project_build_root(), 'src/nouveau/headers'),
|
||||
cl_args,
|
||||
],
|
||||
depfile : '@PLAINNAME@.d',
|
||||
depfile : 'nvkcl.spv.d',
|
||||
)
|
||||
|
||||
nvkcl = custom_target(
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
|
|||
list_inithead(&cmd->owned_gart_mem);
|
||||
list_inithead(&cmd->owned_qmd);
|
||||
cmd->pushes = UTIL_DYNARRAY_INIT;
|
||||
cmd->copy_memory_indirect_temps = UTIL_DYNARRAY_INIT;
|
||||
|
||||
cmd->prev_subc = ffs(nvk_cmd_buffer_subchannel_mask(cmd)) - 1;
|
||||
|
||||
|
|
@ -126,6 +127,7 @@ nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
|
|||
cmd->cond_render_mem = NULL;
|
||||
|
||||
util_dynarray_clear(&cmd->pushes);
|
||||
util_dynarray_clear(&cmd->copy_memory_indirect_temps);
|
||||
|
||||
memset(&cmd->state, 0, sizeof(cmd->state));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -226,6 +226,8 @@ struct nvk_cmd_buffer {
|
|||
uint32_t upload_offset;
|
||||
|
||||
struct nvk_cmd_mem *cond_render_mem;
|
||||
/** Array of struct nvk_cmd_mem* */
|
||||
struct util_dynarray copy_memory_indirect_temps;
|
||||
|
||||
struct nvk_cmd_mem *push_mem;
|
||||
uint32_t *push_mem_limit;
|
||||
|
|
|
|||
|
|
@ -13,6 +13,9 @@
|
|||
#include "nir_builder.h"
|
||||
#include "vk_pipeline.h"
|
||||
|
||||
#include "nvkcl.h"
|
||||
#include "cl/nvk_copy_indirect.h"
|
||||
|
||||
#include "clcb97.h"
|
||||
#include "nv_push.h"
|
||||
#include "nv_push_cl9097.h"
|
||||
|
|
@ -1133,3 +1136,163 @@ nvk_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct nvk_copy_indirect_push {
|
||||
uint64_t in;
|
||||
uint64_t in_stride;
|
||||
uint64_t out;
|
||||
uint32_t count;
|
||||
};
|
||||
|
||||
static nir_shader *
|
||||
build_copy_indierct_shader(void)
|
||||
{
|
||||
nir_builder build =
|
||||
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
|
||||
"nvk-meta-copy-indirect");
|
||||
nir_builder *b = &build;
|
||||
|
||||
struct glsl_struct_field push_fields[] = {
|
||||
{ .type = glsl_uint64_t_type(), .name = "in", .offset = 0 },
|
||||
{ .type = glsl_uint64_t_type(), .name = "in_stride", .offset = 8 },
|
||||
{ .type = glsl_uint64_t_type(), .name = "out", .offset = 16 },
|
||||
{ .type = glsl_uint_type(), .name = "count", .offset = 24 },
|
||||
};
|
||||
const struct glsl_type *push_iface_type =
|
||||
glsl_interface_type(push_fields, ARRAY_SIZE(push_fields),
|
||||
GLSL_INTERFACE_PACKING_STD140,
|
||||
false /* row_major */, "push");
|
||||
nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const,
|
||||
push_iface_type, "push");
|
||||
|
||||
b->shader->info.workgroup_size[0] = 32;
|
||||
|
||||
nvk_copy_indirect(b, load_struct_var(b, push, 0),
|
||||
load_struct_var(b, push, 1),
|
||||
load_struct_var(b, push, 2),
|
||||
load_struct_var(b, push, 3));
|
||||
|
||||
return build.shader;
|
||||
}
|
||||
|
||||
static struct nvk_shader *
|
||||
atomic_set_or_destroy_shader(struct nvk_device *dev,
|
||||
struct nvk_shader **shader_ptr,
|
||||
struct nvk_shader *shader,
|
||||
const VkAllocationCallbacks *alloc)
|
||||
{
|
||||
struct nvk_shader *old_shader = p_atomic_cmpxchg(shader_ptr, NULL, shader);
|
||||
if (old_shader == NULL) {
|
||||
return shader;
|
||||
} else {
|
||||
vk_shader_destroy(&dev->vk, &shader->vk, alloc);
|
||||
return old_shader;
|
||||
}
|
||||
}
|
||||
|
||||
static VkResult
|
||||
get_copy_indirect_shader(struct nvk_device *dev,
|
||||
struct nvk_shader **shader_out)
|
||||
{
|
||||
struct nvk_shader *shader = p_atomic_read(&dev->copy_indirect);
|
||||
if (shader != NULL) {
|
||||
*shader_out = shader;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
nir_shader *nir = build_copy_indierct_shader();
|
||||
VkResult result = nvk_compile_nir_shader(dev, nir, &dev->vk.alloc, &shader);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
*shader_out = atomic_set_or_destroy_shader(dev, &dev->copy_indirect,
|
||||
shader, &dev->vk.alloc);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
nvk_CmdCopyMemoryIndirectKHR(VkCommandBuffer commandBuffer,
|
||||
const VkCopyMemoryIndirectInfoKHR* info)
|
||||
{
|
||||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||||
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||||
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
VkResult result;
|
||||
|
||||
if (info->copyCount == 0)
|
||||
return;
|
||||
|
||||
struct nvk_shader *shader;
|
||||
result = get_copy_indirect_shader(dev, &shader);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
|
||||
const uint32_t cmds_per_buffer =
|
||||
NVK_CMD_MEM_SIZE / NVK_COPY_INDIRECT_CMD_BYTES;
|
||||
const uint32_t num_buffers = DIV_ROUND_UP(info->copyCount, cmds_per_buffer);
|
||||
|
||||
if (!util_dynarray_ensure_cap(&cmd->copy_memory_indirect_temps,
|
||||
sizeof(struct nvk_cmd_mem*) * num_buffers)) {
|
||||
vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
return;
|
||||
}
|
||||
|
||||
while (util_dynarray_num_elements(&cmd->copy_memory_indirect_temps,
|
||||
struct nvk_cmd_mem*) < num_buffers) {
|
||||
struct nvk_cmd_mem *temp_mem;
|
||||
result = nvk_cmd_buffer_alloc_mem(cmd, false, &temp_mem);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
util_dynarray_append_typed(&cmd->copy_memory_indirect_temps,
|
||||
struct nvk_cmd_mem*, temp_mem);
|
||||
}
|
||||
|
||||
for (int i = 0; i < info->copyCount; i += cmds_per_buffer) {
|
||||
struct nvk_cmd_mem *temp_mem =
|
||||
*util_dynarray_element(&cmd->copy_memory_indirect_temps,
|
||||
struct nvk_cmd_mem*, i / cmds_per_buffer);
|
||||
uint32_t count = MIN2(cmds_per_buffer, info->copyCount - i);
|
||||
|
||||
const struct nvk_copy_indirect_push push_constants = {
|
||||
.in = info->copyAddressRange.address + i * info->copyAddressRange.stride,
|
||||
.in_stride = info->copyAddressRange.stride,
|
||||
.out = temp_mem->mem->va->addr,
|
||||
.count = count
|
||||
};
|
||||
|
||||
nvk_cmd_dispatch_shader(cmd, shader,
|
||||
&push_constants, sizeof(push_constants),
|
||||
DIV_ROUND_UP(count, 32), 1, 1);
|
||||
}
|
||||
|
||||
if (pdev->info.cls_eng3d >= HOPPER_A) {
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 1);
|
||||
P_IMMD_WORD(p, NVC86F, WFI, 0);
|
||||
} else {
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||||
P_IMMD_WORD(p, NVA0C0, WAIT_FOR_IDLE, 0);
|
||||
__push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);
|
||||
}
|
||||
|
||||
for (int i = 0; i < info->copyCount; i += cmds_per_buffer) {
|
||||
struct nvk_cmd_mem *temp_mem =
|
||||
*util_dynarray_element(&cmd->copy_memory_indirect_temps,
|
||||
struct nvk_cmd_mem*, i / cmds_per_buffer);
|
||||
uint32_t count = MIN2(cmds_per_buffer, info->copyCount - i);
|
||||
|
||||
nvk_cmd_buffer_push_indirect(cmd, temp_mem->mem->va->addr,
|
||||
NVK_COPY_INDIRECT_CMD_BYTES * count);
|
||||
}
|
||||
}
|
||||
|
||||
void nvk_CmdCopyMemoryToImageIndirectKHR(VkCommandBuffer commandBuffer,
|
||||
const VkCopyMemoryToImageIndirectInfoKHR* info)
|
||||
{
|
||||
/* Feature unimplemented */
|
||||
assert(false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ struct nvk_device {
|
|||
struct vk_meta_device meta;
|
||||
|
||||
struct nvk_shader *copy_queries;
|
||||
struct nvk_shader *copy_indirect;
|
||||
};
|
||||
|
||||
VK_DEFINE_HANDLE_CASTS(nvk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ nvk_get_device_extensions(const struct nvk_instance *instance,
|
|||
.KHR_compute_shader_derivatives = info->cls_eng3d >= TURING_A,
|
||||
.KHR_cooperative_matrix = info->cls_eng3d >= TURING_A,
|
||||
.KHR_copy_commands2 = true,
|
||||
.KHR_copy_memory_indirect = true,
|
||||
.KHR_create_renderpass2 = true,
|
||||
.KHR_dedicated_allocation = true,
|
||||
.KHR_depth_stencil_resolve = true,
|
||||
|
|
@ -476,6 +477,9 @@ nvk_get_device_features(const struct nv_device_info *info,
|
|||
.hostImageCopy = info->cls_eng3d >= TURING_A,
|
||||
.pushDescriptor = true,
|
||||
|
||||
/* VK_KHR_copy_memory_indirect */
|
||||
.indirectMemoryCopy = true,
|
||||
|
||||
/* VK_KHR_cooperative_matrix */
|
||||
/* TU11X can run coop matrix but the performances are abysal */
|
||||
.cooperativeMatrix = info->cls_eng3d >= TURING_A && !is_tu11x,
|
||||
|
|
@ -1063,6 +1067,9 @@ nvk_get_device_properties(const struct nvk_instance *instance,
|
|||
*/
|
||||
.pipelineBinaryCompressedData = false,
|
||||
|
||||
/* VK_KHR_copy_memory_indirect */
|
||||
.supportedQueues = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT,
|
||||
|
||||
/* VK_EXT_conservative_rasterization */
|
||||
.primitiveOverestimationSize = info->cls_eng3d >= VOLTA_A ? 1.0f / 512.0f : 0.0,
|
||||
.maxExtraPrimitiveOverestimationSize = 0.75,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue