mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 09:30:13 +01:00
hk: add Vulkan driver for Apple GPUs
Honeykrisp is a Vulkan 1.3 driver for Apple GPUs. It currently support M1 and
M2, future hardware support is planned. It passed CTS a few months ago and with
two exceptions[1] should still pass now.
Compared to the May snapshot that passed conformance [1], this adds a bunch of
new features, most notably:
* Geometry shaders
* Tessellation shaders
* Transform feedback
* Pipeline statistics queries
* Robustness2
* Host image copy
Theoretically, we now support everything DXVK requires for D3D11 with full
FL11_1. To quote Rob Herring:
How's performance? Great, because I haven't tested it.
This driver is NOT ready for end users... YET. Stay tuned, it won't be long now
:}
I would like to reiterate: Honeykrisp is not yet ready for end users. Please
read [3].
Regardless, as the kernel UAPI is not yet stable, this driver will refuse to
probe without out-of-tree Mesa patches. This is the same situation as our GL
driver.
On the Mesa side, the biggest todo before the release is improving
performance. Right now, I expect WineD3D with our GL4.6 driver to give better
performance. This isn't fundamental, just needs time ... our GL driver is 3
years old and honeykrisp is 3 months old.
On the non-Mesa side, there's still a lot of movement around krun and FEX
packaging before this becomes broadly useful for x86 games.
At any rate, now that I've finished up geometry and tessellation, I'm hopefully
done rewriting the whole driver every 2 weeks. So I think this is settled enough
that it makes sense to upstream this now instead of building up a gigantic
monster commit in a private branch.
[1] Pipeline robustness and pipeline statistics are included in this tree but
need bug fixes in the CTS to pass. This is being handled internally in
Khronos. These features may be disabled to get a conformant driver.
[2] https://rosenzweig.io/blog/vk13-on-the-m1-in-1-month.html
[3] https://dont-ship.it/
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30382>
This commit is contained in:
parent
767280fd73
commit
5bc8284816
51 changed files with 19855 additions and 4 deletions
|
|
@ -240,7 +240,7 @@ elif _vulkan_drivers.contains('all')
|
|||
_vulkan_drivers = ['amd', 'intel', 'intel_hasvk', 'swrast',
|
||||
'freedreno', 'panfrost', 'virtio', 'broadcom',
|
||||
'imagination-experimental', 'microsoft-experimental',
|
||||
'nouveau']
|
||||
'nouveau', 'asahi']
|
||||
endif
|
||||
|
||||
with_intel_vk = _vulkan_drivers.contains('intel')
|
||||
|
|
@ -255,6 +255,7 @@ with_imagination_vk = _vulkan_drivers.contains('imagination-experimental')
|
|||
with_imagination_srv = get_option('imagination-srv')
|
||||
with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental')
|
||||
with_nouveau_vk = _vulkan_drivers.contains('nouveau')
|
||||
with_asahi_vk = _vulkan_drivers.contains('asahi')
|
||||
with_any_vk = _vulkan_drivers.length() != 0
|
||||
|
||||
if with_any_vk and host_machine.system() == 'windows' and meson.version().version_compare('< 1.3')
|
||||
|
|
@ -850,7 +851,7 @@ if with_gallium_rusticl
|
|||
endif
|
||||
|
||||
with_clover_spirv = with_gallium_clover and get_option('opencl-spirv')
|
||||
with_clc = with_microsoft_clc or with_intel_clc or with_gallium_asahi or with_gallium_rusticl or with_clover_spirv
|
||||
with_clc = with_microsoft_clc or with_intel_clc or with_gallium_asahi or with_asahi_vk or with_gallium_rusticl or with_clover_spirv
|
||||
|
||||
dep_clc = null_dep
|
||||
if with_gallium_clover or with_clc
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ option(
|
|||
value : ['auto'],
|
||||
choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'intel_hasvk',
|
||||
'panfrost', 'swrast', 'virtio', 'imagination-experimental',
|
||||
'microsoft-experimental', 'nouveau', 'all'],
|
||||
'microsoft-experimental', 'nouveau', 'asahi', 'all'],
|
||||
description : 'List of vulkan drivers to build. If this is set to auto ' +
|
||||
'all drivers applicable to the target OS/architecture ' +
|
||||
'will be built'
|
||||
|
|
|
|||
|
|
@ -186,6 +186,8 @@ ForEachMacros:
|
|||
# asahi
|
||||
- foreach_active
|
||||
- foreach_submitted
|
||||
- hk_foreach_view
|
||||
- hk_foreach_variant
|
||||
- AGX_BATCH_FOREACH_BO_HANDLE
|
||||
- agx_pack
|
||||
- agx_push
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ inc_asahi = include_directories([
|
|||
'.', 'layout', 'lib', 'genxml', 'compiler'
|
||||
])
|
||||
|
||||
if with_gallium_asahi
|
||||
if with_gallium_asahi or with_asahi_vk
|
||||
subdir('layout')
|
||||
subdir('compiler')
|
||||
subdir('clc')
|
||||
|
|
@ -14,6 +14,10 @@ if with_gallium_asahi
|
|||
subdir('lib')
|
||||
endif
|
||||
|
||||
if with_asahi_vk
|
||||
subdir('vulkan')
|
||||
endif
|
||||
|
||||
if with_tools.contains('drm-shim')
|
||||
subdir('drm-shim')
|
||||
endif
|
||||
|
|
|
|||
286
src/asahi/vulkan/hk_buffer.c
Normal file
286
src/asahi/vulkan/hk_buffer.c
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_buffer.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_device_memory.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
static uint32_t
|
||||
hk_get_buffer_alignment(const struct hk_physical_device *pdev,
|
||||
VkBufferUsageFlags2KHR usage_flags,
|
||||
VkBufferCreateFlags create_flags)
|
||||
{
|
||||
uint32_t alignment = 16;
|
||||
|
||||
if (usage_flags & VK_BUFFER_USAGE_2_UNIFORM_BUFFER_BIT_KHR)
|
||||
alignment = MAX2(alignment, HK_MIN_UBO_ALIGNMENT);
|
||||
|
||||
if (usage_flags & VK_BUFFER_USAGE_2_STORAGE_BUFFER_BIT_KHR)
|
||||
alignment = MAX2(alignment, HK_MIN_SSBO_ALIGNMENT);
|
||||
|
||||
if (usage_flags & (VK_BUFFER_USAGE_2_UNIFORM_TEXEL_BUFFER_BIT_KHR |
|
||||
VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR))
|
||||
alignment = MAX2(alignment, HK_MIN_TEXEL_BUFFER_ALIGNMENT);
|
||||
|
||||
if (create_flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
|
||||
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT))
|
||||
alignment = MAX2(alignment, 4096);
|
||||
|
||||
return alignment;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
hk_get_bda_replay_addr(const VkBufferCreateInfo *pCreateInfo)
|
||||
{
|
||||
uint64_t addr = 0;
|
||||
vk_foreach_struct_const(ext, pCreateInfo->pNext) {
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO: {
|
||||
const VkBufferOpaqueCaptureAddressCreateInfo *bda = (void *)ext;
|
||||
if (bda->opaqueCaptureAddress != 0) {
|
||||
#ifdef NDEBUG
|
||||
return bda->opaqueCaptureAddress;
|
||||
#else
|
||||
assert(addr == 0 || bda->opaqueCaptureAddress == addr);
|
||||
addr = bda->opaqueCaptureAddress;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT: {
|
||||
const VkBufferDeviceAddressCreateInfoEXT *bda = (void *)ext;
|
||||
if (bda->deviceAddress != 0) {
|
||||
#ifdef NDEBUG
|
||||
return bda->deviceAddress;
|
||||
#else
|
||||
assert(addr == 0 || bda->deviceAddress == addr);
|
||||
addr = bda->deviceAddress;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_buffer *buffer;
|
||||
|
||||
if (pCreateInfo->size > HK_MAX_BUFFER_SIZE)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
|
||||
buffer =
|
||||
vk_buffer_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*buffer));
|
||||
if (!buffer)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
if (buffer->vk.size > 0 &&
|
||||
(buffer->vk.create_flags &
|
||||
(VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
|
||||
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT))) {
|
||||
|
||||
unreachable("todo");
|
||||
#if 0
|
||||
const uint32_t alignment =
|
||||
hk_get_buffer_alignment(hk_device_physical(dev),
|
||||
buffer->vk.usage,
|
||||
buffer->vk.create_flags);
|
||||
assert(alignment >= 4096);
|
||||
buffer->vma_size_B = align64(buffer->vk.size, alignment);
|
||||
|
||||
const bool sparse_residency =
|
||||
buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
|
||||
const bool bda_capture_replay =
|
||||
buffer->vk.create_flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
|
||||
|
||||
uint64_t bda_replay_addr = 0;
|
||||
if (bda_capture_replay)
|
||||
bda_replay_addr = hk_get_bda_replay_addr(pCreateInfo);
|
||||
|
||||
buffer->addr = nouveau_ws_alloc_vma(dev->ws_dev, bda_replay_addr,
|
||||
buffer->vma_size_B,
|
||||
alignment, bda_capture_replay,
|
||||
sparse_residency);
|
||||
#endif
|
||||
if (buffer->addr == 0) {
|
||||
vk_buffer_destroy(&dev->vk, pAllocator, &buffer->vk);
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
|
||||
"Sparse VMA allocation failed");
|
||||
}
|
||||
}
|
||||
|
||||
*pBuffer = hk_buffer_to_handle(buffer);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyBuffer(VkDevice device, VkBuffer _buffer,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
|
||||
|
||||
if (!buffer)
|
||||
return;
|
||||
|
||||
if (buffer->vma_size_B > 0) {
|
||||
unreachable("todo");
|
||||
#if 0
|
||||
const bool sparse_residency =
|
||||
buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
|
||||
const bool bda_capture_replay =
|
||||
buffer->vk.create_flags &
|
||||
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
|
||||
|
||||
agx_bo_unbind_vma(dev->ws_dev, buffer->addr, buffer->vma_size_B);
|
||||
nouveau_ws_free_vma(dev->ws_dev, buffer->addr, buffer->vma_size_B,
|
||||
bda_capture_replay, sparse_residency);
|
||||
#endif
|
||||
}
|
||||
|
||||
vk_buffer_destroy(&dev->vk, pAllocator, &buffer->vk);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_GetDeviceBufferMemoryRequirements(
|
||||
VkDevice device, const VkDeviceBufferMemoryRequirements *pInfo,
|
||||
VkMemoryRequirements2 *pMemoryRequirements)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
|
||||
const uint32_t alignment = hk_get_buffer_alignment(
|
||||
hk_device_physical(dev), pInfo->pCreateInfo->usage,
|
||||
pInfo->pCreateInfo->flags);
|
||||
|
||||
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements){
|
||||
.size = align64(pInfo->pCreateInfo->size, alignment),
|
||||
.alignment = alignment,
|
||||
.memoryTypeBits = BITFIELD_MASK(pdev->mem_type_count),
|
||||
};
|
||||
|
||||
vk_foreach_struct_const(ext, pMemoryRequirements->pNext) {
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
|
||||
VkMemoryDedicatedRequirements *dedicated = (void *)ext;
|
||||
dedicated->prefersDedicatedAllocation = false;
|
||||
dedicated->requiresDedicatedAllocation = false;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
vk_debug_ignored_stype(ext->sType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_GetPhysicalDeviceExternalBufferProperties(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
|
||||
VkExternalBufferProperties *pExternalBufferProperties)
|
||||
{
|
||||
/* The Vulkan 1.3.256 spec says:
|
||||
*
|
||||
* VUID-VkPhysicalDeviceExternalBufferInfo-handleType-parameter
|
||||
*
|
||||
* "handleType must be a valid VkExternalMemoryHandleTypeFlagBits value"
|
||||
*
|
||||
* This differs from VkPhysicalDeviceExternalImageFormatInfo, which
|
||||
* surprisingly permits handleType == 0.
|
||||
*/
|
||||
assert(pExternalBufferInfo->handleType != 0);
|
||||
|
||||
/* All of the current flags are for sparse which we don't support yet.
|
||||
* Even when we do support it, doing sparse on external memory sounds
|
||||
* sketchy. Also, just disallowing flags is the safe option.
|
||||
*/
|
||||
if (pExternalBufferInfo->flags)
|
||||
goto unsupported;
|
||||
|
||||
switch (pExternalBufferInfo->handleType) {
|
||||
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
|
||||
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
|
||||
pExternalBufferProperties->externalMemoryProperties =
|
||||
hk_dma_buf_mem_props;
|
||||
return;
|
||||
default:
|
||||
goto unsupported;
|
||||
}
|
||||
|
||||
unsupported:
|
||||
/* From the Vulkan 1.3.256 spec:
|
||||
*
|
||||
* compatibleHandleTypes must include at least handleType.
|
||||
*/
|
||||
pExternalBufferProperties->externalMemoryProperties =
|
||||
(VkExternalMemoryProperties){
|
||||
.compatibleHandleTypes = pExternalBufferInfo->handleType,
|
||||
};
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
|
||||
const VkBindBufferMemoryInfo *pBindInfos)
|
||||
{
|
||||
for (uint32_t i = 0; i < bindInfoCount; ++i) {
|
||||
VK_FROM_HANDLE(hk_device_memory, mem, pBindInfos[i].memory);
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, pBindInfos[i].buffer);
|
||||
|
||||
if (buffer->vma_size_B) {
|
||||
unreachable("todo");
|
||||
#if 0
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
agx_bo_bind_vma(dev->ws_dev,
|
||||
mem->bo,
|
||||
buffer->addr,
|
||||
buffer->vma_size_B,
|
||||
pBindInfos[i].memoryOffset,
|
||||
0 /* pte_kind */);
|
||||
#endif
|
||||
} else {
|
||||
buffer->addr = mem->bo->ptr.gpu + pBindInfos[i].memoryOffset;
|
||||
}
|
||||
|
||||
const VkBindMemoryStatusKHR *status =
|
||||
vk_find_struct_const(pBindInfos[i].pNext, BIND_MEMORY_STATUS_KHR);
|
||||
if (status != NULL && status->pResult != NULL)
|
||||
*status->pResult = VK_SUCCESS;
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkDeviceAddress VKAPI_CALL
|
||||
hk_GetBufferDeviceAddress(UNUSED VkDevice device,
|
||||
const VkBufferDeviceAddressInfo *pInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, pInfo->buffer);
|
||||
|
||||
return hk_buffer_address(buffer, 0);
|
||||
}
|
||||
|
||||
VKAPI_ATTR uint64_t VKAPI_CALL
|
||||
hk_GetBufferOpaqueCaptureAddress(UNUSED VkDevice device,
|
||||
const VkBufferDeviceAddressInfo *pInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, pInfo->buffer);
|
||||
|
||||
return hk_buffer_address(buffer, 0);
|
||||
}
|
||||
45
src/asahi/vulkan/hk_buffer.h
Normal file
45
src/asahi/vulkan/hk_buffer.h
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "hk_device_memory.h"
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_buffer.h"
|
||||
|
||||
struct hk_device_memory;
|
||||
struct hk_physical_device;
|
||||
|
||||
struct hk_buffer {
|
||||
struct vk_buffer vk;
|
||||
uint64_t addr;
|
||||
|
||||
/** Size of the reserved VMA range for sparse buffers, zero otherwise. */
|
||||
uint64_t vma_size_B;
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_buffer, vk.base, VkBuffer,
|
||||
VK_OBJECT_TYPE_BUFFER)
|
||||
|
||||
static inline uint64_t
|
||||
hk_buffer_address(const struct hk_buffer *buffer, uint64_t offset)
|
||||
{
|
||||
return buffer->addr + offset;
|
||||
}
|
||||
|
||||
static inline struct hk_addr_range
|
||||
hk_buffer_addr_range(const struct hk_buffer *buffer, uint64_t offset,
|
||||
uint64_t range)
|
||||
{
|
||||
if (buffer == NULL)
|
||||
return (struct hk_addr_range){.range = 0};
|
||||
|
||||
return (struct hk_addr_range){
|
||||
.addr = hk_buffer_address(buffer, offset),
|
||||
.range = vk_buffer_range(&buffer->vk, offset, range),
|
||||
};
|
||||
}
|
||||
195
src/asahi/vulkan/hk_buffer_view.c
Normal file
195
src/asahi/vulkan/hk_buffer_view.c
Normal file
|
|
@ -0,0 +1,195 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_buffer_view.h"
|
||||
#include "asahi/lib/agx_formats.h"
|
||||
#include "asahi/lib/agx_nir_lower_vbo.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/format/u_formats.h"
|
||||
|
||||
#include "agx_helpers.h"
|
||||
#include "agx_nir_passes.h"
|
||||
#include "agx_pack.h"
|
||||
#include "hk_buffer.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
|
||||
VkFormatFeatureFlags2
|
||||
hk_get_buffer_format_features(struct hk_physical_device *pdev,
|
||||
VkFormat vk_format)
|
||||
{
|
||||
VkFormatFeatureFlags2 features = 0;
|
||||
enum pipe_format p_format = vk_format_to_pipe_format(vk_format);
|
||||
|
||||
if (p_format == PIPE_FORMAT_NONE)
|
||||
return 0;
|
||||
|
||||
if (agx_vbo_supports_format(p_format))
|
||||
features |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT;
|
||||
|
||||
if (agx_pixel_format[p_format].texturable &&
|
||||
!util_format_is_depth_or_stencil(p_format)) {
|
||||
|
||||
features |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT;
|
||||
|
||||
/* RGB32 specially supported for uniform texel buffers only. */
|
||||
if (util_is_power_of_two_nonzero(util_format_get_blocksize(p_format))) {
|
||||
features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT |
|
||||
VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT;
|
||||
}
|
||||
|
||||
if (p_format == PIPE_FORMAT_R32_UINT || p_format == PIPE_FORMAT_R32_SINT)
|
||||
features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
|
||||
}
|
||||
|
||||
return features;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
VkBufferView *pBufferView)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, device, _device);
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, pCreateInfo->buffer);
|
||||
struct hk_buffer_view *view;
|
||||
VkResult result;
|
||||
|
||||
view = vk_buffer_view_create(&device->vk, pCreateInfo, pAllocator,
|
||||
sizeof(*view));
|
||||
if (!view)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
enum pipe_format format = vk_format_to_pipe_format(view->vk.format);
|
||||
const struct util_format_description *desc = util_format_description(format);
|
||||
|
||||
uint8_t format_swizzle[4] = {
|
||||
desc->swizzle[0],
|
||||
desc->swizzle[1],
|
||||
desc->swizzle[2],
|
||||
desc->swizzle[3],
|
||||
};
|
||||
|
||||
if (util_format_is_depth_or_stencil(format)) {
|
||||
assert(!util_format_is_depth_and_stencil(format) &&
|
||||
"separate stencil always used");
|
||||
|
||||
/* Broadcast depth and stencil */
|
||||
format_swizzle[0] = 0;
|
||||
format_swizzle[1] = 0;
|
||||
format_swizzle[2] = 0;
|
||||
format_swizzle[3] = 0;
|
||||
}
|
||||
|
||||
/* Decompose the offset into a multiple of 16-bytes (which we can include in
|
||||
* the address) and an extra texel-aligned tail offset of up to 15 bytes.
|
||||
*
|
||||
* This lets us offset partially in the shader instead, getting
|
||||
* around alignment restrictions on the base address pointer.
|
||||
*/
|
||||
uint64_t base = hk_buffer_address(buffer, 0) + (view->vk.offset & ~0xf);
|
||||
uint32_t tail_offset_B = view->vk.offset & 0xf;
|
||||
uint32_t tail_offset_el = tail_offset_B / util_format_get_blocksize(format);
|
||||
assert(tail_offset_el * util_format_get_blocksize(format) == tail_offset_B &&
|
||||
"must be texel aligned");
|
||||
|
||||
struct agx_texture_packed tex;
|
||||
agx_pack(&tex, TEXTURE, cfg) {
|
||||
cfg.dimension = AGX_TEXTURE_DIMENSION_2D;
|
||||
cfg.layout = AGX_LAYOUT_LINEAR;
|
||||
cfg.channels = agx_pixel_format[format].channels;
|
||||
cfg.type = agx_pixel_format[format].type;
|
||||
cfg.swizzle_r = agx_channel_from_pipe(format_swizzle[0]);
|
||||
cfg.swizzle_g = agx_channel_from_pipe(format_swizzle[1]);
|
||||
cfg.swizzle_b = agx_channel_from_pipe(format_swizzle[2]);
|
||||
cfg.swizzle_a = agx_channel_from_pipe(format_swizzle[3]);
|
||||
|
||||
cfg.width = AGX_TEXTURE_BUFFER_WIDTH;
|
||||
cfg.height = DIV_ROUND_UP(view->vk.elements, cfg.width);
|
||||
cfg.first_level = cfg.last_level = 0;
|
||||
|
||||
cfg.address = base;
|
||||
cfg.buffer_size_sw = view->vk.elements;
|
||||
cfg.buffer_offset_sw = tail_offset_el;
|
||||
|
||||
cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
|
||||
cfg.srgb_2_channel = cfg.srgb && util_format_colormask(desc) == 0x3;
|
||||
|
||||
cfg.depth = 1;
|
||||
cfg.stride = (cfg.width * util_format_get_blocksize(format)) - 16;
|
||||
}
|
||||
|
||||
struct agx_pbe_packed pbe;
|
||||
agx_pack(&pbe, PBE, cfg) {
|
||||
cfg.dimension = AGX_TEXTURE_DIMENSION_2D;
|
||||
cfg.layout = AGX_LAYOUT_LINEAR;
|
||||
cfg.channels = agx_pixel_format[format].channels;
|
||||
cfg.type = agx_pixel_format[format].type;
|
||||
cfg.srgb = util_format_is_srgb(format);
|
||||
|
||||
assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
|
||||
|
||||
for (unsigned i = 0; i < desc->nr_channels; ++i) {
|
||||
if (desc->swizzle[i] == 0)
|
||||
cfg.swizzle_r = i;
|
||||
else if (desc->swizzle[i] == 1)
|
||||
cfg.swizzle_g = i;
|
||||
else if (desc->swizzle[i] == 2)
|
||||
cfg.swizzle_b = i;
|
||||
else if (desc->swizzle[i] == 3)
|
||||
cfg.swizzle_a = i;
|
||||
}
|
||||
|
||||
cfg.buffer = base;
|
||||
cfg.buffer_offset_sw = tail_offset_el;
|
||||
|
||||
cfg.width = AGX_TEXTURE_BUFFER_WIDTH;
|
||||
cfg.height = DIV_ROUND_UP(view->vk.elements, cfg.width);
|
||||
cfg.level = 0;
|
||||
cfg.stride = (cfg.width * util_format_get_blocksize(format)) - 4;
|
||||
cfg.layers = 1;
|
||||
cfg.levels = 1;
|
||||
};
|
||||
|
||||
result = hk_descriptor_table_add(device, &device->images, &tex, sizeof(tex),
|
||||
&view->tex_desc_index);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_buffer_view_destroy(&device->vk, pAllocator, &view->vk);
|
||||
return result;
|
||||
}
|
||||
|
||||
result = hk_descriptor_table_add(device, &device->images, &pbe, sizeof(pbe),
|
||||
&view->pbe_desc_index);
|
||||
if (result != VK_SUCCESS) {
|
||||
hk_descriptor_table_remove(device, &device->images, view->tex_desc_index);
|
||||
vk_buffer_view_destroy(&device->vk, pAllocator, &view->vk);
|
||||
return result;
|
||||
}
|
||||
|
||||
*pBufferView = hk_buffer_view_to_handle(view);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, device, _device);
|
||||
VK_FROM_HANDLE(hk_buffer_view, view, bufferView);
|
||||
|
||||
if (!view)
|
||||
return;
|
||||
|
||||
hk_descriptor_table_remove(device, &device->images, view->tex_desc_index);
|
||||
hk_descriptor_table_remove(device, &device->images, view->pbe_desc_index);
|
||||
|
||||
vk_buffer_view_destroy(&device->vk, pAllocator, &view->vk);
|
||||
}
|
||||
27
src/asahi/vulkan/hk_buffer_view.h
Normal file
27
src/asahi/vulkan/hk_buffer_view.h
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_buffer_view.h"
|
||||
|
||||
struct hk_physical_device;
|
||||
|
||||
VkFormatFeatureFlags2
|
||||
hk_get_buffer_format_features(struct hk_physical_device *pdevice,
|
||||
VkFormat format);
|
||||
|
||||
struct hk_buffer_view {
|
||||
struct vk_buffer_view vk;
|
||||
|
||||
/** Index in the image descriptor table */
|
||||
uint32_t tex_desc_index, pbe_desc_index;
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_buffer_view, vk.base, VkBufferView,
|
||||
VK_OBJECT_TYPE_BUFFER_VIEW)
|
||||
811
src/asahi/vulkan/hk_cmd_buffer.c
Normal file
811
src/asahi/vulkan/hk_cmd_buffer.c
Normal file
|
|
@ -0,0 +1,811 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_cmd_buffer.h"
|
||||
|
||||
#include "agx_bo.h"
|
||||
#include "agx_linker.h"
|
||||
#include "agx_tilebuffer.h"
|
||||
#include "agx_usc.h"
|
||||
#include "hk_buffer.h"
|
||||
#include "hk_cmd_pool.h"
|
||||
#include "hk_descriptor_set.h"
|
||||
#include "hk_descriptor_set_layout.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_device_memory.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_image_view.h"
|
||||
#include "hk_physical_device.h"
|
||||
#include "hk_shader.h"
|
||||
|
||||
#include "pool.h"
|
||||
#include "shader_enums.h"
|
||||
#include "vk_pipeline_layout.h"
|
||||
#include "vk_synchronization.h"
|
||||
|
||||
#include "nouveau/nouveau.h"
|
||||
#include "util/list.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
static void
|
||||
hk_descriptor_state_fini(struct hk_cmd_buffer *cmd,
|
||||
struct hk_descriptor_state *desc)
|
||||
{
|
||||
struct hk_cmd_pool *pool = hk_cmd_buffer_pool(cmd);
|
||||
|
||||
for (unsigned i = 0; i < HK_MAX_SETS; i++) {
|
||||
vk_free(&pool->vk.alloc, desc->push[i]);
|
||||
desc->push[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hk_free_resettable_cmd_buffer(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
struct hk_cmd_pool *pool = hk_cmd_buffer_pool(cmd);
|
||||
|
||||
hk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
|
||||
hk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);
|
||||
|
||||
hk_cmd_pool_free_bo_list(pool, &cmd->uploader.main.bos);
|
||||
hk_cmd_pool_free_usc_bo_list(pool, &cmd->uploader.usc.bos);
|
||||
|
||||
list_for_each_entry_safe(struct hk_cs, it, &cmd->control_streams, node) {
|
||||
list_del(&it->node);
|
||||
hk_cs_destroy(it);
|
||||
}
|
||||
|
||||
util_dynarray_foreach(&cmd->large_bos, struct agx_bo *, bo) {
|
||||
agx_bo_unreference(*bo);
|
||||
}
|
||||
|
||||
util_dynarray_clear(&cmd->large_bos);
|
||||
}
|
||||
|
||||
static void
|
||||
hk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
|
||||
{
|
||||
struct hk_cmd_buffer *cmd =
|
||||
container_of(vk_cmd_buffer, struct hk_cmd_buffer, vk);
|
||||
struct hk_cmd_pool *pool = hk_cmd_buffer_pool(cmd);
|
||||
|
||||
hk_free_resettable_cmd_buffer(cmd);
|
||||
vk_command_buffer_finish(&cmd->vk);
|
||||
vk_free(&pool->vk.alloc, cmd);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_create_cmd_buffer(struct vk_command_pool *vk_pool,
|
||||
VkCommandBufferLevel level,
|
||||
struct vk_command_buffer **cmd_buffer_out)
|
||||
{
|
||||
struct hk_cmd_pool *pool = container_of(vk_pool, struct hk_cmd_pool, vk);
|
||||
struct hk_device *dev = hk_cmd_pool_device(pool);
|
||||
struct hk_cmd_buffer *cmd;
|
||||
VkResult result;
|
||||
|
||||
cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (cmd == NULL)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
result =
|
||||
vk_command_buffer_init(&pool->vk, &cmd->vk, &hk_cmd_buffer_ops, level);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&pool->vk.alloc, cmd);
|
||||
return result;
|
||||
}
|
||||
|
||||
util_dynarray_init(&cmd->large_bos, NULL);
|
||||
|
||||
cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
|
||||
cmd->vk.dynamic_graphics_state.ms.sample_locations =
|
||||
&cmd->state.gfx._dynamic_sl;
|
||||
|
||||
list_inithead(&cmd->uploader.main.bos);
|
||||
list_inithead(&cmd->uploader.usc.bos);
|
||||
list_inithead(&cmd->control_streams);
|
||||
|
||||
*cmd_buffer_out = &cmd->vk;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
hk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
|
||||
UNUSED VkCommandBufferResetFlags flags)
|
||||
{
|
||||
struct hk_cmd_buffer *cmd =
|
||||
container_of(vk_cmd_buffer, struct hk_cmd_buffer, vk);
|
||||
|
||||
vk_command_buffer_reset(&cmd->vk);
|
||||
hk_free_resettable_cmd_buffer(cmd);
|
||||
|
||||
cmd->uploader.main.map = NULL;
|
||||
cmd->uploader.main.base = 0;
|
||||
cmd->uploader.main.offset = 0;
|
||||
cmd->uploader.usc.map = NULL;
|
||||
cmd->uploader.usc.base = 0;
|
||||
cmd->uploader.usc.offset = 0;
|
||||
|
||||
cmd->current_cs.gfx = NULL;
|
||||
cmd->current_cs.cs = NULL;
|
||||
cmd->current_cs.post_gfx = NULL;
|
||||
cmd->current_cs.pre_gfx = NULL;
|
||||
|
||||
/* TODO: clear pool! */
|
||||
|
||||
memset(&cmd->state, 0, sizeof(cmd->state));
|
||||
}
|
||||
|
||||
const struct vk_command_buffer_ops hk_cmd_buffer_ops = {
|
||||
.create = hk_create_cmd_buffer,
|
||||
.reset = hk_reset_cmd_buffer,
|
||||
.destroy = hk_destroy_cmd_buffer,
|
||||
};
|
||||
|
||||
static VkResult
|
||||
hk_cmd_buffer_alloc_bo(struct hk_cmd_buffer *cmd, bool usc,
|
||||
struct hk_cmd_bo **bo_out)
|
||||
{
|
||||
VkResult result = hk_cmd_pool_alloc_bo(hk_cmd_buffer_pool(cmd), usc, bo_out);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (usc)
|
||||
list_addtail(&(*bo_out)->link, &cmd->uploader.usc.bos);
|
||||
else
|
||||
list_addtail(&(*bo_out)->link, &cmd->uploader.main.bos);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct agx_ptr
|
||||
hk_pool_alloc_internal(struct hk_cmd_buffer *cmd, uint32_t size,
|
||||
uint32_t alignment, bool usc)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
struct hk_uploader *uploader =
|
||||
usc ? &cmd->uploader.usc : &cmd->uploader.main;
|
||||
|
||||
/* Specially handle large allocations owned by the command buffer, e.g. used
|
||||
* for statically allocated vertex output buffers with geometry shaders.
|
||||
*/
|
||||
if (size > HK_CMD_BO_SIZE) {
|
||||
uint32_t flags = usc ? AGX_BO_LOW_VA : 0;
|
||||
struct agx_bo *bo =
|
||||
agx_bo_create(&dev->dev, size, flags, "Large pool allocation");
|
||||
|
||||
util_dynarray_append(&cmd->large_bos, struct agx_bo *, bo);
|
||||
return bo->ptr;
|
||||
}
|
||||
|
||||
assert(size <= HK_CMD_BO_SIZE);
|
||||
assert(alignment > 0);
|
||||
|
||||
uint32_t offset = align(uploader->offset, alignment);
|
||||
|
||||
assert(offset <= HK_CMD_BO_SIZE);
|
||||
if (uploader->map != NULL && size <= HK_CMD_BO_SIZE - offset) {
|
||||
uploader->offset = offset + size;
|
||||
|
||||
return (struct agx_ptr){
|
||||
.gpu = uploader->base + offset,
|
||||
.cpu = uploader->map + offset,
|
||||
};
|
||||
}
|
||||
|
||||
struct hk_cmd_bo *bo;
|
||||
VkResult result = hk_cmd_buffer_alloc_bo(cmd, usc, &bo);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return (struct agx_ptr){0};
|
||||
}
|
||||
|
||||
/* Pick whichever of the current upload BO and the new BO will have more
|
||||
* room left to be the BO for the next upload. If our upload size is
|
||||
* bigger than the old offset, we're better off burning the whole new
|
||||
* upload BO on this one allocation and continuing on the current upload
|
||||
* BO.
|
||||
*/
|
||||
if (uploader->map == NULL || size < uploader->offset) {
|
||||
uploader->map = bo->bo->ptr.cpu;
|
||||
uploader->base = bo->bo->ptr.gpu;
|
||||
uploader->offset = size;
|
||||
}
|
||||
|
||||
return (struct agx_ptr){
|
||||
.gpu = bo->bo->ptr.gpu,
|
||||
.cpu = bo->map,
|
||||
};
|
||||
}
|
||||
|
||||
uint64_t
|
||||
hk_pool_upload(struct hk_cmd_buffer *cmd, const void *data, uint32_t size,
|
||||
uint32_t alignment)
|
||||
{
|
||||
struct agx_ptr T = hk_pool_alloc(cmd, size, alignment);
|
||||
if (unlikely(T.cpu == NULL))
|
||||
return 0;
|
||||
|
||||
memcpy(T.cpu, data, size);
|
||||
return T.gpu;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
hk_reset_cmd_buffer(&cmd->vk, 0);
|
||||
|
||||
hk_cmd_buffer_begin_compute(cmd, pBeginInfo);
|
||||
hk_cmd_buffer_begin_graphics(cmd, pBeginInfo);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
assert(cmd->current_cs.gfx == NULL && cmd->current_cs.pre_gfx == NULL &&
|
||||
"must end rendering before ending the command buffer");
|
||||
|
||||
hk_cmd_buffer_end_compute(cmd);
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.post_gfx);
|
||||
|
||||
return vk_command_buffer_get_record_result(&cmd->vk);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
|
||||
const VkDependencyInfo *pDependencyInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
/* The big hammer. We end both compute and graphics batches. Ending compute
|
||||
* here is necessary to properly handle graphics->compute dependencies.
|
||||
*
|
||||
* XXX: perf. */
|
||||
hk_cmd_buffer_end_compute(cmd);
|
||||
hk_cmd_buffer_end_graphics(cmd);
|
||||
}
|
||||
|
||||
void
|
||||
hk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd, uint32_t stage_count,
|
||||
const gl_shader_stage *stages,
|
||||
struct vk_shader **const shaders)
|
||||
{
|
||||
struct hk_cmd_buffer *cmd = container_of(vk_cmd, struct hk_cmd_buffer, vk);
|
||||
|
||||
for (uint32_t i = 0; i < stage_count; i++) {
|
||||
struct hk_api_shader *shader =
|
||||
container_of(shaders[i], struct hk_api_shader, vk);
|
||||
|
||||
if (stages[i] == MESA_SHADER_COMPUTE || stages[i] == MESA_SHADER_KERNEL)
|
||||
hk_cmd_bind_compute_shader(cmd, shader);
|
||||
else
|
||||
hk_cmd_bind_graphics_shader(cmd, stages[i], shader);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hk_bind_descriptor_sets(UNUSED struct hk_cmd_buffer *cmd,
|
||||
struct hk_descriptor_state *desc,
|
||||
const VkBindDescriptorSetsInfoKHR *info)
|
||||
{
|
||||
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
|
||||
|
||||
/* Fro the Vulkan 1.3.275 spec:
|
||||
*
|
||||
* "When binding a descriptor set (see Descriptor Set Binding) to
|
||||
* set number N...
|
||||
*
|
||||
* If, additionally, the previously bound descriptor set for set
|
||||
* N was bound using a pipeline layout not compatible for set N,
|
||||
* then all bindings in sets numbered greater than N are
|
||||
* disturbed."
|
||||
*
|
||||
* This means that, if some earlier set gets bound in such a way that
|
||||
* it changes set_dynamic_buffer_start[s], this binding is implicitly
|
||||
* invalidated. Therefore, we can always look at the current value
|
||||
* of set_dynamic_buffer_start[s] as the base of our dynamic buffer
|
||||
* range and it's only our responsibility to adjust all
|
||||
* set_dynamic_buffer_start[p] for p > s as needed.
|
||||
*/
|
||||
uint8_t dyn_buffer_start =
|
||||
desc->root.set_dynamic_buffer_start[info->firstSet];
|
||||
|
||||
uint32_t next_dyn_offset = 0;
|
||||
for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
|
||||
unsigned s = i + info->firstSet;
|
||||
VK_FROM_HANDLE(hk_descriptor_set, set, info->pDescriptorSets[i]);
|
||||
|
||||
if (desc->sets[s] != set) {
|
||||
if (set != NULL) {
|
||||
desc->root.sets[s] = hk_descriptor_set_addr(set);
|
||||
desc->set_sizes[s] = set->size;
|
||||
} else {
|
||||
desc->root.sets[s] = 0;
|
||||
desc->set_sizes[s] = 0;
|
||||
}
|
||||
desc->sets[s] = set;
|
||||
desc->sets_dirty |= BITFIELD_BIT(s);
|
||||
|
||||
/* Binding descriptors invalidates push descriptors */
|
||||
desc->push_dirty &= ~BITFIELD_BIT(s);
|
||||
}
|
||||
|
||||
desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;
|
||||
|
||||
if (pipeline_layout->set_layouts[s] != NULL) {
|
||||
const struct hk_descriptor_set_layout *set_layout =
|
||||
vk_to_hk_descriptor_set_layout(pipeline_layout->set_layouts[s]);
|
||||
|
||||
if (set != NULL && set_layout->dynamic_buffer_count > 0) {
|
||||
for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
|
||||
struct hk_buffer_address addr = set->dynamic_buffers[j];
|
||||
addr.base_addr += info->pDynamicOffsets[next_dyn_offset + j];
|
||||
desc->root.dynamic_buffers[dyn_buffer_start + j] = addr;
|
||||
}
|
||||
next_dyn_offset += set->layout->dynamic_buffer_count;
|
||||
}
|
||||
|
||||
dyn_buffer_start += set_layout->dynamic_buffer_count;
|
||||
} else {
|
||||
assert(set == NULL);
|
||||
}
|
||||
}
|
||||
assert(dyn_buffer_start <= HK_MAX_DYNAMIC_BUFFERS);
|
||||
assert(next_dyn_offset <= info->dynamicOffsetCount);
|
||||
|
||||
for (uint32_t s = info->firstSet + info->descriptorSetCount; s < HK_MAX_SETS;
|
||||
s++)
|
||||
desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;
|
||||
|
||||
desc->root_dirty = true;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdBindDescriptorSets2KHR(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
|
||||
hk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
|
||||
pBindDescriptorSetsInfo);
|
||||
}
|
||||
|
||||
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
hk_bind_descriptor_sets(cmd, &cmd->state.cs.descriptors,
|
||||
pBindDescriptorSetsInfo);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hk_push_constants(UNUSED struct hk_cmd_buffer *cmd,
|
||||
struct hk_descriptor_state *desc,
|
||||
const VkPushConstantsInfoKHR *info)
|
||||
{
|
||||
memcpy(desc->root.push + info->offset, info->pValues, info->size);
|
||||
desc->root_dirty = true;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
|
||||
const VkPushConstantsInfoKHR *pPushConstantsInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
|
||||
hk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);
|
||||
|
||||
if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
|
||||
hk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo);
|
||||
}
|
||||
|
||||
static struct hk_push_descriptor_set *
|
||||
hk_cmd_push_descriptors(struct hk_cmd_buffer *cmd,
|
||||
struct hk_descriptor_state *desc, uint32_t set)
|
||||
{
|
||||
assert(set < HK_MAX_SETS);
|
||||
if (unlikely(desc->push[set] == NULL)) {
|
||||
desc->push[set] =
|
||||
vk_zalloc(&cmd->vk.pool->alloc, sizeof(*desc->push[set]), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (unlikely(desc->push[set] == NULL)) {
|
||||
vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pushing descriptors replaces whatever sets are bound */
|
||||
desc->sets[set] = NULL;
|
||||
desc->push_dirty |= BITFIELD_BIT(set);
|
||||
|
||||
return desc->push[set];
|
||||
}
|
||||
|
||||
static void
|
||||
hk_push_descriptor_set(struct hk_cmd_buffer *cmd,
|
||||
struct hk_descriptor_state *desc,
|
||||
const VkPushDescriptorSetInfoKHR *info)
|
||||
{
|
||||
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
|
||||
|
||||
struct hk_push_descriptor_set *push_set =
|
||||
hk_cmd_push_descriptors(cmd, desc, info->set);
|
||||
if (unlikely(push_set == NULL))
|
||||
return;
|
||||
|
||||
struct hk_descriptor_set_layout *set_layout =
|
||||
vk_to_hk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);
|
||||
|
||||
hk_push_descriptor_set_update(push_set, set_layout,
|
||||
info->descriptorWriteCount,
|
||||
info->pDescriptorWrites);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdPushDescriptorSet2KHR(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
|
||||
hk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
|
||||
pPushDescriptorSetInfo);
|
||||
}
|
||||
|
||||
if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
hk_push_descriptor_set(cmd, &cmd->state.cs.descriptors,
|
||||
pPushDescriptorSetInfo);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hk_cmd_buffer_flush_push_descriptors(struct hk_cmd_buffer *cmd,
|
||||
struct hk_descriptor_state *desc)
|
||||
{
|
||||
u_foreach_bit(set_idx, desc->push_dirty) {
|
||||
struct hk_push_descriptor_set *push_set = desc->push[set_idx];
|
||||
uint64_t push_set_addr = hk_pool_upload(
|
||||
cmd, push_set->data, sizeof(push_set->data), HK_MIN_UBO_ALIGNMENT);
|
||||
|
||||
desc->root.sets[set_idx] = push_set_addr;
|
||||
desc->set_sizes[set_idx] = sizeof(push_set->data);
|
||||
}
|
||||
|
||||
desc->root_dirty = true;
|
||||
desc->push_dirty = 0;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdPushDescriptorSetWithTemplate2KHR(
|
||||
VkCommandBuffer commandBuffer, const VkPushDescriptorSetWithTemplateInfoKHR
|
||||
*pPushDescriptorSetWithTemplateInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(vk_descriptor_update_template, template,
|
||||
pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
|
||||
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout,
|
||||
pPushDescriptorSetWithTemplateInfo->layout);
|
||||
|
||||
struct hk_descriptor_state *desc =
|
||||
hk_get_descriptors_state(cmd, template->bind_point);
|
||||
struct hk_push_descriptor_set *push_set = hk_cmd_push_descriptors(
|
||||
cmd, desc, pPushDescriptorSetWithTemplateInfo->set);
|
||||
if (unlikely(push_set == NULL))
|
||||
return;
|
||||
|
||||
struct hk_descriptor_set_layout *set_layout = vk_to_hk_descriptor_set_layout(
|
||||
pipeline_layout->set_layouts[pPushDescriptorSetWithTemplateInfo->set]);
|
||||
|
||||
hk_push_descriptor_set_update_template(
|
||||
push_set, set_layout, template,
|
||||
pPushDescriptorSetWithTemplateInfo->pData);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
hk_cmd_buffer_upload_root(struct hk_cmd_buffer *cmd,
|
||||
VkPipelineBindPoint bind_point)
|
||||
{
|
||||
struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
|
||||
struct hk_root_descriptor_table *root = &desc->root;
|
||||
|
||||
struct agx_ptr root_ptr = hk_pool_alloc(cmd, sizeof(*root), 8);
|
||||
if (!root_ptr.cpu)
|
||||
return 0;
|
||||
|
||||
root->root_desc_addr = root_ptr.gpu;
|
||||
|
||||
memcpy(root_ptr.cpu, root, sizeof(*root));
|
||||
return root_ptr.gpu;
|
||||
}
|
||||
|
||||
void
|
||||
hk_usc_upload_spilled_rt_descs(struct agx_usc_builder *b,
|
||||
struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
struct hk_rendering_state *render = &cmd->state.gfx.render;
|
||||
|
||||
/* Upload texture/PBE descriptors for each render target so we can clear
|
||||
* spilled render targets.
|
||||
*/
|
||||
struct agx_ptr descs =
|
||||
hk_pool_alloc(cmd, AGX_TEXTURE_LENGTH * 2 * render->color_att_count, 64);
|
||||
struct agx_texture_packed *desc = descs.cpu;
|
||||
if (!desc)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < render->color_att_count; ++i) {
|
||||
struct hk_image_view *iview = render->color_att[i].iview;
|
||||
if (!iview) {
|
||||
/* XXX: probably should emit a null descriptor here...? */
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(&desc[(i * 2) + 0], &iview->planes[0].emrt_texture, sizeof(*desc));
|
||||
memcpy(&desc[(i * 2) + 1], &iview->planes[0].emrt_pbe, sizeof(*desc));
|
||||
}
|
||||
|
||||
desc = descs.cpu;
|
||||
|
||||
/* Bind the base as u0_u1 for bindless access */
|
||||
agx_usc_uniform(b, 0, 4, hk_pool_upload(cmd, &descs.gpu, 8, 8));
|
||||
}
|
||||
|
||||
void
|
||||
hk_reserve_scratch(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
||||
struct hk_shader *s)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
uint32_t max_scratch_size =
|
||||
MAX2(s->b.info.scratch_size, s->b.info.preamble_scratch_size);
|
||||
|
||||
if (max_scratch_size == 0)
|
||||
return;
|
||||
|
||||
unsigned preamble_size = (s->b.info.preamble_scratch_size > 0) ? 1 : 0;
|
||||
|
||||
/* XXX: need to lock around agx_scratch_alloc... */
|
||||
/* Note: this uses the hardware stage, not the software stage */
|
||||
switch (s->b.info.stage) {
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
agx_scratch_alloc(&dev->scratch.fs, max_scratch_size, 0);
|
||||
cs->scratch.fs.main = true;
|
||||
cs->scratch.fs.preamble = MAX2(cs->scratch.fs.preamble, preamble_size);
|
||||
break;
|
||||
case PIPE_SHADER_VERTEX:
|
||||
agx_scratch_alloc(&dev->scratch.vs, max_scratch_size, 0);
|
||||
cs->scratch.vs.main = true;
|
||||
cs->scratch.vs.preamble = MAX2(cs->scratch.vs.preamble, preamble_size);
|
||||
break;
|
||||
default:
|
||||
agx_scratch_alloc(&dev->scratch.cs, max_scratch_size, 0);
|
||||
cs->scratch.cs.main = true;
|
||||
cs->scratch.cs.preamble = MAX2(cs->scratch.cs.preamble, preamble_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
|
||||
struct hk_linked_shader *linked)
|
||||
{
|
||||
enum pipe_shader_type sw_stage = s->info.stage;
|
||||
enum pipe_shader_type hw_stage = s->b.info.stage;
|
||||
|
||||
unsigned constant_push_ranges =
|
||||
DIV_ROUND_UP(s->b.info.immediate_size_16, 64);
|
||||
unsigned push_ranges = 2;
|
||||
unsigned stage_ranges = 3;
|
||||
|
||||
size_t usc_size =
|
||||
agx_usc_size(constant_push_ranges + push_ranges + stage_ranges + 4);
|
||||
struct agx_ptr t = hk_pool_usc_alloc(cmd, usc_size, 64);
|
||||
if (!t.cpu)
|
||||
return 0;
|
||||
|
||||
struct agx_usc_builder b = agx_usc_builder(t.cpu, usc_size);
|
||||
|
||||
uint64_t root_ptr;
|
||||
|
||||
if (sw_stage == PIPE_SHADER_COMPUTE)
|
||||
root_ptr = hk_cmd_buffer_upload_root(cmd, VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
else
|
||||
root_ptr = cmd->state.gfx.root;
|
||||
|
||||
static_assert(offsetof(struct hk_root_descriptor_table, root_desc_addr) == 0,
|
||||
"self-reflective");
|
||||
|
||||
agx_usc_uniform(&b, HK_ROOT_UNIFORM, 4, root_ptr);
|
||||
|
||||
if (sw_stage == MESA_SHADER_VERTEX) {
|
||||
unsigned count =
|
||||
DIV_ROUND_UP(BITSET_LAST_BIT(s->info.vs.attrib_components_read), 4);
|
||||
|
||||
if (count) {
|
||||
agx_usc_uniform(
|
||||
&b, 0, 4 * count,
|
||||
root_ptr + hk_root_descriptor_offset(draw.attrib_base));
|
||||
|
||||
agx_usc_uniform(
|
||||
&b, 4 * count, 2 * count,
|
||||
root_ptr + hk_root_descriptor_offset(draw.attrib_clamps));
|
||||
}
|
||||
|
||||
if (cmd->state.gfx.draw_params)
|
||||
agx_usc_uniform(&b, 6 * count, 4, cmd->state.gfx.draw_params);
|
||||
|
||||
if (cmd->state.gfx.draw_id_ptr)
|
||||
agx_usc_uniform(&b, (6 * count) + 4, 1, cmd->state.gfx.draw_id_ptr);
|
||||
|
||||
if (hw_stage == MESA_SHADER_COMPUTE) {
|
||||
agx_usc_uniform(
|
||||
&b, (6 * count) + 8, 4,
|
||||
root_ptr + hk_root_descriptor_offset(draw.input_assembly));
|
||||
}
|
||||
} else if (sw_stage == MESA_SHADER_FRAGMENT) {
|
||||
if (agx_tilebuffer_spills(&cmd->state.gfx.render.tilebuffer)) {
|
||||
hk_usc_upload_spilled_rt_descs(&b, cmd);
|
||||
}
|
||||
|
||||
agx_usc_uniform(
|
||||
&b, 4, 8, root_ptr + hk_root_descriptor_offset(draw.blend_constant));
|
||||
|
||||
/* The SHARED state is baked into linked->usc for non-fragment shaders. We
|
||||
* don't pass around the information to bake the tilebuffer layout.
|
||||
*
|
||||
* TODO: We probably could with some refactor.
|
||||
*/
|
||||
agx_usc_push_packed(&b, SHARED, &cmd->state.gfx.render.tilebuffer.usc);
|
||||
}
|
||||
|
||||
agx_usc_push_blob(&b, linked->usc.data, linked->usc.size);
|
||||
return t.gpu;
|
||||
}
|
||||
|
||||
/* Specialized variant of hk_upload_usc_words for internal dispatches that do
|
||||
* not use any state except for some directly mapped uniforms.
|
||||
*/
|
||||
uint32_t
|
||||
hk_upload_usc_words_kernel(struct hk_cmd_buffer *cmd, struct hk_shader *s,
|
||||
void *data, size_t data_size)
|
||||
{
|
||||
assert(s->info.stage == MESA_SHADER_COMPUTE);
|
||||
assert(s->b.info.scratch_size == 0 && "you shouldn't be spilling!");
|
||||
assert(s->b.info.preamble_scratch_size == 0 && "you shouldn't be spilling!");
|
||||
|
||||
unsigned constant_push_ranges =
|
||||
DIV_ROUND_UP(s->b.info.immediate_size_16, 64);
|
||||
|
||||
size_t usc_size = agx_usc_size(constant_push_ranges + 7);
|
||||
struct agx_ptr t = hk_pool_usc_alloc(cmd, usc_size, 64);
|
||||
if (!t.cpu)
|
||||
return 0;
|
||||
|
||||
struct agx_usc_builder b = agx_usc_builder(t.cpu, usc_size);
|
||||
|
||||
/* Map the data directly as uniforms starting at u0 */
|
||||
agx_usc_uniform(&b, 0, DIV_ROUND_UP(data_size, 2),
|
||||
hk_pool_upload(cmd, data, data_size, 4));
|
||||
|
||||
agx_usc_push_blob(&b, s->only_linked->usc.data, s->only_linked->usc.size);
|
||||
return t.gpu;
|
||||
}
|
||||
|
||||
void
|
||||
hk_cs_init_graphics(struct hk_cmd_buffer *cmd, struct hk_cs *cs)
|
||||
{
|
||||
struct hk_rendering_state *render = &cmd->state.gfx.render;
|
||||
uint8_t *map = cs->current;
|
||||
|
||||
cs->tib = render->tilebuffer;
|
||||
|
||||
/* Assume this is not the first control stream of the render pass, so
|
||||
* initially use the partial background program and ZLS control.
|
||||
* hk_BeginRendering will override.
|
||||
*/
|
||||
cs->cr = render->cr;
|
||||
cs->cr.bg.main = render->cr.bg.partial;
|
||||
cs->cr.zls_control = render->cr.zls_control_partial;
|
||||
|
||||
/* Barrier to enforce GPU-CPU coherency, in case this batch is back to back
|
||||
* with another that caused stale data to be cached and the CPU wrote to it
|
||||
* in the meantime.
|
||||
*/
|
||||
agx_push(map, VDM_BARRIER, cfg) {
|
||||
cfg.usc_cache_inval = true;
|
||||
}
|
||||
|
||||
struct AGX_PPP_HEADER present = {
|
||||
.w_clamp = true,
|
||||
.occlusion_query_2 = true,
|
||||
.output_unknown = true,
|
||||
.varying_word_2 = true,
|
||||
.viewport_count = 1, /* irrelevant */
|
||||
};
|
||||
|
||||
size_t size = agx_ppp_update_size(&present);
|
||||
struct agx_ptr T = hk_pool_alloc(cmd, size, 64);
|
||||
if (!T.cpu)
|
||||
return;
|
||||
|
||||
struct agx_ppp_update ppp = agx_new_ppp_update(T, size, &present);
|
||||
|
||||
/* clang-format off */
|
||||
agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10;
|
||||
agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg);
|
||||
agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg);
|
||||
agx_ppp_push(&ppp, VARYING_2, cfg);
|
||||
/* clang-format on */
|
||||
|
||||
agx_ppp_fini(&map, &ppp);
|
||||
cs->current = map;
|
||||
|
||||
util_dynarray_init(&cs->scissor, NULL);
|
||||
util_dynarray_init(&cs->depth_bias, NULL);
|
||||
|
||||
/* All graphics state must be reemited in each control stream */
|
||||
hk_cmd_buffer_dirty_all(cmd);
|
||||
}
|
||||
|
||||
void
|
||||
hk_ensure_cs_has_space(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
||||
size_t space)
|
||||
{
|
||||
bool vdm = cs->type == HK_CS_VDM;
|
||||
|
||||
size_t link_length =
|
||||
vdm ? AGX_VDM_STREAM_LINK_LENGTH : AGX_CDM_STREAM_LINK_LENGTH;
|
||||
|
||||
/* Assert that we have space for a link tag */
|
||||
assert((cs->current + link_length) <= cs->end && "Encoder overflowed");
|
||||
|
||||
/* Always leave room for a link tag, in case we run out of space later,
|
||||
* plus padding because VDM apparently overreads?
|
||||
*
|
||||
* 0x200 is not enough. 0x400 seems to work. 0x800 for safety.
|
||||
*/
|
||||
space += link_length + 0x800;
|
||||
|
||||
/* If there is room in the command buffer, we're done */
|
||||
if (likely((cs->end - cs->current) >= space))
|
||||
return;
|
||||
|
||||
/* Otherwise, we need to allocate a new command buffer. We use memory owned
|
||||
* by the batch to simplify lifetime management for the BO.
|
||||
*/
|
||||
size_t size = 65536;
|
||||
struct agx_ptr T = hk_pool_alloc(cmd, size, 256);
|
||||
|
||||
/* Jump from the old control stream to the new control stream */
|
||||
if (vdm) {
|
||||
agx_pack(cs->current, VDM_STREAM_LINK, cfg) {
|
||||
cfg.target_lo = T.gpu & BITFIELD_MASK(32);
|
||||
cfg.target_hi = T.gpu >> 32;
|
||||
}
|
||||
} else {
|
||||
agx_pack(cs->current, CDM_STREAM_LINK, cfg) {
|
||||
cfg.target_lo = T.gpu & BITFIELD_MASK(32);
|
||||
cfg.target_hi = T.gpu >> 32;
|
||||
}
|
||||
}
|
||||
|
||||
/* Swap out the control stream */
|
||||
cs->current = T.cpu;
|
||||
cs->end = cs->current + size;
|
||||
cs->stream_linked = true;
|
||||
}
|
||||
767
src/asahi/vulkan/hk_cmd_buffer.h
Normal file
767
src/asahi/vulkan/hk_cmd_buffer.h
Normal file
|
|
@ -0,0 +1,767 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/macros.h"
|
||||
|
||||
#include "util/list.h"
|
||||
#include "agx_helpers.h"
|
||||
#include "agx_linker.h"
|
||||
#include "agx_pack.h"
|
||||
#include "agx_tilebuffer.h"
|
||||
#include "agx_uvs.h"
|
||||
#include "pool.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
#include "hk_private.h"
|
||||
#include "hk_shader.h"
|
||||
|
||||
#include "hk_cmd_pool.h"
|
||||
#include "hk_descriptor_set.h"
|
||||
|
||||
#include "asahi/lib/agx_nir_lower_vbo.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
#include "vk_command_buffer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
struct hk_buffer;
|
||||
struct hk_cmd_bo;
|
||||
struct hk_cmd_pool;
|
||||
struct hk_image_view;
|
||||
struct hk_push_descriptor_set;
|
||||
struct hk_shader;
|
||||
struct hk_linked_shader;
|
||||
struct agx_usc_builder;
|
||||
struct vk_shader;
|
||||
|
||||
/** Root descriptor table. */
|
||||
struct hk_root_descriptor_table {
|
||||
uint64_t root_desc_addr;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t view_index;
|
||||
uint32_t ppp_multisamplectl;
|
||||
|
||||
/* Vertex input state */
|
||||
uint64_t attrib_base[AGX_MAX_VBUFS];
|
||||
uint32_t attrib_clamps[AGX_MAX_VBUFS];
|
||||
|
||||
/* Pointer to the VS->TCS, VS->GS, or TES->GS buffer. */
|
||||
uint64_t vertex_output_buffer;
|
||||
|
||||
/* Mask of outputs flowing VS->TCS, VS->GS, or TES->GS . */
|
||||
uint64_t vertex_outputs;
|
||||
|
||||
/* Address of input assembly buffer if geom/tess is used, else 0 */
|
||||
uint64_t input_assembly;
|
||||
|
||||
/* Address of tessellation param buffer if tessellation used, else 0 */
|
||||
uint64_t tess_params;
|
||||
|
||||
/* Address of geometry param buffer if GS is used, else 0 */
|
||||
uint64_t geometry_params;
|
||||
|
||||
/* Pipeline statistics queries. This is a base address with flags. */
|
||||
uint64_t pipeline_stats;
|
||||
VkQueryPipelineStatisticFlags pipeline_stats_flags;
|
||||
|
||||
float blend_constant[4];
|
||||
uint16_t no_epilog_discard;
|
||||
uint16_t _pad1;
|
||||
uint16_t api_sample_mask;
|
||||
uint16_t _pad2;
|
||||
uint16_t force_never_in_shader;
|
||||
uint16_t _pad3;
|
||||
uint16_t provoking;
|
||||
uint16_t _pad4;
|
||||
|
||||
/* Mapping from varying slots written by the last vertex stage to UVS
|
||||
* indices. This mapping must be compatible with the fragment shader.
|
||||
*/
|
||||
uint8_t uvs_index[VARYING_SLOT_MAX];
|
||||
} draw;
|
||||
struct {
|
||||
uint64_t group_count_addr;
|
||||
uint32_t base_group[3];
|
||||
} cs;
|
||||
};
|
||||
|
||||
/* Client push constants */
|
||||
uint8_t push[HK_MAX_PUSH_SIZE];
|
||||
|
||||
/* Descriptor set base addresses */
|
||||
uint64_t sets[HK_MAX_SETS];
|
||||
|
||||
/* Dynamic buffer bindings */
|
||||
struct hk_buffer_address dynamic_buffers[HK_MAX_DYNAMIC_BUFFERS];
|
||||
|
||||
/* Start index in dynamic_buffers where each set starts */
|
||||
uint8_t set_dynamic_buffer_start[HK_MAX_SETS];
|
||||
};
|
||||
|
||||
/* helper macro for computing root descriptor byte offsets */
|
||||
#define hk_root_descriptor_offset(member) \
|
||||
offsetof(struct hk_root_descriptor_table, member)
|
||||
|
||||
struct hk_descriptor_state {
|
||||
bool root_dirty;
|
||||
struct hk_root_descriptor_table root;
|
||||
|
||||
uint32_t set_sizes[HK_MAX_SETS];
|
||||
struct hk_descriptor_set *sets[HK_MAX_SETS];
|
||||
uint32_t sets_dirty;
|
||||
|
||||
struct hk_push_descriptor_set *push[HK_MAX_SETS];
|
||||
uint32_t push_dirty;
|
||||
};
|
||||
|
||||
struct hk_attachment {
|
||||
VkFormat vk_format;
|
||||
struct hk_image_view *iview;
|
||||
|
||||
VkResolveModeFlagBits resolve_mode;
|
||||
struct hk_image_view *resolve_iview;
|
||||
};
|
||||
|
||||
struct hk_bg_eot {
|
||||
uint64_t usc;
|
||||
struct agx_counts_packed counts;
|
||||
};
|
||||
|
||||
struct hk_render_registers {
|
||||
uint32_t width, height, layers;
|
||||
uint32_t isp_bgobjdepth;
|
||||
uint32_t isp_bgobjvals;
|
||||
struct agx_zls_control_packed zls_control, zls_control_partial;
|
||||
uint32_t iogpu_unk_214;
|
||||
uint32_t depth_dimensions;
|
||||
|
||||
struct {
|
||||
uint32_t dimensions;
|
||||
uint64_t buffer, meta;
|
||||
uint32_t stride, meta_stride;
|
||||
} depth;
|
||||
|
||||
struct {
|
||||
uint64_t buffer, meta;
|
||||
uint32_t stride, meta_stride;
|
||||
} stencil;
|
||||
|
||||
struct {
|
||||
struct hk_bg_eot main;
|
||||
struct hk_bg_eot partial;
|
||||
} bg;
|
||||
|
||||
struct {
|
||||
struct hk_bg_eot main;
|
||||
struct hk_bg_eot partial;
|
||||
} eot;
|
||||
};
|
||||
|
||||
struct hk_rendering_state {
|
||||
VkRenderingFlagBits flags;
|
||||
|
||||
VkRect2D area;
|
||||
uint32_t layer_count;
|
||||
uint32_t view_mask;
|
||||
|
||||
uint32_t color_att_count;
|
||||
struct hk_attachment color_att[HK_MAX_RTS];
|
||||
struct hk_attachment depth_att;
|
||||
struct hk_attachment stencil_att;
|
||||
|
||||
struct agx_tilebuffer_layout tilebuffer;
|
||||
struct hk_render_registers cr;
|
||||
};
|
||||
|
||||
struct hk_index_buffer_state {
|
||||
struct hk_addr_range buffer;
|
||||
enum agx_index_size size;
|
||||
uint32_t restart;
|
||||
};
|
||||
|
||||
/* Dirty tracking bits for state not tracked by vk_dynamic_graphics_state or
|
||||
* shaders_dirty.
|
||||
*/
|
||||
enum hk_dirty {
|
||||
HK_DIRTY_INDEX = BITFIELD_BIT(0),
|
||||
HK_DIRTY_VB = BITFIELD_BIT(1),
|
||||
HK_DIRTY_OCCLUSION = BITFIELD_BIT(2),
|
||||
HK_DIRTY_PROVOKING = BITFIELD_BIT(3),
|
||||
HK_DIRTY_VARYINGS = BITFIELD_BIT(4),
|
||||
};
|
||||
|
||||
struct hk_graphics_state {
|
||||
struct hk_rendering_state render;
|
||||
struct hk_descriptor_state descriptors;
|
||||
|
||||
enum hk_dirty dirty;
|
||||
|
||||
uint64_t root;
|
||||
uint64_t draw_params;
|
||||
uint64_t draw_id_ptr;
|
||||
|
||||
uint32_t shaders_dirty;
|
||||
struct hk_api_shader *shaders[MESA_SHADER_MESH + 1];
|
||||
|
||||
/* Vertex buffers */
|
||||
struct hk_addr_range vb[AGX_MAX_VBUFS];
|
||||
|
||||
/* Transform feedback buffers */
|
||||
struct hk_addr_range xfb[4];
|
||||
|
||||
/* Is transform feedback enabled? */
|
||||
bool xfb_enabled;
|
||||
|
||||
/* Internal transform feedback offset vec4.
|
||||
*
|
||||
* TODO: Strictly could be global.
|
||||
*/
|
||||
uint64_t xfb_offsets;
|
||||
|
||||
/* Pointer to the GPU memory backing active transform feedback queries,
|
||||
* per-stream. Zero if no query is bound.
|
||||
*/
|
||||
uint64_t xfb_query[4];
|
||||
|
||||
struct hk_index_buffer_state index;
|
||||
enum agx_primitive topology;
|
||||
enum agx_object_type object_type;
|
||||
|
||||
/* Provoking vertex 0, 1, or 2. Usually 0 or 2 for FIRST/LAST. 1 can only be
|
||||
* set for tri fans.
|
||||
*/
|
||||
uint8_t provoking;
|
||||
|
||||
struct {
|
||||
enum agx_visibility_mode mode;
|
||||
|
||||
/* If enabled, index of the current occlusion query in the occlusion heap.
|
||||
* There can only be one active at a time (hardware constraint).
|
||||
*/
|
||||
uint16_t index;
|
||||
} occlusion;
|
||||
|
||||
/* Fast linked shader data structures */
|
||||
uint64_t varyings;
|
||||
struct agx_varyings_vs linked_varyings;
|
||||
|
||||
uint32_t linked_dirty;
|
||||
struct hk_linked_shader *linked[PIPE_SHADER_TYPES];
|
||||
bool generate_primitive_id;
|
||||
|
||||
/* Tessellation state */
|
||||
uint64_t tess_out_draws;
|
||||
|
||||
/* Needed by vk_command_buffer::dynamic_graphics_state */
|
||||
struct vk_vertex_input_state _dynamic_vi;
|
||||
struct vk_sample_locations_state _dynamic_sl;
|
||||
};
|
||||
|
||||
struct hk_compute_state {
|
||||
struct hk_descriptor_state descriptors;
|
||||
struct hk_api_shader *shader;
|
||||
};
|
||||
|
||||
struct hk_cmd_push {
|
||||
void *map;
|
||||
uint64_t addr;
|
||||
uint32_t range;
|
||||
bool no_prefetch;
|
||||
};
|
||||
|
||||
struct hk_scratch_req {
|
||||
bool main;
|
||||
bool preamble;
|
||||
};
|
||||
|
||||
/*
|
||||
* hk_cs represents a single control stream, to be enqueued either to the
|
||||
* CDM or VDM for compute/3D respectively.
|
||||
*/
|
||||
enum hk_cs_type {
|
||||
HK_CS_CDM,
|
||||
HK_CS_VDM,
|
||||
};
|
||||
|
||||
struct hk_cs {
|
||||
struct list_head node;
|
||||
|
||||
/* Data master */
|
||||
enum hk_cs_type type;
|
||||
|
||||
/* Address of the root control stream for the job */
|
||||
uint64_t addr;
|
||||
|
||||
/* Start pointer of the root control stream */
|
||||
void *start;
|
||||
|
||||
/* Current pointer within the control stream */
|
||||
void *current;
|
||||
|
||||
/* End pointer of the current chunk of the control stream */
|
||||
void *end;
|
||||
|
||||
/* Whether there is more than just the root chunk */
|
||||
bool stream_linked;
|
||||
|
||||
/* Scratch requirements */
|
||||
struct {
|
||||
union {
|
||||
struct hk_scratch_req vs;
|
||||
struct hk_scratch_req cs;
|
||||
};
|
||||
|
||||
struct hk_scratch_req fs;
|
||||
} scratch;
|
||||
|
||||
/* Remaining state is for graphics only, ignored for compute */
|
||||
struct agx_tilebuffer_layout tib;
|
||||
|
||||
struct util_dynarray scissor, depth_bias;
|
||||
uint64_t uploaded_scissor, uploaded_zbias;
|
||||
|
||||
/* We can only set ppp_multisamplectl once per batch. has_sample_locations
|
||||
* tracks if we've committed to a set of sample locations yet. vk_meta
|
||||
* operations do not set has_sample_locations since they don't care and it
|
||||
* would interfere with the app-provided samples.
|
||||
*
|
||||
*/
|
||||
bool has_sample_locations;
|
||||
uint32_t ppp_multisamplectl;
|
||||
|
||||
struct hk_render_registers cr;
|
||||
};
|
||||
|
||||
struct hk_uploader {
|
||||
/** List of hk_cmd_bo */
|
||||
struct list_head bos;
|
||||
|
||||
/* Current addresses */
|
||||
uint8_t *map;
|
||||
uint64_t base;
|
||||
uint32_t offset;
|
||||
};
|
||||
|
||||
struct hk_cmd_buffer {
|
||||
struct vk_command_buffer vk;
|
||||
|
||||
struct {
|
||||
struct hk_graphics_state gfx;
|
||||
struct hk_compute_state cs;
|
||||
} state;
|
||||
|
||||
struct {
|
||||
struct hk_uploader main, usc;
|
||||
} uploader;
|
||||
|
||||
/* List of all recorded control streams */
|
||||
struct list_head control_streams;
|
||||
|
||||
/* Current recorded control stream */
|
||||
struct {
|
||||
/* VDM stream for 3D */
|
||||
struct hk_cs *gfx;
|
||||
|
||||
/* CDM stream for compute */
|
||||
struct hk_cs *cs;
|
||||
|
||||
/* CDM stream that executes immediately before the current graphics
|
||||
* control stream. Used for geometry shading, tessellation, etc.
|
||||
*/
|
||||
struct hk_cs *pre_gfx;
|
||||
|
||||
/* CDM stream that will execute after the current graphics control stream
|
||||
* finishes. Used for queries.
|
||||
*/
|
||||
struct hk_cs *post_gfx;
|
||||
} current_cs;
|
||||
|
||||
/* Are we currently inside a vk_meta operation? This alters sample location
|
||||
* behaviour.
|
||||
*/
|
||||
bool in_meta;
|
||||
|
||||
/* XXX: move me?
|
||||
*
|
||||
* Indirect draw generated by the pre-GS for the geometry shader.
|
||||
*/
|
||||
uint64_t geom_indirect;
|
||||
|
||||
/* Does the command buffer use the geometry heap? */
|
||||
bool uses_heap;
|
||||
|
||||
/* Owned large BOs */
|
||||
struct util_dynarray large_bos;
|
||||
};
|
||||
|
||||
VK_DEFINE_HANDLE_CASTS(hk_cmd_buffer, vk.base, VkCommandBuffer,
|
||||
VK_OBJECT_TYPE_COMMAND_BUFFER)
|
||||
|
||||
extern const struct vk_command_buffer_ops hk_cmd_buffer_ops;
|
||||
|
||||
static inline struct hk_device *
|
||||
hk_cmd_buffer_device(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
return (struct hk_device *)cmd->vk.base.device;
|
||||
}
|
||||
|
||||
static inline struct hk_cmd_pool *
|
||||
hk_cmd_buffer_pool(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
return (struct hk_cmd_pool *)cmd->vk.pool;
|
||||
}
|
||||
|
||||
/*
|
||||
* The hardware vertex shader is supplied by the last geometry stage. The
|
||||
* geometry pipeline is vertex->tess->geometry so we search backwards.
|
||||
*/
|
||||
static inline struct hk_shader *
|
||||
hk_bound_hw_vs(struct hk_graphics_state *gfx)
|
||||
{
|
||||
struct hk_api_shader *vs = gfx->shaders[MESA_SHADER_VERTEX];
|
||||
struct hk_api_shader *tes = gfx->shaders[MESA_SHADER_TESS_EVAL];
|
||||
struct hk_api_shader *gs = gfx->shaders[MESA_SHADER_GEOMETRY];
|
||||
|
||||
if (gs)
|
||||
return &gs->variants[HK_GS_VARIANT_RAST];
|
||||
else if (tes)
|
||||
return &tes->variants[HK_VS_VARIANT_HW];
|
||||
else
|
||||
return &vs->variants[HK_VS_VARIANT_HW];
|
||||
}
|
||||
|
||||
static inline struct hk_shader *
|
||||
hk_bound_sw_vs(struct hk_graphics_state *gfx)
|
||||
{
|
||||
struct hk_api_shader *vs = gfx->shaders[MESA_SHADER_VERTEX];
|
||||
struct hk_shader *hw_vs = hk_bound_hw_vs(gfx);
|
||||
|
||||
if (hw_vs == &vs->variants[HK_VS_VARIANT_HW])
|
||||
return hw_vs;
|
||||
else
|
||||
return &vs->variants[HK_VS_VARIANT_SW];
|
||||
}
|
||||
|
||||
static inline struct hk_shader *
|
||||
hk_bound_sw_vs_before_gs(struct hk_graphics_state *gfx)
|
||||
{
|
||||
struct hk_api_shader *vs = gfx->shaders[MESA_SHADER_VERTEX];
|
||||
struct hk_api_shader *tes = gfx->shaders[MESA_SHADER_TESS_EVAL];
|
||||
struct hk_api_shader *api = tes ?: vs;
|
||||
|
||||
return &api->variants[HK_VS_VARIANT_SW];
|
||||
}
|
||||
|
||||
struct agx_ptr hk_pool_alloc_internal(struct hk_cmd_buffer *cmd, uint32_t size,
|
||||
uint32_t alignment, bool usc);
|
||||
|
||||
uint64_t hk_pool_upload(struct hk_cmd_buffer *cmd, const void *data,
|
||||
uint32_t size, uint32_t alignment);
|
||||
|
||||
static inline struct agx_ptr
|
||||
hk_pool_alloc(struct hk_cmd_buffer *cmd, uint32_t size, uint32_t alignment)
|
||||
{
|
||||
return hk_pool_alloc_internal(cmd, size, alignment, false);
|
||||
}
|
||||
|
||||
static inline struct agx_ptr
|
||||
hk_pool_usc_alloc(struct hk_cmd_buffer *cmd, uint32_t size, uint32_t alignment)
|
||||
{
|
||||
return hk_pool_alloc_internal(cmd, size, alignment, true);
|
||||
}
|
||||
|
||||
void hk_cs_init_graphics(struct hk_cmd_buffer *cmd, struct hk_cs *cs);
|
||||
uint32_t hk_default_sample_positions(unsigned nr_samples);
|
||||
|
||||
static inline struct hk_cs *
|
||||
hk_cmd_buffer_get_cs_general(struct hk_cmd_buffer *cmd, struct hk_cs **ptr,
|
||||
bool compute)
|
||||
{
|
||||
if ((*ptr) == NULL) {
|
||||
/* Allocate root control stream */
|
||||
size_t initial_size = 65536;
|
||||
struct agx_ptr root = hk_pool_alloc(cmd, initial_size, 1024);
|
||||
if (!root.cpu)
|
||||
return NULL;
|
||||
|
||||
/* Allocate hk_cs for the new stream */
|
||||
struct hk_cs *cs = malloc(sizeof(*cs));
|
||||
*cs = (struct hk_cs){
|
||||
.type = compute ? HK_CS_CDM : HK_CS_VDM,
|
||||
.addr = root.gpu,
|
||||
.start = root.cpu,
|
||||
.current = root.cpu,
|
||||
.end = root.cpu + initial_size,
|
||||
};
|
||||
|
||||
list_inithead(&cs->node);
|
||||
|
||||
bool before_gfx = (ptr == &cmd->current_cs.pre_gfx);
|
||||
|
||||
/* Insert into the command buffer. We usually append to the end of the
|
||||
* command buffer, except for pre-graphics streams which go right before
|
||||
* the graphics workload. (This implies a level of out-of-order processing
|
||||
* that's allowed by Vulkan and required for efficient
|
||||
* geometry/tessellation shaders.)
|
||||
*/
|
||||
if (before_gfx && cmd->current_cs.gfx) {
|
||||
list_addtail(&cs->node, &cmd->current_cs.gfx->node);
|
||||
} else {
|
||||
list_addtail(&cs->node, &cmd->control_streams);
|
||||
}
|
||||
|
||||
*ptr = cs;
|
||||
|
||||
if (!compute)
|
||||
hk_cs_init_graphics(cmd, cs);
|
||||
}
|
||||
|
||||
assert(*ptr != NULL);
|
||||
return *ptr;
|
||||
}
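/* Ordering sketch (illustrative only): if a VDM stream G is already recorded
 * in cmd->control_streams and a pre-graphics CDM stream P is requested via
 * current_cs.pre_gfx, the list_addtail(&P->node, &G->node) above links P
 * immediately before G, so the recorded order becomes ... -> P -> G -> ...,
 * letting a geometry/tessellation prepass execute ahead of the render pass
 * that consumes its output.
 */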
|
||||
|
||||
static inline struct hk_cs *
|
||||
hk_cmd_buffer_get_cs(struct hk_cmd_buffer *cmd, bool compute)
|
||||
{
|
||||
struct hk_cs **ptr = compute ? &cmd->current_cs.cs : &cmd->current_cs.gfx;
|
||||
return hk_cmd_buffer_get_cs_general(cmd, ptr, compute);
|
||||
}
|
||||
|
||||
void hk_ensure_cs_has_space(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
||||
size_t space);
|
||||
|
||||
static void
|
||||
hk_cmd_buffer_dirty_all(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||||
struct hk_graphics_state *gfx = &cmd->state.gfx;
|
||||
|
||||
vk_dynamic_graphics_state_dirty_all(dyn);
|
||||
gfx->dirty = ~0;
|
||||
gfx->shaders_dirty = ~0;
|
||||
gfx->linked_dirty = ~0;
|
||||
gfx->descriptors.root_dirty = true;
|
||||
}
|
||||
|
||||
static inline void
|
||||
hk_cs_destroy(struct hk_cs *cs)
|
||||
{
|
||||
if (cs->type == HK_CS_VDM) {
|
||||
util_dynarray_fini(&cs->scissor);
|
||||
util_dynarray_fini(&cs->depth_bias);
|
||||
}
|
||||
|
||||
free(cs);
|
||||
}
|
||||
|
||||
static void
|
||||
hk_cmd_buffer_end_compute_internal(struct hk_cs **ptr)
|
||||
{
|
||||
if (*ptr) {
|
||||
struct hk_cs *cs = *ptr;
|
||||
void *map = cs->current;
|
||||
agx_push(map, CDM_STREAM_TERMINATE, _)
|
||||
;
|
||||
|
||||
cs->current = map;
|
||||
}
|
||||
|
||||
*ptr = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
hk_cmd_buffer_end_compute(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.cs);
|
||||
}
|
||||
|
||||
static void
|
||||
hk_cmd_buffer_end_graphics(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
struct hk_cs *cs = cmd->current_cs.gfx;
|
||||
|
||||
if (cs) {
|
||||
void *map = cs->current;
|
||||
agx_push(map, VDM_STREAM_TERMINATE, _)
|
||||
;
|
||||
|
||||
/* Scissor and depth bias arrays are staged to dynamic arrays on the CPU.
|
||||
* When we end the control stream, they're done growing and are ready for
|
||||
* upload.
|
||||
*/
|
||||
cs->uploaded_scissor =
|
||||
hk_pool_upload(cmd, cs->scissor.data, cs->scissor.size, 64);
|
||||
|
||||
cs->uploaded_zbias =
|
||||
hk_pool_upload(cmd, cs->depth_bias.data, cs->depth_bias.size, 64);
|
||||
|
||||
/* TODO: maybe free scissor/depth_bias now? */
|
||||
|
||||
cmd->current_cs.gfx->current = map;
|
||||
cmd->current_cs.gfx = NULL;
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.pre_gfx);
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.post_gfx);
|
||||
}
|
||||
|
||||
assert(cmd->current_cs.gfx == NULL);
|
||||
|
||||
/* We just flushed out the heap use. If we want to use it again, we'll need
|
||||
* to queue a free for it again.
|
||||
*/
|
||||
cmd->uses_heap = false;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
hk_pipeline_stat_addr(struct hk_cmd_buffer *cmd,
|
||||
VkQueryPipelineStatisticFlagBits stat)
|
||||
{
|
||||
struct hk_root_descriptor_table *root = &cmd->state.gfx.descriptors.root;
|
||||
VkQueryPipelineStatisticFlags flags = root->draw.pipeline_stats_flags;
|
||||
|
||||
if (flags & stat) {
|
||||
assert(!cmd->in_meta && "queries paused for meta");
|
||||
assert(util_bitcount(stat) == 1 && "by construction");
|
||||
|
||||
/* Prefix sum to determine the compacted index in the query pool */
|
||||
uint32_t index = util_bitcount(flags & (stat - 1));
|
||||
|
||||
return root->draw.pipeline_stats + (sizeof(uint64_t) * index);
|
||||
} else {
|
||||
/* Query disabled */
|
||||
return 0;
|
||||
}
|
||||
}
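/* Worked example (illustrative only): if the active query pool was created
 * with flags = INPUT_ASSEMBLY_VERTICES | VERTEX_SHADER_INVOCATIONS |
 * COMPUTE_SHADER_INVOCATIONS (0x1 | 0x4 | 0x400 in the
 * VK_QUERY_PIPELINE_STATISTIC_*_BIT encoding), then for
 * stat = COMPUTE_SHADER_INVOCATIONS:
 *
 *    util_bitcount(flags & (stat - 1)) == util_bitcount(0x5) == 2
 *
 * so that counter lives at pipeline_stats + 2 * sizeof(uint64_t), matching
 * the compacted per-statistic layout in the query pool.
 */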
|
||||
|
||||
void hk_cmd_buffer_begin_graphics(struct hk_cmd_buffer *cmd,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo);
|
||||
void hk_cmd_buffer_begin_compute(struct hk_cmd_buffer *cmd,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo);
|
||||
|
||||
void hk_cmd_invalidate_graphics_state(struct hk_cmd_buffer *cmd);
|
||||
void hk_cmd_invalidate_compute_state(struct hk_cmd_buffer *cmd);
|
||||
|
||||
void hk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd, uint32_t stage_count,
|
||||
const gl_shader_stage *stages,
|
||||
struct vk_shader **const shaders);
|
||||
|
||||
void hk_cmd_bind_graphics_shader(struct hk_cmd_buffer *cmd,
|
||||
const gl_shader_stage stage,
|
||||
struct hk_api_shader *shader);
|
||||
|
||||
void hk_cmd_bind_compute_shader(struct hk_cmd_buffer *cmd,
|
||||
struct hk_api_shader *shader);
|
||||
|
||||
void hk_cmd_bind_vertex_buffer(struct hk_cmd_buffer *cmd, uint32_t vb_idx,
|
||||
struct hk_addr_range addr_range);
|
||||
|
||||
static inline struct hk_descriptor_state *
|
||||
hk_get_descriptors_state(struct hk_cmd_buffer *cmd,
|
||||
VkPipelineBindPoint bind_point)
|
||||
{
|
||||
switch (bind_point) {
|
||||
case VK_PIPELINE_BIND_POINT_GRAPHICS:
|
||||
return &cmd->state.gfx.descriptors;
|
||||
case VK_PIPELINE_BIND_POINT_COMPUTE:
|
||||
return &cmd->state.cs.descriptors;
|
||||
default:
|
||||
unreachable("Unhandled bind point");
|
||||
}
|
||||
};
|
||||
|
||||
void hk_cmd_flush_wait_dep(struct hk_cmd_buffer *cmd,
|
||||
const VkDependencyInfo *dep, bool wait);
|
||||
|
||||
void hk_cmd_invalidate_deps(struct hk_cmd_buffer *cmd, uint32_t dep_count,
|
||||
const VkDependencyInfo *deps);
|
||||
|
||||
void hk_cmd_buffer_flush_push_descriptors(struct hk_cmd_buffer *cmd,
|
||||
struct hk_descriptor_state *desc);
|
||||
|
||||
void hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd,
|
||||
const VkRenderingInfo *pRenderingInfo);
|
||||
|
||||
uint64_t hk_cmd_buffer_upload_root(struct hk_cmd_buffer *cmd,
|
||||
VkPipelineBindPoint bind_point);
|
||||
|
||||
void hk_reserve_scratch(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
||||
struct hk_shader *s);
|
||||
|
||||
uint32_t hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
|
||||
struct hk_linked_shader *linked);
|
||||
|
||||
uint32_t hk_upload_usc_words_kernel(struct hk_cmd_buffer *cmd,
|
||||
struct hk_shader *s, void *data,
|
||||
size_t data_size);
|
||||
|
||||
void hk_usc_upload_spilled_rt_descs(struct agx_usc_builder *b,
|
||||
struct hk_cmd_buffer *cmd);
|
||||
|
||||
void hk_cdm_cache_flush(struct hk_device *dev, struct hk_cs *cs);
|
||||
|
||||
struct hk_grid {
|
||||
bool indirect;
|
||||
union {
|
||||
uint32_t count[3];
|
||||
uint64_t ptr;
|
||||
};
|
||||
};
|
||||
|
||||
static struct hk_grid
|
||||
hk_grid(uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
return (struct hk_grid){.indirect = false, .count = {x, y, z}};
|
||||
}
|
||||
|
||||
static struct hk_grid
|
||||
hk_grid_indirect(uint64_t ptr)
|
||||
{
|
||||
return (struct hk_grid){.indirect = true, .ptr = ptr};
|
||||
}
|
||||
|
||||
void hk_dispatch_with_usc(struct hk_device *dev, struct hk_cs *cs,
|
||||
struct hk_shader *s, uint32_t usc,
|
||||
struct hk_grid grid, struct hk_grid local_size);
|
||||
|
||||
static inline void
|
||||
hk_dispatch_with_local_size(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
||||
struct hk_shader *s, struct hk_grid grid,
|
||||
struct hk_grid local_size)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
uint32_t usc = hk_upload_usc_words(cmd, s, s->only_linked);
|
||||
|
||||
hk_reserve_scratch(cmd, cs, s);
|
||||
hk_dispatch_with_usc(dev, cs, s, usc, grid, local_size);
|
||||
}
|
||||
|
||||
static inline void
|
||||
hk_dispatch(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct hk_shader *s,
|
||||
struct hk_grid grid)
|
||||
{
|
||||
assert(s->info.stage == MESA_SHADER_COMPUTE);
|
||||
|
||||
struct hk_grid local_size =
|
||||
hk_grid(s->info.cs.local_size[0], s->info.cs.local_size[1],
|
||||
s->info.cs.local_size[2]);
|
||||
|
||||
if (!grid.indirect) {
|
||||
grid.count[0] *= local_size.count[0];
|
||||
grid.count[1] *= local_size.count[1];
|
||||
grid.count[2] *= local_size.count[2];
|
||||
}
|
||||
|
||||
hk_dispatch_with_local_size(cmd, cs, s, grid, local_size);
|
||||
}
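/* Example (illustrative only): the pre-multiplication above suggests that
 * CDM_GLOBAL_SIZE is expressed in threads rather than workgroups. With a
 * 64x1x1 workgroup, a vkCmdDispatch(4, 2, 1) reaching this helper as
 *
 *    hk_dispatch(cmd, cs, s, hk_grid(4, 2, 1));
 *
 * is launched with grid.count = {256, 2, 1} and local_size = {64, 1, 1}.
 */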
|
||||
|
||||
void hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
|
||||
bool after_gfx);
|
||||
196 src/asahi/vulkan/hk_cmd_clear.c Normal file
@@ -0,0 +1,196 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "agx_formats.h"
|
||||
#include "hk_cmd_buffer.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_image.h"
|
||||
#include "hk_image_view.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
#include "vk_meta.h"
|
||||
|
||||
static VkImageViewType
|
||||
render_view_type(VkImageType image_type, unsigned layer_count)
|
||||
{
|
||||
switch (image_type) {
|
||||
case VK_IMAGE_TYPE_1D:
|
||||
return layer_count == 1 ? VK_IMAGE_VIEW_TYPE_1D
|
||||
: VK_IMAGE_VIEW_TYPE_1D_ARRAY;
|
||||
case VK_IMAGE_TYPE_2D:
|
||||
return layer_count == 1 ? VK_IMAGE_VIEW_TYPE_2D
|
||||
: VK_IMAGE_VIEW_TYPE_2D_ARRAY;
|
||||
case VK_IMAGE_TYPE_3D:
|
||||
return VK_IMAGE_VIEW_TYPE_3D;
|
||||
default:
|
||||
unreachable("Invalid image type");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
clear_image(struct hk_cmd_buffer *cmd, struct hk_image *image,
|
||||
VkImageLayout image_layout, VkFormat format,
|
||||
const VkClearValue *clear_value, uint32_t range_count,
|
||||
const VkImageSubresourceRange *ranges)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
ASSERTED VkResult result;
|
||||
|
||||
for (uint32_t r = 0; r < range_count; r++) {
|
||||
const uint32_t level_count =
|
||||
vk_image_subresource_level_count(&image->vk, &ranges[r]);
|
||||
|
||||
for (uint32_t l = 0; l < level_count; l++) {
|
||||
const uint32_t level = ranges[r].baseMipLevel + l;
|
||||
|
||||
const VkExtent3D level_extent =
|
||||
vk_image_mip_level_extent(&image->vk, level);
|
||||
|
||||
uint32_t base_array_layer, layer_count;
|
||||
if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
|
||||
base_array_layer = 0;
|
||||
layer_count = level_extent.depth;
|
||||
} else {
|
||||
base_array_layer = ranges[r].baseArrayLayer;
|
||||
layer_count =
|
||||
vk_image_subresource_layer_count(&image->vk, &ranges[r]);
|
||||
}
|
||||
|
||||
const VkImageViewUsageCreateInfo view_usage_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
|
||||
.usage = (ranges[r].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
|
||||
? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
|
||||
: VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
|
||||
};
|
||||
const VkImageViewCreateInfo view_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
|
||||
.pNext = &view_usage_info,
|
||||
.image = hk_image_to_handle(image),
|
||||
.viewType = render_view_type(image->vk.image_type, layer_count),
|
||||
.format = format,
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = image->vk.aspects,
|
||||
.baseMipLevel = level,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = base_array_layer,
|
||||
.layerCount = layer_count,
|
||||
},
|
||||
};
|
||||
|
||||
/* We use vk_meta_create_image_view here for lifetime management */
|
||||
VkImageView view;
|
||||
result =
|
||||
vk_meta_create_image_view(&cmd->vk, &dev->meta, &view_info, &view);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
VkRenderingInfo render = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {0, 0},
|
||||
.extent = {level_extent.width, level_extent.height},
|
||||
},
|
||||
.layerCount = layer_count,
|
||||
};
|
||||
|
||||
VkRenderingAttachmentInfo vk_att = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||||
.imageView = view,
|
||||
.imageLayout = image_layout,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.clearValue = *clear_value,
|
||||
};
|
||||
|
||||
if (ranges[r].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
render.colorAttachmentCount = 1;
|
||||
render.pColorAttachments = &vk_att;
|
||||
}
|
||||
if (ranges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
render.pDepthAttachment = &vk_att;
|
||||
if (ranges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
render.pStencilAttachment = &vk_att;
|
||||
|
||||
hk_CmdBeginRendering(hk_cmd_buffer_to_handle(cmd), &render);
|
||||
hk_CmdEndRendering(hk_cmd_buffer_to_handle(cmd));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static VkFormat
|
||||
vk_packed_int_format_for_size(unsigned size_B)
|
||||
{
|
||||
switch (size_B) {
|
||||
case 1:
|
||||
return VK_FORMAT_R8_UINT;
|
||||
case 2:
|
||||
return VK_FORMAT_R16_UINT;
|
||||
case 4:
|
||||
return VK_FORMAT_R32_UINT;
|
||||
case 8:
|
||||
return VK_FORMAT_R32G32_UINT;
|
||||
case 16:
|
||||
return VK_FORMAT_R32G32B32A32_UINT;
|
||||
default:
|
||||
unreachable("Invalid image format size");
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage _image,
|
||||
VkImageLayout imageLayout,
|
||||
const VkClearColorValue *pColor, uint32_t rangeCount,
|
||||
const VkImageSubresourceRange *pRanges)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_image, image, _image);
|
||||
|
||||
VkClearValue clear_value = {
|
||||
.color = *pColor,
|
||||
};
|
||||
|
||||
VkFormat vk_format = image->vk.format;
|
||||
if (vk_format == VK_FORMAT_R64_UINT || vk_format == VK_FORMAT_R64_SINT)
|
||||
vk_format = VK_FORMAT_R32G32_UINT;
|
||||
|
||||
enum pipe_format p_format = vk_format_to_pipe_format(vk_format);
|
||||
assert(p_format != PIPE_FORMAT_NONE);
|
||||
|
||||
if (!agx_pixel_format[p_format].renderable) {
|
||||
memset(&clear_value, 0, sizeof(clear_value));
|
||||
util_format_pack_rgba(p_format, clear_value.color.uint32, pColor->uint32,
|
||||
1);
|
||||
|
||||
unsigned bpp = util_format_get_blocksize(p_format);
|
||||
vk_format = vk_packed_int_format_for_size(bpp);
|
||||
}
|
||||
|
||||
clear_image(cmd, image, imageLayout, vk_format, &clear_value, rangeCount,
|
||||
pRanges);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage _image,
|
||||
VkImageLayout imageLayout,
|
||||
const VkClearDepthStencilValue *pDepthStencil,
|
||||
uint32_t rangeCount,
|
||||
const VkImageSubresourceRange *pRanges)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_image, image, _image);
|
||||
|
||||
const VkClearValue clear_value = {
|
||||
.depthStencil = *pDepthStencil,
|
||||
};
|
||||
|
||||
clear_image(cmd, image, imageLayout, image->vk.format, &clear_value,
|
||||
rangeCount, pRanges);
|
||||
}
|
||||
249 src/asahi/vulkan/hk_cmd_dispatch.c Normal file
@@ -0,0 +1,249 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "shaders/query.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
#include "agx_helpers.h"
|
||||
#include "agx_linker.h"
|
||||
#include "agx_nir_lower_gs.h"
|
||||
#include "agx_pack.h"
|
||||
#include "agx_scratch.h"
|
||||
#include "agx_tilebuffer.h"
|
||||
#include "hk_buffer.h"
|
||||
#include "hk_cmd_buffer.h"
|
||||
#include "hk_descriptor_set.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_physical_device.h"
|
||||
#include "hk_shader.h"
|
||||
#include "pool.h"
|
||||
|
||||
void
|
||||
hk_cmd_buffer_begin_compute(struct hk_cmd_buffer *cmd,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
hk_cmd_invalidate_compute_state(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
memset(&cmd->state.cs, 0, sizeof(cmd->state.cs));
|
||||
}
|
||||
|
||||
void
|
||||
hk_cmd_bind_compute_shader(struct hk_cmd_buffer *cmd,
|
||||
struct hk_api_shader *shader)
|
||||
{
|
||||
cmd->state.cs.shader = shader;
|
||||
}
|
||||
|
||||
void
|
||||
hk_cdm_cache_flush(struct hk_device *dev, struct hk_cs *cs)
|
||||
{
|
||||
assert(cs->type == HK_CS_CDM);
|
||||
assert(cs->current + AGX_CDM_BARRIER_LENGTH < cs->end &&
|
||||
"caller must ensure space");
|
||||
|
||||
uint8_t *out = cs->current;
|
||||
|
||||
agx_push(out, CDM_BARRIER, cfg) {
|
||||
cfg.unk_5 = true;
|
||||
cfg.unk_6 = true;
|
||||
cfg.unk_8 = true;
|
||||
// cfg.unk_11 = true;
|
||||
// cfg.unk_20 = true;
|
||||
if (dev->dev.params.num_clusters_total > 1) {
|
||||
// cfg.unk_24 = true;
|
||||
if (dev->dev.params.gpu_generation == 13) {
|
||||
cfg.unk_4 = true;
|
||||
// cfg.unk_26 = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* With multiple launches in the same CDM stream, we can get cache
|
||||
* coherency (? or sync?) issues. We hit this with blits, which - in
|
||||
* between dispatches - need the PBE cache to be flushed and the texture
|
||||
* cache to be invalidated. Until we know what bits mean what exactly,
|
||||
* let's just set these after every launch to be safe. We can revisit in
|
||||
* the future when we figure out what the bits mean.
|
||||
*/
|
||||
cfg.unk_0 = true;
|
||||
cfg.unk_1 = true;
|
||||
cfg.unk_2 = true;
|
||||
cfg.usc_cache_inval = true;
|
||||
cfg.unk_4 = true;
|
||||
cfg.unk_5 = true;
|
||||
cfg.unk_6 = true;
|
||||
cfg.unk_7 = true;
|
||||
cfg.unk_8 = true;
|
||||
cfg.unk_9 = true;
|
||||
cfg.unk_10 = true;
|
||||
cfg.unk_11 = true;
|
||||
cfg.unk_12 = true;
|
||||
cfg.unk_13 = true;
|
||||
cfg.unk_14 = true;
|
||||
cfg.unk_15 = true;
|
||||
cfg.unk_16 = true;
|
||||
cfg.unk_17 = true;
|
||||
cfg.unk_18 = true;
|
||||
cfg.unk_19 = true;
|
||||
}
|
||||
|
||||
cs->current = out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Enqueue workgroups to a given CDM control stream with a given prepared USC
|
||||
* words. This does not interact with any global state, so it is suitable for
|
||||
* internal dispatches that do not save/restore state. That can be simpler /
|
||||
* lower overhead than vk_meta for special operations that logically operate
|
||||
* as graphics.
|
||||
*/
|
||||
void
|
||||
hk_dispatch_with_usc(struct hk_device *dev, struct hk_cs *cs,
|
||||
struct hk_shader *s, uint32_t usc, struct hk_grid grid,
|
||||
struct hk_grid local_size)
|
||||
{
|
||||
assert(cs->current + 0x2000 < cs->end && "should have ensured space");
|
||||
uint8_t *out = cs->current;
|
||||
|
||||
agx_push(out, CDM_LAUNCH_WORD_0, cfg) {
|
||||
if (grid.indirect)
|
||||
cfg.mode = AGX_CDM_MODE_INDIRECT_GLOBAL;
|
||||
else
|
||||
cfg.mode = AGX_CDM_MODE_DIRECT;
|
||||
|
||||
/* For now, always bind the txf sampler and nothing else */
|
||||
cfg.sampler_state_register_count = 1;
|
||||
|
||||
cfg.uniform_register_count = s->b.info.push_count;
|
||||
cfg.preshader_register_count = s->b.info.nr_preamble_gprs;
|
||||
}
|
||||
|
||||
agx_push(out, CDM_LAUNCH_WORD_1, cfg) {
|
||||
cfg.pipeline = usc;
|
||||
}
|
||||
|
||||
/* Added in G14X */
|
||||
if (dev->dev.params.gpu_generation >= 14 &&
|
||||
dev->dev.params.num_clusters_total > 1) {
|
||||
|
||||
agx_push(out, CDM_UNK_G14X, cfg)
|
||||
;
|
||||
}
|
||||
|
||||
assert(!local_size.indirect);
|
||||
|
||||
if (grid.indirect) {
|
||||
agx_push(out, CDM_INDIRECT, cfg) {
|
||||
cfg.address_hi = grid.ptr >> 32;
|
||||
cfg.address_lo = grid.ptr & BITFIELD64_MASK(32);
|
||||
}
|
||||
} else {
|
||||
agx_push(out, CDM_GLOBAL_SIZE, cfg) {
|
||||
cfg.x = grid.count[0];
|
||||
cfg.y = grid.count[1];
|
||||
cfg.z = grid.count[2];
|
||||
}
|
||||
}
|
||||
|
||||
agx_push(out, CDM_LOCAL_SIZE, cfg) {
|
||||
cfg.x = local_size.count[0];
|
||||
cfg.y = local_size.count[1];
|
||||
cfg.z = local_size.count[2];
|
||||
}
|
||||
|
||||
cs->current = out;
|
||||
hk_cdm_cache_flush(dev, cs);
|
||||
}
|
||||
|
||||
static void
|
||||
dispatch(struct hk_cmd_buffer *cmd, struct hk_grid grid)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
struct hk_shader *s = hk_only_variant(cmd->state.cs.shader);
|
||||
struct hk_cs *cs = hk_cmd_buffer_get_cs(cmd, true /* compute */);
|
||||
if (!cs)
|
||||
return;
|
||||
|
||||
uint64_t stat = hk_pipeline_stat_addr(
|
||||
cmd, VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT);
|
||||
|
||||
if (stat) {
|
||||
uint32_t local_size_threads = s->info.cs.local_size[0] *
|
||||
s->info.cs.local_size[1] *
|
||||
s->info.cs.local_size[2];
|
||||
|
||||
struct libagx_cs_invocation_params p = {
|
||||
.grid = cmd->state.cs.descriptors.root.cs.group_count_addr,
|
||||
.local_size_threads = local_size_threads,
|
||||
.statistic = stat,
|
||||
};
|
||||
|
||||
struct hk_shader *s =
|
||||
hk_meta_kernel(dev, agx_nir_increment_cs_invocations, NULL, 0);
|
||||
|
||||
uint64_t params = hk_pool_upload(cmd, &p, sizeof(p), 8);
|
||||
uint32_t usc =
|
||||
hk_upload_usc_words_kernel(cmd, s, &params, sizeof(params));
|
||||
|
||||
hk_dispatch_with_usc(dev, cs, s, usc, hk_grid(1, 1, 1), hk_grid(1, 1, 1));
|
||||
}
|
||||
|
||||
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
|
||||
hk_dispatch(cmd, cs, s, grid);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX,
|
||||
uint32_t baseGroupY, uint32_t baseGroupZ,
|
||||
uint32_t groupCountX, uint32_t groupCountY,
|
||||
uint32_t groupCountZ)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
struct hk_descriptor_state *desc = &cmd->state.cs.descriptors;
|
||||
if (desc->push_dirty)
|
||||
hk_cmd_buffer_flush_push_descriptors(cmd, desc);
|
||||
|
||||
desc->root.cs.base_group[0] = baseGroupX;
|
||||
desc->root.cs.base_group[1] = baseGroupY;
|
||||
desc->root.cs.base_group[2] = baseGroupZ;
|
||||
|
||||
/* We don't want to key the shader to whether we're indirectly dispatching,
|
||||
* so treat everything as indirect.
|
||||
*/
|
||||
VkDispatchIndirectCommand group_count = {
|
||||
.x = groupCountX,
|
||||
.y = groupCountY,
|
||||
.z = groupCountZ,
|
||||
};
|
||||
|
||||
desc->root.cs.group_count_addr =
|
||||
hk_pool_upload(cmd, &group_count, sizeof(group_count), 8);
|
||||
|
||||
dispatch(cmd, hk_grid(groupCountX, groupCountY, groupCountZ));
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
|
||||
VkDeviceSize offset)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
|
||||
struct hk_descriptor_state *desc = &cmd->state.cs.descriptors;
|
||||
if (desc->push_dirty)
|
||||
hk_cmd_buffer_flush_push_descriptors(cmd, desc);
|
||||
|
||||
desc->root.cs.base_group[0] = 0;
|
||||
desc->root.cs.base_group[1] = 0;
|
||||
desc->root.cs.base_group[2] = 0;
|
||||
|
||||
uint64_t dispatch_addr = hk_buffer_address(buffer, offset);
|
||||
assert(dispatch_addr != 0);
|
||||
|
||||
desc->root.cs.group_count_addr = dispatch_addr;
|
||||
dispatch(cmd, hk_grid_indirect(dispatch_addr));
|
||||
}
|
||||
3737 src/asahi/vulkan/hk_cmd_draw.c Normal file
File diff suppressed because it is too large
1692 src/asahi/vulkan/hk_cmd_meta.c Normal file
File diff suppressed because it is too large
146 src/asahi/vulkan/hk_cmd_pool.c Normal file
@@ -0,0 +1,146 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_cmd_pool.h"
|
||||
#include "asahi/lib/agx_bo.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
static VkResult
|
||||
hk_cmd_bo_create(struct hk_cmd_pool *pool, bool usc, struct hk_cmd_bo **bo_out)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_pool_device(pool);
|
||||
struct hk_cmd_bo *bo;
|
||||
|
||||
bo = vk_zalloc(&pool->vk.alloc, sizeof(*bo), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (bo == NULL)
|
||||
return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
bo->bo = agx_bo_create(&dev->dev, HK_CMD_BO_SIZE, usc ? AGX_BO_LOW_VA : 0,
|
||||
"Command pool");
|
||||
if (bo->bo == NULL) {
|
||||
vk_free(&pool->vk.alloc, bo);
|
||||
return vk_error(pool, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
bo->map = bo->bo->ptr.cpu;
|
||||
|
||||
*bo_out = bo;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
hk_cmd_bo_destroy(struct hk_cmd_pool *pool, struct hk_cmd_bo *bo)
|
||||
{
|
||||
agx_bo_unreference(bo->bo);
|
||||
vk_free(&pool->vk.alloc, bo);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateCommandPool(VkDevice _device,
|
||||
const VkCommandPoolCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
VkCommandPool *pCmdPool)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, device, _device);
|
||||
struct hk_cmd_pool *pool;
|
||||
|
||||
pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (pool == NULL)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
VkResult result =
|
||||
vk_command_pool_init(&device->vk, &pool->vk, pCreateInfo, pAllocator);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free2(&device->vk.alloc, pAllocator, pool);
|
||||
return result;
|
||||
}
|
||||
|
||||
list_inithead(&pool->free_bos);
|
||||
list_inithead(&pool->free_usc_bos);
|
||||
|
||||
*pCmdPool = hk_cmd_pool_to_handle(pool);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
hk_cmd_pool_destroy_bos(struct hk_cmd_pool *pool)
|
||||
{
|
||||
list_for_each_entry_safe(struct hk_cmd_bo, bo, &pool->free_bos, link)
|
||||
hk_cmd_bo_destroy(pool, bo);
|
||||
|
||||
list_inithead(&pool->free_bos);
|
||||
|
||||
list_for_each_entry_safe(struct hk_cmd_bo, bo, &pool->free_usc_bos, link)
|
||||
hk_cmd_bo_destroy(pool, bo);
|
||||
|
||||
list_inithead(&pool->free_usc_bos);
|
||||
}
|
||||
|
||||
VkResult
|
||||
hk_cmd_pool_alloc_bo(struct hk_cmd_pool *pool, bool usc,
|
||||
struct hk_cmd_bo **bo_out)
|
||||
{
|
||||
struct hk_cmd_bo *bo = NULL;
|
||||
if (usc) {
|
||||
if (!list_is_empty(&pool->free_usc_bos))
|
||||
bo = list_first_entry(&pool->free_usc_bos, struct hk_cmd_bo, link);
|
||||
} else {
|
||||
if (!list_is_empty(&pool->free_bos))
|
||||
bo = list_first_entry(&pool->free_bos, struct hk_cmd_bo, link);
|
||||
}
|
||||
if (bo) {
|
||||
list_del(&bo->link);
|
||||
*bo_out = bo;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
return hk_cmd_bo_create(pool, usc, bo_out);
|
||||
}
|
||||
|
||||
void
|
||||
hk_cmd_pool_free_bo_list(struct hk_cmd_pool *pool, struct list_head *bos)
|
||||
{
|
||||
list_splicetail(bos, &pool->free_bos);
|
||||
list_inithead(bos);
|
||||
}
|
||||
|
||||
void
|
||||
hk_cmd_pool_free_usc_bo_list(struct hk_cmd_pool *pool, struct list_head *bos)
|
||||
{
|
||||
list_splicetail(bos, &pool->free_usc_bos);
|
||||
list_inithead(bos);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, device, _device);
|
||||
VK_FROM_HANDLE(hk_cmd_pool, pool, commandPool);
|
||||
|
||||
if (!pool)
|
||||
return;
|
||||
|
||||
vk_command_pool_finish(&pool->vk);
|
||||
hk_cmd_pool_destroy_bos(pool);
|
||||
vk_free2(&device->vk.alloc, pAllocator, pool);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_TrimCommandPool(VkDevice device, VkCommandPool commandPool,
|
||||
VkCommandPoolTrimFlags flags)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_pool, pool, commandPool);
|
||||
|
||||
vk_command_pool_trim(&pool->vk, flags);
|
||||
hk_cmd_pool_destroy_bos(pool);
|
||||
}
|
||||
49 src/asahi/vulkan/hk_cmd_pool.h Normal file
@@ -0,0 +1,49 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_command_pool.h"
|
||||
|
||||
/* XXX: FIXME */
|
||||
#define HK_CMD_BO_SIZE (1024 * 1024)
|
||||
|
||||
/* Recyclable command buffer BO, used for both push buffers and upload */
|
||||
struct hk_cmd_bo {
|
||||
struct agx_bo *bo;
|
||||
|
||||
void *map;
|
||||
|
||||
/** Link in hk_cmd_pool::free_bos or hk_cmd_buffer::bos */
|
||||
struct list_head link;
|
||||
};
|
||||
|
||||
struct hk_cmd_pool {
|
||||
struct vk_command_pool vk;
|
||||
|
||||
/** List of hk_cmd_bo */
|
||||
struct list_head free_bos;
|
||||
struct list_head free_usc_bos;
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_cmd_pool, vk.base, VkCommandPool,
|
||||
VK_OBJECT_TYPE_COMMAND_POOL)
|
||||
|
||||
static inline struct hk_device *
|
||||
hk_cmd_pool_device(struct hk_cmd_pool *pool)
|
||||
{
|
||||
return (struct hk_device *)pool->vk.base.device;
|
||||
}
|
||||
|
||||
VkResult hk_cmd_pool_alloc_bo(struct hk_cmd_pool *pool, bool force_usc,
|
||||
struct hk_cmd_bo **bo_out);
|
||||
|
||||
void hk_cmd_pool_free_bo_list(struct hk_cmd_pool *pool, struct list_head *bos);
|
||||
void hk_cmd_pool_free_usc_bo_list(struct hk_cmd_pool *pool,
|
||||
struct list_head *bos);
|
||||
794 src/asahi/vulkan/hk_descriptor_set.c Normal file
@@ -0,0 +1,794 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_descriptor_set.h"
|
||||
#include "asahi/lib/agx_bo.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
#include "hk_buffer.h"
|
||||
#include "hk_buffer_view.h"
|
||||
#include "hk_descriptor_set_layout.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_image_view.h"
|
||||
#include "hk_physical_device.h"
|
||||
#include "hk_sampler.h"
|
||||
|
||||
static inline uint32_t
|
||||
align_u32(uint32_t v, uint32_t a)
|
||||
{
|
||||
assert(a != 0 && a == (a & -a));
|
||||
return (v + a - 1) & ~(a - 1);
|
||||
}
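/* Example (illustrative only): align_u32() rounds v up to the next multiple
 * of a power-of-two alignment, e.g.
 *
 *    align_u32(13, 8) == 16
 *    align_u32(16, 8) == 16
 *
 * The assert rejects non-power-of-two alignments, since the bit trick relies
 * on a == (a & -a).
 */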
|
||||
|
||||
static inline void *
|
||||
desc_ubo_data(struct hk_descriptor_set *set, uint32_t binding, uint32_t elem,
|
||||
uint32_t *size_out)
|
||||
{
|
||||
const struct hk_descriptor_set_binding_layout *binding_layout =
|
||||
&set->layout->binding[binding];
|
||||
|
||||
uint32_t offset = binding_layout->offset + elem * binding_layout->stride;
|
||||
assert(offset < set->size);
|
||||
|
||||
if (size_out != NULL)
|
||||
*size_out = set->size - offset;
|
||||
|
||||
return (char *)set->mapped_ptr + offset;
|
||||
}
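/* Addressing sketch (illustrative only): descriptors are laid out at
 * binding_layout->offset + elem * binding_layout->stride within the set, so a
 * binding at offset 64 with a 16-byte stride places element 3 at byte
 * 64 + 3 * 16 = 112 of the set's buffer.
 */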
|
||||
|
||||
static void
|
||||
write_desc(struct hk_descriptor_set *set, uint32_t binding, uint32_t elem,
|
||||
const void *desc_data, size_t desc_size)
|
||||
{
|
||||
ASSERTED uint32_t dst_size;
|
||||
void *dst = desc_ubo_data(set, binding, elem, &dst_size);
|
||||
assert(desc_size <= dst_size);
|
||||
memcpy(dst, desc_data, desc_size);
|
||||
}
|
||||
|
||||
static void
|
||||
write_sampled_image_view_desc(struct hk_descriptor_set *set,
|
||||
const VkDescriptorImageInfo *const info,
|
||||
uint32_t binding, uint32_t elem,
|
||||
VkDescriptorType descriptor_type)
|
||||
{
|
||||
struct hk_sampled_image_descriptor desc[3] = {};
|
||||
assert(HK_NULL_TEX_OFFSET == 0 && "zero initialized so null descs implicit");
|
||||
|
||||
uint8_t plane_count = 1;
|
||||
bool ia = (descriptor_type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT);
|
||||
|
||||
if (descriptor_type != VK_DESCRIPTOR_TYPE_SAMPLER && info &&
|
||||
info->imageView != VK_NULL_HANDLE) {
|
||||
VK_FROM_HANDLE(hk_image_view, view, info->imageView);
|
||||
|
||||
plane_count = view->plane_count;
|
||||
for (uint8_t plane = 0; plane < plane_count; plane++) {
|
||||
unsigned index = ia ? view->planes[plane].ia_desc_index
|
||||
: view->planes[plane].sampled_desc_index;
|
||||
|
||||
assert(index < (1 << 20));
|
||||
desc[plane].image_offset = index * HK_IMAGE_STRIDE;
|
||||
}
|
||||
}
|
||||
|
||||
if (descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
|
||||
descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
|
||||
const struct hk_descriptor_set_binding_layout *binding_layout =
|
||||
&set->layout->binding[binding];
|
||||
|
||||
struct hk_sampler *sampler;
|
||||
if (binding_layout->immutable_samplers) {
|
||||
sampler = binding_layout->immutable_samplers[elem];
|
||||
} else {
|
||||
sampler = hk_sampler_from_handle(info->sampler);
|
||||
}
|
||||
|
||||
if (sampler->has_border)
|
||||
assert(plane_count == 1);
|
||||
else
|
||||
plane_count = MAX2(plane_count, sampler->plane_count);
|
||||
|
||||
for (uint8_t plane = 0; plane < plane_count; plane++) {
|
||||
/* We need to replicate the last sampler plane out to all image
|
||||
* planes due to sampler table entry limitations. See
|
||||
* hk_CreateSampler in hk_sampler.c for more details.
|
||||
*/
|
||||
uint8_t sampler_plane = MIN2(plane, sampler->plane_count - 1);
|
||||
assert(sampler->planes[sampler_plane].hw->index < (1 << 12));
|
||||
|
||||
/* All bindless samplers are indexed from 28 in hardware, add here so
|
||||
* we don't have to care in the shader.
|
||||
*/
|
||||
desc[plane].sampler_index =
|
||||
sampler->planes[sampler_plane].hw->index + 28;
|
||||
desc[plane].lod_bias_fp16 = sampler->lod_bias_fp16;
|
||||
desc[plane].has_border = sampler->has_border;
|
||||
}
|
||||
|
||||
if (sampler->has_border) {
|
||||
assert(sampler->plane_count == 2);
|
||||
desc[0].clamp_0_sampler_index = sampler->planes[1].hw->index + 28;
|
||||
|
||||
static_assert(sizeof(desc[0].border) == sizeof(sampler->custom_border),
|
||||
"fixed format");
|
||||
|
||||
memcpy(desc[0].border, sampler->custom_border.uint32,
|
||||
sizeof(sampler->custom_border));
|
||||
}
|
||||
}
|
||||
write_desc(set, binding, elem, desc, sizeof(desc[0]) * plane_count);
|
||||
}
|
||||
|
||||
static void
|
||||
write_storage_image_view_desc(struct hk_descriptor_set *set,
|
||||
const VkDescriptorImageInfo *const info,
|
||||
uint32_t binding, uint32_t elem)
|
||||
{
|
||||
struct hk_storage_image_descriptor desc = {};
|
||||
|
||||
if (info && info->imageView != VK_NULL_HANDLE) {
|
||||
VK_FROM_HANDLE(hk_image_view, view, info->imageView);
|
||||
|
||||
/* Storage images are always single plane */
|
||||
assert(view->plane_count == 1);
|
||||
uint8_t plane = 0;
|
||||
|
||||
desc.tex_offset =
|
||||
view->planes[plane].ro_storage_desc_index * HK_IMAGE_STRIDE;
|
||||
|
||||
desc.pbe_offset =
|
||||
view->planes[plane].storage_desc_index * HK_IMAGE_STRIDE;
|
||||
} else {
|
||||
desc.tex_offset = HK_NULL_TEX_OFFSET;
|
||||
desc.pbe_offset = HK_NULL_PBE_OFFSET;
|
||||
}
|
||||
|
||||
write_desc(set, binding, elem, &desc, sizeof(desc));
|
||||
}
|
||||
|
||||
static void
|
||||
write_buffer_desc(struct hk_descriptor_set *set,
|
||||
const VkDescriptorBufferInfo *const info, uint32_t binding,
|
||||
uint32_t elem)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, info->buffer);
|
||||
|
||||
const struct hk_addr_range addr_range =
|
||||
hk_buffer_addr_range(buffer, info->offset, info->range);
|
||||
assert(addr_range.range <= UINT32_MAX);
|
||||
|
||||
const struct hk_buffer_address desc = {
|
||||
.base_addr = addr_range.addr,
|
||||
.size = addr_range.range,
|
||||
};
|
||||
write_desc(set, binding, elem, &desc, sizeof(desc));
|
||||
}
|
||||
|
||||
static void
|
||||
write_dynamic_buffer_desc(struct hk_descriptor_set *set,
|
||||
const VkDescriptorBufferInfo *const info,
|
||||
uint32_t binding, uint32_t elem)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_buffer, buffer, info->buffer);
|
||||
const struct hk_descriptor_set_binding_layout *binding_layout =
|
||||
&set->layout->binding[binding];
|
||||
|
||||
const struct hk_addr_range addr_range =
|
||||
hk_buffer_addr_range(buffer, info->offset, info->range);
|
||||
assert(addr_range.range <= UINT32_MAX);
|
||||
|
||||
struct hk_buffer_address *desc =
|
||||
&set->dynamic_buffers[binding_layout->dynamic_buffer_index + elem];
|
||||
*desc = (struct hk_buffer_address){
|
||||
.base_addr = addr_range.addr,
|
||||
.size = addr_range.range,
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
write_buffer_view_desc(struct hk_descriptor_set *set,
|
||||
const VkBufferView bufferView, uint32_t binding,
|
||||
uint32_t elem)
|
||||
{
|
||||
struct hk_buffer_view_descriptor desc = {};
|
||||
if (bufferView != VK_NULL_HANDLE) {
|
||||
VK_FROM_HANDLE(hk_buffer_view, view, bufferView);
|
||||
|
||||
assert(view->tex_desc_index < (1 << 20));
|
||||
assert(view->pbe_desc_index < (1 << 20));
|
||||
|
||||
desc.tex_offset = view->tex_desc_index * HK_IMAGE_STRIDE;
|
||||
desc.pbe_offset = view->pbe_desc_index * HK_IMAGE_STRIDE;
|
||||
} else {
|
||||
desc.tex_offset = HK_NULL_TEX_OFFSET;
|
||||
desc.pbe_offset = HK_NULL_PBE_OFFSET;
|
||||
}
|
||||
|
||||
write_desc(set, binding, elem, &desc, sizeof(desc));
|
||||
}
|
||||
|
||||
static void
|
||||
write_inline_uniform_data(struct hk_descriptor_set *set,
|
||||
const VkWriteDescriptorSetInlineUniformBlock *info,
|
||||
uint32_t binding, uint32_t offset)
|
||||
{
|
||||
assert(set->layout->binding[binding].stride == 1);
|
||||
write_desc(set, binding, offset, info->pData, info->dataSize);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_UpdateDescriptorSets(VkDevice device, uint32_t descriptorWriteCount,
|
||||
const VkWriteDescriptorSet *pDescriptorWrites,
|
||||
uint32_t descriptorCopyCount,
|
||||
const VkCopyDescriptorSet *pDescriptorCopies)
|
||||
{
|
||||
for (uint32_t w = 0; w < descriptorWriteCount; w++) {
|
||||
const VkWriteDescriptorSet *write = &pDescriptorWrites[w];
|
||||
VK_FROM_HANDLE(hk_descriptor_set, set, write->dstSet);
|
||||
|
||||
switch (write->descriptorType) {
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_sampled_image_view_desc(
|
||||
set, write->pImageInfo + j, write->dstBinding,
|
||||
write->dstArrayElement + j, write->descriptorType);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_storage_image_view_desc(set, write->pImageInfo + j,
|
||||
write->dstBinding,
|
||||
write->dstArrayElement + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_buffer_view_desc(set, write->pTexelBufferView[j],
|
||||
write->dstBinding,
|
||||
write->dstArrayElement + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_buffer_desc(set, write->pBufferInfo + j, write->dstBinding,
|
||||
write->dstArrayElement + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_dynamic_buffer_desc(set, write->pBufferInfo + j,
|
||||
write->dstBinding,
|
||||
write->dstArrayElement + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
|
||||
const VkWriteDescriptorSetInlineUniformBlock *write_inline =
|
||||
vk_find_struct_const(write->pNext,
|
||||
WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK);
|
||||
assert(write_inline->dataSize == write->descriptorCount);
|
||||
write_inline_uniform_data(set, write_inline, write->dstBinding,
|
||||
write->dstArrayElement);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < descriptorCopyCount; i++) {
|
||||
const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
|
||||
VK_FROM_HANDLE(hk_descriptor_set, src, copy->srcSet);
|
||||
VK_FROM_HANDLE(hk_descriptor_set, dst, copy->dstSet);
|
||||
|
||||
const struct hk_descriptor_set_binding_layout *src_binding_layout =
|
||||
&src->layout->binding[copy->srcBinding];
|
||||
const struct hk_descriptor_set_binding_layout *dst_binding_layout =
|
||||
&dst->layout->binding[copy->dstBinding];
|
||||
|
||||
if (dst_binding_layout->stride > 0 && src_binding_layout->stride > 0) {
|
||||
for (uint32_t j = 0; j < copy->descriptorCount; j++) {
|
||||
ASSERTED uint32_t dst_max_size, src_max_size;
|
||||
void *dst_map = desc_ubo_data(
|
||||
dst, copy->dstBinding, copy->dstArrayElement + j, &dst_max_size);
|
||||
const void *src_map = desc_ubo_data(
|
||||
src, copy->srcBinding, copy->srcArrayElement + j, &src_max_size);
|
||||
const uint32_t copy_size =
|
||||
MIN2(dst_binding_layout->stride, src_binding_layout->stride);
|
||||
assert(copy_size <= dst_max_size && copy_size <= src_max_size);
|
||||
memcpy(dst_map, src_map, copy_size);
|
||||
}
|
||||
}
|
||||
|
||||
switch (src_binding_layout->type) {
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
const uint32_t dst_dyn_start =
|
||||
dst_binding_layout->dynamic_buffer_index + copy->dstArrayElement;
|
||||
const uint32_t src_dyn_start =
|
||||
src_binding_layout->dynamic_buffer_index + copy->srcArrayElement;
|
||||
typed_memcpy(&dst->dynamic_buffers[dst_dyn_start],
|
||||
&src->dynamic_buffers[src_dyn_start],
|
||||
copy->descriptorCount);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hk_push_descriptor_set_update(struct hk_push_descriptor_set *push_set,
|
||||
struct hk_descriptor_set_layout *layout,
|
||||
uint32_t write_count,
|
||||
const VkWriteDescriptorSet *writes)
|
||||
{
|
||||
assert(layout->non_variable_descriptor_buffer_size < sizeof(push_set->data));
|
||||
struct hk_descriptor_set set = {
|
||||
.layout = layout,
|
||||
.size = sizeof(push_set->data),
|
||||
.mapped_ptr = push_set->data,
|
||||
};
|
||||
|
||||
for (uint32_t w = 0; w < write_count; w++) {
|
||||
const VkWriteDescriptorSet *write = &writes[w];
|
||||
assert(write->dstSet == VK_NULL_HANDLE);
|
||||
|
||||
switch (write->descriptorType) {
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_sampled_image_view_desc(
|
||||
&set, write->pImageInfo + j, write->dstBinding,
|
||||
write->dstArrayElement + j, write->descriptorType);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_storage_image_view_desc(&set, write->pImageInfo + j,
|
||||
write->dstBinding,
|
||||
write->dstArrayElement + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_buffer_view_desc(&set, write->pTexelBufferView[j],
|
||||
write->dstBinding,
|
||||
write->dstArrayElement + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
for (uint32_t j = 0; j < write->descriptorCount; j++) {
|
||||
write_buffer_desc(&set, write->pBufferInfo + j, write->dstBinding,
|
||||
write->dstArrayElement + j);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void hk_descriptor_pool_free(struct hk_descriptor_pool *pool,
|
||||
uint64_t addr, uint64_t size);
|
||||
|
||||
static void
|
||||
hk_descriptor_set_destroy(struct hk_device *dev,
|
||||
struct hk_descriptor_pool *pool,
|
||||
struct hk_descriptor_set *set)
|
||||
{
|
||||
list_del(&set->link);
|
||||
if (set->size > 0)
|
||||
hk_descriptor_pool_free(pool, set->addr, set->size);
|
||||
vk_descriptor_set_layout_unref(&dev->vk, &set->layout->vk);
|
||||
|
||||
vk_object_free(&dev->vk, NULL, set);
|
||||
}
|
||||
|
||||
static void
|
||||
hk_destroy_descriptor_pool(struct hk_device *dev,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
struct hk_descriptor_pool *pool)
|
||||
{
|
||||
list_for_each_entry_safe(struct hk_descriptor_set, set, &pool->sets, link)
|
||||
hk_descriptor_set_destroy(dev, pool, set);
|
||||
|
||||
util_vma_heap_finish(&pool->heap);
|
||||
agx_bo_unreference(pool->bo);
|
||||
|
||||
vk_object_free(&dev->vk, pAllocator, pool);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateDescriptorPool(VkDevice _device,
|
||||
const VkDescriptorPoolCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
VkDescriptorPool *pDescriptorPool)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, _device);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
struct hk_descriptor_pool *pool;
|
||||
|
||||
pool = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*pool),
|
||||
VK_OBJECT_TYPE_DESCRIPTOR_POOL);
|
||||
if (!pool)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
list_inithead(&pool->sets);
|
||||
|
||||
const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
|
||||
|
||||
uint32_t max_align = 0;
|
||||
for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
|
||||
const VkMutableDescriptorTypeListEXT *type_list = NULL;
|
||||
if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT &&
|
||||
mutable_info && i < mutable_info->mutableDescriptorTypeListCount)
|
||||
type_list = &mutable_info->pMutableDescriptorTypeLists[i];
|
||||
|
||||
uint32_t stride, alignment;
|
||||
hk_descriptor_stride_align_for_type(pdev, pCreateInfo->pPoolSizes[i].type,
|
||||
type_list, &stride, &alignment);
|
||||
max_align = MAX2(max_align, alignment);
|
||||
}
|
||||
|
||||
uint64_t bo_size = 0;
|
||||
for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
|
||||
const VkMutableDescriptorTypeListEXT *type_list = NULL;
|
||||
if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT &&
|
||||
mutable_info && i < mutable_info->mutableDescriptorTypeListCount)
|
||||
type_list = &mutable_info->pMutableDescriptorTypeLists[i];
|
||||
|
||||
uint32_t stride, alignment;
|
||||
hk_descriptor_stride_align_for_type(pdev, pCreateInfo->pPoolSizes[i].type,
|
||||
type_list, &stride, &alignment);
|
||||
bo_size +=
|
||||
MAX2(stride, max_align) * pCreateInfo->pPoolSizes[i].descriptorCount;
|
||||
}
|
||||
|
||||
/* Individual descriptor sets are aligned to the min UBO alignment to
|
||||
* ensure that we don't end up with unaligned data access in any shaders.
|
||||
* This means that each descriptor buffer allocated may burn up to 16B of
|
||||
* extra space to get the right alignment. (Technically, it's at most 28B
|
||||
* because we're always going to start at least 4B aligned but we're being
|
||||
* conservative here.) Allocate enough extra space that we can chop it
|
||||
* into maxSets pieces and align each one of them to 32B.
|
||||
*/
|
||||
bo_size += HK_MIN_UBO_ALIGNMENT * pCreateInfo->maxSets;
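/* Worked example: maxSets = 2 with a single pool size of
 * { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, descriptorCount = 8 } gives
 * max_align = 16 (sizeof(struct hk_buffer_address)), so bo_size is
 * 8 * 16 = 128 bytes of descriptors plus 2 * HK_MIN_UBO_ALIGNMENT bytes of
 * per-set alignment slack.
 */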
|
||||
|
||||
if (bo_size) {
|
||||
pool->bo = agx_bo_create(&dev->dev, bo_size, 0, "Descriptor pool");
|
||||
if (!pool->bo) {
|
||||
hk_destroy_descriptor_pool(dev, pAllocator, pool);
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
pool->mapped_ptr = pool->bo->ptr.cpu;
|
||||
|
||||
/* The BO may be larger thanks to GPU page alignment. We may as well
|
||||
* make that extra space available to the client.
|
||||
*/
|
||||
assert(pool->bo->size >= bo_size);
|
||||
util_vma_heap_init(&pool->heap, pool->bo->ptr.gpu, pool->bo->size);
|
||||
} else {
|
||||
util_vma_heap_init(&pool->heap, 0, 0);
|
||||
}
|
||||
|
||||
*pDescriptorPool = hk_descriptor_pool_to_handle(pool);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_descriptor_pool_alloc(struct hk_descriptor_pool *pool, uint64_t size,
|
||||
uint64_t alignment, uint64_t *addr_out, void **map_out)
|
||||
{
|
||||
assert(size > 0);
|
||||
uint64_t addr = util_vma_heap_alloc(&pool->heap, size, alignment);
|
||||
if (addr == 0)
|
||||
return VK_ERROR_OUT_OF_POOL_MEMORY;
|
||||
|
||||
assert(addr >= pool->bo->ptr.gpu);
|
||||
assert(addr + size <= pool->bo->ptr.gpu + pool->bo->size);
|
||||
uint64_t offset = addr - pool->bo->ptr.gpu;
|
||||
|
||||
*addr_out = addr;
|
||||
*map_out = pool->mapped_ptr + offset;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
hk_descriptor_pool_free(struct hk_descriptor_pool *pool, uint64_t addr,
|
||||
uint64_t size)
|
||||
{
|
||||
assert(size > 0);
|
||||
assert(addr >= pool->bo->ptr.gpu);
|
||||
assert(addr + size <= pool->bo->ptr.gpu + pool->bo->size);
|
||||
util_vma_heap_free(&pool->heap, addr, size);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_descriptor_set_create(struct hk_device *dev, struct hk_descriptor_pool *pool,
|
||||
struct hk_descriptor_set_layout *layout,
|
||||
uint32_t variable_count,
|
||||
struct hk_descriptor_set **out_set)
|
||||
{
|
||||
struct hk_descriptor_set *set;
|
||||
VkResult result;
|
||||
|
||||
uint32_t mem_size =
|
||||
sizeof(struct hk_descriptor_set) +
|
||||
layout->dynamic_buffer_count * sizeof(struct hk_buffer_address);
|
||||
|
||||
set =
|
||||
vk_object_zalloc(&dev->vk, NULL, mem_size, VK_OBJECT_TYPE_DESCRIPTOR_SET);
|
||||
if (!set)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
set->size = layout->non_variable_descriptor_buffer_size;
|
||||
|
||||
if (layout->binding_count > 0 &&
|
||||
(layout->binding[layout->binding_count - 1].flags &
|
||||
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
|
||||
uint32_t stride = layout->binding[layout->binding_count - 1].stride;
|
||||
set->size += stride * variable_count;
|
||||
}
|
||||
|
||||
set->size = align64(set->size, HK_MIN_UBO_ALIGNMENT);
|
||||
|
||||
if (set->size > 0) {
|
||||
result = hk_descriptor_pool_alloc(pool, set->size, HK_MIN_UBO_ALIGNMENT,
|
||||
&set->addr, &set->mapped_ptr);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_object_free(&dev->vk, NULL, set);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
vk_descriptor_set_layout_ref(&layout->vk);
|
||||
set->layout = layout;
|
||||
|
||||
for (uint32_t b = 0; b < layout->binding_count; b++) {
|
||||
if (layout->binding[b].type != VK_DESCRIPTOR_TYPE_SAMPLER &&
|
||||
layout->binding[b].type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
|
||||
continue;
|
||||
|
||||
if (layout->binding[b].immutable_samplers == NULL)
|
||||
continue;
|
||||
|
||||
uint32_t array_size = layout->binding[b].array_size;
|
||||
if (layout->binding[b].flags &
|
||||
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)
|
||||
array_size = variable_count;
|
||||
|
||||
for (uint32_t j = 0; j < array_size; j++) {
|
||||
write_sampled_image_view_desc(set, NULL, b, j,
|
||||
layout->binding[b].type);
|
||||
}
|
||||
}
|
||||
|
||||
list_addtail(&set->link, &pool->sets);
|
||||
*out_set = set;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_AllocateDescriptorSets(VkDevice device,
|
||||
const VkDescriptorSetAllocateInfo *pAllocateInfo,
|
||||
VkDescriptorSet *pDescriptorSets)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_descriptor_pool, pool, pAllocateInfo->descriptorPool);
|
||||
|
||||
VkResult result = VK_SUCCESS;
|
||||
uint32_t i;
|
||||
|
||||
struct hk_descriptor_set *set = NULL;
|
||||
|
||||
const VkDescriptorSetVariableDescriptorCountAllocateInfo *var_desc_count =
|
||||
vk_find_struct_const(
|
||||
pAllocateInfo->pNext,
|
||||
DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
|
||||
|
||||
/* allocate a set of buffers for each shader to contain descriptors */
|
||||
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
|
||||
VK_FROM_HANDLE(hk_descriptor_set_layout, layout,
|
||||
pAllocateInfo->pSetLayouts[i]);
|
||||
/* If descriptorSetCount is zero or this structure is not included in
|
||||
* the pNext chain, then the variable lengths are considered to be zero.
|
||||
*/
|
||||
const uint32_t variable_count =
|
||||
var_desc_count && var_desc_count->descriptorSetCount > 0
|
||||
? var_desc_count->pDescriptorCounts[i]
|
||||
: 0;
|
||||
|
||||
result =
|
||||
hk_descriptor_set_create(dev, pool, layout, variable_count, &set);
|
||||
if (result != VK_SUCCESS)
|
||||
break;
|
||||
|
||||
pDescriptorSets[i] = hk_descriptor_set_to_handle(set);
|
||||
}
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
hk_FreeDescriptorSets(device, pAllocateInfo->descriptorPool, i,
|
||||
pDescriptorSets);
|
||||
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
|
||||
pDescriptorSets[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_FreeDescriptorSets(VkDevice device, VkDescriptorPool descriptorPool,
|
||||
uint32_t descriptorSetCount,
|
||||
const VkDescriptorSet *pDescriptorSets)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_descriptor_pool, pool, descriptorPool);
|
||||
|
||||
for (uint32_t i = 0; i < descriptorSetCount; i++) {
|
||||
VK_FROM_HANDLE(hk_descriptor_set, set, pDescriptorSets[i]);
|
||||
|
||||
if (set)
|
||||
hk_descriptor_set_destroy(dev, pool, set);
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyDescriptorPool(VkDevice device, VkDescriptorPool _pool,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_descriptor_pool, pool, _pool);
|
||||
|
||||
if (!_pool)
|
||||
return;
|
||||
|
||||
hk_destroy_descriptor_pool(dev, pAllocator, pool);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_ResetDescriptorPool(VkDevice device, VkDescriptorPool descriptorPool,
|
||||
VkDescriptorPoolResetFlags flags)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_descriptor_pool, pool, descriptorPool);
|
||||
|
||||
list_for_each_entry_safe(struct hk_descriptor_set, set, &pool->sets, link)
|
||||
hk_descriptor_set_destroy(dev, pool, set);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
hk_descriptor_set_write_template(
|
||||
struct hk_descriptor_set *set,
|
||||
const struct vk_descriptor_update_template *template, const void *data)
|
||||
{
|
||||
for (uint32_t i = 0; i < template->entry_count; i++) {
|
||||
const struct vk_descriptor_template_entry *entry = &template->entries[i];
|
||||
|
||||
switch (entry->type) {
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
for (uint32_t j = 0; j < entry->array_count; j++) {
|
||||
const VkDescriptorImageInfo *info =
|
||||
data + entry->offset + j * entry->stride;
|
||||
|
||||
write_sampled_image_view_desc(set, info, entry->binding,
|
||||
entry->array_element + j,
|
||||
entry->type);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
for (uint32_t j = 0; j < entry->array_count; j++) {
|
||||
const VkDescriptorImageInfo *info =
|
||||
data + entry->offset + j * entry->stride;
|
||||
|
||||
write_storage_image_view_desc(set, info, entry->binding,
|
||||
entry->array_element + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
for (uint32_t j = 0; j < entry->array_count; j++) {
|
||||
const VkBufferView *bview =
|
||||
data + entry->offset + j * entry->stride;
|
||||
|
||||
write_buffer_view_desc(set, *bview, entry->binding,
|
||||
entry->array_element + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
for (uint32_t j = 0; j < entry->array_count; j++) {
|
||||
const VkDescriptorBufferInfo *info =
|
||||
data + entry->offset + j * entry->stride;
|
||||
|
||||
write_buffer_desc(set, info, entry->binding,
|
||||
entry->array_element + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
for (uint32_t j = 0; j < entry->array_count; j++) {
|
||||
const VkDescriptorBufferInfo *info =
|
||||
data + entry->offset + j * entry->stride;
|
||||
|
||||
write_dynamic_buffer_desc(set, info, entry->binding,
|
||||
entry->array_element + j);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
|
||||
write_desc(set, entry->binding, entry->array_element,
|
||||
data + entry->offset, entry->array_count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_UpdateDescriptorSetWithTemplate(
|
||||
VkDevice device, VkDescriptorSet descriptorSet,
|
||||
VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_descriptor_set, set, descriptorSet);
|
||||
VK_FROM_HANDLE(vk_descriptor_update_template, template,
|
||||
descriptorUpdateTemplate);
|
||||
|
||||
hk_descriptor_set_write_template(set, template, pData);
|
||||
}
|
||||
|
||||
void
|
||||
hk_push_descriptor_set_update_template(
|
||||
struct hk_push_descriptor_set *push_set,
|
||||
struct hk_descriptor_set_layout *layout,
|
||||
const struct vk_descriptor_update_template *template, const void *data)
|
||||
{
|
||||
struct hk_descriptor_set tmp_set = {
|
||||
.layout = layout,
|
||||
.size = sizeof(push_set->data),
|
||||
.mapped_ptr = push_set->data,
|
||||
};
|
||||
hk_descriptor_set_write_template(&tmp_set, template, data);
|
||||
}
|
||||
107 src/asahi/vulkan/hk_descriptor_set.h (new file)
@@ -0,0 +1,107 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "vk_descriptor_update_template.h"
|
||||
#include "vk_object.h"
|
||||
|
||||
#include "util/list.h"
|
||||
#include "util/vma.h"
|
||||
|
||||
/* Stride of the image heap, equal to the size of a texture/PBE descriptor */
|
||||
#define HK_IMAGE_STRIDE (24)
|
||||
|
||||
struct hk_descriptor_set_layout;
|
||||
|
||||
struct hk_sampled_image_descriptor {
|
||||
uint32_t image_offset;
|
||||
uint16_t sampler_index;
|
||||
uint16_t lod_bias_fp16;
|
||||
/* TODO: This should probably be a heap! */
|
||||
uint32_t border[4];
|
||||
/* XXX: Single bit! Tuck it in somewhere else */
|
||||
uint32_t has_border;
|
||||
uint16_t clamp_0_sampler_index;
|
||||
uint16_t pad_0;
|
||||
};
|
||||
static_assert(sizeof(struct hk_sampled_image_descriptor) == 32,
|
||||
"hk_sampled_image_descriptor has no holes");
|
||||
|
||||
struct hk_storage_image_descriptor {
|
||||
uint32_t tex_offset;
|
||||
uint32_t pbe_offset;
|
||||
};
|
||||
static_assert(sizeof(struct hk_storage_image_descriptor) == 8,
|
||||
"hk_storage_image_descriptor has no holes");
|
||||
|
||||
struct hk_buffer_view_descriptor {
|
||||
uint32_t tex_offset;
|
||||
uint32_t pbe_offset;
|
||||
};
|
||||
static_assert(sizeof(struct hk_buffer_view_descriptor) == 8,
|
||||
"hk_buffer_view_descriptor has no holes");
|
||||
|
||||
/* This has to match nir_address_format_64bit_bounded_global */
|
||||
struct hk_buffer_address {
|
||||
uint64_t base_addr;
|
||||
uint32_t size;
|
||||
uint32_t zero; /* Must be zero! */
|
||||
};
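/* With this address format, buffer loads are roughly
 *
 *    value = (offset + access_size <= size) ? load(base_addr + offset) : 0;
 *
 * i.e. accesses are bounds-checked against `size` and out-of-bounds reads
 * return zero, which is what robust buffer access relies on.
 */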
|
||||
|
||||
struct hk_descriptor_pool {
|
||||
struct vk_object_base base;
|
||||
|
||||
struct list_head sets;
|
||||
|
||||
struct agx_bo *bo;
|
||||
uint8_t *mapped_ptr;
|
||||
struct util_vma_heap heap;
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_descriptor_pool, base, VkDescriptorPool,
|
||||
VK_OBJECT_TYPE_DESCRIPTOR_POOL)
|
||||
|
||||
struct hk_descriptor_set {
|
||||
struct vk_object_base base;
|
||||
|
||||
/* Link in hk_descriptor_pool::sets */
|
||||
struct list_head link;
|
||||
|
||||
struct hk_descriptor_set_layout *layout;
|
||||
void *mapped_ptr;
|
||||
uint64_t addr;
|
||||
uint32_t size;
|
||||
|
||||
struct hk_buffer_address dynamic_buffers[];
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_descriptor_set, base, VkDescriptorSet,
|
||||
VK_OBJECT_TYPE_DESCRIPTOR_SET)
|
||||
|
||||
static inline uint64_t
|
||||
hk_descriptor_set_addr(const struct hk_descriptor_set *set)
|
||||
{
|
||||
return set->addr;
|
||||
}
|
||||
|
||||
struct hk_push_descriptor_set {
|
||||
uint8_t data[HK_PUSH_DESCRIPTOR_SET_SIZE];
|
||||
};
|
||||
|
||||
void hk_push_descriptor_set_update(struct hk_push_descriptor_set *push_set,
|
||||
struct hk_descriptor_set_layout *layout,
|
||||
uint32_t write_count,
|
||||
const VkWriteDescriptorSet *writes);
|
||||
|
||||
void hk_push_descriptor_set_update_template(
|
||||
struct hk_push_descriptor_set *push_set,
|
||||
struct hk_descriptor_set_layout *layout,
|
||||
const struct vk_descriptor_update_template *template, const void *data);
|
||||
423 src/asahi/vulkan/hk_descriptor_set_layout.c (new file)
@@ -0,0 +1,423 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_descriptor_set_layout.h"
|
||||
|
||||
#include "hk_descriptor_set.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_physical_device.h"
|
||||
#include "hk_sampler.h"
|
||||
|
||||
#include "vk_pipeline_layout.h"
|
||||
|
||||
static bool
|
||||
binding_has_immutable_samplers(const VkDescriptorSetLayoutBinding *binding)
|
||||
{
|
||||
switch (binding->descriptorType) {
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
return binding->pImmutableSamplers != NULL;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hk_descriptor_stride_align_for_type(
|
||||
const struct hk_physical_device *pdev, VkDescriptorType type,
|
||||
const VkMutableDescriptorTypeListEXT *type_list, uint32_t *stride,
|
||||
uint32_t *alignment)
|
||||
{
|
||||
switch (type) {
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
/* TODO: How do samplers work? */
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
*stride = *alignment = sizeof(struct hk_sampled_image_descriptor);
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
*stride = *alignment = sizeof(struct hk_storage_image_descriptor);
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
*stride = *alignment = sizeof(struct hk_buffer_view_descriptor);
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
*stride = *alignment = sizeof(struct hk_buffer_address);
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
*stride = *alignment = 0; /* These don't take up buffer space */
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
|
||||
*stride = 1; /* Array size is bytes */
|
||||
*alignment = HK_MIN_UBO_ALIGNMENT;
|
||||
break;
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
|
||||
*stride = *alignment = 0;
|
||||
if (type_list == NULL)
|
||||
*stride = *alignment = HK_MAX_DESCRIPTOR_SIZE;
|
||||
for (unsigned i = 0; type_list && i < type_list->descriptorTypeCount;
|
||||
i++) {
|
||||
/* This shouldn't recurse */
|
||||
assert(type_list->pDescriptorTypes[i] !=
|
||||
VK_DESCRIPTOR_TYPE_MUTABLE_EXT);
|
||||
uint32_t desc_stride, desc_align;
|
||||
hk_descriptor_stride_align_for_type(pdev,
|
||||
type_list->pDescriptorTypes[i],
|
||||
NULL, &desc_stride, &desc_align);
|
||||
*stride = MAX2(*stride, desc_stride);
|
||||
*alignment = MAX2(*alignment, desc_align);
|
||||
}
|
||||
*stride = ALIGN(*stride, *alignment);
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid descriptor type");
|
||||
}
|
||||
|
||||
assert(*stride <= HK_MAX_DESCRIPTOR_SIZE);
|
||||
}
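/* For example, a VK_DESCRIPTOR_TYPE_MUTABLE_EXT binding whose type list
 * contains SAMPLED_IMAGE and STORAGE_BUFFER resolves to
 * stride = alignment = MAX2(32, 16) = 32, i.e. the size of
 * struct hk_sampled_image_descriptor.
 */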
|
||||
|
||||
static const VkMutableDescriptorTypeListEXT *
|
||||
hk_descriptor_get_type_list(VkDescriptorType type,
|
||||
const VkMutableDescriptorTypeCreateInfoEXT *info,
|
||||
const uint32_t info_idx)
|
||||
{
|
||||
const VkMutableDescriptorTypeListEXT *type_list = NULL;
|
||||
if (type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
|
||||
assert(info != NULL);
|
||||
assert(info_idx < info->mutableDescriptorTypeListCount);
|
||||
type_list = &info->pMutableDescriptorTypeLists[info_idx];
|
||||
}
|
||||
return type_list;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateDescriptorSetLayout(VkDevice device,
|
||||
const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
VkDescriptorSetLayout *pSetLayout)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
|
||||
uint32_t num_bindings = 0;
|
||||
uint32_t immutable_sampler_count = 0;
|
||||
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
|
||||
const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
|
||||
num_bindings = MAX2(num_bindings, binding->binding + 1);
|
||||
|
||||
/* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
|
||||
*
|
||||
* "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
|
||||
* VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
|
||||
* pImmutableSamplers can be used to initialize a set of immutable
|
||||
* samplers. [...] If descriptorType is not one of these descriptor
|
||||
* types, then pImmutableSamplers is ignored.
|
||||
*
|
||||
* We need to be careful here and only parse pImmutableSamplers if we
|
||||
* have one of the right descriptor types.
|
||||
*/
|
||||
if (binding_has_immutable_samplers(binding))
|
||||
immutable_sampler_count += binding->descriptorCount;
|
||||
}
|
||||
|
||||
VK_MULTIALLOC(ma);
|
||||
VK_MULTIALLOC_DECL(&ma, struct hk_descriptor_set_layout, layout, 1);
|
||||
VK_MULTIALLOC_DECL(&ma, struct hk_descriptor_set_binding_layout, bindings,
|
||||
num_bindings);
|
||||
VK_MULTIALLOC_DECL(&ma, struct hk_sampler *, samplers,
|
||||
immutable_sampler_count);
|
||||
|
||||
if (!vk_descriptor_set_layout_multizalloc(&dev->vk, &ma))
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
layout->binding_count = num_bindings;
|
||||
|
||||
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
|
||||
const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
|
||||
uint32_t b = binding->binding;
|
||||
/* We temporarily store pCreateInfo->pBindings[] index (plus one) in the
|
||||
* immutable_samplers pointer. This provides us with a quick-and-dirty
|
||||
* way to sort the bindings by binding number.
|
||||
*/
|
||||
layout->binding[b].immutable_samplers = (void *)(uintptr_t)(j + 1);
|
||||
}
|
||||
|
||||
const VkDescriptorSetLayoutBindingFlagsCreateInfo *binding_flags_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
|
||||
const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
|
||||
|
||||
uint32_t buffer_size = 0;
|
||||
uint8_t dynamic_buffer_count = 0;
|
||||
for (uint32_t b = 0; b < num_bindings; b++) {
|
||||
/* We stashed the pCreateInfo->pBindings[] index (plus one) in the
|
||||
* immutable_samplers pointer. Check for NULL (empty binding) and then
|
||||
* reset it and compute the index.
|
||||
*/
|
||||
if (layout->binding[b].immutable_samplers == NULL)
|
||||
continue;
|
||||
const uint32_t info_idx =
|
||||
(uintptr_t)(void *)layout->binding[b].immutable_samplers - 1;
|
||||
layout->binding[b].immutable_samplers = NULL;
|
||||
|
||||
const VkDescriptorSetLayoutBinding *binding =
|
||||
&pCreateInfo->pBindings[info_idx];
|
||||
|
||||
if (binding->descriptorCount == 0)
|
||||
continue;
|
||||
|
||||
layout->binding[b].type = binding->descriptorType;
|
||||
|
||||
if (binding_flags_info && binding_flags_info->bindingCount > 0) {
|
||||
assert(binding_flags_info->bindingCount == pCreateInfo->bindingCount);
|
||||
layout->binding[b].flags = binding_flags_info->pBindingFlags[info_idx];
|
||||
}
|
||||
|
||||
layout->binding[b].array_size = binding->descriptorCount;
|
||||
|
||||
switch (binding->descriptorType) {
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
layout->binding[b].dynamic_buffer_index = dynamic_buffer_count;
|
||||
dynamic_buffer_count += binding->descriptorCount;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
const VkMutableDescriptorTypeListEXT *type_list =
|
||||
hk_descriptor_get_type_list(binding->descriptorType, mutable_info,
|
||||
info_idx);
|
||||
|
||||
uint32_t stride, alignment;
|
||||
hk_descriptor_stride_align_for_type(pdev, binding->descriptorType,
|
||||
type_list, &stride, &alignment);
|
||||
|
||||
uint8_t max_plane_count = 1;
|
||||
|
||||
if (binding_has_immutable_samplers(binding)) {
|
||||
layout->binding[b].immutable_samplers = samplers;
|
||||
samplers += binding->descriptorCount;
|
||||
for (uint32_t i = 0; i < binding->descriptorCount; i++) {
|
||||
VK_FROM_HANDLE(hk_sampler, sampler, binding->pImmutableSamplers[i]);
|
||||
layout->binding[b].immutable_samplers[i] = sampler;
|
||||
const uint8_t sampler_plane_count =
|
||||
sampler->vk.ycbcr_conversion
|
||||
? vk_format_get_plane_count(
|
||||
sampler->vk.ycbcr_conversion->state.format)
|
||||
: 1;
|
||||
if (max_plane_count < sampler_plane_count)
|
||||
max_plane_count = sampler_plane_count;
|
||||
}
|
||||
}
|
||||
|
||||
stride *= max_plane_count;
|
||||
|
||||
if (stride > 0) {
|
||||
assert(stride <= UINT8_MAX);
|
||||
assert(util_is_power_of_two_nonzero(alignment));
|
||||
|
||||
buffer_size = align64(buffer_size, alignment);
|
||||
layout->binding[b].offset = buffer_size;
|
||||
layout->binding[b].stride = stride;
|
||||
|
||||
if (layout->binding[b].flags &
|
||||
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) {
|
||||
/* From the Vulkan 1.3.256 spec:
|
||||
*
|
||||
* VUID-VkDescriptorSetLayoutBindingFlagsCreateInfo-pBindingFlags-03004
|
||||
* "If an element of pBindingFlags includes
|
||||
* VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT, then
|
||||
* all other elements of
|
||||
* VkDescriptorSetLayoutCreateInfo::pBindings must have a
|
||||
* smaller value of binding"
|
||||
*
|
||||
* In other words, it has to be the last binding.
|
||||
*/
|
||||
assert(b == num_bindings - 1);
|
||||
} else {
|
||||
/* the allocation size will be computed at descriptor allocation,
|
||||
* but the buffer size will be already aligned as this binding will
|
||||
* be the last
|
||||
*/
|
||||
buffer_size += stride * binding->descriptorCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
layout->non_variable_descriptor_buffer_size = buffer_size;
|
||||
layout->dynamic_buffer_count = dynamic_buffer_count;
|
||||
|
||||
struct mesa_blake3 blake3_ctx;
|
||||
_mesa_blake3_init(&blake3_ctx);
|
||||
|
||||
#define BLAKE3_UPDATE_VALUE(x) \
|
||||
_mesa_blake3_update(&blake3_ctx, &(x), sizeof(x));
|
||||
BLAKE3_UPDATE_VALUE(layout->non_variable_descriptor_buffer_size);
|
||||
BLAKE3_UPDATE_VALUE(layout->dynamic_buffer_count);
|
||||
BLAKE3_UPDATE_VALUE(layout->binding_count);
|
||||
|
||||
for (uint32_t b = 0; b < num_bindings; b++) {
|
||||
BLAKE3_UPDATE_VALUE(layout->binding[b].type);
|
||||
BLAKE3_UPDATE_VALUE(layout->binding[b].flags);
|
||||
BLAKE3_UPDATE_VALUE(layout->binding[b].array_size);
|
||||
BLAKE3_UPDATE_VALUE(layout->binding[b].offset);
|
||||
BLAKE3_UPDATE_VALUE(layout->binding[b].stride);
|
||||
BLAKE3_UPDATE_VALUE(layout->binding[b].dynamic_buffer_index);
|
||||
|
||||
if (layout->binding[b].immutable_samplers != NULL) {
|
||||
for (uint32_t i = 0; i < layout->binding[b].array_size; i++) {
|
||||
const struct hk_sampler *sampler =
|
||||
layout->binding[b].immutable_samplers[i];
|
||||
|
||||
/* We zalloc the object, so it's safe to hash the whole thing */
|
||||
if (sampler != NULL && sampler->vk.ycbcr_conversion != NULL)
|
||||
BLAKE3_UPDATE_VALUE(sampler->vk.ycbcr_conversion->state);
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef BLAKE3_UPDATE_VALUE
|
||||
|
||||
_mesa_blake3_final(&blake3_ctx, layout->vk.blake3);
|
||||
|
||||
*pSetLayout = hk_descriptor_set_layout_to_handle(layout);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_GetDescriptorSetLayoutSupport(
|
||||
VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
|
||||
VkDescriptorSetLayoutSupport *pSupport)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
|
||||
const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
|
||||
const VkDescriptorSetLayoutBindingFlagsCreateInfo *binding_flags =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
|
||||
|
||||
/* Figure out the maximum alignment up-front. Otherwise, we need to sort
|
||||
* the list of descriptors by binding number in order to get the size
|
||||
* accumulation right.
|
||||
*/
|
||||
uint32_t max_align = 0;
|
||||
for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
|
||||
const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[i];
|
||||
const VkMutableDescriptorTypeListEXT *type_list =
|
||||
hk_descriptor_get_type_list(binding->descriptorType, mutable_info, i);
|
||||
|
||||
uint32_t stride, alignment;
|
||||
hk_descriptor_stride_align_for_type(pdev, binding->descriptorType,
|
||||
type_list, &stride, &alignment);
|
||||
max_align = MAX2(max_align, alignment);
|
||||
}
|
||||
|
||||
uint64_t non_variable_size = 0;
|
||||
uint32_t variable_stride = 0;
|
||||
uint32_t variable_count = 0;
|
||||
uint8_t dynamic_buffer_count = 0;
|
||||
|
||||
for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
|
||||
const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[i];
|
||||
|
||||
VkDescriptorBindingFlags flags = 0;
|
||||
if (binding_flags != NULL && binding_flags->bindingCount > 0)
|
||||
flags = binding_flags->pBindingFlags[i];
|
||||
|
||||
switch (binding->descriptorType) {
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
dynamic_buffer_count += binding->descriptorCount;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
const VkMutableDescriptorTypeListEXT *type_list =
|
||||
hk_descriptor_get_type_list(binding->descriptorType, mutable_info, i);
|
||||
|
||||
uint32_t stride, alignment;
|
||||
hk_descriptor_stride_align_for_type(pdev, binding->descriptorType,
|
||||
type_list, &stride, &alignment);
|
||||
|
||||
if (stride > 0) {
|
||||
assert(stride <= UINT8_MAX);
|
||||
assert(util_is_power_of_two_nonzero(alignment));
|
||||
|
||||
if (flags & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) {
|
||||
/* From the Vulkan 1.3.256 spec:
|
||||
*
|
||||
* "For the purposes of this command, a variable-sized
|
||||
* descriptor binding with a descriptorCount of zero is treated
|
||||
* as if the descriptorCount is one"
|
||||
*/
|
||||
variable_count = MAX2(1, binding->descriptorCount);
|
||||
variable_stride = stride;
|
||||
} else {
|
||||
/* Since we're aligning to the maximum and since this is just a
|
||||
* check for whether or not the max buffer size is big enough, we
|
||||
* keep non_variable_size aligned to max_align.
|
||||
*/
|
||||
non_variable_size += stride * binding->descriptorCount;
|
||||
non_variable_size = align64(non_variable_size, max_align);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t buffer_size = non_variable_size;
|
||||
if (variable_stride > 0) {
|
||||
buffer_size += variable_stride * variable_count;
|
||||
buffer_size = align64(buffer_size, max_align);
|
||||
}
|
||||
|
||||
uint32_t max_buffer_size;
|
||||
if (pCreateInfo->flags &
|
||||
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)
|
||||
max_buffer_size = HK_PUSH_DESCRIPTOR_SET_SIZE;
|
||||
else
|
||||
max_buffer_size = HK_MAX_DESCRIPTOR_SET_SIZE;
|
||||
|
||||
pSupport->supported = dynamic_buffer_count <= HK_MAX_DYNAMIC_BUFFERS &&
|
||||
buffer_size <= max_buffer_size;
|
||||
|
||||
vk_foreach_struct(ext, pSupport->pNext) {
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT: {
|
||||
VkDescriptorSetVariableDescriptorCountLayoutSupport *vs = (void *)ext;
|
||||
if (variable_stride > 0) {
|
||||
vs->maxVariableDescriptorCount =
|
||||
(max_buffer_size - non_variable_size) / variable_stride;
|
||||
} else {
|
||||
vs->maxVariableDescriptorCount = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
vk_debug_ignored_stype(ext->sType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
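/* Example: a layout with no non-variable bindings and one variable-count
 * SAMPLED_IMAGE binding (stride 32) reports
 * maxVariableDescriptorCount = max_buffer_size / 32.
 */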
|
||||
75 src/asahi/vulkan/hk_descriptor_set_layout.h (new file)
@@ -0,0 +1,75 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_descriptor_set_layout.h"
|
||||
#include "vk_object.h"
|
||||
|
||||
struct hk_device;
|
||||
struct hk_physical_device;
|
||||
struct hk_sampler;
|
||||
struct vk_pipeline_layout;
|
||||
|
||||
struct hk_descriptor_set_binding_layout {
|
||||
/* The type of the descriptors in this binding */
|
||||
VkDescriptorType type;
|
||||
|
||||
/* Flags provided when this binding was created */
|
||||
VkDescriptorBindingFlags flags;
|
||||
|
||||
/* Number of array elements in this binding (or size in bytes for inline
|
||||
* uniform data)
|
||||
*/
|
||||
uint32_t array_size;
|
||||
|
||||
/* Offset into the descriptor buffer where this descriptor lives */
|
||||
uint32_t offset;
|
||||
|
||||
/* Stride between array elements in the descriptor buffer */
|
||||
uint8_t stride;
|
||||
|
||||
/* Index into the dynamic buffer binding array */
|
||||
uint8_t dynamic_buffer_index;
|
||||
|
||||
/* Immutable samplers (or NULL if no immutable samplers) */
|
||||
struct hk_sampler **immutable_samplers;
|
||||
};
|
||||
|
||||
struct hk_descriptor_set_layout {
|
||||
struct vk_descriptor_set_layout vk;
|
||||
|
||||
/* Size of the descriptor buffer for this descriptor set */
|
||||
/* Does not contain the size needed for variable count descriptors */
|
||||
uint32_t non_variable_descriptor_buffer_size;
|
||||
|
||||
/* Number of dynamic buffer bindings in this set */
|
||||
uint8_t dynamic_buffer_count;
|
||||
|
||||
/* Number of bindings in this descriptor set */
|
||||
uint32_t binding_count;
|
||||
|
||||
/* Bindings in this descriptor set */
|
||||
struct hk_descriptor_set_binding_layout binding[0];
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_descriptor_set_layout, vk.base,
|
||||
VkDescriptorSetLayout,
|
||||
VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
|
||||
|
||||
void hk_descriptor_stride_align_for_type(
|
||||
const struct hk_physical_device *pdev, VkDescriptorType type,
|
||||
const VkMutableDescriptorTypeListEXT *type_list, uint32_t *stride,
|
||||
uint32_t *alignment);
|
||||
|
||||
static inline struct hk_descriptor_set_layout *
|
||||
vk_to_hk_descriptor_set_layout(struct vk_descriptor_set_layout *layout)
|
||||
{
|
||||
return container_of(layout, struct hk_descriptor_set_layout, vk);
|
||||
}
|
||||
179 src/asahi/vulkan/hk_descriptor_table.c (new file)
@@ -0,0 +1,179 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_descriptor_table.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "asahi/lib/agx_bo.h"
|
||||
#include <sys/mman.h>
|
||||
|
||||
static VkResult
|
||||
hk_descriptor_table_grow_locked(struct hk_device *dev,
|
||||
struct hk_descriptor_table *table,
|
||||
uint32_t new_alloc)
|
||||
{
|
||||
struct agx_bo *new_bo;
|
||||
uint32_t *new_free_table;
|
||||
|
||||
assert(new_alloc > table->alloc && new_alloc <= table->max_alloc);
|
||||
|
||||
const uint32_t new_bo_size = new_alloc * table->desc_size;
|
||||
new_bo = agx_bo_create(&dev->dev, new_bo_size, 0, "Descriptor table");
|
||||
|
||||
if (new_bo == NULL) {
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
|
||||
"Failed to allocate the descriptor table");
|
||||
}
|
||||
|
||||
void *new_map = new_bo->ptr.cpu;
|
||||
|
||||
assert(table->bo == NULL && "not yet implemented sparse binding");
|
||||
table->bo = new_bo;
|
||||
table->map = new_map;
|
||||
|
||||
const size_t new_free_table_size = new_alloc * sizeof(uint32_t);
|
||||
new_free_table =
|
||||
vk_realloc(&dev->vk.alloc, table->free_table, new_free_table_size, 4,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (new_free_table == NULL) {
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
|
||||
"Failed to allocate image descriptor free table");
|
||||
}
|
||||
table->free_table = new_free_table;
|
||||
|
||||
table->alloc = new_alloc;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
hk_descriptor_table_init(struct hk_device *dev,
|
||||
struct hk_descriptor_table *table,
|
||||
uint32_t descriptor_size,
|
||||
uint32_t min_descriptor_count,
|
||||
uint32_t max_descriptor_count)
|
||||
{
|
||||
memset(table, 0, sizeof(*table));
|
||||
VkResult result;
|
||||
|
||||
simple_mtx_init(&table->mutex, mtx_plain);
|
||||
|
||||
assert(util_is_power_of_two_nonzero(min_descriptor_count));
|
||||
assert(util_is_power_of_two_nonzero(max_descriptor_count));
|
||||
|
||||
/* TODO: sparse binding for stable gpu va */
|
||||
min_descriptor_count = max_descriptor_count;
|
||||
|
||||
table->desc_size = descriptor_size;
|
||||
table->alloc = 0;
|
||||
table->max_alloc = max_descriptor_count;
|
||||
table->next_desc = 0;
|
||||
table->free_count = 0;
|
||||
|
||||
result = hk_descriptor_table_grow_locked(dev, table, min_descriptor_count);
|
||||
if (result != VK_SUCCESS) {
|
||||
hk_descriptor_table_finish(dev, table);
|
||||
return result;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
hk_descriptor_table_finish(struct hk_device *dev,
|
||||
struct hk_descriptor_table *table)
|
||||
{
|
||||
agx_bo_unreference(table->bo);
|
||||
vk_free(&dev->vk.alloc, table->free_table);
|
||||
simple_mtx_destroy(&table->mutex);
|
||||
}
|
||||
|
||||
#define HK_IMAGE_DESC_INVALID
|
||||
|
||||
static VkResult
|
||||
hk_descriptor_table_alloc_locked(struct hk_device *dev,
|
||||
struct hk_descriptor_table *table,
|
||||
uint32_t *index_out)
|
||||
{
|
||||
VkResult result;
|
||||
|
||||
if (table->free_count > 0) {
|
||||
*index_out = table->free_table[--table->free_count];
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
if (table->next_desc < table->alloc) {
|
||||
*index_out = table->next_desc++;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
if (table->next_desc >= table->max_alloc) {
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
|
||||
"Descriptor table not large enough");
|
||||
}
|
||||
|
||||
result = hk_descriptor_table_grow_locked(dev, table, table->alloc * 2);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
assert(table->next_desc < table->alloc);
|
||||
*index_out = table->next_desc++;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_descriptor_table_add_locked(struct hk_device *dev,
|
||||
struct hk_descriptor_table *table,
|
||||
const void *desc_data, size_t desc_size,
|
||||
uint32_t *index_out)
|
||||
{
|
||||
VkResult result = hk_descriptor_table_alloc_locked(dev, table, index_out);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
void *map = (char *)table->map + (*index_out * table->desc_size);
|
||||
|
||||
assert(desc_size == table->desc_size);
|
||||
memcpy(map, desc_data, table->desc_size);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
hk_descriptor_table_add(struct hk_device *dev,
|
||||
struct hk_descriptor_table *table,
|
||||
const void *desc_data, size_t desc_size,
|
||||
uint32_t *index_out)
|
||||
{
|
||||
simple_mtx_lock(&table->mutex);
|
||||
VkResult result = hk_descriptor_table_add_locked(dev, table, desc_data,
|
||||
desc_size, index_out);
|
||||
simple_mtx_unlock(&table->mutex);
|
||||
|
||||
return result;
|
||||
}
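/* Typical use (see hk_upload_null_descriptors): pack the descriptor on the
 * stack, add it, and the returned index identifies its slot:
 *
 *    uint32_t index;
 *    hk_descriptor_table_add(dev, &dev->images, &desc, sizeof(desc), &index);
 *    // the descriptor now lives at byte offset index * desc_size in table->bo
 */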
|
||||
|
||||
void
|
||||
hk_descriptor_table_remove(struct hk_device *dev,
|
||||
struct hk_descriptor_table *table, uint32_t index)
|
||||
{
|
||||
simple_mtx_lock(&table->mutex);
|
||||
|
||||
void *map = (char *)table->map + (index * table->desc_size);
|
||||
memset(map, 0, table->desc_size);
|
||||
|
||||
/* Sanity check for double-free */
|
||||
assert(table->free_count < table->alloc);
|
||||
for (uint32_t i = 0; i < table->free_count; i++)
|
||||
assert(table->free_table[i] != index);
|
||||
|
||||
table->free_table[table->free_count++] = index;
|
||||
|
||||
simple_mtx_unlock(&table->mutex);
|
||||
}
|
||||
49 src/asahi/vulkan/hk_descriptor_table.h (new file)
@@ -0,0 +1,49 @@
/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "hk_private.h"

#include "asahi/lib/agx_bo.h"
#include "util/simple_mtx.h"

struct hk_device;

struct hk_descriptor_table {
   simple_mtx_t mutex;

   uint32_t desc_size;  /**< Size of a descriptor */
   uint32_t alloc;      /**< Number of descriptors allocated */
   uint32_t max_alloc;  /**< Maximum possible number of descriptors */
   uint32_t next_desc;  /**< Next unallocated descriptor */
   uint32_t free_count; /**< Size of free_table */

   struct agx_bo *bo;
   void *map;

   /* Stack for free descriptor elements */
   uint32_t *free_table;
};

VkResult hk_descriptor_table_init(struct hk_device *dev,
                                  struct hk_descriptor_table *table,
                                  uint32_t descriptor_size,
                                  uint32_t min_descriptor_count,
                                  uint32_t max_descriptor_count);

void hk_descriptor_table_finish(struct hk_device *dev,
                                struct hk_descriptor_table *table);

VkResult hk_descriptor_table_add(struct hk_device *dev,
                                 struct hk_descriptor_table *table,
                                 const void *desc_data, size_t desc_size,
                                 uint32_t *index_out);

void hk_descriptor_table_remove(struct hk_device *dev,
                                struct hk_descriptor_table *table,
                                uint32_t index);
548 src/asahi/vulkan/hk_device.c (new file)
@@ -0,0 +1,548 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_device.h"
|
||||
|
||||
#include "agx_bg_eot.h"
|
||||
#include "agx_opcodes.h"
|
||||
#include "agx_scratch.h"
|
||||
#include "hk_cmd_buffer.h"
|
||||
#include "hk_descriptor_table.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_instance.h"
|
||||
#include "hk_physical_device.h"
|
||||
#include "hk_shader.h"
|
||||
|
||||
#include "asahi/genxml/agx_pack.h"
|
||||
#include "asahi/lib/agx_bo.h"
|
||||
#include "asahi/lib/agx_device.h"
|
||||
#include "asahi/lib/shaders/geometry.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/os_file.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/simple_mtx.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
#include "vulkan/wsi/wsi_common.h"
|
||||
#include "vk_cmd_enqueue_entrypoints.h"
|
||||
#include "vk_common_entrypoints.h"
|
||||
#include "vk_pipeline_cache.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <xf86drm.h>
|
||||
|
||||
/*
|
||||
* We preupload some constants so we can cheaply reference later without extra
|
||||
* allocation and copying.
|
||||
*
|
||||
* TODO: This is small, don't waste a whole BO.
|
||||
*/
|
||||
static VkResult
|
||||
hk_upload_rodata(struct hk_device *dev)
|
||||
{
|
||||
dev->rodata.bo =
|
||||
agx_bo_create(&dev->dev, AGX_SAMPLER_LENGTH, 0, "Read only data");
|
||||
|
||||
if (!dev->rodata.bo)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
uint8_t *map = dev->rodata.bo->ptr.cpu;
|
||||
uint32_t offs = 0;
|
||||
|
||||
offs = align(offs, 8);
|
||||
agx_pack(&dev->rodata.txf_sampler, USC_SAMPLER, cfg) {
|
||||
cfg.start = 0;
|
||||
cfg.count = 1;
|
||||
cfg.buffer = dev->rodata.bo->ptr.gpu + offs;
|
||||
}
|
||||
|
||||
agx_pack(map + offs, SAMPLER, cfg) {
|
||||
/* Allow mipmapping. This is respected by txf, weirdly. */
|
||||
cfg.mip_filter = AGX_MIP_FILTER_NEAREST;
|
||||
|
||||
/* Out-of-bounds reads must return 0 */
|
||||
cfg.wrap_s = AGX_WRAP_CLAMP_TO_BORDER;
|
||||
cfg.wrap_t = AGX_WRAP_CLAMP_TO_BORDER;
|
||||
cfg.wrap_r = AGX_WRAP_CLAMP_TO_BORDER;
|
||||
cfg.border_colour = AGX_BORDER_COLOUR_TRANSPARENT_BLACK;
|
||||
}
|
||||
offs += AGX_SAMPLER_LENGTH;
|
||||
|
||||
/* The image heap is allocated on the device prior to the rodata. The heap
|
||||
* lives as long as the device does and has a stable address (requiring
|
||||
* sparse binding to grow dynamically). That means its address is effectively
|
||||
* rodata and can be uploaded now. agx_usc_uniform requires an indirection to
|
||||
* push the heap address, so this takes care of that indirection up front to
|
||||
* cut an alloc/upload at draw time.
|
||||
*/
|
||||
offs = align(offs, sizeof(uint64_t));
|
||||
agx_pack(&dev->rodata.image_heap, USC_UNIFORM, cfg) {
|
||||
cfg.start_halfs = HK_IMAGE_HEAP_UNIFORM;
|
||||
cfg.size_halfs = 4;
|
||||
cfg.buffer = dev->rodata.bo->ptr.gpu + offs;
|
||||
}
|
||||
|
||||
uint64_t *image_heap_ptr = dev->rodata.bo->ptr.cpu + offs;
|
||||
*image_heap_ptr = dev->images.bo->ptr.gpu;
|
||||
offs += sizeof(uint64_t);
|
||||
|
||||
/* The geometry state buffer isn't strictly readonly data, but we only have a
|
||||
* single instance of it device-wide and -- after initializing at heap
|
||||
* allocate time -- it is read-only from the CPU perspective. The GPU uses it
|
||||
* for scratch, but is required to reset it after use to ensure resubmitting
|
||||
* the same command buffer works.
|
||||
*
|
||||
* So, we allocate it here for convenience.
|
||||
*/
|
||||
offs = align(offs, sizeof(uint64_t));
|
||||
dev->rodata.geometry_state = dev->rodata.bo->ptr.gpu + offs;
|
||||
offs += sizeof(struct agx_geometry_state);
|
||||
|
||||
/* For null readonly buffers, we need to allocate 16 bytes of zeroes for
|
||||
* robustness2 semantics on read.
|
||||
*/
|
||||
offs = align(offs, 16);
|
||||
dev->rodata.zero_sink = dev->rodata.bo->ptr.gpu + offs;
|
||||
memset(dev->rodata.bo->ptr.cpu + offs, 0, 16);
|
||||
offs += 16;
|
||||
|
||||
/* For null storage descriptors, we need to reserve 16 bytes to catch writes.
|
||||
* No particular content is required; we cannot get robustness2 semantics
|
||||
* without more work.
|
||||
*/
|
||||
offs = align(offs, 16);
|
||||
dev->rodata.null_sink = dev->rodata.bo->ptr.gpu + offs;
|
||||
offs += 16;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
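/* Resulting rodata layout, in upload order: one txf sampler
 * (AGX_SAMPLER_LENGTH bytes), an 8-byte pointer to the image heap, the
 * device-wide agx_geometry_state, a 16-byte zero sink for null readonly
 * buffers, and a 16-byte null sink that soaks up writes through null storage
 * descriptors.
 */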
|
||||
|
||||
static uint32_t
|
||||
internal_key_hash(const void *key_)
|
||||
{
|
||||
const struct hk_internal_key *key = key_;
|
||||
|
||||
return _mesa_hash_data(key, sizeof(struct hk_internal_key) + key->key_size);
|
||||
}
|
||||
|
||||
static bool
|
||||
internal_key_equal(const void *a_, const void *b_)
|
||||
{
|
||||
const struct hk_internal_key *a = a_;
|
||||
const struct hk_internal_key *b = b_;
|
||||
|
||||
return a->builder == b->builder && a->key_size == b->key_size &&
|
||||
memcmp(a->key, b->key, a->key_size) == 0;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_init_internal_shaders(struct hk_internal_shaders *s)
|
||||
{
|
||||
s->ht = _mesa_hash_table_create(NULL, internal_key_hash, internal_key_equal);
|
||||
if (!s->ht)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
simple_mtx_init(&s->lock, mtx_plain);
|
||||
return VK_SUCCESS;
|
||||
}
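/* Internal shaders are memoized in this table keyed by (builder, key blob),
 * so repeated requests for the same variant only build it once.
 */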
|
||||
|
||||
static void
|
||||
hk_destroy_internal_shaders(struct hk_device *dev,
|
||||
struct hk_internal_shaders *s, bool part)
|
||||
{
|
||||
hash_table_foreach(s->ht, ent) {
|
||||
if (part) {
|
||||
struct agx_shader_part *part = ent->data;
|
||||
free(part->binary);
|
||||
|
||||
/* The agx_shader_part itself is ralloc'd against the hash table so
|
||||
* will be freed.
|
||||
*/
|
||||
} else {
|
||||
struct hk_api_shader *obj = ent->data;
|
||||
hk_api_shader_destroy(&dev->vk, &obj->vk, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_hash_table_destroy(s->ht, NULL);
|
||||
simple_mtx_destroy(&s->lock);
|
||||
}
|
||||
|
||||
DERIVE_HASH_TABLE(agx_sampler_packed);
|
||||
|
||||
static VkResult
|
||||
hk_init_sampler_heap(struct hk_device *dev, struct hk_sampler_heap *h)
|
||||
{
|
||||
h->ht = agx_sampler_packed_table_create(NULL);
|
||||
if (!h->ht)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
VkResult result =
|
||||
hk_descriptor_table_init(dev, &h->table, AGX_SAMPLER_LENGTH, 1024, 1024);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
ralloc_free(h->ht);
|
||||
return result;
|
||||
}
|
||||
|
||||
simple_mtx_init(&h->lock, mtx_plain);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
hk_destroy_sampler_heap(struct hk_device *dev, struct hk_sampler_heap *h)
|
||||
{
|
||||
hk_descriptor_table_finish(dev, &h->table);
|
||||
ralloc_free(h->ht);
|
||||
simple_mtx_destroy(&h->lock);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_sampler_heap_add_locked(struct hk_device *dev, struct hk_sampler_heap *h,
|
||||
struct agx_sampler_packed desc,
|
||||
struct hk_rc_sampler **out)
|
||||
{
|
||||
struct hash_entry *ent = _mesa_hash_table_search(h->ht, &desc);
|
||||
if (ent != NULL) {
|
||||
*out = ent->data;
|
||||
|
||||
assert((*out)->refcount != 0);
|
||||
(*out)->refcount++;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct hk_rc_sampler *rc = ralloc(h->ht, struct hk_rc_sampler);
|
||||
if (!rc)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
uint32_t index;
|
||||
VkResult result =
|
||||
hk_descriptor_table_add(dev, &h->table, &desc, sizeof(desc), &index);
|
||||
if (result != VK_SUCCESS) {
|
||||
ralloc_free(rc);
|
||||
return result;
|
||||
}
|
||||
|
||||
*rc = (struct hk_rc_sampler){
|
||||
.key = desc,
|
||||
.refcount = 1,
|
||||
.index = index,
|
||||
};
|
||||
|
||||
_mesa_hash_table_insert(h->ht, &rc->key, rc);
|
||||
*out = rc;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
hk_sampler_heap_add(struct hk_device *dev, struct agx_sampler_packed desc,
|
||||
struct hk_rc_sampler **out)
|
||||
{
|
||||
struct hk_sampler_heap *h = &dev->samplers;
|
||||
|
||||
simple_mtx_lock(&h->lock);
|
||||
VkResult result = hk_sampler_heap_add_locked(dev, h, desc, out);
|
||||
simple_mtx_unlock(&h->lock);
|
||||
|
||||
return result;
|
||||
}
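/* Two samplers created with identical agx_sampler_packed contents therefore
 * share a single heap slot; the refcount keeps the slot alive until the last
 * user calls hk_sampler_heap_remove().
 */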
|
||||
|
||||
static void
|
||||
hk_sampler_heap_remove_locked(struct hk_device *dev, struct hk_sampler_heap *h,
|
||||
struct hk_rc_sampler *rc)
|
||||
{
|
||||
assert(rc->refcount != 0);
|
||||
rc->refcount--;
|
||||
|
||||
if (rc->refcount == 0) {
|
||||
hk_descriptor_table_remove(dev, &h->table, rc->index);
|
||||
_mesa_hash_table_remove_key(h->ht, &rc->key);
|
||||
ralloc_free(rc);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hk_sampler_heap_remove(struct hk_device *dev, struct hk_rc_sampler *rc)
|
||||
{
|
||||
struct hk_sampler_heap *h = &dev->samplers;
|
||||
|
||||
simple_mtx_lock(&h->lock);
|
||||
hk_sampler_heap_remove_locked(dev, h, rc);
|
||||
simple_mtx_unlock(&h->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* To implement nullDescriptor, the descriptor set code will reference
|
||||
* preuploaded null descriptors at fixed offsets in the image heap. Here we
|
||||
* upload those descriptors, initializing the image heap.
|
||||
*/
|
||||
static void
|
||||
hk_upload_null_descriptors(struct hk_device *dev)
|
||||
{
|
||||
struct agx_texture_packed null_tex;
|
||||
struct agx_pbe_packed null_pbe;
|
||||
uint32_t offset_tex, offset_pbe;
|
||||
|
||||
agx_set_null_texture(&null_tex, dev->rodata.null_sink);
|
||||
agx_set_null_pbe(&null_pbe, dev->rodata.null_sink);
|
||||
|
||||
hk_descriptor_table_add(dev, &dev->images, &null_tex, sizeof(null_tex),
|
||||
&offset_tex);
|
||||
|
||||
hk_descriptor_table_add(dev, &dev->images, &null_pbe, sizeof(null_pbe),
|
||||
&offset_pbe);
|
||||
|
||||
assert((offset_tex * HK_IMAGE_STRIDE) == HK_NULL_TEX_OFFSET && "static");
|
||||
assert((offset_pbe * HK_IMAGE_STRIDE) == HK_NULL_PBE_OFFSET && "static");
|
||||
}
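/* HK_NULL_TEX_OFFSET and HK_NULL_PBE_OFFSET are thus fixed byte offsets
 * (index * HK_IMAGE_STRIDE) into the image heap, so descriptor writes for
 * VK_NULL_HANDLE resources can reference them without any per-set allocation.
 */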
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||
const VkDeviceCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
|
||||
VkResult result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
struct hk_device *dev;
|
||||
|
||||
dev = vk_zalloc2(&pdev->vk.instance->alloc, pAllocator, sizeof(*dev), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!dev)
|
||||
return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
struct vk_device_dispatch_table dispatch_table;
|
||||
|
||||
/* For secondary command buffer support, overwrite any command entrypoints
|
||||
* in the main device-level dispatch table with
|
||||
* vk_cmd_enqueue_unless_primary_Cmd*.
|
||||
*/
|
||||
vk_device_dispatch_table_from_entrypoints(
|
||||
&dispatch_table, &vk_cmd_enqueue_unless_primary_device_entrypoints, true);
|
||||
|
||||
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
|
||||
&hk_device_entrypoints, false);
|
||||
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
|
||||
&wsi_device_entrypoints, false);
|
||||
|
||||
/* Populate primary cmd_dispatch table */
|
||||
vk_device_dispatch_table_from_entrypoints(&dev->cmd_dispatch,
|
||||
&hk_device_entrypoints, true);
|
||||
vk_device_dispatch_table_from_entrypoints(&dev->cmd_dispatch,
|
||||
&wsi_device_entrypoints, false);
|
||||
vk_device_dispatch_table_from_entrypoints(
|
||||
&dev->cmd_dispatch, &vk_common_device_entrypoints, false);
|
||||
|
||||
result = vk_device_init(&dev->vk, &pdev->vk, &dispatch_table, pCreateInfo,
|
||||
pAllocator);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_alloc;
|
||||
|
||||
dev->vk.shader_ops = &hk_device_shader_ops;
|
||||
dev->vk.command_dispatch_table = &dev->cmd_dispatch;
|
||||
|
||||
drmDevicePtr drm_device = NULL;
|
||||
int ret = drmGetDeviceFromDevId(pdev->render_dev, 0, &drm_device);
|
||||
if (ret != 0) {
|
||||
result = vk_errorf(dev, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"Failed to get DRM device: %m");
|
||||
goto fail_init;
|
||||
}
|
||||
|
||||
const char *path = drm_device->nodes[DRM_NODE_RENDER];
|
||||
dev->dev.fd = open(path, O_RDWR | O_CLOEXEC);
|
||||
if (dev->dev.fd < 0) {
|
||||
drmFreeDevice(&drm_device);
|
||||
result = vk_errorf(dev, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"failed to open device %s", path);
|
||||
goto fail_init;
|
||||
}
|
||||
|
||||
bool succ = agx_open_device(NULL, &dev->dev);
|
||||
drmFreeDevice(&drm_device);
|
||||
if (!succ) {
|
||||
result = vk_errorf(dev, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"Failed to get DRM device: %m");
|
||||
goto fail_fd;
|
||||
}
|
||||
|
||||
vk_device_set_drm_fd(&dev->vk, dev->dev.fd);
|
||||
dev->vk.command_buffer_ops = &hk_cmd_buffer_ops;
|
||||
|
||||
result = hk_descriptor_table_init(dev, &dev->images, AGX_TEXTURE_LENGTH,
|
||||
1024, 1024 * 1024);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_dev;
|
||||
|
||||
result = hk_init_sampler_heap(dev, &dev->samplers);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_images;
|
||||
|
||||
result = hk_descriptor_table_init(
|
||||
dev, &dev->occlusion_queries, sizeof(uint64_t), AGX_MAX_OCCLUSION_QUERIES,
|
||||
AGX_MAX_OCCLUSION_QUERIES);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_samplers;
|
||||
|
||||
result = hk_upload_rodata(dev);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_queries;
|
||||
|
||||
/* Depends on rodata */
|
||||
hk_upload_null_descriptors(dev);
|
||||
|
||||
/* XXX: error handling, and should this even go on the device? */
|
||||
agx_bg_eot_init(&dev->bg_eot, &dev->dev);
|
||||
if (!dev->bg_eot.ht) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto fail_rodata;
|
||||
}
|
||||
|
||||
result = hk_init_internal_shaders(&dev->prolog_epilog);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_bg_eot;
|
||||
|
||||
result = hk_init_internal_shaders(&dev->kernels);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_internal_shaders;
|
||||
|
||||
result =
|
||||
hk_queue_init(dev, &dev->queue, &pCreateInfo->pQueueCreateInfos[0], 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_internal_shaders_2;
|
||||
|
||||
struct vk_pipeline_cache_create_info cache_info = {
|
||||
.weak_ref = true,
|
||||
};
|
||||
dev->mem_cache = vk_pipeline_cache_create(&dev->vk, &cache_info, NULL);
|
||||
if (dev->mem_cache == NULL) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto fail_queue;
|
||||
}
|
||||
|
||||
result = hk_device_init_meta(dev);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_mem_cache;
|
||||
|
||||
*pDevice = hk_device_to_handle(dev);
|
||||
|
||||
agx_scratch_init(&dev->dev, &dev->scratch.vs);
|
||||
agx_scratch_init(&dev->dev, &dev->scratch.fs);
|
||||
agx_scratch_init(&dev->dev, &dev->scratch.cs);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_mem_cache:
|
||||
vk_pipeline_cache_destroy(dev->mem_cache, NULL);
|
||||
fail_queue:
|
||||
hk_queue_finish(dev, &dev->queue);
|
||||
fail_internal_shaders_2:
hk_destroy_internal_shaders(dev, &dev->kernels, false);
fail_internal_shaders:
hk_destroy_internal_shaders(dev, &dev->prolog_epilog, true);
fail_bg_eot:
agx_bg_eot_cleanup(&dev->bg_eot);
fail_rodata:
agx_bo_unreference(dev->rodata.bo);
|
||||
fail_queries:
|
||||
hk_descriptor_table_finish(dev, &dev->occlusion_queries);
|
||||
fail_samplers:
|
||||
hk_destroy_sampler_heap(dev, &dev->samplers);
|
||||
fail_images:
|
||||
hk_descriptor_table_finish(dev, &dev->images);
|
||||
fail_dev:
|
||||
agx_close_device(&dev->dev);
|
||||
fail_fd:
|
||||
close(dev->dev.fd);
|
||||
fail_init:
|
||||
vk_device_finish(&dev->vk);
|
||||
fail_alloc:
|
||||
vk_free(&dev->vk.alloc, dev);
|
||||
return result;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, _device);
|
||||
|
||||
if (!dev)
|
||||
return;
|
||||
|
||||
hk_device_finish_meta(dev);
|
||||
hk_destroy_internal_shaders(dev, &dev->kernels, false);
|
||||
hk_destroy_internal_shaders(dev, &dev->prolog_epilog, true);
|
||||
|
||||
vk_pipeline_cache_destroy(dev->mem_cache, NULL);
|
||||
hk_queue_finish(dev, &dev->queue);
|
||||
vk_device_finish(&dev->vk);
|
||||
|
||||
agx_scratch_fini(&dev->scratch.vs);
|
||||
agx_scratch_fini(&dev->scratch.fs);
|
||||
agx_scratch_fini(&dev->scratch.cs);
|
||||
|
||||
hk_destroy_sampler_heap(dev, &dev->samplers);
|
||||
hk_descriptor_table_finish(dev, &dev->images);
|
||||
hk_descriptor_table_finish(dev, &dev->occlusion_queries);
|
||||
agx_bo_unreference(dev->rodata.bo);
|
||||
agx_bo_unreference(dev->heap);
|
||||
agx_bg_eot_cleanup(&dev->bg_eot);
|
||||
agx_close_device(&dev->dev);
|
||||
vk_free(&dev->vk.alloc, dev);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_GetCalibratedTimestampsKHR(
|
||||
VkDevice _device, uint32_t timestampCount,
|
||||
const VkCalibratedTimestampInfoKHR *pTimestampInfos, uint64_t *pTimestamps,
|
||||
uint64_t *pMaxDeviation)
|
||||
{
|
||||
// VK_FROM_HANDLE(hk_device, dev, _device);
|
||||
// struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
uint64_t max_clock_period = 0;
|
||||
uint64_t begin, end;
|
||||
int d;
|
||||
|
||||
#ifdef CLOCK_MONOTONIC_RAW
|
||||
begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
|
||||
#else
|
||||
begin = vk_clock_gettime(CLOCK_MONOTONIC);
|
||||
#endif
|
||||
|
||||
for (d = 0; d < timestampCount; d++) {
|
||||
switch (pTimestampInfos[d].timeDomain) {
|
||||
case VK_TIME_DOMAIN_DEVICE_KHR:
|
||||
unreachable("todo");
|
||||
// pTimestamps[d] = agx_get_gpu_timestamp(&pdev->dev);
|
||||
max_clock_period = MAX2(
|
||||
max_clock_period, 1); /* FIXME: Is timestamp period actually 1? */
|
||||
break;
|
||||
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
|
||||
pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
|
||||
max_clock_period = MAX2(max_clock_period, 1);
|
||||
break;
|
||||
|
||||
#ifdef CLOCK_MONOTONIC_RAW
|
||||
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
|
||||
pTimestamps[d] = begin;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
pTimestamps[d] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CLOCK_MONOTONIC_RAW
|
||||
end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
|
||||
#else
|
||||
end = vk_clock_gettime(CLOCK_MONOTONIC);
|
||||
#endif
|
||||
|
||||
*pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
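For reference, a minimal caller-side sketch of the entry point above, assuming a device with VK_KHR_calibrated_timestamps enabled. Only the host clock domains are wired up so far (VK_TIME_DOMAIN_DEVICE_KHR is still a todo above), so the sketch sticks to CLOCK_MONOTONIC; the function name is hypothetical.

#include <vulkan/vulkan.h>

static uint64_t
example_query_host_timestamp(VkDevice device)
{
   const VkCalibratedTimestampInfoKHR info = {
      .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
      .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
   };
   uint64_t timestamp = 0, max_deviation = 0;

   /* Extension entry points are fetched through the device dispatch. */
   PFN_vkGetCalibratedTimestampsKHR get_ts =
      (PFN_vkGetCalibratedTimestampsKHR)vkGetDeviceProcAddr(
         device, "vkGetCalibratedTimestampsKHR");

   /* One info struct per requested timestamp; pMaxDeviation reports the
    * worst-case sampling window across the requested domains.
    */
   get_ts(device, 1, &info, &timestamp, &max_deviation);
   return timestamp;
}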
123  src/asahi/vulkan/hk_device.h  (new file)
@@ -0,0 +1,123 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "asahi/lib/agx_device.h"
|
||||
#include "agx_bg_eot.h"
|
||||
#include "agx_pack.h"
|
||||
#include "agx_scratch.h"
|
||||
#include "decode.h"
|
||||
#include "vk_cmd_queue.h"
|
||||
#include "vk_dispatch_table.h"
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "hk_descriptor_table.h"
|
||||
#include "hk_queue.h"
|
||||
#include "vk_device.h"
|
||||
#include "vk_meta.h"
|
||||
#include "vk_queue.h"
|
||||
|
||||
struct hk_physical_device;
|
||||
struct vk_pipeline_cache;
|
||||
|
||||
/* Fixed offsets for reserved null image descriptors */
|
||||
#define HK_NULL_TEX_OFFSET (0)
|
||||
#define HK_NULL_PBE_OFFSET (24)
|
||||
|
||||
typedef void (*hk_internal_builder_t)(struct nir_builder *b, const void *key);
|
||||
|
||||
struct hk_internal_key {
|
||||
hk_internal_builder_t builder;
|
||||
size_t key_size;
|
||||
uint8_t key[];
|
||||
};
|
||||
|
||||
struct hk_internal_shaders {
|
||||
simple_mtx_t lock;
|
||||
struct hash_table *ht;
|
||||
};
|
||||
|
||||
struct hk_rc_sampler {
|
||||
struct agx_sampler_packed key;
|
||||
|
||||
/* Reference count for this hardware sampler, protected by the heap mutex */
|
||||
uint16_t refcount;
|
||||
|
||||
/* Index of this hardware sampler in the hardware sampler heap */
|
||||
uint16_t index;
|
||||
};
|
||||
|
||||
struct hk_sampler_heap {
|
||||
simple_mtx_t lock;
|
||||
|
||||
struct hk_descriptor_table table;
|
||||
|
||||
/* Map of agx_sampler_packed to hk_rc_sampler */
|
||||
struct hash_table *ht;
|
||||
};
|
||||
|
||||
struct hk_device {
|
||||
struct vk_device vk;
|
||||
struct agx_device dev;
|
||||
struct agxdecode_ctx *decode_ctx;
|
||||
|
||||
struct hk_descriptor_table images;
|
||||
struct hk_descriptor_table occlusion_queries;
|
||||
struct hk_sampler_heap samplers;
|
||||
|
||||
struct hk_queue queue;
|
||||
|
||||
struct vk_pipeline_cache *mem_cache;
|
||||
|
||||
struct vk_meta_device meta;
|
||||
struct agx_bg_eot_cache bg_eot;
|
||||
|
||||
struct {
|
||||
struct agx_bo *bo;
|
||||
struct agx_usc_sampler_packed txf_sampler;
|
||||
struct agx_usc_uniform_packed image_heap;
|
||||
uint64_t null_sink, zero_sink;
|
||||
uint64_t geometry_state;
|
||||
} rodata;
|
||||
|
||||
struct hk_internal_shaders prolog_epilog;
|
||||
struct hk_internal_shaders kernels;
|
||||
struct hk_api_shader *write_shader;
|
||||
|
||||
/* Indirected for common secondary emulation */
|
||||
struct vk_device_dispatch_table cmd_dispatch;
|
||||
|
||||
/* Heap used for GPU-side memory allocation for geometry/tessellation.
|
||||
*
|
||||
* Control streams accessing the heap must be serialized. This is not
|
||||
* expected to be a legitimate problem. If it is, we can rework later.
|
||||
*/
|
||||
struct agx_bo *heap;
|
||||
|
||||
struct {
|
||||
struct agx_scratch vs, fs, cs;
|
||||
} scratch;
|
||||
};
|
||||
|
||||
VK_DEFINE_HANDLE_CASTS(hk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
|
||||
|
||||
static inline struct hk_physical_device *
|
||||
hk_device_physical(struct hk_device *dev)
|
||||
{
|
||||
return (struct hk_physical_device *)dev->vk.physical;
|
||||
}
|
||||
|
||||
VkResult hk_device_init_meta(struct hk_device *dev);
|
||||
void hk_device_finish_meta(struct hk_device *dev);
|
||||
|
||||
VkResult hk_sampler_heap_add(struct hk_device *dev,
|
||||
struct agx_sampler_packed desc,
|
||||
struct hk_rc_sampler **out);
|
||||
|
||||
void hk_sampler_heap_remove(struct hk_device *dev, struct hk_rc_sampler *rc);
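A caller-side sketch of the refcounted sampler heap declared above (hypothetical helper, error handling trimmed). The hash table keyed on agx_sampler_packed suggests that adding an identical descriptor twice returns the same hardware index with its refcount bumped, and every add must eventually be balanced by hk_sampler_heap_remove:

static VkResult
example_get_hw_sampler(struct hk_device *dev, struct agx_sampler_packed desc,
                       uint16_t *out_index, struct hk_rc_sampler **out_rc)
{
   /* Presumably dedups on the packed key under the heap lock
    * (see struct hk_sampler_heap above).
    */
   VkResult result = hk_sampler_heap_add(dev, desc, out_rc);
   if (result != VK_SUCCESS)
      return result;

   /* Stable index into the hardware sampler heap for this reference. */
   *out_index = (*out_rc)->index;
   return VK_SUCCESS;
}

/* On VkSampler destruction: hk_sampler_heap_remove(dev, rc); */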
330  src/asahi/vulkan/hk_device_memory.c  (new file)
@@ -0,0 +1,330 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_device_memory.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_image.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "asahi/lib/agx_bo.h"
|
||||
#include "util/u_atomic.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
/* Supports opaque fd only */
|
||||
const VkExternalMemoryProperties hk_opaque_fd_mem_props = {
|
||||
.externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
|
||||
VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT,
|
||||
.exportFromImportedHandleTypes =
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
.compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
};
|
||||
|
||||
/* Supports opaque fd and dma_buf. */
|
||||
const VkExternalMemoryProperties hk_dma_buf_mem_props = {
|
||||
.externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
|
||||
VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT,
|
||||
.exportFromImportedHandleTypes =
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
.compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
};
|
||||
|
||||
static enum agx_bo_flags
|
||||
hk_memory_type_flags(const VkMemoryType *type,
|
||||
VkExternalMemoryHandleTypeFlagBits handle_types)
|
||||
{
|
||||
unsigned flags = 0;
|
||||
|
||||
if (handle_types)
|
||||
flags |= AGX_BO_SHARED | AGX_BO_SHAREABLE;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_GetMemoryFdPropertiesKHR(VkDevice device,
|
||||
VkExternalMemoryHandleTypeFlagBits handleType,
|
||||
int fd,
|
||||
VkMemoryFdPropertiesKHR *pMemoryFdProperties)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
struct agx_bo *bo;
|
||||
|
||||
switch (handleType) {
|
||||
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
|
||||
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
|
||||
bo = agx_bo_import(&dev->dev, fd);
|
||||
if (bo == NULL)
|
||||
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
|
||||
break;
|
||||
default:
|
||||
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
|
||||
}
|
||||
|
||||
uint32_t type_bits = 0;
|
||||
for (unsigned t = 0; t < ARRAY_SIZE(pdev->mem_types); t++) {
|
||||
const unsigned flags =
|
||||
hk_memory_type_flags(&pdev->mem_types[t], handleType);
|
||||
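/* A memory type is compatible iff the imported BO already carries every flag
 * this type would require for the given handle type, i.e. nothing in `flags`
 * is missing from bo->flags.
 */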
if (!(flags & ~bo->flags))
|
||||
type_bits |= (1 << t);
|
||||
}
|
||||
|
||||
pMemoryFdProperties->memoryTypeBits = type_bits;
|
||||
|
||||
agx_bo_unreference(bo);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_AllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo,
|
||||
const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
struct hk_device_memory *mem;
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
const VkImportMemoryFdInfoKHR *fd_info =
|
||||
vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
|
||||
const VkExportMemoryAllocateInfo *export_info =
|
||||
vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
|
||||
const VkMemoryType *type = &pdev->mem_types[pAllocateInfo->memoryTypeIndex];
|
||||
|
||||
VkExternalMemoryHandleTypeFlagBits handle_types = 0;
|
||||
if (export_info != NULL)
|
||||
handle_types |= export_info->handleTypes;
|
||||
if (fd_info != NULL)
|
||||
handle_types |= fd_info->handleType;
|
||||
|
||||
const unsigned flags = hk_memory_type_flags(type, handle_types);
|
||||
|
||||
uint32_t alignment = 16384; /* Apple page size */
|
||||
|
||||
struct hk_memory_heap *heap = &pdev->mem_heaps[type->heapIndex];
|
||||
if (p_atomic_read(&heap->used) > heap->size)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
|
||||
const uint64_t aligned_size =
|
||||
align64(pAllocateInfo->allocationSize, alignment);
|
||||
|
||||
mem = vk_device_memory_create(&dev->vk, pAllocateInfo, pAllocator,
|
||||
sizeof(*mem));
|
||||
if (!mem)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
mem->map = NULL;
|
||||
if (fd_info && fd_info->handleType) {
|
||||
assert(
|
||||
fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
|
||||
fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
|
||||
|
||||
mem->bo = agx_bo_import(&dev->dev, fd_info->fd);
|
||||
if (mem->bo == NULL) {
|
||||
result = vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
|
||||
goto fail_alloc;
|
||||
}
|
||||
assert(!(flags & ~mem->bo->flags));
|
||||
} else {
|
||||
enum agx_bo_flags flags = 0;
|
||||
if (handle_types)
|
||||
flags |= AGX_BO_SHAREABLE;
|
||||
|
||||
mem->bo = agx_bo_create(&dev->dev, aligned_size, flags, "App memory");
|
||||
if (!mem->bo) {
|
||||
result = vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail_alloc;
|
||||
}
|
||||
}
|
||||
|
||||
if (fd_info && fd_info->handleType) {
|
||||
/* From the Vulkan spec:
|
||||
*
|
||||
* "Importing memory from a file descriptor transfers ownership of
|
||||
* the file descriptor from the application to the Vulkan
|
||||
* implementation. The application must not perform any operations on
|
||||
* the file descriptor after a successful import."
|
||||
*
|
||||
* If the import fails, we leave the file descriptor open.
|
||||
*/
|
||||
close(fd_info->fd);
|
||||
}
|
||||
|
||||
uint64_t heap_used = p_atomic_add_return(&heap->used, mem->bo->size);
|
||||
if (heap_used > heap->size) {
|
||||
hk_FreeMemory(device, hk_device_memory_to_handle(mem), pAllocator);
|
||||
return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
|
||||
"Out of heap memory");
|
||||
}
|
||||
|
||||
*pMem = hk_device_memory_to_handle(mem);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_alloc:
|
||||
vk_device_memory_destroy(&dev->vk, pAllocator, &mem->vk);
|
||||
return result;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_FreeMemory(VkDevice device, VkDeviceMemory _mem,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_device_memory, mem, _mem);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
|
||||
if (!mem)
|
||||
return;
|
||||
|
||||
const VkMemoryType *type = &pdev->mem_types[mem->vk.memory_type_index];
|
||||
struct hk_memory_heap *heap = &pdev->mem_heaps[type->heapIndex];
|
||||
p_atomic_add(&heap->used, -((int64_t)mem->bo->size));
|
||||
|
||||
agx_bo_unreference(mem->bo);
|
||||
|
||||
vk_device_memory_destroy(&dev->vk, pAllocator, &mem->vk);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_MapMemory2KHR(VkDevice device, const VkMemoryMapInfoKHR *pMemoryMapInfo,
|
||||
void **ppData)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_device_memory, mem, pMemoryMapInfo->memory);
|
||||
|
||||
if (mem == NULL) {
|
||||
*ppData = NULL;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
const VkDeviceSize offset = pMemoryMapInfo->offset;
|
||||
const VkDeviceSize size = vk_device_memory_range(
|
||||
&mem->vk, pMemoryMapInfo->offset, pMemoryMapInfo->size);
|
||||
|
||||
UNUSED void *fixed_addr = NULL;
|
||||
if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
|
||||
const VkMemoryMapPlacedInfoEXT *placed_info = vk_find_struct_const(
|
||||
pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
|
||||
fixed_addr = placed_info->pPlacedAddress;
|
||||
}
|
||||
|
||||
/* From the Vulkan spec version 1.0.32 docs for MapMemory:
|
||||
*
|
||||
* * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
|
||||
* assert(size != 0);
|
||||
* * If size is not equal to VK_WHOLE_SIZE, size must be less than or
|
||||
* equal to the size of the memory minus offset
|
||||
*/
|
||||
assert(size > 0);
|
||||
assert(offset + size <= mem->bo->size);
|
||||
|
||||
if (size != (size_t)size) {
|
||||
return vk_errorf(dev, VK_ERROR_MEMORY_MAP_FAILED,
|
||||
"requested size 0x%" PRIx64 " does not fit in %u bits",
|
||||
size, (unsigned)(sizeof(size_t) * 8));
|
||||
}
|
||||
|
||||
/* From the Vulkan 1.2.194 spec:
|
||||
*
|
||||
* "memory must not be currently host mapped"
|
||||
*/
|
||||
if (mem->map != NULL) {
|
||||
return vk_errorf(dev, VK_ERROR_MEMORY_MAP_FAILED,
|
||||
"Memory object already mapped.");
|
||||
}
|
||||
|
||||
mem->map = mem->bo->ptr.cpu;
|
||||
*ppData = mem->map + offset;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_UnmapMemory2KHR(VkDevice device,
|
||||
const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device_memory, mem, pMemoryUnmapInfo->memory);
|
||||
|
||||
if (mem == NULL)
|
||||
return VK_SUCCESS;
|
||||
|
||||
if (pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT) {
|
||||
unreachable("todo");
|
||||
#if 0
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
|
||||
int err = agx_bo_overmap(mem->bo, mem->map);
|
||||
if (err) {
|
||||
return vk_errorf(dev, VK_ERROR_MEMORY_MAP_FAILED,
|
||||
"Failed to map over original mapping");
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
/* TODO */
|
||||
//// agx_bo_unmap(mem->bo, mem->map);
|
||||
}
|
||||
|
||||
mem->map = NULL;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_FlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount,
|
||||
const VkMappedMemoryRange *pMemoryRanges)
|
||||
{
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_InvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount,
|
||||
const VkMappedMemoryRange *pMemoryRanges)
|
||||
{
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory _mem,
|
||||
VkDeviceSize *pCommittedMemoryInBytes)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device_memory, mem, _mem);
|
||||
|
||||
*pCommittedMemoryInBytes = mem->bo->size;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_GetMemoryFdKHR(VkDevice device, const VkMemoryGetFdInfoKHR *pGetFdInfo,
|
||||
int *pFD)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_device_memory, memory, pGetFdInfo->memory);
|
||||
|
||||
switch (pGetFdInfo->handleType) {
|
||||
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
|
||||
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
|
||||
*pFD = agx_bo_export(memory->bo);
|
||||
return VK_SUCCESS;
|
||||
default:
|
||||
assert(!"unsupported handle type");
|
||||
return vk_error(dev, VK_ERROR_FEATURE_NOT_PRESENT);
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR uint64_t VKAPI_CALL
|
||||
hk_GetDeviceMemoryOpaqueCaptureAddress(
|
||||
UNUSED VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device_memory, mem, pInfo->memory);
|
||||
|
||||
return mem->bo->ptr.gpu;
|
||||
}
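A caller-side sketch of the opaque-fd export path implemented above (hypothetical application code, error handling trimmed); the fd semantics follow the ownership comment in hk_AllocateMemory:

static int
example_export_memory_fd(VkDevice device, VkDeviceMemory memory)
{
   const VkMemoryGetFdInfoKHR get_fd_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
      .memory = memory,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
   };

   PFN_vkGetMemoryFdKHR get_memory_fd =
      (PFN_vkGetMemoryFdKHR)vkGetDeviceProcAddr(device, "vkGetMemoryFdKHR");

   int fd = -1;
   if (get_memory_fd(device, &get_fd_info, &fd) != VK_SUCCESS)
      return -1;

   /* The fd can now be handed to another process or API and re-imported via
    * VkImportMemoryFdInfoKHR, after which the importer owns it.
    */
   return fd;
}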
31  src/asahi/vulkan/hk_device_memory.h  (new file)
@@ -0,0 +1,31 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_device_memory.h"
|
||||
|
||||
#include "util/list.h"
|
||||
|
||||
struct hk_device;
|
||||
struct hk_image_plane;
|
||||
|
||||
struct hk_device_memory {
|
||||
struct vk_device_memory vk;
|
||||
|
||||
struct agx_bo *bo;
|
||||
|
||||
void *map;
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_device_memory, vk.base, VkDeviceMemory,
|
||||
VK_OBJECT_TYPE_DEVICE_MEMORY)
|
||||
|
||||
extern const VkExternalMemoryProperties hk_opaque_fd_mem_props;
|
||||
extern const VkExternalMemoryProperties hk_dma_buf_mem_props;
113  src/asahi/vulkan/hk_event.c  (new file)
@@ -0,0 +1,113 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_event.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
#include "agx_bo.h"
|
||||
#include "hk_cmd_buffer.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
|
||||
#define HK_EVENT_MEM_SIZE sizeof(VkResult)
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateEvent(VkDevice device, const VkEventCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_event *event;
|
||||
|
||||
event = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*event),
|
||||
VK_OBJECT_TYPE_EVENT);
|
||||
if (!event)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
/* TODO: this is really wasteful, bring back the NVK heap!
|
||||
*
|
||||
* XXX
|
||||
*/
|
||||
event->bo =
|
||||
agx_bo_create(&dev->dev, HK_EVENT_MEM_SIZE, AGX_BO_WRITEBACK, "Event");
|
||||
event->status = event->bo->ptr.cpu;
|
||||
event->addr = event->bo->ptr.gpu;
|
||||
|
||||
*event->status = VK_EVENT_RESET;
|
||||
|
||||
*pEvent = hk_event_to_handle(event);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyEvent(VkDevice device, VkEvent _event,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_event, event, _event);
|
||||
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
agx_bo_unreference(event->bo);
|
||||
vk_object_free(&dev->vk, pAllocator, event);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_GetEventStatus(VkDevice device, VkEvent _event)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_event, event, _event);
|
||||
|
||||
return *event->status;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_SetEvent(VkDevice device, VkEvent _event)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_event, event, _event);
|
||||
|
||||
*event->status = VK_EVENT_SET;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_ResetEvent(VkDevice device, VkEvent _event)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_event, event, _event);
|
||||
|
||||
*event->status = VK_EVENT_RESET;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
|
||||
const VkDependencyInfo *pDependencyInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_event, event, _event);
|
||||
|
||||
hk_queue_write(cmd, event->bo->ptr.gpu, VK_EVENT_SET, false);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
|
||||
VkPipelineStageFlags2 stageMask)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_event, event, _event);
|
||||
|
||||
hk_queue_write(cmd, event->bo->ptr.gpu, VK_EVENT_RESET, false);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount,
|
||||
const VkEvent *pEvents,
|
||||
const VkDependencyInfo *pDependencyInfos)
|
||||
{
|
||||
/* Currently we barrier everything, so this is a no-op. */
|
||||
}
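A caller-side sketch of how these entry points compose (hypothetical). The status word lives in a shared BO, so once the submitted work executes hk_CmdSetEvent2's write, the host sees VK_EVENT_SET through hk_GetEventStatus; hk_CmdWaitEvents2 can be a no-op because, as noted above, everything is currently barriered:

/* GPU side, recorded into a command buffer and submitted:
 *    vkCmdSetEvent2(cmd, event, &dep_info);
 */

/* Host side: poll until the queue work that sets the event has executed. */
static void
example_wait_event_on_host(VkDevice device, VkEvent event)
{
   while (vkGetEventStatus(device, event) != VK_EVENT_SET) {
      /* Spin; a real application would sleep or synchronize on a fence. */
   }
}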
22  src/asahi/vulkan/hk_event.h  (new file)
@@ -0,0 +1,22 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_object.h"
|
||||
|
||||
struct hk_event {
|
||||
struct vk_object_base base;
|
||||
struct agx_bo *bo;
|
||||
|
||||
uint64_t addr;
|
||||
VkResult *status;
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
140  src/asahi/vulkan/hk_format.c  (new file)
@@ -0,0 +1,140 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
|
||||
#include "hk_buffer_view.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_image.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "vk_enum_defines.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
uint64_t agx_best_modifiers[] = {
|
||||
// DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED,
|
||||
DRM_FORMAT_MOD_APPLE_TWIDDLED,
|
||||
DRM_FORMAT_MOD_LINEAR,
|
||||
};
|
||||
|
||||
static VkFormatFeatureFlags2
|
||||
hk_modifier_features(uint64_t mod, VkFormat vk_format,
|
||||
const VkFormatProperties *props)
|
||||
{
|
||||
if (mod == DRM_FORMAT_MOD_LINEAR)
|
||||
return props->linearTilingFeatures;
|
||||
|
||||
if (mod == DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED /* TODO */)
|
||||
return 0;
|
||||
|
||||
return props->optimalTilingFeatures;
|
||||
}
|
||||
|
||||
static void
|
||||
get_drm_format_modifier_properties_list(
|
||||
const struct hk_physical_device *physical_device, VkFormat vk_format,
|
||||
VkDrmFormatModifierPropertiesListEXT *list, const VkFormatProperties *props)
|
||||
{
|
||||
VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out,
|
||||
list->pDrmFormatModifierProperties,
|
||||
&list->drmFormatModifierCount);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(agx_best_modifiers); ++i) {
|
||||
uint64_t mod = agx_best_modifiers[i];
|
||||
VkFormatFeatureFlags2 flags = hk_modifier_features(mod, vk_format, props);
|
||||
|
||||
if (!flags)
|
||||
continue;
|
||||
|
||||
vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out,
|
||||
out_props)
|
||||
{
|
||||
*out_props = (VkDrmFormatModifierPropertiesEXT){
|
||||
.drmFormatModifier = mod,
|
||||
.drmFormatModifierPlaneCount = 1 /* no planar mods */,
|
||||
.drmFormatModifierTilingFeatures = flags,
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
get_drm_format_modifier_properties_list_2(
|
||||
const struct hk_physical_device *physical_device, VkFormat vk_format,
|
||||
VkDrmFormatModifierPropertiesList2EXT *list, const VkFormatProperties *props)
|
||||
{
|
||||
VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierProperties2EXT, out,
|
||||
list->pDrmFormatModifierProperties,
|
||||
&list->drmFormatModifierCount);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(agx_best_modifiers); ++i) {
|
||||
uint64_t mod = agx_best_modifiers[i];
|
||||
VkFormatFeatureFlags2 flags = hk_modifier_features(mod, vk_format, props);
|
||||
|
||||
if (!flags)
|
||||
continue;
|
||||
|
||||
vk_outarray_append_typed(VkDrmFormatModifierProperties2EXT, &out,
|
||||
out_props)
|
||||
{
|
||||
*out_props = (VkDrmFormatModifierProperties2EXT){
|
||||
.drmFormatModifier = mod,
|
||||
.drmFormatModifierPlaneCount = 1, /* no planar mods */
|
||||
.drmFormatModifierTilingFeatures = flags,
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
|
||||
VkFormat format,
|
||||
VkFormatProperties2 *pFormatProperties)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_physical_device, pdevice, physicalDevice);
|
||||
|
||||
VkFormatFeatureFlags2 linear2, optimal2, buffer2;
|
||||
linear2 =
|
||||
hk_get_image_format_features(pdevice, format, VK_IMAGE_TILING_LINEAR);
|
||||
optimal2 =
|
||||
hk_get_image_format_features(pdevice, format, VK_IMAGE_TILING_OPTIMAL);
|
||||
buffer2 = hk_get_buffer_format_features(pdevice, format);
|
||||
|
||||
pFormatProperties->formatProperties = (VkFormatProperties){
|
||||
.linearTilingFeatures = vk_format_features2_to_features(linear2),
|
||||
.optimalTilingFeatures = vk_format_features2_to_features(optimal2),
|
||||
.bufferFeatures = vk_format_features2_to_features(buffer2),
|
||||
};
|
||||
|
||||
vk_foreach_struct(ext, pFormatProperties->pNext) {
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3: {
|
||||
VkFormatProperties3 *p = (void *)ext;
|
||||
p->linearTilingFeatures = linear2;
|
||||
p->optimalTilingFeatures = optimal2;
|
||||
p->bufferFeatures = buffer2;
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT:
|
||||
get_drm_format_modifier_properties_list(
|
||||
pdevice, format, (void *)ext, &pFormatProperties->formatProperties);
|
||||
break;
|
||||
|
||||
case VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_2_EXT:
|
||||
get_drm_format_modifier_properties_list_2(
|
||||
pdevice, format, (void *)ext, &pFormatProperties->formatProperties);
|
||||
break;
|
||||
|
||||
default:
|
||||
vk_debug_ignored_stype(ext->sType);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
1536  src/asahi/vulkan/hk_image.c  (new file; diff suppressed because it is too large)
115  src/asahi/vulkan/hk_image.h  (new file)
@@ -0,0 +1,115 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "asahi/layout/layout.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_image.h"
|
||||
|
||||
/* Because small images can end up with an array_stride_B that is less than
|
||||
* the sparse block size (in bytes), we have to set SINGLE_MIPTAIL_BIT when
|
||||
* advertising sparse properties to the client. This means that we get one
|
||||
* single memory range for the miptail of the image. For large images with
|
||||
* mipTailStartLod > 0, we have to deal with the array stride ourselves.
|
||||
*
|
||||
* We do this by returning HK_MIP_TAIL_START_OFFSET as the image's
|
||||
* imageMipTailOffset. We can then detect anything with that address as
|
||||
* being part of the miptail and re-map it accordingly. The Vulkan spec
|
||||
* explicitly allows for this.
|
||||
*
|
||||
* From the Vulkan 1.3.279 spec:
|
||||
*
|
||||
* "When VK_SPARSE_MEMORY_BIND_METADATA_BIT is present, the resourceOffset
|
||||
* must have been derived explicitly from the imageMipTailOffset in the
|
||||
* sparse resource properties returned for the metadata aspect. By
|
||||
* manipulating the value returned for imageMipTailOffset, the
|
||||
* resourceOffset does not have to correlate directly to a device virtual
|
||||
* address offset, and may instead be whatever value makes it easiest for
|
||||
* the implementation to derive the correct device virtual address."
|
||||
*/
|
||||
#define HK_MIP_TAIL_START_OFFSET 0x6d74000000000000UL
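Illustrative only, not the driver's actual bind code: per the comment above, a sparse bind whose resourceOffset was derived from the synthetic miptail offset can be recognized and remapped; `bind` here is a plain VkSparseMemoryBind from the client:

static bool
example_is_miptail_bind(const VkSparseMemoryBind *bind)
{
   /* Anything at or past the synthetic offset belongs to the miptail and
    * gets remapped onto the real per-layer miptail placement.
    */
   return bind->resourceOffset >= HK_MIP_TAIL_START_OFFSET;
}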
|
||||
|
||||
struct hk_device_memory;
|
||||
struct hk_physical_device;
|
||||
|
||||
static VkFormatFeatureFlags2
|
||||
hk_get_image_plane_format_features(struct hk_physical_device *pdev,
|
||||
VkFormat vk_format, VkImageTiling tiling);
|
||||
|
||||
VkFormatFeatureFlags2
|
||||
hk_get_image_format_features(struct hk_physical_device *pdevice,
|
||||
VkFormat format, VkImageTiling tiling);
|
||||
|
||||
struct hk_image_plane {
|
||||
struct ail_layout layout;
|
||||
uint64_t addr;
|
||||
|
||||
/** Size of the reserved VMA range for sparse images, zero otherwise. */
|
||||
uint64_t vma_size_B;
|
||||
|
||||
/* For host image copy */
|
||||
void *map;
|
||||
uint32_t rem;
|
||||
};
|
||||
|
||||
struct hk_image {
|
||||
struct vk_image vk;
|
||||
|
||||
/** True if the planes are bound separately
|
||||
*
|
||||
* This is set based on VK_IMAGE_CREATE_DISJOINT_BIT
|
||||
*/
|
||||
bool disjoint;
|
||||
|
||||
uint8_t plane_count;
|
||||
struct hk_image_plane planes[3];
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
|
||||
|
||||
static inline uint64_t
|
||||
hk_image_plane_base_address(const struct hk_image_plane *plane)
|
||||
{
|
||||
return plane->addr;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
hk_image_base_address(const struct hk_image *image, uint8_t plane)
|
||||
{
|
||||
return hk_image_plane_base_address(&image->planes[plane]);
|
||||
}
|
||||
|
||||
static inline uint8_t
|
||||
hk_image_aspects_to_plane(const struct hk_image *image,
|
||||
VkImageAspectFlags aspectMask)
|
||||
{
|
||||
/* Must only be one aspect unless it's depth/stencil */
|
||||
assert(aspectMask ==
|
||||
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ||
|
||||
util_bitcount(aspectMask) == 1);
|
||||
|
||||
switch (aspectMask) {
|
||||
default:
|
||||
assert(aspectMask != VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT);
|
||||
return 0;
|
||||
|
||||
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
||||
return image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT;
|
||||
|
||||
case VK_IMAGE_ASPECT_PLANE_1_BIT:
|
||||
case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
|
||||
return 1;
|
||||
|
||||
case VK_IMAGE_ASPECT_PLANE_2_BIT:
|
||||
case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
|
||||
return 2;
|
||||
}
|
||||
}
653  src/asahi/vulkan/hk_image_view.c  (new file)
@@ -0,0 +1,653 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_image_view.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
#include "agx_helpers.h"
|
||||
#include "agx_nir_passes.h"
|
||||
#include "agx_pack.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_image.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "layout.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
enum hk_desc_usage {
|
||||
HK_DESC_USAGE_SAMPLED,
|
||||
HK_DESC_USAGE_STORAGE,
|
||||
HK_DESC_USAGE_INPUT,
|
||||
HK_DESC_USAGE_BG_EOT,
|
||||
HK_DESC_USAGE_LAYERED_BG_EOT,
|
||||
HK_DESC_USAGE_EMRT,
|
||||
};
|
||||
|
||||
static bool
|
||||
hk_image_view_type_is_array(VkImageViewType view_type)
|
||||
{
|
||||
switch (view_type) {
|
||||
case VK_IMAGE_VIEW_TYPE_1D:
|
||||
case VK_IMAGE_VIEW_TYPE_2D:
|
||||
case VK_IMAGE_VIEW_TYPE_3D:
|
||||
case VK_IMAGE_VIEW_TYPE_CUBE:
|
||||
return false;
|
||||
|
||||
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
|
||||
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
|
||||
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
|
||||
return true;
|
||||
|
||||
default:
|
||||
unreachable("Invalid image view type");
|
||||
}
|
||||
}
|
||||
|
||||
static enum agx_texture_dimension
|
||||
translate_image_view_type(VkImageViewType view_type, bool msaa, bool layered,
|
||||
enum hk_desc_usage usage)
|
||||
{
|
||||
if (usage == HK_DESC_USAGE_EMRT || usage == HK_DESC_USAGE_INPUT ||
|
||||
(usage == HK_DESC_USAGE_LAYERED_BG_EOT && layered)) {
|
||||
return msaa ? AGX_TEXTURE_DIMENSION_2D_ARRAY_MULTISAMPLED
|
||||
: AGX_TEXTURE_DIMENSION_2D_ARRAY;
|
||||
}
|
||||
|
||||
/* For background/EOT, we ignore the application-provided view type */
|
||||
if (usage == HK_DESC_USAGE_BG_EOT || usage == HK_DESC_USAGE_LAYERED_BG_EOT) {
|
||||
return msaa ? AGX_TEXTURE_DIMENSION_2D_MULTISAMPLED
|
||||
: AGX_TEXTURE_DIMENSION_2D;
|
||||
}
|
||||
|
||||
bool cubes_to_2d = usage != HK_DESC_USAGE_SAMPLED;
|
||||
|
||||
switch (view_type) {
|
||||
case VK_IMAGE_VIEW_TYPE_1D:
|
||||
case VK_IMAGE_VIEW_TYPE_2D:
|
||||
return msaa ? AGX_TEXTURE_DIMENSION_2D_MULTISAMPLED
|
||||
: AGX_TEXTURE_DIMENSION_2D;
|
||||
|
||||
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
|
||||
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
|
||||
return msaa ? AGX_TEXTURE_DIMENSION_2D_ARRAY_MULTISAMPLED
|
||||
: AGX_TEXTURE_DIMENSION_2D_ARRAY;
|
||||
|
||||
case VK_IMAGE_VIEW_TYPE_3D:
|
||||
assert(!msaa);
|
||||
return AGX_TEXTURE_DIMENSION_3D;
|
||||
|
||||
case VK_IMAGE_VIEW_TYPE_CUBE:
|
||||
assert(!msaa);
|
||||
return cubes_to_2d ? AGX_TEXTURE_DIMENSION_2D_ARRAY
|
||||
: AGX_TEXTURE_DIMENSION_CUBE;
|
||||
|
||||
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
|
||||
assert(!msaa);
|
||||
return cubes_to_2d ? AGX_TEXTURE_DIMENSION_2D_ARRAY
|
||||
: AGX_TEXTURE_DIMENSION_CUBE_ARRAY;
|
||||
|
||||
default:
|
||||
unreachable("Invalid image view type");
|
||||
}
|
||||
}
|
||||
|
||||
static enum pipe_swizzle
|
||||
vk_swizzle_to_pipe(VkComponentSwizzle swizzle)
|
||||
{
|
||||
switch (swizzle) {
|
||||
case VK_COMPONENT_SWIZZLE_R:
|
||||
return PIPE_SWIZZLE_X;
|
||||
case VK_COMPONENT_SWIZZLE_G:
|
||||
return PIPE_SWIZZLE_Y;
|
||||
case VK_COMPONENT_SWIZZLE_B:
|
||||
return PIPE_SWIZZLE_Z;
|
||||
case VK_COMPONENT_SWIZZLE_A:
|
||||
return PIPE_SWIZZLE_W;
|
||||
case VK_COMPONENT_SWIZZLE_ONE:
|
||||
return PIPE_SWIZZLE_1;
|
||||
case VK_COMPONENT_SWIZZLE_ZERO:
|
||||
return PIPE_SWIZZLE_0;
|
||||
default:
|
||||
unreachable("Invalid component swizzle");
|
||||
}
|
||||
}
|
||||
|
||||
static enum pipe_format
|
||||
get_stencil_format(enum pipe_format format)
|
||||
{
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_S8_UINT:
|
||||
return PIPE_FORMAT_S8_UINT;
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
return PIPE_FORMAT_X24S8_UINT;
|
||||
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
|
||||
return PIPE_FORMAT_S8X24_UINT;
|
||||
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
|
||||
return PIPE_FORMAT_X32_S8X24_UINT;
|
||||
default:
|
||||
unreachable("Unsupported depth/stencil format");
|
||||
}
|
||||
}
|
||||
|
||||
struct hk_3d {
|
||||
unsigned x, y, z;
|
||||
};
|
||||
|
||||
static struct hk_3d
|
||||
view_denominator(struct hk_image_view *view)
|
||||
{
|
||||
enum pipe_format view_format = vk_format_to_pipe_format(view->vk.format);
|
||||
enum pipe_format img_format =
|
||||
vk_format_to_pipe_format(view->vk.image->format);
|
||||
|
||||
if (util_format_is_compressed(view_format)) {
|
||||
/*
|
||||
* We can do an uncompressed view of a compressed image but not the other
|
||||
* way around.
|
||||
*/
|
||||
assert(util_format_is_compressed(img_format));
|
||||
assert(util_format_get_blockwidth(img_format) ==
|
||||
util_format_get_blockwidth(view_format));
|
||||
assert(util_format_get_blockheight(img_format) ==
|
||||
util_format_get_blockheight(view_format));
|
||||
assert(util_format_get_blockdepth(img_format) ==
|
||||
util_format_get_blockdepth(view_format));
|
||||
|
||||
return (struct hk_3d){1, 1, 1};
|
||||
}
|
||||
|
||||
if (!util_format_is_compressed(img_format)) {
|
||||
/* Both formats uncompressed */
|
||||
return (struct hk_3d){1, 1, 1};
|
||||
}
|
||||
|
||||
/* Else, img is compressed but view is not */
|
||||
return (struct hk_3d){
|
||||
util_format_get_blockwidth(img_format),
|
||||
util_format_get_blockheight(img_format),
|
||||
util_format_get_blockdepth(img_format),
|
||||
};
|
||||
}
|
||||
|
||||
static enum pipe_format
|
||||
format_for_plane(struct hk_image_view *view, unsigned view_plane)
|
||||
{
|
||||
const struct vk_format_ycbcr_info *ycbcr_info =
|
||||
vk_format_get_ycbcr_info(view->vk.format);
|
||||
|
||||
assert(ycbcr_info || view_plane == 0);
|
||||
VkFormat plane_format =
|
||||
ycbcr_info ? ycbcr_info->planes[view_plane].format : view->vk.format;
|
||||
|
||||
enum pipe_format p_format = vk_format_to_pipe_format(plane_format);
|
||||
if (view->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
p_format = get_stencil_format(p_format);
|
||||
|
||||
return p_format;
|
||||
}
|
||||
|
||||
static void
|
||||
pack_texture(struct hk_image_view *view, unsigned view_plane,
|
||||
enum hk_desc_usage usage, struct agx_texture_packed *out)
|
||||
{
|
||||
struct hk_image *image = container_of(view->vk.image, struct hk_image, vk);
|
||||
const uint8_t image_plane = view->planes[view_plane].image_plane;
|
||||
struct ail_layout *layout = &image->planes[image_plane].layout;
|
||||
uint64_t base_addr = hk_image_base_address(image, image_plane);
|
||||
|
||||
bool cubes_to_2d = usage != HK_DESC_USAGE_SAMPLED;
|
||||
|
||||
unsigned level = view->vk.base_mip_level;
|
||||
unsigned layer = view->vk.base_array_layer;
|
||||
|
||||
enum pipe_format p_format = format_for_plane(view, view_plane);
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(p_format);
|
||||
|
||||
struct hk_3d denom = view_denominator(view);
|
||||
|
||||
uint8_t format_swizzle[4] = {
|
||||
desc->swizzle[0],
|
||||
desc->swizzle[1],
|
||||
desc->swizzle[2],
|
||||
desc->swizzle[3],
|
||||
};
|
||||
|
||||
/* Different APIs have different depth/stencil swizzle rules. Vulkan expects
|
||||
* R001 behaviour, override here because Mesa's format table is not that.
|
||||
*/
|
||||
if (util_format_is_depth_or_stencil(p_format)) {
|
||||
format_swizzle[0] = PIPE_SWIZZLE_X;
|
||||
format_swizzle[1] = PIPE_SWIZZLE_0;
|
||||
format_swizzle[2] = PIPE_SWIZZLE_0;
|
||||
format_swizzle[3] = PIPE_SWIZZLE_1;
|
||||
}
|
||||
|
||||
/* We only have a single swizzle for the user swizzle and the format
|
||||
* fixup, so compose them now.
|
||||
*/
|
||||
uint8_t out_swizzle[4];
|
||||
uint8_t view_swizzle[4] = {
|
||||
vk_swizzle_to_pipe(view->vk.swizzle.r),
|
||||
vk_swizzle_to_pipe(view->vk.swizzle.g),
|
||||
vk_swizzle_to_pipe(view->vk.swizzle.b),
|
||||
vk_swizzle_to_pipe(view->vk.swizzle.a),
|
||||
};
|
||||
|
||||
unsigned layers = view->vk.layer_count;
|
||||
if (view->vk.view_type == VK_IMAGE_VIEW_TYPE_3D) {
|
||||
layers = DIV_ROUND_UP(layout->depth_px, denom.z);
|
||||
} else if (!cubes_to_2d &&
|
||||
(view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
|
||||
view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)) {
|
||||
|
||||
layers /= 6;
|
||||
}
|
||||
|
||||
util_format_compose_swizzles(format_swizzle, view_swizzle, out_swizzle);
|
||||
|
||||
agx_pack(out, TEXTURE, cfg) {
|
||||
cfg.dimension = translate_image_view_type(
|
||||
view->vk.view_type, view->vk.image->samples > 1, layers > 1, usage);
|
||||
cfg.layout = agx_translate_layout(layout->tiling);
|
||||
cfg.channels = agx_pixel_format[p_format].channels;
|
||||
cfg.type = agx_pixel_format[p_format].type;
|
||||
cfg.srgb = util_format_is_srgb(p_format);
|
||||
|
||||
cfg.swizzle_r = agx_channel_from_pipe(out_swizzle[0]);
|
||||
cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]);
|
||||
cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]);
|
||||
cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]);
|
||||
|
||||
if (denom.x > 1) {
|
||||
assert(view->vk.level_count == 1);
|
||||
assert(view->vk.layer_count == 1);
|
||||
|
||||
cfg.address = base_addr + ail_get_layer_level_B(layout, layer, level);
|
||||
cfg.width = DIV_ROUND_UP(u_minify(layout->width_px, level), denom.x);
|
||||
cfg.height = DIV_ROUND_UP(u_minify(layout->height_px, level), denom.y);
|
||||
cfg.first_level = 0;
|
||||
cfg.last_level = 1;
|
||||
} else {
|
||||
cfg.address = base_addr + ail_get_layer_offset_B(layout, layer);
|
||||
cfg.width = layout->width_px;
|
||||
cfg.height = layout->height_px;
|
||||
cfg.first_level = level;
|
||||
cfg.last_level = level + view->vk.level_count - 1;
|
||||
}
|
||||
|
||||
cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
|
||||
cfg.unk_mipmapped = layout->levels > 1;
|
||||
cfg.srgb_2_channel = cfg.srgb && util_format_colormask(desc) == 0x3;
|
||||
|
||||
if (ail_is_compressed(layout)) {
|
||||
cfg.compressed_1 = true;
|
||||
cfg.extended = true;
|
||||
}
|
||||
|
||||
if (ail_is_compressed(layout)) {
|
||||
cfg.acceleration_buffer = base_addr + layout->metadata_offset_B +
|
||||
(layer * layout->compression_layer_stride_B);
|
||||
}
|
||||
|
||||
if (layout->tiling == AIL_TILING_LINEAR &&
|
||||
(hk_image_view_type_is_array(view->vk.view_type))) {
|
||||
|
||||
cfg.depth_linear = layers;
|
||||
cfg.layer_stride_linear = layout->layer_stride_B - 0x80;
|
||||
cfg.extended = true;
|
||||
} else {
|
||||
assert((layout->tiling != AIL_TILING_LINEAR) || (layers == 1));
|
||||
cfg.depth = layers;
|
||||
}
|
||||
|
||||
if (view->vk.image->samples > 1) {
|
||||
cfg.samples = agx_translate_sample_count(view->vk.image->samples);
|
||||
}
|
||||
|
||||
if (layout->tiling == AIL_TILING_LINEAR) {
|
||||
cfg.stride = ail_get_linear_stride_B(layout, 0) - 16;
|
||||
} else {
|
||||
assert(layout->tiling == AIL_TILING_TWIDDLED ||
|
||||
layout->tiling == AIL_TILING_TWIDDLED_COMPRESSED);
|
||||
|
||||
cfg.page_aligned_layers = layout->page_aligned_layers;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pack_pbe(struct hk_device *dev, struct hk_image_view *view, unsigned view_plane,
|
||||
enum hk_desc_usage usage, struct agx_pbe_packed *out)
|
||||
{
|
||||
struct hk_image *image = container_of(view->vk.image, struct hk_image, vk);
|
||||
const uint8_t image_plane = view->planes[view_plane].image_plane;
|
||||
struct ail_layout *layout = &image->planes[image_plane].layout;
|
||||
uint64_t base_addr = hk_image_base_address(image, image_plane);
|
||||
|
||||
unsigned level = view->vk.base_mip_level;
|
||||
unsigned layer = view->vk.base_array_layer;
|
||||
|
||||
enum pipe_format p_format = format_for_plane(view, view_plane);
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(p_format);
|
||||
|
||||
bool eot =
|
||||
usage == HK_DESC_USAGE_BG_EOT || usage == HK_DESC_USAGE_LAYERED_BG_EOT;
|
||||
|
||||
/* The tilebuffer is already in sRGB space if needed. Do not convert for
|
||||
* end-of-tile descriptors.
|
||||
*/
|
||||
if (eot)
|
||||
p_format = util_format_linear(p_format);
|
||||
|
||||
bool msaa = view->vk.image->samples > 1;
|
||||
struct hk_3d denom = view_denominator(view);
|
||||
|
||||
unsigned layers = view->vk.view_type == VK_IMAGE_VIEW_TYPE_3D
|
||||
? image->vk.extent.depth
|
||||
: view->vk.layer_count;
|
||||
|
||||
agx_pack(out, PBE, cfg) {
|
||||
cfg.dimension =
|
||||
translate_image_view_type(view->vk.view_type, msaa, layers > 1, usage);
|
||||
cfg.layout = agx_translate_layout(layout->tiling);
|
||||
cfg.channels = agx_pixel_format[p_format].channels;
|
||||
cfg.type = agx_pixel_format[p_format].type;
|
||||
cfg.srgb = util_format_is_srgb(p_format);
|
||||
|
||||
assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
|
||||
|
||||
for (unsigned i = 0; i < desc->nr_channels; ++i) {
|
||||
if (desc->swizzle[i] == 0)
|
||||
cfg.swizzle_r = i;
|
||||
else if (desc->swizzle[i] == 1)
|
||||
cfg.swizzle_g = i;
|
||||
else if (desc->swizzle[i] == 2)
|
||||
cfg.swizzle_b = i;
|
||||
else if (desc->swizzle[i] == 3)
|
||||
cfg.swizzle_a = i;
|
||||
}
|
||||
|
||||
cfg.buffer = base_addr + ail_get_layer_offset_B(layout, layer);
|
||||
cfg.unk_mipmapped = layout->levels > 1;
|
||||
|
||||
if (msaa & !eot) {
|
||||
/* Multisampled images are bound like buffer textures, with
|
||||
* addressing arithmetic to determine the texel to write.
|
||||
*
|
||||
* Note that the end-of-tile program uses real multisample images
|
||||
* with image_write_block instructions.
|
||||
*/
|
||||
unsigned blocksize_B = util_format_get_blocksize(p_format);
|
||||
unsigned size_px =
|
||||
(layout->size_B - layout->layer_stride_B * layer) / blocksize_B;
|
||||
|
||||
cfg.dimension = AGX_TEXTURE_DIMENSION_2D;
|
||||
cfg.layout = AGX_LAYOUT_LINEAR;
|
||||
cfg.width = AGX_TEXTURE_BUFFER_WIDTH;
|
||||
cfg.height = DIV_ROUND_UP(size_px, cfg.width);
|
||||
cfg.stride = (cfg.width * blocksize_B) - 4;
|
||||
cfg.layers = 1;
|
||||
cfg.levels = 1;
|
||||
|
||||
cfg.buffer += layout->level_offsets_B[level];
|
||||
cfg.level = 0;
|
||||
} else {
|
||||
if (denom.x > 1) {
|
||||
assert(denom.z == 1 && "todo how to handle?");
|
||||
assert(view->vk.level_count == 1);
|
||||
assert(view->vk.layer_count == 1);
|
||||
|
||||
cfg.buffer =
|
||||
base_addr + ail_get_layer_level_B(layout, layer, level);
|
||||
cfg.width =
|
||||
DIV_ROUND_UP(u_minify(layout->width_px, level), denom.x);
|
||||
cfg.height =
|
||||
DIV_ROUND_UP(u_minify(layout->height_px, level), denom.y);
|
||||
cfg.level = 0;
|
||||
} else {
|
||||
cfg.buffer = base_addr + ail_get_layer_offset_B(layout, layer);
|
||||
cfg.width = layout->width_px;
|
||||
cfg.height = layout->height_px;
|
||||
cfg.level = level;
|
||||
}
|
||||
|
||||
if (layout->tiling == AIL_TILING_LINEAR &&
|
||||
(hk_image_view_type_is_array(view->vk.view_type))) {
|
||||
|
||||
cfg.depth_linear = layers;
|
||||
cfg.layer_stride_linear = (layout->layer_stride_B - 0x80);
|
||||
cfg.extended = true;
|
||||
} else {
|
||||
assert((layout->tiling != AIL_TILING_LINEAR) || (layers == 1));
|
||||
cfg.layers = layers;
|
||||
}
|
||||
|
||||
cfg.levels = image->vk.mip_levels;
|
||||
|
||||
if (layout->tiling == AIL_TILING_LINEAR) {
|
||||
cfg.stride = ail_get_linear_stride_B(layout, level) - 4;
|
||||
assert(cfg.levels == 1);
|
||||
} else {
|
||||
cfg.page_aligned_layers = layout->page_aligned_layers;
|
||||
}
|
||||
|
||||
if (image->vk.samples > 1)
|
||||
cfg.samples = agx_translate_sample_count(image->vk.samples);
|
||||
}
|
||||
|
||||
if (ail_is_compressed(layout)) {
|
||||
cfg.compressed_1 = true;
|
||||
cfg.extended = true;
|
||||
|
||||
cfg.acceleration_buffer = base_addr + layout->metadata_offset_B +
|
||||
(layer * layout->compression_layer_stride_B);
|
||||
}
|
||||
|
||||
/* When the descriptor isn't extended architecturally, we use
|
||||
* the last 8 bytes as a sideband to accelerate image atomics.
|
||||
*/
|
||||
if (!cfg.extended && layout->writeable_image) {
|
||||
if (msaa) {
|
||||
assert(denom.x == 1 && "no MSAA of block-compressed");
|
||||
|
||||
cfg.aligned_width_msaa_sw =
|
||||
align(u_minify(layout->width_px, level),
|
||||
layout->tilesize_el[level].width_el);
|
||||
} else {
|
||||
cfg.level_offset_sw = ail_get_level_offset_B(layout, cfg.level);
|
||||
}
|
||||
|
||||
cfg.sample_count_log2_sw = util_logbase2(image->vk.samples);
|
||||
|
||||
if (layout->tiling == AIL_TILING_TWIDDLED) {
|
||||
struct ail_tile tile_size = layout->tilesize_el[level];
|
||||
cfg.tile_width_sw = tile_size.width_el;
|
||||
cfg.tile_height_sw = tile_size.height_el;
|
||||
|
||||
cfg.layer_stride_sw = layout->layer_stride_B;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static VkResult
|
||||
add_descriptor(struct hk_device *dev, struct hk_image_view *view,
|
||||
struct agx_texture_packed *desc,
|
||||
struct agx_texture_packed *cached, uint32_t *index)
|
||||
{
|
||||
/* First, look for a descriptor we already uploaded */
|
||||
for (unsigned i = 0; i < view->descriptor_count; ++i) {
|
||||
if (memcmp(&cached[i], desc, sizeof *desc) == 0) {
|
||||
*index = view->descriptor_index[i];
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/* Else, add a new descriptor */
|
||||
VkResult result =
|
||||
hk_descriptor_table_add(dev, &dev->images, desc, sizeof *desc, index);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
uint32_t local_index = view->descriptor_count++;
|
||||
assert(local_index < HK_MAX_IMAGE_DESCS);
|
||||
|
||||
cached[local_index] = *desc;
|
||||
view->descriptor_index[local_index] = *index;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_image_view_init(struct hk_device *dev, struct hk_image_view *view,
|
||||
bool driver_internal,
|
||||
const VkImageViewCreateInfo *pCreateInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_image, image, pCreateInfo->image);
|
||||
VkResult result;
|
||||
|
||||
memset(view, 0, sizeof(*view));
|
||||
|
||||
vk_image_view_init(&dev->vk, &view->vk, driver_internal, pCreateInfo);
|
||||
|
||||
/* First, figure out which image planes we need. For depth/stencil, we only
|
||||
* have one aspect viewed at a time.
|
||||
*/
|
||||
if (image->vk.aspects &
|
||||
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||
|
||||
view->plane_count = 1;
|
||||
view->planes[0].image_plane =
|
||||
hk_image_aspects_to_plane(image, view->vk.aspects);
|
||||
} else {
|
||||
/* For other formats, retrieve the plane count from the aspect mask
|
||||
* and then walk through the aspect mask to map each image plane
|
||||
* to its corresponding view plane
|
||||
*/
|
||||
assert(util_bitcount(view->vk.aspects) ==
|
||||
vk_format_get_plane_count(view->vk.format));
|
||||
view->plane_count = 0;
|
||||
u_foreach_bit(aspect_bit, view->vk.aspects) {
|
||||
uint8_t image_plane =
|
||||
hk_image_aspects_to_plane(image, 1u << aspect_bit);
|
||||
view->planes[view->plane_count++].image_plane = image_plane;
|
||||
}
|
||||
}
|
||||
|
||||
struct agx_texture_packed cached[HK_MAX_IMAGE_DESCS];
|
||||
|
||||
/* Finally, fill in each view plane separately */
|
||||
for (unsigned view_plane = 0; view_plane < view->plane_count; view_plane++) {
|
||||
const struct {
|
||||
VkImageUsageFlagBits flag;
|
||||
enum hk_desc_usage usage;
|
||||
uint32_t *tex;
|
||||
uint32_t *pbe;
|
||||
} descriptors[] = {
|
||||
{VK_IMAGE_USAGE_SAMPLED_BIT, HK_DESC_USAGE_SAMPLED,
|
||||
&view->planes[view_plane].sampled_desc_index},
|
||||
|
||||
{VK_IMAGE_USAGE_STORAGE_BIT, HK_DESC_USAGE_STORAGE,
|
||||
&view->planes[view_plane].ro_storage_desc_index,
|
||||
&view->planes[view_plane].storage_desc_index},
|
||||
|
||||
{VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, HK_DESC_USAGE_INPUT,
|
||||
&view->planes[view_plane].ia_desc_index},
|
||||
|
||||
{VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, HK_DESC_USAGE_BG_EOT,
|
||||
&view->planes[view_plane].background_desc_index,
|
||||
&view->planes[view_plane].eot_pbe_desc_index},
|
||||
|
||||
{VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, HK_DESC_USAGE_LAYERED_BG_EOT,
|
||||
&view->planes[view_plane].layered_background_desc_index,
|
||||
&view->planes[view_plane].layered_eot_pbe_desc_index},
|
||||
};

      for (unsigned i = 0; i < ARRAY_SIZE(descriptors); ++i) {
         if (!(view->vk.usage & descriptors[i].flag))
            continue;

         for (unsigned is_pbe = 0; is_pbe < 2; ++is_pbe) {
            struct agx_texture_packed desc;
            uint32_t *out = is_pbe ? descriptors[i].pbe : descriptors[i].tex;

            if (!out)
               continue;

            if (is_pbe) {
               static_assert(sizeof(struct agx_pbe_packed) ==
                             sizeof(struct agx_texture_packed));

               pack_pbe(dev, view, view_plane, descriptors[i].usage,
                        (struct agx_pbe_packed *)&desc);
            } else {
               pack_texture(view, view_plane, descriptors[i].usage, &desc);
            }

            result = add_descriptor(dev, view, &desc, cached, out);
            if (result != VK_SUCCESS)
               return result;
         }
      }

      if (view->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
         pack_texture(view, view_plane, HK_DESC_USAGE_EMRT,
                      &view->planes[view_plane].emrt_texture);

         pack_pbe(dev, view, view_plane, HK_DESC_USAGE_EMRT,
                  &view->planes[view_plane].emrt_pbe);
      }
   }

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
hk_DestroyImageView(VkDevice _device, VkImageView imageView,
                    const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(hk_device, dev, _device);
   VK_FROM_HANDLE(hk_image_view, view, imageView);

   if (!view)
      return;

   for (uint8_t d = 0; d < view->descriptor_count; ++d) {
      hk_descriptor_table_remove(dev, &dev->images, view->descriptor_index[d]);
   }

   vk_image_view_finish(&view->vk);
   vk_free2(&dev->vk.alloc, pAllocator, view);
}

VKAPI_ATTR VkResult VKAPI_CALL
hk_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator, VkImageView *pView)
{
   VK_FROM_HANDLE(hk_device, dev, _device);
   struct hk_image_view *view;
   VkResult result;

   view = vk_alloc2(&dev->vk.alloc, pAllocator, sizeof(*view), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!view)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = hk_image_view_init(
      dev, view, pCreateInfo->flags & VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
      pCreateInfo);
   if (result != VK_SUCCESS) {
      hk_DestroyImageView(_device, hk_image_view_to_handle(view), pAllocator);
      return result;
   }

   *pView = hk_image_view_to_handle(view);

   return VK_SUCCESS;
}
66  src/asahi/vulkan/hk_image_view.h (new file)
@@ -0,0 +1,66 @@
/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "agx_pack.h"
#include "hk_private.h"
#include "vk_image.h"

struct hk_device;

#define HK_MAX_PLANES 3
#define HK_MAX_IMAGE_DESCS (10 * HK_MAX_PLANES)

struct hk_image_view {
   struct vk_image_view vk;

   uint32_t descriptor_index[HK_MAX_IMAGE_DESCS];
   uint8_t descriptor_count;

   uint8_t plane_count;
   struct {
      uint8_t image_plane;

      /** Descriptors used for eMRT. We delay upload since we want them
       * contiguous in memory, although this could be reworked if we wanted.
       */
      struct agx_texture_packed emrt_texture;
      struct agx_pbe_packed emrt_pbe;

      /** Index in the image descriptor table for the sampled image descriptor */
      uint32_t sampled_desc_index;

      /** Index in the image descriptor table for the storage image descriptor */
      uint32_t storage_desc_index;

      /** Index in the image descriptor table for the readonly storage image
       * descriptor.
       */
      uint32_t ro_storage_desc_index;

      /** Index in the image descriptor table for the texture descriptor used
       * for background programs.
       */
      uint32_t background_desc_index;
      uint32_t layered_background_desc_index;

      /** Index in the image descriptor table for the texture descriptor used
       * for input attachments.
       */
      uint32_t ia_desc_index;

      /** Index in the image descriptor table for the PBE descriptor used for
       * end-of-tile programs.
       */
      uint32_t eot_pbe_desc_index;
      uint32_t layered_eot_pbe_desc_index;
   } planes[3];
};

VK_DEFINE_NONDISP_HANDLE_CASTS(hk_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
196  src/asahi/vulkan/hk_instance.c (new file)
@@ -0,0 +1,196 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_instance.h"
|
||||
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "vulkan/wsi/wsi_common.h"
|
||||
|
||||
#include "util/build_id.h"
|
||||
#include "util/driconf.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_EnumerateInstanceVersion(uint32_t *pApiVersion)
|
||||
{
|
||||
uint32_t version_override = vk_get_version_override();
|
||||
*pApiVersion = version_override ? version_override
|
||||
: VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static const struct vk_instance_extension_table instance_extensions = {
|
||||
#ifdef HK_USE_WSI_PLATFORM
|
||||
.KHR_get_surface_capabilities2 = true,
|
||||
.KHR_surface = true,
|
||||
.KHR_surface_protected_capabilities = true,
|
||||
.EXT_surface_maintenance1 = true,
|
||||
.EXT_swapchain_colorspace = true,
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
|
||||
.KHR_wayland_surface = true,
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_XCB_KHR
|
||||
.KHR_xcb_surface = true,
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_XLIB_KHR
|
||||
.KHR_xlib_surface = true,
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
|
||||
.EXT_acquire_xlib_display = true,
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
|
||||
.KHR_display = true,
|
||||
.KHR_get_display_properties2 = true,
|
||||
.EXT_direct_mode_display = true,
|
||||
.EXT_display_surface_counter = true,
|
||||
.EXT_acquire_drm_display = true,
|
||||
#endif
|
||||
#ifndef VK_USE_PLATFORM_WIN32_KHR
|
||||
.EXT_headless_surface = true,
|
||||
#endif
|
||||
.KHR_device_group_creation = true,
|
||||
.KHR_external_fence_capabilities = true,
|
||||
.KHR_external_memory_capabilities = true,
|
||||
.KHR_external_semaphore_capabilities = true,
|
||||
.KHR_get_physical_device_properties2 = true,
|
||||
.EXT_debug_report = true,
|
||||
.EXT_debug_utils = true,
|
||||
};
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_EnumerateInstanceExtensionProperties(const char *pLayerName,
|
||||
uint32_t *pPropertyCount,
|
||||
VkExtensionProperties *pProperties)
|
||||
{
|
||||
if (pLayerName)
|
||||
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
|
||||
|
||||
return vk_enumerate_instance_extension_properties(
|
||||
&instance_extensions, pPropertyCount, pProperties);
|
||||
}
|
||||
|
||||
static const driOptionDescription hk_dri_options[] = {
|
||||
DRI_CONF_SECTION_PERFORMANCE DRI_CONF_ADAPTIVE_SYNC(true)
|
||||
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
|
||||
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
|
||||
DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
|
||||
DRI_CONF_VK_KHR_PRESENT_WAIT(false)
|
||||
DRI_CONF_VK_XWAYLAND_WAIT_READY(false) DRI_CONF_SECTION_END
|
||||
|
||||
DRI_CONF_SECTION_DEBUG DRI_CONF_FORCE_VK_VENDOR()
|
||||
DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
|
||||
DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
|
||||
DRI_CONF_SECTION_END};
|
||||
|
||||
static void
|
||||
hk_init_dri_options(struct hk_instance *instance)
|
||||
{
|
||||
driParseOptionInfo(&instance->available_dri_options, hk_dri_options,
|
||||
ARRAY_SIZE(hk_dri_options));
|
||||
driParseConfigFiles(
|
||||
&instance->dri_options, &instance->available_dri_options, 0, "hk", NULL,
|
||||
NULL, instance->vk.app_info.app_name, instance->vk.app_info.app_version,
|
||||
instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
|
||||
|
||||
instance->force_vk_vendor =
|
||||
driQueryOptioni(&instance->dri_options, "force_vk_vendor");
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
VkInstance *pInstance)
|
||||
{
|
||||
struct hk_instance *instance;
|
||||
VkResult result;
|
||||
|
||||
if (pAllocator == NULL)
|
||||
pAllocator = vk_default_allocator();
|
||||
|
||||
instance = vk_alloc(pAllocator, sizeof(*instance), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
|
||||
if (!instance)
|
||||
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
struct vk_instance_dispatch_table dispatch_table;
|
||||
vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
|
||||
&hk_instance_entrypoints, true);
|
||||
vk_instance_dispatch_table_from_entrypoints(
|
||||
&dispatch_table, &wsi_instance_entrypoints, false);
|
||||
|
||||
result = vk_instance_init(&instance->vk, &instance_extensions,
|
||||
&dispatch_table, pCreateInfo, pAllocator);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_alloc;
|
||||
|
||||
hk_init_dri_options(instance);
|
||||
|
||||
instance->vk.physical_devices.try_create_for_drm =
|
||||
hk_create_drm_physical_device;
|
||||
instance->vk.physical_devices.destroy = hk_physical_device_destroy;
|
||||
|
||||
const struct build_id_note *note =
|
||||
build_id_find_nhdr_for_addr(hk_CreateInstance);
|
||||
if (!note) {
|
||||
result = vk_errorf(NULL, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"Failed to find build-id");
|
||||
goto fail_init;
|
||||
}
|
||||
|
||||
unsigned build_id_len = build_id_length(note);
|
||||
if (build_id_len < SHA1_DIGEST_LENGTH) {
|
||||
result = vk_errorf(NULL, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"build-id too short. It needs to be a SHA");
|
||||
goto fail_init;
|
||||
}
|
||||
|
||||
static_assert(sizeof(instance->driver_build_sha) == SHA1_DIGEST_LENGTH);
|
||||
memcpy(instance->driver_build_sha, build_id_data(note), SHA1_DIGEST_LENGTH);
|
||||
|
||||
*pInstance = hk_instance_to_handle(instance);
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_init:
|
||||
vk_instance_finish(&instance->vk);
|
||||
fail_alloc:
|
||||
vk_free(pAllocator, instance);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyInstance(VkInstance _instance,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_instance, instance, _instance);
|
||||
|
||||
if (!instance)
|
||||
return;
|
||||
|
||||
driDestroyOptionCache(&instance->dri_options);
|
||||
driDestroyOptionInfo(&instance->available_dri_options);
|
||||
|
||||
vk_instance_finish(&instance->vk);
|
||||
vk_free(&instance->vk.alloc, instance);
|
||||
}
|
||||
|
||||
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
|
||||
hk_GetInstanceProcAddr(VkInstance _instance, const char *pName)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_instance, instance, _instance);
|
||||
return vk_instance_get_proc_addr(&instance->vk, &hk_instance_entrypoints,
|
||||
pName);
|
||||
}
|
||||
|
||||
PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
|
||||
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
|
||||
{
|
||||
return hk_GetInstanceProcAddr(instance, pName);
|
||||
}
|
||||
25  src/asahi/vulkan/hk_instance.h (new file)
@@ -0,0 +1,25 @@
/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "util/xmlconfig.h"
#include "hk_private.h"
#include "vk_instance.h"

struct hk_instance {
   struct vk_instance vk;

   struct driOptionCache dri_options;
   struct driOptionCache available_dri_options;

   uint8_t driver_build_sha[20];
   uint32_t force_vk_vendor;
};

VK_DEFINE_HANDLE_CASTS(hk_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
867  src/asahi/vulkan/hk_nir_lower_descriptors.c (new file)
@@ -0,0 +1,867 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "pipe/p_defines.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
#include "agx_nir_passes.h"
|
||||
#include "agx_pack.h"
|
||||
#include "hk_cmd_buffer.h"
|
||||
#include "hk_descriptor_set.h"
|
||||
#include "hk_descriptor_set_layout.h"
|
||||
#include "hk_shader.h"
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
#include "nir_deref.h"
|
||||
#include "nir_intrinsics.h"
|
||||
#include "nir_intrinsics_indices.h"
|
||||
#include "shader_enums.h"
|
||||
#include "vk_pipeline.h"
|
||||
|
||||
struct lower_descriptors_ctx {
|
||||
const struct hk_descriptor_set_layout *set_layouts[HK_MAX_SETS];
|
||||
|
||||
bool clamp_desc_array_bounds;
|
||||
nir_address_format ubo_addr_format;
|
||||
nir_address_format ssbo_addr_format;
|
||||
};
|
||||
|
||||
static const struct hk_descriptor_set_binding_layout *
|
||||
get_binding_layout(uint32_t set, uint32_t binding,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
assert(set < HK_MAX_SETS);
|
||||
assert(ctx->set_layouts[set] != NULL);
|
||||
|
||||
const struct hk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
|
||||
|
||||
assert(binding < set_layout->binding_count);
|
||||
return &set_layout->binding[binding];
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_speculatable(nir_builder *b, unsigned num_components, unsigned bit_size,
|
||||
nir_def *addr, unsigned align)
|
||||
{
|
||||
return nir_build_load_global_constant(b, num_components, bit_size, addr,
|
||||
.align_mul = align,
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_root(nir_builder *b, unsigned num_components, unsigned bit_size,
|
||||
nir_def *offset, unsigned align)
|
||||
{
|
||||
nir_def *root = nir_load_preamble(b, 1, 64, .base = HK_ROOT_UNIFORM);
|
||||
|
||||
/* We've bound the address of the root descriptor, index in. */
|
||||
nir_def *addr = nir_iadd(b, root, nir_u2u64(b, offset));
|
||||
|
||||
return load_speculatable(b, num_components, bit_size, addr, align);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
assert(load->intrinsic == nir_intrinsic_load_constant);
|
||||
unreachable("todo: stick an address in the root descriptor or something");
|
||||
|
||||
uint32_t base = nir_intrinsic_base(load);
|
||||
uint32_t range = nir_intrinsic_range(load);
|
||||
|
||||
b->cursor = nir_before_instr(&load->instr);
|
||||
|
||||
nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
|
||||
nir_def *data = nir_load_ubo(
|
||||
b, load->def.num_components, load->def.bit_size, nir_imm_int(b, 0),
|
||||
offset, .align_mul = nir_intrinsic_align_mul(load),
|
||||
.align_offset = nir_intrinsic_align_offset(load), .range_base = base,
|
||||
.range = range);
|
||||
|
||||
nir_def_rewrite_uses(&load->def, data);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_descriptor_set_addr(nir_builder *b, uint32_t set,
|
||||
UNUSED const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
uint32_t set_addr_offset =
|
||||
hk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
|
||||
|
||||
return load_root(b, 1, 64, nir_imm_int(b, set_addr_offset), 8);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_dynamic_buffer_start(nir_builder *b, uint32_t set,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
int dynamic_buffer_start_imm = 0;
|
||||
for (uint32_t s = 0; s < set; s++) {
|
||||
if (ctx->set_layouts[s] == NULL) {
|
||||
dynamic_buffer_start_imm = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
|
||||
}
|
||||
|
||||
if (dynamic_buffer_start_imm >= 0) {
|
||||
return nir_imm_int(b, dynamic_buffer_start_imm);
|
||||
} else {
|
||||
uint32_t root_offset =
|
||||
hk_root_descriptor_offset(set_dynamic_buffer_start) + set;
|
||||
|
||||
return nir_u2u32(b, load_root(b, 1, 8, nir_imm_int(b, root_offset), 1));
|
||||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
|
||||
uint32_t set, uint32_t binding, nir_def *index,
|
||||
unsigned offset_B, const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
const struct hk_descriptor_set_binding_layout *binding_layout =
|
||||
get_binding_layout(set, binding, ctx);
|
||||
|
||||
if (ctx->clamp_desc_array_bounds)
|
||||
index =
|
||||
nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
|
||||
|
||||
switch (binding_layout->type) {
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
/* Get the index in the root descriptor table dynamic_buffers array. */
|
||||
nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
|
||||
|
||||
index = nir_iadd(b, index,
|
||||
nir_iadd_imm(b, dynamic_buffer_start,
|
||||
binding_layout->dynamic_buffer_index));
|
||||
|
||||
nir_def *root_desc_offset = nir_iadd_imm(
|
||||
b, nir_imul_imm(b, index, sizeof(struct hk_buffer_address)),
|
||||
hk_root_descriptor_offset(dynamic_buffers));
|
||||
|
||||
assert(num_components == 4 && bit_size == 32);
|
||||
nir_def *desc = load_root(b, 4, 32, root_desc_offset, 16);
|
||||
|
||||
/* We know a priori that the .w component (offset) is zero */
|
||||
return nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3);
|
||||
}
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
|
||||
nir_def *base_addr = nir_iadd_imm(
|
||||
b, load_descriptor_set_addr(b, set, ctx), binding_layout->offset);
|
||||
|
||||
assert(binding_layout->stride == 1);
|
||||
const uint32_t binding_size = binding_layout->array_size;
|
||||
|
||||
/* Convert it to nir_address_format_64bit_bounded_global */
|
||||
assert(num_components == 4 && bit_size == 32);
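/* In nir_address_format_64bit_bounded_global the descriptor is a 32-bit
 * vec4: .xy hold the 64-bit base address (low/high words), .z the size in
 * bytes used for bounds checking, and .w the offset into the buffer.
 */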
|
||||
return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
|
||||
nir_unpack_64_2x32_split_y(b, base_addr),
|
||||
nir_imm_int(b, binding_size), nir_imm_int(b, 0));
|
||||
}
|
||||
|
||||
default: {
|
||||
assert(binding_layout->stride > 0);
|
||||
nir_def *desc_ubo_offset =
|
||||
nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
|
||||
binding_layout->offset + offset_B);
|
||||
|
||||
unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
|
||||
desc_align_mul = MIN2(desc_align_mul, 16);
|
||||
unsigned desc_align_offset = binding_layout->offset + offset_B;
|
||||
desc_align_offset %= desc_align_mul;
|
||||
|
||||
nir_def *desc;
|
||||
nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
|
||||
desc = nir_load_global_constant_offset(
|
||||
b, num_components, bit_size, set_addr, desc_ubo_offset,
|
||||
.align_mul = desc_align_mul, .align_offset = desc_align_offset,
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
|
||||
if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
|
||||
binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
|
||||
/* We know a priori that the .w component (offset) is zero */
|
||||
assert(num_components == 4 && bit_size == 32);
|
||||
desc = nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3);
|
||||
}
|
||||
return desc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
is_idx_intrin(nir_intrinsic_instr *intrin)
|
||||
{
|
||||
while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
|
||||
intrin = nir_src_as_intrinsic(intrin->src[0]);
|
||||
if (intrin == NULL)
|
||||
return false;
|
||||
}
|
||||
|
||||
return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
nir_def *index = nir_imm_int(b, 0);
|
||||
|
||||
while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
|
||||
index = nir_iadd(b, index, intrin->src[1].ssa);
|
||||
intrin = nir_src_as_intrinsic(intrin->src[0]);
|
||||
}
|
||||
|
||||
assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
|
||||
uint32_t set = nir_intrinsic_desc_set(intrin);
|
||||
uint32_t binding = nir_intrinsic_binding(intrin);
|
||||
index = nir_iadd(b, index, intrin->src[0].ssa);
|
||||
|
||||
return load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
|
||||
}
|
||||
|
||||
static bool
|
||||
try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
|
||||
if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
|
||||
assert(desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
|
||||
desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
|
||||
return false;
|
||||
}
|
||||
|
||||
nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, desc);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
_lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
uint32_t root_table_offset)
|
||||
{
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
assert((root_table_offset & 3) == 0 && "aligned");
|
||||
|
||||
nir_def *val = load_root(b, intrin->def.num_components, intrin->def.bit_size,
|
||||
nir_imm_int(b, root_table_offset), 4);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, val);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#define lower_sysval_to_root_table(b, intrin, member) \
|
||||
_lower_sysval_to_root_table(b, intrin, hk_root_descriptor_offset(member))
|
||||
|
||||
static bool
|
||||
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
const uint32_t push_region_offset = hk_root_descriptor_offset(push);
|
||||
const uint32_t base = nir_intrinsic_base(load);
|
||||
|
||||
b->cursor = nir_before_instr(&load->instr);
|
||||
|
||||
nir_def *offset =
|
||||
nir_iadd_imm(b, load->src[0].ssa, push_region_offset + base);
|
||||
|
||||
nir_def *val = load_root(b, load->def.num_components, load->def.bit_size,
|
||||
offset, load->def.bit_size / 8);
|
||||
|
||||
nir_def_rewrite_uses(&load->def, val);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
|
||||
uint32_t *set, uint32_t *binding, nir_def **index)
|
||||
{
|
||||
if (deref->deref_type == nir_deref_type_array) {
|
||||
*index = deref->arr.index.ssa;
|
||||
deref = nir_deref_instr_parent(deref);
|
||||
} else {
|
||||
*index = nir_imm_int(b, 0);
|
||||
}
|
||||
|
||||
assert(deref->deref_type == nir_deref_type_var);
|
||||
nir_variable *var = deref->var;
|
||||
|
||||
*set = var->data.descriptor_set;
|
||||
*binding = var->data.binding;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_resource_deref_desc(nir_builder *b, unsigned num_components,
|
||||
unsigned bit_size, nir_deref_instr *deref,
|
||||
unsigned offset_B,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
uint32_t set, binding;
|
||||
nir_def *index;
|
||||
get_resource_deref_binding(b, deref, &set, &binding, &index);
|
||||
return load_descriptor(b, num_components, bit_size, set, binding, index,
|
||||
offset_B, ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns an AGX bindless handle to access an indexed image within the global
|
||||
* image heap.
|
||||
*/
|
||||
static nir_def *
|
||||
image_heap_handle(nir_builder *b, nir_def *offset)
|
||||
{
|
||||
return nir_vec2(b, nir_imm_int(b, HK_IMAGE_HEAP_UNIFORM), offset);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
|
||||
|
||||
/* Reads and queries use the texture descriptor; writes and atomics PBE. */
|
||||
unsigned offs;
|
||||
if (intr->intrinsic != nir_intrinsic_image_deref_load &&
|
||||
intr->intrinsic != nir_intrinsic_image_deref_size &&
|
||||
intr->intrinsic != nir_intrinsic_image_deref_samples) {
|
||||
|
||||
offs = offsetof(struct hk_storage_image_descriptor, pbe_offset);
|
||||
} else {
|
||||
offs = offsetof(struct hk_storage_image_descriptor, tex_offset);
|
||||
}
|
||||
|
||||
nir_def *offset = load_resource_deref_desc(b, 1, 32, deref, offs, ctx);
|
||||
nir_rewrite_image_intrinsic(intr, image_heap_handle(b, offset), true);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static VkQueryPipelineStatisticFlagBits
|
||||
translate_pipeline_stat_bit(enum pipe_statistics_query_index pipe)
|
||||
{
|
||||
switch (pipe) {
|
||||
case PIPE_STAT_QUERY_IA_VERTICES:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT;
|
||||
case PIPE_STAT_QUERY_IA_PRIMITIVES:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
|
||||
case PIPE_STAT_QUERY_VS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT;
|
||||
case PIPE_STAT_QUERY_GS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT;
|
||||
case PIPE_STAT_QUERY_GS_PRIMITIVES:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT;
|
||||
case PIPE_STAT_QUERY_C_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
|
||||
case PIPE_STAT_QUERY_C_PRIMITIVES:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT;
|
||||
case PIPE_STAT_QUERY_PS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT;
|
||||
case PIPE_STAT_QUERY_HS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT;
|
||||
case PIPE_STAT_QUERY_DS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT;
|
||||
case PIPE_STAT_QUERY_CS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
|
||||
case PIPE_STAT_QUERY_TS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT;
|
||||
case PIPE_STAT_QUERY_MS_INVOCATIONS:
|
||||
return VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT;
|
||||
}
|
||||
|
||||
unreachable("invalid statistic");
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_uvs_index(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
||||
{
|
||||
unsigned *vs_uniform_base = data;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_uvs_index_agx: {
|
||||
gl_varying_slot slot = nir_intrinsic_io_semantics(intrin).location;
|
||||
unsigned offset = hk_root_descriptor_offset(draw.uvs_index[slot]);
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_def *val = load_root(b, 1, 8, nir_imm_int(b, offset), 1);
|
||||
nir_def_rewrite_uses(&intrin->def, nir_u2u16(b, val));
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_shader_part_tests_zs_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.no_epilog_discard);
|
||||
|
||||
case nir_intrinsic_load_api_sample_mask_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.api_sample_mask);
|
||||
|
||||
case nir_intrinsic_load_sample_positions_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.ppp_multisamplectl);
|
||||
|
||||
case nir_intrinsic_load_depth_never_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.force_never_in_shader);
|
||||
|
||||
case nir_intrinsic_load_geometry_param_buffer_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.geometry_params);
|
||||
|
||||
case nir_intrinsic_load_vs_output_buffer_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.vertex_output_buffer);
|
||||
|
||||
case nir_intrinsic_load_vs_outputs_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.vertex_outputs);
|
||||
|
||||
case nir_intrinsic_load_tess_param_buffer_agx:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.tess_params);
|
||||
|
||||
case nir_intrinsic_load_is_first_fan_agx: {
|
||||
unsigned offset = hk_root_descriptor_offset(draw.provoking);
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
nir_def *val = load_root(b, 1, 16, nir_imm_int(b, offset), 2);
|
||||
nir_def_rewrite_uses(&intrin->def, nir_ieq_imm(b, val, 1));
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_provoking_last: {
|
||||
unsigned offset = hk_root_descriptor_offset(draw.provoking);
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
nir_def *val = load_root(b, 1, 16, nir_imm_int(b, offset), 2);
|
||||
nir_def_rewrite_uses(&intrin->def, nir_b2b32(b, nir_ieq_imm(b, val, 2)));
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_first_vertex:
|
||||
case nir_intrinsic_load_base_instance:
|
||||
case nir_intrinsic_load_draw_id:
|
||||
case nir_intrinsic_load_input_assembly_buffer_agx: {
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
unsigned base = *vs_uniform_base;
|
||||
unsigned size = 32;
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_base_instance) {
|
||||
base += 2;
|
||||
} else if (intrin->intrinsic == nir_intrinsic_load_draw_id) {
|
||||
base += 4;
|
||||
size = 16;
|
||||
} else if (intrin->intrinsic ==
|
||||
nir_intrinsic_load_input_assembly_buffer_agx) {
|
||||
base += 8;
|
||||
size = 64;
|
||||
}
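/* Implied layout of the VS argument uniforms relative to vs_uniform_base,
 * apparently in 16-bit uniform units: first/base vertex at +0 (32-bit),
 * base instance at +2 (32-bit), draw ID at +4 (16-bit) and the input
 * assembly buffer pointer at +8 (64-bit).
 */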
|
||||
|
||||
nir_def *val = nir_load_preamble(b, 1, size, .base = base);
|
||||
nir_def_rewrite_uses(&intrin->def,
|
||||
nir_u2uN(b, val, intrin->def.bit_size));
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_stat_query_address_agx: {
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
unsigned off1 = hk_root_descriptor_offset(draw.pipeline_stats);
|
||||
unsigned off2 = hk_root_descriptor_offset(draw.pipeline_stats_flags);
|
||||
|
||||
nir_def *base = load_root(b, 1, 64, nir_imm_int(b, off1), 8);
|
||||
nir_def *flags = load_root(b, 1, 16, nir_imm_int(b, off2), 2);
|
||||
|
||||
unsigned query = nir_intrinsic_base(intrin);
|
||||
VkQueryPipelineStatisticFlagBits bit = translate_pipeline_stat_bit(query);
|
||||
|
||||
/* Prefix sum to find the compacted offset */
|
||||
nir_def *idx = nir_bit_count(b, nir_iand_imm(b, flags, bit - 1));
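/* Worked example: if the enabled statistics are INPUT_ASSEMBLY_VERTICES,
 * CLIPPING_INVOCATIONS and FRAGMENT_SHADER_INVOCATIONS, then for
 * bit = FRAGMENT_SHADER_INVOCATIONS the mask (bit - 1) keeps the two
 * lower enabled bits, so idx = 2: that counter is the third report in
 * the compacted array.
 */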
|
||||
nir_def *addr = nir_iadd(
|
||||
b, base, nir_imul_imm(b, nir_u2u64(b, idx), sizeof(uint64_t)));
|
||||
|
||||
/* The above returns garbage if the query isn't actually enabled, handle
|
||||
* that case.
|
||||
*
|
||||
* TODO: Optimize case where we *know* the query is present?
|
||||
*/
|
||||
nir_def *present = nir_ine_imm(b, nir_iand_imm(b, flags, bit), 0);
|
||||
addr = nir_bcsel(b, present, addr, nir_imm_int64(b, 0));
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, addr);
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
hk_lower_uvs_index(nir_shader *s, unsigned vs_uniform_base)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(
|
||||
s, lower_uvs_index, nir_metadata_control_flow, &vs_uniform_base);
|
||||
}
|
||||
|
||||
static bool
|
||||
try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_constant:
|
||||
return lower_load_constant(b, intrin, ctx);
|
||||
|
||||
case nir_intrinsic_load_vulkan_descriptor:
|
||||
return try_lower_load_vulkan_descriptor(b, intrin, ctx);
|
||||
|
||||
case nir_intrinsic_load_workgroup_size:
|
||||
unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
|
||||
|
||||
case nir_intrinsic_load_base_workgroup_id:
|
||||
return lower_sysval_to_root_table(b, intrin, cs.base_group);
|
||||
|
||||
case nir_intrinsic_load_push_constant:
|
||||
return lower_load_push_constant(b, intrin, ctx);
|
||||
|
||||
case nir_intrinsic_load_view_index:
|
||||
return lower_sysval_to_root_table(b, intrin, draw.view_index);
|
||||
|
||||
case nir_intrinsic_image_deref_load:
|
||||
case nir_intrinsic_image_deref_sparse_load:
|
||||
case nir_intrinsic_image_deref_store:
|
||||
case nir_intrinsic_image_deref_atomic:
|
||||
case nir_intrinsic_image_deref_atomic_swap:
|
||||
case nir_intrinsic_image_deref_size:
|
||||
case nir_intrinsic_image_deref_samples:
|
||||
return lower_image_intrin(b, intrin, ctx);
|
||||
|
||||
case nir_intrinsic_load_num_workgroups: {
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
unsigned offset = hk_root_descriptor_offset(cs.group_count_addr);
|
||||
nir_def *ptr = load_root(b, 1, 64, nir_imm_int(b, offset), 4);
|
||||
nir_def *val = load_speculatable(b, 3, 32, ptr, 4);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_tex(nir_builder *b, nir_tex_instr *tex,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
nir_def *texture = nir_steal_tex_src(tex, nir_tex_src_texture_deref);
|
||||
nir_def *sampler = nir_steal_tex_src(tex, nir_tex_src_sampler_deref);
|
||||
if (!texture) {
|
||||
assert(!sampler);
|
||||
return false;
|
||||
}
|
||||
|
||||
nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
|
||||
const uint32_t plane =
|
||||
plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
|
||||
const uint64_t plane_offset_B =
|
||||
plane * sizeof(struct hk_sampled_image_descriptor);
|
||||
|
||||
/* LOD bias is passed in the descriptor set, rather than embedded into
|
||||
* the sampler descriptor. There's no spot in the hardware descriptor,
|
||||
* plus this saves on precious sampler heap spots.
|
||||
*/
|
||||
if (tex->op == nir_texop_lod_bias_agx) {
|
||||
unsigned offs =
|
||||
offsetof(struct hk_sampled_image_descriptor, lod_bias_fp16);
|
||||
|
||||
nir_def *bias = load_resource_deref_desc(
|
||||
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
|
||||
plane_offset_B + offs, ctx);
|
||||
|
||||
nir_def_replace(&tex->def, bias);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tex->op == nir_texop_has_custom_border_color_agx) {
|
||||
unsigned offs = offsetof(struct hk_sampled_image_descriptor, has_border);
|
||||
|
||||
nir_def *res = load_resource_deref_desc(
|
||||
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
|
||||
plane_offset_B + offs, ctx);
|
||||
|
||||
nir_def_replace(&tex->def, nir_ine_imm(b, res, 0));
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tex->op == nir_texop_custom_border_color_agx) {
|
||||
unsigned offs = offsetof(struct hk_sampled_image_descriptor, border);
|
||||
|
||||
nir_def *border = load_resource_deref_desc(
|
||||
b, 4, 32, nir_src_as_deref(nir_src_for_ssa(sampler)),
|
||||
plane_offset_B + offs, ctx);
|
||||
|
||||
nir_alu_type T = nir_alu_type_get_base_type(tex->dest_type);
|
||||
border = nir_convert_to_bit_size(b, border, T, tex->def.bit_size);
|
||||
|
||||
nir_def_replace(&tex->def, border);
|
||||
return true;
|
||||
}
|
||||
|
||||
{
|
||||
unsigned offs =
|
||||
offsetof(struct hk_sampled_image_descriptor, image_offset);
|
||||
|
||||
nir_def *offset = load_resource_deref_desc(
|
||||
b, 1, 32, nir_src_as_deref(nir_src_for_ssa(texture)),
|
||||
plane_offset_B + offs, ctx);
|
||||
|
||||
nir_def *handle = image_heap_handle(b, offset);
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_texture_handle, handle);
|
||||
}
|
||||
|
||||
if (sampler != NULL) {
|
||||
unsigned offs =
|
||||
offsetof(struct hk_sampled_image_descriptor, sampler_index);
|
||||
|
||||
if (tex->backend_flags & AGX_TEXTURE_FLAG_CLAMP_TO_0) {
|
||||
offs =
|
||||
offsetof(struct hk_sampled_image_descriptor, clamp_0_sampler_index);
|
||||
}
|
||||
|
||||
nir_def *index = load_resource_deref_desc(
|
||||
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
|
||||
plane_offset_B + offs, ctx);
|
||||
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, index);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
try_lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *_data)
|
||||
{
|
||||
const struct lower_descriptors_ctx *ctx = _data;
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_tex:
|
||||
return lower_tex(b, nir_instr_as_tex(instr), ctx);
|
||||
case nir_instr_type_intrinsic:
|
||||
return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
|
||||
if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
|
||||
desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
uint32_t set = nir_intrinsic_desc_set(intrin);
|
||||
uint32_t binding = nir_intrinsic_binding(intrin);
|
||||
nir_def *index = intrin->src[0].ssa;
|
||||
|
||||
const struct hk_descriptor_set_binding_layout *binding_layout =
|
||||
get_binding_layout(set, binding, ctx);
|
||||
|
||||
nir_def *binding_addr;
|
||||
uint8_t binding_stride;
|
||||
switch (binding_layout->type) {
|
||||
case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
|
||||
nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
|
||||
binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
|
||||
binding_stride = binding_layout->stride;
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
const uint32_t root_desc_addr_offset =
|
||||
hk_root_descriptor_offset(root_desc_addr);
|
||||
|
||||
nir_def *root_desc_addr =
|
||||
load_root(b, 1, 64, nir_imm_int(b, root_desc_addr_offset), 8);
|
||||
|
||||
nir_def *dynamic_buffer_start =
|
||||
nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
|
||||
binding_layout->dynamic_buffer_index);
|
||||
|
||||
nir_def *dynamic_binding_offset =
|
||||
nir_iadd_imm(b,
|
||||
nir_imul_imm(b, dynamic_buffer_start,
|
||||
sizeof(struct hk_buffer_address)),
|
||||
hk_root_descriptor_offset(dynamic_buffers));
|
||||
|
||||
binding_addr =
|
||||
nir_iadd(b, root_desc_addr, nir_u2u64(b, dynamic_binding_offset));
|
||||
binding_stride = sizeof(struct hk_buffer_address);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Not an SSBO descriptor");
|
||||
}
|
||||
|
||||
/* Tuck the stride in the top 8 bits of the binding address */
|
||||
binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
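/* Resulting packed layout: bits [55:0] hold the binding's base address and
 * bits [63:56] the descriptor stride. Consumers recover them below: the
 * reindex path shifts the high 32-bit word right by 24 to get the stride,
 * and the load path masks with BITFIELD64_MASK(56) to get the address.
 */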
|
||||
|
||||
const uint32_t binding_size = binding_layout->array_size * binding_stride;
|
||||
nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
|
||||
|
||||
nir_def *addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
|
||||
nir_unpack_64_2x32_split_y(b, binding_addr),
|
||||
nir_imm_int(b, binding_size), offset_in_binding);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, addr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
|
||||
if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
|
||||
desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_def *addr = intrin->src[0].ssa;
|
||||
nir_def *index = intrin->src[1].ssa;
|
||||
|
||||
nir_def *addr_high32 = nir_channel(b, addr, 1);
|
||||
nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
|
||||
nir_def *offset = nir_imul(b, index, stride);
|
||||
|
||||
addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format, nir_var_mem_ssbo,
|
||||
offset);
|
||||
nir_def_rewrite_uses(&intrin->def, addr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
|
||||
if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
|
||||
desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_def *addr = intrin->src[0].ssa;
|
||||
|
||||
nir_def *desc;
|
||||
switch (ctx->ssbo_addr_format) {
|
||||
case nir_address_format_64bit_global_32bit_offset: {
|
||||
nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
|
||||
nir_def *offset = nir_channel(b, addr, 3);
|
||||
/* Mask off the binding stride */
|
||||
base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
|
||||
desc = nir_load_global_constant_offset(b, 4, 32, base, offset,
|
||||
.align_mul = 16, .align_offset = 0,
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_address_format_64bit_bounded_global: {
|
||||
nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
|
||||
nir_def *size = nir_channel(b, addr, 2);
|
||||
nir_def *offset = nir_channel(b, addr, 3);
|
||||
/* Mask off the binding stride */
|
||||
base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
|
||||
desc = nir_load_global_constant_bounded(
|
||||
b, 4, 32, base, offset, size, .align_mul = 16, .align_offset = 0,
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unknown address mode");
|
||||
}
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, desc);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intr, void *_data)
|
||||
{
|
||||
const struct lower_descriptors_ctx *ctx = _data;
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_vulkan_resource_index:
|
||||
return lower_ssbo_resource_index(b, intr, ctx);
|
||||
case nir_intrinsic_vulkan_resource_reindex:
|
||||
return lower_ssbo_resource_reindex(b, intr, ctx);
|
||||
case nir_intrinsic_load_vulkan_descriptor:
|
||||
return lower_load_ssbo_descriptor(b, intr, ctx);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
hk_nir_lower_descriptors(nir_shader *nir,
|
||||
const struct vk_pipeline_robustness_state *rs,
|
||||
uint32_t set_layout_count,
|
||||
struct vk_descriptor_set_layout *const *set_layouts)
|
||||
{
|
||||
struct lower_descriptors_ctx ctx = {
|
||||
.clamp_desc_array_bounds =
|
||||
rs->storage_buffers !=
|
||||
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
|
||||
|
||||
rs->uniform_buffers !=
|
||||
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
|
||||
|
||||
rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
|
||||
|
||||
.ssbo_addr_format = hk_buffer_addr_format(rs->storage_buffers),
|
||||
.ubo_addr_format = hk_buffer_addr_format(rs->uniform_buffers),
|
||||
};
|
||||
|
||||
assert(set_layout_count <= HK_MAX_SETS);
|
||||
for (uint32_t s = 0; s < set_layout_count; s++) {
|
||||
if (set_layouts[s] != NULL)
|
||||
ctx.set_layouts[s] = vk_to_hk_descriptor_set_layout(set_layouts[s]);
|
||||
}
|
||||
|
||||
/* First lower everything but complex SSBOs, then lower complex SSBOs.
|
||||
*
|
||||
* TODO: See if we can unify this, not sure if the fast path matters on
|
||||
* Apple. This is inherited from NVK.
|
||||
*/
|
||||
bool pass_lower_descriptors = nir_shader_instructions_pass(
|
||||
nir, try_lower_descriptors_instr, nir_metadata_control_flow, &ctx);
|
||||
|
||||
bool pass_lower_ssbo = nir_shader_intrinsics_pass(
|
||||
nir, lower_ssbo_descriptor, nir_metadata_control_flow, &ctx);
|
||||
|
||||
return pass_lower_descriptors || pass_lower_ssbo;
|
||||
}
|
||||
112  src/asahi/vulkan/hk_nir_passthrough_gs.c (new file)
@@ -0,0 +1,112 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/bitscan.h"
|
||||
#include "hk_shader.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_xfb_info.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
void
|
||||
hk_nir_passthrough_gs(nir_builder *b, const void *key_)
|
||||
{
|
||||
nir_shader *s = b->shader;
|
||||
const struct hk_passthrough_gs_key *key = key_;
|
||||
assert(key->prim == u_decomposed_prim(key->prim));
|
||||
assert(key->prim != MESA_PRIM_PATCHES && "tessellation consumes patches");
|
||||
|
||||
enum mesa_prim out;
|
||||
if (key->prim == MESA_PRIM_POINTS)
|
||||
out = MESA_PRIM_POINTS;
|
||||
else if (u_reduced_prim(key->prim) == MESA_PRIM_LINES)
|
||||
out = MESA_PRIM_LINE_STRIP;
|
||||
else
|
||||
out = MESA_PRIM_TRIANGLE_STRIP;
|
||||
|
||||
#if 0
|
||||
assert((key->outputs &
|
||||
(VARYING_BIT_BOUNDING_BOX0 | VARYING_BIT_BOUNDING_BOX1)) == 0 &&
|
||||
"cull distance lowering not run yet");
|
||||
#endif
|
||||
/* XXX: need rework of preprocess_nir */
|
||||
uint64_t outputs =
|
||||
key->outputs & ~(VARYING_BIT_BOUNDING_BOX0 | VARYING_BIT_BOUNDING_BOX1);
|
||||
|
||||
s->info.outputs_written = s->info.inputs_read = outputs;
|
||||
s->info.clip_distance_array_size = key->clip_distance_array_size;
|
||||
s->info.cull_distance_array_size = key->cull_distance_array_size;
|
||||
s->info.stage = MESA_SHADER_GEOMETRY;
|
||||
s->info.gs.input_primitive = key->prim;
|
||||
s->info.gs.output_primitive = out;
|
||||
s->info.gs.vertices_in = mesa_vertices_per_prim(key->prim);
|
||||
s->info.gs.vertices_out = mesa_vertices_per_prim(out);
|
||||
s->info.gs.invocations = 1;
|
||||
s->info.gs.active_stream_mask = 1;
|
||||
|
||||
if (key->xfb_info.output_count) {
|
||||
size_t size = nir_xfb_info_size(key->xfb_info.output_count);
|
||||
s->xfb_info = ralloc_memdup(s, &key->xfb_info, size);
|
||||
s->info.has_transform_feedback_varyings = true;
|
||||
memcpy(s->info.xfb_stride, key->xfb_stride, sizeof(key->xfb_stride));
|
||||
}
|
||||
|
||||
unsigned int start_vert = key->prim == MESA_PRIM_LINES_ADJACENCY ? 1 : 0;
|
||||
unsigned int step = key->prim == MESA_PRIM_TRIANGLES_ADJACENCY ? 2 : 1;
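/* For plain primitives every input vertex is copied through. For adjacency
 * primitives only the non-adjacent vertices are: lines-with-adjacency input
 * is (adj, v0, v1, adj), so we start at vertex 1, and triangles-with-adjacency
 * interleave the adjacent vertices at odd positions, so stepping by 2 picks
 * vertices 0, 2 and 4.
 */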
|
||||
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
nir_def *one = nir_imm_int(b, 1);
|
||||
|
||||
for (unsigned i = 0; i < s->info.gs.vertices_out; ++i) {
|
||||
nir_def *vertex = nir_imm_int(b, start_vert + (i * step));
|
||||
|
||||
/* Copy inputs to outputs. */
|
||||
u_foreach_bit64(loc, outputs) {
|
||||
unsigned adjusted_loc = loc;
|
||||
nir_def *offset = zero;
|
||||
unsigned num_slots = 1;
|
||||
|
||||
bool scalar = loc == VARYING_SLOT_LAYER ||
|
||||
loc == VARYING_SLOT_VIEW_INDEX ||
|
||||
loc == VARYING_SLOT_VIEWPORT || loc == VARYING_SLOT_PSIZ;
|
||||
unsigned comps = scalar ? 1 : 4;
|
||||
|
||||
/* We use combined, compact clip/cull */
|
||||
if (loc == VARYING_SLOT_CLIP_DIST1 || loc == VARYING_SLOT_CULL_DIST1) {
|
||||
adjusted_loc--;
|
||||
offset = one;
|
||||
}
|
||||
|
||||
if (adjusted_loc == VARYING_SLOT_CLIP_DIST0 ||
|
||||
adjusted_loc == VARYING_SLOT_CULL_DIST0) {
|
||||
num_slots =
|
||||
key->cull_distance_array_size + key->clip_distance_array_size;
|
||||
|
||||
if (loc > adjusted_loc)
|
||||
comps = num_slots - 4;
|
||||
else
|
||||
comps = MIN2(num_slots, 4);
|
||||
}
|
||||
|
||||
nir_io_semantics sem = {
|
||||
.location = adjusted_loc,
|
||||
.num_slots = num_slots,
|
||||
};
|
||||
|
||||
nir_def *val = nir_load_per_vertex_input(b, comps, 32, vertex, offset,
|
||||
.io_semantics = sem);
|
||||
|
||||
for (unsigned c = 0; c < comps; ++c) {
|
||||
nir_store_output(b, nir_channel(b, val, c), offset,
|
||||
.io_semantics = sem, .src_type = nir_type_uint32,
|
||||
.component = c);
|
||||
}
|
||||
}
|
||||
|
||||
nir_emit_vertex(b, 0);
|
||||
}
|
||||
}
|
||||
1417  src/asahi/vulkan/hk_physical_device.c (new file; diff suppressed because it is too large)
76  src/asahi/vulkan/hk_physical_device.h (new file)
@@ -0,0 +1,76 @@
/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "asahi/lib/agx_device.h"
#include <sys/types.h>
#include "hk_private.h"
#include "vk_physical_device.h"
#include "vk_sync.h"
#include "wsi_common.h"

struct hk_instance;
struct hk_physical_device;

struct hk_queue_family {
   VkQueueFlags queue_flags;
   uint32_t queue_count;
};

struct hk_memory_heap {
   uint64_t size;
   uint64_t used;
   VkMemoryHeapFlags flags;
   uint64_t (*available)(struct hk_physical_device *pdev);
};

struct hk_physical_device {
   struct vk_physical_device vk;
   dev_t render_dev;
   int master_fd;

   /* Only used for VK_EXT_memory_budget */
   struct agx_device dev;

   struct wsi_device wsi_device;

   uint8_t device_uuid[VK_UUID_SIZE];

   // TODO: add mappable VRAM heap if possible
   struct hk_memory_heap mem_heaps[3];
   VkMemoryType mem_types[3];
   uint8_t mem_heap_count;
   uint8_t mem_type_count;

   struct hk_queue_family queue_families[3];
   uint8_t queue_family_count;

   struct vk_sync_type syncobj_sync_type;
   const struct vk_sync_type *sync_types[2];
};

VK_DEFINE_HANDLE_CASTS(hk_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)

static inline struct hk_instance *
hk_physical_device_instance(struct hk_physical_device *pdev)
{
   return (struct hk_instance *)pdev->vk.instance;
}

VkResult hk_create_drm_physical_device(struct vk_instance *vk_instance,
                                       struct _drmDevice *drm_device,
                                       struct vk_physical_device **pdev_out);

void hk_physical_device_destroy(struct vk_physical_device *vk_device);

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) ||                                  \
   defined(VK_USE_PLATFORM_XCB_KHR) || defined(VK_USE_PLATFORM_XLIB_KHR) ||  \
   defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define HK_USE_WSI_PLATFORM
#endif
53  src/asahi/vulkan/hk_private.h (new file)
@@ -0,0 +1,53 @@
/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include <assert.h>

#include "vk_log.h"
#include "vk_util.h"

#define HK_MAX_SETS 8
#define HK_MAX_PUSH_SIZE 128
#define HK_MAX_DYNAMIC_BUFFERS 64
#define HK_MAX_RTS 8
#define HK_MIN_SSBO_ALIGNMENT 16
#define HK_MIN_TEXEL_BUFFER_ALIGNMENT 16
#define HK_MIN_UBO_ALIGNMENT 64
#define HK_MAX_VIEWPORTS 16
#define HK_MAX_DESCRIPTOR_SIZE 32
#define HK_MAX_PUSH_DESCRIPTORS 32
#define HK_MAX_DESCRIPTOR_SET_SIZE (1u << 30)
#define HK_MAX_DESCRIPTORS (1 << 20)
#define HK_PUSH_DESCRIPTOR_SET_SIZE                                           \
   (HK_MAX_PUSH_DESCRIPTORS * HK_MAX_DESCRIPTOR_SIZE)
#define HK_SSBO_BOUNDS_CHECK_ALIGNMENT 4
#define HK_MAX_MULTIVIEW_VIEW_COUNT 32

#define HK_SPARSE_ADDR_SPACE_SIZE (1ull << 39)
#define HK_MAX_BUFFER_SIZE (1ull << 31)
#define HK_MAX_SHARED_SIZE (32 * 1024)

struct hk_addr_range {
   uint64_t addr;
   uint64_t range;
};

#define perf_debug(dev, fmt, ...)                                             \
   do {                                                                       \
      if (dev->dev.debug & AGX_DBG_PERF)                                      \
         mesa_log(MESA_LOG_WARN, (MESA_LOG_TAG), (fmt), ##__VA_ARGS__);       \
   } while (0)

/* Fake values, pending UAPI upstreaming */
#ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
#define DRM_FORMAT_MOD_APPLE_TWIDDLED (2)
#endif
#ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED
#define DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED (3)
#endif
580  src/asahi/vulkan/hk_query_pool.c (new file)
@@ -0,0 +1,580 @@
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_query_pool.h"
|
||||
|
||||
#include "agx_compile.h"
|
||||
#include "agx_pack.h"
|
||||
#include "hk_buffer.h"
|
||||
#include "hk_cmd_buffer.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_event.h"
|
||||
#include "hk_physical_device.h"
|
||||
#include "hk_shader.h"
|
||||
|
||||
#include "shader_enums.h"
|
||||
#include "vk_common_entrypoints.h"
|
||||
#include "vk_meta.h"
|
||||
#include "vk_pipeline.h"
|
||||
|
||||
#include "asahi/lib/agx_bo.h"
|
||||
#include "asahi/lib/libagx_shaders.h"
|
||||
#include "asahi/lib/shaders/query.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
#include "util/os_time.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
struct hk_query_report {
|
||||
/* TODO: do we want this to be legit u64? */
|
||||
uint32_t value;
|
||||
uint32_t padding;
|
||||
};
|
||||
|
||||
static uint16_t *
|
||||
hk_pool_oq_index_ptr(const struct hk_query_pool *pool)
|
||||
{
|
||||
return (uint16_t *)(pool->bo->ptr.cpu + pool->query_start);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
hk_reports_per_query(struct hk_query_pool *pool)
|
||||
{
|
||||
switch (pool->vk.query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
return 1;
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||
return util_bitcount(pool->vk.pipeline_statistics);
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
// Primitives succeeded and primitives needed
|
||||
return 2;
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
VkQueryPool *pQueryPool)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_query_pool *pool;
|
||||
|
||||
bool occlusion = pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION;
|
||||
unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0;
|
||||
|
||||
pool =
|
||||
vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool));
|
||||
if (!pool)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
/* We place the availability first and then data */
|
||||
pool->query_start = align(pool->vk.query_count * sizeof(uint32_t),
|
||||
sizeof(struct hk_query_report));
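/* Resulting BO layout: query_count 32-bit availability words come first,
 * then, aligned to the report size, either a uint16_t remap table into the
 * device-wide occlusion query heap (occlusion pools) or query_stride bytes
 * of report data per query (all other pool types), matching the bo_size
 * computation below.
 */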
|
||||
|
||||
uint32_t reports_per_query = hk_reports_per_query(pool);
|
||||
pool->query_stride = reports_per_query * sizeof(struct hk_query_report);
|
||||
|
||||
if (pool->vk.query_count > 0) {
|
||||
uint32_t bo_size = pool->query_start;
|
||||
|
||||
/* For occlusion queries, we stick the query index remapping here */
|
||||
if (occlusion_queries)
|
||||
bo_size += sizeof(uint16_t) * pool->vk.query_count;
|
||||
else
|
||||
bo_size += pool->query_stride * pool->vk.query_count;
|
||||
|
||||
pool->bo =
|
||||
agx_bo_create(&dev->dev, bo_size, AGX_BO_WRITEBACK, "Query pool");
|
||||
if (!pool->bo) {
|
||||
hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool), pAllocator);
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
}
|
||||
|
||||
uint16_t *oq_index = hk_pool_oq_index_ptr(pool);
|
||||
|
||||
for (unsigned i = 0; i < occlusion_queries; ++i) {
|
||||
uint64_t zero = 0;
|
||||
unsigned index;
|
||||
|
||||
VkResult result = hk_descriptor_table_add(
|
||||
dev, &dev->occlusion_queries, &zero, sizeof(uint64_t), &index);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool), pAllocator);
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
/* We increment as we go so we can clean up properly if we run out */
|
||||
assert(pool->oq_queries < occlusion_queries);
|
||||
oq_index[pool->oq_queries++] = index;
|
||||
}
|
||||
|
||||
*pQueryPool = hk_query_pool_to_handle(pool);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
|
||||
if (!pool)
|
||||
return;
|
||||
|
||||
uint16_t *oq_index = hk_pool_oq_index_ptr(pool);
|
||||
|
||||
for (unsigned i = 0; i < pool->oq_queries; ++i) {
|
||||
hk_descriptor_table_remove(dev, &dev->occlusion_queries, oq_index[i]);
|
||||
}
|
||||
|
||||
agx_bo_unreference(pool->bo);
|
||||
vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
hk_query_available_addr(struct hk_query_pool *pool, uint32_t query)
|
||||
{
|
||||
assert(query < pool->vk.query_count);
|
||||
return pool->bo->ptr.gpu + query * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
static uint32_t *
|
||||
hk_query_available_map(struct hk_query_pool *pool, uint32_t query)
|
||||
{
|
||||
assert(query < pool->vk.query_count);
|
||||
return (uint32_t *)pool->bo->ptr.cpu + query;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
hk_query_offset(struct hk_query_pool *pool, uint32_t query)
|
||||
{
|
||||
assert(query < pool->vk.query_count);
|
||||
return pool->query_start + query * pool->query_stride;
|
||||
}
|
||||
|
||||
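/* Occlusion query results live in the device-wide occlusion heap, indexed
 * through the pool's remap table; all other query types keep their reports in
 * the pool's own BO after the availability words.
 */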
static uint64_t
|
||||
hk_query_report_addr(struct hk_device *dev, struct hk_query_pool *pool,
|
||||
uint32_t query)
|
||||
{
|
||||
if (pool->oq_queries) {
|
||||
uint16_t *oq_index = hk_pool_oq_index_ptr(pool);
|
||||
return dev->occlusion_queries.bo->ptr.gpu +
|
||||
(oq_index[query] * sizeof(uint64_t));
|
||||
} else {
|
||||
return pool->bo->ptr.gpu + hk_query_offset(pool, query);
|
||||
}
|
||||
}
|
||||
|
||||
static struct hk_query_report *
|
||||
hk_query_report_map(struct hk_device *dev, struct hk_query_pool *pool,
|
||||
uint32_t query)
|
||||
{
|
||||
if (pool->oq_queries) {
|
||||
uint64_t *queries = (uint64_t *)dev->occlusion_queries.bo->ptr.cpu;
|
||||
uint16_t *oq_index = hk_pool_oq_index_ptr(pool);
|
||||
|
||||
return (struct hk_query_report *)&queries[oq_index[query]];
|
||||
} else {
|
||||
return (void *)((char *)pool->bo->ptr.cpu + hk_query_offset(pool, query));
|
||||
}
|
||||
}
|
||||
|
||||
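/* Uniforms consumed by hk_nir_write_u32, uploaded along with the meta kernel's
 * USC words in hk_queue_write.
 */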
struct hk_write_params {
|
||||
uint64_t address;
|
||||
uint32_t value;
|
||||
};
|
||||
|
||||
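/* Meta kernel that loads an address and a 32-bit value from the preamble and
 * stores the value to that address. The offsetof() values are divided by 2
 * since preamble bases are counted in 16-bit units.
 */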
static void
|
||||
hk_nir_write_u32(nir_builder *b, UNUSED const void *key)
|
||||
{
|
||||
nir_def *addr = nir_load_preamble(
|
||||
b, 1, 64, .base = offsetof(struct hk_write_params, address) / 2);
|
||||
|
||||
nir_def *value = nir_load_preamble(
|
||||
b, 1, 32, .base = offsetof(struct hk_write_params, value) / 2);
|
||||
|
||||
nir_store_global(b, addr, 4, value, nir_component_mask(1));
|
||||
}
|
||||
|
||||
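/* Enqueue a GPU-side 32-bit store, either on the current compute control
 * stream or after all graphics work in the command buffer (after_gfx). Used to
 * write query availability from the GPU timeline.
 */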
void
|
||||
hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
|
||||
bool after_gfx)
|
||||
{
|
||||
struct hk_cs *cs = hk_cmd_buffer_get_cs_general(
|
||||
cmd, after_gfx ? &cmd->current_cs.post_gfx : &cmd->current_cs.cs, true);
|
||||
if (!cs)
|
||||
return;
|
||||
|
||||
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
|
||||
|
||||
/* As soon as we mark a query available, it needs to be available system
|
||||
* wide, otherwise a CPU-side get result can race. As such, we cache flush
|
||||
* before and then let coherency work its magic. Without this barrier, we
|
||||
* get flakes in
|
||||
*
|
||||
* dEQP-VK.query_pool.occlusion_query.get_results_conservative_size_64_wait_query_without_availability_draw_triangles_discard
|
||||
*/
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
hk_cdm_cache_flush(dev, cs);
|
||||
|
||||
struct hk_shader *s = hk_meta_kernel(dev, hk_nir_write_u32, NULL, 0);
|
||||
struct hk_write_params params = {.address = address, .value = value};
|
||||
uint32_t usc = hk_upload_usc_words_kernel(cmd, s, ¶ms, sizeof(params));
|
||||
|
||||
hk_dispatch_with_usc(dev, cs, s, usc, hk_grid(1, 1, 1), hk_grid(1, 1, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Goes through a series of consecutive query indices in the given pool,
|
||||
* setting all element values to 0 and emitting them as available.
|
||||
*/
|
||||
static void
|
||||
emit_zero_queries(struct hk_cmd_buffer *cmd, struct hk_query_pool *pool,
|
||||
uint32_t first_index, uint32_t num_queries,
|
||||
bool set_available)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
|
||||
for (uint32_t i = 0; i < num_queries; i++) {
|
||||
uint64_t available = hk_query_available_addr(pool, first_index + i);
|
||||
uint64_t report = hk_query_report_addr(dev, pool, first_index + i);
|
||||
hk_queue_write(cmd, available, set_available, false);
|
||||
|
||||
/* XXX: is this supposed to happen on the begin? */
|
||||
for (unsigned j = 0; j < hk_reports_per_query(pool); ++j) {
|
||||
hk_queue_write(cmd, report + (j * sizeof(struct hk_query_report)), 0,
|
||||
false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_ResetQueryPool(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery,
|
||||
uint32_t queryCount)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
|
||||
uint32_t *available = hk_query_available_map(pool, firstQuery);
|
||||
struct hk_query_report *reports = hk_query_report_map(dev, pool, firstQuery);
|
||||
|
||||
memset(available, 0, queryCount * sizeof(*available));
|
||||
memset(reports, 0, queryCount * pool->query_stride);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
||||
uint32_t firstQuery, uint32_t queryCount)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
|
||||
emit_zero_queries(cmd, pool, firstQuery, queryCount, false);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
||||
VkPipelineStageFlags2 stage, VkQueryPool queryPool,
|
||||
uint32_t query)
|
||||
{
|
||||
unreachable("todo");
|
||||
#if 0
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
|
||||
struct nv_push *p = hk_cmd_buffer_push(cmd, 10);
|
||||
|
||||
uint64_t report_addr = hk_query_report_addr(pool, query);
|
||||
P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
|
||||
.operation = OPERATION_REPORT_ONLY,
|
||||
.pipeline_location = vk_stage_flags_to_nv9097_pipeline_location(stage),
|
||||
.structure_size = STRUCTURE_SIZE_FOUR_WORDS,
|
||||
});
|
||||
|
||||
uint64_t available_addr = hk_query_available_addr(pool, query);
|
||||
P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_A(p, available_addr >> 32);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_B(p, available_addr);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
|
||||
P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
|
||||
.operation = OPERATION_RELEASE,
|
||||
.release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
|
||||
.pipeline_location = PIPELINE_LOCATION_ALL,
|
||||
.structure_size = STRUCTURE_SIZE_ONE_WORD,
|
||||
});
|
||||
|
||||
/* From the Vulkan spec:
|
||||
*
|
||||
* "If vkCmdWriteTimestamp2 is called while executing a render pass
|
||||
* instance that has multiview enabled, the timestamp uses N consecutive
|
||||
* query indices in the query pool (starting at query) where N is the
|
||||
* number of bits set in the view mask of the subpass the command is
|
||||
* executed in. The resulting query values are determined by an
|
||||
* implementation-dependent choice of one of the following behaviors:"
|
||||
*
|
||||
* In our case, only the first query is used, so we emit zeros for the
|
||||
* remaining queries, as described in the first behavior listed in the
|
||||
* Vulkan spec:
|
||||
*
|
||||
* "The first query is a timestamp value and (if more than one bit is set
|
||||
* in the view mask) zero is written to the remaining queries."
|
||||
*/
|
||||
if (cmd->state.gfx.render.view_mask != 0) {
|
||||
const uint32_t num_queries =
|
||||
util_bitcount(cmd->state.gfx.render.view_mask);
|
||||
if (num_queries > 1)
|
||||
emit_zero_queries(cmd, pool, query + 1, num_queries - 1, true);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
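/* Common implementation of the Begin/EndQuery(Indexed) entrypoints: begin
 * points the relevant draw-time state at the query's report, end detaches it
 * and marks the query available once the corresponding work completes.
 */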
static void
|
||||
hk_cmd_begin_end_query(struct hk_cmd_buffer *cmd, struct hk_query_pool *pool,
|
||||
uint32_t query, uint32_t index,
|
||||
VkQueryControlFlags flags, bool end)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
bool graphics = false;
|
||||
|
||||
switch (pool->vk.query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION: {
|
||||
assert(query < pool->oq_queries);
|
||||
|
||||
if (end) {
|
||||
cmd->state.gfx.occlusion.mode = AGX_VISIBILITY_MODE_NONE;
|
||||
} else {
|
||||
cmd->state.gfx.occlusion.mode = flags & VK_QUERY_CONTROL_PRECISE_BIT
|
||||
? AGX_VISIBILITY_MODE_COUNTING
|
||||
: AGX_VISIBILITY_MODE_BOOLEAN;
|
||||
}
|
||||
|
||||
uint16_t *oq_index = hk_pool_oq_index_ptr(pool);
|
||||
cmd->state.gfx.occlusion.index = oq_index[query];
|
||||
cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
|
||||
uint64_t addr = hk_query_report_addr(dev, pool, query);
|
||||
cmd->state.gfx.xfb_query[index] = end ? 0 : addr;
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
|
||||
struct hk_root_descriptor_table *root = &cmd->state.gfx.descriptors.root;
|
||||
cmd->state.gfx.descriptors.root_dirty = true;
|
||||
|
||||
root->draw.pipeline_stats = hk_query_report_addr(dev, pool, query);
|
||||
root->draw.pipeline_stats_flags = pool->vk.pipeline_statistics;
|
||||
|
||||
/* XXX: I don't think this is correct... when does the query become available
|
||||
* exactly?
|
||||
*/
|
||||
graphics = pool->vk.pipeline_statistics &
|
||||
~VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
|
||||
/* We need to set available=1 after the graphics work finishes. */
|
||||
if (end) {
|
||||
hk_queue_write(cmd, hk_query_available_addr(pool, query), 1, graphics);
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
||||
uint32_t query, VkQueryControlFlags flags,
|
||||
uint32_t index)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
|
||||
hk_cmd_begin_end_query(cmd, pool, query, index, flags, false);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
||||
uint32_t query, uint32_t index)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
|
||||
hk_cmd_begin_end_query(cmd, pool, query, index, 0, true);
|
||||
|
||||
/* From the Vulkan spec:
|
||||
*
|
||||
* "If queries are used while executing a render pass instance that has
|
||||
* multiview enabled, the query uses N consecutive query indices in
|
||||
* the query pool (starting at query) where N is the number of bits set
|
||||
* in the view mask in the subpass the query is used in. How the
|
||||
* numerical results of the query are distributed among the queries is
|
||||
* implementation-dependent."
|
||||
*
|
||||
* In our case, only the first query is used, so we emit zeros for the
|
||||
* remaining queries.
|
||||
*/
|
||||
if (cmd->state.gfx.render.view_mask != 0) {
|
||||
const uint32_t num_queries =
|
||||
util_bitcount(cmd->state.gfx.render.view_mask);
|
||||
if (num_queries > 1)
|
||||
emit_zero_queries(cmd, pool, query + 1, num_queries - 1, true);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
hk_query_is_available(struct hk_query_pool *pool, uint32_t query)
|
||||
{
|
||||
uint32_t *available = hk_query_available_map(pool, query);
|
||||
return p_atomic_read(available) != 0;
|
||||
}
|
||||
|
||||
#define HK_QUERY_TIMEOUT 2000000000ull
|
||||
|
||||
static VkResult
|
||||
hk_query_wait_for_available(struct hk_device *dev, struct hk_query_pool *pool,
|
||||
uint32_t query)
|
||||
{
|
||||
uint64_t abs_timeout_ns = os_time_get_absolute_timeout(HK_QUERY_TIMEOUT);
|
||||
|
||||
while (os_time_get_nano() < abs_timeout_ns) {
|
||||
if (hk_query_is_available(pool, query))
|
||||
return VK_SUCCESS;
|
||||
|
||||
VkResult status = vk_device_check_status(&dev->vk);
|
||||
if (status != VK_SUCCESS)
|
||||
return status;
|
||||
}
|
||||
|
||||
return vk_device_set_lost(&dev->vk, "query timeout");
|
||||
}
|
||||
|
||||
static void
|
||||
cpu_write_query_result(void *dst, uint32_t idx, VkQueryResultFlags flags,
|
||||
uint64_t result)
|
||||
{
|
||||
if (flags & VK_QUERY_RESULT_64_BIT) {
|
||||
uint64_t *dst64 = dst;
|
||||
dst64[idx] = result;
|
||||
} else {
|
||||
uint32_t *dst32 = dst;
|
||||
dst32[idx] = result;
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_GetQueryPoolResults(VkDevice device, VkQueryPool queryPool,
|
||||
uint32_t firstQuery, uint32_t queryCount,
|
||||
size_t dataSize, void *pData, VkDeviceSize stride,
|
||||
VkQueryResultFlags flags)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
|
||||
if (vk_device_is_lost(&dev->vk))
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
|
||||
VkResult status = VK_SUCCESS;
|
||||
for (uint32_t i = 0; i < queryCount; i++) {
|
||||
const uint32_t query = firstQuery + i;
|
||||
|
||||
bool available = hk_query_is_available(pool, query);
|
||||
|
||||
if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
|
||||
status = hk_query_wait_for_available(dev, pool, query);
|
||||
if (status != VK_SUCCESS)
|
||||
return status;
|
||||
|
||||
available = true;
|
||||
}
|
||||
|
||||
bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
|
||||
|
||||
const struct hk_query_report *src = hk_query_report_map(dev, pool, query);
|
||||
assert(i * stride < dataSize);
|
||||
void *dst = (char *)pData + i * stride;
|
||||
|
||||
uint32_t reports = hk_reports_per_query(pool);
|
||||
if (write_results) {
|
||||
for (uint32_t j = 0; j < reports; j++) {
|
||||
cpu_write_query_result(dst, j, flags, src[j].value);
|
||||
}
|
||||
}
|
||||
|
||||
if (!write_results)
|
||||
status = VK_NOT_READY;
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||||
cpu_write_query_result(dst, reports, flags, available);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
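/* Meta kernel for vkCmdCopyQueryPoolResults: each workgroup copies one query's
 * reports (and optionally availability) to the destination buffer via
 * libagx_copy_query, with the push constant address passed in the preamble.
 */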
static void
|
||||
hk_nir_copy_query(nir_builder *b, UNUSED const void *key)
|
||||
{
|
||||
nir_def *id = nir_channel(b, nir_load_workgroup_id(b), 0);
|
||||
libagx_copy_query(b, nir_load_preamble(b, 1, 64), id);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
||||
uint32_t firstQuery, uint32_t queryCount,
|
||||
VkBuffer dstBuffer, VkDeviceSize dstOffset,
|
||||
VkDeviceSize stride, VkQueryResultFlags flags)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
|
||||
VK_FROM_HANDLE(hk_buffer, dst_buffer, dstBuffer);
|
||||
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
struct hk_cs *cs = hk_cmd_buffer_get_cs(cmd, true);
|
||||
if (!cs)
|
||||
return;
|
||||
|
||||
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
|
||||
|
||||
const struct libagx_copy_query_push info = {
|
||||
.availability = pool->bo->ptr.gpu,
|
||||
.results = pool->oq_queries ? dev->occlusion_queries.bo->ptr.gpu
|
||||
: pool->bo->ptr.gpu + pool->query_start,
|
||||
.oq_index = pool->oq_queries ? pool->bo->ptr.gpu + pool->query_start : 0,
|
||||
|
||||
.first_query = firstQuery,
|
||||
.dst_addr = hk_buffer_address(dst_buffer, dstOffset),
|
||||
.dst_stride = stride,
|
||||
.reports_per_query = hk_reports_per_query(pool),
|
||||
|
||||
.partial = flags & VK_QUERY_RESULT_PARTIAL_BIT,
|
||||
._64 = flags & VK_QUERY_RESULT_64_BIT,
|
||||
.with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT,
|
||||
};
|
||||
|
||||
uint64_t push = hk_pool_upload(cmd, &info, sizeof(info), 8);
|
||||
|
||||
struct hk_shader *s = hk_meta_kernel(dev, hk_nir_copy_query, NULL, 0);
|
||||
uint32_t usc = hk_upload_usc_words_kernel(cmd, s, &push, sizeof(push));
|
||||
hk_dispatch_with_usc(dev, cs, s, usc, hk_grid(queryCount, 1, 1),
|
||||
hk_grid(1, 1, 1));
|
||||
}
|
||||
28
src/asahi/vulkan/hk_query_pool.h
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
#include "vk_query_pool.h"
|
||||
|
||||
struct agx_bo;
|
||||
|
||||
struct hk_query_pool {
|
||||
struct vk_query_pool vk;
|
||||
|
||||
uint32_t query_start;
|
||||
uint32_t query_stride;
|
||||
|
||||
struct agx_bo *bo;
|
||||
void *bo_map;
|
||||
|
||||
unsigned oq_queries;
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_query_pool, vk.base, VkQueryPool,
|
||||
VK_OBJECT_TYPE_QUERY_POOL)
|
||||
599
src/asahi/vulkan/hk_queue.c
Normal file
|
|
@ -0,0 +1,599 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_queue.h"
|
||||
|
||||
#include "agx_bo.h"
|
||||
#include "agx_device.h"
|
||||
#include "agx_pack.h"
|
||||
#include "decode.h"
|
||||
#include "hk_cmd_buffer.h"
|
||||
#include "hk_device.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include <xf86drm.h>
|
||||
#include "asahi/lib/unstable_asahi_drm.h"
|
||||
#include "util/list.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
#include "vk_drm_syncobj.h"
|
||||
#include "vk_sync.h"
|
||||
|
||||
/*
|
||||
* We need to specially handle submits with no control streams. The kernel
|
||||
* can't accept empty submits, but we can end up here in Vulkan for
|
||||
* synchronization purposes only. Rather than submit a no-op job (slow),
|
||||
* we simply tie the fences together.
|
||||
*/
|
||||
static VkResult
|
||||
queue_submit_empty(struct hk_device *dev, struct hk_queue *queue,
|
||||
struct vk_queue_submit *submit)
|
||||
{
|
||||
int fd = dev->dev.fd;
|
||||
|
||||
/* Transfer the waits into the queue timeline. */
|
||||
for (unsigned i = 0; i < submit->wait_count; ++i) {
|
||||
struct vk_sync_wait *wait = &submit->waits[i];
|
||||
|
||||
assert(vk_sync_type_is_drm_syncobj(wait->sync->type));
|
||||
const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(wait->sync);
|
||||
|
||||
drmSyncobjTransfer(fd, queue->drm.syncobj, ++queue->drm.timeline_value,
|
||||
syncobj->syncobj, wait->wait_value, 0);
|
||||
}
|
||||
|
||||
/* Transfer the queue timeline into each out fence. They will all be
|
||||
* signalled when we reach this point.
|
||||
*/
|
||||
for (unsigned i = 0; i < submit->signal_count; ++i) {
|
||||
struct vk_sync_signal *signal = &submit->signals[i];
|
||||
|
||||
assert(vk_sync_type_is_drm_syncobj(signal->sync->type));
|
||||
const struct vk_drm_syncobj *syncobj =
|
||||
vk_sync_as_drm_syncobj(signal->sync);
|
||||
|
||||
drmSyncobjTransfer(fd, syncobj->syncobj, signal->signal_value,
|
||||
queue->drm.syncobj, queue->drm.timeline_value, 0);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
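/* Translate a recorded compute (CDM) control stream into the kernel's
 * drm_asahi_cmd_compute descriptor.
 */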
static void
|
||||
asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
|
||||
struct drm_asahi_cmd_compute *cmd)
|
||||
{
|
||||
size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start);
|
||||
|
||||
*cmd = (struct drm_asahi_cmd_compute){
|
||||
.encoder_ptr = cs->addr,
|
||||
.encoder_end = cs->addr + len,
|
||||
|
||||
.sampler_array = dev->samplers.table.bo->ptr.gpu,
|
||||
.sampler_count = dev->samplers.table.alloc,
|
||||
.sampler_max = dev->samplers.table.alloc + 1,
|
||||
|
||||
.encoder_id = agx_get_global_id(&dev->dev),
|
||||
.cmd_id = agx_get_global_id(&dev->dev),
|
||||
.unk_mask = 0xffffffff,
|
||||
};
|
||||
|
||||
if (cs->scratch.cs.main || cs->scratch.cs.preamble) {
|
||||
cmd->helper_arg = dev->scratch.cs.buf->ptr.gpu;
|
||||
cmd->helper_cfg = cs->scratch.cs.preamble << 16;
|
||||
cmd->helper_program = dev->dev.helper->ptr.gpu | 1;
|
||||
}
|
||||
}
|
||||
|
||||
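/* Translate a recorded render (VDM) control stream into the kernel's
 * drm_asahi_cmd_render descriptor, filling in the framebuffer, depth/stencil,
 * tilebuffer and load/store pipeline state gathered during recording.
 */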
static void
|
||||
asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
|
||||
struct drm_asahi_cmd_render *c)
|
||||
{
|
||||
#if 0
|
||||
bool clear_pipeline_textures =
|
||||
agx_tilebuffer_spills(&batch->tilebuffer_layout);
|
||||
|
||||
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
|
||||
struct pipe_surface *surf = batch->key.cbufs[i];
|
||||
|
||||
clear_pipeline_textures |=
|
||||
surf && surf->texture && !(batch->clear & (PIPE_CLEAR_COLOR0 << i));
|
||||
}
|
||||
|
||||
#endif
|
||||
unsigned cmd_ta_id = agx_get_global_id(&dev->dev);
|
||||
unsigned cmd_3d_id = agx_get_global_id(&dev->dev);
|
||||
unsigned encoder_id = agx_get_global_id(&dev->dev);
|
||||
|
||||
memset(c, 0, sizeof(*c));
|
||||
|
||||
c->encoder_ptr = cs->addr;
|
||||
c->encoder_id = encoder_id;
|
||||
c->cmd_3d_id = cmd_3d_id;
|
||||
c->cmd_ta_id = cmd_ta_id;
|
||||
c->ppp_ctrl = 0x202;
|
||||
|
||||
c->fb_width = cs->cr.width;
|
||||
c->fb_height = cs->cr.height;
|
||||
|
||||
c->isp_bgobjdepth = cs->cr.isp_bgobjdepth;
|
||||
c->isp_bgobjvals = cs->cr.isp_bgobjvals;
|
||||
|
||||
static_assert(sizeof(c->zls_ctrl) == sizeof(cs->cr.zls_control));
|
||||
memcpy(&c->zls_ctrl, &cs->cr.zls_control, sizeof(cs->cr.zls_control));
|
||||
|
||||
c->depth_dimensions = (cs->cr.width - 1) | ((cs->cr.height - 1) << 15);
|
||||
|
||||
c->depth_buffer_load = cs->cr.depth.buffer;
|
||||
c->depth_buffer_store = cs->cr.depth.buffer;
|
||||
c->depth_buffer_partial = cs->cr.depth.buffer;
|
||||
|
||||
c->depth_buffer_load_stride = cs->cr.depth.stride;
|
||||
c->depth_buffer_store_stride = cs->cr.depth.stride;
|
||||
c->depth_buffer_partial_stride = cs->cr.depth.stride;
|
||||
|
||||
c->depth_meta_buffer_load = cs->cr.depth.meta;
|
||||
c->depth_meta_buffer_store = cs->cr.depth.meta;
|
||||
c->depth_meta_buffer_partial = cs->cr.depth.meta;
|
||||
|
||||
c->depth_meta_buffer_load_stride = cs->cr.depth.stride;
|
||||
c->depth_meta_buffer_store_stride = cs->cr.depth.meta_stride;
|
||||
c->depth_meta_buffer_partial_stride = cs->cr.depth.meta_stride;
|
||||
|
||||
c->stencil_buffer_load = cs->cr.stencil.buffer;
|
||||
c->stencil_buffer_store = cs->cr.stencil.buffer;
|
||||
c->stencil_buffer_partial = cs->cr.stencil.buffer;
|
||||
|
||||
c->stencil_buffer_load_stride = cs->cr.stencil.stride;
|
||||
c->stencil_buffer_store_stride = cs->cr.stencil.stride;
|
||||
c->stencil_buffer_partial_stride = cs->cr.stencil.stride;
|
||||
|
||||
c->stencil_meta_buffer_load = cs->cr.stencil.meta;
|
||||
c->stencil_meta_buffer_store = cs->cr.stencil.meta;
|
||||
c->stencil_meta_buffer_partial = cs->cr.stencil.meta;
|
||||
|
||||
c->stencil_meta_buffer_load_stride = cs->cr.stencil.stride;
|
||||
c->stencil_meta_buffer_store_stride = cs->cr.stencil.meta_stride;
|
||||
c->stencil_meta_buffer_partial_stride = cs->cr.stencil.meta_stride;
|
||||
|
||||
c->iogpu_unk_214 = cs->cr.iogpu_unk_214;
|
||||
|
||||
#if 0
|
||||
if (clear_pipeline_textures)
|
||||
c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
|
||||
else
|
||||
c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;
|
||||
|
||||
if (zres && !(batch->clear & PIPE_CLEAR_DEPTH))
|
||||
c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
|
||||
|
||||
if (sres && !(batch->clear & PIPE_CLEAR_STENCIL))
|
||||
c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
|
||||
#endif
|
||||
|
||||
if (dev->dev.debug & AGX_DBG_NOCLUSTER)
|
||||
c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING;
|
||||
|
||||
#if 0
|
||||
/* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */
|
||||
if (tib->nr_samples > 1 && framebuffer->zsbuf)
|
||||
c->flags |= ASAHI_RENDER_MSAA_ZS;
|
||||
#endif
|
||||
|
||||
c->utile_width = cs->tib.tile_size.width;
|
||||
c->utile_height = cs->tib.tile_size.height;
|
||||
|
||||
/* Can be 0 for attachmentless rendering with no draws */
|
||||
c->samples = MAX2(cs->tib.nr_samples, 1);
|
||||
c->layers = cs->cr.layers;
|
||||
|
||||
c->ppp_multisamplectl = cs->ppp_multisamplectl;
|
||||
c->sample_size = cs->tib.sample_size_B;
|
||||
|
||||
/* XXX OR 0x80 with eMRT? */
|
||||
c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(&cs->tib), 2048) / 2048;
|
||||
|
||||
float tan_60 = 1.732051f;
|
||||
c->merge_upper_x = fui(tan_60 / cs->cr.width);
|
||||
c->merge_upper_y = fui(tan_60 / cs->cr.height);
|
||||
|
||||
c->load_pipeline = cs->cr.bg.main.usc | 4;
|
||||
c->store_pipeline = cs->cr.eot.main.usc | 4;
|
||||
c->partial_reload_pipeline = cs->cr.bg.partial.usc | 4;
|
||||
c->partial_store_pipeline = cs->cr.eot.partial.usc | 4;
|
||||
|
||||
memcpy(&c->load_pipeline_bind, &cs->cr.bg.main.counts,
|
||||
sizeof(struct agx_counts_packed));
|
||||
|
||||
memcpy(&c->store_pipeline_bind, &cs->cr.eot.main.counts,
|
||||
sizeof(struct agx_counts_packed));
|
||||
|
||||
memcpy(&c->partial_reload_pipeline_bind, &cs->cr.bg.partial.counts,
|
||||
sizeof(struct agx_counts_packed));
|
||||
|
||||
memcpy(&c->partial_store_pipeline_bind, &cs->cr.eot.partial.counts,
|
||||
sizeof(struct agx_counts_packed));
|
||||
|
||||
c->scissor_array = cs->uploaded_scissor;
|
||||
c->depth_bias_array = cs->uploaded_zbias;
|
||||
|
||||
c->vertex_sampler_array = dev->samplers.table.bo->ptr.gpu;
|
||||
c->vertex_sampler_count = dev->samplers.table.alloc;
|
||||
c->vertex_sampler_max = dev->samplers.table.alloc + 1;
|
||||
|
||||
c->fragment_sampler_array = c->vertex_sampler_array;
|
||||
c->fragment_sampler_count = c->vertex_sampler_count;
|
||||
c->fragment_sampler_max = c->vertex_sampler_max;
|
||||
|
||||
c->visibility_result_buffer = dev->occlusion_queries.bo->ptr.gpu;
|
||||
|
||||
/* If a tile is empty, we do not want to process it, as the redundant
|
||||
* roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of
|
||||
* memory bandwidth. Any draw marks a tile as non-empty, so we only need to
|
||||
* process empty tiles if the background+EOT programs have a side effect.
|
||||
* This is the case exactly when there is an attachment we are clearing (some
|
||||
* attachment A in clear and in resolve <==> non-empty intersection).
|
||||
*
|
||||
* This case matters a LOT for performance in workloads that split batches.
|
||||
*/
|
||||
if (true /* TODO */)
|
||||
c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;
|
||||
|
||||
if (cs->scratch.vs.main || cs->scratch.vs.preamble) {
|
||||
c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
|
||||
c->vertex_helper_arg = dev->scratch.vs.buf->ptr.gpu;
|
||||
c->vertex_helper_cfg = cs->scratch.vs.preamble << 16;
|
||||
c->vertex_helper_program = dev->dev.helper->ptr.gpu | 1;
|
||||
}
|
||||
|
||||
if (cs->scratch.fs.main || cs->scratch.fs.preamble) {
|
||||
c->fragment_helper_arg = dev->scratch.fs.buf->ptr.gpu;
|
||||
c->fragment_helper_cfg = cs->scratch.fs.preamble << 16;
|
||||
c->fragment_helper_program = dev->dev.helper->ptr.gpu | 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
asahi_fill_sync(struct drm_asahi_sync *sync, struct vk_sync *vk_sync,
|
||||
uint64_t value)
|
||||
{
|
||||
if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
|
||||
unreachable("Unsupported sync type");
|
||||
return;
|
||||
}
|
||||
|
||||
const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync);
|
||||
*sync = (struct drm_asahi_sync){.handle = syncobj->syncobj};
|
||||
|
||||
if (vk_sync->flags & VK_SYNC_IS_TIMELINE) {
|
||||
sync->sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ;
|
||||
sync->timeline_value = value;
|
||||
} else {
|
||||
sync->sync_type = DRM_ASAHI_SYNC_SYNCOBJ;
|
||||
}
|
||||
}
|
||||
|
||||
union drm_asahi_cmd {
|
||||
struct drm_asahi_cmd_compute compute;
|
||||
struct drm_asahi_cmd_render render;
|
||||
};
|
||||
|
||||
/* TODO: I think it's 64. Can we query from the kernel? */
|
||||
#define MAX_COMMANDS_PER_SUBMIT (16)
|
||||
|
||||
static VkResult
|
||||
queue_submit_single(struct agx_device *dev, struct drm_asahi_submit *submit)
|
||||
{
|
||||
int ret = dev->ops.submit(dev, submit, 0);
|
||||
|
||||
/* XXX: don't trap */
|
||||
if (ret) {
|
||||
fprintf(stderr, "DRM_IOCTL_ASAHI_SUBMIT failed: %m\n");
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* The kernel/firmware jointly impose a limit on commands per submit ioctl, but
|
||||
* we can build up arbitrarily large command buffers. We handle this here by
|
||||
* looping the ioctl, submitting slices of the command buffers that are within
|
||||
* bounds.
|
||||
*/
|
||||
static VkResult
|
||||
queue_submit_looped(struct agx_device *dev, struct drm_asahi_submit *submit)
|
||||
{
|
||||
struct drm_asahi_command *cmds = (void *)submit->commands;
|
||||
unsigned commands_remaining = submit->command_count;
|
||||
unsigned submitted_vdm = 0, submitted_cdm = 0;
|
||||
|
||||
while (commands_remaining) {
|
||||
bool first = commands_remaining == submit->command_count;
|
||||
bool last = commands_remaining <= MAX_COMMANDS_PER_SUBMIT;
|
||||
|
||||
unsigned count = MIN2(commands_remaining, MAX_COMMANDS_PER_SUBMIT);
|
||||
commands_remaining -= count;
|
||||
|
||||
assert(!last || commands_remaining == 0);
|
||||
assert(count > 0);
|
||||
|
||||
/* We need to fix up the barriers since barriers are ioctl-relative */
|
||||
for (unsigned i = 0; i < count; ++i) {
|
||||
assert(cmds[i].barriers[0] >= submitted_vdm);
|
||||
assert(cmds[i].barriers[1] >= submitted_cdm);
|
||||
|
||||
cmds[i].barriers[0] -= submitted_vdm;
|
||||
cmds[i].barriers[1] -= submitted_cdm;
|
||||
}
|
||||
|
||||
/* We can't signal the out-syncobjs until all prior work finishes. Since
|
||||
* only the last ioctl will signal, make sure it waits on prior ioctls.
|
||||
*
|
||||
* TODO: there might be a more performant way to do this.
|
||||
*/
|
||||
if (last && !first) {
|
||||
if (cmds[0].barriers[0] == DRM_ASAHI_BARRIER_NONE)
|
||||
cmds[0].barriers[0] = 0;
|
||||
|
||||
if (cmds[0].barriers[1] == DRM_ASAHI_BARRIER_NONE)
|
||||
cmds[0].barriers[1] = 0;
|
||||
}
|
||||
|
||||
struct drm_asahi_submit submit_ioctl = {
|
||||
.flags = submit->flags,
|
||||
.queue_id = submit->queue_id,
|
||||
.result_handle = submit->result_handle,
|
||||
.commands = (uint64_t)(uintptr_t)(cmds),
|
||||
.command_count = count,
|
||||
.in_syncs = first ? submit->in_syncs : 0,
|
||||
.in_sync_count = first ? submit->in_sync_count : 0,
|
||||
.out_syncs = last ? submit->out_syncs : 0,
|
||||
.out_sync_count = last ? submit->out_sync_count : 0,
|
||||
};
|
||||
|
||||
VkResult result = queue_submit_single(dev, &submit_ioctl);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
for (unsigned i = 0; i < count; ++i) {
|
||||
if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE)
|
||||
submitted_cdm++;
|
||||
else if (cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER)
|
||||
submitted_vdm++;
|
||||
else
|
||||
unreachable("unknown subqueue");
|
||||
}
|
||||
|
||||
cmds += count;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
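/* Flatten every control stream in the submit into one array of kernel
 * commands, wire up syncobj waits/signals and inter-command barriers, and
 * submit, splitting into multiple ioctls if we exceed the per-submit limit.
 */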
static VkResult
|
||||
queue_submit(struct hk_device *dev, struct hk_queue *queue,
|
||||
struct vk_queue_submit *submit)
|
||||
{
|
||||
unsigned command_count = 0;
|
||||
|
||||
/* Gather the number of individual commands to submit up front */
|
||||
for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
|
||||
struct hk_cmd_buffer *cmdbuf =
|
||||
(struct hk_cmd_buffer *)submit->command_buffers[i];
|
||||
|
||||
command_count += list_length(&cmdbuf->control_streams);
|
||||
}
|
||||
|
||||
if (command_count == 0)
|
||||
return queue_submit_empty(dev, queue, submit);
|
||||
|
||||
unsigned wait_count = 0;
|
||||
struct drm_asahi_sync *waits =
|
||||
alloca(submit->wait_count * sizeof(struct drm_asahi_sync));
|
||||
|
||||
struct drm_asahi_sync *signals =
|
||||
alloca((submit->signal_count + 1) * sizeof(struct drm_asahi_sync));
|
||||
|
||||
for (unsigned i = 0; i < submit->wait_count; ++i) {
|
||||
/* The kernel rejects the submission if we try to wait on the same
|
||||
* timeline semaphore at multiple points.
|
||||
*
|
||||
* TODO: Can we relax the UAPI?
|
||||
*
|
||||
* XXX: This is quadratic time.
|
||||
*/
|
||||
bool skip = false;
|
||||
if (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) {
|
||||
uint64_t v1 = submit->waits[i].wait_value;
|
||||
for (unsigned j = 0; j < submit->wait_count; ++j) {
|
||||
uint64_t v2 = submit->waits[j].wait_value;
|
||||
if (i != j && submit->waits[i].sync == submit->waits[j].sync &&
|
||||
(v1 < v2 || (v1 == v2 && i < j))) {
|
||||
skip = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (skip)
|
||||
continue;
|
||||
}
|
||||
|
||||
asahi_fill_sync(&waits[wait_count++], submit->waits[i].sync,
|
||||
submit->waits[i].wait_value);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < submit->signal_count; ++i) {
|
||||
asahi_fill_sync(&signals[i], submit->signals[i].sync,
|
||||
submit->signals[i].signal_value);
|
||||
}
|
||||
|
||||
/* Signal progress on the queue itself */
|
||||
signals[submit->signal_count] = (struct drm_asahi_sync){
|
||||
.sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
|
||||
.handle = queue->drm.syncobj,
|
||||
.timeline_value = ++queue->drm.timeline_value,
|
||||
};
|
||||
|
||||
/* Now setup the command structs */
|
||||
struct drm_asahi_command *cmds = alloca(sizeof(*cmds) * command_count);
|
||||
union drm_asahi_cmd *cmds_inner =
|
||||
alloca(sizeof(*cmds_inner) * command_count);
|
||||
|
||||
unsigned cmd_it = 0;
|
||||
unsigned nr_vdm = 0, nr_cdm = 0;
|
||||
|
||||
for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
|
||||
struct hk_cmd_buffer *cmdbuf =
|
||||
(struct hk_cmd_buffer *)submit->command_buffers[i];
|
||||
|
||||
list_for_each_entry(struct hk_cs, cs, &cmdbuf->control_streams, node) {
|
||||
assert(cmd_it < command_count);
|
||||
|
||||
struct drm_asahi_command cmd = {
|
||||
.cmd_buffer = (uint64_t)(uintptr_t)&cmds_inner[cmd_it],
|
||||
.result_offset = 0 /* TODO */,
|
||||
.result_size = 0 /* TODO */,
|
||||
/* Barrier on previous command */
|
||||
.barriers = {nr_vdm, nr_cdm},
|
||||
};
|
||||
|
||||
if (cs->type == HK_CS_CDM) {
|
||||
cmd.cmd_type = DRM_ASAHI_CMD_COMPUTE;
|
||||
cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute);
|
||||
nr_cdm++;
|
||||
|
||||
asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute);
|
||||
} else {
|
||||
assert(cs->type == HK_CS_VDM);
|
||||
cmd.cmd_type = DRM_ASAHI_CMD_RENDER;
|
||||
cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render);
|
||||
nr_vdm++;
|
||||
|
||||
asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render);
|
||||
}
|
||||
|
||||
cmds[cmd_it++] = cmd;
|
||||
}
|
||||
}
|
||||
|
||||
assert(cmd_it == command_count);
|
||||
|
||||
if (dev->dev.debug & AGX_DBG_TRACE) {
|
||||
for (unsigned i = 0; i < command_count; ++i) {
|
||||
if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE) {
|
||||
agxdecode_drm_cmd_compute(dev->dev.agxdecode, &dev->dev.params,
|
||||
&cmds_inner[i].compute, true);
|
||||
} else {
|
||||
assert(cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER);
|
||||
agxdecode_drm_cmd_render(dev->dev.agxdecode, &dev->dev.params,
|
||||
&cmds_inner[i].render, true);
|
||||
}
|
||||
}
|
||||
|
||||
agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->ptr.gpu,
|
||||
dev->images.alloc);
|
||||
|
||||
agxdecode_next_frame();
|
||||
}
|
||||
|
||||
struct drm_asahi_submit submit_ioctl = {
|
||||
.flags = 0,
|
||||
.queue_id = queue->drm.id,
|
||||
.result_handle = 0 /* TODO */,
|
||||
.in_sync_count = wait_count,
|
||||
.out_sync_count = submit->signal_count + 1,
|
||||
.command_count = command_count,
|
||||
.in_syncs = (uint64_t)(uintptr_t)(waits),
|
||||
.out_syncs = (uint64_t)(uintptr_t)(signals),
|
||||
.commands = (uint64_t)(uintptr_t)(cmds),
|
||||
};
|
||||
|
||||
if (command_count <= MAX_COMMANDS_PER_SUBMIT)
|
||||
return queue_submit_single(&dev->dev, &submit_ioctl);
|
||||
else
|
||||
return queue_submit_looped(&dev->dev, &submit_ioctl);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
|
||||
{
|
||||
struct hk_queue *queue = container_of(vk_queue, struct hk_queue, vk);
|
||||
struct hk_device *dev = hk_queue_device(queue);
|
||||
|
||||
if (vk_queue_is_lost(&queue->vk))
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
|
||||
VkResult result = queue_submit(dev, queue, submit);
|
||||
if (result != VK_SUCCESS)
|
||||
return vk_queue_set_lost(&queue->vk, "Submit failed");
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
hk_queue_init(struct hk_device *dev, struct hk_queue *queue,
|
||||
const VkDeviceQueueCreateInfo *pCreateInfo,
|
||||
uint32_t index_in_family)
|
||||
{
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
VkResult result;
|
||||
|
||||
assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count);
|
||||
|
||||
const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
|
||||
const enum VkQueueGlobalPriorityKHR global_priority =
|
||||
priority_info ? priority_info->globalPriority
|
||||
: VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
|
||||
|
||||
if (global_priority != VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
queue->vk.driver_submit = hk_queue_submit;
|
||||
|
||||
queue->drm.id = agx_create_command_queue(&dev->dev,
|
||||
DRM_ASAHI_QUEUE_CAP_RENDER |
|
||||
DRM_ASAHI_QUEUE_CAP_BLIT |
|
||||
DRM_ASAHI_QUEUE_CAP_COMPUTE,
|
||||
2);
|
||||
|
||||
if (drmSyncobjCreate(dev->dev.fd, 0, &queue->drm.syncobj)) {
|
||||
mesa_loge("drmSyncobjCreate() failed %d\n", errno);
|
||||
agx_destroy_command_queue(&dev->dev, queue->drm.id);
|
||||
vk_queue_finish(&queue->vk);
|
||||
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
|
||||
"DRM_IOCTL_SYNCOBJ_CREATE failed: %m");
|
||||
}
|
||||
|
||||
uint64_t initial_value = 1;
|
||||
if (drmSyncobjTimelineSignal(dev->dev.fd, &queue->drm.syncobj,
|
||||
&initial_value, 1)) {
|
||||
hk_queue_finish(dev, queue);
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
|
||||
"DRM_IOCTL_TIMELINE_SYNCOBJ_SIGNAL failed: %m");
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
hk_queue_finish(struct hk_device *dev, struct hk_queue *queue)
|
||||
{
|
||||
drmSyncobjDestroy(dev->dev.fd, queue->drm.syncobj);
|
||||
agx_destroy_command_queue(&dev->dev, queue->drm.id);
|
||||
vk_queue_finish(&queue->vk);
|
||||
}
|
||||
42
src/asahi/vulkan/hk_queue.h
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_private.h"
|
||||
#include "vk_queue.h"
|
||||
|
||||
struct hk_device;
|
||||
|
||||
struct hk_queue {
|
||||
struct vk_queue vk;
|
||||
|
||||
struct {
|
||||
/* Asahi kernel queue ID */
|
||||
uint32_t id;
|
||||
|
||||
/* Timeline syncobj backing the queue */
|
||||
uint32_t syncobj;
|
||||
|
||||
/* Current maximum timeline value for the queue's syncobj. If the
|
||||
* syncobj's value equals timeline_value, then all work is complete.
|
||||
*/
|
||||
uint32_t timeline_value;
|
||||
} drm;
|
||||
};
|
||||
|
||||
static inline struct hk_device *
|
||||
hk_queue_device(struct hk_queue *queue)
|
||||
{
|
||||
return (struct hk_device *)queue->vk.base.device;
|
||||
}
|
||||
|
||||
VkResult hk_queue_init(struct hk_device *dev, struct hk_queue *queue,
|
||||
const VkDeviceQueueCreateInfo *pCreateInfo,
|
||||
uint32_t index_in_family);
|
||||
|
||||
void hk_queue_finish(struct hk_device *dev, struct hk_queue *queue);
|
||||
281
src/asahi/vulkan/hk_sampler.c
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "hk_sampler.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_entrypoints.h"
|
||||
#include "hk_physical_device.h"
|
||||
|
||||
#include "vk_enum_to_str.h"
|
||||
#include "vk_format.h"
|
||||
#include "vk_sampler.h"
|
||||
|
||||
#include "asahi/genxml/agx_pack.h"
|
||||
|
||||
static inline uint32_t
|
||||
translate_address_mode(VkSamplerAddressMode addr_mode)
|
||||
{
|
||||
#define MODE(VK, AGX_) [VK_SAMPLER_ADDRESS_MODE_##VK] = AGX_WRAP_##AGX_
|
||||
static const uint8_t translate[] = {
|
||||
MODE(REPEAT, REPEAT),
|
||||
MODE(MIRRORED_REPEAT, MIRRORED_REPEAT),
|
||||
MODE(CLAMP_TO_EDGE, CLAMP_TO_EDGE),
|
||||
MODE(CLAMP_TO_BORDER, CLAMP_TO_BORDER),
|
||||
MODE(MIRROR_CLAMP_TO_EDGE, MIRRORED_CLAMP_TO_EDGE),
|
||||
};
|
||||
#undef MODE
|
||||
|
||||
assert(addr_mode < ARRAY_SIZE(translate));
|
||||
return translate[addr_mode];
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
translate_texsamp_compare_op(VkCompareOp op)
|
||||
{
|
||||
#define OP(VK, AGX_) [VK_COMPARE_OP_##VK] = AGX_COMPARE_FUNC_##AGX_
|
||||
static const uint8_t translate[] = {
|
||||
OP(NEVER, NEVER),
|
||||
OP(LESS, LESS),
|
||||
OP(EQUAL, EQUAL),
|
||||
OP(LESS_OR_EQUAL, LEQUAL),
|
||||
OP(GREATER, GREATER),
|
||||
OP(NOT_EQUAL, NOT_EQUAL),
|
||||
OP(GREATER_OR_EQUAL, GEQUAL),
|
||||
OP(ALWAYS, ALWAYS),
|
||||
};
|
||||
#undef OP
|
||||
|
||||
assert(op < ARRAY_SIZE(translate));
|
||||
return translate[op];
|
||||
}
|
||||
|
||||
static enum agx_filter
|
||||
translate_filter(VkFilter filter)
|
||||
{
|
||||
static_assert((enum agx_filter)VK_FILTER_NEAREST == AGX_FILTER_NEAREST);
|
||||
static_assert((enum agx_filter)VK_FILTER_LINEAR == AGX_FILTER_LINEAR);
|
||||
|
||||
return (enum agx_filter)filter;
|
||||
}
|
||||
|
||||
static enum agx_mip_filter
|
||||
translate_mipfilter(VkSamplerMipmapMode mode)
|
||||
{
|
||||
switch (mode) {
|
||||
case VK_SAMPLER_MIPMAP_MODE_NEAREST:
|
||||
return AGX_MIP_FILTER_NEAREST;
|
||||
|
||||
case VK_SAMPLER_MIPMAP_MODE_LINEAR:
|
||||
return AGX_MIP_FILTER_LINEAR;
|
||||
|
||||
default:
|
||||
unreachable("Invalid filter");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
uses_border(const VkSamplerCreateInfo *info)
|
||||
{
|
||||
return info->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
|
||||
info->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
|
||||
info->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_border_color_custom(VkBorderColor color)
|
||||
{
|
||||
/* TODO: for now, opaque black is treated as custom due to rgba4 swizzling
|
||||
* issues, could be optimized though.
|
||||
*/
|
||||
switch (color) {
|
||||
case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
|
||||
case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
|
||||
case VK_BORDER_COLOR_INT_CUSTOM_EXT:
|
||||
case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Translate an American VkBorderColor into a Canadian agx_border_colour */
|
||||
static enum agx_border_colour
|
||||
translate_border_color(VkBorderColor color, bool custom_to_1)
|
||||
{
|
||||
switch (color) {
|
||||
case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
|
||||
case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
|
||||
return AGX_BORDER_COLOUR_TRANSPARENT_BLACK;
|
||||
|
||||
case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
|
||||
case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
|
||||
return AGX_BORDER_COLOUR_OPAQUE_WHITE;
|
||||
|
||||
default:
|
||||
assert(is_border_color_custom(color));
|
||||
return custom_to_1 ? AGX_BORDER_COLOUR_OPAQUE_WHITE
|
||||
: AGX_BORDER_COLOUR_TRANSPARENT_BLACK;
|
||||
}
|
||||
}
|
||||
|
||||
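/* Pack a Vulkan sampler into a hardware sampler descriptor. custom_to_1
 * selects whether custom border colours degrade to opaque white or transparent
 * black here; the real custom colour is recorded separately on the sampler.
 */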
static void
|
||||
pack_sampler(const struct hk_physical_device *pdev,
|
||||
const struct VkSamplerCreateInfo *info, bool custom_to_1,
|
||||
struct agx_sampler_packed *out)
|
||||
{
|
||||
agx_pack(out, SAMPLER, cfg) {
|
||||
cfg.minimum_lod = info->minLod;
|
||||
cfg.maximum_lod = info->maxLod;
|
||||
cfg.magnify = translate_filter(info->magFilter);
|
||||
cfg.minify = translate_filter(info->minFilter);
|
||||
cfg.mip_filter = translate_mipfilter(info->mipmapMode);
|
||||
cfg.wrap_s = translate_address_mode(info->addressModeU);
|
||||
cfg.wrap_t = translate_address_mode(info->addressModeV);
|
||||
cfg.wrap_r = translate_address_mode(info->addressModeW);
|
||||
cfg.pixel_coordinates = info->unnormalizedCoordinates;
|
||||
|
||||
cfg.seamful_cube_maps =
|
||||
info->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
|
||||
|
||||
if (info->compareEnable) {
|
||||
cfg.compare_func = translate_texsamp_compare_op(info->compareOp);
|
||||
cfg.compare_enable = true;
|
||||
}
|
||||
|
||||
if (info->anisotropyEnable) {
|
||||
cfg.maximum_anisotropy =
|
||||
util_next_power_of_two(MAX2(info->maxAnisotropy, 1));
|
||||
} else {
|
||||
cfg.maximum_anisotropy = 1;
|
||||
}
|
||||
|
||||
if (uses_border(info)) {
|
||||
cfg.border_colour =
|
||||
translate_border_color(info->borderColor, custom_to_1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateSampler(VkDevice device,
|
||||
const VkSamplerCreateInfo *info /* pCreateInfo */,
|
||||
const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
struct hk_physical_device *pdev = hk_device_physical(dev);
|
||||
struct hk_sampler *sampler;
|
||||
VkResult result;
|
||||
|
||||
sampler = vk_sampler_create(&dev->vk, info, pAllocator, sizeof(*sampler));
|
||||
if (!sampler)
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
struct agx_sampler_packed samp;
|
||||
pack_sampler(pdev, info, true, &samp);
|
||||
|
||||
/* LOD bias passed in the descriptor set */
|
||||
sampler->lod_bias_fp16 = _mesa_float_to_half(info->mipLodBias);
|
||||
|
||||
result =
|
||||
hk_sampler_heap_add(dev, samp, &sampler->planes[sampler->plane_count].hw);
|
||||
if (result != VK_SUCCESS) {
|
||||
hk_DestroySampler(device, hk_sampler_to_handle(sampler), pAllocator);
|
||||
return result;
|
||||
}
|
||||
|
||||
sampler->plane_count++;
|
||||
|
||||
/* In order to support CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT, we
|
||||
* need multiple sampler planes: at minimum we will need one for luminance
|
||||
* (the default), and one for chroma. Each sampler plane needs its own
|
||||
* sampler table entry. However, sampler table entries are scarce on
|
||||
* G13, and each plane would burn one of those. So we make sure to allocate
|
||||
* only the minimum amount that we actually need (i.e., either 1 or 2), and
|
||||
* then just copy the last sampler plane out as far as we need to fill the
|
||||
* number of image planes.
|
||||
*/
|
||||
if (sampler->vk.ycbcr_conversion) {
|
||||
assert(!uses_border(info) &&
|
||||
"consequence of VUID-VkSamplerCreateInfo-addressModeU-01646");
|
||||
|
||||
const VkFilter chroma_filter =
|
||||
sampler->vk.ycbcr_conversion->state.chroma_filter;
|
||||
if (info->magFilter != chroma_filter ||
|
||||
info->minFilter != chroma_filter) {
|
||||
VkSamplerCreateInfo plane2_info = *info;
|
||||
plane2_info.magFilter = chroma_filter;
|
||||
plane2_info.minFilter = chroma_filter;
|
||||
|
||||
pack_sampler(pdev, &plane2_info, false, &samp);
|
||||
result = hk_sampler_heap_add(
|
||||
dev, samp, &sampler->planes[sampler->plane_count].hw);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
hk_DestroySampler(device, hk_sampler_to_handle(sampler),
|
||||
pAllocator);
|
||||
return result;
|
||||
}
|
||||
|
||||
sampler->plane_count++;
|
||||
}
|
||||
} else if (uses_border(info)) {
|
||||
/* If the sampler uses custom border colours, we need both clamp-to-1
|
||||
* and clamp-to-0 variants. We treat these as planes.
|
||||
*/
|
||||
pack_sampler(pdev, info, false, &samp);
|
||||
result = hk_sampler_heap_add(dev, samp,
|
||||
&sampler->planes[sampler->plane_count].hw);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
hk_DestroySampler(device, hk_sampler_to_handle(sampler), pAllocator);
|
||||
return result;
|
||||
}
|
||||
|
||||
sampler->plane_count++;
|
||||
|
||||
/* We also need to record the border.
|
||||
*
|
||||
* If there is a border colour component mapping, we need to swizzle with
|
||||
* it. Otherwise, we can assume there's nothing to do.
|
||||
*/
|
||||
VkClearColorValue bc = sampler->vk.border_color_value;
|
||||
|
||||
const VkSamplerBorderColorComponentMappingCreateInfoEXT *swiz_info =
|
||||
vk_find_struct_const(
|
||||
info->pNext,
|
||||
SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT);
|
||||
|
||||
if (swiz_info) {
|
||||
const bool is_int = vk_border_color_is_int(info->borderColor);
|
||||
bc = vk_swizzle_color_value(bc, swiz_info->components, is_int);
|
||||
}
|
||||
|
||||
sampler->custom_border = bc;
|
||||
sampler->has_border = true;
|
||||
}
|
||||
|
||||
*pSampler = hk_sampler_to_handle(sampler);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
hk_DestroySampler(VkDevice device, VkSampler _sampler,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
VK_FROM_HANDLE(hk_device, dev, device);
|
||||
VK_FROM_HANDLE(hk_sampler, sampler, _sampler);
|
||||
|
||||
if (!sampler)
|
||||
return;
|
||||
|
||||
for (uint8_t plane = 0; plane < sampler->plane_count; plane++) {
|
||||
hk_sampler_heap_remove(dev, sampler->planes[plane].hw);
|
||||
}
|
||||
|
||||
vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
|
||||
}
|
||||
33
src/asahi/vulkan/hk_sampler.h
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_physical_device.h"
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "vk_sampler.h"
|
||||
#include "vk_ycbcr_conversion.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
|
||||
struct hk_sampler {
|
||||
struct vk_sampler vk;
|
||||
VkClearColorValue custom_border;
|
||||
bool has_border;
|
||||
|
||||
uint8_t plane_count;
|
||||
uint16_t lod_bias_fp16;
|
||||
|
||||
struct {
|
||||
struct hk_rc_sampler *hw;
|
||||
} planes[2];
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(hk_sampler, vk.base, VkSampler,
|
||||
VK_OBJECT_TYPE_SAMPLER)
|
||||
1432
src/asahi/vulkan/hk_shader.c
Normal file
File diff suppressed because it is too large
400
src/asahi/vulkan/hk_shader.h
Normal file
|
|
@ -0,0 +1,400 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "asahi/compiler/agx_compile.h"
|
||||
#include "util/macros.h"
|
||||
#include "agx_linker.h"
|
||||
#include "agx_nir_lower_vbo.h"
|
||||
#include "agx_pack.h"
|
||||
#include "agx_usc.h"
|
||||
#include "agx_uvs.h"
|
||||
|
||||
#include "hk_device.h"
|
||||
#include "hk_device_memory.h"
|
||||
#include "hk_private.h"
|
||||
|
||||
#include "nir_xfb_info.h"
|
||||
#include "shader_enums.h"
|
||||
#include "vk_pipeline_cache.h"
|
||||
|
||||
#include "nir.h"
|
||||
|
||||
#include "vk_shader.h"
|
||||
|
||||
struct hk_physical_device;
|
||||
struct hk_pipeline_compilation_ctx;
|
||||
struct vk_descriptor_set_layout;
|
||||
struct vk_graphics_pipeline_state;
|
||||
struct vk_pipeline_cache;
|
||||
struct vk_pipeline_layout;
|
||||
struct vk_pipeline_robustness_state;
|
||||
struct vk_shader_module;
|
||||
|
||||
/* TODO: Make dynamic */
|
||||
#define HK_ROOT_UNIFORM 104
|
||||
#define HK_IMAGE_HEAP_UNIFORM 108
|
||||
|
||||
struct hk_shader_info {
|
||||
union {
|
||||
struct {
|
||||
uint32_t attribs_read;
|
||||
BITSET_DECLARE(attrib_components_read, AGX_MAX_ATTRIBS * 4);
|
||||
uint8_t cull_distance_array_size;
|
||||
uint8_t _pad[7];
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
/* Local workgroup size */
|
||||
uint16_t local_size[3];
|
||||
|
||||
uint8_t _pad[26];
|
||||
} cs;
|
||||
|
||||
struct {
|
||||
struct agx_interp_info interp;
|
||||
struct agx_fs_epilog_link_info epilog_key;
|
||||
|
||||
bool reads_sample_mask;
|
||||
bool post_depth_coverage;
|
||||
bool uses_sample_shading;
|
||||
bool early_fragment_tests;
|
||||
bool writes_memory;
|
||||
|
||||
uint8_t _pad[7];
|
||||
} fs;
|
||||
|
||||
struct {
|
||||
uint8_t spacing;
|
||||
uint8_t mode;
|
||||
enum mesa_prim out_prim;
|
||||
bool point_mode;
|
||||
bool ccw;
|
||||
uint8_t _pad[27];
|
||||
} ts;
|
||||
|
||||
struct {
|
||||
uint64_t per_vertex_outputs;
|
||||
uint32_t output_stride;
|
||||
uint8_t output_patch_size;
|
||||
uint8_t nr_patch_outputs;
|
||||
uint8_t _pad[18];
|
||||
} tcs;
|
||||
|
||||
struct {
|
||||
unsigned count_words;
|
||||
enum mesa_prim out_prim;
|
||||
uint8_t _pad[27];
|
||||
} gs;
|
||||
|
||||
/* Used to initialize the union for other stages */
|
||||
uint8_t _pad[32];
|
||||
};
|
||||
|
||||
struct agx_unlinked_uvs_layout uvs;
|
||||
|
||||
/* Transform feedback buffer strides */
|
||||
uint8_t xfb_stride[MAX_XFB_BUFFERS];
|
||||
|
||||
gl_shader_stage stage : 8;
|
||||
uint8_t clip_distance_array_size;
|
||||
uint8_t cull_distance_array_size;
|
||||
uint8_t _pad0[1];
|
||||
|
||||
/* XXX: is there a less goofy way to do this? I really don't want dynamic
|
||||
* allocation here.
|
||||
*/
|
||||
nir_xfb_info xfb_info;
|
||||
nir_xfb_output_info xfb_outputs[64];
|
||||
};
|
||||
|
||||
/*
|
||||
* Hash table keys for fast-linked shader variants. These contain the entire
|
||||
* prolog/epilog key so we only do 1 hash table lookup instead of 2 in the
|
||||
* general case where the linked shader is already ready.
|
||||
*/
|
||||
struct hk_fast_link_key_vs {
|
||||
struct agx_vs_prolog_key prolog;
|
||||
};
|
||||
|
||||
struct hk_fast_link_key_fs {
|
||||
unsigned nr_samples_shaded;
|
||||
struct agx_fs_prolog_key prolog;
|
||||
struct agx_fs_epilog_key epilog;
|
||||
};
|
||||
|
||||
struct hk_shader {
|
||||
struct agx_shader_part b;
|
||||
|
||||
struct hk_shader_info info;
|
||||
struct agx_fragment_face_2_packed frag_face;
|
||||
struct agx_counts_packed counts;
|
||||
|
||||
const void *code_ptr;
|
||||
uint32_t code_size;
|
||||
|
||||
const void *data_ptr;
|
||||
uint32_t data_size;
|
||||
|
||||
/* BO for any uploaded shader part */
|
||||
struct agx_bo *bo;
|
||||
|
||||
/* Cache of fast linked variants */
|
||||
struct {
|
||||
simple_mtx_t lock;
|
||||
struct hash_table *ht;
|
||||
} linked;
|
||||
|
||||
/* If there's only a single possible linked variant, direct pointer. TODO:
|
||||
* Union with the cache to save some space?
|
||||
*/
|
||||
struct hk_linked_shader *only_linked;
|
||||
|
||||
/* Address to the uploaded preamble section. Preambles are uploaded
|
||||
* separately from fast-linked main shaders.
|
||||
*/
|
||||
uint64_t preamble_addr;
|
||||
|
||||
/* Address of the start of the shader data section */
|
||||
uint64_t data_addr;
|
||||
};
|
||||
|
||||
enum hk_vs_variant {
|
||||
/* Hardware vertex shader, when next stage is fragment */
|
||||
HK_VS_VARIANT_HW,
|
||||
|
||||
/* Hardware compute shader, when next is geometry/tessellation */
|
||||
HK_VS_VARIANT_SW,
|
||||
|
||||
HK_VS_VARIANTS,
|
||||
};
|
||||
|
||||
enum hk_gs_variant {
|
||||
/* Hardware vertex shader used for rasterization */
|
||||
HK_GS_VARIANT_RAST,
|
||||
|
||||
/* Main compute shader */
|
||||
HK_GS_VARIANT_MAIN,
|
||||
HK_GS_VARIANT_MAIN_NO_RAST,
|
||||
|
||||
/* Count compute shader */
|
||||
HK_GS_VARIANT_COUNT,
|
||||
HK_GS_VARIANT_COUNT_NO_RAST,
|
||||
|
||||
/* Pre-GS compute shader */
|
||||
HK_GS_VARIANT_PRE,
|
||||
HK_GS_VARIANT_PRE_NO_RAST,
|
||||
|
||||
HK_GS_VARIANTS,
|
||||
};
|
||||
|
||||
/* clang-format off */
|
||||
static const char *hk_gs_variant_name[] = {
|
||||
[HK_GS_VARIANT_RAST] = "Rasterization",
|
||||
[HK_GS_VARIANT_MAIN] = "Main",
|
||||
[HK_GS_VARIANT_MAIN_NO_RAST] = "Main (rast. discard)",
|
||||
[HK_GS_VARIANT_COUNT] = "Count",
|
||||
[HK_GS_VARIANT_COUNT_NO_RAST] = "Count (rast. discard)",
|
||||
[HK_GS_VARIANT_PRE] = "Pre-GS",
|
||||
[HK_GS_VARIANT_PRE_NO_RAST] = "Pre-GS (rast. discard)",
|
||||
};
|
||||
/* clang-format on */
|
||||
|
||||
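/* Number of hardware shader variants backing one API shader of a given stage:
 * VS/TES get a hardware and a software (compute) variant, GS gets the full set
 * of hk_gs_variant, and everything else is 1:1.
 */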
static inline unsigned
|
||||
hk_num_variants(gl_shader_stage stage)
|
||||
{
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
return HK_VS_VARIANTS;
|
||||
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
return HK_GS_VARIANTS;
|
||||
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* An hk_api_shader maps 1:1 to a VkShader object. An hk_api_shader may contain
|
||||
* multiple hardware hk_shader's, built at shader compile time. This complexity
|
||||
* is required to efficiently implement the legacy geometry pipeline.
|
||||
*/
|
||||
struct hk_api_shader {
|
||||
struct vk_shader vk;
|
||||
|
||||
/* Is this an internal passthrough geometry shader? */
|
||||
bool is_passthrough;
|
||||
|
||||
struct hk_shader variants[];
|
||||
};
|
||||
|
||||
#define hk_foreach_variant(api_shader, var) \
|
||||
for (struct hk_shader *var = api_shader->variants; \
|
||||
var < api_shader->variants + hk_num_variants(api_shader->vk.stage); \
|
||||
++var)
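
/*
 * Example usage of the iterator above (illustrative sketch; "api_shader" and
 * "total" are placeholders): walk every hardware variant baked into one API
 * shader, e.g. to total up the uploaded code size.
 *
 *    uint32_t total = 0;
 *    hk_foreach_variant(api_shader, shader) {
 *       total += shader->code_size;
 *    }
 */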

static const char *
hk_variant_name(struct hk_api_shader *obj, struct hk_shader *variant)
{
   unsigned i = variant - obj->variants;
   assert(i < hk_num_variants(obj->vk.stage));

   if (hk_num_variants(obj->vk.stage) == 1) {
      return NULL;
   } else if (obj->vk.stage == MESA_SHADER_GEOMETRY) {
      assert(i < ARRAY_SIZE(hk_gs_variant_name));
      return hk_gs_variant_name[i];
   } else {
      assert(i < 2);
      return i == HK_VS_VARIANT_SW ? "Software" : "Hardware";
   }
}

static struct hk_shader *
hk_only_variant(struct hk_api_shader *obj)
{
   if (!obj)
      return NULL;

   assert(hk_num_variants(obj->vk.stage) == 1);
   return &obj->variants[0];
}

static struct hk_shader *
hk_any_variant(struct hk_api_shader *obj)
{
   if (!obj)
      return NULL;

   return &obj->variants[0];
}

static struct hk_shader *
hk_main_gs_variant(struct hk_api_shader *obj, bool rast_disc)
{
   return &obj->variants[HK_GS_VARIANT_MAIN + rast_disc];
}

static struct hk_shader *
hk_count_gs_variant(struct hk_api_shader *obj, bool rast_disc)
{
   return &obj->variants[HK_GS_VARIANT_COUNT + rast_disc];
}

static struct hk_shader *
hk_pre_gs_variant(struct hk_api_shader *obj, bool rast_disc)
{
   return &obj->variants[HK_GS_VARIANT_PRE + rast_disc];
}
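
/*
 * Example (illustrative): the *_NO_RAST variants sit immediately after their
 * rasterizing counterparts in the enum, so the accessors above select between
 * them with a simple offset. "gs" and "rast_disc" are placeholders for the
 * bound geometry shader and the rasterizer-discard state.
 *
 *    struct hk_shader *count = hk_count_gs_variant(gs, rast_disc);
 *    struct hk_shader *main_gs = hk_main_gs_variant(gs, rast_disc);
 */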

#define HK_MAX_LINKED_USC_SIZE                                                 \
   (AGX_USC_PRESHADER_LENGTH + AGX_USC_FRAGMENT_PROPERTIES_LENGTH +            \
    AGX_USC_REGISTERS_LENGTH + AGX_USC_SHADER_LENGTH + AGX_USC_SHARED_LENGTH + \
    AGX_USC_SAMPLER_LENGTH + (AGX_USC_UNIFORM_LENGTH * 9))

struct hk_linked_shader {
   struct agx_linked_shader b;

   /* Distinct from hk_shader::counts due to addition of cf_binding_count, which
    * is delayed since it depends on cull distance.
    */
   struct agx_fragment_shader_word_0_packed fs_counts;

   /* Baked USC words to bind this linked shader */
   struct {
      uint8_t data[HK_MAX_LINKED_USC_SIZE];
      size_t size;
   } usc;
};

struct hk_linked_shader *hk_fast_link(struct hk_device *dev, bool fragment,
                                      struct hk_shader *main,
                                      struct agx_shader_part *prolog,
                                      struct agx_shader_part *epilog,
                                      unsigned nr_samples_shaded);
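
/*
 * Example (illustrative sketch): fast-linking a fragment shader from its
 * compiled main part plus prolog/epilog parts. "fs_main", "prolog", "epilog"
 * and "nr_samples" are placeholders; the real call sites elsewhere in the
 * driver may differ.
 *
 *    struct hk_linked_shader *linked =
 *       hk_fast_link(dev, true, fs_main, &prolog, &epilog, nr_samples);
 */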

extern const struct vk_device_shader_ops hk_device_shader_ops;

uint64_t
hk_physical_device_compiler_flags(const struct hk_physical_device *pdev);

static inline nir_address_format
hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
{
   switch (robustness) {
   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
      return nir_address_format_64bit_global_32bit_offset;
   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
      return nir_address_format_64bit_bounded_global;
   default:
      unreachable("Invalid robust buffer access behavior");
   }
}
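
/*
 * Example (illustrative): choosing NIR address formats from a
 * vk_pipeline_robustness_state during descriptor lowering. Field names follow
 * the common Vulkan runtime's robustness state; treat this as a sketch rather
 * than the driver's actual lowering code.
 *
 *    nir_address_format ssbo_fmt = hk_buffer_addr_format(rs->storage_buffers);
 *    nir_address_format ubo_fmt  = hk_buffer_addr_format(rs->uniform_buffers);
 */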

bool hk_lower_uvs_index(nir_shader *s, unsigned vs_uniform_base);

bool
hk_nir_lower_descriptors(nir_shader *nir,
                         const struct vk_pipeline_robustness_state *rs,
                         uint32_t set_layout_count,
                         struct vk_descriptor_set_layout *const *set_layouts);
void hk_lower_nir(struct hk_device *dev, nir_shader *nir,
                  const struct vk_pipeline_robustness_state *rs,
                  bool is_multiview, uint32_t set_layout_count,
                  struct vk_descriptor_set_layout *const *set_layouts);

VkResult hk_compile_shader(struct hk_device *dev,
                           struct vk_shader_compile_info *info,
                           const struct vk_graphics_pipeline_state *state,
                           const VkAllocationCallbacks *pAllocator,
                           struct hk_api_shader **shader_out);

void hk_preprocess_nir_internal(struct vk_physical_device *vk_pdev,
                                nir_shader *nir);

void hk_api_shader_destroy(struct vk_device *vk_dev,
                           struct vk_shader *vk_shader,
                           const VkAllocationCallbacks *pAllocator);

const nir_shader_compiler_options *
hk_get_nir_options(struct vk_physical_device *vk_pdev, gl_shader_stage stage,
                   UNUSED const struct vk_pipeline_robustness_state *rs);

struct hk_api_shader *hk_meta_shader(struct hk_device *dev,
                                     hk_internal_builder_t builder, void *data,
                                     size_t data_size);

static inline struct hk_shader *
hk_meta_kernel(struct hk_device *dev, hk_internal_builder_t builder, void *data,
               size_t data_size)
{
   return hk_only_variant(hk_meta_shader(dev, builder, data, data_size));
}

struct hk_passthrough_gs_key {
   /* Bit mask of outputs written by the VS/TES, to be passed through */
   uint64_t outputs;

   /* Clip/cull sizes, implies clip/cull written in output */
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   /* Decomposed primitive */
   enum mesa_prim prim;

   /* Transform feedback info. Must add nir_xfb_info_size to get the key size */
   nir_xfb_info xfb_info;
};

void hk_nir_passthrough_gs(struct nir_builder *b, const void *key_);
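
/*
 * Example (illustrative sketch): a passthrough GS would be obtained through
 * the meta shader cache, keyed on the struct above. This assumes
 * hk_nir_passthrough_gs matches hk_internal_builder_t, as its prototype
 * suggests; the field values and the key-size handling for transform feedback
 * outputs are placeholders.
 *
 *    struct hk_passthrough_gs_key key = {
 *       .outputs = VARYING_BIT_POS | VARYING_BIT_VAR(0),
 *       .prim = MESA_PRIM_TRIANGLES,
 *    };
 *    struct hk_api_shader *gs =
 *       hk_meta_shader(dev, hk_nir_passthrough_gs, &key, sizeof(key));
 */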

44  src/asahi/vulkan/hk_wsi.c  Normal file

@@ -0,0 +1,44 @@
/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "hk_wsi.h"
#include "hk_instance.h"
#include "wsi_common.h"

static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
hk_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
{
   VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
   return vk_instance_get_proc_addr_unchecked(pdev->vk.instance, pName);
}

VkResult
hk_init_wsi(struct hk_physical_device *pdev)
{
   VkResult result;

   struct wsi_device_options wsi_options = {.sw_device = false};
   result = wsi_device_init(
      &pdev->wsi_device, hk_physical_device_to_handle(pdev), hk_wsi_proc_addr,
      &pdev->vk.instance->alloc, pdev->master_fd,
      &hk_physical_device_instance(pdev)->dri_options, &wsi_options);
   if (result != VK_SUCCESS)
      return result;

   pdev->wsi_device.supports_scanout = false;
   pdev->wsi_device.supports_modifiers = true;

   pdev->vk.wsi_device = &pdev->wsi_device;

   return result;
}

void
hk_finish_wsi(struct hk_physical_device *pdev)
{
   pdev->vk.wsi_device = NULL;
   wsi_device_finish(&pdev->wsi_device, &pdev->vk.instance->alloc);
}

13  src/asahi/vulkan/hk_wsi.h  Normal file

@@ -0,0 +1,13 @@
/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "hk_physical_device.h"

VkResult hk_init_wsi(struct hk_physical_device *pdev);
void hk_finish_wsi(struct hk_physical_device *pdev);

142  src/asahi/vulkan/meson.build  Normal file

@@ -0,0 +1,142 @@
# Copyright © 2022 Collabora Ltd. and Red Hat Inc.
# SPDX-License-Identifier: MIT
hk_files = files(
  'hk_buffer.c',
  'hk_buffer.h',
  'hk_buffer_view.c',
  'hk_buffer_view.h',
  'hk_cmd_buffer.c',
  'hk_cmd_buffer.h',
  'hk_cmd_clear.c',
  'hk_cmd_dispatch.c',
  'hk_cmd_draw.c',
  'hk_cmd_meta.c',
  'hk_cmd_pool.c',
  'hk_cmd_pool.h',
  'hk_descriptor_set.h',
  'hk_descriptor_set.c',
  'hk_descriptor_set_layout.c',
  'hk_descriptor_set_layout.h',
  'hk_descriptor_table.c',
  'hk_descriptor_table.h',
  'hk_device.c',
  'hk_device.h',
  'hk_device_memory.c',
  'hk_device_memory.h',
  'hk_event.c',
  'hk_event.h',
  'hk_format.c',
  'hk_image.c',
  'hk_image.h',
  'hk_image_view.c',
  'hk_image_view.h',
  'hk_instance.c',
  'hk_instance.h',
  'hk_nir_lower_descriptors.c',
  'hk_nir_passthrough_gs.c',
  'hk_physical_device.c',
  'hk_physical_device.h',
  'hk_private.h',
  'hk_query_pool.c',
  'hk_query_pool.h',
  'hk_queue.c',
  'hk_queue.h',
  'hk_sampler.c',
  'hk_sampler.h',
  'hk_shader.c',
  'hk_shader.h',
  'hk_wsi.c',
  'hk_wsi.h'
)

hk_entrypoints = custom_target(
  'hk_entrypoints',
  input : [vk_entrypoints_gen, vk_api_xml],
  output : ['hk_entrypoints.h', 'hk_entrypoints.c'],
  command : [
    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
    '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'hk',
    '--beta', with_vulkan_beta.to_string(),
  ],
  depend_files : vk_entrypoints_gen_depend_files,
)

hk_deps = [
  dep_libdrm,
  idep_nir,
  idep_vulkan_runtime,
  idep_vulkan_util,
  idep_vulkan_wsi,
  idep_vulkan_wsi_headers,
  idep_agx_pack,
]

libhk = static_library(
  'hk',
  [
    hk_entrypoints,
    hk_files,
    libagx_shaders,
    sha1_h,
  ],
  include_directories : [
    inc_gallium,
    inc_gallium_aux,
    inc_include,
    inc_src,
    inc_asahi,
  ],
  link_with : [libasahi_lib, libasahi_layout, libasahi_compiler],
  c_args : ['-Wno-c2x-extensions'],
  dependencies : [hk_deps],
  gnu_symbol_visibility : 'hidden',
)

libvulkan_asahi = shared_library(
  'vulkan_asahi',
  link_whole : [libhk],
  link_args: [ld_args_build_id],
  gnu_symbol_visibility : 'hidden',
  install : true,
)

icd_lib_path = join_paths(get_option('prefix'), get_option('libdir'))
icd_file_name = 'libvulkan_asahi.so'
if with_platform_windows
  icd_lib_path = import('fs').relative_to(get_option('bindir'), with_vulkan_icd_dir)
  icd_file_name = 'vulkan_asahi.dll'
endif

asahi_icd = custom_target(
  'asahi_icd',
  input : [vk_icd_gen, vk_api_xml],
  output : 'asahi_icd.@0@.json'.format(host_machine.cpu()),
  command : [
    prog_python, '@INPUT0@',
    '--api-version', '1.3', '--xml', '@INPUT1@',
    '--lib-path', join_paths(icd_lib_path, icd_file_name),
    '--out', '@OUTPUT@',
  ],
  build_by_default : true,
  install_dir : with_vulkan_icd_dir,
  install_tag : 'runtime',
  install : true,
)

_dev_icdname = 'asahi_devenv_icd.@0@.json'.format(host_machine.cpu())
custom_target(
  'asahi_devenv_icd',
  input : [vk_icd_gen, vk_api_xml],
  output : _dev_icdname,
  command : [
    prog_python, '@INPUT0@',
    '--api-version', '1.3', '--xml', '@INPUT1@',
    '--lib-path', meson.current_build_dir() / icd_file_name,
    '--out', '@OUTPUT@',
  ],
  build_by_default : true,
)

devenv.append('VK_DRIVER_FILES', meson.current_build_dir() / _dev_icdname)
# Deprecated: replaced by VK_DRIVER_FILES above
devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
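
# Note (usage): running under "meson devenv" exports VK_DRIVER_FILES pointing
# at the generated asahi_devenv_icd.*.json in the build tree, so the Vulkan
# loader picks up the uninstalled driver; the installed asahi_icd.*.json
# serves the same purpose for system-wide installs.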