nvk: Implement samplers

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
Faith Ekstrand 2023-01-30 20:11:50 -06:00 committed by Marge Bot
parent 100c7060d8
commit 834f01f1b1
4 changed files with 231 additions and 1 deletions

View file

@ -43,6 +43,13 @@ nvk_update_preamble_push(struct nvk_queue_state *qs, struct nvk_device *dev,
P_NVA0C0_SET_SHADER_LOCAL_MEMORY_A(push, qs->tls_bo->offset >> 32);
P_NVA0C0_SET_SHADER_LOCAL_MEMORY_B(push, qs->tls_bo->offset & 0xffffffff);
nvk_push_descriptor_table_ref(push, &dev->samplers);
uint64_t tsp_addr = nvk_descriptor_table_base_address(&dev->samplers);
P_MTHD(push, NVA0C0, SET_TEX_SAMPLER_POOL_A);
P_NVA0C0_SET_TEX_SAMPLER_POOL_A(push, tsp_addr >> 32);
P_NVA0C0_SET_TEX_SAMPLER_POOL_B(push, tsp_addr & 0xffffffff);
P_NVA0C0_SET_TEX_SAMPLER_POOL_C(push, dev->samplers.alloc - 1);
nvk_push_descriptor_table_ref(push, &dev->images);
uint64_t thp_addr = nvk_descriptor_table_base_address(&dev->images);
P_MTHD(push, NVA0C0, SET_TEX_HEADER_POOL_A);
@ -218,10 +225,16 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
assert(null_desc != NULL && null_image_index == 0);
memset(null_desc, 0, 8 * 4);
result = vk_queue_init(&device->queue.vk, &device->vk, &pCreateInfo->pQueueCreateInfos[0], 0);
result = nvk_descriptor_table_init(device, &device->samplers,
8 * 4 /* tsc entry size */,
4096, 4096);
if (result != VK_SUCCESS)
goto fail_images;
result = vk_queue_init(&device->queue.vk, &device->vk, &pCreateInfo->pQueueCreateInfos[0], 0);
if (result != VK_SUCCESS)
goto fail_samplers;
if (pthread_mutex_init(&device->mutex, NULL) != 0) {
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_queue;
@ -256,6 +269,8 @@ fail_mutex:
pthread_mutex_destroy(&device->mutex);
fail_queue:
vk_queue_finish(&device->queue.vk);
fail_samplers:
nvk_descriptor_table_finish(device, &device->samplers);
fail_images:
nvk_descriptor_table_finish(device, &device->images);
fail_ctx:
@ -285,6 +300,7 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
pthread_mutex_destroy(&device->mutex);
vk_queue_finish(&device->queue.vk);
vk_device_finish(&device->vk);
nvk_descriptor_table_finish(device, &device->samplers);
nvk_descriptor_table_finish(device, &device->images);
nouveau_ws_context_destroy(device->ctx);
vk_free(&device->vk.alloc, device);

View file

@ -36,6 +36,7 @@ struct nvk_device {
struct nouveau_ws_context *ctx;
struct nvk_descriptor_table images;
struct nvk_descriptor_table samplers;
struct nvk_queue queue;

View file

@ -151,6 +151,7 @@ nvk_get_device_extensions(const struct nvk_physical_device *device,
.KHR_swapchain_mutable_format = true,
#endif
.KHR_variable_pointers = true,
.EXT_custom_border_color = true,
};
}

View file

@ -1,6 +1,120 @@
#include "nvk_sampler.h"
#include "nvk_device.h"
#include "util/format_srgb.h"
#include "vulkan/runtime/vk_sampler.h"
#include "gallium/drivers/nouveau/nv50/g80_texture.xml.h"
/*
 * Map a Vulkan sampler address mode onto the matching G80_TSC_WRAP_* value.
 *
 * The result is the raw wrap-mode value; nvk_CreateSampler() shifts it into
 * the U, V and P address fields of TSC word 0.  Any address mode not listed
 * here is treated as unreachable.
 */
static inline uint32_t
g80_tsc_wrap_mode(VkSamplerAddressMode addr_mode)
{
   uint32_t wrap;

   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      wrap = G80_TSC_WRAP_WRAP;
      break;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      wrap = G80_TSC_WRAP_MIRROR;
      break;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      wrap = G80_TSC_WRAP_CLAMP_TO_EDGE;
      break;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      wrap = G80_TSC_WRAP_BORDER;
      break;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      wrap = G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE;
      break;
   default:
      unreachable("Invalid sampler address mode");
   }

   return wrap;
}
/*
 * Map a Vulkan compare op onto the G80_TSC_0_DEPTH_COMPARE_FUNC_* value for
 * TSC word 0.
 *
 * nvk_CreateSampler() ORs the returned value into tsc[0] as-is, without any
 * additional shift.  Any compare op not listed here is treated as
 * unreachable.
 */
static uint32_t
g80_tsc_0_depth_compare_func(VkCompareOp op)
{
   uint32_t func;

   switch (op) {
   case VK_COMPARE_OP_NEVER:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_NEVER;
      break;
   case VK_COMPARE_OP_LESS:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_LESS;
      break;
   case VK_COMPARE_OP_EQUAL:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_EQUAL;
      break;
   case VK_COMPARE_OP_LESS_OR_EQUAL:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_LEQUAL;
      break;
   case VK_COMPARE_OP_GREATER:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_GREATER;
      break;
   case VK_COMPARE_OP_NOT_EQUAL:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_NOTEQUAL;
      break;
   case VK_COMPARE_OP_GREATER_OR_EQUAL:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_GEQUAL;
      break;
   case VK_COMPARE_OP_ALWAYS:
      func = G80_TSC_0_DEPTH_COMPARE_FUNC_ALWAYS;
      break;
   default:
      unreachable("Invalid compare op");
   }

   return func;
}
/*
 * Build the max-anisotropy bits of TSC word 0 from a Vulkan maxAnisotropy.
 *
 * Ratios of 16 and above select 16_TO_1 and ratios in [12, 16) select
 * 12_TO_1.  Anything smaller is clamped at zero, truncated to an integer,
 * halved and placed at bit 20 (presumably the MAX_ANISOTROPY field offset;
 * TODO confirm against g80_texture.xml.h).
 */
static uint32_t
g80_tsc_0_max_anisotropy(float max_anisotropy)
{
   uint32_t bits;

   if (max_anisotropy >= 16) {
      bits = G80_TSC_0_MAX_ANISOTROPY_16_TO_1;
   } else if (max_anisotropy >= 12) {
      bits = G80_TSC_0_MAX_ANISOTROPY_12_TO_1;
   } else {
      /* Negative inputs become 0 before the float -> integer truncation. */
      const uint32_t whole = MAX2(0.0f, max_anisotropy);
      bits = (whole / 2) << 20;
   }

   return bits;
}
/*
 * Pick the TRILIN_OPT field of TSC word 1 from a Vulkan maxAnisotropy.
 *
 * The thresholds mirror what the nouveau driver does; it is not known
 * whether this is actually desirable for NVK.
 *
 *    maxAnisotropy >= 12  ->  0
 *    maxAnisotropy >=  4  ->  6
 *    maxAnisotropy >=  2  ->  4
 *    otherwise            ->  0
 */
static uint32_t
g80_tsc_1_trilin_opt(float max_anisotropy)
{
   uint32_t level = 0;

   if (max_anisotropy >= 12) {
      level = 0;
   } else if (max_anisotropy >= 4) {
      level = 6;
   } else if (max_anisotropy >= 2) {
      level = 4;
   }

   return level << G80_TSC_1_TRILIN_OPT__SHIFT;
}
/*
 * Return the reduction mode requested for a sampler.
 *
 * Looks for a VkSamplerReductionModeCreateInfo in the pNext chain of
 * pCreateInfo and returns its reductionMode.  When no such struct is
 * chained, VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE is returned.
 */
static VkSamplerReductionMode
vk_sampler_create_reduction_mode(const VkSamplerCreateInfo *pCreateInfo)
{
   const VkSamplerReductionModeCreateInfo *info =
      vk_find_struct_const(pCreateInfo->pNext,
                           SAMPLER_REDUCTION_MODE_CREATE_INFO);

   return info != NULL ? info->reductionMode
                       : VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE;
}
/*
 * Convert a float to a signed fixed-point value with int_bits integer bits
 * (sign bit included) and frac_bits fractional bits, returned in the low
 * int_bits + frac_bits bits of a uint32_t.
 *
 * The input is clamped to [-(2^(int_bits - 1)), 2^(int_bits - 1) - 1],
 * scaled by 2^frac_bits and converted to int (which discards any remaining
 * fraction).
 */
static uint32_t
to_sfixed(float f, unsigned int_bits, unsigned frac_bits)
{
   const int lowest = -(1 << (int_bits - 1));
   const int highest = (1 << (int_bits - 1)) - 1;
   const int field_mask = (1 << (int_bits + frac_bits)) - 1;

   const float clamped = CLAMP(f, (float)lowest, (float)highest);
   const int scaled = clamped * (float)(1 << frac_bits);

   /* The result is unsigned, so mask off the high (sign-extension) bits. */
   return scaled & field_mask;
}
/*
 * Convert a float to an unsigned fixed-point value with int_bits integer
 * bits and frac_bits fractional bits.
 *
 * The input is clamped to [0, 2^int_bits - 1], scaled by 2^frac_bits and
 * converted to an unsigned integer (which discards any remaining fraction).
 * In debug builds the result is asserted to fit in int_bits + frac_bits
 * bits, so a caller passing widths the conversion cannot honour is caught
 * instead of silently overflowing the destination field.
 */
static uint32_t
to_ufixed(float f, unsigned int_bits, unsigned frac_bits)
{
   const unsigned max = (1u << int_bits) - 1;
   f = CLAMP(f, 0.0f, (float)max);

   /* f is non-negative after the clamp, so convert straight to unsigned
    * rather than going through a signed intermediate.
    */
   const uint32_t fixed = f * (float)(1u << frac_bits);

   /* Widen before shifting so the check stays well-defined even when
    * int_bits + frac_bits reaches 32.
    */
   assert((uint64_t)fixed < (UINT64_C(1) << (int_bits + frac_bits)));
   return fixed;
}
VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateSampler(VkDevice _device,
@ -16,6 +130,104 @@ nvk_CreateSampler(VkDevice _device,
if (!sampler)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
uint32_t *desc_map = nvk_descriptor_table_alloc(device, &device->samplers,
&sampler->desc_index);
if (desc_map == NULL) {
vk_object_free(&device->vk, pAllocator, sampler);
return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to allocate image descriptor");
}
uint32_t tsc[8] = {};
tsc[0] |= 0x00024000; /* Font filter? */
tsc[0] |= G80_TSC_0_SRGB_CONVERSION;
tsc[0] |= g80_tsc_wrap_mode(pCreateInfo->addressModeU)
<< G80_TSC_0_ADDRESS_U__SHIFT;
tsc[0] |= g80_tsc_wrap_mode(pCreateInfo->addressModeV)
<< G80_TSC_0_ADDRESS_V__SHIFT;
tsc[0] |= g80_tsc_wrap_mode(pCreateInfo->addressModeW)
<< G80_TSC_0_ADDRESS_P__SHIFT;
if (pCreateInfo->compareEnable) {
tsc[0] |= G80_TSC_0_DEPTH_COMPARE;
tsc[0] |= g80_tsc_0_depth_compare_func(pCreateInfo->compareOp);
}
tsc[0] |= g80_tsc_0_max_anisotropy(pCreateInfo->maxAnisotropy);
tsc[1] |= g80_tsc_1_trilin_opt(pCreateInfo->maxAnisotropy);
switch (pCreateInfo->magFilter) {
case VK_FILTER_NEAREST:
tsc[1] |= G80_TSC_1_MAG_FILTER_NEAREST;
break;
case VK_FILTER_LINEAR:
tsc[1] |= G80_TSC_1_MAG_FILTER_LINEAR;
break;
default:
unreachable("Invalid filter");
}
switch (pCreateInfo->minFilter) {
case VK_FILTER_NEAREST:
tsc[1] |= G80_TSC_1_MIN_FILTER_NEAREST;
break;
case VK_FILTER_LINEAR:
tsc[1] |= G80_TSC_1_MIN_FILTER_LINEAR;
break;
default:
unreachable("Invalid filter");
}
switch (pCreateInfo->mipmapMode) {
case VK_SAMPLER_MIPMAP_MODE_NEAREST:
tsc[1] |= G80_TSC_1_MIP_FILTER_NEAREST;
break;
case VK_SAMPLER_MIPMAP_MODE_LINEAR:
tsc[1] |= G80_TSC_1_MIP_FILTER_LINEAR;
break;
default:
unreachable("Invalid mipmap mode");
}
tsc[1] |= GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING;
if (pCreateInfo->unnormalizedCoordinates)
tsc[1] |= GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS;
switch (vk_sampler_create_reduction_mode(pCreateInfo)) {
case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE:
tsc[1] |= GM204_TSC_1_REDUCTION_MODE_WEIGHTED_AVERAGE;
break;
case VK_SAMPLER_REDUCTION_MODE_MIN:
tsc[1] |= GM204_TSC_1_REDUCTION_MODE_MIN;
break;
case VK_SAMPLER_REDUCTION_MODE_MAX:
tsc[1] |= GM204_TSC_1_REDUCTION_MODE_MAX;
break;
default:
unreachable("Invalid reduction mode");
}
tsc[1] |= to_sfixed(pCreateInfo->mipLodBias, 5, 8)
<< G80_TSC_1_MIP_LOD_BIAS__SHIFT;
tsc[2] |= to_ufixed(pCreateInfo->minLod, 4, 8)
<< G80_TSC_2_MIN_LOD_CLAMP__SHIFT;
tsc[2] |= to_ufixed(pCreateInfo->maxLod, 4, 8)
<< G80_TSC_2_MAX_LOD_CLAMP__SHIFT;
const VkClearColorValue bc =
vk_sampler_border_color_value(pCreateInfo, NULL);
uint32_t bc_srgb[3];
for (unsigned i = 0; i < 3; i++)
bc_srgb[i] = util_format_linear_float_to_srgb_8unorm(bc.float32[i]);
tsc[2] |= bc_srgb[0] << G80_TSC_2_SRGB_BORDER_COLOR_R__SHIFT;
tsc[3] |= bc_srgb[1] << G80_TSC_3_SRGB_BORDER_COLOR_G__SHIFT;
tsc[3] |= bc_srgb[2] << G80_TSC_3_SRGB_BORDER_COLOR_B__SHIFT;
for (unsigned i = 0; i < 4; i++)
tsc[i + 4] = bc.uint32[i];
memcpy(desc_map, tsc, sizeof(tsc));
*pSampler = nvk_sampler_to_handle(sampler);
return VK_SUCCESS;