nvk: Use bindless cbufs on Turing+
These are much faster than ld.global.constant. This takes The Witness from
103 FPS to 130 FPS on my 4060 laptop GPU when run with NVK_DEBUG=no_cbuf.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
parent 248b22d158
commit 8b5835af31
8 changed files with 197 additions and 59 deletions
@@ -596,11 +596,13 @@ nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
 }
 
 static void
-nvk_bind_descriptor_sets(UNUSED struct nvk_cmd_buffer *cmd,
+nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                          struct nvk_descriptor_state *desc,
                          const VkBindDescriptorSetsInfoKHR *info)
 {
    VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   struct nvk_physical_device *pdev = nvk_device_physical(dev);
 
    /* From the Vulkan 1.3.275 spec:
     *
@@ -644,9 +646,16 @@ nvk_bind_descriptor_sets(UNUSED struct nvk_cmd_buffer *cmd,
 
       if (set != NULL && set_layout->dynamic_buffer_count > 0) {
          for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
-            struct nvk_buffer_address addr = set->dynamic_buffers[j];
-            addr.base_addr += info->pDynamicOffsets[next_dyn_offset + j];
-            desc->root.dynamic_buffers[dyn_buffer_start + j] = addr;
+            union nvk_buffer_descriptor db = set->dynamic_buffers[j];
+            uint32_t offset = info->pDynamicOffsets[next_dyn_offset + j];
+            if (BITSET_TEST(set_layout->dynamic_ubos, j) &&
+                nvk_use_bindless_cbuf(&pdev->info)) {
+               assert((offset & 0xf) == 0);
+               db.cbuf.base_addr_shift_4 += offset >> 4;
+            } else {
+               db.addr.base_addr += offset;
+            }
+            desc->root.dynamic_buffers[dyn_buffer_start + j] = db;
          }
          next_dyn_offset += set->layout->dynamic_buffer_count;
       }
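The dynamic-offset arithmetic above works because the packed descriptor stores its base address pre-shifted right by 4. A minimal stand-alone sketch of that update, using a hypothetical stand-in for the driver's nvk_bindless_cbuf type (layout taken from the header change later in this commit):

   #include <assert.h>
   #include <stdint.h>

   /* Hypothetical stand-in for nvk_bindless_cbuf. */
   struct bindless_cbuf {
      uint64_t base_addr_shift_4 : 45;
      uint64_t size_shift_4 : 19;
   };

   int main(void)
   {
      struct bindless_cbuf db = {
         .base_addr_shift_4 = 0x80000000ull >> 4,
         .size_shift_4 = 0x100 >> 4,
      };
      uint32_t offset = 0x40;   /* one pDynamicOffsets[] entry */

      /* The offset must be 16-byte aligned, as the assert above requires. */
      assert((offset & 0xf) == 0);
      db.base_addr_shift_4 += offset >> 4;   /* same as base_addr += offset */

      assert(((uint64_t)db.base_addr_shift_4 << 4) == 0x80000000ull + 0x40);
      return 0;
   }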
@@ -804,6 +813,9 @@ nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                              const struct nvk_cbuf *cbuf,
                              struct nvk_buffer_address *addr_out)
 {
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
    switch (cbuf->type) {
    case NVK_CBUF_TYPE_INVALID:
       *addr_out = (struct nvk_buffer_address) { .size = 0 };
@@ -827,7 +839,8 @@ nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
    case NVK_CBUF_TYPE_DYNAMIC_UBO: {
       const uint32_t dyn_start =
         desc->root.set_dynamic_buffer_start[cbuf->desc_set];
-      *addr_out = desc->root.dynamic_buffers[dyn_start + cbuf->dynamic_idx];
+      *addr_out = nvk_ubo_descriptor_addr(pdev,
+         desc->root.dynamic_buffers[dyn_start + cbuf->dynamic_idx]);
       return true;
    }
@@ -840,8 +853,9 @@ nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
         return false;
 
      assert(cbuf->desc_offset < NVK_PUSH_DESCRIPTOR_SET_SIZE);
-     void *desc = &push->data[cbuf->desc_offset];
-     *addr_out = *(struct nvk_buffer_address *)desc;
+     union nvk_buffer_descriptor desc;
+     memcpy(&desc, &push->data[cbuf->desc_offset], sizeof(desc));
+     *addr_out = nvk_ubo_descriptor_addr(pdev, desc);
      return true;
   }
@@ -65,7 +65,7 @@ struct nvk_root_descriptor_table {
    uint8_t set_dynamic_buffer_start[NVK_MAX_SETS];
 
    /* Dynamic buffer bindings */
-   struct nvk_buffer_address dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
+   union nvk_buffer_descriptor dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
 
    /* enforce alignment to 0x100 as needed pre-Pascal */
    uint8_t __padding[0x40];
@@ -2255,15 +2255,38 @@ nvk_mme_bind_cbuf_desc(struct mme_builder *b)
    /* First 4 bits are group, later bits are slot */
    struct mme_value group_slot = mme_load(b);
 
-   if (b->devinfo->cls_eng3d >= TURING_A) {
-      struct mme_value64 addr = mme_load_addr64(b);
-      mme_tu104_read_fifoed(b, addr, mme_imm(3));
-   }
-
-   /* Load the descriptor */
-   struct mme_value addr_lo = mme_load(b);
-   struct mme_value addr_hi = mme_load(b);
-   struct mme_value size = mme_load(b);
+   struct mme_value addr_lo, addr_hi, size;
+   if (nvk_use_bindless_cbuf(b->devinfo)) {
+      if (b->devinfo->cls_eng3d >= TURING_A) {
+         struct mme_value64 addr = mme_load_addr64(b);
+         mme_tu104_read_fifoed(b, addr, mme_imm(2));
+      }
+
+      /* Load the descriptor */
+      struct mme_value desc_lo = mme_load(b);
+      struct mme_value desc_hi = mme_load(b);
+
+      /* The bottom 45 bits are addr >> 4 */
+      addr_lo = mme_merge(b, mme_zero(), desc_lo, 4, 28, 0);
+      addr_hi = mme_merge(b, mme_zero(), desc_lo, 0, 4, 28);
+      mme_merge_to(b, addr_hi, addr_hi, desc_hi, 4, 13, 0);
+
+      /* The top 19 bits are size >> 4 */
+      size = mme_merge(b, mme_zero(), desc_hi, 4, 19, 13);
+
+      mme_free_reg(b, desc_hi);
+      mme_free_reg(b, desc_lo);
+   } else {
+      if (b->devinfo->cls_eng3d >= TURING_A) {
+         struct mme_value64 addr = mme_load_addr64(b);
+         mme_tu104_read_fifoed(b, addr, mme_imm(3));
+      }
+
+      /* Load the descriptor */
+      addr_lo = mme_load(b);
+      addr_hi = mme_load(b);
+      size = mme_load(b);
+   }
 
    struct mme_value cb = mme_alloc_reg(b);
    mme_if(b, ieq, size, mme_zero()) {
@@ -2285,7 +2308,7 @@ nvk_mme_bind_cbuf_desc(struct mme_builder *b)
       mme_emit(b, addr_hi);
       mme_emit(b, addr_lo);
 
-      /* Bottim bit is the valid bit, 8:4 are shader slot */
+      /* Bottom bit is the valid bit, 8:4 are shader slot */
       mme_merge_to(b, cb, mme_imm(1), group_slot, 4, 5, 4);
    }
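The mme_merge() calls above unpack the 8-byte bindless descriptor using only 32-bit MME registers. Assuming the usual MERGE semantics (replace bits [dst_pos, dst_pos+bits) of the first value with bits [src_pos, src_pos+bits) of the second), a plain-C model of the same field extraction checks out on a made-up descriptor:

   #include <assert.h>
   #include <stdint.h>

   /* Assumed model of the MME MERGE op used above. */
   static uint32_t merge(uint32_t x, uint32_t y, unsigned dst_pos,
                         unsigned bits, unsigned src_pos)
   {
      uint32_t mask = (bits < 32 ? (1u << bits) : 0u) - 1u;
      return (x & ~(mask << dst_pos)) | (((y >> src_pos) & mask) << dst_pos);
   }

   int main(void)
   {
      /* Example descriptor: base_addr = 0x123456780, size = 0x300 */
      uint64_t desc = (0x123456780ull >> 4) | ((0x300ull >> 4) << 45);
      uint32_t desc_lo = (uint32_t)desc, desc_hi = (uint32_t)(desc >> 32);

      uint32_t addr_lo = merge(0, desc_lo, 4, 28, 0);
      uint32_t addr_hi = merge(0, desc_lo, 0, 4, 28);
      addr_hi = merge(addr_hi, desc_hi, 4, 13, 0);
      uint32_t size = merge(0, desc_hi, 4, 19, 13);

      assert((((uint64_t)addr_hi << 32) | addr_lo) == 0x123456780ull);
      assert(size == 0x300);
      return 0;
   }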
@@ -124,7 +124,7 @@ write_storage_image_view_desc(struct nvk_descriptor_set *set,
    write_desc(set, binding, elem, &desc, sizeof(desc));
 }
 
-static struct nvk_buffer_address
+static union nvk_buffer_descriptor
 ubo_desc(struct nvk_physical_device *pdev,
          const VkDescriptorBufferInfo *const info,
          uint32_t binding, uint32_t elem)
@@ -140,10 +140,17 @@ ubo_desc(struct nvk_physical_device *pdev,
 
-   return (struct nvk_buffer_address) {
-      .base_addr = align64(addr_range.addr, min_cbuf_alignment),
-      .size = align(addr_range.range, min_cbuf_alignment),
-   };
+   addr_range.addr = align64(addr_range.addr, min_cbuf_alignment);
+   addr_range.range = align(addr_range.range, min_cbuf_alignment);
+
+   if (nvk_use_bindless_cbuf(&pdev->info)) {
+      return (union nvk_buffer_descriptor) { .cbuf = {
+         .base_addr_shift_4 = addr_range.addr >> 4,
+         .size_shift_4 = addr_range.range >> 4,
+      }};
+   } else {
+      return (union nvk_buffer_descriptor) { .addr = {
+         .base_addr = addr_range.addr,
+         .size = addr_range.range,
+      }};
+   }
 }
 
 static void
@@ -152,7 +159,7 @@ write_ubo_desc(struct nvk_physical_device *pdev,
                const VkDescriptorBufferInfo *const info,
                uint32_t binding, uint32_t elem)
 {
-   const struct nvk_buffer_address desc = ubo_desc(pdev, info, binding, elem);
+   const union nvk_buffer_descriptor desc = ubo_desc(pdev, info, binding, elem);
 
    write_desc(set, binding, elem, &desc, sizeof(desc));
 }
@@ -168,7 +175,7 @@ write_dynamic_ubo_desc(struct nvk_physical_device *pdev,
       ubo_desc(pdev, info, binding, elem);
 }
 
-static struct nvk_buffer_address
+static union nvk_buffer_descriptor
 ssbo_desc(const VkDescriptorBufferInfo *const info,
           uint32_t binding, uint32_t elem)
 {
@@ -182,10 +189,10 @@ ssbo_desc(const VkDescriptorBufferInfo *const info,
 
-   return (struct nvk_buffer_address) {
-      .base_addr = align64(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT),
-      .size = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT),
-   };
+   addr_range.addr = align64(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT);
+   addr_range.range = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT);
+
+   return (union nvk_buffer_descriptor) { .addr = {
+      .base_addr = addr_range.addr,
+      .size = addr_range.range,
+   }};
 }
@@ -194,7 +201,7 @@ write_ssbo_desc(struct nvk_descriptor_set *set,
                 const VkDescriptorBufferInfo *const info,
                 uint32_t binding, uint32_t elem)
 {
-   const struct nvk_buffer_address desc = ssbo_desc(info, binding, elem);
+   const union nvk_buffer_descriptor desc = ssbo_desc(info, binding, elem);
 
    write_desc(set, binding, elem, &desc, sizeof(desc));
 }
@@ -9,6 +9,7 @@
 
 #include "nouveau_bo.h"
 #include "nvk_device.h"
+#include "nvk_physical_device.h"
 #include "vk_object.h"
 #include "vk_descriptor_update_template.h"
@@ -52,6 +53,16 @@ PRAGMA_DIAGNOSTIC_POP
 static_assert(sizeof(struct nvk_buffer_view_descriptor) == 4,
               "nvk_buffer_view_descriptor has no holes");
 
+PRAGMA_DIAGNOSTIC_PUSH
+PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
+struct nvk_bindless_cbuf {
+   uint64_t base_addr_shift_4:45;
+   uint64_t size_shift_4:19;
+};
+PRAGMA_DIAGNOSTIC_POP
+static_assert(sizeof(struct nvk_bindless_cbuf) == 8,
+              "nvk_bindless_cbuf has no holes");
+
 /* This has to match nir_address_format_64bit_bounded_global */
 PRAGMA_DIAGNOSTIC_PUSH
 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
@@ -64,6 +75,31 @@ PRAGMA_DIAGNOSTIC_POP
 static_assert(sizeof(struct nvk_buffer_address) == 16,
               "nvk_buffer_address has no holes");
 
+union nvk_buffer_descriptor {
+   struct nvk_buffer_address addr;
+   struct nvk_bindless_cbuf cbuf;
+};
+
+static inline bool
+nvk_use_bindless_cbuf(const struct nv_device_info *info)
+{
+   return info->cls_eng3d >= 0xC597 /* TURING_A */;
+}
+
+static inline struct nvk_buffer_address
+nvk_ubo_descriptor_addr(const struct nvk_physical_device *pdev,
+                        union nvk_buffer_descriptor desc)
+{
+   if (nvk_use_bindless_cbuf(&pdev->info)) {
+      return (struct nvk_buffer_address) {
+         .base_addr = desc.cbuf.base_addr_shift_4 << 4,
+         .size = desc.cbuf.size_shift_4 << 4,
+      };
+   } else {
+      return desc.addr;
+   }
+}
+
 #define NVK_BUFFER_ADDRESS_NULL ((struct nvk_buffer_address) { .size = 0 })
 
 struct nvk_descriptor_pool {
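Note the union is still 16 bytes (the size of nvk_buffer_address), so descriptor strides are unchanged; only the UBO path uses the packed 8-byte form. A round-trip sketch of the encode/decode pair, mirroring ubo_desc() and nvk_ubo_descriptor_addr() with a stand-in type:

   #include <assert.h>
   #include <stdint.h>

   /* Stand-in for nvk_bindless_cbuf: both fields are stored pre-shifted
    * by 4, so base address and size must be 16-byte aligned. */
   struct bindless_cbuf {
      uint64_t base_addr_shift_4 : 45;
      uint64_t size_shift_4 : 19;
   };

   static struct bindless_cbuf encode(uint64_t base_addr, uint32_t size)
   {
      assert((base_addr & 0xf) == 0 && (size & 0xf) == 0);
      return (struct bindless_cbuf) {
         .base_addr_shift_4 = base_addr >> 4,
         .size_shift_4 = size >> 4,
      };
   }

   int main(void)
   {
      struct bindless_cbuf d = encode(0x123456780ull, 0x10000);
      assert(((uint64_t)d.base_addr_shift_4 << 4) == 0x123456780ull);
      assert(((uint32_t)d.size_shift_4 << 4) == 0x10000);
      return 0;
   }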
@@ -90,7 +126,7 @@ struct nvk_descriptor_set {
    uint64_t addr;
    uint32_t size;
 
-   struct nvk_buffer_address dynamic_buffers[];
+   union nvk_buffer_descriptor dynamic_buffers[];
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_descriptor_set, base, VkDescriptorSet,
@@ -51,7 +51,7 @@ nvk_descriptor_stride_align_for_type(const struct nvk_physical_device *pdev,
 
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      *stride = *alignment = sizeof(struct nvk_buffer_address);
+      *stride = *alignment = sizeof(union nvk_buffer_descriptor);
       break;
 
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -50,6 +50,7 @@ compar_cbufs(const void *_a, const void *_b)
 struct lower_descriptors_ctx {
    const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];
 
+   bool use_bindless_cbuf;
    bool clamp_desc_array_bounds;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
@@ -610,13 +611,14 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
       index = nir_iadd(b, index,
                        nir_iadd_imm(b, dynamic_buffer_start,
                                     binding_layout->dynamic_buffer_index));
 
+      uint32_t desc_size = sizeof(union nvk_buffer_descriptor);
       nir_def *root_desc_offset =
-         nir_iadd_imm(b, nir_imul_imm(b, index, sizeof(struct nvk_buffer_address)),
+         nir_iadd_imm(b, nir_imul_imm(b, index, desc_size),
                       nvk_root_descriptor_offset(dynamic_buffers));
 
-      assert(num_components == 4 && bit_size == 32);
-      return nir_ldc_nv(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
+      assert(num_components * bit_size <= desc_size * 8);
+      return nir_ldc_nv(b, num_components, bit_size,
+                        nir_imm_int(b, 0), root_desc_offset,
                         .align_mul = 16, .align_offset = 0);
    }
@@ -628,12 +630,19 @@ load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
       assert(binding_layout->stride == 1);
       const uint32_t binding_size = binding_layout->array_size;
 
-      /* Convert it to nir_address_format_64bit_bounded_global */
-      assert(num_components == 4 && bit_size == 32);
-      return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
-                      nir_unpack_64_2x32_split_y(b, base_addr),
-                      nir_imm_int(b, binding_size),
-                      nir_imm_int(b, 0));
+      if (ctx->use_bindless_cbuf) {
+         assert(num_components == 1 && bit_size == 64);
+         const uint32_t size = align(binding_size, 16);
+         return nir_ior_imm(b, nir_ishr_imm(b, base_addr, 4),
+                            ((uint64_t)size >> 4) << 45);
+      } else {
+         /* Convert it to nir_address_format_64bit_bounded_global */
+         assert(num_components == 4 && bit_size == 32);
+         return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
+                         nir_unpack_64_2x32_split_y(b, base_addr),
+                         nir_imm_int(b, binding_size),
+                         nir_imm_int(b, 0));
+      }
    }
 
    default: {
@@ -685,6 +694,29 @@ is_idx_intrin(nir_intrinsic_instr *intrin)
    return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
 }
 
+static nir_def *
+buffer_address_to_ldcx_handle(nir_builder *b, nir_def *addr)
+{
+   nir_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, addr, 0x3));
+   nir_def *size = nir_channel(b, addr, 2);
+   nir_def *offset = nir_channel(b, addr, 3);
+
+   nir_def *addr16 = nir_ushr_imm(b, base_addr, 4);
+   nir_def *addr16_lo = nir_unpack_64_2x32_split_x(b, addr16);
+   nir_def *addr16_hi = nir_unpack_64_2x32_split_y(b, addr16);
+
+   /* If we assume the top bits of the address are 0, as well as the
+    * bottom four bits of the size (we can trust this since it's a
+    * descriptor), then
+    *
+    *    ((size >> 4) << 13) | addr16_hi
+    *
+    * is just an imad.
+    */
+   nir_def *handle_hi = nir_imad(b, size, nir_imm_int(b, 1 << 9), addr16_hi);
+
+   return nir_vec3(b, addr16_lo, handle_hi, offset);
+}
+
 static nir_def *
 load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                                const struct lower_descriptors_ctx *ctx)
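The imad claim in that comment can be checked with concrete numbers: the descriptor guarantees the size's low four bits are zero, so (size >> 4) << 13 equals size << 9, and addr16_hi fits in 13 bits, so the OR has no overlapping bits and behaves as an add. A worked check with made-up values:

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      /* Made-up example: base_addr = 0x1234567890, size = 0x10000 */
      uint64_t addr16 = 0x1234567890ull >> 4;
      uint32_t addr16_hi = (uint32_t)(addr16 >> 32);
      uint32_t size = 0x10000;

      /* The "shift and or" form of the handle's high word... */
      uint32_t or_form = ((size >> 4) << 13) | addr16_hi;
      /* ...equals one multiply-add, which is what the nir_imad emits. */
      uint32_t imad_form = size * (1u << 9) + addr16_hi;

      assert(or_form == imad_form);
      return 0;
   }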
@@ -701,13 +733,23 @@ load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
    uint32_t binding = nir_intrinsic_binding(intrin);
    index = nir_iadd(b, index, intrin->src[0].ssa);
 
-   nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
-
-   /* We know a priori that the .w component (offset) is zero */
-   return nir_vec4(b, nir_channel(b, desc, 0),
-                   nir_channel(b, desc, 1),
-                   nir_channel(b, desc, 2),
-                   nir_imm_int(b, 0));
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+   if (descriptor_type_is_ubo(desc_type) && ctx->use_bindless_cbuf) {
+      nir_def *desc = load_descriptor(b, 1, 64, set, binding, index, 0, ctx);
+
+      /* The descriptor is just the handle. NIR also needs an offset. */
+      return nir_vec3(b, nir_unpack_64_2x32_split_x(b, desc),
+                      nir_unpack_64_2x32_split_y(b, desc),
+                      nir_imm_int(b, 0));
+   } else {
+      nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
+
+      /* We know a priori that the .w component (offset) is zero */
+      return nir_vec4(b, nir_channel(b, desc, 0),
+                      nir_channel(b, desc, 1),
+                      nir_channel(b, desc, 2),
+                      nir_imm_int(b, 0));
+   }
 }
 
 static bool
@@ -1253,6 +1295,7 @@ nvk_nir_lower_descriptors(nir_shader *nir,
                           struct nvk_cbuf_map *cbuf_map_out)
 {
    struct lower_descriptors_ctx ctx = {
+      .use_bindless_cbuf = nvk_use_bindless_cbuf(&pdev->info),
       .clamp_desc_array_bounds =
          rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
          rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
@@ -116,14 +116,18 @@ nir_address_format
 nvk_ubo_addr_format(const struct nvk_physical_device *pdev,
                     VkPipelineRobustnessBufferBehaviorEXT robustness)
 {
-   switch (robustness) {
-   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
-      return nir_address_format_64bit_global_32bit_offset;
-   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
-   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
-      return nir_address_format_64bit_bounded_global;
-   default:
-      unreachable("Invalid robust buffer access behavior");
-   }
+   if (nvk_use_bindless_cbuf(&pdev->info)) {
+      return nir_address_format_vec2_index_32bit_offset;
+   } else {
+      switch (robustness) {
+      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
+         return nir_address_format_64bit_global_32bit_offset;
+      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
+      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
+         return nir_address_format_64bit_bounded_global;
+      default:
+         unreachable("Invalid robust buffer access behavior");
+      }
+   }
 }
@@ -258,10 +262,21 @@ lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
    const uint32_t align_mul = nir_intrinsic_align_mul(load);
    const uint32_t align_offset = nir_intrinsic_align_offset(load);
 
-   nir_def *val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
-                             index, offset, .access = access,
-                             .align_mul = align_mul,
-                             .align_offset = align_offset);
+   nir_def *val;
+   if (load->src[0].ssa->num_components == 1) {
+      val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
+                       index, offset, .access = access,
+                       .align_mul = align_mul,
+                       .align_offset = align_offset);
+   } else if (load->src[0].ssa->num_components == 2) {
+      nir_def *handle = nir_pack_64_2x32(b, load->src[0].ssa);
+      val = nir_ldcx_nv(b, load->num_components, load->def.bit_size,
+                        handle, offset, .access = access,
+                        .align_mul = align_mul,
+                        .align_offset = align_offset);
+   } else {
+      unreachable("Invalid UBO index");
+   }
    nir_def_rewrite_uses(&load->def, val);
    return true;
 }