Merge branch 'raw_access_chains' into 'main'

Draft: nir, spirv, nvk: Implement VK_NV_raw_access_chains

See merge request mesa/mesa!38874

commit 61e5d8ff72
Author: Mel Henning
Date:   2025-12-19 18:52:28 -05:00

11 changed files with 121 additions and 9 deletions

View file

@@ -1676,7 +1676,18 @@ typedef struct nir_deref_instr {
    union {
       struct {
          nir_src index;
+
+         /** If true, the index is always within the bounds of parent */
          bool in_bounds;
+
+         /**
+          * If true, then the deref will be in bounds if the parent's base
+          * address is in bounds
+          */
+         bool base_bounds_check;
+
+         /** If true, all bounds checking should be disabled for this deref */
+         bool never_bounds_check;
       } arr;
 
       struct {

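The two new flags encode the per-element and disabled robustness modes of VK_NV_raw_access_chains; with neither set, the default per-component checking applies. As a reading aid, here is a minimal standalone model of the check each setting implies, assuming a bounded address format where offsets and sizes are in bytes; all names are illustrative, not Mesa API:

#include <stdbool.h>
#include <stdint.h>

/* Editor's sketch, not Mesa code.  `offset`/`size` describe the access,
 * `base_offset` is where the trailing array deref's parent points, and
 * `bound` is the buffer size from the descriptor. */
static bool
access_in_bounds(uint64_t offset, uint64_t size, uint64_t base_offset,
                 uint64_t bound,
                 bool base_bounds_check, bool never_bounds_check)
{
   if (never_bounds_check)
      return true;                      /* robustness disabled entirely */
   if (base_bounds_check)
      return base_offset + 1 <= bound;  /* per-element: only the base
                                         * address must be in the buffer */
   return offset + size <= bound;       /* default: the whole byte range
                                         * must fit */
}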
View file

@ -322,6 +322,8 @@ clone_deref_instr(clone_state *state, const nir_deref_instr *deref)
__clone_src(state, &nderef->instr, __clone_src(state, &nderef->instr,
&nderef->arr.index, &deref->arr.index); &nderef->arr.index, &deref->arr.index);
nderef->arr.in_bounds = deref->arr.in_bounds; nderef->arr.in_bounds = deref->arr.in_bounds;
nderef->arr.base_bounds_check = deref->arr.base_bounds_check;
nderef->arr.never_bounds_check = deref->arr.never_bounds_check;
break; break;
case nir_deref_type_array_wildcard: case nir_deref_type_array_wildcard:

View file

@@ -1223,6 +1223,13 @@ opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
 {
    assert(deref->deref_type == nir_deref_type_ptr_as_array);
 
+   /* Neither of the optimizations below is worthwhile if it discards
+    * bounds checking info
+    */
+   if (deref->arr.base_bounds_check ||
+       deref->arr.never_bounds_check)
+      return false;
+
    nir_deref_instr *parent = nir_deref_instr_parent(deref);
 
    if (nir_src_is_const(deref->arr.index) &&

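To make "discards bounds checking info" concrete: the rewrites this guard skips either fold the deref's index into its parent or, roughly, replace a zero-index deref with a plain cast, and in both cases the per-deref flags have no correct home afterwards. A toy model of the folding case (illustrative C, not the actual pass):

#include <stdbool.h>

struct toy_deref {
   long index;
   bool base_bounds_check;
   bool never_bounds_check;
};

/* Folding child into parent sums the indices, but the combined deref is a
 * different access than either input, so no single flag assignment
 * preserves both meanings -- the flags must be dropped, hence the bail. */
static struct toy_deref
toy_combine(struct toy_deref parent, struct toy_deref child)
{
   return (struct toy_deref){ .index = parent.index + child.index };
}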
View file

@@ -173,6 +173,8 @@ hash_deref(uint32_t hash, const nir_deref_instr *instr)
    case nir_deref_type_ptr_as_array:
       hash = hash_src(hash, &instr->arr.index);
       hash = HASH(hash, instr->arr.in_bounds);
+      hash = HASH(hash, instr->arr.base_bounds_check);
+      hash = HASH(hash, instr->arr.never_bounds_check);
       break;
 
    case nir_deref_type_cast:

@@ -628,6 +630,10 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
          return false;
       if (deref1->arr.in_bounds != deref2->arr.in_bounds)
          return false;
+      if (deref1->arr.base_bounds_check != deref2->arr.base_bounds_check)
+         return false;
+      if (deref1->arr.never_bounds_check != deref2->arr.never_bounds_check)
+         return false;
       break;
 
    case nir_deref_type_cast:

View file

@@ -420,10 +420,37 @@ addr_format_needs_bounds_check(nir_address_format addr_format)
    return addr_format == nir_address_format_64bit_bounded_global;
 }
 
+static nir_deref_instr *
+trailing_array_deref(nir_def *ssa)
+{
+   while (true) {
+      if (!nir_def_is_deref(ssa))
+         return NULL;
+
+      nir_deref_instr *deref = nir_def_as_deref(ssa);
+      if (nir_deref_instr_is_arr(deref)) {
+         return deref;
+      } else if (deref->deref_type == nir_deref_type_cast) {
+         ssa = deref->parent.ssa;
+      } else {
+         return NULL;
+      }
+   }
+}
+
 static nir_def *
 addr_is_in_bounds(nir_builder *b, nir_def *addr,
                   nir_address_format addr_format, unsigned size)
 {
+   nir_deref_instr *arr_deref = trailing_array_deref(addr);
+   if (arr_deref) {
+      if (arr_deref->arr.never_bounds_check) {
+         return nir_imm_true(b);
+      } else if (arr_deref->arr.base_bounds_check) {
+         addr = arr_deref->parent.ssa;
+         size = 1;
+      }
+   }
+
    assert(addr_format == nir_address_format_64bit_bounded_global);
    assert(addr->num_components == 4);
    assert(size > 0);

@@ -810,8 +837,7 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
 
       /* TODO: Better handle block_intel. */
-      assert(load->num_components == 1);
-      const unsigned load_size = bit_size / 8;
+      const unsigned load_size = load->num_components * bit_size / 8;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
 
       nir_builder_instr_insert(b, &load->instr);

@@ -1000,8 +1026,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
    if (addr_format_needs_bounds_check(addr_format)) {
       /* TODO: Better handle block_intel. */
-      assert(store->num_components == 1);
-      const unsigned store_size = value->bit_size / 8;
+      const unsigned store_size = value->num_components * value->bit_size / 8;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
 
       nir_builder_instr_insert(b, &store->instr);

@@ -1117,7 +1142,8 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    assert(atomic->def.bit_size % 8 == 0);
 
    if (addr_format_needs_bounds_check(addr_format)) {
-      const unsigned atomic_size = atomic->def.bit_size / 8;
+      const unsigned atomic_size =
+         atomic->def.num_components * atomic->def.bit_size / 8;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
 
       nir_builder_instr_insert(b, &atomic->instr);

@@ -1421,8 +1447,14 @@ nir_lower_explicit_io_instr(nir_builder *b,
     * that information through to nir_lower_explicit_io. For now, however,
     * scalarizing is at least correct.
     */
-   bool scalarize = vec_stride > scalar_size ||
-                    addr_format_needs_bounds_check(addr_format);
+   bool scalarize = vec_stride > scalar_size;
+   if (addr_format_needs_bounds_check(addr_format)) {
+      nir_deref_instr *arr_deref = trailing_array_deref(&deref->def);
+      bool skip_scalarize = arr_deref &&
+                            (arr_deref->arr.base_bounds_check ||
+                             arr_deref->arr.never_bounds_check);
+      scalarize |= !skip_scalarize;
+   }
 
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_deref: {

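The nir_lower_explicit_io_instr change is where the flags pay off: until now every bounds-checked access was scalarized so each component could be tested separately, whereas per-element and disabled robustness let one test (or none) cover the whole vector, matching the widened load/store/atomic sizes above. A standalone restatement of the new decision, with illustrative names rather than Mesa API:

#include <stdbool.h>

/* Editor's sketch of the scalarize decision, assuming the semantics shown
 * in the hunk above; not Mesa code. */
static bool
must_scalarize(unsigned vec_stride, unsigned scalar_size,
               bool needs_bounds_check, bool has_trailing_array_deref,
               bool base_bounds_check, bool never_bounds_check)
{
   bool scalarize = vec_stride > scalar_size;
   if (needs_bounds_check) {
      /* One check covers the vector only when the trailing array deref
       * says the whole element is trusted or checking is disabled. */
      bool skip = has_trailing_array_deref &&
                  (base_bounds_check || never_bounds_check);
      scalarize |= !skip;
   }
   return scalarize;
}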
View file

@@ -1147,6 +1147,10 @@ print_deref_instr(nir_deref_instr *instr, print_state *state)
    if (nir_deref_instr_is_arr(instr)) {
       if (instr->arr.in_bounds)
          fprintf(fp, " (in bounds)");
+      if (instr->arr.base_bounds_check)
+         fprintf(fp, " (base bounds check)");
+      if (instr->arr.never_bounds_check)
+         fprintf(fp, " (never bounds check)");
    }
 
    if (instr->deref_type != nir_deref_type_var &&

View file

@@ -545,8 +545,10 @@ union packed_instr {
       unsigned deref_type : 3;
       unsigned cast_type_same_as_last : 1;
       unsigned modes : 6; /* See (de|en)code_deref_modes() */
-      unsigned _pad : 8;
+      unsigned _pad : 6;
       unsigned in_bounds : 1;
+      unsigned base_bounds_check : 1;
+      unsigned never_bounds_check : 1;
       unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */
       unsigned def : 8;
    } deref;

@@ -919,6 +921,8 @@ write_deref(write_ctx *ctx, const nir_deref_instr *deref)
       header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx);
       header.deref.in_bounds = deref->arr.in_bounds;
+      header.deref.base_bounds_check = deref->arr.base_bounds_check;
+      header.deref.never_bounds_check = deref->arr.never_bounds_check;
    }
 
    write_def(ctx, &deref->def, header, deref->instr.type);

@@ -1005,6 +1009,8 @@ read_deref(read_ctx *ctx, union packed_instr header)
    }
 
    deref->arr.in_bounds = header.deref.in_bounds;
+   deref->arr.base_bounds_check = header.deref.base_bounds_check;
+   deref->arr.never_bounds_check = header.deref.never_bounds_check;
 
    parent = nir_src_as_deref(deref->parent);
    if (deref->deref_type == nir_deref_type_array)

View file

@@ -157,6 +157,7 @@ static const struct spirv_capabilities implemented_capabilities = {
    .RayTracingKHR = true,
    .RayTracingPositionFetchKHR = true,
    .RayTraversalPrimitiveCullingKHR = true,
+   .RawAccessChainsNV = true,
    .ReplicatedCompositesEXT = true,
    .RoundingModeRTE = true,
    .RoundingModeRTZ = true,

@@ -6536,6 +6537,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
    case SpvOpPtrAccessChain:
    case SpvOpInBoundsAccessChain:
    case SpvOpInBoundsPtrAccessChain:
+   case SpvOpRawAccessChainNV:
    case SpvOpArrayLength:
    case SpvOpConvertPtrToU:
    case SpvOpConvertUToPtr:

View file

@@ -2848,6 +2848,44 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
       break;
    }
 
+   case SpvOpRawAccessChainNV: {
+      struct vtn_type *ptr_type = vtn_get_type(b, w[1]);
+      nir_deref_instr *base = vtn_nir_deref(b, w[3]);
+      uint32_t stride = vtn_constant_uint(b, w[4]);
+      nir_def *index = vtn_get_nir_ssa(b, w[5]);
+      nir_def *offset = vtn_get_nir_ssa(b, w[6]);
+
+      uint32_t flags = 0;
+      if (count >= 8) {
+         flags = w[7];
+      }
+
+      nir_deref_instr *deref = base;
+      if (stride) {
+         index = nir_i2iN(&b->nb, index, base->def.bit_size);
+         deref = nir_build_deref_cast(&b->nb, &deref->def, base->modes,
+                                      glsl_uint8_t_type(), stride);
+         deref = nir_build_deref_ptr_as_array(&b->nb, deref, index);
+      }
+
+      offset = nir_i2iN(&b->nb, offset, base->def.bit_size);
+      deref = nir_build_deref_cast(&b->nb, &deref->def, base->modes,
+                                   glsl_uint8_t_type(), /* stride */ 1);
+      deref = nir_build_deref_ptr_as_array(&b->nb, deref, offset);
+
+      if (flags & SpvRawAccessChainOperandsRobustnessPerComponentNVMask) {
+         /* Default robustness */
+      } else if (flags & SpvRawAccessChainOperandsRobustnessPerElementNVMask) {
+         deref->arr.base_bounds_check = true;
+      } else {
+         deref->arr.never_bounds_check = true;
+      }
+
+      vtn_push_pointer(b, w[2], vtn_pointer_from_ssa(b, &deref->def, ptr_type));
+      break;
+   }
+
    case SpvOpCopyMemory: {
       struct vtn_value *dest_val = vtn_pointer_value(b, w[1]);
       struct vtn_value *src_val = vtn_pointer_value(b, w[2]);

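The two cast-plus-ptr_as_array pairs built here implement the address arithmetic that SPV_NV_raw_access_chains defines for OpRawAccessChainNV: the result points stride * index + offset bytes past the base pointer. In plain C the equivalent byte address is (illustrative helper, not part of the MR):

#include <stdint.h>

/* base + index * stride + offset, computed the way the deref chain does:
 * a stride-`stride` step for the element index (skipped when stride is
 * zero), then a stride-1 step for the byte offset. */
static uint64_t
raw_access_chain_addr(uint64_t base, uint64_t index, uint32_t stride,
                      uint64_t offset)
{
   uint64_t addr = base;
   if (stride)
      addr += index * (uint64_t)stride;
   addr += offset;
   return addr;
}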
View file

@@ -320,7 +320,7 @@ ssbo_desc(struct nvk_addr_range addr_range)
    assert(addr_range.range <= UINT32_MAX);
 
    addr_range.addr = ROUND_DOWN_TO(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT);
-   addr_range.range = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT);
+   // addr_range.range = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT);
 
    return (union nvk_buffer_descriptor) { .addr = {
       .base_addr = addr_range.addr,

View file

@@ -302,6 +302,7 @@ nvk_get_device_extensions(const struct nvk_instance *instance,
       .GOOGLE_user_type = true,
       .MESA_image_alignment_control = true,
       .NV_compute_shader_derivatives = info->cls_eng3d >= TURING_A,
+      .NV_raw_access_chains = true,
       .NV_shader_sm_builtins = true,
       .NVX_image_view_handle = info->cls_eng3d >= MAXWELL_A, /* needs true bindless descriptors */
       .VALVE_mutable_descriptor_type = true,

@@ -744,6 +745,9 @@ nvk_get_device_features(const struct nv_device_info *info,
       /* VK_MESA_image_alignment_control */
       .imageAlignmentControl = true,
 
+      /* VK_NV_raw_access_chains */
+      .shaderRawAccessChains = true,
+
       /* VK_NV_shader_sm_builtins */
       .shaderSMBuiltins = true,