mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
Merge branch 'raw_access_chains' into 'main'

Draft: nir, spirv, nvk: Implement VK_NV_raw_access_chains

See merge request mesa/mesa!38874

commit 61e5d8ff72
11 changed files with 121 additions and 9 deletions
@@ -1676,7 +1676,18 @@ typedef struct nir_deref_instr {
    union {
       struct {
          nir_src index;
 
          /** If true, the index is always within the bounds of parent */
          bool in_bounds;
+
+         /**
+          * If true, then the deref will be in bounds if the parent's base
+          * address is in bounds.
+          */
+         bool base_bounds_check;
+
+         /** If true, all bounds checking should be disabled for this deref */
+         bool never_bounds_check;
       } arr;
 
       struct {
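Not part of the diff: a minimal sketch of how a consumer pass might interpret the three `arr` flags together. `deref_needs_bounds_check` is a hypothetical helper name for illustration, not an API this series adds.

   static bool
   deref_needs_bounds_check(const nir_deref_instr *deref)
   {
      assert(nir_deref_instr_is_arr(deref));

      /* Raw chains with no robustness requested: never check. */
      if (deref->arr.never_bounds_check)
         return false;

      /* Per-element robustness: a check is still needed, but only of the
       * element's base address, not of every byte accessed through it.
       */
      if (deref->arr.base_bounds_check)
         return true;

      /* Otherwise the pre-existing in_bounds flag can prove the index safe. */
      return !deref->arr.in_bounds;
   }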
@@ -322,6 +322,8 @@ clone_deref_instr(clone_state *state, const nir_deref_instr *deref)
       __clone_src(state, &nderef->instr,
                   &nderef->arr.index, &deref->arr.index);
       nderef->arr.in_bounds = deref->arr.in_bounds;
+      nderef->arr.base_bounds_check = deref->arr.base_bounds_check;
+      nderef->arr.never_bounds_check = deref->arr.never_bounds_check;
       break;
 
    case nir_deref_type_array_wildcard:
@@ -1223,6 +1223,13 @@ opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
 {
    assert(deref->deref_type == nir_deref_type_ptr_as_array);
 
+   /* Neither of the optimizations below is worthwhile if it would discard
+    * bounds-checking info.
+    */
+   if (deref->arr.base_bounds_check ||
+       deref->arr.never_bounds_check)
+      return false;
+
    nir_deref_instr *parent = nir_deref_instr_parent(deref);
 
    if (nir_src_is_const(deref->arr.index) &&
@@ -173,6 +173,8 @@ hash_deref(uint32_t hash, const nir_deref_instr *instr)
    case nir_deref_type_ptr_as_array:
       hash = hash_src(hash, &instr->arr.index);
       hash = HASH(hash, instr->arr.in_bounds);
+      hash = HASH(hash, instr->arr.base_bounds_check);
+      hash = HASH(hash, instr->arr.never_bounds_check);
       break;
 
    case nir_deref_type_cast:
@@ -628,6 +630,10 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
          return false;
       if (deref1->arr.in_bounds != deref2->arr.in_bounds)
          return false;
+      if (deref1->arr.base_bounds_check != deref2->arr.base_bounds_check)
+         return false;
+      if (deref1->arr.never_bounds_check != deref2->arr.never_bounds_check)
+         return false;
       break;
 
    case nir_deref_type_cast:
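These two hooks keep CSE honest: two otherwise-identical derefs that differ only in robustness flags must not be merged. A contrived illustration (hypothetical values, not code from the diff):

   /* d1: deref_ptr_as_array base[i]   (never_bounds_check = true)
    * d2: deref_ptr_as_array base[i]   (base_bounds_check  = true)
    *
    * Without the hash/equality updates above, nir_opt_cse could replace
    * d2 with d1 and silently drop the per-element bounds check.
    */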
@@ -420,10 +420,37 @@ addr_format_needs_bounds_check(nir_address_format addr_format)
    return addr_format == nir_address_format_64bit_bounded_global;
 }
 
+static nir_deref_instr *
+trailing_array_deref(nir_def *ssa)
+{
+   while (true) {
+      if (!nir_def_is_deref(ssa))
+         return NULL;
+
+      nir_deref_instr *deref = nir_def_as_deref(ssa);
+      if (nir_deref_instr_is_arr(deref)) {
+         return deref;
+      } else if (deref->deref_type == nir_deref_type_cast) {
+         ssa = deref->parent.ssa;
+      } else {
+         return NULL;
+      }
+   }
+}
+
 static nir_def *
 addr_is_in_bounds(nir_builder *b, nir_def *addr,
                   nir_address_format addr_format, unsigned size)
 {
+   nir_deref_instr *arr_deref = trailing_array_deref(addr);
+   if (arr_deref) {
+      if (arr_deref->arr.never_bounds_check) {
+         return nir_imm_true(b);
+      } else if (arr_deref->arr.base_bounds_check) {
+         addr = arr_deref->parent.ssa;
+         size = 1;
+      }
+   }
    assert(addr_format == nir_address_format_64bit_bounded_global);
    assert(addr->num_components == 4);
    assert(size > 0);
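For reference, with nir_address_format_64bit_bounded_global the address is a 4-component vector and the check this function falls through to is, roughly, offset + access_size <= buffer_size. A sketch of that test, assuming the <base_lo, base_hi, size, offset> channel layout this format uses:

   /* In-bounds test for 64bit_bounded_global (sketch): passes iff
    * offset + size <= buffer_size. With base_bounds_check, addr has been
    * rewound to the parent deref and size forced to 1 above, so only the
    * element's base address is validated, matching per-element robustness.
    */
   nir_def *in_bounds =
      nir_ige(b, nir_channel(b, addr, 2),              /* buffer size */
              nir_iadd_imm(b, nir_channel(b, addr, 3), /* offset      */
                           size));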
@@ -810,8 +837,7 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
 
       /* TODO: Better handle block_intel. */
-      assert(load->num_components == 1);
-      const unsigned load_size = bit_size / 8;
+      const unsigned load_size = load->num_components * bit_size / 8;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
 
       nir_builder_instr_insert(b, &load->instr);
@@ -1000,8 +1026,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
 
    if (addr_format_needs_bounds_check(addr_format)) {
       /* TODO: Better handle block_intel. */
-      assert(store->num_components == 1);
-      const unsigned store_size = value->bit_size / 8;
+      const unsigned store_size = value->num_components * value->bit_size / 8;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
 
       nir_builder_instr_insert(b, &store->instr);
@@ -1117,7 +1142,8 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    assert(atomic->def.bit_size % 8 == 0);
 
    if (addr_format_needs_bounds_check(addr_format)) {
-      const unsigned atomic_size = atomic->def.bit_size / 8;
+      const unsigned atomic_size =
+         atomic->def.num_components * atomic->def.bit_size / 8;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
 
       nir_builder_instr_insert(b, &atomic->instr);
@@ -1421,8 +1447,14 @@ nir_lower_explicit_io_instr(nir_builder *b,
     * that information through to nir_lower_explicit_io. For now, however,
     * scalarizing is at least correct.
     */
-   bool scalarize = vec_stride > scalar_size ||
-                    addr_format_needs_bounds_check(addr_format);
+   bool scalarize = vec_stride > scalar_size;
+   if (addr_format_needs_bounds_check(addr_format)) {
+      nir_deref_instr *arr_deref = trailing_array_deref(&deref->def);
+      bool skip_scalarize = arr_deref &&
+                            (arr_deref->arr.base_bounds_check ||
+                             arr_deref->arr.never_bounds_check);
+      scalarize |= !skip_scalarize;
+   }
 
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_deref: {
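Restated: per-component robustness must guard each component separately, so vector accesses get scalarized as before; per-element and unchecked raw chains are covered by a single guard (or none), so their vector accesses can stay whole. A hypothetical helper equivalent to the inline logic above:

   static bool
   needs_scalarize_for_bounds(nir_deref_instr *deref)
   {
      /* One bounds check (or none) covers the whole vector for
       * per-element and unchecked raw access chains; everything else
       * is checked per component and must be split up.
       */
      nir_deref_instr *arr = trailing_array_deref(&deref->def);
      return !(arr && (arr->arr.base_bounds_check ||
                       arr->arr.never_bounds_check));
   }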
@@ -1147,6 +1147,10 @@ print_deref_instr(nir_deref_instr *instr, print_state *state)
    if (nir_deref_instr_is_arr(instr)) {
       if (instr->arr.in_bounds)
          fprintf(fp, " (in bounds)");
+      if (instr->arr.base_bounds_check)
+         fprintf(fp, " (base bounds check)");
+      if (instr->arr.never_bounds_check)
+         fprintf(fp, " (never bounds check)");
    }
 
    if (instr->deref_type != nir_deref_type_var &&
@@ -545,8 +545,10 @@ union packed_instr {
       unsigned deref_type : 3;
       unsigned cast_type_same_as_last : 1;
       unsigned modes : 6; /* See (de|en)code_deref_modes() */
-      unsigned _pad : 8;
+      unsigned _pad : 6;
       unsigned in_bounds : 1;
+      unsigned base_bounds_check : 1;
+      unsigned never_bounds_check : 1;
       unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */
      unsigned def : 8;
    } deref;
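Bit-budget check on the packed header: the two new flags are paid for by shrinking _pad from 8 to 6 bits, so the deref variant still totals 32 bits (assuming the usual 4-bit instr_type field shared by all packed_instr variants):

   4 (instr_type) + 3 (deref_type) + 1 (cast_type_same_as_last) + 6 (modes)
   + 6 (_pad) + 1 (in_bounds) + 1 (base_bounds_check)
   + 1 (never_bounds_check) + 1 (packed_src_ssa_16bit) + 8 (def) = 32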
@@ -919,6 +921,8 @@ write_deref(write_ctx *ctx, const nir_deref_instr *deref)
       header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx);
 
       header.deref.in_bounds = deref->arr.in_bounds;
+      header.deref.base_bounds_check = deref->arr.base_bounds_check;
+      header.deref.never_bounds_check = deref->arr.never_bounds_check;
    }
 
    write_def(ctx, &deref->def, header, deref->instr.type);
@@ -1005,6 +1009,8 @@ read_deref(read_ctx *ctx, union packed_instr header)
    }
 
    deref->arr.in_bounds = header.deref.in_bounds;
+   deref->arr.base_bounds_check = header.deref.base_bounds_check;
+   deref->arr.never_bounds_check = header.deref.never_bounds_check;
 
    parent = nir_src_as_deref(deref->parent);
    if (deref->deref_type == nir_deref_type_array)
@@ -157,6 +157,7 @@ static const struct spirv_capabilities implemented_capabilities = {
    .RayTracingKHR = true,
    .RayTracingPositionFetchKHR = true,
    .RayTraversalPrimitiveCullingKHR = true,
+   .RawAccessChainsNV = true,
    .ReplicatedCompositesEXT = true,
    .RoundingModeRTE = true,
    .RoundingModeRTZ = true,
@@ -6536,6 +6537,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
    case SpvOpPtrAccessChain:
    case SpvOpInBoundsAccessChain:
    case SpvOpInBoundsPtrAccessChain:
+   case SpvOpRawAccessChainNV:
    case SpvOpArrayLength:
    case SpvOpConvertPtrToU:
    case SpvOpConvertUToPtr:
@@ -2848,6 +2848,44 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
       break;
    }
 
+   case SpvOpRawAccessChainNV: {
+      struct vtn_type *ptr_type = vtn_get_type(b, w[1]);
+      nir_deref_instr *base = vtn_nir_deref(b, w[3]);
+      uint32_t stride = vtn_constant_uint(b, w[4]);
+      nir_def *index = vtn_get_nir_ssa(b, w[5]);
+      nir_def *offset = vtn_get_nir_ssa(b, w[6]);
+
+      uint32_t flags = 0;
+      if (count >= 8) {
+         flags = w[7];
+      }
+
+      nir_deref_instr *deref = base;
+
+      if (stride) {
+         index = nir_i2iN(&b->nb, index, base->def.bit_size);
+         deref = nir_build_deref_cast(&b->nb, &deref->def, base->modes,
+                                      glsl_uint8_t_type(), stride);
+         deref = nir_build_deref_ptr_as_array(&b->nb, deref, index);
+      }
+
+      offset = nir_i2iN(&b->nb, offset, base->def.bit_size);
+      deref = nir_build_deref_cast(&b->nb, &deref->def, base->modes,
+                                   glsl_uint8_t_type(), /* stride */ 1);
+      deref = nir_build_deref_ptr_as_array(&b->nb, deref, offset);
+
+      if (flags & SpvRawAccessChainOperandsRobustnessPerComponentNVMask) {
+         /* Default robustness */
+      } else if (flags & SpvRawAccessChainOperandsRobustnessPerElementNVMask) {
+         deref->arr.base_bounds_check = true;
+      } else {
+         deref->arr.never_bounds_check = true;
+      }
+
+      vtn_push_pointer(b, w[2], vtn_pointer_from_ssa(b, &deref->def, ptr_type));
+      break;
+   }
+
    case SpvOpCopyMemory: {
       struct vtn_value *dest_val = vtn_pointer_value(b, w[1]);
       struct vtn_value *src_val = vtn_pointer_value(b, w[2]);
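The two cast/ptr_as_array pairs above reduce the chain to byte arithmetic. Schematically (illustration only, not code from the diff; base, index, and offset name the w[3..6] operands):

   /* Effective address of a raw access chain (all in bytes):
    *
    *    addr = base + index * stride + offset
    *
    * Robustness mapping:
    *    PerComponentNV -> neither flag set: default per-component checks
    *    PerElementNV   -> base_bounds_check: only addr itself is range-
    *                      checked before the whole element access proceeds
    *    (no flag)      -> never_bounds_check: no range check at all
    */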
@@ -320,7 +320,7 @@ ssbo_desc(struct nvk_addr_range addr_range)
    assert(addr_range.range <= UINT32_MAX);
 
    addr_range.addr = ROUND_DOWN_TO(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT);
-   addr_range.range = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT);
+   // addr_range.range = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT);
 
    return (union nvk_buffer_descriptor) { .addr = {
       .base_addr = addr_range.addr,
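Worth noting for review: rounding the range up lets a bounds check pass for padding bytes past the real buffer end, which matters once per-element checks rely on the descriptor range being exact. A worked example, assuming a bounds-check alignment of 16 (illustrative value, not necessarily NVK_SSBO_BOUNDS_CHECK_ALIGNMENT):

   /* range = 20 -> align(20, 16) = 32 -> bytes 20..31 wrongly in bounds
    * range = 20 -> (no align)    = 20 -> exact bounds
    */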
@@ -302,6 +302,7 @@ nvk_get_device_extensions(const struct nvk_instance *instance,
       .GOOGLE_user_type = true,
       .MESA_image_alignment_control = true,
       .NV_compute_shader_derivatives = info->cls_eng3d >= TURING_A,
+      .NV_raw_access_chains = true,
       .NV_shader_sm_builtins = true,
       .NVX_image_view_handle = info->cls_eng3d >= MAXWELL_A, /* needs true bindless descriptors */
       .VALVE_mutable_descriptor_type = true,
@@ -744,6 +745,9 @@ nvk_get_device_features(const struct nv_device_info *info,
       /* VK_MESA_image_alignment_control */
       .imageAlignmentControl = true,
 
+      /* VK_NV_raw_access_chains */
+      .shaderRawAccessChains = true,
+
       /* VK_NV_shader_sm_builtins */
       .shaderSMBuiltins = true,