mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
kk: Handle memory coherency for textures and buffers
M1 chips are more restrictive than M2 and above. We need to enforce memory coherency when needed through "coherent" for buffer memory and "memory_coherence_device" for textures. Without these the memory operations are not visible to other threads. Reviewed-by: Arcady Goldmints-Orlov <arcady@lunarg.com> Signed-off-by: Aitor Camacho <aitor@lunarg.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38595>
This commit is contained in:
parent
3aa4be52f3
commit
bdaff0b457
3 changed files with 71 additions and 19 deletions
|
|
@ -2881,7 +2881,7 @@ intrinsic("is_null_descriptor", src_comp=[-1], dest_comp=1, flags=[CAN_ELIMINATE
|
||||||
# given binding
|
# given binding
|
||||||
load("buffer_ptr_kk", [], [BINDING], [CAN_ELIMINATE, CAN_REORDER])
|
load("buffer_ptr_kk", [], [BINDING], [CAN_ELIMINATE, CAN_REORDER])
|
||||||
# Opaque texture<T> handle, with DEST_TYPE representing T
|
# Opaque texture<T> handle, with DEST_TYPE representing T
|
||||||
load("texture_handle_kk", [1], [DEST_TYPE, IMAGE_DIM, IMAGE_ARRAY, FLAGS], [CAN_ELIMINATE])
|
load("texture_handle_kk", [1], [DEST_TYPE, IMAGE_DIM, IMAGE_ARRAY, ACCESS, FLAGS], [CAN_ELIMINATE])
|
||||||
# Same as above but for depth<T> textures, T is always float
|
# Same as above but for depth<T> textures, T is always float
|
||||||
load("depth_texture_kk", [1], [IMAGE_DIM, IMAGE_ARRAY], [CAN_ELIMINATE])
|
load("depth_texture_kk", [1], [IMAGE_DIM, IMAGE_ARRAY], [CAN_ELIMINATE])
|
||||||
# Load a bindless sampler handle mapping a binding table sampler.
|
# Load a bindless sampler handle mapping a binding table sampler.
|
||||||
|
|
|
||||||
|
|
@ -1069,10 +1069,21 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
|
||||||
P(ctx, "(ulong)&buf%d.contents[0];\n", nir_intrinsic_binding(instr));
|
P(ctx, "(ulong)&buf%d.contents[0];\n", nir_intrinsic_binding(instr));
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_load_global: {
|
case nir_intrinsic_load_global: {
|
||||||
src_to_packed_load(ctx, &instr->src[0], "device",
|
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
||||||
msl_type_for_def(ctx->types, &instr->def),
|
const char *type = msl_type_for_def(ctx->types, &instr->def);
|
||||||
instr->def.num_components);
|
const char *addressing =
|
||||||
P(ctx, ";\n");
|
access & ACCESS_COHERENT ? "coherent device" : "device";
|
||||||
|
if (access & ACCESS_ATOMIC) {
|
||||||
|
assert(instr->num_components == 1u &&
|
||||||
|
"We can only do single component with atomics");
|
||||||
|
P(ctx, "atomic_load_explicit((%s atomic_%s*)", addressing, type);
|
||||||
|
src_to_msl(ctx, &instr->src[0]);
|
||||||
|
P(ctx, ", memory_order_relaxed);\n");
|
||||||
|
} else {
|
||||||
|
src_to_packed_load(ctx, &instr->src[0], addressing, type,
|
||||||
|
instr->def.num_components);
|
||||||
|
P(ctx, ";\n");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_load_global_constant: {
|
case nir_intrinsic_load_global_constant: {
|
||||||
|
|
@ -1094,9 +1105,24 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_load_global_constant_offset: {
|
case nir_intrinsic_load_global_constant_offset: {
|
||||||
src_to_packed_load_offset(ctx, &instr->src[0], &instr->src[1], "device",
|
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
||||||
msl_type_for_def(ctx->types, &instr->def),
|
const char *type = msl_type_for_def(ctx->types, &instr->def);
|
||||||
instr->def.num_components);
|
const char *addressing =
|
||||||
|
access & ACCESS_COHERENT ? "coherent device" : "device";
|
||||||
|
if (access & ACCESS_ATOMIC) {
|
||||||
|
assert(instr->num_components == 1u &&
|
||||||
|
"We can only do single component with atomics");
|
||||||
|
P(ctx, "atomic_load_explicit((%s atomic_%s*)(", addressing, type);
|
||||||
|
src_to_msl(ctx, &instr->src[0]);
|
||||||
|
P(ctx, "+");
|
||||||
|
src_to_msl(ctx, &instr->src[1]);
|
||||||
|
P(ctx, ", memory_order_relaxed);\n");
|
||||||
|
} else {
|
||||||
|
src_to_packed_load_offset(ctx, &instr->src[0], &instr->src[1],
|
||||||
|
addressing,
|
||||||
|
msl_type_for_def(ctx->types, &instr->def),
|
||||||
|
instr->def.num_components);
|
||||||
|
}
|
||||||
P(ctx, ";\n");
|
P(ctx, ";\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -1113,15 +1139,29 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
|
||||||
atomic_swap_to_msl(ctx, instr, "threadgroup", true);
|
atomic_swap_to_msl(ctx, instr, "threadgroup", true);
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_store_global: {
|
case nir_intrinsic_store_global: {
|
||||||
|
enum gl_access_qualifier access = nir_intrinsic_access(instr);
|
||||||
const char *type = msl_type_for_src(ctx->types, &instr->src[0]);
|
const char *type = msl_type_for_src(ctx->types, &instr->src[0]);
|
||||||
src_to_packed_store(ctx, &instr->src[1], "device", type,
|
const char *addressing =
|
||||||
instr->src[0].ssa->num_components);
|
access & ACCESS_COHERENT ? "coherent device" : "device";
|
||||||
writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
|
if (access & ACCESS_ATOMIC) {
|
||||||
instr->num_components);
|
assert(instr->num_components == 1u &&
|
||||||
P(ctx, " = ")
|
"We can only do single component with atomics");
|
||||||
src_to_packed(ctx, &instr->src[0], type,
|
P_IND(ctx, "atomic_store_explicit((%s atomic_%s*)", addressing, type);
|
||||||
instr->src[0].ssa->num_components);
|
src_to_msl(ctx, &instr->src[1]);
|
||||||
P(ctx, ";\n");
|
P(ctx, ", ")
|
||||||
|
src_to_packed(ctx, &instr->src[0], type,
|
||||||
|
instr->src[0].ssa->num_components);
|
||||||
|
P(ctx, ", memory_order_relaxed);\n");
|
||||||
|
} else {
|
||||||
|
src_to_packed_store(ctx, &instr->src[1], addressing, type,
|
||||||
|
instr->src[0].ssa->num_components);
|
||||||
|
writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
|
||||||
|
instr->num_components);
|
||||||
|
P(ctx, " = ")
|
||||||
|
src_to_packed(ctx, &instr->src[0], type,
|
||||||
|
instr->src[0].ssa->num_components);
|
||||||
|
P(ctx, ";\n");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_barrier: {
|
case nir_intrinsic_barrier: {
|
||||||
|
|
@ -1243,6 +1283,9 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_load_texture_handle_kk: {
|
case nir_intrinsic_load_texture_handle_kk: {
|
||||||
const char *access = "";
|
const char *access = "";
|
||||||
|
const char *coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT
|
||||||
|
? ", memory_coherence_device"
|
||||||
|
: "";
|
||||||
switch (nir_intrinsic_flags(instr)) {
|
switch (nir_intrinsic_flags(instr)) {
|
||||||
case MSL_ACCESS_READ:
|
case MSL_ACCESS_READ:
|
||||||
access = ", access::read";
|
access = ", access::read";
|
||||||
|
|
@ -1252,15 +1295,23 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
|
||||||
break;
|
break;
|
||||||
case MSL_ACCESS_READ_WRITE:
|
case MSL_ACCESS_READ_WRITE:
|
||||||
access = ", access::read_write";
|
access = ", access::read_write";
|
||||||
|
/* TODO_KOSMICKRISP We shouldn't need this line below but it doesn't
|
||||||
|
* seem we get the correct access values for what should be device
|
||||||
|
* coherent textures from NIR. Example test:
|
||||||
|
* dEQP-VK.memory_model.message_passing.ext.u32.coherent.fence_fence.atomicwrite.device.payload_local.image.guard_local.buffer.comp
|
||||||
|
* This test declares the texture as devicecoherent, but in NIR it
|
||||||
|
* appears as restrict only, with no coherent flag.
|
||||||
|
*/
|
||||||
|
coherent = ", memory_coherence_device";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
P_IND(ctx, "texture%s%s<%s%s> t%d = *(constant texture%s%s<%s%s>*)",
|
P_IND(ctx, "texture%s%s<%s%s%s> t%d = *(constant texture%s%s<%s%s%s>*)",
|
||||||
texture_dim(nir_intrinsic_image_dim(instr)),
|
texture_dim(nir_intrinsic_image_dim(instr)),
|
||||||
nir_intrinsic_image_array(instr) ? "_array" : "",
|
nir_intrinsic_image_array(instr) ? "_array" : "",
|
||||||
tex_type_name(nir_intrinsic_dest_type(instr)), access,
|
tex_type_name(nir_intrinsic_dest_type(instr)), access, coherent,
|
||||||
instr->def.index, texture_dim(nir_intrinsic_image_dim(instr)),
|
instr->def.index, texture_dim(nir_intrinsic_image_dim(instr)),
|
||||||
nir_intrinsic_image_array(instr) ? "_array" : "",
|
nir_intrinsic_image_array(instr) ? "_array" : "",
|
||||||
tex_type_name(nir_intrinsic_dest_type(instr)), access);
|
tex_type_name(nir_intrinsic_dest_type(instr)), access, coherent);
|
||||||
src_to_msl(ctx, &instr->src[0]);
|
src_to_msl(ctx, &instr->src[0]);
|
||||||
P(ctx, ";\n");
|
P(ctx, ";\n");
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -379,6 +379,7 @@ lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intr,
|
||||||
b, 1, 64, resource_addr, .dest_type = type,
|
b, 1, 64, resource_addr, .dest_type = type,
|
||||||
.image_dim = nir_intrinsic_image_dim(intr),
|
.image_dim = nir_intrinsic_image_dim(intr),
|
||||||
.image_array = nir_intrinsic_image_array(intr),
|
.image_array = nir_intrinsic_image_array(intr),
|
||||||
|
.access = nir_intrinsic_access(intr) | var->data.access,
|
||||||
.flags = msl_convert_access_flag(var->data.access));
|
.flags = msl_convert_access_flag(var->data.access));
|
||||||
|
|
||||||
nir_rewrite_image_intrinsic(intr, handle, true);
|
nir_rewrite_image_intrinsic(intr, handle, true);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue