diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index d4ccbf8af69..412a7528584 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -56,6 +56,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ldib->barrier_class = IR3_BARRIER_BUFFER_R;
ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;
ir3_handle_bindless_cat6(ldib, intr->src[0]);
+ ir3_handle_nonuniform(ldib, intr);
ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}
@@ -83,6 +84,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
stib->barrier_class = IR3_BARRIER_BUFFER_W;
stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
ir3_handle_bindless_cat6(stib, intr->src[1]);
+ ir3_handle_nonuniform(stib, intr);
array_insert(b, b->keeps, stib);
}
@@ -214,6 +216,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ldib->barrier_class = IR3_BARRIER_IMAGE_R;
ldib->barrier_conflict = IR3_BARRIER_IMAGE_W;
ir3_handle_bindless_cat6(ldib, intr->src[0]);
+ ir3_handle_nonuniform(ldib, intr);
ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}
@@ -242,6 +245,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
stib->barrier_class = IR3_BARRIER_IMAGE_W;
stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
ir3_handle_bindless_cat6(stib, intr->src[0]);
+ ir3_handle_nonuniform(stib, intr);
array_insert(b, b->keeps, stib);
}
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 71f57cc6aec..955c0970bae 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -39,6 +39,15 @@
#include "ir3.h"
#include "ir3_context.h"
+/* Set IR3_INSTR_NONUNIF on instr when intrin has an access index with ACCESS_NON_UNIFORM. */
+void
+ir3_handle_nonuniform(struct ir3_instruction *instr, nir_intrinsic_instr *intrin)
+{
+   const bool has_access = nir_intrinsic_has_access(intrin);
+   if (has_access && (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
+      instr->flags |= IR3_INSTR_NONUNIF;
+}
+
void
ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc)
{
@@ -741,6 +750,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ir3_handle_bindless_cat6(ldc, intr->src[0]);
if (ldc->flags & IR3_INSTR_B)
ctx->so->bindless_ubo = true;
+ ir3_handle_nonuniform(ldc, intr);
ir3_split_dest(b, dst, ldc, 0, ncomp);
}
@@ -1233,6 +1243,8 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
sam = emit_sam(ctx, OPC_ISAM, info, type, 0b1111,
ir3_create_collect(ctx, coords, ncoords), NULL);
+ ir3_handle_nonuniform(sam, intr);
+
sam->barrier_class = IR3_BARRIER_IMAGE_R;
sam->barrier_conflict = IR3_BARRIER_IMAGE_W;
@@ -2093,6 +2105,9 @@ get_tex_samp_tex_src(struct ir3_context *ctx, nir_tex_instr *tex)
/* Bindless case */
info.flags |= IR3_INSTR_B;
+ if (tex->texture_non_uniform || tex->sampler_non_uniform)
+ info.flags |= IR3_INSTR_NONUNIF;
+
/* Gather information required to determine which encoding to
* choose as well as for prefetch.
*/
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index b26159eb764..2a0066e069e 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -183,6 +183,7 @@ struct ir3_instruction * ir3_create_collect(struct ir3_context *ctx,
void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
struct ir3_instruction *src, unsigned base, unsigned n);
void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc);
+void ir3_handle_nonuniform(struct ir3_instruction *instr, nir_intrinsic_instr *intrin);
void emit_intrinsic_image_size_tex(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index 8aad0c68402..c379daf4188 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -181,7 +181,12 @@ static void fixup_cat5_s2en(void)
* fix things up.
*/
struct ir3_register *s2en_src = instr->regs[instr->regs_count - 1];
- assert(s2en_src->flags & IR3_REG_HALF);
+
+ if (instr->flags & IR3_INSTR_B)
+ assert(!(s2en_src->flags & IR3_REG_HALF));
+ else
+ assert(s2en_src->flags & IR3_REG_HALF);
+
for (int i = 1; i < instr->regs_count - 1; i++) {
instr->regs[i+1] = instr->regs[i];
}
@@ -919,6 +924,7 @@ cat5_flag: '.' T_3D { instr->flags |= IR3_INSTR_3D; }
| '.' 'p' { instr->flags |= IR3_INSTR_P; }
| '.' 's' { instr->flags |= IR3_INSTR_S; }
| '.' T_S2EN { instr->flags |= IR3_INSTR_S2EN; }
+| '.' T_NONUNIFORM { instr->flags |= IR3_INSTR_NONUNIF; }
| '.' T_BASE { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; }
cat5_flags:
| cat5_flag cat5_flags
@@ -928,6 +934,7 @@ cat5_tex: T_TEX { if (instr->flags & IR3_INSTR_B) instr->cat5.
cat5_type: '(' type ')' { instr->cat5.type = $2; }
cat5_instr: cat5_opc_dsxypp cat5_flags dst_reg ',' src_reg
+| cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' src_reg
| cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_samp ',' cat5_tex
| cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_samp
| cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_tex
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 27c2f68918e..154d2644b7d 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -307,6 +307,17 @@ static const struct test {
/* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
+ /* NonUniform: */
+ /* dEQP-VK.descriptor_indexing.storage_buffer */
+ INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"),
+ INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"),
+ /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */
+ INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"),
+ /* dEQP-VK.descriptor_indexing.storage_image */
+ INSTR_6XX(d0360c04_02640b81, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"),
+ /* dEQP-VK.descriptor_indexing.sampler */
+ INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
+
/* Custom test since we've never seen the blob emit these. */
INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c
index 5d809a330b5..d6922a3b9dd 100644
--- a/src/freedreno/isa/encode.c
+++ b/src/freedreno/isa/encode.c
@@ -178,6 +178,8 @@ extract_cat5_DESC_MODE(struct ir3_instruction *instr)
if (instr->flags & IR3_INSTR_B) {
if (instr->flags & IR3_INSTR_A1EN) {
return CAT5_BINDLESS_A1_UNIFORM;
+ } else if (instr->flags & IR3_INSTR_NONUNIF) {
+ return CAT5_BINDLESS_NONUNIFORM;
} else {
return CAT5_BINDLESS_UNIFORM;
}
diff --git a/src/freedreno/isa/ir3-cat5.xml b/src/freedreno/isa/ir3-cat5.xml
index 92697f6584c..e5079326c28 100644
--- a/src/freedreno/isa/ir3-cat5.xml
+++ b/src/freedreno/isa/ir3-cat5.xml
@@ -58,7 +58,7 @@ SOFTWARE.
The s2en (indirect) or bindless case
- {SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1}
+ {SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{NONUNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1}
@@ -75,6 +75,7 @@ SOFTWARE.
+
@@ -604,6 +605,14 @@ SOFTWARE.
({DESC_MODE} == 5) /* CAT5_BINDLESS_UNIFORM */
+
+
+ ({DESC_MODE} == 2) /* CAT5_BINDLESS_NONUNIFORM */ ||
+ ({DESC_MODE} == 3) /* CAT5_BINDLESS_A1_NONUNIFORM */
+
+
bindless/indirect src3, which can either be GPR or samp/tex