diff --git a/src/panfrost/bifrost/valhall/ISA.xml b/src/panfrost/bifrost/valhall/ISA.xml
index c645f7a8ea1..e276d6311f5 100644
--- a/src/panfrost/bifrost/valhall/ISA.xml
+++ b/src/panfrost/bifrost/valhall/ISA.xml
@@ -64,34 +64,41 @@
0x42480000
-
+
- Every Valhall instruction can perform an action, like wait on dependency
- slots. A few special actions are available, specified in the instruction
- metadata from this enum. The `wait0126` action is required to wait on
+ Every Valhall instruction can wait on dependency
+ slots. A few special flows are available, specified in the instruction
+ metadata from this enum. The `wait0126` flow is required to wait on
dependency slot #6 and should be set on the instruction immediately
- preceding `ATEST`. The `barrier` action may be set on any instruction for
- subgroup barriers, and should particularly be set with the `BARRIER`
- instruction for global barriers. The `td` action only applies to fragment
- shaders and is used to terminate helper invocations, it should be set as
- early as possible after helper invocations are no longer needed as
- determined by data flow analysis. The `return` action is used to terminate
- the shader, although it may be overloaded by the `BLEND` instruction.
+ preceding `ATEST`. The `wait` flow should be set for barriers.
+ The `discard` flow only applies to fragment shaders and is used to
+ terminate helper invocations; it should be set as early as possible after
+ helper invocations are no longer needed as determined by data flow
+ analysis. The `end` flow is used to terminate the shader, although it
+ may be overloaded by the `BLEND` instruction.
- The `reconverge` action is required on any instruction immediately
+ The `reconverge` flow is required on any instruction immediately
preceding a possible change to the mask of active threads in a subgroup.
This includes all divergent branches, but it also includes the final
instruction at the end of any basic block where the immediate successor
(fallthrough) is the target of a divergent branch.
- wait0126
- barrier
+ none
+ wait0
+ wait1
+ wait01
+ wait2
+ wait02
+ wait12
+ wait012
+ wait0126
+ wait
reconverge
- td
+ discard
- return
+ end
@@ -799,7 +806,7 @@
General-purpose barrier. Must use slot #7. Must be paired with a
- `.barrier` action on the instruction.
+ `.wait` flow on the instruction.
diff --git a/src/panfrost/bifrost/valhall/asm.py b/src/panfrost/bifrost/valhall/asm.py
index 34b81331dca..fbaedf83905 100644
--- a/src/panfrost/bifrost/valhall/asm.py
+++ b/src/panfrost/bifrost/valhall/asm.py
@@ -339,35 +339,15 @@ def parse_asm(line):
encoded |= (fau.page << 57)
# Encode modifiers
- has_action = False
+ has_flow = False
for mod in mods:
if len(mod) == 0:
continue
- if mod in enums['action'].bare_values:
- die_if(has_action, "Multiple actions specified")
- has_action = True
- encoded |= (enums['action'].bare_values.index(mod) << 59)
- encoded |= (1 << 62) # Action, not wait
- elif mod.startswith('wait'):
- die_if(has_action, "Multiple actions specified")
- has_action = True
-
- slots = mod[len('wait'):]
- try:
- slots = set([int(x) for x in slots])
- except ValueError:
- die(f"Expected slots in {mod}")
-
- known_slots = set([0, 1, 2])
- die_if(not slots.issubset(known_slots), f"Unknown slots in {mod}")
-
- if 0 in slots:
- encoded |= (1 << 59)
- if 1 in slots:
- encoded |= (1 << 60)
- if 2 in slots:
- encoded |= (1 << 61)
+ if mod in enums['flow'].bare_values:
+ die_if(has_flow, "Multiple flow control modifiers specified")
+ has_flow = True
+ encoded |= (enums['flow'].bare_values.index(mod) << 59)
else:
candidates = [c for c in ins.modifiers if mod in c.bare_values]
diff --git a/src/panfrost/bifrost/valhall/disasm.py b/src/panfrost/bifrost/valhall/disasm.py
index 30400b30564..30c98b4a94d 100644
--- a/src/panfrost/bifrost/valhall/disasm.py
+++ b/src/panfrost/bifrost/valhall/disasm.py
@@ -42,34 +42,6 @@ static const uint32_t va_immediates[32] = {
% endfor
};
-/* Byte 7 has instruction metadata, analogous to Bifrost's clause header */
-struct va_metadata {
- bool opcode_high : 1;
- unsigned immediate_mode : 2;
- unsigned action : 3;
- bool do_action : 1;
- bool unk3 : 1;
-} __attribute__((packed));
-
-static inline void
-va_print_metadata(FILE *fp, uint8_t meta)
-{
- struct va_metadata m;
- memcpy(&m, &meta, 1);
-
- if (m.do_action) {
- fputs(valhall_action[m.action], fp);
- } else if (m.action) {
- fprintf(fp, ".wait%s%s%s",
- m.action & (1 << 0) ? "0" : "",
- m.action & (1 << 1) ? "1" : "",
- m.action & (1 << 2) ? "2" : "");
- }
-
- if (m.unk3)
- fprintf(fp, ".unk3");
-}
-
static inline void
va_print_src(FILE *fp, uint8_t src, unsigned fau_page)
{
@@ -153,8 +125,8 @@ va_disasm_instr(FILE *fp, uint64_t instr)
% endif
% endif
% endfor
- va_print_metadata(fp, instr >> 56);
- fputs(" ", fp);
+ assert((instr & (1ull << 63)) == 0 /* reserved */);
+ fprintf(fp, "%s ", valhall_flow[instr >> 59]);
% if len(op.dests) > 0:
<% no_comma = False %>
va_print_dest(fp, (instr >> 40), true);
diff --git a/src/panfrost/bifrost/valhall/test/assembler-cases.txt b/src/panfrost/bifrost/valhall/test/assembler-cases.txt
index 2d82e370fdc..2aebbe2460e 100644
--- a/src/panfrost/bifrost/valhall/test/assembler-cases.txt
+++ b/src/panfrost/bifrost/valhall/test/assembler-cases.txt
@@ -28,8 +28,8 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0
82 3c 27 20 00 c0 a3 01 SHADDX.u64 r0, u2, r60.w0, shift:0x2
40 00 00 18 82 80 60 08 LOAD.i32.unsigned.slot0.wait0 @r0, `r0, offset:0
80 7c 47 20 00 c0 a3 01 SHADDX.u64 r0, u0, `r60.w0, shift:0x4
-40 00 00 38 08 44 61 78 STORE.i128.slot0.return @r4:r5:r6:r7, `r0, offset:0
-00 00 00 00 00 c0 00 78 NOP.return
+40 00 00 38 08 44 61 78 STORE.i128.slot0.end @r4:r5:r6:r7, `r0, offset:0
+00 00 00 00 00 c0 00 78 NOP.end
40 c4 c0 9c 01 c1 f0 00 ICMP.u32.gt.m1 r1, `r0, 0x1000000.b3, 0x0
42 00 00 18 02 40 61 50 STORE.i32.slot0.reconverge @r0, `r2, offset:0
00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b0
@@ -46,18 +46,18 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0
40 00 0b 10 00 c3 90 00 F16_TO_F32 r3, `r0.h1
00 00 00 00 00 c0 00 40 NOP.wait0126
42 43 04 00 00 c0 a5 00 V2F32_TO_V2F16 r0, `r2, `r3
-40 c0 00 28 90 c0 a5 48 FADD.v2f16.barrier r0, `r0.abs, 0x0.neg
+40 c0 00 28 90 c0 a5 48 FADD.v2f16.wait r0, `r0.abs, 0x0.neg
c0 00 00 00 00 f6 10 01 IADD_IMM.i32 r54, 0x0, #0x0
-3c d0 ea 00 02 bc 7d 68 ATEST.td @r60, r60, 0x3F800000, atest_datum.w0
+3c d0 ea 00 02 bc 7d 68 ATEST.discard @r60, r60, 0x3F800000, atest_datum.w0
40 db 05 04 00 c1 a1 00 MKVEC.v2i16 r1, `r0.h00, 0x3C000000.h10
-f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.return @r0:r1, blend_descriptor_0.w0, r60, target:0x0
+f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.end @r0:r1, blend_descriptor_0.w0, r60, target:0x0
7b 0d 00 40 04 84 5e 08 LEA_BUF_IMM.slot1.wait0 @r4:r5, `r59, table:0xD, index:0x0
00 dd c0 08 14 c2 b2 00 FMA.f32 r2, r0, 0x44000000.neg.h1, 0x0.neg
41 88 c0 00 04 c1 b2 00 FMA.f32 r1, `r1, u8, 0x0.neg
40 88 c0 00 04 c0 b2 10 FMA.f32.wait1 r0, `r0, u8, 0x0.neg
-44 00 00 32 06 40 61 78 STORE.i96.estream.slot0.return @r0:r1:r2, `r4, offset:0
-44 00 00 39 08 48 61 78 STORE.i128.istream.slot0.return @r8:r9:r10:r11, `r4, offset:0
-00 00 00 c0 01 c0 45 48 BARRIER.slot7.barrier
+44 00 00 32 06 40 61 78 STORE.i96.estream.slot0.end @r0:r1:r2, `r4, offset:0
+44 00 00 39 08 48 61 78 STORE.i128.istream.slot0.end @r8:r9:r10:r11, `r4, offset:0
+00 00 00 c0 01 c0 45 48 BARRIER.slot7.wait
80 00 00 00 82 82 60 00 LOAD.i8.unsigned.slot0 @r2, u0, offset:0
80 00 00 08 82 82 60 00 LOAD.i16.unsigned.slot0 @r2, u0, offset:0
80 00 00 10 82 82 60 00 LOAD.i24.unsigned.slot0 @r2, u0, offset:0
@@ -106,9 +106,9 @@ c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1
42 00 00 38 08 44 61 00 STORE.i128.slot0 @r4:r5:r6:r7, `r2, offset:0
41 f8 ff ff 07 c0 1f 50 BRANCHZ.reconverge `r1, offset:-8
7d c0 00 08 10 bc a1 00 IADD.v2u16 r60.h1, `r61.h10, 0x0
-44 00 46 32 28 40 71 78 ST_CVT.slot0.istream.v4.f32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0
-44 00 46 34 28 40 71 78 ST_CVT.slot0.istream.v4.s32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0
-44 00 46 36 28 40 71 78 ST_CVT.slot0.istream.v4.u32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0
+44 00 46 32 28 40 71 78 ST_CVT.slot0.istream.v4.f32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0
+44 00 46 34 28 40 71 78 ST_CVT.slot0.istream.v4.s32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0
+44 00 46 36 28 40 71 78 ST_CVT.slot0.istream.v4.u32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0
7c c0 12 00 26 84 67 00 LEA_TEX_IMM.slot0 @r4:r5:r6, `r60, 0x0, table:0x2, index:0x1
7c c0 02 00 26 84 67 00 LEA_TEX_IMM.slot0 @r4:r5:r6, `r60, 0x0, table:0x2, index:0x0
82 81 00 28 f4 82 6a 00 LD_BUFFER.i64.unsigned.slot0 @r2:r3, u2, u1
@@ -123,7 +123,7 @@ c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1
40 44 80 00 01 c0 b8 00 MUX.i32 r0, `r0, `r4, u0
40 44 80 00 02 c0 b8 00 MUX.i32.fp_zero r0, `r0, `r4, u0
40 44 80 00 03 c0 b8 00 MUX.i32.bit r0, `r0, `r4, u0
-00 00 00 01 00 c1 99 68 FREXPM.f32.sqrt.td r1, r0
+00 00 00 01 00 c1 99 68 FREXPM.f32.sqrt.discard r1, r0
01 00 02 00 00 c2 9c 00 FRSQ.f32 r2, r1
40 00 02 01 00 c0 99 00 FREXPE.f32.sqrt r0, `r0
41 42 c0 40 04 c0 62 41 FMA_RSCALE_LEFT.f32.wait0126 r0, `r1, `r2, 0x0.neg, `r0
@@ -180,14 +180,14 @@ c0 77 01 0c 00 c2 a8 00 ISUB.s32 r2, 0x0, `r55.h1
00 00 03 00 20 c1 90 00 V2S8_TO_V2F16 r1, r0.b20
40 00 03 00 60 c0 90 00 V2S8_TO_V2F16 r0, `r0.b21
-3d 00 00 b2 88 80 5c 68 LD_VAR_BUF_IMM.f32.slot2.v4.src_f32.sample.store.td @r0:r1:r2:r3, r61, index:0x0
+3d 00 00 b2 88 80 5c 68 LD_VAR_BUF_IMM.f32.slot2.v4.src_f32.sample.store.discard @r0:r1:r2:r3, r61, index:0x0
3d 00 10 72 18 84 5c 00 LD_VAR_BUF_IMM.f32.slot1.v4.src_f32.center.retrieve @r4:r5:r6:r7, r61, index:0x10
c0 00 00 00 00 c8 10 01 IADD_IMM.i32 r8, 0x0, #0x0
c0 00 00 00 00 c9 10 01 IADD_IMM.i32 r9, 0x0, #0x0
3d 00 14 00 00 ca 90 00 U16_TO_U32 r10, r61.h00
3d 09 00 00 30 c0 1f 50 BRANCHZ.eq.reconverge r61.h0, offset:9
0a 00 00 00 00 cb 91 50 MOV.i32.reconverge r11, r10
-00 00 00 00 00 c0 00 48 NOP.barrier
+00 00 00 00 00 c0 00 48 NOP.wait
81 0b 80 33 04 8e 78 00 LD_TILE.v4.f16.slot0 @r14:r15, u1, r11, u0
0b 00 04 00 00 cc 91 00 CLZ.u32 r12, r11
82 4c c0 52 00 cc b4 00 RSHIFT_XOR.i32.not_result r12, u2, `r12.b00, 0x0
@@ -202,15 +202,15 @@ c0 00 00 00 00 c9 10 01 IADD_IMM.i32 r9, 0x0, #0x0
49 3e c0 22 04 c9 b3 30 FMA.v2f16.wait12 r9, `r9, r62.h00, 0x0.neg
47 43 00 00 00 c3 a4 00 FADD.f32 r3, `r7, `r3
43 09 00 08 00 c3 a4 40 FADD.f32.wait0126 r3, `r3, r9.h1
-3c 03 ea 00 02 bc 7d 68 ATEST.td @r60, r60, r3, atest_datum.w0
+3c 03 ea 00 02 bc 7d 68 ATEST.discard @r60, r60, r3, atest_datum.w0
46 42 00 00 00 c2 a4 00 FADD.f32 r2, `r6, `r2
44 40 00 00 00 c0 a4 00 FADD.f32 r0, `r4, `r0
48 7e c0 22 04 ff b3 00 FMA.v2f16 r63, `r8, `r62.h00, 0x0.neg
45 41 00 00 00 c1 a4 00 FADD.f32 r1, `r5, `r1
41 3f 00 08 00 c1 a4 00 FADD.f32 r1, `r1, r63.h1
40 7f 00 04 00 c0 a4 00 FADD.f32 r0, `r0, `r63.h0
-42 49 00 04 00 c2 a4 48 FADD.f32.barrier r2, `r2, `r9.h0
-f0 00 3c 32 08 40 7f 78 BLEND.slot0.v4.f32.return @r0:r1:r2:r3, blend_descriptor_0.w0, r60, target:0x0
+42 49 00 04 00 c2 a4 48 FADD.f32.wait r2, `r2, `r9.h0
+f0 00 3c 32 08 40 7f 78 BLEND.slot0.v4.f32.end @r0:r1:r2:r3, blend_descriptor_0.w0, r60, target:0x0
c0 00 00 00 00 f6 10 01 IADD_IMM.i32 r54, 0x0, #0x0
c0 f1 00 00 10 c1 2f 08 BRANCHZI.eq.absolute.wait0 0x0, blend_descriptor_0.w1
80 00 c0 17 34 7c 25 01 TEX_FETCH.slot0.f.32.2d @r0:r1:r2:r3, @r60:r61, u0