mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
pan/va: Unify flow control
Group together dependency waits and flow control into a single enum. This simplifies the code, clarifies some detail, and ensures consistency moving forward.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15223>
This commit is contained in:
parent
cf6d1a81f6
commit
76487c7eb4
4 changed files with 48 additions and 89 deletions
|
|
@ -64,34 +64,41 @@
|
|||
<constant desc="Half-float $\pi$">0x42480000</constant>
|
||||
</lut>
|
||||
|
||||
<enum name="Action">
|
||||
<enum name="Flow">
|
||||
<desc>
|
||||
Every Valhall instruction can perform an action, like wait on dependency
|
||||
slots. A few special actions are available, specified in the instruction
|
||||
metadata from this enum. The `wait0126` action is required to wait on
|
||||
Every Valhall instruction can wait on dependency
|
||||
slots. A few special flows are available, specified in the instruction
|
||||
metadata from this enum. The `wait0126` flow is required to wait on
|
||||
dependency slot #6 and should be set on the instruction immediately
|
||||
preceding `ATEST`. The `barrier` action may be set on any instruction for
|
||||
subgroup barriers, and should particularly be set with the `BARRIER`
|
||||
instruction for global barriers. The `td` action only applies to fragment
|
||||
shaders and is used to terminate helper invocations, it should be set as
|
||||
early as possible after helper invocations are no longer needed as
|
||||
determined by data flow analysis. The `return` action is used to terminate
|
||||
the shader, although it may be overloaded by the `BLEND` instruction.
|
||||
preceding `ATEST`. The `wait` flow should be set for barriers.
|
||||
The `discard` flow only applies to fragment shaders and is used to
|
||||
terminate helper invocations; it should be set as early as possible after
|
||||
helper invocations are no longer needed as determined by data flow
|
||||
analysis. The `end` flow is used to terminate the shader, although it
|
||||
may be overloaded by the `BLEND` instruction.
|
||||
|
||||
The `reconverge` action is required on any instruction immediately
|
||||
The `reconverge` flow is required on any instruction immediately
|
||||
preceding a possible change to the mask of active threads in a subgroup.
|
||||
This includes all divergent branches, but it also includes the final
|
||||
instruction at the end of any basic block where the immediate successor
|
||||
(fallthrough) is the target of a divergent branch.
|
||||
</desc>
|
||||
<value name="Wait on all dependency slots">wait0126</value>
|
||||
<value name="Subgroup barrier">barrier</value>
|
||||
<value name="None" default="true">none</value>
|
||||
<value name="Wait on slot 0">wait0</value>
|
||||
<value name="Wait on slot 1">wait1</value>
|
||||
<value name="Wait on slots 0, 1">wait01</value>
|
||||
<value name="Wait on slot 2">wait2</value>
|
||||
<value name="Wait on slots 0, 2">wait02</value>
|
||||
<value name="Wait on slots 1, 2">wait12</value>
|
||||
<value name="Wait on slots 0, 1, 2">wait012</value>
|
||||
<value name="Wait on slots 0, 1, 2, 6">wait0126</value>
|
||||
<value name="Wait on slots 0, 1, 2, 6, 7">wait</value>
|
||||
<value name="Perform branch reconverge">reconverge</value>
|
||||
<reserved/>
|
||||
<reserved/>
|
||||
<value name="Terminate discarded threads">td</value>
|
||||
<value name="Terminate discarded threads">discard</value>
|
||||
<reserved/>
|
||||
<value name="Return from shader">return</value>
|
||||
<value name="Return from shader">end</value>
|
||||
</enum>
|
||||
|
||||
<enum name="FAU special page 0">
|
||||
|
|
@ -799,7 +806,7 @@
|
|||
<ins name="BARRIER" title="Execution and memory barrier" opcode="0x45" unit="NONE">
|
||||
<desc>
|
||||
General-purpose barrier. Must use slot #7. Must be paired with a
|
||||
`.barrier` action on the instruction.
|
||||
`.wait` flow on the instruction.
|
||||
</desc>
|
||||
<slot/>
|
||||
</ins>
|
||||
|
|
|
|||
|
|
@ -339,35 +339,15 @@ def parse_asm(line):
|
|||
encoded |= (fau.page << 57)
|
||||
|
||||
# Encode modifiers
|
||||
has_action = False
|
||||
has_flow = False
|
||||
for mod in mods:
|
||||
if len(mod) == 0:
|
||||
continue
|
||||
|
||||
if mod in enums['action'].bare_values:
|
||||
die_if(has_action, "Multiple actions specified")
|
||||
has_action = True
|
||||
encoded |= (enums['action'].bare_values.index(mod) << 59)
|
||||
encoded |= (1 << 62) # Action, not wait
|
||||
elif mod.startswith('wait'):
|
||||
die_if(has_action, "Multiple actions specified")
|
||||
has_action = True
|
||||
|
||||
slots = mod[len('wait'):]
|
||||
try:
|
||||
slots = set([int(x) for x in slots])
|
||||
except ValueError:
|
||||
die(f"Expected slots in {mod}")
|
||||
|
||||
known_slots = set([0, 1, 2])
|
||||
die_if(not slots.issubset(known_slots), f"Unknown slots in {mod}")
|
||||
|
||||
if 0 in slots:
|
||||
encoded |= (1 << 59)
|
||||
if 1 in slots:
|
||||
encoded |= (1 << 60)
|
||||
if 2 in slots:
|
||||
encoded |= (1 << 61)
|
||||
if mod in enums['flow'].bare_values:
|
||||
die_if(has_flow, "Multiple flow control modifiers specified")
|
||||
has_flow = True
|
||||
encoded |= (enums['flow'].bare_values.index(mod) << 59)
|
||||
else:
|
||||
candidates = [c for c in ins.modifiers if mod in c.bare_values]
|
||||
|
||||
|
|
|
|||
|
|
@ -42,34 +42,6 @@ static const uint32_t va_immediates[32] = {
|
|||
% endfor
|
||||
};
|
||||
|
||||
/* Byte 7 has instruction metadata, analogous to Bifrost's clause header */
|
||||
struct va_metadata {
|
||||
bool opcode_high : 1;
|
||||
unsigned immediate_mode : 2;
|
||||
unsigned action : 3;
|
||||
bool do_action : 1;
|
||||
bool unk3 : 1;
|
||||
} __attribute__((packed));
|
||||
|
||||
static inline void
|
||||
va_print_metadata(FILE *fp, uint8_t meta)
|
||||
{
|
||||
struct va_metadata m;
|
||||
memcpy(&m, &meta, 1);
|
||||
|
||||
if (m.do_action) {
|
||||
fputs(valhall_action[m.action], fp);
|
||||
} else if (m.action) {
|
||||
fprintf(fp, ".wait%s%s%s",
|
||||
m.action & (1 << 0) ? "0" : "",
|
||||
m.action & (1 << 1) ? "1" : "",
|
||||
m.action & (1 << 2) ? "2" : "");
|
||||
}
|
||||
|
||||
if (m.unk3)
|
||||
fprintf(fp, ".unk3");
|
||||
}
|
||||
|
||||
static inline void
|
||||
va_print_src(FILE *fp, uint8_t src, unsigned fau_page)
|
||||
{
|
||||
|
|
@ -153,8 +125,8 @@ va_disasm_instr(FILE *fp, uint64_t instr)
|
|||
% endif
|
||||
% endif
|
||||
% endfor
|
||||
va_print_metadata(fp, instr >> 56);
|
||||
fputs(" ", fp);
|
||||
assert((instr & (1ull << 63)) == 0 /* reserved */);
|
||||
fprintf(fp, "%s ", valhall_flow[instr >> 59]);
|
||||
% if len(op.dests) > 0:
|
||||
<% no_comma = False %>
|
||||
va_print_dest(fp, (instr >> 40), true);
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0
|
|||
82 3c 27 20 00 c0 a3 01 SHADDX.u64 r0, u2, r60.w0, shift:0x2
|
||||
40 00 00 18 82 80 60 08 LOAD.i32.unsigned.slot0.wait0 @r0, `r0, offset:0
|
||||
80 7c 47 20 00 c0 a3 01 SHADDX.u64 r0, u0, `r60.w0, shift:0x4
|
||||
40 00 00 38 08 44 61 78 STORE.i128.slot0.return @r4:r5:r6:r7, `r0, offset:0
|
||||
00 00 00 00 00 c0 00 78 NOP.return
|
||||
40 00 00 38 08 44 61 78 STORE.i128.slot0.end @r4:r5:r6:r7, `r0, offset:0
|
||||
00 00 00 00 00 c0 00 78 NOP.end
|
||||
40 c4 c0 9c 01 c1 f0 00 ICMP.u32.gt.m1 r1, `r0, 0x1000000.b3, 0x0
|
||||
42 00 00 18 02 40 61 50 STORE.i32.slot0.reconverge @r0, `r2, offset:0
|
||||
00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b0
|
||||
|
|
@ -46,18 +46,18 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0
|
|||
40 00 0b 10 00 c3 90 00 F16_TO_F32 r3, `r0.h1
|
||||
00 00 00 00 00 c0 00 40 NOP.wait0126
|
||||
42 43 04 00 00 c0 a5 00 V2F32_TO_V2F16 r0, `r2, `r3
|
||||
40 c0 00 28 90 c0 a5 48 FADD.v2f16.barrier r0, `r0.abs, 0x0.neg
|
||||
40 c0 00 28 90 c0 a5 48 FADD.v2f16.wait r0, `r0.abs, 0x0.neg
|
||||
c0 00 00 00 00 f6 10 01 IADD_IMM.i32 r54, 0x0, #0x0
|
||||
3c d0 ea 00 02 bc 7d 68 ATEST.td @r60, r60, 0x3F800000, atest_datum.w0
|
||||
3c d0 ea 00 02 bc 7d 68 ATEST.discard @r60, r60, 0x3F800000, atest_datum.w0
|
||||
40 db 05 04 00 c1 a1 00 MKVEC.v2i16 r1, `r0.h00, 0x3C000000.h10
|
||||
f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.return @r0:r1, blend_descriptor_0.w0, r60, target:0x0
|
||||
f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.end @r0:r1, blend_descriptor_0.w0, r60, target:0x0
|
||||
7b 0d 00 40 04 84 5e 08 LEA_BUF_IMM.slot1.wait0 @r4:r5, `r59, table:0xD, index:0x0
|
||||
00 dd c0 08 14 c2 b2 00 FMA.f32 r2, r0, 0x44000000.neg.h1, 0x0.neg
|
||||
41 88 c0 00 04 c1 b2 00 FMA.f32 r1, `r1, u8, 0x0.neg
|
||||
40 88 c0 00 04 c0 b2 10 FMA.f32.wait1 r0, `r0, u8, 0x0.neg
|
||||
44 00 00 32 06 40 61 78 STORE.i96.estream.slot0.return @r0:r1:r2, `r4, offset:0
|
||||
44 00 00 39 08 48 61 78 STORE.i128.istream.slot0.return @r8:r9:r10:r11, `r4, offset:0
|
||||
00 00 00 c0 01 c0 45 48 BARRIER.slot7.barrier
|
||||
44 00 00 32 06 40 61 78 STORE.i96.estream.slot0.end @r0:r1:r2, `r4, offset:0
|
||||
44 00 00 39 08 48 61 78 STORE.i128.istream.slot0.end @r8:r9:r10:r11, `r4, offset:0
|
||||
00 00 00 c0 01 c0 45 48 BARRIER.slot7.wait
|
||||
80 00 00 00 82 82 60 00 LOAD.i8.unsigned.slot0 @r2, u0, offset:0
|
||||
80 00 00 08 82 82 60 00 LOAD.i16.unsigned.slot0 @r2, u0, offset:0
|
||||
80 00 00 10 82 82 60 00 LOAD.i24.unsigned.slot0 @r2, u0, offset:0
|
||||
|
|
@ -106,9 +106,9 @@ c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1
|
|||
42 00 00 38 08 44 61 00 STORE.i128.slot0 @r4:r5:r6:r7, `r2, offset:0
|
||||
41 f8 ff ff 07 c0 1f 50 BRANCHZ.reconverge `r1, offset:-8
|
||||
7d c0 00 08 10 bc a1 00 IADD.v2u16 r60.h1, `r61.h10, 0x0
|
||||
44 00 46 32 28 40 71 78 ST_CVT.slot0.istream.v4.f32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0
|
||||
44 00 46 34 28 40 71 78 ST_CVT.slot0.istream.v4.s32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0
|
||||
44 00 46 36 28 40 71 78 ST_CVT.slot0.istream.v4.u32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0
|
||||
44 00 46 32 28 40 71 78 ST_CVT.slot0.istream.v4.f32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0
|
||||
44 00 46 34 28 40 71 78 ST_CVT.slot0.istream.v4.s32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0
|
||||
44 00 46 36 28 40 71 78 ST_CVT.slot0.istream.v4.u32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0
|
||||
7c c0 12 00 26 84 67 00 LEA_TEX_IMM.slot0 @r4:r5:r6, `r60, 0x0, table:0x2, index:0x1
|
||||
7c c0 02 00 26 84 67 00 LEA_TEX_IMM.slot0 @r4:r5:r6, `r60, 0x0, table:0x2, index:0x0
|
||||
82 81 00 28 f4 82 6a 00 LD_BUFFER.i64.unsigned.slot0 @r2:r3, u2, u1
|
||||
|
|
@ -123,7 +123,7 @@ c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1
|
|||
40 44 80 00 01 c0 b8 00 MUX.i32 r0, `r0, `r4, u0
|
||||
40 44 80 00 02 c0 b8 00 MUX.i32.fp_zero r0, `r0, `r4, u0
|
||||
40 44 80 00 03 c0 b8 00 MUX.i32.bit r0, `r0, `r4, u0
|
||||
00 00 00 01 00 c1 99 68 FREXPM.f32.sqrt.td r1, r0
|
||||
00 00 00 01 00 c1 99 68 FREXPM.f32.sqrt.discard r1, r0
|
||||
01 00 02 00 00 c2 9c 00 FRSQ.f32 r2, r1
|
||||
40 00 02 01 00 c0 99 00 FREXPE.f32.sqrt r0, `r0
|
||||
41 42 c0 40 04 c0 62 41 FMA_RSCALE_LEFT.f32.wait0126 r0, `r1, `r2, 0x0.neg, `r0
|
||||
|
|
@ -180,14 +180,14 @@ c0 77 01 0c 00 c2 a8 00 ISUB.s32 r2, 0x0, `r55.h1
|
|||
00 00 03 00 20 c1 90 00 V2S8_TO_V2F16 r1, r0.b20
|
||||
40 00 03 00 60 c0 90 00 V2S8_TO_V2F16 r0, `r0.b21
|
||||
|
||||
3d 00 00 b2 88 80 5c 68 LD_VAR_BUF_IMM.f32.slot2.v4.src_f32.sample.store.td @r0:r1:r2:r3, r61, index:0x0
|
||||
3d 00 00 b2 88 80 5c 68 LD_VAR_BUF_IMM.f32.slot2.v4.src_f32.sample.store.discard @r0:r1:r2:r3, r61, index:0x0
|
||||
3d 00 10 72 18 84 5c 00 LD_VAR_BUF_IMM.f32.slot1.v4.src_f32.center.retrieve @r4:r5:r6:r7, r61, index:0x10
|
||||
c0 00 00 00 00 c8 10 01 IADD_IMM.i32 r8, 0x0, #0x0
|
||||
c0 00 00 00 00 c9 10 01 IADD_IMM.i32 r9, 0x0, #0x0
|
||||
3d 00 14 00 00 ca 90 00 U16_TO_U32 r10, r61.h00
|
||||
3d 09 00 00 30 c0 1f 50 BRANCHZ.eq.reconverge r61.h0, offset:9
|
||||
0a 00 00 00 00 cb 91 50 MOV.i32.reconverge r11, r10
|
||||
00 00 00 00 00 c0 00 48 NOP.barrier
|
||||
00 00 00 00 00 c0 00 48 NOP.wait
|
||||
81 0b 80 33 04 8e 78 00 LD_TILE.v4.f16.slot0 @r14:r15, u1, r11, u0
|
||||
0b 00 04 00 00 cc 91 00 CLZ.u32 r12, r11
|
||||
82 4c c0 52 00 cc b4 00 RSHIFT_XOR.i32.not_result r12, u2, `r12.b00, 0x0
|
||||
|
|
@ -202,15 +202,15 @@ c0 00 00 00 00 c9 10 01 IADD_IMM.i32 r9, 0x0, #0x0
|
|||
49 3e c0 22 04 c9 b3 30 FMA.v2f16.wait12 r9, `r9, r62.h00, 0x0.neg
|
||||
47 43 00 00 00 c3 a4 00 FADD.f32 r3, `r7, `r3
|
||||
43 09 00 08 00 c3 a4 40 FADD.f32.wait0126 r3, `r3, r9.h1
|
||||
3c 03 ea 00 02 bc 7d 68 ATEST.td @r60, r60, r3, atest_datum.w0
|
||||
3c 03 ea 00 02 bc 7d 68 ATEST.discard @r60, r60, r3, atest_datum.w0
|
||||
46 42 00 00 00 c2 a4 00 FADD.f32 r2, `r6, `r2
|
||||
44 40 00 00 00 c0 a4 00 FADD.f32 r0, `r4, `r0
|
||||
48 7e c0 22 04 ff b3 00 FMA.v2f16 r63, `r8, `r62.h00, 0x0.neg
|
||||
45 41 00 00 00 c1 a4 00 FADD.f32 r1, `r5, `r1
|
||||
41 3f 00 08 00 c1 a4 00 FADD.f32 r1, `r1, r63.h1
|
||||
40 7f 00 04 00 c0 a4 00 FADD.f32 r0, `r0, `r63.h0
|
||||
42 49 00 04 00 c2 a4 48 FADD.f32.barrier r2, `r2, `r9.h0
|
||||
f0 00 3c 32 08 40 7f 78 BLEND.slot0.v4.f32.return @r0:r1:r2:r3, blend_descriptor_0.w0, r60, target:0x0
|
||||
42 49 00 04 00 c2 a4 48 FADD.f32.wait r2, `r2, `r9.h0
|
||||
f0 00 3c 32 08 40 7f 78 BLEND.slot0.v4.f32.end @r0:r1:r2:r3, blend_descriptor_0.w0, r60, target:0x0
|
||||
c0 00 00 00 00 f6 10 01 IADD_IMM.i32 r54, 0x0, #0x0
|
||||
c0 f1 00 00 10 c1 2f 08 BRANCHZI.eq.absolute.wait0 0x0, blend_descriptor_0.w1
|
||||
80 00 c0 17 34 7c 25 01 TEX_FETCH.slot0.f.32.2d @r0:r1:r2:r3, @r60:r61, u0
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue