diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml
index e40298edb3d..dc8b1fa84ba 100644
--- a/src/panfrost/compiler/bifrost/valhall/ISA.xml
+++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml
@@ -1008,8 +1008,9 @@
-
+
+
Load `vecsize` components from the attribute descriptor at entry `index`
of resource table `table` at index (vertex ID, instance ID), converting
@@ -1019,7 +1020,6 @@
-
Vertex ID
Instance ID
@@ -1027,8 +1027,9 @@
-
+
+
Load `vecsize` components from the attribute descriptor at the specified
location at index (vertex ID, instance ID), converting
@@ -1040,7 +1041,6 @@
-
Vertex ID
Instance ID
@@ -1055,8 +1055,9 @@
-
+
+
Load `vecsize` components from the texture descriptor at entry `index`
of resource table `table`, converting
@@ -1066,7 +1067,6 @@
-
X/Y coordinates (16:16)
Z/W coordinates (16:16)
@@ -1074,8 +1074,9 @@
-
+
+
Load `vecsize` components from the texture descriptor at the specified
location at index, converting
@@ -1085,15 +1086,15 @@
-
X/Y coordinates (16:16)
Z/W coordinates (16:16)
Index and table
-
+
+
Load the effective address of an attribute specified with the
given immediate index. Returns three staging register: the low/high
@@ -1101,7 +1102,6 @@
-
Vertex index
Instance index
@@ -1109,8 +1109,9 @@
-
+
+
Load the effective address of an attribute specified with the
given index. Returns three staging register: the low/high
@@ -1119,15 +1120,15 @@
-
Vertex index
Instance index
Attribute index and table
-
+
+
Load the effective address of a texel from the image specified with the
given immediate index. Returns three staging registers: the low/high
@@ -1140,7 +1141,6 @@
-
X/Y coordinates (16:16)
Z/W coordinates (16:16)
@@ -1148,8 +1148,9 @@
-
+
+
Load the effective address of a texel from the image specified with the
given index. Returns three staging register: the low/high
@@ -1163,15 +1164,15 @@
-
X/Y coordinates (16:16)
Z/W coordinates (16:16)
Index and table
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1188,8 +1189,9 @@
Mode descriptor
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1206,8 +1208,9 @@
Mode descriptor
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1224,8 +1227,9 @@
Mode descriptor
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1242,8 +1246,9 @@
Mode descriptor
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1260,8 +1265,9 @@
Mode descriptor
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1278,8 +1284,9 @@
Mode descriptor
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1296,8 +1303,9 @@
Mode descriptor
-
+
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1340,8 +1348,9 @@
Structure index
-
+
+
Loads from main memory
@@ -1353,8 +1362,9 @@
-
+
+
Loads from main memory
@@ -1366,8 +1376,9 @@
-
+
+
Loads from main memory
@@ -1379,8 +1390,9 @@
-
+
+
Loads from main memory
@@ -1392,8 +1404,9 @@
-
+
+
Loads from main memory
@@ -1405,8 +1418,9 @@
-
+
+
Loads from main memory
@@ -1418,8 +1432,9 @@
-
+
+
Loads from main memory
@@ -1431,8 +1446,9 @@
-
+
+
Loads from main memory
@@ -1448,14 +1464,30 @@
Stores to main memory
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1624,14 +1656,22 @@
-
+
+
+
-
+
+
+
-
+
+
+
-
+
+
+
Value to convert
@@ -1643,14 +1683,22 @@
Performs the given data conversion.
-
+
+
+
-
+
+
+
-
+
+
+
-
+
+
+
Value to convert
@@ -1658,8 +1706,12 @@
Performs the given data conversion.
-
-
+
+
+
+
+
+
Value to convert
@@ -1668,19 +1720,28 @@
Performs the given data conversion.
-
+
+
+
-
+
+
+
-
+
+
+
-
+
+
+
Value to convert
-
+
+
Converts up with the specified round mode.
Value to convert
@@ -1692,15 +1753,23 @@
Performs the given data conversion.
-
+
+
+
-
+
+
+
-
+
+
+
-
+
+
+
Value to convert
@@ -1711,14 +1780,22 @@
Performs the given data conversion.
-
+
+
+
-
+
+
+
-
+
+
+
-
+
+
+
Value to convert
@@ -1729,47 +1806,56 @@
Performs the given rounding, using the convert unit.
-
+
+
+
-
+
+
+
Value to convert
-
+
+
Canonical register-to-register move.
-
+
+
Used as a primitive for various bitwise operations.
-
+
+
Used as a primitive for various bitwise operations.
-
+
+
Used as a primitive for various bitwise operations.
-
+
+
64-bit abs may be constructed in 4 instructions (5 clocks) by checking the
sign with `ICMP.s32.lt.m1 hi, 0` and negating based on the result with
@@ -1778,19 +1864,22 @@
-
+
+
-
+
+
-
+
+
Only available as 32-bit. Smaller bitsizes require explicit conversions.
64-bit popcount may be constructed in 3 clocks by separate 32-bit
@@ -1800,24 +1889,27 @@
-
+
+
Only available as 32-bit. Other bitsizes may be derived with swizzles.
-
+
+
For fully featured bitwise operation, see the shift opcodes.
-
+
+
For fully featured bitwise operation, see the shift opcodes.
@@ -1843,8 +1935,12 @@
-
-
+
+
+
+
+
+
Flush special float values. The ftz modifier flushes subnormal values to
zero. The flush_inf modifier flushes +inf to the maximum finite value, and
@@ -1859,12 +1955,20 @@
-
+
+
+
-
-
+
+
+
+
+
+
-
+
+
+
Breaks up the floating-point input into its fractional (mantissa) and
exponent parts. By default, this is compatible with the `frexp()` function
@@ -1879,16 +1983,36 @@
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Performs a given special function. The floating-point reciprocal (`FRCP`)
and reciprocal square root (`FRSQ`) instructions may be freely used as-is.
@@ -1902,10 +2026,18 @@
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
Performs a given special function. The trigonometric tables
(`FSIN_TABLE.u6` and `FCOS_TABLE.u6`) are crude, requiring both an
@@ -1914,7 +2046,8 @@
-
+
+
@@ -1929,7 +2062,8 @@
B
-
+
+
@@ -1942,7 +2076,8 @@
B
-
+
+
@@ -1956,7 +2091,8 @@
-
+
+
@@ -1970,7 +2106,8 @@
B
-
+
+
@@ -1989,8 +2126,9 @@
-
+
+
Calculates the base-2 exponent of an argument specified as a 8:24
fixed-point. The original argument is passed as well for correct handling
@@ -2001,8 +2139,9 @@
Input as 32-bit float
-
+
+
Performs a floating-point addition specialized for logarithm computation.
@@ -2011,8 +2150,9 @@
B
-
+
+
Used for `atan2()` implementation. Destination is two 16-bit
values (int and float) for the first form, and a single 32-bit float when
@@ -2023,7 +2163,8 @@
B
-
+
+
$A + B$ with optional saturation.
@@ -2061,14 +2202,16 @@
-
+
+
Calculates $A | (B \ll 16)$. Used to implement `(ushort2)(A, B)`
A
B
-
+
+
@@ -2101,7 +2244,8 @@
-
+
+
Similar to SHADDX, but especially used for loading offsets into
WLS. Usually this is only required for atomic operations, which cannot
@@ -2118,7 +2262,8 @@
B
-
+
+
Sign or zero extend B to 64-bits, left-shift by `shift`, and add the
64-bit value A. These instructions accelerate address arithmetic, but may
@@ -2135,7 +2280,8 @@
B
-
+
+
@@ -2170,7 +2316,8 @@
-
+
+
@@ -2199,8 +2346,9 @@
-
+
+
Selects the value of A in the subgroup lane given by B. This implements
subgroup broadcasts. It may be used as a primitive for screen space
@@ -2228,7 +2376,8 @@
C
-
+
+
@@ -2241,7 +2390,6 @@
-
Left shifts its first source by a specified amount and bitwise ANDs it with the
second source, optionally inverting the second source or the result.
@@ -2252,7 +2400,8 @@
B
-
+
+
@@ -2265,7 +2414,6 @@
-
Right shifts its first source by a specified amount and bitwise ANDs it with the
second source, optionally inverting the second source or the result. If
@@ -2279,7 +2427,8 @@
B
-
+
+
@@ -2292,7 +2441,6 @@
-
Left shifts its first source by a specified amount and bitwise ORs it with the
second source, optionally inverting the second source or the result.
@@ -2303,7 +2451,8 @@
B
-
+
+
@@ -2316,7 +2465,6 @@
-
Right shifts its first source by a specified amount and bitwise ORs it with the
second source, optionally inverting the second source or the result. If
@@ -2330,7 +2478,8 @@
B
-
+
+
@@ -2343,7 +2492,6 @@
-
Left shifts its first source by a specified amount and bitwise XORs it with the
second source, optionally inverting the second source or the result.
@@ -2354,7 +2502,8 @@
B
-
+
+
@@ -2367,7 +2516,6 @@
-
Right shifts its first source by a specified amount and bitwise XORs it with the
second source, optionally inverting the second source or the result. If
@@ -2426,16 +2574,18 @@
Mask
-
+
+
During a cube map transform, select the S coordinate given a selected face.
Z coordinate as 32-bit floating point
X coordinate as 32-bit floating point
Cube face index
-
+
+
During a cube map transform, select the T coordinate given a selected face.
Y coordinate as 32-bit floating point
Z coordinate as 32-bit floating point
@@ -2487,15 +2637,20 @@
\mathbb{Z}$, calculates $(A \cdot B) + \text{Accumulator}$ and optionally
saturates.
-
-
+
+
+
+
+
+
A
B
Accumulator
-
+
+
Evaluates the given condition, do a logical or with the condition in
the result source, and return in the given result type (integer
@@ -2520,7 +2675,8 @@
C
-
+
+
Evaluates the given condition, do a logical and with the condition in
the result source, and return in the given result type (integer
@@ -2544,7 +2700,8 @@
C
-
+
+
Evaluates the given condition, do a logical or with the condition in
the result source, and return in the given result type (integer
@@ -2565,7 +2722,8 @@
C
-
+
+
Evaluates the given condition, do a logical and/or with the condition in
the result source, and return in the given result type (integer
@@ -2585,7 +2743,8 @@
C
-
+
+
Evaluates the given condition, do a logical or with the condition in
the result source, and return in the given result type (integer
@@ -2609,7 +2768,8 @@
C
-
+
+
Evaluates the given condition, do a logical and with the condition in
the result source, and return in the given result type (integer
@@ -2633,7 +2793,8 @@
C
-
+
+
Evaluates the given condition, do a logical and/or with the condition in
the result source, and return in the given result type (integer
@@ -2726,12 +2887,12 @@
-
+
+
-
@@ -2739,12 +2900,12 @@
-
+
+
-
@@ -2752,32 +2913,33 @@
-
+
+
-
64-bit address to operate on
-
+
+
-
64-bit address to operate on
-
+
+
@@ -2786,7 +2948,6 @@
-
@@ -2794,8 +2955,9 @@
-
+
+
@@ -2804,7 +2966,6 @@
-
diff --git a/src/panfrost/compiler/bifrost/valhall/asm.py b/src/panfrost/compiler/bifrost/valhall/asm.py
index 560a2b1834d..ba4127fdc6d 100644
--- a/src/panfrost/compiler/bifrost/valhall/asm.py
+++ b/src/panfrost/compiler/bifrost/valhall/asm.py
@@ -316,7 +316,8 @@ def parse_asm(line):
# Encode the operation itself
encoded |= (ins.opcode.value << ins.opcode.start)
- encoded |= (ins.opcode2 << ins.secondary_shift)
+ if ins.opcode2:
+ encoded |= (ins.opcode2.value << ins.opcode2.start)
# Encode FAU page
if fau.page:
diff --git a/src/panfrost/compiler/bifrost/valhall/disasm.py b/src/panfrost/compiler/bifrost/valhall/disasm.py
index fca5f87f877..4a757cc5bcd 100644
--- a/src/panfrost/compiler/bifrost/valhall/disasm.py
+++ b/src/panfrost/compiler/bifrost/valhall/disasm.py
@@ -121,17 +121,17 @@ va_disasm_instr(FILE *fp, uint64_t instr)
% if len(ops) > 0:
case ${hex(bucket)}:
% if ambiguous:
- secondary_opc = (instr >> ${ops[0].secondary_shift}) & ${hex(ops[0].secondary_mask)};
+ secondary_opc = (instr >> ${ops[0].opcode2.start}) & ${hex(ops[0].opcode2.mask)};
% endif
% for op in ops:
<% no_comma = True %>
% if ambiguous:
- if (secondary_opc == ${op.opcode2}) {
+ if (secondary_opc == ${op.opcode2.value}) {
% endif
fputs("${op.name}", fp);
% for mod in op.modifiers:
-% if mod.name not in ["left", "memory_width", "descriptor_type", "staging_register_count", "staging_register_write_count"]:
+% if mod.name not in ["staging_register_count", "staging_register_write_count"]:
% if mod.is_enum:
fputs(valhall_${safe_name(mod.enum)}[(instr >> ${mod.start}) & ${hex((1 << mod.size) - 1)}], fp);
% else:
@@ -288,8 +288,8 @@ for op in OPCODE_BUCKETS:
assert(len(ins.srcs) == len(bucket[0].srcs))
# Must not repeat, else we're ambiguous
- assert(ins.opcode2 not in SECONDARY)
- SECONDARY[ins.opcode2] = ins
+ assert(ins.opcode2.value not in SECONDARY)
+ SECONDARY[ins.opcode2.value] = ins
try:
print(Template(template).render(OPCODES = OPCODE_BUCKETS, IMMEDIATES = immediates, ENUMS = enums, typesize = typesize, safe_name = safe_name))
diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.c.py b/src/panfrost/compiler/bifrost/valhall/valhall.c.py
index 47f7d4a328a..91d4f7c0c07 100644
--- a/src/panfrost/compiler/bifrost/valhall/valhall.c.py
+++ b/src/panfrost/compiler/bifrost/valhall/valhall.c.py
@@ -147,7 +147,10 @@ valhall_opcodes[BI_NUM_OPCODES] = {
# Exact value to be ORed in to every opcode
def exact_op(op):
- return (op.opcode.value << op.opcode.start) | (op.opcode2 << op.secondary_shift)
+ exact_op = (op.opcode.value << op.opcode.start)
+ if op.opcode2:
+ exact_op |= (op.opcode2.value << op.opcode2.start)
+ return exact_op
try:
print(Template(template).render(immediates = immediates, instructions = instructions, skip = SKIP, exact = exact_op, typesize = typesize))
diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.py b/src/panfrost/compiler/bifrost/valhall/valhall.py
index 0ac517a47e4..cddc277d2aa 100644
--- a/src/panfrost/compiler/bifrost/valhall/valhall.py
+++ b/src/panfrost/compiler/bifrost/valhall/valhall.py
@@ -169,7 +169,7 @@ class Instruction:
self.srcs = srcs
self.dests = dests
self.opcode = opcode
- self.opcode2 = opcode2 or 0
+ self.opcode2 = opcode2
self.immediates = immediates
self.modifiers = modifiers
self.staging = staging
@@ -179,27 +179,8 @@ class Instruction:
# Message-passing instruction <===> not ALU instruction
self.message = unit not in ["FMA", "CVT", "SFU"]
- self.secondary_shift = max(len(self.srcs) * 8, 16)
- self.secondary_mask = 0xF if opcode2 is not None else 0x0
- if "left" in [x.name for x in self.modifiers]:
- self.secondary_mask |= 0x100
- if len(srcs) == 3 and (srcs[1].widen or srcs[1].lanes or srcs[1].swizzle):
- self.secondary_mask &= ~0xC # conflicts
- if opcode.value == 0x90:
- # XXX: XMLify this, but disambiguates sign of conversions
- self.secondary_mask |= 0x10
- if name.startswith("LOAD.i") or name.startswith("STORE.i") or name.startswith("LD_PKA.i"):
- self.secondary_shift = 27 # Alias with memory_size
- self.secondary_mask = 0x7
- if "descriptor_type" in [x.name for x in self.modifiers]:
- self.secondary_mask = 0x3
- self.secondary_shift = 37
- elif "memory_width" in [x.name for x in self.modifiers]:
- self.secondary_mask = 0x7
- self.secondary_shift = 27
-
assert(len(dests) == 0 or not staging)
- assert(not opcode2 or (opcode2 & self.secondary_mask) == opcode2)
+ assert(not opcode2 or (opcode2.value & opcode2.mask) == opcode2.value)
def __str__(self):
return self.name
@@ -258,9 +239,8 @@ def build_instr(el, overrides = {}):
# Get overridables
name = overrides.get('name') or el.attrib.get('name')
opcode = overrides.get('opcode') or build_opcode(el, 'opcode')
- opcode2 = overrides.get('opcode2') or el.attrib.get('opcode2')
+ opcode2 = overrides.get('opcode2') or build_opcode(el, 'opcode2')
unit = overrides.get('unit') or el.attrib.get('unit')
- opcode2 = int(opcode2, base=0) if opcode2 else None
# Get explicit sources/dests
tsize = typesize(name)
@@ -310,7 +290,7 @@ def build_group(el):
build_instr(el, overrides = {
'name': ins.attrib['name'],
'opcode': build_opcode(ins, 'opcode'),
- 'opcode2': ins.attrib.get('opcode2'),
+ 'opcode2': build_opcode(ins, 'opcode2'),
'unit': ins.attrib.get('unit'),
})