From adffad6adb1f3d198b16a46b6812a9615ec266e7 Mon Sep 17 00:00:00 2001 From: Lars-Ivar Hesselberg Simonsen Date: Mon, 2 Mar 2026 13:22:59 +0100 Subject: [PATCH] pan/va: XMLify opcode2 Opcode2 was a bit all over the place, so utilize the new opcode modifier to gather opcode2 information in a single place. This cleans up the implicit va_mods "left", "descriptor_type" and "memory_width". Reviewed-by: Christoph Pillmayer Acked-by: Lorenzo Rossi Acked-by: Eric R. Smith Part-of: --- src/panfrost/compiler/bifrost/valhall/ISA.xml | 455 ++++++++++++------ src/panfrost/compiler/bifrost/valhall/asm.py | 3 +- .../compiler/bifrost/valhall/disasm.py | 10 +- .../compiler/bifrost/valhall/valhall.c.py | 5 +- .../compiler/bifrost/valhall/valhall.py | 28 +- 5 files changed, 323 insertions(+), 178 deletions(-) diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml index e40298edb3d..dc8b1fa84ba 100644 --- a/src/panfrost/compiler/bifrost/valhall/ISA.xml +++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml @@ -1008,8 +1008,9 @@ - + + Load `vecsize` components from the attribute descriptor at entry `index` of resource table `table` at index (vertex ID, instance ID), converting @@ -1019,7 +1020,6 @@ - Vertex ID Instance ID @@ -1027,8 +1027,9 @@ - + + Load `vecsize` components from the attribute descriptor at the specified location at index (vertex ID, instance ID), converting @@ -1040,7 +1041,6 @@ - Vertex ID Instance ID @@ -1055,8 +1055,9 @@ - + + Load `vecsize` components from the texture descriptor at entry `index` of resource table `table`, converting @@ -1066,7 +1067,6 @@ - X/Y coordinates (16:16) Z/W coordinates (16:16) @@ -1074,8 +1074,9 @@ - + + Load `vecsize` components from the texture descriptor at the specified location at index, converting @@ -1085,15 +1086,15 @@ - X/Y coordinates (16:16) Z/W coordinates (16:16) Index and table - + + Load the effective address of an attribute specified with the given immediate index. Returns three staging register: the low/high @@ -1101,7 +1102,6 @@ - Vertex index Instance index @@ -1109,8 +1109,9 @@ - + + Load the effective address of an attribute specified with the given index. Returns three staging register: the low/high @@ -1119,15 +1120,15 @@ - Vertex index Instance index Attribute index and table - + + Load the effective address of a texel from the image specified with the given immediate index. Returns three staging registers: the low/high @@ -1140,7 +1141,6 @@ - X/Y coordinates (16:16) Z/W coordinates (16:16) @@ -1148,8 +1148,9 @@ - + + Load the effective address of a texel from the image specified with the given index. Returns three staging register: the low/high @@ -1163,15 +1164,15 @@ - X/Y coordinates (16:16) Z/W coordinates (16:16) Index and table - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1188,8 +1189,9 @@ Mode descriptor - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1206,8 +1208,9 @@ Mode descriptor - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1224,8 +1227,9 @@ Mode descriptor - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1242,8 +1246,9 @@ Mode descriptor - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1260,8 +1265,9 @@ Mode descriptor - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1278,8 +1284,9 @@ Mode descriptor - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1296,8 +1303,9 @@ Mode descriptor - + + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1340,8 +1348,9 @@ Structure index - + + Loads from main memory @@ -1353,8 +1362,9 @@ - + + Loads from main memory @@ -1366,8 +1376,9 @@ - + + Loads from main memory @@ -1379,8 +1390,9 @@ - + + Loads from main memory @@ -1392,8 +1404,9 @@ - + + Loads from main memory @@ -1405,8 +1418,9 @@ - + + Loads from main memory @@ -1418,8 +1432,9 @@ - + + Loads from main memory @@ -1431,8 +1446,9 @@ - + + Loads from main memory @@ -1448,14 +1464,30 @@ Stores to main memory - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + @@ -1624,14 +1656,22 @@ - + + + - + + + - + + + - + + + Value to convert @@ -1643,14 +1683,22 @@ Performs the given data conversion. - + + + - + + + - + + + - + + + Value to convert @@ -1658,8 +1706,12 @@ Performs the given data conversion. - - + + + + + + Value to convert @@ -1668,19 +1720,28 @@ Performs the given data conversion. - + + + - + + + - + + + - + + + Value to convert - + + Converts up with the specified round mode. Value to convert @@ -1692,15 +1753,23 @@ Performs the given data conversion. - + + + - + + + - + + + - + + + Value to convert @@ -1711,14 +1780,22 @@ Performs the given data conversion. - + + + - + + + - + + + - + + + Value to convert @@ -1729,47 +1806,56 @@ Performs the given rounding, using the convert unit. - + + + - + + + Value to convert - + + Canonical register-to-register move. - + + Used as a primitive for various bitwise operations. - + + Used as a primitive for various bitwise operations. - + + Used as a primitive for various bitwise operations. - + + 64-bit abs may be constructed in 4 instructions (5 clocks) by checking the sign with `ICMP.s32.lt.m1 hi, 0` and negating based on the result with @@ -1778,19 +1864,22 @@ - + + - + + - + + Only available as 32-bit. Smaller bitsizes require explicit conversions. 64-bit popcount may be constructed in 3 clocks by separate 32-bit @@ -1800,24 +1889,27 @@ - + + Only available as 32-bit. Other bitsizes may be derived with swizzles. - + + For fully featured bitwise operation, see the shift opcodes. - + + For fully featured bitwise operation, see the shift opcodes. @@ -1843,8 +1935,12 @@ - - + + + + + + Flush special float values. The ftz modifier flushes subnormal values to zero. The flush_inf modifier flushes +inf to the maximum finite value, and @@ -1859,12 +1955,20 @@ - + + + - - + + + + + + - + + + Breaks up the floating-point input into its fractional (mantissa) and exponent parts. By default, this is compatible with the `frexp()` function @@ -1879,16 +1983,36 @@ - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Performs a given special function. The floating-point reciprocal (`FRCP`) and reciprocal square root (`FRSQ`) instructions may be freely used as-is. @@ -1902,10 +2026,18 @@ - - - - + + + + + + + + + + + + Performs a given special function. The trigonometric tables (`FSIN_TABLE.u6` and `FCOS_TABLE.u6`) are crude, requiring both an @@ -1914,7 +2046,8 @@ - + + @@ -1929,7 +2062,8 @@ B - + + @@ -1942,7 +2076,8 @@ B - + + @@ -1956,7 +2091,8 @@ - + + @@ -1970,7 +2106,8 @@ B - + + @@ -1989,8 +2126,9 @@ - + + Calculates the base-2 exponent of an argument specified as a 8:24 fixed-point. The original argument is passed as well for correct handling @@ -2001,8 +2139,9 @@ Input as 32-bit float - + + Performs a floating-point addition specialized for logarithm computation. @@ -2011,8 +2150,9 @@ B - + + Used for `atan2()` implementation. Destination is two 16-bit values (int and float) for the first form, and a single 32-bit float when @@ -2023,7 +2163,8 @@ B - + + $A + B$ with optional saturation. @@ -2061,14 +2202,16 @@ - + + Calculates $A | (B \ll 16)$. Used to implement `(ushort2)(A, B)` A B - + + @@ -2101,7 +2244,8 @@ - + + Similar to SHADDX, but especially used for loading offsets into WLS. Usually this is only required for atomic operations, which cannot @@ -2118,7 +2262,8 @@ B - + + Sign or zero extend B to 64-bits, left-shift by `shift`, and add the 64-bit value A. These instructions accelerate address arithmetic, but may @@ -2135,7 +2280,8 @@ B - + + @@ -2170,7 +2316,8 @@ - + + @@ -2199,8 +2346,9 @@ - + + Selects the value of A in the subgroup lane given by B. This implements subgroup broadcasts. It may be used as a primitive for screen space @@ -2228,7 +2376,8 @@ C - + + @@ -2241,7 +2390,6 @@ - Left shifts its first source by a specified amount and bitwise ANDs it with the second source, optionally inverting the second source or the result. @@ -2252,7 +2400,8 @@ B - + + @@ -2265,7 +2414,6 @@ - Right shifts its first source by a specified amount and bitwise ANDs it with the second source, optionally inverting the second source or the result. If @@ -2279,7 +2427,8 @@ B - + + @@ -2292,7 +2441,6 @@ - Left shifts its first source by a specified amount and bitwise ORs it with the second source, optionally inverting the second source or the result. @@ -2303,7 +2451,8 @@ B - + + @@ -2316,7 +2465,6 @@ - Right shifts its first source by a specified amount and bitwise ORs it with the second source, optionally inverting the second source or the result. If @@ -2330,7 +2478,8 @@ B - + + @@ -2343,7 +2492,6 @@ - Left shifts its first source by a specified amount and bitwise XORs it with the second source, optionally inverting the second source or the result. @@ -2354,7 +2502,8 @@ B - + + @@ -2367,7 +2516,6 @@ - Right shifts its first source by a specified amount and bitwise XORs it with the second source, optionally inverting the second source or the result. If @@ -2426,16 +2574,18 @@ Mask - + + During a cube map transform, select the S coordinate given a selected face. Z coordinate as 32-bit floating point X coordinate as 32-bit floating point Cube face index - + + During a cube map transform, select the T coordinate given a selected face. Y coordinate as 32-bit floating point Z coordinate as 32-bit floating point @@ -2487,15 +2637,20 @@ \mathbb{Z}$, calculates $(A \cdot B) + \text{Accumulator}$ and optionally saturates. - - + + + + + + A B Accumulator - + + Evaluates the given condition, do a logical or with the condition in the result source, and return in the given result type (integer @@ -2520,7 +2675,8 @@ C - + + Evaluates the given condition, do a logical and with the condition in the result source, and return in the given result type (integer @@ -2544,7 +2700,8 @@ C - + + Evaluates the given condition, do a logical or with the condition in the result source, and return in the given result type (integer @@ -2565,7 +2722,8 @@ C - + + Evaluates the given condition, do a logical and/or with the condition in the result source, and return in the given result type (integer @@ -2585,7 +2743,8 @@ C - + + Evaluates the given condition, do a logical or with the condition in the result source, and return in the given result type (integer @@ -2609,7 +2768,8 @@ C - + + Evaluates the given condition, do a logical and with the condition in the result source, and return in the given result type (integer @@ -2633,7 +2793,8 @@ C - + + Evaluates the given condition, do a logical and/or with the condition in the result source, and return in the given result type (integer @@ -2726,12 +2887,12 @@ - + + - @@ -2739,12 +2900,12 @@ - + + - @@ -2752,32 +2913,33 @@ - + + - 64-bit address to operate on - + + - 64-bit address to operate on - + + @@ -2786,7 +2948,6 @@ - @@ -2794,8 +2955,9 @@ - + + @@ -2804,7 +2966,6 @@ - diff --git a/src/panfrost/compiler/bifrost/valhall/asm.py b/src/panfrost/compiler/bifrost/valhall/asm.py index 560a2b1834d..ba4127fdc6d 100644 --- a/src/panfrost/compiler/bifrost/valhall/asm.py +++ b/src/panfrost/compiler/bifrost/valhall/asm.py @@ -316,7 +316,8 @@ def parse_asm(line): # Encode the operation itself encoded |= (ins.opcode.value << ins.opcode.start) - encoded |= (ins.opcode2 << ins.secondary_shift) + if ins.opcode2: + encoded |= (ins.opcode2.value << ins.opcode2.start) # Encode FAU page if fau.page: diff --git a/src/panfrost/compiler/bifrost/valhall/disasm.py b/src/panfrost/compiler/bifrost/valhall/disasm.py index fca5f87f877..4a757cc5bcd 100644 --- a/src/panfrost/compiler/bifrost/valhall/disasm.py +++ b/src/panfrost/compiler/bifrost/valhall/disasm.py @@ -121,17 +121,17 @@ va_disasm_instr(FILE *fp, uint64_t instr) % if len(ops) > 0: case ${hex(bucket)}: % if ambiguous: - secondary_opc = (instr >> ${ops[0].secondary_shift}) & ${hex(ops[0].secondary_mask)}; + secondary_opc = (instr >> ${ops[0].opcode2.start}) & ${hex(ops[0].opcode2.mask)}; % endif % for op in ops: <% no_comma = True %> % if ambiguous: - if (secondary_opc == ${op.opcode2}) { + if (secondary_opc == ${op.opcode2.value}) { % endif fputs("${op.name}", fp); % for mod in op.modifiers: -% if mod.name not in ["left", "memory_width", "descriptor_type", "staging_register_count", "staging_register_write_count"]: +% if mod.name not in ["staging_register_count", "staging_register_write_count"]: % if mod.is_enum: fputs(valhall_${safe_name(mod.enum)}[(instr >> ${mod.start}) & ${hex((1 << mod.size) - 1)}], fp); % else: @@ -288,8 +288,8 @@ for op in OPCODE_BUCKETS: assert(len(ins.srcs) == len(bucket[0].srcs)) # Must not repeat, else we're ambiguous - assert(ins.opcode2 not in SECONDARY) - SECONDARY[ins.opcode2] = ins + assert(ins.opcode2.value not in SECONDARY) + SECONDARY[ins.opcode2.value] = ins try: print(Template(template).render(OPCODES = OPCODE_BUCKETS, IMMEDIATES = immediates, ENUMS = enums, typesize = typesize, safe_name = safe_name)) diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.c.py b/src/panfrost/compiler/bifrost/valhall/valhall.c.py index 47f7d4a328a..91d4f7c0c07 100644 --- a/src/panfrost/compiler/bifrost/valhall/valhall.c.py +++ b/src/panfrost/compiler/bifrost/valhall/valhall.c.py @@ -147,7 +147,10 @@ valhall_opcodes[BI_NUM_OPCODES] = { # Exact value to be ORed in to every opcode def exact_op(op): - return (op.opcode.value << op.opcode.start) | (op.opcode2 << op.secondary_shift) + exact_op = (op.opcode.value << op.opcode.start) + if op.opcode2: + exact_op |= (op.opcode2.value << op.opcode2.start) + return exact_op try: print(Template(template).render(immediates = immediates, instructions = instructions, skip = SKIP, exact = exact_op, typesize = typesize)) diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.py b/src/panfrost/compiler/bifrost/valhall/valhall.py index 0ac517a47e4..cddc277d2aa 100644 --- a/src/panfrost/compiler/bifrost/valhall/valhall.py +++ b/src/panfrost/compiler/bifrost/valhall/valhall.py @@ -169,7 +169,7 @@ class Instruction: self.srcs = srcs self.dests = dests self.opcode = opcode - self.opcode2 = opcode2 or 0 + self.opcode2 = opcode2 self.immediates = immediates self.modifiers = modifiers self.staging = staging @@ -179,27 +179,8 @@ class Instruction: # Message-passing instruction <===> not ALU instruction self.message = unit not in ["FMA", "CVT", "SFU"] - self.secondary_shift = max(len(self.srcs) * 8, 16) - self.secondary_mask = 0xF if opcode2 is not None else 0x0 - if "left" in [x.name for x in self.modifiers]: - self.secondary_mask |= 0x100 - if len(srcs) == 3 and (srcs[1].widen or srcs[1].lanes or srcs[1].swizzle): - self.secondary_mask &= ~0xC # conflicts - if opcode.value == 0x90: - # XXX: XMLify this, but disambiguates sign of conversions - self.secondary_mask |= 0x10 - if name.startswith("LOAD.i") or name.startswith("STORE.i") or name.startswith("LD_PKA.i"): - self.secondary_shift = 27 # Alias with memory_size - self.secondary_mask = 0x7 - if "descriptor_type" in [x.name for x in self.modifiers]: - self.secondary_mask = 0x3 - self.secondary_shift = 37 - elif "memory_width" in [x.name for x in self.modifiers]: - self.secondary_mask = 0x7 - self.secondary_shift = 27 - assert(len(dests) == 0 or not staging) - assert(not opcode2 or (opcode2 & self.secondary_mask) == opcode2) + assert(not opcode2 or (opcode2.value & opcode2.mask) == opcode2.value) def __str__(self): return self.name @@ -258,9 +239,8 @@ def build_instr(el, overrides = {}): # Get overridables name = overrides.get('name') or el.attrib.get('name') opcode = overrides.get('opcode') or build_opcode(el, 'opcode') - opcode2 = overrides.get('opcode2') or el.attrib.get('opcode2') + opcode2 = overrides.get('opcode2') or build_opcode(el, 'opcode2') unit = overrides.get('unit') or el.attrib.get('unit') - opcode2 = int(opcode2, base=0) if opcode2 else None # Get explicit sources/dests tsize = typesize(name) @@ -310,7 +290,7 @@ def build_group(el): build_instr(el, overrides = { 'name': ins.attrib['name'], 'opcode': build_opcode(ins, 'opcode'), - 'opcode2': ins.attrib.get('opcode2'), + 'opcode2': build_opcode(ins, 'opcode2'), 'unit': ins.attrib.get('unit'), })