diff --git a/src/panfrost/compiler/IR_pseudo.xml b/src/panfrost/compiler/IR_pseudo.xml
new file mode 100644
index 00000000000..19e2483a07c
--- /dev/null
+++ b/src/panfrost/compiler/IR_pseudo.xml
@@ -0,0 +1,195 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ computed_lod
+ zero_lod
+
+
+
+
+
+
+
+
+
+
+ wls
+
+
+
+
+ tl
+
+
+
+
+
+
+
+
+
+
+
+ aadd
+
+
+
+
+
+ asmin
+ asmax
+ aumin
+ aumax
+ aand
+ aor
+ axor
+ axchg
+ acmpxchg
+
+
+
+
+
+
+
+
+
+ ainc
+ adec
+ aumax1
+ asmax1
+ aor1
+
+
+
+
+
+
+
+
+
+
+
+ aadd
+
+
+
+
+
+ asmin
+ asmax
+ aumin
+ aumax
+ aand
+ aor
+ axor
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ none
+ h0
+ h1
+
+
+
+
+
+
+
+
+ h00
+ h10
+ h01
+ h11
+
+
+
+
+
+
+ none
+ clamp_0_inf
+ clamp_m1_1
+ clamp_0_1
+
+
+
+
+
+
+ none
+ clamp_0_inf
+ clamp_m1_1
+ clamp_0_1
+
+
+
+
+
+
+ none
+ h0
+ h1
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/panfrost/compiler/bi_builder.h.py b/src/panfrost/compiler/bi_builder.h.py
index 4ce47fb0597..2b55a67f552 100644
--- a/src/panfrost/compiler/bi_builder.h.py
+++ b/src/panfrost/compiler/bi_builder.h.py
@@ -187,7 +187,11 @@ import sys
from bifrost_isa import *
from mako.template import Template
-instructions = parse_instructions(sys.argv[1], include_pseudo = True)
+instructions = {}
+for arg in sys.argv[1:]:
+ new_instructions = parse_instructions(arg, include_pseudo = True)
+ instructions.update(new_instructions)
+
ir_instructions = partition_mnemonics(instructions)
modifier_lists = order_modifiers(ir_instructions)
diff --git a/src/panfrost/compiler/bi_opcodes.c.py b/src/panfrost/compiler/bi_opcodes.c.py
index cbe0ae458a2..034ee2c60cb 100644
--- a/src/panfrost/compiler/bi_opcodes.c.py
+++ b/src/panfrost/compiler/bi_opcodes.c.py
@@ -59,7 +59,11 @@ import sys
from bifrost_isa import *
from mako.template import Template
-instructions = parse_instructions(sys.argv[1], include_pseudo = True)
+instructions = {}
+for arg in sys.argv[1:]:
+ new_instructions = parse_instructions(arg, include_pseudo = True)
+ instructions.update(new_instructions)
+
ir_instructions = partition_mnemonics(instructions)
mnemonics = set(x[1:] for x in instructions.keys())
diff --git a/src/panfrost/compiler/bi_opcodes.h.py b/src/panfrost/compiler/bi_opcodes.h.py
index 3b8ff0b33d8..1f74331f972 100644
--- a/src/panfrost/compiler/bi_opcodes.h.py
+++ b/src/panfrost/compiler/bi_opcodes.h.py
@@ -108,7 +108,11 @@ import sys
from bifrost_isa import *
from mako.template import Template
-instructions = parse_instructions(sys.argv[1], include_pseudo = True)
+instructions = {}
+for arg in sys.argv[1:]:
+ new_instructions = parse_instructions(arg, include_pseudo = True)
+ instructions.update(new_instructions)
+
ir_instructions = partition_mnemonics(instructions)
modifier_lists = order_modifiers(ir_instructions)
diff --git a/src/panfrost/compiler/bi_packer.c.py b/src/panfrost/compiler/bi_packer.c.py
index 601750e2aa3..c5060639809 100644
--- a/src/panfrost/compiler/bi_packer.c.py
+++ b/src/panfrost/compiler/bi_packer.c.py
@@ -25,12 +25,16 @@ from bifrost_isa import *
from mako.template import Template
# Consider pseudo instructions when getting the modifier list
-instructions_with_pseudo = parse_instructions(sys.argv[1], include_pseudo = True)
+instructions_with_pseudo = {}
+for arg in sys.argv[1:]:
+ new_instructions = parse_instructions(arg, include_pseudo = True)
+ instructions_with_pseudo.update(new_instructions)
+
ir_instructions_with_pseudo = partition_mnemonics(instructions_with_pseudo)
modifier_lists = order_modifiers(ir_instructions_with_pseudo)
# ...but strip for packing
-instructions = parse_instructions(sys.argv[1])
+instructions = parse_instructions(sys.argv[2]) # skip the pseudo instructions in sys.argv[1]
ir_instructions = partition_mnemonics(instructions)
# Packs sources into an argument. Offset argument to work around a quirk of our
diff --git a/src/panfrost/compiler/bi_printer.c.py b/src/panfrost/compiler/bi_printer.c.py
index 04a9c0095e2..729c139559b 100644
--- a/src/panfrost/compiler/bi_printer.c.py
+++ b/src/panfrost/compiler/bi_printer.c.py
@@ -224,7 +224,11 @@ import sys
from bifrost_isa import *
from mako.template import Template
-instructions = parse_instructions(sys.argv[1], include_pseudo = True)
+instructions = {}
+for arg in sys.argv[1:]:
+ new_instructions = parse_instructions(arg, include_pseudo = True)
+ instructions.update(new_instructions)
+
ir_instructions = partition_mnemonics(instructions)
modifier_lists = order_modifiers(ir_instructions)
diff --git a/src/panfrost/compiler/ISA.xml b/src/panfrost/compiler/bifrost/ISA.xml
similarity index 100%
rename from src/panfrost/compiler/ISA.xml
rename to src/panfrost/compiler/bifrost/ISA.xml
diff --git a/src/panfrost/compiler/meson.build b/src/panfrost/compiler/meson.build
index ed4ad08823f..af6208453b1 100644
--- a/src/panfrost/compiler/meson.build
+++ b/src/panfrost/compiler/meson.build
@@ -43,7 +43,7 @@ libpanfrost_bifrost_files = files(
bifrost_gen_disasm_c = custom_target(
'bifrost_gen_disasm.c',
- input : ['gen_disasm.py', 'ISA.xml'],
+ input : ['gen_disasm.py', 'bifrost/ISA.xml'],
output : 'bifrost_gen_disasm.c',
command : [prog_python, '@INPUT@'],
capture : true,
@@ -52,7 +52,7 @@ bifrost_gen_disasm_c = custom_target(
bi_opcodes_c = custom_target(
'bi_opcodes.c',
- input : ['bi_opcodes.c.py', 'ISA.xml'],
+ input : ['bi_opcodes.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'],
output : 'bi_opcodes.c',
command : [prog_python, '@INPUT@'],
capture : true,
@@ -61,7 +61,7 @@ bi_opcodes_c = custom_target(
bi_printer_c = custom_target(
'bi_printer.c',
- input : ['bi_printer.c.py', 'ISA.xml'],
+ input : ['bi_printer.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'],
output : 'bi_printer.c',
command : [prog_python, '@INPUT@'],
capture : true,
@@ -70,7 +70,7 @@ bi_printer_c = custom_target(
bi_packer_c = custom_target(
'bi_packer.c',
- input : ['bi_packer.c.py', 'ISA.xml'],
+ input : ['bi_packer.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'],
output : 'bi_packer.c',
command : [prog_python, '@INPUT@'],
capture : true,
@@ -79,7 +79,7 @@ bi_packer_c = custom_target(
bi_opcodes_h = custom_target(
'bi_opcodes.h',
- input : ['bi_opcodes.h.py', 'ISA.xml'],
+ input : ['bi_opcodes.h.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'],
output : 'bi_opcodes.h',
command : [prog_python, '@INPUT@'],
capture : true,
@@ -93,7 +93,7 @@ idep_bi_opcodes_h = declare_dependency(
bi_builder_h = custom_target(
'bi_builder.h',
- input : ['bi_builder.h.py', 'ISA.xml'],
+ input : ['bi_builder.h.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'],
output : 'bi_builder.h',
command : [prog_python, '@INPUT@'],
capture : true,
diff --git a/src/panfrost/compiler/valhall/ISA.xml b/src/panfrost/compiler/valhall/ISA.xml
index 0861153016c..7b12eb6107d 100644
--- a/src/panfrost/compiler/valhall/ISA.xml
+++ b/src/panfrost/compiler/valhall/ISA.xml
@@ -778,7 +778,12 @@
aor1
-
+
+
Do nothing. Useful at the start of a block for waiting on slots required
by the first actual instruction of the block, to reconcile dependencies
@@ -786,7 +791,7 @@
-
+
Branches to a specified relative offset if its source is nonzero (default)
or if its source is zero (if `.eq` is set). The offset is 27-bits and
@@ -805,10 +810,10 @@
Value to compare against zero
-
+
-
+
Evaluates the given condition, and if it passes, discards the current
fragment and terminates the thread. Only valid in a **fragment** shader.
@@ -818,7 +823,7 @@
Right value to compare
-
+
Jump to an indirectly specified (absolute or relative) address. Used to
jump to blend shaders at the end of a fragment shader.
@@ -826,11 +831,11 @@
Value to compare against zero
Branch target
-
-
+
+
-
+
General-purpose barrier. Must use slot #7. Must be paired with a
`.wait` flow on the instruction.
@@ -838,7 +843,7 @@
-
+
@@ -852,7 +857,7 @@
Return value if false
-
+
@@ -873,7 +878,7 @@
Return value if false
-
+
@@ -885,37 +890,39 @@
-
+
Interpolates a given varying from hardware buffer
+
-
+
-
+
Interpolates a given varying from hardware buffer
+
-
+
-
+
Interpolates a given varying from a software buffer
@@ -928,7 +935,7 @@
Varying index and table
-
+
Interpolates a given varying from a software buffer
@@ -942,7 +949,7 @@
-
+
Fetches a given varying from a software buffer
@@ -952,7 +959,7 @@
Varying index and table
-
+
Fetches a given varying from a software buffer
@@ -963,7 +970,7 @@
-
+
Load `vecsize` components from the attribute descriptor at entry `index`
of resource table `table` at index (vertex ID, instance ID), converting
@@ -973,7 +980,7 @@
-
+
Vertex ID
Instance ID
@@ -981,7 +988,7 @@
-
+
Load `vecsize` components from the attribute descriptor at the specified
location at index (vertex ID, instance ID), converting
@@ -993,49 +1000,49 @@
-
+
Vertex ID
Instance ID
Index and table
-
+
Load `vecsize` components from the texture descriptor at entry `index`
of resource table `table`, converting
to the specified register format.
-
+
-
+
X/Y coordinates (16:16)
Z/W coordinates (16:16)
-
-
+
+
-
+
Load `vecsize` components from the texture descriptor at the specified
location at index, converting
to the specified register format.
-
+
-
+
X/Y coordinates (16:16)
Z/W coordinates (16:16)
Index and table
-
+
Load the effective address of an attribute specified with the
given immediate index. Returns three staging register: the low/high
@@ -1043,7 +1050,7 @@
-
+
Vertex index
Instance index
@@ -1051,7 +1058,7 @@
-
+
Load the effective address of an attribute specified with the
given index. Returns three staging register: the low/high
@@ -1060,14 +1067,14 @@
-
+
Vertex index
Instance index
Attribute index and table
-
+
Load the effective address of a texel from the image specified with the
given immediate index. Returns three staging registers: the low/high
@@ -1080,7 +1087,7 @@
-
+
X/Y coordinates (16:16)
Z/W coordinates (16:16)
@@ -1088,7 +1095,7 @@
-
+
Load the effective address of a texel from the image specified with the
given index. Returns three staging register: the low/high
@@ -1102,14 +1109,14 @@
-
+
X/Y coordinates (16:16)
Z/W coordinates (16:16)
Index and table
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1118,15 +1125,15 @@
the mode descriptor.
-
-
-
+
+
+
Address to load from after adding offset
Mode descriptor
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1135,15 +1142,15 @@
the mode descriptor.
-
-
-
+
+
+
Byte offset
Mode descriptor
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1152,15 +1159,15 @@
the mode descriptor.
-
-
-
+
+
+
Byte offset
Mode descriptor
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1169,15 +1176,15 @@
the mode descriptor.
-
-
-
+
+
+
Byte offset
Mode descriptor
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1186,15 +1193,15 @@
the mode descriptor.
-
-
-
+
+
+
Byte offset
Mode descriptor
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1203,15 +1210,15 @@
the mode descriptor.
-
-
-
+
+
+
Byte offset
Mode descriptor
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1220,15 +1227,15 @@
the mode descriptor.
-
-
-
+
+
+
Byte offset
Mode descriptor
-
+
Loads a buffer descriptor. If bits 25...31 of the mode descriptor are
all-ones, load from the buffer descriptors in the table indexed by the
@@ -1237,123 +1244,123 @@
the mode descriptor.
-
-
-
+
+
+
Byte offset
Mode descriptor
-
+
Load effective address of a buffer with an immediate offset added.
-
+
-
-
+
+
Linear ID
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Loads from main memory
-
-
+
+
Address to load from after adding offset
-
+
Stores to main memory
@@ -1371,7 +1378,7 @@
-
+
Store to memory with data conversion. The address to store to is given in
the first source, which must be a 64-bit register (a pair of 32-bit
@@ -1380,7 +1387,7 @@
Used with LEA_TEX_IMM to implement image stores.
-
+
@@ -1390,7 +1397,7 @@
Internal conversion descriptor
-
+
Loads a given render target, specified in the pixel indices descriptor, at
a given location and sample, and convert to the format specified in the
@@ -1407,7 +1414,7 @@
Conversion descriptor
-
+
Store to given render target, specified in the pixel indices descriptor, at
a given location and sample, and convert to the format specified in the
@@ -1423,7 +1430,7 @@
Conversion descriptor
-
+
Blends a given render target. This loads the API-specified blend state for
the render target from the first source. Blend descriptors are available
@@ -1459,7 +1466,7 @@
-
+
Does alpha-to-coverage testing, updating the sample coverage mask. ATEST
does not do an implicit discard. It should be executed before the first
@@ -1472,13 +1479,13 @@
-
+
Programatically writes out depth, stencil, or both, depending on which
modifiers are set. Used to implement gl_FragDepth and gl_FragStencil.
-
-
+
+
Updated coverage mask
Depth value
Stencil value
@@ -1487,7 +1494,7 @@
-
+
Performs the given data conversion. Note that floating-point rounding is
handled via the same hardware and therefore shares an encoding. Round mode
@@ -1506,7 +1513,7 @@
Value to convert
-
+
Performs the given data conversion.
@@ -1519,7 +1526,7 @@
Value to convert
-
+
Performs the given data conversion.
@@ -1527,7 +1534,7 @@
Value to convert
-
+
Performs the given data conversion.
@@ -1537,13 +1544,13 @@
Value to convert
-
+
Converts up with the specified round mode.
Value to convert
-
+
Performs the given data conversion.
@@ -1557,7 +1564,7 @@
Value to convert
-
+
Performs the given data conversion.
@@ -1571,7 +1578,7 @@
Value to convert
-
+
Performs the given rounding, using the convert unit.
@@ -1583,33 +1590,33 @@
Value to convert
-
+
Canonical register-to-register move.
-
+
Used as a primitive for various bitwise operations.
-
+
Used as a primitive for various bitwise operations.
-
+
Used as a primitive for various bitwise operations.
-
+
64-bit abs may be constructed in 4 instructions (5 clocks) by checking the
sign with `ICMP.s32.lt.m1 hi, 0` and negating based on the result with
@@ -1618,15 +1625,15 @@
-
+
-
+
-
+
Only available as 32-bit. Smaller bitsizes require explicit conversions.
64-bit popcount may be constructed in 3 clocks by separate 32-bit
@@ -1636,28 +1643,28 @@
-
+
Only available as 32-bit. Other bitsizes may be derived with swizzles.
-
+
For fully featured bitwise operation, see the shift opcodes.
-
+
For fully featured bitwise operation, see the shift opcodes.
-
+
Returns the mask of lanes ever active within the warp (subgroup), such
that the source is nonzero. The number of work-items in a subgroup is
@@ -1673,7 +1680,7 @@
-
+
@@ -1685,12 +1692,12 @@
adjusted to be compatible with Valhall's argument reduction for logarithm
and square root computation respectively.
-
-
+
+
-
+
@@ -1712,7 +1719,7 @@
-
+
@@ -1725,7 +1732,7 @@
-
+
$A + B$
@@ -1734,7 +1741,7 @@
B
-
+
$\min \{ A, B \}$
@@ -1743,7 +1750,7 @@
B
-
+
$\max \{ A, B \}$
@@ -1752,7 +1759,7 @@
B
-
+
Given a pair of 32-bit floats, output a pair of 16-bit floats packed into
@@ -1764,7 +1771,7 @@
B
-
+
@@ -1779,7 +1786,7 @@
-
+
Calculates the base-2 exponent of an argument specified as a 8:24
fixed-point. The original argument is passed as well for correct handling
@@ -1790,7 +1797,7 @@
Input as 32-bit float
-
+
Performs a floating-point addition specialized for logarithm computation.
@@ -1799,18 +1806,18 @@
B
-
+
Used for `atan2()` implementation. Destination is two 16-bit
values (int and float) for the first form, and a single 32-bit float when
`.second` is set (indicating the FATAN_TABLE.f32 instruction).
-
+
A
B
-
+
$A + B$ with optional saturation.
@@ -1831,13 +1838,13 @@
-
+
Calculates $A | (B \ll 16)$. Used to implement `(ushort2)(A, B)`
A
B
-
+
@@ -1852,7 +1859,7 @@
-
+
Similar to SHADDX, but especially used for loading offsets into
WLS. Usually this is only required for atomic operations, which cannot
@@ -1861,13 +1868,13 @@
.neg indicates SEG_SUB instead.
-
-
+
+
A
B
-
+
Sign or zero extend B to 64-bits, left-shift by `shift`, and add the
64-bit value A. These instructions accelerate address arithmetic, but may
@@ -1880,7 +1887,7 @@
B
-
+
@@ -1901,14 +1908,14 @@
-
+
-
+
A
B
@@ -1918,7 +1925,7 @@
-
+
@@ -1940,7 +1947,7 @@
-
+
$A \cdot B + C$
@@ -1950,12 +1957,12 @@
C
-
+
-
+
Left shifts its first source by a specified amount and bitwise ANDs it with the
second source, optionally inverting the second source or the result.
@@ -1966,31 +1973,31 @@
B
-
+
-
+
Right shifts its first source by a specified amount and bitwise ANDs it with the
second source, optionally inverting the second source or the result. If
`signed` is set, the hardware performs an arithmetic right shift; otherwise,
it performs an unsigned right shift.
-
+
A
shift
B
-
+
-
+
Left shifts its first source by a specified amount and bitwise ORs it with the
second source, optionally inverting the second source or the result.
@@ -2001,31 +2008,31 @@
B
-
+
-
+
Right shifts its first source by a specified amount and bitwise ORs it with the
second source, optionally inverting the second source or the result. If
`signed` is set, the hardware performs an arithmetic right shift; otherwise,
it performs an unsigned right shift.
-
+
A
shift
B
-
+
-
+
Left shifts its first source by a specified amount and bitwise XORs it with the
second source, optionally inverting the second source or the result.
@@ -2036,26 +2043,26 @@
B
-
+
-
+
Right shifts its first source by a specified amount and bitwise XORs it with the
second source, optionally inverting the second source or the result. If
`signed` is set, the hardware performs an arithmetic right shift; otherwise,
it performs an unsigned right shift.
-
+
A
shift
B
-
+
Mux between A and B based on the provided mask. The condition specified
as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
@@ -2063,13 +2070,13 @@
`bitselect()` in OpenCL, so `MUX.i32.bit A, B, mask` calculates
`(A & mask) | (B & ~mask)`.
-
+
A
B
Mask
-
+
Mux between A and B based on the provided mask. The condition specified
as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
@@ -2077,13 +2084,13 @@
`bitselect()` in OpenCL, so `MUX.v2i16.bit A, B, mask` calculates
`(A & mask) | (B & ~mask)`.
-
+
A
B
Mask
-
+
Mux between A and B based on the provided mask. The condition specified
as the `mux` modifier is evaluated on the mask. If true, `A` is chosen,
@@ -2091,20 +2098,20 @@
`bitselect()` in OpenCL, so `MUX.v4i8.bit A, B, mask` calculates
`(A & mask) | (B & ~mask)`.
-
+
A
B
Mask
-
+
During a cube map transform, select the S coordinate given a selected face.
Z coordinate as 32-bit floating point
X coordinate as 32-bit floating point
Cube face index
-
+
During a cube map transform, select the T coordinate given a selected face.
Y coordinate as 32-bit floating point
Z coordinate as 32-bit floating point
@@ -2126,21 +2133,21 @@
CD
-
+
Select the maximum absolute value of its arguments.
X coordinate as 32-bit floating point
Y coordinate as 32-bit floating point
Z coordinate as 32-bit floating point
-
+
Select the cube face index corresponding to the arguments.
X coordinate as 32-bit floating point
Y coordinate as 32-bit floating point
Z coordinate as 32-bit floating point
-
+
8-bit integer dot product between 4 channel vectors, intended for machine
learning. Available in both unsigned and signed variants, controlling
@@ -2172,7 +2179,7 @@
-
+
A
B
@@ -2189,7 +2196,7 @@
-
+
A
B
@@ -2239,7 +2246,7 @@
-
+
A
B
@@ -2256,7 +2263,7 @@
-
+
A
B
@@ -2279,7 +2286,7 @@
-
+
A
B
@@ -2296,7 +2303,7 @@
`IADD_IMM.i32` with the source tied to zero is the canonical immediate move.
A
-
+
@@ -2308,7 +2315,7 @@
single 16-bit constant requires replication of the constant.
A
-
+
@@ -2320,7 +2327,7 @@
single 8-bit constant requires replication of the constant.
A
-
+
@@ -2331,7 +2338,7 @@
inline, `FADD.f32` is preferred.
A
-
+
@@ -2343,14 +2350,14 @@
single 16-bit constant requires replication of the constant.
A
-
+
-
+
-
+
@@ -2358,11 +2365,11 @@
-
+
-
+
@@ -2370,38 +2377,38 @@
-
+
-
+
64-bit address to operate on
-
+
-
+
64-bit address to operate on
-
+
-
+
-
+
@@ -2409,13 +2416,13 @@
-
+
-
+
-
+
@@ -2423,7 +2430,7 @@
-
+
Unfiltered textured instruction.
@@ -2434,6 +2441,7 @@
+
@@ -2442,9 +2450,11 @@
Image to read from
+ Dummy for IR
+
-
+
Ordinary texturing instruction using a sampler.
@@ -2455,6 +2465,7 @@
+
@@ -2465,9 +2476,11 @@
Image to read from
+ Dummy for IR
+
-
+
Texture gather instruction.
@@ -2480,18 +2493,21 @@
+
-
+
Image to read from
+ Dummy source for IR
+
-
+
Pair of texture instructions.
@@ -2514,7 +2530,7 @@
Image to read from
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
@@ -2536,7 +2552,7 @@
Varying offset
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
@@ -2559,7 +2575,7 @@
Varying offset
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units.
@@ -2582,7 +2598,7 @@
Varying offset
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units.
@@ -2604,7 +2620,7 @@
Varying offset
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
@@ -2626,7 +2642,7 @@
Varying offset
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
@@ -2649,7 +2665,7 @@
Varying offset
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
@@ -2672,7 +2688,7 @@
Varying offset
-
+
Only works for FP32 varyings. Performance characteristics are similar
to LD_VAR_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units.
@@ -2694,7 +2710,7 @@
Varying offset
-
+
First calculates $A \cdot B + C$ and then biases the exponent by D. Used in
special transcendental function sequences. It should not be used for
@@ -2709,7 +2725,7 @@
D
-
+
First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
= 0$ or $B = 0$, the multiply $A \cdot B$ is treated as zero even if an
@@ -2725,7 +2741,7 @@
D
-
+
First calculates $A \cdot B + C$ and then biases the exponent by D. If $A
= 0$ or $B = 0$, the multiply is treated as $A$ even if an
@@ -2741,7 +2757,7 @@
D
-
+
First calculates $A \cdot B + C$ and then biases the exponent by D,
interpreted as a 16-bit value. Used in special transcendental function
diff --git a/src/panfrost/compiler/valhall/valhall.py b/src/panfrost/compiler/valhall/valhall.py
index 3c1c8bbd8db..7b2bb9d0c4e 100644
--- a/src/panfrost/compiler/valhall/valhall.py
+++ b/src/panfrost/compiler/valhall/valhall.py
@@ -272,7 +272,7 @@ def build_instr(el, overrides = {}):
i = 0
for src in el.findall('src'):
- if (src.attrib.get('ir_only', False)):
+ if (src.attrib.get('pseudo', False)):
continue
built = build_source(src, i, tsize)
sources += [built]
@@ -298,9 +298,9 @@ def build_instr(el, overrides = {}):
modifiers = []
for mod in el:
- if (mod.tag in MODIFIERS) and not (mod.attrib.get('ir_only', False)):
+ if (mod.tag in MODIFIERS) and not (mod.attrib.get('pseudo', False)):
modifiers.append(MODIFIERS[mod.tag])
- elif mod.tag =='mod':
+ elif mod.tag =='va_mod':
modifiers.append(build_modifier(mod))
instr = Instruction(name, opcode, opcode2, srcs = sources, dests = dests, immediates = imms, modifiers = modifiers, staging = staging, unit = unit)