From 1ae8ac35c03b3c3be74f0570a864d142408ca397 Mon Sep 17 00:00:00 2001 From: "Eric R. Smith" Date: Thu, 4 Jul 2024 21:50:33 +0000 Subject: [PATCH] panfrost: split pseudo instructions from Bifrost and Valhall Make pseudo instructions for the IR separate from real Bifrost and Valhall instructions, which are kept in their own ISA.xml files. Reviewed-by: Mary Guillemard Acked-by: Boris Brezillon Part-of: --- src/panfrost/compiler/IR_pseudo.xml | 195 +++++++++ src/panfrost/compiler/bi_builder.h.py | 6 +- src/panfrost/compiler/bi_opcodes.c.py | 6 +- src/panfrost/compiler/bi_opcodes.h.py | 6 +- src/panfrost/compiler/bi_packer.c.py | 8 +- src/panfrost/compiler/bi_printer.c.py | 6 +- src/panfrost/compiler/{ => bifrost}/ISA.xml | 0 src/panfrost/compiler/meson.build | 12 +- src/panfrost/compiler/valhall/ISA.xml | 460 ++++++++++---------- src/panfrost/compiler/valhall/valhall.py | 6 +- 10 files changed, 468 insertions(+), 237 deletions(-) create mode 100644 src/panfrost/compiler/IR_pseudo.xml rename src/panfrost/compiler/{ => bifrost}/ISA.xml (100%) diff --git a/src/panfrost/compiler/IR_pseudo.xml b/src/panfrost/compiler/IR_pseudo.xml new file mode 100644 index 00000000000..19e2483a07c --- /dev/null +++ b/src/panfrost/compiler/IR_pseudo.xml @@ -0,0 +1,195 @@ + + + + + + + + + + + + + + computed_lod + zero_lod + + + + + + + + + + + wls + + + + + tl + + + + + + + + + + + + aadd + + + + + + asmin + asmax + aumin + aumax + aand + aor + axor + axchg + acmpxchg + + + + + + + + + + ainc + adec + aumax1 + asmax1 + aor1 + + + + + + + + + + + + aadd + + + + + + asmin + asmax + aumin + aumax + aand + aor + axor + + + + + + + + + + + + + + + + + + + + + none + h0 + h1 + + + + + + + + + h00 + h10 + h01 + h11 + + + + + + + none + clamp_0_inf + clamp_m1_1 + clamp_0_1 + + + + + + + none + clamp_0_inf + clamp_m1_1 + clamp_0_1 + + + + + + + none + h0 + h1 + + + + + + + + + + + + + diff --git a/src/panfrost/compiler/bi_builder.h.py b/src/panfrost/compiler/bi_builder.h.py index 4ce47fb0597..2b55a67f552 100644 --- a/src/panfrost/compiler/bi_builder.h.py +++ b/src/panfrost/compiler/bi_builder.h.py @@ -187,7 +187,11 @@ import sys from bifrost_isa import * from mako.template import Template -instructions = parse_instructions(sys.argv[1], include_pseudo = True) +instructions = {} +for arg in sys.argv[1:]: + new_instructions = parse_instructions(arg, include_pseudo = True) + instructions.update(new_instructions) + ir_instructions = partition_mnemonics(instructions) modifier_lists = order_modifiers(ir_instructions) diff --git a/src/panfrost/compiler/bi_opcodes.c.py b/src/panfrost/compiler/bi_opcodes.c.py index cbe0ae458a2..034ee2c60cb 100644 --- a/src/panfrost/compiler/bi_opcodes.c.py +++ b/src/panfrost/compiler/bi_opcodes.c.py @@ -59,7 +59,11 @@ import sys from bifrost_isa import * from mako.template import Template -instructions = parse_instructions(sys.argv[1], include_pseudo = True) +instructions = {} +for arg in sys.argv[1:]: + new_instructions = parse_instructions(arg, include_pseudo = True) + instructions.update(new_instructions) + ir_instructions = partition_mnemonics(instructions) mnemonics = set(x[1:] for x in instructions.keys()) diff --git a/src/panfrost/compiler/bi_opcodes.h.py b/src/panfrost/compiler/bi_opcodes.h.py index 3b8ff0b33d8..1f74331f972 100644 --- a/src/panfrost/compiler/bi_opcodes.h.py +++ b/src/panfrost/compiler/bi_opcodes.h.py @@ -108,7 +108,11 @@ import sys from bifrost_isa import * from mako.template import Template -instructions = parse_instructions(sys.argv[1], include_pseudo = True) +instructions = {} +for arg in sys.argv[1:]: + new_instructions = parse_instructions(arg, include_pseudo = True) + instructions.update(new_instructions) + ir_instructions = partition_mnemonics(instructions) modifier_lists = order_modifiers(ir_instructions) diff --git a/src/panfrost/compiler/bi_packer.c.py b/src/panfrost/compiler/bi_packer.c.py index 601750e2aa3..c5060639809 100644 --- a/src/panfrost/compiler/bi_packer.c.py +++ b/src/panfrost/compiler/bi_packer.c.py @@ -25,12 +25,16 @@ from bifrost_isa import * from mako.template import Template # Consider pseudo instructions when getting the modifier list -instructions_with_pseudo = parse_instructions(sys.argv[1], include_pseudo = True) +instructions_with_pseudo = {} +for arg in sys.argv[1:]: + new_instructions = parse_instructions(arg, include_pseudo = True) + instructions_with_pseudo.update(new_instructions) + ir_instructions_with_pseudo = partition_mnemonics(instructions_with_pseudo) modifier_lists = order_modifiers(ir_instructions_with_pseudo) # ...but strip for packing -instructions = parse_instructions(sys.argv[1]) +instructions = parse_instructions(sys.argv[2]) # skip the pseudo instructions in sys.argv[1] ir_instructions = partition_mnemonics(instructions) # Packs sources into an argument. Offset argument to work around a quirk of our diff --git a/src/panfrost/compiler/bi_printer.c.py b/src/panfrost/compiler/bi_printer.c.py index 04a9c0095e2..729c139559b 100644 --- a/src/panfrost/compiler/bi_printer.c.py +++ b/src/panfrost/compiler/bi_printer.c.py @@ -224,7 +224,11 @@ import sys from bifrost_isa import * from mako.template import Template -instructions = parse_instructions(sys.argv[1], include_pseudo = True) +instructions = {} +for arg in sys.argv[1:]: + new_instructions = parse_instructions(arg, include_pseudo = True) + instructions.update(new_instructions) + ir_instructions = partition_mnemonics(instructions) modifier_lists = order_modifiers(ir_instructions) diff --git a/src/panfrost/compiler/ISA.xml b/src/panfrost/compiler/bifrost/ISA.xml similarity index 100% rename from src/panfrost/compiler/ISA.xml rename to src/panfrost/compiler/bifrost/ISA.xml diff --git a/src/panfrost/compiler/meson.build b/src/panfrost/compiler/meson.build index ed4ad08823f..af6208453b1 100644 --- a/src/panfrost/compiler/meson.build +++ b/src/panfrost/compiler/meson.build @@ -43,7 +43,7 @@ libpanfrost_bifrost_files = files( bifrost_gen_disasm_c = custom_target( 'bifrost_gen_disasm.c', - input : ['gen_disasm.py', 'ISA.xml'], + input : ['gen_disasm.py', 'bifrost/ISA.xml'], output : 'bifrost_gen_disasm.c', command : [prog_python, '@INPUT@'], capture : true, @@ -52,7 +52,7 @@ bifrost_gen_disasm_c = custom_target( bi_opcodes_c = custom_target( 'bi_opcodes.c', - input : ['bi_opcodes.c.py', 'ISA.xml'], + input : ['bi_opcodes.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'], output : 'bi_opcodes.c', command : [prog_python, '@INPUT@'], capture : true, @@ -61,7 +61,7 @@ bi_opcodes_c = custom_target( bi_printer_c = custom_target( 'bi_printer.c', - input : ['bi_printer.c.py', 'ISA.xml'], + input : ['bi_printer.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'], output : 'bi_printer.c', command : [prog_python, '@INPUT@'], capture : true, @@ -70,7 +70,7 @@ bi_printer_c = custom_target( bi_packer_c = custom_target( 'bi_packer.c', - input : ['bi_packer.c.py', 'ISA.xml'], + input : ['bi_packer.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'], output : 'bi_packer.c', command : [prog_python, '@INPUT@'], capture : true, @@ -79,7 +79,7 @@ bi_packer_c = custom_target( bi_opcodes_h = custom_target( 'bi_opcodes.h', - input : ['bi_opcodes.h.py', 'ISA.xml'], + input : ['bi_opcodes.h.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'], output : 'bi_opcodes.h', command : [prog_python, '@INPUT@'], capture : true, @@ -93,7 +93,7 @@ idep_bi_opcodes_h = declare_dependency( bi_builder_h = custom_target( 'bi_builder.h', - input : ['bi_builder.h.py', 'ISA.xml'], + input : ['bi_builder.h.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'], output : 'bi_builder.h', command : [prog_python, '@INPUT@'], capture : true, diff --git a/src/panfrost/compiler/valhall/ISA.xml b/src/panfrost/compiler/valhall/ISA.xml index 0861153016c..7b12eb6107d 100644 --- a/src/panfrost/compiler/valhall/ISA.xml +++ b/src/panfrost/compiler/valhall/ISA.xml @@ -778,7 +778,12 @@ aor1 - + + Do nothing. Useful at the start of a block for waiting on slots required by the first actual instruction of the block, to reconcile dependencies @@ -786,7 +791,7 @@ - + Branches to a specified relative offset if its source is nonzero (default) or if its source is zero (if `.eq` is set). The offset is 27-bits and @@ -805,10 +810,10 @@ Value to compare against zero - + - + Evaluates the given condition, and if it passes, discards the current fragment and terminates the thread. Only valid in a **fragment** shader. @@ -818,7 +823,7 @@ Right value to compare - + Jump to an indirectly specified (absolute or relative) address. Used to jump to blend shaders at the end of a fragment shader. @@ -826,11 +831,11 @@ Value to compare against zero Branch target - - + + - + General-purpose barrier. Must use slot #7. Must be paired with a `.wait` flow on the instruction. @@ -838,7 +843,7 @@ - + @@ -852,7 +857,7 @@ Return value if false - + @@ -873,7 +878,7 @@ Return value if false - + @@ -885,37 +890,39 @@ - + Interpolates a given varying from hardware buffer + - + - + Interpolates a given varying from hardware buffer + - + - + Interpolates a given varying from a software buffer @@ -928,7 +935,7 @@ Varying index and table - + Interpolates a given varying from a software buffer @@ -942,7 +949,7 @@ - + Fetches a given varying from a software buffer @@ -952,7 +959,7 @@ Varying index and table - + Fetches a given varying from a software buffer @@ -963,7 +970,7 @@ - + Load `vecsize` components from the attribute descriptor at entry `index` of resource table `table` at index (vertex ID, instance ID), converting @@ -973,7 +980,7 @@ - + Vertex ID Instance ID @@ -981,7 +988,7 @@ - + Load `vecsize` components from the attribute descriptor at the specified location at index (vertex ID, instance ID), converting @@ -993,49 +1000,49 @@ - + Vertex ID Instance ID Index and table - + Load `vecsize` components from the texture descriptor at entry `index` of resource table `table`, converting to the specified register format. - + - + X/Y coordinates (16:16) Z/W coordinates (16:16) - - + + - + Load `vecsize` components from the texture descriptor at the specified location at index, converting to the specified register format. - + - + X/Y coordinates (16:16) Z/W coordinates (16:16) Index and table - + Load the effective address of an attribute specified with the given immediate index. Returns three staging register: the low/high @@ -1043,7 +1050,7 @@ - + Vertex index Instance index @@ -1051,7 +1058,7 @@ - + Load the effective address of an attribute specified with the given index. Returns three staging register: the low/high @@ -1060,14 +1067,14 @@ - + Vertex index Instance index Attribute index and table - + Load the effective address of a texel from the image specified with the given immediate index. Returns three staging registers: the low/high @@ -1080,7 +1087,7 @@ - + X/Y coordinates (16:16) Z/W coordinates (16:16) @@ -1088,7 +1095,7 @@ - + Load the effective address of a texel from the image specified with the given index. Returns three staging register: the low/high @@ -1102,14 +1109,14 @@ - + X/Y coordinates (16:16) Z/W coordinates (16:16) Index and table - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1118,15 +1125,15 @@ the mode descriptor. - - - + + + Address to load from after adding offset Mode descriptor - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1135,15 +1142,15 @@ the mode descriptor. - - - + + + Byte offset Mode descriptor - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1152,15 +1159,15 @@ the mode descriptor. - - - + + + Byte offset Mode descriptor - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1169,15 +1176,15 @@ the mode descriptor. - - - + + + Byte offset Mode descriptor - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1186,15 +1193,15 @@ the mode descriptor. - - - + + + Byte offset Mode descriptor - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1203,15 +1210,15 @@ the mode descriptor. - - - + + + Byte offset Mode descriptor - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1220,15 +1227,15 @@ the mode descriptor. - - - + + + Byte offset Mode descriptor - + Loads a buffer descriptor. If bits 25...31 of the mode descriptor are all-ones, load from the buffer descriptors in the table indexed by the @@ -1237,123 +1244,123 @@ the mode descriptor. - - - + + + Byte offset Mode descriptor - + Load effective address of a buffer with an immediate offset added. - + - - + + Linear ID - + Loads from main memory - - + + Address to load from after adding offset - + Loads from main memory - - + + Address to load from after adding offset - + Loads from main memory - - + + Address to load from after adding offset - + Loads from main memory - - + + Address to load from after adding offset - + Loads from main memory - - + + Address to load from after adding offset - + Loads from main memory - - + + Address to load from after adding offset - + Loads from main memory - - + + Address to load from after adding offset - + Loads from main memory - - + + Address to load from after adding offset - + Stores to main memory @@ -1371,7 +1378,7 @@ - + Store to memory with data conversion. The address to store to is given in the first source, which must be a 64-bit register (a pair of 32-bit @@ -1380,7 +1387,7 @@ Used with LEA_TEX_IMM to implement image stores. - + @@ -1390,7 +1397,7 @@ Internal conversion descriptor - + Loads a given render target, specified in the pixel indices descriptor, at a given location and sample, and convert to the format specified in the @@ -1407,7 +1414,7 @@ Conversion descriptor - + Store to given render target, specified in the pixel indices descriptor, at a given location and sample, and convert to the format specified in the @@ -1423,7 +1430,7 @@ Conversion descriptor - + Blends a given render target. This loads the API-specified blend state for the render target from the first source. Blend descriptors are available @@ -1459,7 +1466,7 @@ - + Does alpha-to-coverage testing, updating the sample coverage mask. ATEST does not do an implicit discard. It should be executed before the first @@ -1472,13 +1479,13 @@ - + Programatically writes out depth, stencil, or both, depending on which modifiers are set. Used to implement gl_FragDepth and gl_FragStencil. - - + + Updated coverage mask Depth value Stencil value @@ -1487,7 +1494,7 @@ - + Performs the given data conversion. Note that floating-point rounding is handled via the same hardware and therefore shares an encoding. Round mode @@ -1506,7 +1513,7 @@ Value to convert - + Performs the given data conversion. @@ -1519,7 +1526,7 @@ Value to convert - + Performs the given data conversion. @@ -1527,7 +1534,7 @@ Value to convert - + Performs the given data conversion. @@ -1537,13 +1544,13 @@ Value to convert - + Converts up with the specified round mode. Value to convert - + Performs the given data conversion. @@ -1557,7 +1564,7 @@ Value to convert - + Performs the given data conversion. @@ -1571,7 +1578,7 @@ Value to convert - + Performs the given rounding, using the convert unit. @@ -1583,33 +1590,33 @@ Value to convert - + Canonical register-to-register move. - + Used as a primitive for various bitwise operations. - + Used as a primitive for various bitwise operations. - + Used as a primitive for various bitwise operations. - + 64-bit abs may be constructed in 4 instructions (5 clocks) by checking the sign with `ICMP.s32.lt.m1 hi, 0` and negating based on the result with @@ -1618,15 +1625,15 @@ - + - + - + Only available as 32-bit. Smaller bitsizes require explicit conversions. 64-bit popcount may be constructed in 3 clocks by separate 32-bit @@ -1636,28 +1643,28 @@ - + Only available as 32-bit. Other bitsizes may be derived with swizzles. - + For fully featured bitwise operation, see the shift opcodes. - + For fully featured bitwise operation, see the shift opcodes. - + Returns the mask of lanes ever active within the warp (subgroup), such that the source is nonzero. The number of work-items in a subgroup is @@ -1673,7 +1680,7 @@ - + @@ -1685,12 +1692,12 @@ adjusted to be compatible with Valhall's argument reduction for logarithm and square root computation respectively. - - + + - + @@ -1712,7 +1719,7 @@ - + @@ -1725,7 +1732,7 @@ - + $A + B$ @@ -1734,7 +1741,7 @@ B - + $\min \{ A, B \}$ @@ -1743,7 +1750,7 @@ B - + $\max \{ A, B \}$ @@ -1752,7 +1759,7 @@ B - + Given a pair of 32-bit floats, output a pair of 16-bit floats packed into @@ -1764,7 +1771,7 @@ B - + @@ -1779,7 +1786,7 @@ - + Calculates the base-2 exponent of an argument specified as a 8:24 fixed-point. The original argument is passed as well for correct handling @@ -1790,7 +1797,7 @@ Input as 32-bit float - + Performs a floating-point addition specialized for logarithm computation. @@ -1799,18 +1806,18 @@ B - + Used for `atan2()` implementation. Destination is two 16-bit values (int and float) for the first form, and a single 32-bit float when `.second` is set (indicating the FATAN_TABLE.f32 instruction). - + A B - + $A + B$ with optional saturation. @@ -1831,13 +1838,13 @@ - + Calculates $A | (B \ll 16)$. Used to implement `(ushort2)(A, B)` A B - + @@ -1852,7 +1859,7 @@ - + Similar to SHADDX, but especially used for loading offsets into WLS. Usually this is only required for atomic operations, which cannot @@ -1861,13 +1868,13 @@ .neg indicates SEG_SUB instead. - - + + A B - + Sign or zero extend B to 64-bits, left-shift by `shift`, and add the 64-bit value A. These instructions accelerate address arithmetic, but may @@ -1880,7 +1887,7 @@ B - + @@ -1901,14 +1908,14 @@ - + - + A B @@ -1918,7 +1925,7 @@ - + @@ -1940,7 +1947,7 @@ - + $A \cdot B + C$ @@ -1950,12 +1957,12 @@ C - + - + Left shifts its first source by a specified amount and bitwise ANDs it with the second source, optionally inverting the second source or the result. @@ -1966,31 +1973,31 @@ B - + - + Right shifts its first source by a specified amount and bitwise ANDs it with the second source, optionally inverting the second source or the result. If `signed` is set, the hardware performs an arithmetic right shift; otherwise, it performs an unsigned right shift. - + A shift B - + - + Left shifts its first source by a specified amount and bitwise ORs it with the second source, optionally inverting the second source or the result. @@ -2001,31 +2008,31 @@ B - + - + Right shifts its first source by a specified amount and bitwise ORs it with the second source, optionally inverting the second source or the result. If `signed` is set, the hardware performs an arithmetic right shift; otherwise, it performs an unsigned right shift. - + A shift B - + - + Left shifts its first source by a specified amount and bitwise XORs it with the second source, optionally inverting the second source or the result. @@ -2036,26 +2043,26 @@ B - + - + Right shifts its first source by a specified amount and bitwise XORs it with the second source, optionally inverting the second source or the result. If `signed` is set, the hardware performs an arithmetic right shift; otherwise, it performs an unsigned right shift. - + A shift B - + Mux between A and B based on the provided mask. The condition specified as the `mux` modifier is evaluated on the mask. If true, `A` is chosen, @@ -2063,13 +2070,13 @@ `bitselect()` in OpenCL, so `MUX.i32.bit A, B, mask` calculates `(A & mask) | (B & ~mask)`. - + A B Mask - + Mux between A and B based on the provided mask. The condition specified as the `mux` modifier is evaluated on the mask. If true, `A` is chosen, @@ -2077,13 +2084,13 @@ `bitselect()` in OpenCL, so `MUX.v2i16.bit A, B, mask` calculates `(A & mask) | (B & ~mask)`. - + A B Mask - + Mux between A and B based on the provided mask. The condition specified as the `mux` modifier is evaluated on the mask. If true, `A` is chosen, @@ -2091,20 +2098,20 @@ `bitselect()` in OpenCL, so `MUX.v4i8.bit A, B, mask` calculates `(A & mask) | (B & ~mask)`. - + A B Mask - + During a cube map transform, select the S coordinate given a selected face. Z coordinate as 32-bit floating point X coordinate as 32-bit floating point Cube face index - + During a cube map transform, select the T coordinate given a selected face. Y coordinate as 32-bit floating point Z coordinate as 32-bit floating point @@ -2126,21 +2133,21 @@ CD - + Select the maximum absolute value of its arguments. X coordinate as 32-bit floating point Y coordinate as 32-bit floating point Z coordinate as 32-bit floating point - + Select the cube face index corresponding to the arguments. X coordinate as 32-bit floating point Y coordinate as 32-bit floating point Z coordinate as 32-bit floating point - + 8-bit integer dot product between 4 channel vectors, intended for machine learning. Available in both unsigned and signed variants, controlling @@ -2172,7 +2179,7 @@ - + A B @@ -2189,7 +2196,7 @@ - + A B @@ -2239,7 +2246,7 @@ - + A B @@ -2256,7 +2263,7 @@ - + A B @@ -2279,7 +2286,7 @@ - + A B @@ -2296,7 +2303,7 @@ `IADD_IMM.i32` with the source tied to zero is the canonical immediate move. A - + @@ -2308,7 +2315,7 @@ single 16-bit constant requires replication of the constant. A - + @@ -2320,7 +2327,7 @@ single 8-bit constant requires replication of the constant. A - + @@ -2331,7 +2338,7 @@ inline, `FADD.f32` is preferred. A - + @@ -2343,14 +2350,14 @@ single 16-bit constant requires replication of the constant. A - + - + - + @@ -2358,11 +2365,11 @@ - + - + @@ -2370,38 +2377,38 @@ - + - + 64-bit address to operate on - + - + 64-bit address to operate on - + - + - + @@ -2409,13 +2416,13 @@ - + - + - + @@ -2423,7 +2430,7 @@ - + Unfiltered textured instruction. @@ -2434,6 +2441,7 @@ + @@ -2442,9 +2450,11 @@ Image to read from + Dummy for IR + - + Ordinary texturing instruction using a sampler. @@ -2455,6 +2465,7 @@ + @@ -2465,9 +2476,11 @@ Image to read from + Dummy for IR + - + Texture gather instruction. @@ -2480,18 +2493,21 @@ + - + Image to read from + Dummy source for IR + - + Pair of texture instructions. @@ -2514,7 +2530,7 @@ Image to read from - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units. @@ -2536,7 +2552,7 @@ Varying offset - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units. @@ -2559,7 +2575,7 @@ Varying offset - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX, using both V and T units. @@ -2582,7 +2598,7 @@ Varying offset - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_BUF_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units. @@ -2604,7 +2620,7 @@ Varying offset - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units. @@ -2626,7 +2642,7 @@ Varying offset - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units. @@ -2649,7 +2665,7 @@ Varying offset - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units. @@ -2672,7 +2688,7 @@ Varying offset - + Only works for FP32 varyings. Performance characteristics are similar to LD_VAR_IMM_F32.v2.f32 followed by TEX_DUAL, using both V and T units. @@ -2694,7 +2710,7 @@ Varying offset - + First calculates $A \cdot B + C$ and then biases the exponent by D. Used in special transcendental function sequences. It should not be used for @@ -2709,7 +2725,7 @@ D - + First calculates $A \cdot B + C$ and then biases the exponent by D. If $A = 0$ or $B = 0$, the multiply $A \cdot B$ is treated as zero even if an @@ -2725,7 +2741,7 @@ D - + First calculates $A \cdot B + C$ and then biases the exponent by D. If $A = 0$ or $B = 0$, the multiply is treated as $A$ even if an @@ -2741,7 +2757,7 @@ D - + First calculates $A \cdot B + C$ and then biases the exponent by D, interpreted as a 16-bit value. Used in special transcendental function diff --git a/src/panfrost/compiler/valhall/valhall.py b/src/panfrost/compiler/valhall/valhall.py index 3c1c8bbd8db..7b2bb9d0c4e 100644 --- a/src/panfrost/compiler/valhall/valhall.py +++ b/src/panfrost/compiler/valhall/valhall.py @@ -272,7 +272,7 @@ def build_instr(el, overrides = {}): i = 0 for src in el.findall('src'): - if (src.attrib.get('ir_only', False)): + if (src.attrib.get('pseudo', False)): continue built = build_source(src, i, tsize) sources += [built] @@ -298,9 +298,9 @@ def build_instr(el, overrides = {}): modifiers = [] for mod in el: - if (mod.tag in MODIFIERS) and not (mod.attrib.get('ir_only', False)): + if (mod.tag in MODIFIERS) and not (mod.attrib.get('pseudo', False)): modifiers.append(MODIFIERS[mod.tag]) - elif mod.tag =='mod': + elif mod.tag =='va_mod': modifiers.append(build_modifier(mod)) instr = Instruction(name, opcode, opcode2, srcs = sources, dests = dests, immediates = imms, modifiers = modifiers, staging = staging, unit = unit)