mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
pan/bi: Annotate Valhall instructions with units
Based on analyzing the cycle counts reported by the Mali offline compiler. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13802>
This commit is contained in:
parent
04cc1b93b1
commit
855ab23d9a
1 changed files with 97 additions and 91 deletions
|
|
@ -576,7 +576,7 @@
|
|||
<value name="0x7C007C00">v2inf</value>
|
||||
</enum>
|
||||
|
||||
<ins name="NOP" title="No operation" dests="0" opcode="0x00">
|
||||
<ins name="NOP" title="No operation" dests="0" opcode="0x00" unit="CVT">
|
||||
<desc>
|
||||
Do nothing. Useful at the start of a block for waiting on slots required
|
||||
by the first actual instruction of the block, to reconcile dependencies
|
||||
|
|
@ -584,7 +584,7 @@
|
|||
</desc>
|
||||
</ins>
|
||||
|
||||
<ins name="BRANCHZ" title="Compare to zero and branch" dests="0" opcode="0x1F">
|
||||
<ins name="BRANCHZ" title="Compare to zero and branch" dests="0" opcode="0x1F" unit="CVT">
|
||||
<desc>
|
||||
Branches to a specified relative offset if its source is nonzero (default)
|
||||
or if its source is zero (if `.eq` is set). The offset is 27-bits and
|
||||
|
|
@ -605,7 +605,7 @@
|
|||
<mod name="eq" start="36" size="1"/>
|
||||
</ins>
|
||||
|
||||
<ins name="DISCARD.f32" title="Discard fragment" opcode="0x20">
|
||||
<ins name="DISCARD.f32" title="Discard fragment" opcode="0x20" unit="CVT">
|
||||
<desc>
|
||||
Evaluates the given condition, and if it passes, discards the current
|
||||
fragment and terminates the thread. The destination should be set to R60.
|
||||
|
|
@ -617,7 +617,7 @@
|
|||
<src absneg="true" swizzle="true">Right value to compare</src>
|
||||
</ins>
|
||||
|
||||
<ins name="BRANCHZI" title="Compare to zero and branch indirect" opcode="0x2F">
|
||||
<ins name="BRANCHZI" title="Compare to zero and branch indirect" opcode="0x2F" unit="CVT">
|
||||
<desc>
|
||||
Jump to an indirectly specified address. Used to jump to blend shaders at
|
||||
the end of a fragment shader.
|
||||
|
|
@ -627,7 +627,7 @@
|
|||
<mod name="eq" start="36" size="1"/>
|
||||
</ins>
|
||||
|
||||
<ins name="BARRIER" title="Execution and memory barrier" opcode="0x45">
|
||||
<ins name="BARRIER" title="Execution and memory barrier" opcode="0x45" unit="NONE">
|
||||
<desc>
|
||||
General-purpose barrier. Must use slot #7. Must be paired with a
|
||||
`.barrier` action on the instruction.
|
||||
|
|
@ -635,7 +635,7 @@
|
|||
<slot/>
|
||||
</ins>
|
||||
|
||||
<group name="CSEL" title="Floating-point conditional select" dests="1">
|
||||
<group name="CSEL" title="Floating-point conditional select" dests="1" unit="CVT">
|
||||
<ins name="CSEL.f32" opcode="0x154"/>
|
||||
<ins name="CSEL.v2f16" opcode="0x155"/>
|
||||
<desc>
|
||||
|
|
@ -649,7 +649,7 @@
|
|||
<src float="true">Return value if false</src>
|
||||
</group>
|
||||
|
||||
<group name="CSEL" title="Integer conditional select" dests="1">
|
||||
<group name="CSEL" title="Integer conditional select" dests="1" unit="CVT">
|
||||
<ins name="CSEL.u32" opcode="0x150"/>
|
||||
<ins name="CSEL.v2u16" opcode="0x151"/>
|
||||
<ins name="CSEL.i32" opcode="0x158"/>
|
||||
|
|
@ -670,7 +670,7 @@
|
|||
<src>Return value if false</src>
|
||||
</group>
|
||||
|
||||
<ins name="LD_VAR_SPECIAL" title="Load special varying" opcode="0x56">
|
||||
<ins name="LD_VAR_SPECIAL" title="Load special varying" opcode="0x56" unit="V">
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
<vecsize/>
|
||||
|
|
@ -680,7 +680,7 @@
|
|||
<imm name="index" start="12" size="4"/> <!-- 0 for pointx, 1 for pointy, 2 for fragw, 3 for fragz -->
|
||||
</ins>
|
||||
|
||||
<group name="LD_VAR_IMM_F32" title="Load immediate varying">
|
||||
<group name="LD_VAR_IMM_F32" title="Load immediate varying" unit="V">
|
||||
<desc>Interpolates a given varying</desc>
|
||||
<ins name="LD_VAR_IMM_F32" opcode="0x5C"/>
|
||||
<ins name="LD_VAR_IMM_F16" opcode="0x5D"/>
|
||||
|
|
@ -694,7 +694,7 @@
|
|||
<imm name="index" start="20" size="4"/>
|
||||
</group>
|
||||
|
||||
<ins name="LD_ATTR_IMM" title="Load immediate attribute" opcode="0x66">
|
||||
<ins name="LD_ATTR_IMM" title="Load immediate attribute" opcode="0x66" unit="LS">
|
||||
<sr_count/>
|
||||
<vecsize/>
|
||||
<regfmt/>
|
||||
|
|
@ -705,7 +705,7 @@
|
|||
<imm name="index" start="20" size="4"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LD_ATTR" title="Load indirect attribute" opcode="0x67">
|
||||
<ins name="LD_ATTR" title="Load indirect attribute" opcode="0x67" unit="LS">
|
||||
<desc>The index must not diverge within a warp.</desc>
|
||||
<vecsize/>
|
||||
<regfmt/>
|
||||
|
|
@ -717,7 +717,7 @@
|
|||
<src>Index</src>
|
||||
</ins>
|
||||
|
||||
<ins name="LEA_ATTR" title="Load effective address" opcode="0x5E">
|
||||
<ins name="LEA_ATTR" title="Load effective address" opcode="0x5E" unit="LS">
|
||||
<desc>
|
||||
Loads the effective address of the position buffer (in a position shader)
|
||||
or the varying buffer (in a varying shader). That is, the base pointer
|
||||
|
|
@ -736,7 +736,7 @@
|
|||
<src>Linear ID</src>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i8" title="Global memory load" opcode="0x60" opcode2="0">
|
||||
<ins name="LOAD.i8" title="Global memory load" opcode="0x60" opcode2="0" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -747,7 +747,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i16" title="Global memory load" opcode="0x60" opcode2="1">
|
||||
<ins name="LOAD.i16" title="Global memory load" opcode="0x60" opcode2="1" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -758,7 +758,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i24" title="Global memory load" opcode="0x60" opcode2="2">
|
||||
<ins name="LOAD.i24" title="Global memory load" opcode="0x60" opcode2="2" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -769,7 +769,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i32" title="Global memory load" opcode="0x60" opcode2="3">
|
||||
<ins name="LOAD.i32" title="Global memory load" opcode="0x60" opcode2="3" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -780,7 +780,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i48" title="Global memory load" opcode="0x60" opcode2="4">
|
||||
<ins name="LOAD.i48" title="Global memory load" opcode="0x60" opcode2="4" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -791,7 +791,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i64" title="Global memory load" opcode="0x60" opcode2="5">
|
||||
<ins name="LOAD.i64" title="Global memory load" opcode="0x60" opcode2="5" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -802,7 +802,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i96" title="Global memory load" opcode="0x60" opcode2="6">
|
||||
<ins name="LOAD.i96" title="Global memory load" opcode="0x60" opcode2="6" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -813,7 +813,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="LOAD.i128" title="Global memory load" opcode="0x60" opcode2="7">
|
||||
<ins name="LOAD.i128" title="Global memory load" opcode="0x60" opcode2="7" unit="LS">
|
||||
<desc>Loads from main memory</desc>
|
||||
<sr write="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -824,7 +824,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</ins>
|
||||
|
||||
<group name="STORE" title="Global memory store" opcode="0x61">
|
||||
<group name="STORE" title="Global memory store" opcode="0x61" unit="LS">
|
||||
<desc>Stores to main memory</desc>
|
||||
<sr read="true"/>
|
||||
<ins name="STORE.i8" opcode2="0x0"/>
|
||||
|
|
@ -842,7 +842,7 @@
|
|||
<imm name="offset" start="8" size="16" signed="true"/>
|
||||
</group>
|
||||
|
||||
<ins name="ST_IMAGE" title="Image store" opcode="0x71">
|
||||
<ins name="ST_IMAGE" title="Image store" opcode="0x71" unit="LS">
|
||||
<desc>Stores to images</desc>
|
||||
<sr read="true"/>
|
||||
<sr_count/>
|
||||
|
|
@ -850,7 +850,7 @@
|
|||
<src>Address to store to after adding offset</src>
|
||||
</ins>
|
||||
|
||||
<ins name="LD_TILE" title="Load from tilebuffer" opcode="0x78">
|
||||
<ins name="LD_TILE" title="Load from tilebuffer" opcode="0x78" unit="NONE">
|
||||
<desc>
|
||||
Loads a given render target, specified in the pixel indices descriptor, at
|
||||
a given location and sample, and converts to the format specified in the
|
||||
|
|
@ -865,7 +865,7 @@
|
|||
<src>Conversion descriptor</src>
|
||||
</ins>
|
||||
|
||||
<ins name="BLEND" title="Blend render target" opcode="0x7F">
|
||||
<ins name="BLEND" title="Blend render target" opcode="0x7F" unit="NONE">
|
||||
<desc>
|
||||
Blends a given render target. This loads the API-specified blend state for
|
||||
the render target from the first source. Blend descriptors are available
|
||||
|
|
@ -901,7 +901,7 @@
|
|||
<regfmt/>
|
||||
</ins>
|
||||
|
||||
<ins name="ATEST" title="Alpha test" opcode="0x7D">
|
||||
<ins name="ATEST" title="Alpha test" opcode="0x7D" unit="NONE">
|
||||
<desc>
|
||||
Does alpha-to-coverage testing, updating the sample coverage mask. ATEST
|
||||
does not do an implicit discard. It should be executed before the first
|
||||
|
|
@ -914,7 +914,7 @@
|
|||
<sr_count/>
|
||||
</ins>
|
||||
|
||||
<ins name="ZS_EMIT" title="Depth/stencil write" opcode="0x7E">
|
||||
<ins name="ZS_EMIT" title="Depth/stencil write" opcode="0x7E" unit="NONE">
|
||||
<desc>
|
||||
Programmatically writes out depth, stencil, or both, depending on which
|
||||
modifiers are set. Used to implement gl_FragDepth and gl_FragStencil.
|
||||
|
|
@ -927,7 +927,7 @@
|
|||
<src>Input coverage mask</src>
|
||||
</ins>
|
||||
|
||||
<group name="CONVERT" title="Data conversions" dests="1" opcode="0x90">
|
||||
<group name="CONVERT" title="Data conversions" dests="1" opcode="0x90" unit="CVT">
|
||||
<desc>
|
||||
Performs the given data conversion. Note that floating-point rounding is
|
||||
handled via the same hardware and therefore shares an encoding. Round mode
|
||||
|
|
@ -950,7 +950,7 @@
|
|||
<src widen="true">Value to convert</src>
|
||||
</group>
|
||||
|
||||
<group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90">
|
||||
<group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90" unit="CVT">
|
||||
<desc>Performs the given data conversion.</desc>
|
||||
<ins name="F32_TO_S32" opcode2="0xC"/>
|
||||
<ins name="F32_TO_U32" opcode2="0x1C"/>
|
||||
|
|
@ -958,7 +958,7 @@
|
|||
<src absneg="true">Value to convert</src>
|
||||
</group>
|
||||
|
||||
<group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90">
|
||||
<group name="CONVERT" title="Float-to-int data conversions" dests="1" opcode="0x90" unit="CVT">
|
||||
<desc>Performs the given data conversion.</desc>
|
||||
<ins name="V2F16_TO_V2S16" opcode2="0xE"/>
|
||||
<ins name="V2F16_TO_V2U16" opcode2="0x1E"/>
|
||||
|
|
@ -968,13 +968,13 @@
|
|||
<src swizzle="true" absneg="true" size="16">Value to convert</src>
|
||||
</group>
|
||||
|
||||
<ins name="F16_TO_F32" title="16-bit float to 32-bit float conversion" dests="1" opcode="0x90" opcode2="0xB">
|
||||
<ins name="F16_TO_F32" title="16-bit float to 32-bit float conversion" dests="1" opcode="0x90" opcode2="0xB" unit="CVT">
|
||||
<desc>Converts up with the specified round mode.</desc>
|
||||
<roundmode/>
|
||||
<src lane="28" size="16" absneg="true">Value to convert</src>
|
||||
</ins>
|
||||
|
||||
<group name="CONVERT" title="8-bit data conversions" dests="1" opcode="0x90">
|
||||
<group name="CONVERT" title="8-bit data conversions" dests="1" opcode="0x90" unit="CVT">
|
||||
<desc>
|
||||
Performs the given data conversion.
|
||||
</desc>
|
||||
|
|
@ -992,7 +992,7 @@
|
|||
<src lane="28" size="8">Value to convert</src>
|
||||
</group>
|
||||
|
||||
<group name="FROUND" title="Floating-point rounding" dests="1" opcode="0x90">
|
||||
<group name="FROUND" title="Floating-point rounding" dests="1" opcode="0x90" unit="CVT">
|
||||
<desc>
|
||||
Performs the given rounding, using the convert unit.
|
||||
</desc>
|
||||
|
|
@ -1004,33 +1004,33 @@
|
|||
<src swizzle="true" absneg="true">Value to convert</src>
|
||||
</group>
|
||||
|
||||
<ins name="MOV.i32" title="Register move" dests="1" opcode="0x91" opcode2="0x0">
|
||||
<ins name="MOV.i32" title="Register move" dests="1" opcode="0x91" opcode2="0x0" unit="CVT">
|
||||
<desc>Canonical register-to-register move.</desc>
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="CLZ.u32" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x4">
|
||||
<ins name="CLZ.u32" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x4" unit="CVT">
|
||||
<desc>
|
||||
Used as a primitive for various bitwise operations.
|
||||
</desc>
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="CLZ.v2u16" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x5">
|
||||
<ins name="CLZ.v2u16" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x5" unit="CVT">
|
||||
<desc>
|
||||
Used as a primitive for various bitwise operations.
|
||||
</desc>
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="CLZ.v4u8" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x6">
|
||||
<ins name="CLZ.v4u8" title="Count leading zeroes" dests="1" opcode="0x91" opcode2="0x6" unit="CVT">
|
||||
<desc>
|
||||
Used as a primitive for various bitwise operations.
|
||||
</desc>
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="IABS.s32" title="Absolute value" dests="1" opcode="0x91" opcode2="0x8">
|
||||
<ins name="IABS.s32" title="Absolute value" dests="1" opcode="0x91" opcode2="0x8" unit="CVT">
|
||||
<desc>
|
||||
64-bit abs may be constructed in 4 instructions (5 clocks) by checking the
|
||||
sign with `ICMP.s32.lt.m1 hi, 0` and negating based on the result with
|
||||
|
|
@ -1039,15 +1039,15 @@
|
|||
<src widen="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="IABS.v2s16" title="Absolute value" dests="1" opcode="0x91" opcode2="0x9">
|
||||
<ins name="IABS.v2s16" title="Absolute value" dests="1" opcode="0x91" opcode2="0x9" unit="CVT">
|
||||
<src widen="true"/>
|
||||
</ins>
|
||||
|
||||
<ins name="IABS.v4s8" title="Absolute value" dests="1" opcode="0x91" opcode2="0xa">
|
||||
<ins name="IABS.v4s8" title="Absolute value" dests="1" opcode="0x91" opcode2="0xa" unit="CVT">
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="POPCOUNT.i32" title="Population count" dests="1" opcode="0x91" opcode2="0xC">
|
||||
<ins name="POPCOUNT.i32" title="Population count" dests="1" opcode="0x91" opcode2="0xC" unit="SFU">
|
||||
<desc>
|
||||
Only available as 32-bit. Smaller bitsizes require explicit conversions.
|
||||
64-bit popcount may be constructed in 3 clocks by separate 32-bit
|
||||
|
|
@ -1057,28 +1057,29 @@
|
|||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="BITREV.i32" title="Bitwise reverse" dests="1" opcode="0x91" opcode2="0xD">
|
||||
<ins name="BITREV.i32" title="Bitwise reverse" dests="1" opcode="0x91" opcode2="0xD" unit="SFU">
|
||||
<desc>
|
||||
Only available as 32-bit. Other bitsizes may be derived with swizzles.
|
||||
</desc>
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="NOT.i32" title="Bitwise complement" dests="1" opcode="0x91" opcode2="0xE">
|
||||
<ins name="NOT.i32" title="Bitwise complement" dests="1" opcode="0x91" opcode2="0xE" unit="SFU">
|
||||
<desc>
|
||||
For fully featured bitwise operation, see the shift opcodes.
|
||||
</desc>
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="NOT.i64" title="Bitwise complement" dests="1" opcode="0x191" opcode2="0xE">
|
||||
<ins name="NOT.i64" title="Bitwise complement" dests="1" opcode="0x191" opcode2="0xE" unit="SFU">
|
||||
<desc>
|
||||
For fully featured bitwise operation, see the shift opcodes.
|
||||
</desc>
|
||||
<src/>
|
||||
</ins>
|
||||
|
||||
<ins name="WMASK" title="Warp mask" dests="1" opcode="0x95">
|
||||
<ins name="WMASK" title="Warp mask" dests="1" opcode="0x95" unit="SFU">
|
||||
<!-- TODO: confirm unit -->
|
||||
<desc>
|
||||
Returns the mask of lanes ever active within the warp (subgroup), such
|
||||
that the source is nonzero. The number of work-items in a subgroup is
|
||||
|
|
@ -1094,7 +1095,7 @@
|
|||
<subgroup/>
|
||||
</ins>
|
||||
|
||||
<group name="FREXP" title="Fraction/exponent extract" dests="1" opcode="0x99">
|
||||
<group name="FREXP" title="Fraction/exponent extract" dests="1" opcode="0x99" unit="CVT">
|
||||
<ins name="FREXPM.f32" opcode2="0"/>
|
||||
<ins name="FREXPM.v2f16" opcode2="1"/>
|
||||
<ins name="FREXPE.f32" opcode2="2"/>
|
||||
|
|
@ -1109,7 +1110,7 @@
|
|||
<src float="true" swizzle="true"/>
|
||||
</group>
|
||||
|
||||
<group name="SFU" title="Special function unit" dests="1" opcode="0x9C">
|
||||
<group name="SFU" title="Special function unit" dests="1" opcode="0x9C" unit="SFU">
|
||||
<ins name="FRCP.f32" opcode2="0"/>
|
||||
<ins name="FRCP.f16" opcode2="1"/>
|
||||
<ins name="FRSQ.f32" opcode2="2"/>
|
||||
|
|
@ -1121,10 +1122,10 @@
|
|||
The logarithm instruction (`FLOGD.f32`) requires an argument reduction. See the
|
||||
transcendentals section for more information.
|
||||
</desc>
|
||||
<src float="true" swizzle="true"/>
|
||||
<src float="true" swizzle="true" absneg="true"/>
|
||||
</group>
|
||||
|
||||
<group name="SFU" title="Special function unit" dests="1" opcode="0x9C">
|
||||
<group name="SFU" title="Special function unit" dests="1" opcode="0x9C" unit="SFU">
|
||||
<ins name="FSIN_TABLE.u6" opcode2="4"/>
|
||||
<ins name="FCOS_TABLE.u6" opcode2="5"/>
|
||||
<desc>
|
||||
|
|
@ -1134,7 +1135,7 @@
|
|||
<src/>
|
||||
</group>
|
||||
|
||||
<group name="FADD" title="Floating-point add" dests="1" opcode2="0">
|
||||
<group name="FADD" title="Floating-point add" dests="1" opcode2="0" unit="FMA">
|
||||
<ins name="FADD.f32" opcode="0xA4"/>
|
||||
<ins name="FADD.v2f16" opcode="0xA5"/>
|
||||
<desc>$A + B$</desc>
|
||||
|
|
@ -1143,7 +1144,7 @@
|
|||
<src absneg="true" swizzle="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="FMIN" title="Floating-point minimum" dests="1" opcode2="2">
|
||||
<group name="FMIN" title="Floating-point minimum" dests="1" opcode2="2" unit="CVT">
|
||||
<ins name="FMIN.f32" opcode="0xA4"/>
|
||||
<ins name="FMIN.v2f16" opcode="0xA5"/>
|
||||
<desc>$\min \{ A, B \}$</desc>
|
||||
|
|
@ -1152,7 +1153,7 @@
|
|||
<src absneg="true" swizzle="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="FMAX" title="Floating-point maximum" dests="1" opcode2="3">
|
||||
<group name="FMAX" title="Floating-point maximum" dests="1" opcode2="3" unit="CVT">
|
||||
<ins name="FMAX.f32" opcode="0xA4"/>
|
||||
<ins name="FMAX.v2f16" opcode="0xA5"/>
|
||||
<desc>$\max \{ A, B \}$</desc>
|
||||
|
|
@ -1161,7 +1162,7 @@
|
|||
<src absneg="true" swizzle="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="V2F32_TO_V2F16" title="Vectorized floating-point conversion" dests="1" opcode2="4">
|
||||
<group name="V2F32_TO_V2F16" title="Vectorized floating-point conversion" dests="1" opcode2="4" unit="CVT">
|
||||
<ins name="V2F32_TO_V2F16" opcode="0xA5"/>
|
||||
<desc>
|
||||
Given a pair of 32-bit floats, output a pair of 16-bit floats packed into
|
||||
|
|
@ -1171,7 +1172,7 @@
|
|||
<src>B</src>
|
||||
</group>
|
||||
|
||||
<group name="FRSCALE" title="Floating-point rescaling" dests="1" opcode2="6">
|
||||
<group name="FRSCALE" title="Floating-point rescaling" dests="1" opcode2="6" unit="FMA">
|
||||
<ins name="FRSCALE.f32" opcode="0xA4"/>
|
||||
<ins name="FRSCALE.v2f16" opcode="0xA5"/>
|
||||
<desc>
|
||||
|
|
@ -1185,7 +1186,7 @@
|
|||
<src absneg="true" swizzle="true">B</src>
|
||||
</group>
|
||||
|
||||
<ins name="FEXP.f32" title="Floating-point exponent" dests="1" opcode="0xA4" opcode2="8">
|
||||
<ins name="FEXP.f32" title="Floating-point exponent" dests="1" opcode="0xA4" opcode2="8" unit="SFU">
|
||||
<desc>
|
||||
Calculates the base-2 exponent of an argument specified as an 8:24
|
||||
fixed-point. The original argument is passed as well for correct handling
|
||||
|
|
@ -1196,7 +1197,7 @@
|
|||
<src absneg="true">Input as 32-bit float</src>
|
||||
</ins>
|
||||
|
||||
<ins name="FADD_LSCALE.f32" title="Floating-point add with logarithm scale" dests="1" opcode="0xA4" opcode2="9">
|
||||
<ins name="FADD_LSCALE.f32" title="Floating-point add with logarithm scale" dests="1" opcode="0xA4" opcode2="9" unit="FMA">
|
||||
<desc>
|
||||
Performs a floating-point addition specialized for logarithm computation.
|
||||
</desc>
|
||||
|
|
@ -1205,7 +1206,7 @@
|
|||
<src absneg="true">B</src>
|
||||
</ins>
|
||||
|
||||
<group name="IADD" title="Integer addition" dests="1" opcode2="0">
|
||||
<group name="IADD" title="Integer addition" dests="1" opcode2="0" unit="CVT">
|
||||
<desc>
|
||||
$A + B$ with optional saturation.
|
||||
|
||||
|
|
@ -1226,13 +1227,13 @@
|
|||
<saturate/>
|
||||
</group>
|
||||
|
||||
<ins name="MKVEC.v2i16" title="Make 16-bit vector" dests="1" opcode="0xA1" opcode2="0x5">
|
||||
<ins name="MKVEC.v2i16" title="Make 16-bit vector" dests="1" opcode="0xA1" opcode2="0x5" unit="CVT">
|
||||
<desc>Calculates $A | (B \ll 16)$. Used to implement `(ushort2)(A, B)`</desc>
|
||||
<src widen="true">A</src>
|
||||
<src widen="true">B</src>
|
||||
</ins>
|
||||
|
||||
<group name="ISUB" title="Integer subtract" dests="1" opcode2="1">
|
||||
<group name="ISUB" title="Integer subtract" dests="1" opcode2="1" unit="CVT">
|
||||
<ins name="ISUB.u32" opcode="0xA0"/>
|
||||
<ins name="ISUB.v2u16" opcode="0xA1"/>
|
||||
<ins name="ISUB.v4u8" opcode="0xA2"/>
|
||||
|
|
@ -1247,7 +1248,7 @@
|
|||
<saturate/>
|
||||
</group>
|
||||
|
||||
<group name="SHADDX" title="Shift, extend, and 64-bit add" dests="1" opcode2="7">
|
||||
<group name="SHADDX" title="Shift, extend, and 64-bit add" dests="1" opcode2="7" unit="CVT">
|
||||
<desc>
|
||||
Sign or zero extend B to 64-bits, left-shift by `shift`, and add the
|
||||
64-bit value A. These instructions accelerate address arithmetic, but may
|
||||
|
|
@ -1260,7 +1261,7 @@
|
|||
<src widen="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="IMUL" title="Integer multiply" dests="1" opcode2="0x0A">
|
||||
<group name="IMUL" title="Integer multiply" dests="1" opcode2="0x0A" unit="SFU">
|
||||
<ins name="IMUL.i32" opcode="0xA0"/>
|
||||
<ins name="IMUL.v2i16" opcode="0xA1"/>
|
||||
<ins name="IMUL.v4i8" opcode="0xA2"/>
|
||||
|
|
@ -1281,7 +1282,8 @@
|
|||
<saturate/>
|
||||
</group>
|
||||
|
||||
<group name="HADD" title="Integer half-add" dests="1" opcode2="0x0B">
|
||||
<group name="HADD" title="Integer half-add" dests="1" opcode2="0x0B" unit="CVT">
|
||||
<!-- TODO: confirm unit -->
|
||||
<ins name="HADD.u32" opcode="0xA0"/>
|
||||
<ins name="HADD.v2u16" opcode="0xA1"/>
|
||||
<ins name="HADD.v4u8" opcode="0xA2"/>
|
||||
|
|
@ -1298,7 +1300,7 @@
|
|||
</desc>
|
||||
</group>
|
||||
|
||||
<group name="CLPER" title="Cross-lane permute" dests="1" opcode2="0xF">
|
||||
<group name="CLPER" title="Cross-lane permute" dests="1" opcode2="0xF" unit="SFU">
|
||||
<ins name="CLPER.i32" opcode="0xA0"/>
|
||||
<ins name="CLPER.v2u16" opcode="0xA1"/>
|
||||
<ins name="CLPER.v4u8" opcode="0xA2"/>
|
||||
|
|
@ -1320,7 +1322,7 @@
|
|||
<inactive_result/>
|
||||
</group>
|
||||
|
||||
<group name="FMA" title="Fused floating-point multiply add" dests="1">
|
||||
<group name="FMA" title="Fused floating-point multiply add" dests="1" unit="FMA">
|
||||
<ins name="FMA.f32" opcode="0xB2"/>
|
||||
<ins name="FMA.v2f16" opcode="0xB3"/>
|
||||
<desc>$A \cdot B + C$</desc>
|
||||
|
|
@ -1330,7 +1332,7 @@
|
|||
<src absneg="true" swizzle="true">C</src>
|
||||
</group>
|
||||
|
||||
<group name="LSHIFT_AND" title="Left shift and bitwise AND" dests="1" opcode2="0x100">
|
||||
<group name="LSHIFT_AND" title="Left shift and bitwise AND" dests="1" opcode2="0x100" unit="SFU">
|
||||
<ins name="LSHIFT_AND.i32" opcode="0xB4"/>
|
||||
<ins name="LSHIFT_AND.v2i16" opcode="0xB5"/>
|
||||
<ins name="LSHIFT_AND.v4i8" opcode="0xB6"/>
|
||||
|
|
@ -1346,7 +1348,7 @@
|
|||
<src not="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="RSHIFT_AND" title="Right shift and bitwise AND" dests="1" opcode2="0x000">
|
||||
<group name="RSHIFT_AND" title="Right shift and bitwise AND" dests="1" opcode2="0x000" unit="SFU">
|
||||
<ins name="RSHIFT_AND.i32" opcode="0xB4"/>
|
||||
<ins name="RSHIFT_AND.v2i16" opcode="0xB5"/>
|
||||
<ins name="RSHIFT_AND.v4i8" opcode="0xB6"/>
|
||||
|
|
@ -1362,7 +1364,7 @@
|
|||
<src not="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="LSHIFT_OR" title="Left shift and bitwise OR" dests="1" opcode2="0x101">
|
||||
<group name="LSHIFT_OR" title="Left shift and bitwise OR" dests="1" opcode2="0x101" unit="SFU">
|
||||
<ins name="LSHIFT_OR.i32" opcode="0xB4"/>
|
||||
<ins name="LSHIFT_OR.v2i16" opcode="0xB5"/>
|
||||
<ins name="LSHIFT_OR.v4i8" opcode="0xB6"/>
|
||||
|
|
@ -1378,7 +1380,7 @@
|
|||
<src not="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="RSHIFT_OR" title="Right shift and bitwise OR" dests="1" opcode2="0x001">
|
||||
<group name="RSHIFT_OR" title="Right shift and bitwise OR" dests="1" opcode2="0x001" unit="SFU">
|
||||
<ins name="RSHIFT_OR.i32" opcode="0xB4"/>
|
||||
<ins name="RSHIFT_OR.v2i16" opcode="0xB5"/>
|
||||
<ins name="RSHIFT_OR.v4i8" opcode="0xB6"/>
|
||||
|
|
@ -1394,7 +1396,7 @@
|
|||
<src not="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="LSHIFT_XOR" title="Left shift and bitwise XOR" dests="1" opcode2="0x102">
|
||||
<group name="LSHIFT_XOR" title="Left shift and bitwise XOR" dests="1" opcode2="0x102" unit="SFU">
|
||||
<ins name="LSHIFT_XOR.i32" opcode="0xB4"/>
|
||||
<ins name="LSHIFT_XOR.v2i16" opcode="0xB5"/>
|
||||
<ins name="LSHIFT_XOR.v4i8" opcode="0xB6"/>
|
||||
|
|
@ -1410,7 +1412,7 @@
|
|||
<src not="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="RSHIFT_XOR" title="Right shift and bitwise XOR" dests="1" opcode2="0x002">
|
||||
<group name="RSHIFT_XOR" title="Right shift and bitwise XOR" dests="1" opcode2="0x002" unit="SFU">
|
||||
<ins name="RSHIFT_XOR.i32" opcode="0xB4"/>
|
||||
<ins name="RSHIFT_XOR.v2i16" opcode="0xB5"/>
|
||||
<ins name="RSHIFT_XOR.v4i8" opcode="0xB6"/>
|
||||
|
|
@ -1426,7 +1428,7 @@
|
|||
<src not="true">B</src>
|
||||
</group>
|
||||
|
||||
<ins name="MUX.i32" title="Mux" dests="1" opcode="0xB8">
|
||||
<ins name="MUX.i32" title="Mux" dests="1" opcode="0xB8" unit="SFU">
|
||||
<desc>
|
||||
Mux between A and B based on the provided mask. Equivalent to
|
||||
`bitselect()` in OpenCL. `(A & ~mask) | (B & mask)`
|
||||
|
|
@ -1436,21 +1438,21 @@
|
|||
<src>Mask</src>
|
||||
</ins>
|
||||
|
||||
<ins name="CUBE_SSEL" title="Cube S-coordinate select" dests="1" opcode="0xBC" opcode2="0">
|
||||
<ins name="CUBE_SSEL" title="Cube S-coordinate select" dests="1" opcode="0xBC" opcode2="0" unit="SFU">
|
||||
<desc>During a cube map transform, select the S coordinate given a selected face.</desc>
|
||||
<src absneg="true">Z coordinate as 32-bit floating point</src>
|
||||
<src absneg="true">X coordinate as 32-bit floating point</src>
|
||||
<src>Cube face index</src>
|
||||
</ins>
|
||||
|
||||
<ins name="CUBE_TSEL" title="Cube T-coordinate select" dests="1" opcode="0xBC" opcode2="1">
|
||||
<ins name="CUBE_TSEL" title="Cube T-coordinate select" dests="1" opcode="0xBC" opcode2="1" unit="SFU">
|
||||
<desc>During a cube map transform, select the T coordinate given a selected face.</desc>
|
||||
<src absneg="true">Y coordinate as 32-bit floating point</src>
|
||||
<src absneg="true">Z coordinate as 32-bit floating point</src>
|
||||
<src>Cube face index</src>
|
||||
</ins>
|
||||
|
||||
<ins name="MKVEC.v4i8" title="Make 8-bit vector" dests="1" opcode="0xBD">
|
||||
<ins name="MKVEC.v4i8" title="Make 8-bit vector" dests="1" opcode="0xBD" unit="CVT">
|
||||
<desc>
|
||||
Calculates $A | (B \ll 8) | (CD \ll 16)$ for 8-bit A and B and 16-bit CD.
|
||||
|
||||
|
|
@ -1465,21 +1467,22 @@
|
|||
<src>CD</src>
|
||||
</ins>
|
||||
|
||||
<ins name="CUBEFACE1" title="Cube map transform step 1" dests="1" opcode="0xC0">
|
||||
<ins name="CUBEFACE1" title="Cube map transform step 1" dests="1" opcode="0xC0" unit="SFU">
|
||||
<desc>Select the maximum absolute value of its arguments.</desc>
|
||||
<src absneg="true">X coordinate as 32-bit floating point</src>
|
||||
<src absneg="true">Y coordinate as 32-bit floating point</src>
|
||||
<src absneg="true">Z coordinate as 32-bit floating point</src>
|
||||
</ins>
|
||||
|
||||
<ins name="CUBEFACE2" title="Cube map transform step 2" dests="1" opcode="0xC1">
|
||||
<ins name="CUBEFACE2" title="Cube map transform step 2" dests="1" opcode="0xC1" unit="SFU">
|
||||
<desc>Select the cube face index corresponding to the arguments.</desc>
|
||||
<src absneg="true">X coordinate as 32-bit floating point</src>
|
||||
<src absneg="true">Y coordinate as 32-bit floating point</src>
|
||||
<src absneg="true">Z coordinate as 32-bit floating point</src>
|
||||
</ins>
|
||||
|
||||
<group name="IDP" title="8-bit dot product" dests="1" opcode="0xC2">
|
||||
<group name="IDP" title="8-bit dot product" dests="1" opcode="0xC2" unit="SFU">
|
||||
<!-- TODO: confirm unit -->
|
||||
<desc>
|
||||
8-bit integer dot product between 4 channel vectors, intended for machine
|
||||
learning. Available in both unsigned and signed variants, controlling
|
||||
|
|
@ -1500,7 +1503,7 @@
|
|||
<saturate/>
|
||||
</group>
|
||||
|
||||
<group name="ICMP" title="Unsigned integer compare" dests="1">
|
||||
<group name="ICMP" title="Unsigned integer compare" dests="1" unit="CVT">
|
||||
<desc>
|
||||
Evaluates the given condition, does a logical and/or with the condition in
|
||||
the result source, and returns in the given result type (integer
|
||||
|
|
@ -1528,7 +1531,7 @@
|
|||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="FCMP" title="Floating-point compare" dests="1">
|
||||
<group name="FCMP" title="Floating-point compare" dests="1" unit="CVT">
|
||||
<desc>
|
||||
Evaluates the given condition, does a logical and/or with the condition in
|
||||
the result source, and returns in the given result type (integer
|
||||
|
|
@ -1547,7 +1550,7 @@
|
|||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="ICMP" title="Signed integer compare" dests="1">
|
||||
<group name="ICMP" title="Signed integer compare" dests="1" unit="CVT">
|
||||
<desc>
|
||||
Evaluates the given condition, does a logical and/or with the condition in
|
||||
the result source, and returns in the given result type (integer
|
||||
|
|
@ -1575,7 +1578,7 @@
|
|||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<ins name="IADD_IMM.i32" title="Integer addition with immediate" dests="1" opcode="0x110">
|
||||
<ins name="IADD_IMM.i32" title="Integer addition with immediate" dests="1" opcode="0x110" unit="CVT">
|
||||
<desc>
|
||||
Adds an arbitrary 32-bit immediate embedded within the instruction stream.
|
||||
If no modifiers are required, this is preferred to `IADD.i32` with a
|
||||
|
|
@ -1588,7 +1591,7 @@
|
|||
<imm name="constant" start="8" size="32"/>
|
||||
</ins>
|
||||
|
||||
<ins name="IADD_IMM.v2i16" title="Integer addition with immediate" dests="1" opcode="0x111">
|
||||
<ins name="IADD_IMM.v2i16" title="Integer addition with immediate" dests="1" opcode="0x111" unit="CVT">
|
||||
<desc>
|
||||
Adds an arbitrary pair of 16-bit immediates embedded within the
|
||||
instruction stream. If no modifiers are required, this is preferred to
|
||||
|
|
@ -1600,7 +1603,7 @@
|
|||
<imm name="constant" start="8" size="32"/>
|
||||
</ins>
|
||||
|
||||
<ins name="IADD_IMM.v4i8" title="Integer addition with immediate" dests="1" opcode="0x112">
|
||||
<ins name="IADD_IMM.v4i8" title="Integer addition with immediate" dests="1" opcode="0x112" unit="CVT">
|
||||
<desc>
|
||||
Adds an arbitrary quad of 8-bit immediates embedded within the
|
||||
instruction stream. If no modifiers are required, this is preferred to
|
||||
|
|
@ -1612,7 +1615,7 @@
|
|||
<imm name="constant" start="8" size="32"/>
|
||||
</ins>
|
||||
|
||||
<ins name="FADD_IMM.f32" title="Floating-point addition with immediate" dests="1" opcode="0x114">
|
||||
<ins name="FADD_IMM.f32" title="Floating-point addition with immediate" dests="1" opcode="0x114" unit="FMA">
|
||||
<desc>
|
||||
Adds an arbitrary 32-bit immediate embedded within the instruction stream.
|
||||
If no modifiers are required, this is preferred to `FADD.f32` with a
|
||||
|
|
@ -1623,7 +1626,7 @@
|
|||
<imm name="constant" start="8" size="32"/>
|
||||
</ins>
|
||||
|
||||
<ins name="FADD_IMM.v2f16" title="Floating-point addition with immediate" dests="1" opcode="0x115">
|
||||
<ins name="FADD_IMM.v2f16" title="Floating-point addition with immediate" dests="1" opcode="0x115" unit="FMA">
|
||||
<desc>
|
||||
Adds an arbitrary pair of 16-bit immediates embedded within the
|
||||
instruction stream. If no modifiers are required, this is preferred to
|
||||
|
|
@ -1635,7 +1638,7 @@
|
|||
<imm name="constant" start="8" size="32"/>
|
||||
</ins>
|
||||
|
||||
<ins name="TODO.ATOM_C1" title="Atomic operations on memory with 1" opcode="0x69">
|
||||
<ins name="TODO.ATOM_C1" title="Atomic operations on memory with 1" opcode="0x69" unit="LS">
|
||||
<!-- TODO -->
|
||||
<mod name="i32" start="17" size="1"/>
|
||||
<mod name="unk" start="23" size="1"/>
|
||||
|
|
@ -1646,7 +1649,7 @@
|
|||
<slot/>
|
||||
</ins>
|
||||
|
||||
<ins name="TODO.ATOM_C" title="Atomic operations on memory" opcode="0x120">
|
||||
<ins name="TODO.ATOM_C" title="Atomic operations on memory" opcode="0x120" unit="LS">
|
||||
<!-- TODO -->
|
||||
<mod name="i32" start="17" size="1"/>
|
||||
<mod name="unk" start="23" size="1"/>
|
||||
|
|
@ -1657,7 +1660,7 @@
|
|||
<slot/>
|
||||
</ins>
|
||||
|
||||
<ins name="TEX_FETCH" title="Texel fetch" opcode="0x125">
|
||||
<ins name="TEX_FETCH" title="Texel fetch" opcode="0x125" unit="T">
|
||||
<desc>Unfiltered textured instruction.</desc>
|
||||
<sr read="true"/>
|
||||
<sr write="true" count="4"/>
|
||||
|
|
@ -1669,7 +1672,7 @@
|
|||
<src>Image to read from</src>
|
||||
</ins>
|
||||
|
||||
<ins name="TEX" title="Texture load" opcode="0x128">
|
||||
<ins name="TEX" title="Texture load" opcode="0x128" unit="T">
|
||||
<desc>Ordinary texturing instruction using a sampler.</desc>
|
||||
<sr read="true"/>
|
||||
<sr write="true" count="4"/>
|
||||
|
|
@ -1683,8 +1686,11 @@
|
|||
<slot/>
|
||||
</ins>
|
||||
|
||||
<ins name="TODO.VAR_TEX" title="Fused varying-texturing" opcode="0x130">
|
||||
<desc>Only works for FP32 varyings.</desc>
|
||||
<ins name="TODO.VAR_TEX" title="Fused varying-texturing" opcode="0x130" unit="VT">
|
||||
<desc>
|
||||
Only works for FP32 varyings. Performance characteristics are similar
|
||||
to LD_VAR_IMM_F32.v2.f32 followed by TEX, using both V and T units.
|
||||
</desc>
|
||||
<sr write="true" count="4"/>
|
||||
<mod name="dimension" start="28" size="2"/>
|
||||
<mod name="skip" start="39" size="1"/>
|
||||
|
|
@ -1692,7 +1698,7 @@
|
|||
<src>Image to read from</src>
|
||||
</ins>
|
||||
|
||||
<ins name="FMA_RSCALE.f32" title="Fused floating-point multiply add with exponent bias" dests="1" opcode="0x160">
|
||||
<ins name="FMA_RSCALE.f32" title="Fused floating-point multiply add with exponent bias" dests="1" opcode="0x160" unit="FMA">
|
||||
<desc>
|
||||
First calculates $A \cdot B + C$ and then biases the exponent by D. Used in
|
||||
special transcendental function sequences. It should not be used for
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue