mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
pan/va: Split out compare instructions
The different combine modes form different instructions from each other and in particular from the two-source version on Bifrost. Model them as such so we can represent the relevant Valhall-specific lowering/optimizations accurately in the compiler. This requires updating the unit tests to use the new names since there's not much point keeping around the aliases. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17794>
This commit is contained in:
parent
40bf6da4a4
commit
b25c42d8ae
6 changed files with 196 additions and 101 deletions
|
|
@ -2161,54 +2161,109 @@
|
|||
<saturate/>
|
||||
</group>
|
||||
|
||||
<group name="ICMP" title="Unsigned integer compare" dests="1" unit="CVT">
|
||||
<group name="ICMP_OR" title="Unsigned integer compare" dests="1" unit="CVT" opcode2="0">
|
||||
<desc>
|
||||
Evaluates the given condition, do a logical and/or with the condition in
|
||||
Evaluates the given condition, do a logical or with the condition in
|
||||
the result source, and return in the given result type (integer
|
||||
one, integer minus one, or floating-point one). The third source is useful
|
||||
for chaining together conditions without intermediate bitwise arithmetic;
|
||||
when this is not desired, tie it to zero and use the OR combine mode (do
|
||||
not set the `.and` modifier).
|
||||
|
||||
The sequence modifier `.seq` is used to construct 64-bit compares in 2
|
||||
`ICMP.u32` instructions, in conjunction with the `u1` result type on the
|
||||
low half, the `m1` result type on the high half, and the result of the low
|
||||
half comparison passed as the third source. For comparisons other than
|
||||
64-bit, do not set the `.seq` modifier and do not use the `u1` result
|
||||
type.
|
||||
when this is not desired, tie it to zero.
|
||||
</desc>
|
||||
<ins name="ICMP.u32" opcode="0xF0"/>
|
||||
<ins name="ICMP.v2u16" opcode="0xF1"/>
|
||||
<ins name="ICMP.v4u8" opcode="0xF2"/>
|
||||
<ins name="ICMP_OR.u32" opcode="0xF0"/>
|
||||
<ins name="ICMP_OR.v2u16" opcode="0xF1"/>
|
||||
<ins name="ICMP_OR.v4u8" opcode="0xF2"/>
|
||||
<cmp/>
|
||||
<result_type/>
|
||||
<mod name="and" start="24" size="1"/>
|
||||
<mod name="seq" start="25" size="1"/>
|
||||
<src widen="true">A</src>
|
||||
<src widen="true">B</src>
|
||||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="FCMP" title="Floating-point compare" dests="1" unit="CVT">
|
||||
<group name="ICMP_AND" title="Unsigned integer compare" dests="1" unit="CVT" opcode2="1">
|
||||
<desc>
|
||||
Evaluates the given condition, do a logical and/or with the condition in
|
||||
Evaluates the given condition, do a logical and with the condition in
|
||||
the result source, and return in the given result type (integer
|
||||
one, integer minus one, or floating-point one). The third source is useful
|
||||
for chaining together conditions without intermediate bitwise arithmetic.
|
||||
</desc>
|
||||
<ins name="ICMP_AND.u32" opcode="0xF0"/>
|
||||
<ins name="ICMP_AND.v2u16" opcode="0xF1"/>
|
||||
<ins name="ICMP_AND.v4u8" opcode="0xF2"/>
|
||||
<cmp/>
|
||||
<result_type/>
|
||||
<src widen="true">A</src>
|
||||
<src widen="true">B</src>
|
||||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="FCMP_OR" title="Floating-point compare" dests="1" unit="CVT" opcode2="0">
|
||||
<desc>
|
||||
Evaluates the given condition, do a logical or with the condition in
|
||||
the result source, and return in the given result type (integer
|
||||
one, integer minus one, or floating-point one). The third source is useful
|
||||
for chaining together conditions without intermediate bitwise arithmetic;
|
||||
when this is not desired, tie it to zero and use the OR combine mode (do
|
||||
not set the `.and` modifier).
|
||||
when this is not desired, tie it to zero.
|
||||
</desc>
|
||||
<ins name="FCMP.f32" opcode="0xF4"/>
|
||||
<ins name="FCMP.v2f16" opcode="0xF5"/>
|
||||
<ins name="FCMP_OR.f32" opcode="0xF4"/>
|
||||
<ins name="FCMP_OR.v2f16" opcode="0xF5"/>
|
||||
<cmp/>
|
||||
<result_type/>
|
||||
<mod name="and" start="24" size="1"/>
|
||||
<src absneg="true" swizzle="true">A</src>
|
||||
<src absneg="true" swizzle="true">B</src>
|
||||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="ICMP" title="Signed integer compare" dests="1" unit="CVT">
|
||||
<group name="FCMP_AND" title="Floating-point compare" dests="1" unit="CVT" opcode2="1">
|
||||
<desc>
|
||||
Evaluates the given condition, do a logical and with the condition in
|
||||
the result source, and return in the given result type (integer
|
||||
one, integer minus one, or floating-point one). The third source is useful
|
||||
for chaining together conditions without intermediate bitwise arithmetic.
|
||||
</desc>
|
||||
<ins name="FCMP_AND.f32" opcode="0xF4"/>
|
||||
<ins name="FCMP_AND.v2f16" opcode="0xF5"/>
|
||||
<cmp/>
|
||||
<result_type/>
|
||||
<src absneg="true" swizzle="true">A</src>
|
||||
<src absneg="true" swizzle="true">B</src>
|
||||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="ICMP_OR" title="Signed integer compare" dests="1" unit="CVT" opcode2="0">
|
||||
<desc>
|
||||
Evaluates the given condition, do a logical or with the condition in
|
||||
the result source, and return in the given result type (integer
|
||||
one, integer minus one, or floating-point one). The third source is useful
|
||||
for chaining together conditions without intermediate bitwise arithmetic.
|
||||
</desc>
|
||||
<ins name="ICMP_OR.s32" opcode="0xF8"/>
|
||||
<ins name="ICMP_OR.v2s16" opcode="0xF9"/>
|
||||
<ins name="ICMP_OR.v4s8" opcode="0xFA"/>
|
||||
<cmp/>
|
||||
<result_type/>
|
||||
<src widen="true">A</src>
|
||||
<src widen="true">B</src>
|
||||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="ICMP_AND" title="Signed integer compare" dests="1" unit="CVT" opcode2="1">
|
||||
<desc>
|
||||
Evaluates the given condition, do a logical and with the condition in
|
||||
the result source, and return in the given result type (integer
|
||||
one, integer minus one, or floating-point one). The third source is useful
|
||||
for chaining together conditions without intermediate bitwise arithmetic.
|
||||
</desc>
|
||||
<ins name="ICMP_AND.s32" opcode="0xF8"/>
|
||||
<ins name="ICMP_AND.v2s16" opcode="0xF9"/>
|
||||
<ins name="ICMP_AND.v4s8" opcode="0xFA"/>
|
||||
<cmp/>
|
||||
<result_type/>
|
||||
<src widen="true">A</src>
|
||||
<src widen="true">B</src>
|
||||
<src>C</src>
|
||||
</group>
|
||||
|
||||
<group name="ICMP_MULTI" title="Integer compare" dests="1" unit="CVT" opcode2="2">
|
||||
<desc>
|
||||
Evaluates the given condition, do a logical and/or with the condition in
|
||||
the result source, and return in the given result type (integer
|
||||
|
|
@ -2217,20 +2272,15 @@
|
|||
when this is not desired, tie it to zero and use the OR combine mode (do
|
||||
not set the `.and` modifier).
|
||||
|
||||
The sequence modifier `.seq` is used to construct signed 64-bit compares
|
||||
Used to construct signed 64-bit compares
|
||||
in 1 `ICMP.u32` and 1 `ICMP.s32` instruction, in conjunction with the `u1`
|
||||
result type on the low half, the `m1` result type on the high half, and
|
||||
the result of the low half comparison passed as the third source. For
|
||||
comparisons other than 64-bit, do not set the `.seq` modifier and do not
|
||||
use the `u1` result type.
|
||||
the result of the low half comparison passed as the third source.
|
||||
</desc>
|
||||
<ins name="ICMP.s32" opcode="0xF8"/>
|
||||
<ins name="ICMP.v2s16" opcode="0xF9"/>
|
||||
<ins name="ICMP.v4s8" opcode="0xFA"/>
|
||||
<ins name="ICMP_MULTI.u32" opcode="0xF0"/>
|
||||
<ins name="ICMP_MULTI.s32" opcode="0xF8"/>
|
||||
<cmp/>
|
||||
<result_type/>
|
||||
<mod name="and" start="24" size="1"/>
|
||||
<mod name="seq" start="25" size="1"/>
|
||||
<src widen="true">A</src>
|
||||
<src widen="true">B</src>
|
||||
<src>C</src>
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0
|
|||
80 7c 47 20 00 c0 a3 01 SHADDX.u64 r0, u0, ^r60.w0, shift:0x4
|
||||
40 00 00 38 08 44 61 78 STORE.i128.slot0.end @r4:r5:r6:r7, ^r0, offset:0
|
||||
00 00 00 00 00 c0 00 78 NOP.end
|
||||
40 c4 c0 9c 01 c1 f0 00 ICMP.u32.gt.m1 r1, ^r0, 0x1000000.b3, 0x0
|
||||
40 c4 c0 9c 01 c1 f0 00 ICMP_OR.u32.gt.m1 r1, ^r0, 0x1000000.b3, 0x0
|
||||
42 00 00 18 02 40 61 50 STORE.i32.slot0.reconverge @r0, ^r2, offset:0
|
||||
00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b0
|
||||
00 00 00 30 00 c7 90 00 S8_TO_S32 r7, r0.b3
|
||||
|
|
@ -87,10 +87,10 @@ f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.end @r0:r1, blend_descriptor_0.w0,
|
|||
42 14 13 12 ad c2 12 01 IADD_IMM.v4i8 r2, ^r2, #0xAD121314
|
||||
42 14 00 13 00 c2 11 01 IADD_IMM.v2i16 r2, ^r2, #0x130014
|
||||
42 ab 4b 00 00 c2 10 01 IADD_IMM.i32 r2, ^r2, #0x4BAB
|
||||
43 42 c0 84 11 c2 f9 00 ICMP.v2s16.gt.m1 r2, ^r3.h10, ^r2.h10, 0x0
|
||||
43 42 c0 90 01 c2 f5 00 FCMP.v2f16.gt.m1 r2, ^r3.h10, ^r2.h00, 0x0
|
||||
43 42 c0 84 11 c2 f9 00 ICMP_OR.v2s16.gt.m1 r2, ^r3.h10, ^r2.h10, 0x0
|
||||
43 42 c0 90 01 c2 f5 00 FCMP_OR.v2f16.gt.m1 r2, ^r3.h10, ^r2.h00, 0x0
|
||||
42 00 07 00 20 c2 90 00 V2S16_TO_V2F16 r2, ^r2
|
||||
00 c0 c0 00 43 c1 f2 00 ICMP.v4u8.ne.i1 r1, r0.b0000, 0x0, 0x0
|
||||
00 c0 c0 00 43 c1 f2 00 ICMP_OR.v4u8.ne.i1 r1, r0.b0000, 0x0, 0x0
|
||||
41 03 00 00 00 c0 1f 50 BRANCHZ.reconverge ^r1, offset:3
|
||||
00 03 00 00 20 c0 1f 50 BRANCHZ.reconverge r0.h0, offset:3
|
||||
00 03 00 00 40 c0 1f 50 BRANCHZ.reconverge r0.h1, offset:3
|
||||
|
|
@ -99,7 +99,7 @@ c0 00 00 00 00 c0 10 01 IADD_IMM.i32 r0, 0x0, #0x0
|
|||
c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1
|
||||
80 00 27 20 00 c2 a3 01 SHADDX.u64 r2, u0, r0.w0, shift:0x2
|
||||
40 c9 00 10 00 c0 a0 00 IADD.u32 r0, ^r0, 0x7060504.b0
|
||||
00 82 c0 80 03 c1 f0 00 ICMP.u32.ne.m1 r1, r0, u2, 0x0
|
||||
00 82 c0 80 03 c1 f0 00 ICMP_OR.u32.ne.m1 r1, r0, u2, 0x0
|
||||
04 00 00 00 00 c5 91 00 MOV.i32 r5, r4
|
||||
04 00 00 00 00 c6 91 00 MOV.i32 r6, r4
|
||||
04 00 00 00 00 c7 91 08 MOV.i32.wait0 r7, r4
|
||||
|
|
@ -223,3 +223,17 @@ c0 f1 00 00 10 c1 2f 08 BRANCHZI.eq.absolute.wait0 0x0, blend_descriptor_0.w1
|
|||
82 00 80 15 b4 80 38 49 VAR_TEX_SINGLE.slot0.skip.sample_store.f.32.2d.zero.wait @r0:r1:r2:r3, u2, u0
|
||||
82 20 80 15 b4 80 38 09 VAR_TEX_SINGLE.slot0.skip.sample_store.f.32.2d.computed.wait0 @r0:r1:r2:r3, u2, u0
|
||||
82 20 80 1d 84 80 38 41 VAR_TEX_SINGLE.slot0.skip.sample_store.s.32.2d.computed.wait0126 @r0, u2, u0
|
||||
40 c0 c0 80 03 c0 f0 10 ICMP_OR.u32.ne.m1.wait1 r0, ^r0, 0x0, 0x0
|
||||
42 43 40 01 01 c0 f8 00 ICMP_AND.s32.gt.i1 r0, ^r2, ^r3, ^r0
|
||||
42 c0 c0 c2 03 c0 f0 10 ICMP_MULTI.u32.ne.u1.wait1 r0, ^r2, 0x0, 0x0
|
||||
44 46 c0 c2 01 c2 f0 00 ICMP_MULTI.u32.gt.u1 r2, ^r4, ^r6, 0x0
|
||||
45 47 42 82 01 c2 f0 00 ICMP_MULTI.u32.gt.m1 r2, ^r5, ^r7, ^r2
|
||||
43 c0 40 82 03 c0 f0 00 ICMP_MULTI.u32.ne.m1 r0, ^r3, 0x0, ^r0
|
||||
40 42 c0 c2 01 c0 f0 00 ICMP_MULTI.u32.gt.u1 r0, ^r0, ^r2, 0x0
|
||||
41 43 40 82 01 c4 f8 00 ICMP_MULTI.s32.gt.m1 r4, ^r1, ^r3, ^r0
|
||||
40 c0 c0 a8 03 c0 f5 10 FCMP_OR.v2f16.ne.m1.wait1 r0, ^r0, 0x0, 0x0
|
||||
41 41 40 ad 01 c0 f5 00 FCMP_AND.v2f16.gt.m1 r0, ^r1, ^r1.h11, ^r0
|
||||
40 c0 c0 a8 03 c0 f5 10 FCMP_OR.v2f16.ne.m1.wait1 r0, ^r0, 0x0, 0x0
|
||||
41 41 40 ad 01 c0 f5 00 FCMP_AND.v2f16.gt.m1 r0, ^r1, ^r1.h11, ^r0
|
||||
c4 c0 40 10 71 c0 b6 00 LSHIFT_AND.v4i8 r0, 0x1000000.b3333, 0x0.b00, ^r0
|
||||
40 00 13 00 80 c0 90 00 V2U8_TO_V2F16 r0, ^r0.b02
|
||||
|
|
|
|||
|
|
@ -114,70 +114,70 @@ TEST_F(LowerConstants, Int8InInt32)
|
|||
|
||||
TEST_F(LowerConstants, ZeroExtendForUnsigned)
|
||||
{
|
||||
CASE(bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFF), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_byte(va_lut(1), 0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF),
|
||||
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
|
||||
CASE(bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFF), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_half(va_lut(1), 0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
}
|
||||
|
||||
TEST_F(LowerConstants, SignExtendPositiveForSigned)
|
||||
{
|
||||
CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0x7F), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_byte(va_lut(2), 3), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0x7F), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
|
||||
CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0x7FFF), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_half(va_lut(2), 1), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0x7FFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
}
|
||||
|
||||
TEST_F(LowerConstants, SignExtendNegativeForSigned)
|
||||
{
|
||||
CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFFF8), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_byte(va_lut(23), 0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
|
||||
CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFAFC), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_half(va_lut(3), 1), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
}
|
||||
|
||||
TEST_F(LowerConstants, DontZeroExtendForSigned)
|
||||
{
|
||||
CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFF), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFF), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
|
||||
CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFF), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFFFF),
|
||||
BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0),
|
||||
BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
}
|
||||
|
||||
TEST_F(LowerConstants, DontZeroExtendNegative)
|
||||
{
|
||||
CASE(bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFFF8), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8),
|
||||
BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), bi_register(0),
|
||||
BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
|
||||
CASE(bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFAFC), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC),
|
||||
BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
|
||||
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
|
||||
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), bi_register(0),
|
||||
BI_CMPF_LT, BI_RESULT_TYPE_I1));
|
||||
}
|
||||
|
||||
TEST_F(LowerConstants, HandleTrickyNegativesFP16)
|
||||
|
|
|
|||
|
|
@ -147,19 +147,17 @@ TEST_F(ValhallPacking, FaddImm) {
|
|||
}
|
||||
|
||||
TEST_F(ValhallPacking, Comparions) {
|
||||
bi_instr *I =
|
||||
bi_icmp_v2s16_to(b, bi_register(2),
|
||||
CASE(bi_icmp_or_v2s16_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(3), true, false)),
|
||||
bi_discard(bi_swz_16(bi_register(2), true, false)),
|
||||
BI_CMPF_GT,
|
||||
BI_RESULT_TYPE_M1);
|
||||
I->src[2] = zero; // TODO: model in the IR
|
||||
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
|
||||
0x00f9c21184c04243);
|
||||
|
||||
CASE(I, 0x00f9c21184c04243);
|
||||
|
||||
I->op = BI_OPCODE_FCMP_V2F16;
|
||||
I->src[1] = bi_discard(bi_swz_16(bi_register(2), false, false));
|
||||
CASE(I, 0x00f5c20190c04243);
|
||||
CASE(bi_fcmp_or_v2f16_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(3), true, false)),
|
||||
bi_discard(bi_swz_16(bi_register(2), false, false)),
|
||||
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
|
||||
0x00f5c20190c04243);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, Conversions) {
|
||||
|
|
@ -307,11 +305,9 @@ TEST_F(ValhallPacking, Convert16To32) {
|
|||
}
|
||||
|
||||
TEST_F(ValhallPacking, Swizzle8) {
|
||||
bi_instr *I = bi_icmp_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0),
|
||||
zero, BI_CMPF_NE, BI_RESULT_TYPE_I1);
|
||||
I->src[2] = zero; // TODO: model in the IR
|
||||
|
||||
CASE(I, 0x00f2c14300c0c000);
|
||||
CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0),
|
||||
zero, zero, BI_CMPF_NE, BI_RESULT_TYPE_I1),
|
||||
0x00f2c14300c0c000);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, FauPage1) {
|
||||
|
|
|
|||
|
|
@ -41,30 +41,65 @@ va_lower_isel(bi_instr *I)
|
|||
I->src[1] = bi_zero();
|
||||
break;
|
||||
|
||||
/* Extra source in Valhall not yet modeled in the Bifrost IR */
|
||||
case BI_OPCODE_ICMP_I32:
|
||||
I->op = BI_OPCODE_ICMP_U32;
|
||||
I->op = BI_OPCODE_ICMP_OR_U32;
|
||||
I->src[2] = bi_zero();
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_V2I16:
|
||||
I->op = BI_OPCODE_ICMP_V2U16;
|
||||
I->op = BI_OPCODE_ICMP_OR_V2U16;
|
||||
I->src[2] = bi_zero();
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_V4I8:
|
||||
I->op = BI_OPCODE_ICMP_V4U8;
|
||||
I->op = BI_OPCODE_ICMP_OR_V4U8;
|
||||
I->src[2] = bi_zero();
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_U32:
|
||||
I->op = BI_OPCODE_ICMP_OR_U32;
|
||||
I->src[2] = bi_zero();
|
||||
I->nr_srcs = 3;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_V2U16:
|
||||
I->op = BI_OPCODE_ICMP_OR_V2U16;
|
||||
I->src[2] = bi_zero();
|
||||
I->nr_srcs = 3;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_V4U8:
|
||||
I->op = BI_OPCODE_ICMP_OR_V4U8;
|
||||
I->src[2] = bi_zero();
|
||||
I->nr_srcs = 3;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_S32:
|
||||
I->op = BI_OPCODE_ICMP_OR_S32;
|
||||
I->src[2] = bi_zero();
|
||||
I->nr_srcs = 3;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_V2S16:
|
||||
I->op = BI_OPCODE_ICMP_OR_V2S16;
|
||||
I->src[2] = bi_zero();
|
||||
I->nr_srcs = 3;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ICMP_V4S8:
|
||||
I->op = BI_OPCODE_ICMP_OR_V4S8;
|
||||
I->src[2] = bi_zero();
|
||||
I->nr_srcs = 3;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_FCMP_F32:
|
||||
I->op = BI_OPCODE_FCMP_OR_F32;
|
||||
I->src[2] = bi_zero();
|
||||
I->nr_srcs = 3;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_FCMP_V2F16:
|
||||
I->op = BI_OPCODE_FCMP_OR_V2F16;
|
||||
I->src[2] = bi_zero();
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ class Instruction:
|
|||
self.secondary_mask = 0xF if opcode2 is not None else 0x0
|
||||
if "left" in [x.name for x in self.modifiers]:
|
||||
self.secondary_mask |= 0x100
|
||||
if len(srcs) == 3 and (srcs[1].widen or srcs[1].lanes):
|
||||
if len(srcs) == 3 and (srcs[1].widen or srcs[1].lanes or srcs[1].swizzle):
|
||||
self.secondary_mask &= ~0xC # conflicts
|
||||
if opcode == 0x90:
|
||||
# XXX: XMLify this, but disambiguates sign of conversions
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue