diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c
index c0b551425e2..687a22979ba 100644
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@@ -4531,7 +4531,7 @@ bi_compile_variant_nir(nir_shader *nir,
va_lower_constants(ctx, I, const_hist, min_count_for_fau);
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
- va_repair_fau(&b, I);
+ va_repair_fau(&b, I, ctx->arch);
}
_mesa_hash_table_u64_destroy(const_hist);
diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml
index 6fc6e0d12de..53ddbc06856 100644
--- a/src/panfrost/compiler/bifrost/valhall/ISA.xml
+++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml
@@ -597,6 +597,18 @@
slot7
+
+
+ Dependency slot set on a message-passing instruction that writes to
+ registers. Before reading the destination, a future instruction must wait
+ on the specified slot. Slot #7 is for `BARRIER` instructions only.
+
+ slot0
+ slot1
+ slot2
+ slot7
+
+
Memory access hint for a `LOAD` or `STORE` instruction.
none
diff --git a/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp b/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
index 3b92c96087f..44b8257b583 100644
--- a/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
+++ b/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
@@ -48,10 +48,12 @@ TEST_F(ValhallPacking, Moves)
{
bi_instr *I = bi_mov_i32_to(b, bi_register(1), bi_register(2));
CASE_ARCH(I, 10, 0x0091c10000000002ULL);
+ CASE_ARCH(I, 15, 0x0060010000200002ULL);
I = bi_mov_i32_to(b, bi_register(1),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false));
CASE_ARCH(I, 10, 0x0091c1000000008aULL);
+ CASE_ARCH(I, 15, 0x006101000020000aULL);
}
TEST_F(ValhallPacking, Fadd)
@@ -59,44 +61,55 @@ TEST_F(ValhallPacking, Fadd)
bi_instr *I =
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2));
CASE_ARCH(I, 10, 0x00a4c00000000201ULL);
+ CASE_ARCH(I, 15, 0x00f0000000000201ULL);
I =
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2)));
CASE_ARCH(I, 10, 0x00a4c02000000201ULL);
+ CASE_ARCH(I, 15, 0x00f0002000000201ULL);
I =
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2)));
CASE_ARCH(I, 10, 0x00a4c01000000201ULL);
+ CASE_ARCH(I, 15, 0x00f0001000000201ULL);
I = bi_fadd_v2f16_to(b, bi_register(0),
bi_swz_16(bi_register(1), false, false),
bi_swz_16(bi_register(0), true, true));
CASE_ARCH(I, 10, 0x00a5c0000c000001ULL);
+ CASE_ARCH(I, 15, 0x00f400000c000001ULL);
I = bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0));
CASE_ARCH(I, 10, 0x00a5c00028000001ULL);
+ CASE_ARCH(I, 15, 0x00f4000028000001ULL);
I = bi_fadd_v2f16_to(b, bi_register(0), bi_register(1),
bi_swz_16(bi_register(0), true, false));
CASE_ARCH(I, 10, 0x00a5c00024000001ULL);
+ CASE_ARCH(I, 15, 0x00f4000024000001ULL);
I = bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))),
bi_neg(zero));
CASE_ARCH(I, 10, 0x00a5c0902800c040ULL);
+ CASE_ARCH(I, 15, 0x00f600902800c080ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), zero);
CASE_ARCH(I, 10, 0x00a4c0000000c001ULL);
+ CASE_ARCH(I, 15, 0x00f200000000c001ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(zero));
CASE_ARCH(I, 10, 0x00a4c0100000c001ULL);
+ CASE_ARCH(I, 15, 0x00f200100000c001ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), true));
CASE_ARCH(I, 10, 0x00a4c00008000001ULL);
+ CASE_ARCH(I, 15, 0x00f0000008000001ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), false));
CASE_ARCH(I, 10, 0x00a4c00004000001ULL);
+ CASE_ARCH(I, 15, 0x00f0000004000001ULL);
}
TEST_F(ValhallPacking, Clper)
@@ -105,6 +118,7 @@ TEST_F(ValhallPacking, Clper)
bi_byte(n4567, 0), BI_INACTIVE_RESULT_F1,
BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16);
CASE_ARCH(I, 10, 0x00a0c030128fc900);
+ CASE_ARCH(I, 15, 0x00e20030028fc900);
}
TEST_F(ValhallPacking, Clamps)
@@ -112,9 +126,11 @@ TEST_F(ValhallPacking, Clamps)
bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(bi_abs(bi_register(2))));
CASE_ARCH(I, 10, 0x00a4c03000000201ULL);
+ CASE_ARCH(I, 15, 0x00f0003000000201ULL);
I->clamp = BI_CLAMP_CLAMP_M1_1;
CASE_ARCH(I, 10, 0x00a4c03200000201ULL);
+ CASE_ARCH(I, 15, 0x00f0003080000201ULL);
}
TEST_F(ValhallPacking, Misc)
@@ -123,18 +139,22 @@ TEST_F(ValhallPacking, Misc)
b, bi_register(1), bi_discard(bi_register(1)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 4), false), bi_neg(zero));
CASE_ARCH(I, 10, 0x00b2c10400c08841ULL);
+ CASE_ARCH(I, 15, 0x0166010400c00881ULL);
I = bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))),
BI_ROUND_RTN);
CASE_ARCH(I, 10, 0x0090c240800d0042ULL);
+ CASE_ARCH(I, 15, 0x00600242004d0082ULL);
I = bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0),
BI_ROUND_RTN);
CASE_ARCH(I, 10, 0x00904000a00f0000ULL);
+ /* Removed on v11 */
I = bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN);
CASE_ARCH(I, 10, 0x00904000900f0001ULL);
+ /* Removed on v11 */
}
TEST_F(ValhallPacking, FaddImm)
@@ -142,10 +162,12 @@ TEST_F(ValhallPacking, FaddImm)
bi_instr *I = bi_fadd_imm_f32_to(b, bi_register(2),
bi_discard(bi_register(2)), 0x4847C6C0);
CASE_ARCH(I, 10, 0x0114C24847C6C042ULL);
+ CASE_ARCH(I, 15, 0x0064024847c6c082ULL);
I = bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)),
0x70AC6784);
CASE_ARCH(I, 10, 0x0115C270AC678442ULL);
+ CASE_ARCH(I, 15, 0x00620270ac678482ULL);
}
TEST_F(ValhallPacking, Comparions)
@@ -155,12 +177,14 @@ TEST_F(ValhallPacking, Comparions)
bi_discard(bi_swz_16(bi_register(2), true, false)), zero, BI_CMPF_GT,
BI_RESULT_TYPE_M1);
CASE_ARCH(I, 10, 0x00f9c21184c04243);
+ CASE_ARCH(I, 15, 0x01e40212c6c08283);
I = bi_fcmp_or_v2f16_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), false, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1);
CASE_ARCH(I, 10, 0x00f5c20190c04243);
+ CASE_ARCH(I, 15, 0x01e4020352c08283);
}
TEST_F(ValhallPacking, Conversions)
@@ -168,6 +192,7 @@ TEST_F(ValhallPacking, Conversions)
bi_instr *I =
bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)));
CASE_ARCH(I, 10, 0x0090c22000070042);
+ /* Removed on v11 */
}
TEST_F(ValhallPacking, BranchzI16)
@@ -176,6 +201,7 @@ TEST_F(ValhallPacking, BranchzI16)
bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
I->branch_offset = 1;
CASE_ARCH(I, 10, 0x001fc03000000102);
+ CASE_ARCH(I, 15, 0x02b8003000000102);
}
TEST_F(ValhallPacking, BranchzI16Backwards)
@@ -183,6 +209,7 @@ TEST_F(ValhallPacking, BranchzI16Backwards)
bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ);
I->branch_offset = -8;
CASE_ARCH(I, 10, 0x001fc017fffff8c0);
+ CASE_ARCH(I, 15, 0x02b90017fffff8c0);
}
TEST_F(ValhallPacking, Blend)
@@ -192,6 +219,7 @@ TEST_F(ValhallPacking, Blend)
bi_fau(BIR_FAU_BLEND_0, false), bi_fau(BIR_FAU_BLEND_0, true),
bi_null(), BI_REGISTER_FORMAT_F16, 2, 0);
CASE_ARCH(I, 10, 0x007f4004333c00f0);
+ CASE_ARCH(I, 15, 0x031b0082333c00f0);
}
TEST_F(ValhallPacking, Mux)
@@ -200,6 +228,7 @@ TEST_F(ValhallPacking, Mux)
b, bi_register(0), bi_discard(bi_register(0)), bi_discard(bi_register(4)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT);
CASE_ARCH(I, 10, 0x00b8c00300804440ull);
+ CASE_ARCH(I, 15, 0x017c000c80008480ull);
}
TEST_F(ValhallPacking, AtestFP16)
@@ -208,6 +237,7 @@ TEST_F(ValhallPacking, AtestFP16)
bi_half(bi_register(1), true),
bi_fau(BIR_FAU_ATEST_PARAM, false));
CASE_ARCH(I, 10, 0x007dbc0208ea013c);
+ CASE_ARCH(I, 15, 0x03d43c0108ea013c);
}
TEST_F(ValhallPacking, AtestFP32)
@@ -215,6 +245,7 @@ TEST_F(ValhallPacking, AtestFP32)
bi_instr *I = bi_atest_to(b, bi_register(60), bi_register(60), one,
bi_fau(BIR_FAU_ATEST_PARAM, false));
CASE_ARCH(I, 10, 0x007dbc0200ead03c);
+ CASE_ARCH(I, 15, 0x03d63c0100ead03c);
}
TEST_F(ValhallPacking, Transcendentals)
@@ -222,18 +253,28 @@ TEST_F(ValhallPacking, Transcendentals)
bi_instr *I =
bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true);
CASE_ARCH(I, 10, 0x0099c10001000000);
+ CASE_ARCH(I, 15, 0x0060010041200000);
I = bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false,
true);
CASE_ARCH(I, 10, 0x0099c00001020040);
+ CASE_ARCH(I, 15, 0x0060000041220080);
I = bi_frsq_f32_to(b, bi_register(2), bi_register(1));
CASE_ARCH(I, 10, 0x009cc20000020001);
+ CASE_ARCH(I, 15, 0x0060020001820001);
I = bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(2)), bi_neg(zero),
bi_discard(bi_register(0)), BI_SPECIAL_LEFT);
CASE_ARCH(I, 10, 0x0162c00440c04241);
+ CASE_ARCH(I, 15, 0x0264000e80c08281);
+
+ I = bi_fma_rscale_f32_to(b, bi_register(0), bi_register(1), bi_register(2),
+ bi_neg(zero), bi_discard(bi_register(0)),
+ BI_SPECIAL_N);
+ CASE_ARCH(I, 10, 0x0161c00440c00201);
+ CASE_ARCH(I, 15, 0x0264000d80c00201);
}
TEST_F(ValhallPacking, Csel)
@@ -243,18 +284,21 @@ TEST_F(ValhallPacking, Csel)
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_EQ);
CASE_ARCH(I, 10, 0x0150c10085844342);
+ CASE_ARCH(I, 15, 0x027c010005048382);
I = bi_csel_u32_to(
b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT);
CASE_ARCH(I, 10, 0x0150c10485844342);
+ CASE_ARCH(I, 15, 0x027c010805048382);
I = bi_csel_s32_to(
b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT);
CASE_ARCH(I, 10, 0x0158c10485844342);
+ CASE_ARCH(I, 15, 0x027c014805048382);
}
TEST_F(ValhallPacking, LdAttrImm)
@@ -265,6 +309,7 @@ TEST_F(ValhallPacking, LdAttrImm)
I->table = 1;
CASE_ARCH(I, 10, 0x0066800433117d7c);
+ CASE_ARCH(I, 15, 0x038400023311bdbc);
}
TEST_F(ValhallPacking, LdVarBufImmF16)
@@ -274,12 +319,14 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
BI_SAMPLE_CENTER, BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE, BI_VECSIZE_V4,
0);
CASE_ARCH(I, 10, 0x005d82143300003d);
+ CASE_ARCH(I, 15, 0x0310020a3f00003d);
I = bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE,
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 0);
CASE_ARCH(I, 10, 0x005d80843300003d);
+ CASE_ARCH(I, 15, 0x031000423f00003d);
I = bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
@@ -287,6 +334,7 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
BI_VECSIZE_V4, 8);
CASE_ARCH(I, 10, 0x005d80443308003d);
CASE_ARCH(I, 11, 0x005d80443300083d);
+ CASE_ARCH(I, 15, 0x031000223f00083d);
}
TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
@@ -294,10 +342,12 @@ TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
bi_instr *I = bi_ld_var_buf_flat_imm_to(
b, bi_register(0), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 0x12);
CASE_ARCH(I, 14, 0x0040800832001200);
+ CASE_ARCH(I, 15, 0x033900043a0012c0);
I = bi_ld_var_buf_flat_imm_to(b, bi_register(0), BI_REGISTER_FORMAT_F16,
BI_VECSIZE_V4, 0x12);
CASE_ARCH(I, 14, 0x0040800433001200);
+ CASE_ARCH(I, 15, 0x033900023b0012c0);
}
TEST_F(ValhallPacking, LdVarBufFlat)
@@ -305,10 +355,12 @@ TEST_F(ValhallPacking, LdVarBufFlat)
bi_instr *I = bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
CASE_ARCH(I, 14, 0x005f80083200003d);
+ CASE_ARCH(I, 15, 0x031400043a00003d);
I = bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4);
CASE_ARCH(I, 14, 0x005f80043300003d);
+ CASE_ARCH(I, 15, 0x031400023b00003d);
}
TEST_F(ValhallPacking, LeaBufImm)
@@ -316,6 +368,7 @@ TEST_F(ValhallPacking, LeaBufImm)
bi_instr *I =
bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59)));
CASE_ARCH(I, 10, 0x005e84040000007b);
+ CASE_ARCH(I, 15, 0x03080402000000bb);
}
TEST_F(ValhallPacking, StoreMemoryAccess)
@@ -324,6 +377,7 @@ TEST_F(ValhallPacking, StoreMemoryAccess)
bi_discard(bi_register(5)), BI_SEG_NONE, 0);
I->mem_access = VA_MEMORY_ACCESS_ESTREAM;
CASE_ARCH(I, 10, 0x0061400632000044);
+ CASE_ARCH(I, 15, 0x0320009302000084);
}
TEST_F(ValhallPacking, Convert16To32)
@@ -331,26 +385,32 @@ TEST_F(ValhallPacking, Convert16To32)
bi_instr *I = bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), false)));
CASE_ARCH(I, 10, 0x0090c20000140077);
+ CASE_ARCH(I, 15, 0x00600200005400b7);
I = bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), true)));
CASE_ARCH(I, 10, 0x0090c20010140077);
+ CASE_ARCH(I, 15, 0x00600200105400b7);
I = bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), false)));
CASE_ARCH(I, 10, 0x0090c20000150077);
+ /* Removed on v11 */
I = bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), true)));
CASE_ARCH(I, 10, 0x0090c20010150077);
+ /* Removed on v11 */
I = bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), false)));
CASE_ARCH(I, 10, 0x0090c20000040077);
+ CASE_ARCH(I, 15, 0x00600200004400b7);
I = bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), true)));
CASE_ARCH(I, 10, 0x0090c20010040077);
+ CASE_ARCH(I, 15, 0x00600200104400b7);
}
TEST_F(ValhallPacking, Swizzle8)
@@ -359,6 +419,7 @@ TEST_F(ValhallPacking, Swizzle8)
bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), zero,
zero, BI_CMPF_NE, BI_RESULT_TYPE_I1);
CASE_ARCH(I, 10, 0x00f2c14300c0c000);
+ /* Removed on v11 */
}
TEST_F(ValhallPacking, FauPage1)
@@ -366,6 +427,7 @@ TEST_F(ValhallPacking, FauPage1)
bi_instr *I = bi_mov_i32_to(
b, bi_register(1), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 32), false));
CASE_ARCH(I, 10, 0x0291c10000000080ULL);
+ CASE_ARCH(I, 15, 0x0061010000200040ULL);
}
TEST_F(ValhallPacking, LdTileV3F16)
@@ -374,6 +436,7 @@ TEST_F(ValhallPacking, LdTileV3F16)
bi_register(60), bi_register(3),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V3);
CASE_ARCH(I, 10, 0x0078840423033c40);
+ CASE_ARCH(I, 15, 0x03c0040223033c80);
}
TEST_F(ValhallPacking, Rhadd8)
@@ -381,4 +444,26 @@ TEST_F(ValhallPacking, Rhadd8)
bi_instr *I = bi_hadd_v4s8_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(0)), BI_ROUND_RTP);
CASE_ARCH(I, 10, 0x00aac000400b4041);
+ /* Removed on v11 */
+}
+
+TEST_F(ValhallPacking, Atomics)
+{
+
+ bi_instr *I =
+ bi_atom1_return_i64_to(b, bi_register(0), bi_discard(bi_register(2)),
+ bi_register(3), BI_ATOM_OPC_AINC, 2);
+ CASE_ARCH(I, 10, 0x0069800428000042);
+ CASE_ARCH(I, 15, 0x0328000220000082);
+
+ I = bi_atom_return_i32_to(b, bi_register(0), bi_discard(bi_register(1)),
+ bi_register(2), bi_register(3), BI_ATOM_OPC_AXCHG,
+ 1);
+ CASE_ARCH(I, 10, 0x0120c1021bc00002);
+ CASE_ARCH(I, 15, 0x032401c10f000002);
+
+ I = bi_atom_return_i64_to(b, bi_register(0), bi_register(2), bi_register(6),
+ bi_register(7), BI_ATOM_OPC_ACMPXCHG, 2);
+ CASE_ARCH(I, 10, 0x0120c2182fc00006);
+ CASE_ARCH(I, 15, 0x032802cc2f000006);
}
diff --git a/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp b/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp
index e4a0945f1f6..6c3f1f44905 100644
--- a/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp
+++ b/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp
@@ -9,9 +9,9 @@
#include
-#define CASE(instr, expected) \
+#define CASE_ARCH(instr, arch, expected) \
do { \
- if (va_validate_fau(instr) != expected) { \
+ if (va_validate_fau(instr, arch) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
@@ -19,8 +19,8 @@
} \
} while (0)
-#define VALID(instr) CASE(instr, true)
-#define INVALID(instr) CASE(instr, false)
+#define VALID(instr) CASE_ARCH(instr, 10, true)
+#define INVALID(instr) CASE_ARCH(instr, 10, false)
class ValidateFau : public testing::Test {
protected:
diff --git a/src/panfrost/compiler/bifrost/valhall/va_compiler.h b/src/panfrost/compiler/bifrost/valhall/va_compiler.h
index 622ab81b302..5a227c80412 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_compiler.h
+++ b/src/panfrost/compiler/bifrost/valhall/va_compiler.h
@@ -13,9 +13,9 @@
extern "C" {
#endif
-bool va_validate_fau(bi_instr *I);
+bool va_validate_fau(bi_instr *I, unsigned arch);
void va_validate(FILE *fp, bi_context *ctx);
-void va_repair_fau(bi_builder *b, bi_instr *I);
+void va_repair_fau(bi_builder *b, bi_instr *I, unsigned arch);
void va_fuse_add_imm(bi_instr *I);
void va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, uint32_t min_fau_count);
void va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts);
@@ -28,14 +28,15 @@ void va_gather_hsr_info(bi_context *ctx, struct pan_shader_info *info);
uint64_t va_pack_instr(const bi_instr *I, unsigned arch);
static inline unsigned
-va_fau_page(enum bir_fau value)
+va_fau_page(enum bir_fau value, unsigned arch)
{
/* Uniform slots of FAU have a 7-bit index. The top 2-bits are the page; the
* bottom 5-bits are specified in the source.
*/
if (value & BIR_FAU_UNIFORM) {
+ unsigned value_shift = arch >= 15 ? 6 : 5;
unsigned slot = value & ~BIR_FAU_UNIFORM;
- unsigned page = slot >> 5;
+ unsigned page = slot >> value_shift;
assert(page <= 3);
return page;
@@ -57,11 +58,11 @@ va_fau_page(enum bir_fau value)
}
static inline unsigned
-va_select_fau_page(const bi_instr *I)
+va_select_fau_page(const bi_instr *I, unsigned arch)
{
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_FAU)
- return va_fau_page((enum bir_fau)I->src[s].value);
+ return va_fau_page((enum bir_fau)I->src[s].value, arch);
}
return 0;
diff --git a/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c b/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c
index 9f3e7881ac3..c2812546067 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c
@@ -520,7 +520,7 @@ va_assign_slots(bi_context *ctx)
bi_foreach_instr_global(ctx, I) {
if (I->op == BI_OPCODE_BARRIER) {
- I->slot = 7;
+ I->slot = (ctx->arch >= 15) ? VA_SLOT_V15_SLOT7 : VA_SLOT_SLOT7;
} else if (I->op == BI_OPCODE_ZS_EMIT || I->op == BI_OPCODE_ATEST) {
I->slot = 0;
} else if (bi_get_opcode_props(I)->message) {
diff --git a/src/panfrost/compiler/bifrost/valhall/va_pack.c b/src/panfrost/compiler/bifrost/valhall/va_pack.c
index 129512ce170..ea2e78e98e0 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_pack.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_pack.c
@@ -74,6 +74,15 @@ va_pack_reg(const bi_instr *I, bi_index idx)
return idx.value;
}
+static unsigned
+va_pack_reg_v15(const bi_instr *I, bi_index idx)
+{
+ pack_assert(I, idx.type == BI_INDEX_REGISTER);
+ pack_assert(I, idx.value < 128);
+
+ return idx.value;
+}
+
static unsigned
va_pack_fau_special(const bi_instr *I, enum bir_fau fau)
{
@@ -124,6 +133,21 @@ va_pack_fau_64(const bi_instr *I, bi_index idx)
return (0x7 << 5) | (va_pack_fau_special(I, idx.value) << 1);
}
+static unsigned
+va_pack_fau_64_v15(const bi_instr *I, bi_index idx)
+{
+ pack_assert(I, idx.type == BI_INDEX_FAU);
+
+ unsigned val = (idx.value & BITFIELD_MASK(6));
+
+ if (idx.value & BIR_FAU_IMMEDIATE)
+ return (0x7 << 6) | (val << 1);
+ else if (idx.value & BIR_FAU_UNIFORM)
+ return (0x2 << 7) | (val << 1);
+ else
+ return (0xf << 5) | (va_pack_fau_special(I, idx.value) << 1);
+}
+
static unsigned
va_pack_src(const bi_instr *I, unsigned s)
{
@@ -142,6 +166,33 @@ va_pack_src(const bi_instr *I, unsigned s)
invalid_instruction(I, "type of source %u", s);
}
+static uint64_t
+va_pack_src_v15(const bi_instr *I, unsigned s, unsigned loc)
+{
+ bi_index idx = I->src[s];
+
+ uint64_t hex = 0;
+ uint64_t regval = 0;
+
+ if (idx.type == BI_INDEX_REGISTER) {
+ regval = va_pack_reg_v15(I, idx);
+ if (idx.discard)
+ regval |= (1 << 7);
+ } else if (idx.type == BI_INDEX_FAU) {
+ pack_assert(I, idx.offset <= 1);
+ regval = va_pack_fau_64_v15(I, idx) | idx.offset;
+ } else
+ invalid_instruction(I, "type of source %u", s);
+
+ uint64_t low8 = regval & 0xff;
+ uint64_t high1 = (regval >> 8) & 0x1;
+
+ hex |= (low8 << (8 * loc));
+ hex |= (high1 << (48 + loc));
+
+ return hex;
+}
+
static unsigned
va_pack_wrmask(const bi_instr *I)
{
@@ -211,6 +262,20 @@ va_pack_dest(const bi_instr *I)
return va_pack_reg(I, I->dest[0]) | (va_pack_wrmask(I) << 6);
}
+static unsigned
+va_pack_dest_v15(const bi_instr *I)
+{
+ assert(I->nr_dests);
+ switch (I->op) {
+ case BI_OPCODE_SHADDX_S64:
+ case BI_OPCODE_SHADDX_U64:
+      /* 64-bit dest has a 0x0 wrmask */
+ return va_pack_reg_v15(I, I->dest[0]);
+ default:
+ return va_pack_reg_v15(I, I->dest[0]) | (va_pack_wrmask(I) << 13);
+ }
+}
+
static enum va_widen
va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz)
{
@@ -452,6 +517,18 @@ va_pack_rhadd(const bi_instr *I)
}
}
+static uint64_t
+va_pack_clamp_special_round_v15(const bi_instr *I)
+{
+ pack_assert(I, I->special < 4);
+ if (I->special == BI_SPECIAL_N && I->round == BI_ROUND_RTZ)
+ return 0x4;
+ else if (I->special)
+ return 0x4 | I->special;
+ else
+ return I->clamp;
+}
+
static uint64_t
va_pack_alu(const bi_instr *I, unsigned arch)
{
@@ -465,25 +542,25 @@ va_pack_alu(const bi_instr *I, unsigned arch)
case BI_OPCODE_FREXPM_F32:
case BI_OPCODE_FREXPM_V2F16:
if (I->sqrt)
- hex |= 1ull << 24;
+ hex |= 1ull << ((arch >= 15) ? 30 : 24);
if (I->log)
- hex |= 1ull << 25;
+ hex |= 1ull << ((arch >= 15) ? 31 : 25);
break;
case BI_OPCODE_FLUSH_F32:
case BI_OPCODE_FLUSH_V2F16:
- hex |= I->nan_mode << 8;
+ hex |= I->nan_mode << ((arch >= 15) ? 30 : 8);
if (I->ftz)
- hex |= 1ull << 10;
+ hex |= 1ull << ((arch >= 15) ? 32 : 10);
if (I->flush_inf)
- hex |= 1ull << 11;
+ hex |= 1ull << ((arch >= 15) ? 33 : 11);
break;
/* Add mux type */
case BI_OPCODE_MUX_I32:
case BI_OPCODE_MUX_V2I16:
case BI_OPCODE_MUX_V4I8:
- hex |= (uint64_t)I->mux << 32;
+ hex |= (uint64_t)I->mux << ((arch >= 15) ? 34 : 32);
break;
/* Add .eq flag */
@@ -495,7 +572,7 @@ va_pack_alu(const bi_instr *I, unsigned arch)
hex |= (1ull << 36);
if (I->op == BI_OPCODE_BRANCHZI)
- hex |= (0x1ull << 40); /* Absolute */
+ hex |= (0x1ull << ((arch >= 15) ? 31 : 40)); /* Absolute */
else
hex |= ((uint64_t)I->branch_offset & BITFIELD_MASK(27)) << 8;
@@ -511,7 +588,46 @@ va_pack_alu(const bi_instr *I, unsigned arch)
case BI_OPCODE_RSHIFT_XOR_I32:
case BI_OPCODE_RSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V4I8:
- hex |= (uint64_t)I->arithmetic << 34;
+ if (arch >= 15) {
+ /* Rewrite exact to ARSHIFT */
+ if (I->arithmetic) {
+ switch (I->op) {
+ case BI_OPCODE_RSHIFT_AND_I32:
+ case BI_OPCODE_RSHIFT_AND_V2I16:
+ case BI_OPCODE_RSHIFT_AND_V4I8: {
+ uint64_t arshift_and_op = (0xcULL << 30);
+ /* Check that we can safely overwrite opcode */
+ pack_assert(I, ((info.exact & (0xfULL << 30)) |
+ arshift_and_op) == arshift_and_op);
+ hex |= arshift_and_op;
+ break;
+ }
+ case BI_OPCODE_RSHIFT_OR_I32:
+ case BI_OPCODE_RSHIFT_OR_V2I16:
+ case BI_OPCODE_RSHIFT_OR_V4I8: {
+ uint64_t arshift_or_op = (0xdULL << 30);
+ /* Check that we can safely overwrite opcode */
+ pack_assert(I, ((info.exact & (0xfULL << 30)) | arshift_or_op) ==
+ arshift_or_op);
+ hex |= arshift_or_op;
+ break;
+ }
+ case BI_OPCODE_RSHIFT_XOR_I32:
+ case BI_OPCODE_RSHIFT_XOR_V2I16:
+ case BI_OPCODE_RSHIFT_XOR_V4I8: {
+ uint64_t arshift_xor_op = (0xbULL << 30);
+ /* Check that we can safely overwrite opcode */
+ pack_assert(I, ((info.exact & (0xfULL << 30)) |
+ arshift_xor_op) == arshift_xor_op);
+ hex |= arshift_xor_op;
+ break;
+ }
+ default:
+ UNREACHABLE("RSHIFT->ARSHIFT");
+ }
+ }
+ } else
+ hex |= (uint64_t)I->arithmetic << 34;
break;
case BI_OPCODE_LEA_BUF_IMM:
@@ -562,8 +678,8 @@ va_pack_alu(const bi_instr *I, unsigned arch)
}
hex |= ((uint64_t)va_pack_source_format(I)) << 24;
- hex |= ((uint64_t)I->update) << 36;
- hex |= ((uint64_t)I->sample) << 38;
+ hex |= ((uint64_t)I->update) << ((arch >= 15) ? 35 : 36);
+ hex |= ((uint64_t)I->sample) << ((arch >= 15) ? 37 : 38);
break;
case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
@@ -601,20 +717,18 @@ va_pack_alu(const bi_instr *I, unsigned arch)
break;
}
- /* FMA_RSCALE.f32 special modes treated as extra opcodes */
- if (I->op == BI_OPCODE_FMA_RSCALE_F32) {
- pack_assert(I, I->special < 4);
- hex |= ((uint64_t)I->special) << 48;
- }
-
/* Add the normal destination or a placeholder. Staging destinations are
* added elsewhere, as they require special handling for control fields.
*/
if (info.has_dest && info.nr_staging_dests == 0) {
- hex |= (uint64_t)va_pack_dest(I) << 40;
+ if (arch >= 15)
+ hex |= (uint64_t)va_pack_dest_v15(I) << 40;
+ else
+ hex |= (uint64_t)va_pack_dest(I) << 40;
} else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) {
pack_assert(I, I->nr_dests == 0);
- hex |= 0xC0ull << 40; /* Placeholder */
+ if (arch < 15)
+ hex |= 0xC0ull << 40; /* Placeholder */
}
bool swap12 = va_swap_12(I->op);
@@ -629,7 +743,10 @@ va_pack_alu(const bi_instr *I, unsigned arch)
enum va_size size = src_info.size;
bi_index src = I->src[logical_i + src_offset];
- hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i);
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, logical_i + src_offset, i);
+ else
+ hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i);
if (src_info.notted) {
if (src.neg)
@@ -638,10 +755,15 @@ va_pack_alu(const bi_instr *I, unsigned arch)
unsigned neg_offs = 32 + 2 + ((2 - i) * 2);
unsigned abs_offs = 33 + 2 + ((2 - i) * 2);
- if (src.neg)
- hex |= 1ull << neg_offs;
- if (src.abs)
- hex |= 1ull << abs_offs;
+ if (arch >= 15 && I->op == BI_OPCODE_FMA_RSCALE_F32 && i == 2) {
+ if (src.neg)
+ hex |= 1ull << (neg_offs + 1);
+ } else {
+ if (src.neg)
+ hex |= 1ull << neg_offs;
+ if (src.abs)
+ hex |= 1ull << abs_offs;
+ }
} else {
if (src.neg)
invalid_instruction(I, "negate");
@@ -661,8 +783,8 @@ va_pack_alu(const bi_instr *I, unsigned arch)
unsigned offs = (i == 1) ? 26 : 36;
hex |= (uint64_t)va_pack_widen(I, src.swizzle, src_info.size) << offs;
} else if (src_info.lane) {
- unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ?
- ((i == 0) ? 38 : 36) : ((i == 0) ? 28 : 26);
+ unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36)
+ : ((i == 0) ? 28 : 26);
if (src_info.size == VA_SIZE_16) {
hex |= (src.swizzle == BI_SWIZZLE_H1 ? 1 : 0) << offs;
@@ -675,7 +797,25 @@ va_pack_alu(const bi_instr *I, unsigned arch)
} else if (src_info.lanes) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 1);
- hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26;
+ if (arch >= 15 && I->op == BI_OPCODE_CLPER_I32) {
+ switch (src.swizzle) {
+ case BI_SWIZZLE_B00:
+ hex |= 0x0ULL << 28;
+ break;
+ case BI_SWIZZLE_B11:
+ hex |= 0x1ULL << 28;
+ break;
+ case BI_SWIZZLE_B22:
+ hex |= 0x2ULL << 28;
+ break;
+ case BI_SWIZZLE_B33:
+ hex |= 0x3ULL << 28;
+ break;
+ default:
+ invalid_instruction(I, "lane shift");
+ }
+ } else
+ hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26;
} else if (src_info.combine) {
/* Treat as swizzle, subgroup ops not yet supported */
pack_assert(I, src_info.size == VA_SIZE_32);
@@ -691,17 +831,33 @@ va_pack_alu(const bi_instr *I, unsigned arch)
}
if (info.saturate)
- hex |= (uint64_t)I->saturate << 30;
- if (info.rhadd)
+ hex |= (uint64_t)I->saturate << ((arch >= 15) ? 25 : 30);
+ if (info.rhadd) {
+ pack_assert(I, arch < 15);
hex |= va_pack_rhadd(I);
- if (info.clamp)
- hex |= (uint64_t)I->clamp << 32;
- if (info.round_mode)
- hex |= (uint64_t)I->round << 30;
+ }
+ /* FMA_RSCALE.f32 special modes treated as extra opcodes */
+ if (I->op == BI_OPCODE_FMA_RSCALE_F32) {
+ if (arch >= 15) {
+ hex |= va_pack_clamp_special_round_v15(I) << 32;
+ } else {
+ pack_assert(I, I->special < 4);
+ hex |= ((uint64_t)I->special) << 48;
+ if (info.clamp)
+ hex |= (uint64_t)I->clamp << 32;
+ if (info.round_mode && I->round == BI_ROUND_RTZ)
+ hex |= (uint64_t)0x1 << 50;
+ }
+ } else {
+ if (info.clamp)
+ hex |= (uint64_t)I->clamp << ((arch >= 15) ? 30 : 32);
+ if (info.round_mode)
+ hex |= (uint64_t)I->round << ((arch >= 15) ? 32 : 30);
+ }
if (info.condition)
- hex |= (uint64_t)I->cmpf << 32;
+ hex |= (uint64_t)I->cmpf << ((arch >= 15) ? 33 : 32);
if (info.result_type)
- hex |= (uint64_t)I->result_type << 30;
+ hex |= (uint64_t)I->result_type << ((arch >= 15) ? 24 : 30);
return hex;
}
@@ -768,6 +924,26 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
return hex;
}
+
+static uint64_t
+va_pack_load_v15(const bi_instr *I, bool buffer_descriptor)
+{
+   /* This implicitly means identity: VA_LOAD_LANE_8_BIT_B0 for i8 (bits [28:27])
+    * and VA_LOAD_LANE_16_BIT_H0 for i16 (bit [27]) */
+ uint64_t hex = 0;
+
+ if (!buffer_descriptor)
+ hex |= va_pack_byte_offset(I);
+
+ hex |= va_pack_src_v15(I, 0, 0);
+ hex |= (uint64_t)I->mem_access << 24;
+
+ if (buffer_descriptor)
+ hex |= va_pack_src_v15(I, 1, 1);
+
+ return hex;
+}
+
static uint64_t
va_pack_store(const bi_instr *I)
{
@@ -782,6 +958,20 @@ va_pack_store(const bi_instr *I)
return hex;
}
+static uint64_t
+va_pack_store_v15(const bi_instr *I)
+{
+ uint64_t hex = 0;
+
+ va_validate_register_pair(I, 1);
+ hex |= va_pack_src_v15(I, 1, 0);
+ hex |= I->mem_access << 24;
+
+ hex |= va_pack_byte_offset(I);
+
+ return hex;
+}
+
static enum va_lod_mode
va_pack_lod_mode(const bi_instr *I)
{
@@ -824,13 +1014,45 @@ va_pack_register_format(const bi_instr *I)
}
}
+static uint64_t
+va_pack_src_null_v15(unsigned loc)
+{
+ uint64_t hex = 0;
+ uint64_t regval = 0x1c0;
+
+ uint64_t low8 = regval & 0xff;
+ uint64_t high1 = (regval >> 8) & 0x1;
+
+ hex |= (low8 << (8 * loc));
+ hex |= (high1 << (48 + loc));
+
+ return hex;
+}
+
+static unsigned
+va_repack_sr_control_v15(unsigned sr_control)
+{
+ unsigned repacked = 0;
+ bool read = sr_control & 0x1;
+ bool write = sr_control & 0x2;
+
+ if (read) {
+ repacked |= 0x2;
+ if (write)
+ repacked |= 0x1;
+ }
+
+ return repacked;
+}
+
uint64_t
va_pack_instr(const bi_instr *I, unsigned arch)
{
struct va_opcode_info info = get_valhall_opcode(I->op, arch);
- uint64_t hex = info.exact | (((uint64_t)I->flow) << 59);
- hex |= ((uint64_t)va_select_fau_page(I)) << 57;
+ uint64_t hex =
+ info.exact | (((uint64_t)I->flow) << ((arch >= 15) ? 58 : 59));
+ hex |= ((uint64_t)va_select_fau_page(I, arch)) << ((arch >= 15) ? 62 : 57);
if (info.slot)
hex |= ((uint64_t)I->slot << 30);
@@ -842,14 +1064,60 @@ va_pack_instr(const bi_instr *I, unsigned arch)
unsigned count =
read ? bi_count_read_registers(I, 0) : bi_count_write_registers(I, 0);
- hex |= ((uint64_t)count << 33);
- hex |= (uint64_t)va_pack_reg(I, sr) << 40;
- hex |= ((uint64_t)info.sr_control << 46);
+ hex |= ((uint64_t)count << ((arch >= 15) ? 32 : 33));
+ if (arch >= 15) {
+ hex |= (uint64_t)va_pack_reg_v15(I, sr) << 40;
+ hex |= ((uint64_t)va_repack_sr_control_v15(info.sr_control) << 38);
+ } else {
+ hex |= (uint64_t)va_pack_reg(I, sr) << 40;
+ hex |= ((uint64_t)info.sr_control << 46);
+ }
+ }
+
+ /* On v15, some instructions require special sr_control values */
+ if (arch >= 15) {
+ switch (I->op) {
+ case BI_OPCODE_BARRIER: {
+ unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
+ pack_assert(I, sr_control == 0x0 || sr_control == 0x2);
+ hex |= (uint64_t)0x2 << 38;
+ break;
+ }
+ case BI_OPCODE_ATOM1_RETURN_I32:
+ case BI_OPCODE_ATOM1_RETURN_I64: {
+ unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
+ pack_assert(I, sr_control == 0x0);
+ break;
+ }
+ case BI_OPCODE_ATOM_I32:
+ case BI_OPCODE_ATOM_I64: {
+ unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
+ pack_assert(I, sr_control == 0x2);
+ break;
+ }
+ case BI_OPCODE_ATOM_RETURN_I32:
+ case BI_OPCODE_ATOM_RETURN_I64:
+ case BI_OPCODE_AXCHG_I32:
+ case BI_OPCODE_AXCHG_I64:
+ case BI_OPCODE_ACMPXCHG_I32:
+ case BI_OPCODE_ACMPXCHG_I64: {
+ unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
+ pack_assert(I, sr_control == 0x0 || sr_control == 0x3);
+ hex |= (uint64_t)0x3 << 38;
+ break;
+ }
+ default:
+ break;
+ }
}
if (info.sr_write_count) {
- hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) << 36;
- hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16;
+ hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1)
+ << ((arch >= 15) ? 35 : 36);
+ if (arch >= 15)
+ hex |= ((uint64_t)va_pack_reg_v15(I, I->dest[0])) << 16;
+ else
+ hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16;
}
if (info.vecsize)
@@ -867,7 +1135,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_LOAD_I64:
case BI_OPCODE_LOAD_I96:
case BI_OPCODE_LOAD_I128:
- hex |= va_pack_load(I, false);
+ if (arch >= 15)
+ hex |= va_pack_load_v15(I, false);
+ else
+ hex |= va_pack_load(I, false);
break;
case BI_OPCODE_LD_PKA_I8:
@@ -878,7 +1149,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_LD_PKA_I64:
case BI_OPCODE_LD_PKA_I96:
case BI_OPCODE_LD_PKA_I128:
- hex |= va_pack_load(I, true);
+ if (arch >= 15)
+ hex |= va_pack_load_v15(I, true);
+ else
+ hex |= va_pack_load(I, true);
break;
case BI_OPCODE_STORE_I8:
@@ -889,20 +1163,26 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_STORE_I64:
case BI_OPCODE_STORE_I96:
case BI_OPCODE_STORE_I128:
- hex |= va_pack_store(I);
+ if (arch >= 15)
+ hex |= va_pack_store_v15(I);
+ else
+ hex |= va_pack_store(I);
break;
case BI_OPCODE_ATOM1_RETURN_I64:
/* Permit omitting the destination for plain ATOM1 */
- if (!bi_count_write_registers(I, 0)) {
+ if (arch < 15 && !bi_count_write_registers(I, 0)) {
hex |= (0x40ull << 40); // fake read
}
/* 64-bit source */
va_validate_register_pair(I, 0);
- hex |= (uint64_t)va_pack_src(I, 0) << 0;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 0, 0);
+ else
+ hex |= (uint64_t)va_pack_src(I, 0) << 0;
hex |= va_pack_byte_offset_8(I);
- hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22;
+ hex |= ((uint64_t)va_pack_atom_opc_1(I)) << ((arch >= 15) ? 24 : 22);
break;
case BI_OPCODE_ACMPXCHG_I64:
@@ -911,29 +1191,43 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_ATOM_RETURN_I64:
/* 64-bit source */
va_validate_register_pair(I, 1);
- hex |= (uint64_t)va_pack_src(I, 1) << 0;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 1, 0);
+ else
+ hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset_8(I);
- hex |= ((uint64_t)va_pack_atom_opc(I)) << 22;
+ hex |= ((uint64_t)va_pack_atom_opc(I)) << ((arch >= 15) ? 24 : 22);
- if (I->op == BI_OPCODE_ATOM_RETURN_I64)
- hex |= (0xc0ull << 40); // flags
+ if (arch >= 15) {
+ if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) {
+ /* Change bits [51;50] to be ACMPXCHG */
+ pack_assert(I, ((hex >> 50) & 0b11) == 0b01);
+ hex ^= (0b11ull << 50);
+ }
+ } else {
+ if (I->op == BI_OPCODE_ATOM_RETURN_I64)
+ hex |= (0xc0ull << 40); // flags
- if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
- hex |= (1 << 26); /* .compare */
+ if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
+ hex |= (1 << 26); /* .compare */
+ }
break;
case BI_OPCODE_ATOM1_RETURN_I32:
/* Permit omitting the destination for plain ATOM1 */
- if (!bi_count_write_registers(I, 0)) {
+ if (arch < 15 && !bi_count_write_registers(I, 0)) {
hex |= (0x40ull << 40); // fake read
}
/* 64-bit source */
va_validate_register_pair(I, 0);
- hex |= (uint64_t)va_pack_src(I, 0) << 0;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 0, 0);
+ else
+ hex |= (uint64_t)va_pack_src(I, 0) << 0;
hex |= va_pack_byte_offset_8(I);
- hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22;
+ hex |= ((uint64_t)va_pack_atom_opc_1(I)) << ((arch >= 15) ? 24 : 22);
break;
case BI_OPCODE_ACMPXCHG_I32:
@@ -942,41 +1236,67 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_ATOM_RETURN_I32:
/* 64-bit source */
va_validate_register_pair(I, 1);
- hex |= (uint64_t)va_pack_src(I, 1) << 0;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 1, 0);
+ else
+ hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset_8(I);
- hex |= ((uint64_t)va_pack_atom_opc(I)) << 22;
+ hex |= ((uint64_t)va_pack_atom_opc(I)) << ((arch >= 15) ? 24 : 22);
- if (I->op == BI_OPCODE_ATOM_RETURN_I32)
- hex |= (0xc0ull << 40); // flags
+ if (arch >= 15) {
+ if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) {
+ /* Change bits [51;50] to be ACMPXCHG */
+ pack_assert(I, ((hex >> 50) & 0b11) == 0b01);
+ hex ^= (0b11ull << 50);
+ }
+ } else {
+ if (I->op == BI_OPCODE_ATOM_RETURN_I32)
+ hex |= (0xc0ull << 40); // flags
- if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
- hex |= (1 << 26); /* .compare */
+ if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
+ hex |= (1 << 26); /* .compare */
+ }
break;
case BI_OPCODE_LD_CVT:
- hex |= (uint64_t)va_pack_src(I, 0);
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 0, 0);
+ else
+ hex |= (uint64_t)va_pack_src(I, 0);
hex |= va_pack_byte_offset(I);
/* Conversion descriptor */
- hex |= (uint64_t)va_pack_src(I, 2) << 16;
- hex |= (uint64_t)I->mem_access << 37;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 2, 2);
+ else
+ hex |= (uint64_t)va_pack_src(I, 2) << 16;
+ hex |= (uint64_t)I->mem_access << ((arch >= 15) ? 35 : 37);
break;
case BI_OPCODE_ST_CVT:
/* Staging read */
va_validate_register_pair(I, 1);
- hex |= (uint64_t)va_pack_src(I, 1) << 0;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 1, 0);
+ else
+ hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset(I);
/* Conversion descriptor */
- hex |= (uint64_t)va_pack_src(I, 3) << 16;
- hex |= (uint64_t)I->mem_access << 37;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 3, 2);
+ else
+ hex |= (uint64_t)va_pack_src(I, 3) << 16;
+ hex |= (uint64_t)I->mem_access << ((arch >= 15) ? 35 : 37);
break;
case BI_OPCODE_BLEND: {
/* Source 0 - Blend descriptor (64-bit) */
- hex |= ((uint64_t)va_pack_src(I, 2)) << 0;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 2, 0);
+ else
+ hex |= ((uint64_t)va_pack_src(I, 2)) << 0;
va_validate_register_pair(I, 2);
/* Target */
@@ -987,7 +1307,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
hex |= ((I->branch_offset >> 3) << 8);
/* Source 2 - coverage mask */
- hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 1, 2);
+ else
+ hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16;
/* Vector size */
unsigned vecsize = 4;
@@ -997,7 +1320,7 @@ va_pack_instr(const bi_instr *I, unsigned arch)
}
case BI_OPCODE_LD_GCLK_U64:
- hex |= va_pack_gclk(I);
+ hex |= va_pack_gclk(I) << ((arch >= 15) ? 8 : 0);
break;
case BI_OPCODE_TEX_GRADIENT:
@@ -1005,7 +1328,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
/* Image to read from */
- hex |= ((uint64_t)va_pack_src(I, 1)) << 0;
+ if (arch >= 15)
+ hex |= va_pack_src_v15(I, 1, 0);
+ else
+ hex |= ((uint64_t)va_pack_src(I, 1)) << 0;
if ((I->op == BI_OPCODE_TEX_FETCH || I->op == BI_OPCODE_TEX_GRADIENT) &&
I->shadow)
@@ -1022,7 +1348,7 @@ va_pack_instr(const bi_instr *I, unsigned arch)
if (I->skip)
hex |= (1ull << 39);
if (!bi_is_regfmt_16(I->register_format))
- hex |= (1ull << 46);
+ hex |= (1ull << ((arch >= 15) ? 38 : 46));
if (I->op == BI_OPCODE_TEX_GRADIENT) {
if (I->force_delta_enable)
@@ -1044,20 +1370,35 @@ va_pack_instr(const bi_instr *I, unsigned arch)
hex |= ((uint64_t)I->fetch_component) << 14;
}
- hex |= (I->write_mask << 22);
+ hex |= (I->write_mask << ((arch >= 15) ? 24 : 22));
hex |= ((uint64_t)I->dimension) << 28;
break;
}
default:
- if (!info.exact && I->op != BI_OPCODE_NOP)
+ if (!info.exact && (arch >= 15 || I->op != BI_OPCODE_NOP))
invalid_instruction(I, "opcode");
hex |= va_pack_alu(I, arch);
break;
}
+ /* On v15, some instructions require an encoded null src. */
+ if (arch >= 15) {
+ switch (I->op) {
+ case BI_OPCODE_NOP:
+ case BI_OPCODE_LD_VAR_FLAT_IMM:
+ case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
+ case BI_OPCODE_LD_GCLK_U64:
+ case BI_OPCODE_BARRIER:
+ hex |= va_pack_src_null_v15(0);
+ break;
+ default:
+ break;
+ }
+ }
+
return hex;
}
diff --git a/src/panfrost/compiler/bifrost/valhall/va_validate.c b/src/panfrost/compiler/bifrost/valhall/va_validate.c
index b597692eb00..da32405849d 100644
--- a/src/panfrost/compiler/bifrost/valhall/va_validate.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_validate.c
@@ -93,7 +93,8 @@ fau_state_uniform(struct fau_state *fau, bi_index idx, enum bi_opcode op)
}
static bool
-fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op)
+fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op,
+ unsigned arch)
{
for (unsigned i = 0; i < ARRAY_SIZE(fau->buffer); ++i) {
bi_index buf = fau->buffer[i];
@@ -106,7 +107,7 @@ fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op)
/* Instructions executed by the messaging unit should not encode WARP_ID or
* anything from special page 3. */
if (can_run_on_message_unit(op) &&
- (va_fau_page(idx.value) == 3 || idx.value == BIR_FAU_WARP_ID))
+ (va_fau_page(idx.value, arch) == 3 || idx.value == BIR_FAU_WARP_ID))
return false;
return fau->uniform_slot == -1 || can_use_two_fau_indices(op);
@@ -114,7 +115,7 @@ fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op)
static bool
valid_src(struct fau_state *fau, unsigned fau_page, bi_index src,
- enum bi_opcode op)
+ enum bi_opcode op, unsigned arch)
{
if (src.type != BI_INDEX_FAU)
return true;
@@ -128,42 +129,42 @@ valid_src(struct fau_state *fau, unsigned fau_page, bi_index src,
return fau_state_buffer(fau, src);
}
- bool valid = (fau_page == va_fau_page(src.value));
+ bool valid = (fau_page == va_fau_page(src.value, arch));
valid &= fau_state_buffer(fau, src);
if (src.value & BIR_FAU_UNIFORM)
valid &= fau_state_uniform(fau, src, op);
else if (fau_is_special(src.value))
- valid &= fau_state_special(fau, src, op);
+ valid &= fau_state_special(fau, src, op, arch);
return valid;
}
bool
-va_validate_fau(bi_instr *I)
+va_validate_fau(bi_instr *I, unsigned arch)
{
bool valid = true;
struct fau_state fau = {.uniform_slot = -1};
- unsigned fau_page = va_select_fau_page(I);
+ unsigned fau_page = va_select_fau_page(I, arch);
bi_foreach_src(I, s) {
- valid &= valid_src(&fau, fau_page, I->src[s], I->op);
+ valid &= valid_src(&fau, fau_page, I->src[s], I->op, arch);
}
return valid;
}
void
-va_repair_fau(bi_builder *b, bi_instr *I)
+va_repair_fau(bi_builder *b, bi_instr *I, unsigned arch)
{
struct fau_state fau = {.uniform_slot = -1};
- unsigned fau_page = va_select_fau_page(I);
+ unsigned fau_page = va_select_fau_page(I, arch);
bi_foreach_src(I, s) {
struct fau_state push = fau;
bi_index src = I->src[s];
- if (!valid_src(&fau, fau_page, src, I->op)) {
+ if (!valid_src(&fau, fau_page, src, I->op, arch)) {
bi_replace_src(I, s, bi_mov_i32(b, bi_strip_index(src)));
/* Rollback update. Since the replacement move doesn't affect FAU
@@ -180,7 +181,7 @@ va_validate(FILE *fp, bi_context *ctx)
bool errors = false;
bi_foreach_instr_global(ctx, I) {
- if (!va_validate_fau(I)) {
+ if (!va_validate_fau(I, ctx->arch)) {
if (!errors) {
fprintf(fp, "Validation failed, this is a bug. Shader:\n\n");
bi_print_shader(ctx, fp);