From 6a7aecaeecf1e79300777d9445348e7c10d0770f Mon Sep 17 00:00:00 2001 From: Lars-Ivar Hesselberg Simonsen Date: Thu, 12 Mar 2026 11:29:46 +0100 Subject: [PATCH] pan/va: Implement v15 encoding support Update va_pack to support the new encodings required by v15. --- .../compiler/bifrost/bifrost_compile.c | 2 +- src/panfrost/compiler/bifrost/valhall/ISA.xml | 12 + .../bifrost/valhall/test/test-packing.cpp | 85 +++ .../valhall/test/test-validate-fau.cpp | 8 +- .../compiler/bifrost/valhall/va_compiler.h | 13 +- .../compiler/bifrost/valhall/va_insert_flow.c | 2 +- .../compiler/bifrost/valhall/va_pack.c | 491 +++++++++++++++--- .../compiler/bifrost/valhall/va_validate.c | 25 +- 8 files changed, 539 insertions(+), 99 deletions(-) diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index c0b551425e2..687a22979ba 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -4531,7 +4531,7 @@ bi_compile_variant_nir(nir_shader *nir, va_lower_constants(ctx, I, const_hist, min_count_for_fau); bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); - va_repair_fau(&b, I); + va_repair_fau(&b, I, ctx->arch); } _mesa_hash_table_u64_destroy(const_hist); diff --git a/src/panfrost/compiler/bifrost/valhall/ISA.xml b/src/panfrost/compiler/bifrost/valhall/ISA.xml index 6fc6e0d12de..53ddbc06856 100644 --- a/src/panfrost/compiler/bifrost/valhall/ISA.xml +++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml @@ -597,6 +597,18 @@ slot7 + + + Dependency slot set on a message-passing instruction that writes to + registers. Before reading the destination, a future instruction must wait + on the specified slot. Slot #7 is for `BARRIER` instructions only. + + slot0 + slot1 + slot2 + slot7 + + Memory access hint for a `LOAD` or `STORE` instruction. 
none diff --git a/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp b/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp index 3b92c96087f..44b8257b583 100644 --- a/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp +++ b/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp @@ -48,10 +48,12 @@ TEST_F(ValhallPacking, Moves) { bi_instr *I = bi_mov_i32_to(b, bi_register(1), bi_register(2)); CASE_ARCH(I, 10, 0x0091c10000000002ULL); + CASE_ARCH(I, 15, 0x0060010000200002ULL); I = bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false)); CASE_ARCH(I, 10, 0x0091c1000000008aULL); + CASE_ARCH(I, 15, 0x006101000020000aULL); } TEST_F(ValhallPacking, Fadd) @@ -59,44 +61,55 @@ TEST_F(ValhallPacking, Fadd) bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2)); CASE_ARCH(I, 10, 0x00a4c00000000201ULL); + CASE_ARCH(I, 15, 0x00f0000000000201ULL); I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))); CASE_ARCH(I, 10, 0x00a4c02000000201ULL); + CASE_ARCH(I, 15, 0x00f0002000000201ULL); I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))); CASE_ARCH(I, 10, 0x00a4c01000000201ULL); + CASE_ARCH(I, 15, 0x00f0001000000201ULL); I = bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false), bi_swz_16(bi_register(0), true, true)); CASE_ARCH(I, 10, 0x00a5c0000c000001ULL); + CASE_ARCH(I, 15, 0x00f400000c000001ULL); I = bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0)); CASE_ARCH(I, 10, 0x00a5c00028000001ULL); + CASE_ARCH(I, 15, 0x00f4000028000001ULL); I = bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_swz_16(bi_register(0), true, false)); CASE_ARCH(I, 10, 0x00a5c00024000001ULL); + CASE_ARCH(I, 15, 0x00f4000024000001ULL); I = bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))), bi_neg(zero)); CASE_ARCH(I, 10, 0x00a5c0902800c040ULL); + CASE_ARCH(I, 15, 0x00f600902800c080ULL); I = bi_fadd_f32_to(b, 
bi_register(0), bi_register(1), zero); CASE_ARCH(I, 10, 0x00a4c0000000c001ULL); + CASE_ARCH(I, 15, 0x00f200000000c001ULL); I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(zero)); CASE_ARCH(I, 10, 0x00a4c0100000c001ULL); + CASE_ARCH(I, 15, 0x00f200100000c001ULL); I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_half(bi_register(0), true)); CASE_ARCH(I, 10, 0x00a4c00008000001ULL); + CASE_ARCH(I, 15, 0x00f0000008000001ULL); I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_half(bi_register(0), false)); CASE_ARCH(I, 10, 0x00a4c00004000001ULL); + CASE_ARCH(I, 15, 0x00f0000004000001ULL); } TEST_F(ValhallPacking, Clper) @@ -105,6 +118,7 @@ TEST_F(ValhallPacking, Clper) bi_byte(n4567, 0), BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16); CASE_ARCH(I, 10, 0x00a0c030128fc900); + CASE_ARCH(I, 15, 0x00e20030028fc900); } TEST_F(ValhallPacking, Clamps) @@ -112,9 +126,11 @@ TEST_F(ValhallPacking, Clamps) bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_abs(bi_register(2)))); CASE_ARCH(I, 10, 0x00a4c03000000201ULL); + CASE_ARCH(I, 15, 0x00f0003000000201ULL); I->clamp = BI_CLAMP_CLAMP_M1_1; CASE_ARCH(I, 10, 0x00a4c03200000201ULL); + CASE_ARCH(I, 15, 0x00f0003080000201ULL); } TEST_F(ValhallPacking, Misc) @@ -123,18 +139,22 @@ TEST_F(ValhallPacking, Misc) b, bi_register(1), bi_discard(bi_register(1)), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 4), false), bi_neg(zero)); CASE_ARCH(I, 10, 0x00b2c10400c08841ULL); + CASE_ARCH(I, 15, 0x0166010400c00881ULL); I = bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))), BI_ROUND_RTN); CASE_ARCH(I, 10, 0x0090c240800d0042ULL); + CASE_ARCH(I, 15, 0x00600242004d0082ULL); I = bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0), BI_ROUND_RTN); CASE_ARCH(I, 10, 0x00904000a00f0000ULL); + /* Removed on v11 */ I = bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN); CASE_ARCH(I, 10, 
0x00904000900f0001ULL); + /* Removed on v11 */ } TEST_F(ValhallPacking, FaddImm) @@ -142,10 +162,12 @@ TEST_F(ValhallPacking, FaddImm) bi_instr *I = bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)), 0x4847C6C0); CASE_ARCH(I, 10, 0x0114C24847C6C042ULL); + CASE_ARCH(I, 15, 0x0064024847c6c082ULL); I = bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), 0x70AC6784); CASE_ARCH(I, 10, 0x0115C270AC678442ULL); + CASE_ARCH(I, 15, 0x00620270ac678482ULL); } TEST_F(ValhallPacking, Comparions) @@ -155,12 +177,14 @@ TEST_F(ValhallPacking, Comparions) bi_discard(bi_swz_16(bi_register(2), true, false)), zero, BI_CMPF_GT, BI_RESULT_TYPE_M1); CASE_ARCH(I, 10, 0x00f9c21184c04243); + CASE_ARCH(I, 15, 0x01e40212c6c08283); I = bi_fcmp_or_v2f16_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(3), true, false)), bi_discard(bi_swz_16(bi_register(2), false, false)), zero, BI_CMPF_GT, BI_RESULT_TYPE_M1); CASE_ARCH(I, 10, 0x00f5c20190c04243); + CASE_ARCH(I, 15, 0x01e4020352c08283); } TEST_F(ValhallPacking, Conversions) @@ -168,6 +192,7 @@ TEST_F(ValhallPacking, Conversions) bi_instr *I = bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2))); CASE_ARCH(I, 10, 0x0090c22000070042); + /* Removed on v11 */ } TEST_F(ValhallPacking, BranchzI16) @@ -176,6 +201,7 @@ TEST_F(ValhallPacking, BranchzI16) bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ); I->branch_offset = 1; CASE_ARCH(I, 10, 0x001fc03000000102); + CASE_ARCH(I, 15, 0x02b8003000000102); } TEST_F(ValhallPacking, BranchzI16Backwards) @@ -183,6 +209,7 @@ TEST_F(ValhallPacking, BranchzI16Backwards) bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ); I->branch_offset = -8; CASE_ARCH(I, 10, 0x001fc017fffff8c0); + CASE_ARCH(I, 15, 0x02b90017fffff8c0); } TEST_F(ValhallPacking, Blend) @@ -192,6 +219,7 @@ TEST_F(ValhallPacking, Blend) bi_fau(BIR_FAU_BLEND_0, false), bi_fau(BIR_FAU_BLEND_0, true), bi_null(), BI_REGISTER_FORMAT_F16, 2, 0); CASE_ARCH(I, 10, 
0x007f4004333c00f0); + CASE_ARCH(I, 15, 0x031b0082333c00f0); } TEST_F(ValhallPacking, Mux) @@ -200,6 +228,7 @@ TEST_F(ValhallPacking, Mux) b, bi_register(0), bi_discard(bi_register(0)), bi_discard(bi_register(4)), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT); CASE_ARCH(I, 10, 0x00b8c00300804440ull); + CASE_ARCH(I, 15, 0x017c000c80008480ull); } TEST_F(ValhallPacking, AtestFP16) @@ -208,6 +237,7 @@ TEST_F(ValhallPacking, AtestFP16) bi_half(bi_register(1), true), bi_fau(BIR_FAU_ATEST_PARAM, false)); CASE_ARCH(I, 10, 0x007dbc0208ea013c); + CASE_ARCH(I, 15, 0x03d43c0108ea013c); } TEST_F(ValhallPacking, AtestFP32) @@ -215,6 +245,7 @@ TEST_F(ValhallPacking, AtestFP32) bi_instr *I = bi_atest_to(b, bi_register(60), bi_register(60), one, bi_fau(BIR_FAU_ATEST_PARAM, false)); CASE_ARCH(I, 10, 0x007dbc0200ead03c); + CASE_ARCH(I, 15, 0x03d63c0100ead03c); } TEST_F(ValhallPacking, Transcendentals) @@ -222,18 +253,28 @@ TEST_F(ValhallPacking, Transcendentals) bi_instr *I = bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true); CASE_ARCH(I, 10, 0x0099c10001000000); + CASE_ARCH(I, 15, 0x0060010041200000); I = bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false, true); CASE_ARCH(I, 10, 0x0099c00001020040); + CASE_ARCH(I, 15, 0x0060000041220080); I = bi_frsq_f32_to(b, bi_register(2), bi_register(1)); CASE_ARCH(I, 10, 0x009cc20000020001); + CASE_ARCH(I, 15, 0x0060020001820001); I = bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_SPECIAL_LEFT); CASE_ARCH(I, 10, 0x0162c00440c04241); + CASE_ARCH(I, 15, 0x0264000e80c08281); + + I = bi_fma_rscale_f32_to(b, bi_register(0), bi_register(1), bi_register(2), + bi_neg(zero), bi_discard(bi_register(0)), + BI_SPECIAL_N); + CASE_ARCH(I, 10, 0x0161c00440c00201); + CASE_ARCH(I, 15, 0x0264000d80c00201); } TEST_F(ValhallPacking, Csel) @@ -243,18 +284,21 @@ TEST_F(ValhallPacking, Csel) bi_fau((enum 
bir_fau)(BIR_FAU_UNIFORM | 2), false), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_EQ); CASE_ARCH(I, 10, 0x0150c10085844342); + CASE_ARCH(I, 15, 0x027c010005048382); I = bi_csel_u32_to( b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT); CASE_ARCH(I, 10, 0x0150c10485844342); + CASE_ARCH(I, 15, 0x027c010805048382); I = bi_csel_s32_to( b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT); CASE_ARCH(I, 10, 0x0158c10485844342); + CASE_ARCH(I, 15, 0x027c014805048382); } TEST_F(ValhallPacking, LdAttrImm) @@ -265,6 +309,7 @@ TEST_F(ValhallPacking, LdAttrImm) I->table = 1; CASE_ARCH(I, 10, 0x0066800433117d7c); + CASE_ARCH(I, 15, 0x038400023311bdbc); } TEST_F(ValhallPacking, LdVarBufImmF16) @@ -274,12 +319,14 @@ TEST_F(ValhallPacking, LdVarBufImmF16) BI_SAMPLE_CENTER, BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0); CASE_ARCH(I, 10, 0x005d82143300003d); + CASE_ARCH(I, 15, 0x0310020a3f00003d); I = bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE, BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE, BI_VECSIZE_V4, 0); CASE_ARCH(I, 10, 0x005d80843300003d); + CASE_ARCH(I, 15, 0x031000423f00003d); I = bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID, @@ -287,6 +334,7 @@ TEST_F(ValhallPacking, LdVarBufImmF16) BI_VECSIZE_V4, 8); CASE_ARCH(I, 10, 0x005d80443308003d); CASE_ARCH(I, 11, 0x005d80443300083d); + CASE_ARCH(I, 15, 0x031000223f00083d); } TEST_F(ValhallPacking, LdVarBufFlatImmFormat) @@ -294,10 +342,12 @@ TEST_F(ValhallPacking, LdVarBufFlatImmFormat) bi_instr *I = bi_ld_var_buf_flat_imm_to( b, bi_register(0), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 0x12); CASE_ARCH(I, 14, 
0x0040800832001200); + CASE_ARCH(I, 15, 0x033900043a0012c0); I = bi_ld_var_buf_flat_imm_to(b, bi_register(0), BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 0x12); CASE_ARCH(I, 14, 0x0040800433001200); + CASE_ARCH(I, 15, 0x033900023b0012c0); } TEST_F(ValhallPacking, LdVarBufFlat) @@ -305,10 +355,12 @@ TEST_F(ValhallPacking, LdVarBufFlat) bi_instr *I = bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4); CASE_ARCH(I, 14, 0x005f80083200003d); + CASE_ARCH(I, 15, 0x031400043a00003d); I = bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4); CASE_ARCH(I, 14, 0x005f80043300003d); + CASE_ARCH(I, 15, 0x031400023b00003d); } TEST_F(ValhallPacking, LeaBufImm) @@ -316,6 +368,7 @@ TEST_F(ValhallPacking, LeaBufImm) bi_instr *I = bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59))); CASE_ARCH(I, 10, 0x005e84040000007b); + CASE_ARCH(I, 15, 0x03080402000000bb); } TEST_F(ValhallPacking, StoreMemoryAccess) @@ -324,6 +377,7 @@ TEST_F(ValhallPacking, StoreMemoryAccess) bi_discard(bi_register(5)), BI_SEG_NONE, 0); I->mem_access = VA_MEMORY_ACCESS_ESTREAM; CASE_ARCH(I, 10, 0x0061400632000044); + CASE_ARCH(I, 15, 0x0320009302000084); } TEST_F(ValhallPacking, Convert16To32) @@ -331,26 +385,32 @@ TEST_F(ValhallPacking, Convert16To32) bi_instr *I = bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_half(bi_register(55), false))); CASE_ARCH(I, 10, 0x0090c20000140077); + CASE_ARCH(I, 15, 0x00600200005400b7); I = bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_half(bi_register(55), true))); CASE_ARCH(I, 10, 0x0090c20010140077); + CASE_ARCH(I, 15, 0x00600200105400b7); I = bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_half(bi_register(55), false))); CASE_ARCH(I, 10, 0x0090c20000150077); + /* Removed on v11 */ I = bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_half(bi_register(55), true))); CASE_ARCH(I, 10, 0x0090c20010150077); + /* Removed on v11 */ I = bi_s16_to_s32_to(b, bi_register(2), 
bi_discard(bi_half(bi_register(55), false))); CASE_ARCH(I, 10, 0x0090c20000040077); + CASE_ARCH(I, 15, 0x00600200004400b7); I = bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_half(bi_register(55), true))); CASE_ARCH(I, 10, 0x0090c20010040077); + CASE_ARCH(I, 15, 0x00600200104400b7); } TEST_F(ValhallPacking, Swizzle8) @@ -359,6 +419,7 @@ TEST_F(ValhallPacking, Swizzle8) bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), zero, zero, BI_CMPF_NE, BI_RESULT_TYPE_I1); CASE_ARCH(I, 10, 0x00f2c14300c0c000); + /* Removed on v11 */ } TEST_F(ValhallPacking, FauPage1) @@ -366,6 +427,7 @@ TEST_F(ValhallPacking, FauPage1) bi_instr *I = bi_mov_i32_to( b, bi_register(1), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 32), false)); CASE_ARCH(I, 10, 0x0291c10000000080ULL); + CASE_ARCH(I, 15, 0x0061010000200040ULL); } TEST_F(ValhallPacking, LdTileV3F16) @@ -374,6 +436,7 @@ TEST_F(ValhallPacking, LdTileV3F16) bi_register(60), bi_register(3), BI_REGISTER_FORMAT_F16, BI_VECSIZE_V3); CASE_ARCH(I, 10, 0x0078840423033c40); + CASE_ARCH(I, 15, 0x03c0040223033c80); } TEST_F(ValhallPacking, Rhadd8) @@ -381,4 +444,26 @@ TEST_F(ValhallPacking, Rhadd8) bi_instr *I = bi_hadd_v4s8_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(0)), BI_ROUND_RTP); CASE_ARCH(I, 10, 0x00aac000400b4041); + /* Removed on v11 */ +} + +TEST_F(ValhallPacking, Atomics) +{ + + bi_instr *I = + bi_atom1_return_i64_to(b, bi_register(0), bi_discard(bi_register(2)), + bi_register(3), BI_ATOM_OPC_AINC, 2); + CASE_ARCH(I, 10, 0x0069800428000042); + CASE_ARCH(I, 15, 0x0328000220000082); + + I = bi_atom_return_i32_to(b, bi_register(0), bi_discard(bi_register(1)), + bi_register(2), bi_register(3), BI_ATOM_OPC_AXCHG, + 1); + CASE_ARCH(I, 10, 0x0120c1021bc00002); + CASE_ARCH(I, 15, 0x032401c10f000002); + + I = bi_atom_return_i64_to(b, bi_register(0), bi_register(2), bi_register(6), + bi_register(7), BI_ATOM_OPC_ACMPXCHG, 2); + CASE_ARCH(I, 10, 0x0120c2182fc00006); + CASE_ARCH(I, 15, 
0x032802cc2f000006); } diff --git a/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp b/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp index e4a0945f1f6..6c3f1f44905 100644 --- a/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp +++ b/src/panfrost/compiler/bifrost/valhall/test/test-validate-fau.cpp @@ -9,9 +9,9 @@ #include -#define CASE(instr, expected) \ +#define CASE_ARCH(instr, arch, expected) \ do { \ - if (va_validate_fau(instr) != expected) { \ + if (va_validate_fau(instr, arch) != expected) { \ fprintf(stderr, "Incorrect validation for:\n"); \ bi_print_instr(instr, stderr); \ fprintf(stderr, "\n"); \ @@ -19,8 +19,8 @@ } \ } while (0) -#define VALID(instr) CASE(instr, true) -#define INVALID(instr) CASE(instr, false) +#define VALID(instr) CASE_ARCH(instr, 10, true) +#define INVALID(instr) CASE_ARCH(instr, 10, false) class ValidateFau : public testing::Test { protected: diff --git a/src/panfrost/compiler/bifrost/valhall/va_compiler.h b/src/panfrost/compiler/bifrost/valhall/va_compiler.h index 622ab81b302..5a227c80412 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_compiler.h +++ b/src/panfrost/compiler/bifrost/valhall/va_compiler.h @@ -13,9 +13,9 @@ extern "C" { #endif -bool va_validate_fau(bi_instr *I); +bool va_validate_fau(bi_instr *I, unsigned arch); void va_validate(FILE *fp, bi_context *ctx); -void va_repair_fau(bi_builder *b, bi_instr *I); +void va_repair_fau(bi_builder *b, bi_instr *I, unsigned arch); void va_fuse_add_imm(bi_instr *I); void va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, uint32_t min_fau_count); void va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts); @@ -28,14 +28,15 @@ void va_gather_hsr_info(bi_context *ctx, struct pan_shader_info *info); uint64_t va_pack_instr(const bi_instr *I, unsigned arch); static inline unsigned -va_fau_page(enum bir_fau value) +va_fau_page(enum bir_fau value, unsigned arch) { /* Uniform slots of 
FAU have a 7-bit index. The top 2-bits are the page; the * bottom 5-bits are specified in the source. */ if (value & BIR_FAU_UNIFORM) { + unsigned value_shift = arch >= 15 ? 6 : 5; unsigned slot = value & ~BIR_FAU_UNIFORM; - unsigned page = slot >> 5; + unsigned page = slot >> value_shift; assert(page <= 3); return page; @@ -57,11 +58,11 @@ va_fau_page(enum bir_fau value) } static inline unsigned -va_select_fau_page(const bi_instr *I) +va_select_fau_page(const bi_instr *I, unsigned arch) { bi_foreach_src(I, s) { if (I->src[s].type == BI_INDEX_FAU) - return va_fau_page((enum bir_fau)I->src[s].value); + return va_fau_page((enum bir_fau)I->src[s].value, arch); } return 0; diff --git a/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c b/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c index 9f3e7881ac3..c2812546067 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c +++ b/src/panfrost/compiler/bifrost/valhall/va_insert_flow.c @@ -520,7 +520,7 @@ va_assign_slots(bi_context *ctx) bi_foreach_instr_global(ctx, I) { if (I->op == BI_OPCODE_BARRIER) { - I->slot = 7; + I->slot = (ctx->arch >= 15) ? 
VA_SLOT_V15_SLOT7 : VA_SLOT_SLOT7; } else if (I->op == BI_OPCODE_ZS_EMIT || I->op == BI_OPCODE_ATEST) { I->slot = 0; } else if (bi_get_opcode_props(I)->message) { diff --git a/src/panfrost/compiler/bifrost/valhall/va_pack.c b/src/panfrost/compiler/bifrost/valhall/va_pack.c index 129512ce170..ea2e78e98e0 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_pack.c +++ b/src/panfrost/compiler/bifrost/valhall/va_pack.c @@ -74,6 +74,15 @@ va_pack_reg(const bi_instr *I, bi_index idx) return idx.value; } +static unsigned +va_pack_reg_v15(const bi_instr *I, bi_index idx) +{ + pack_assert(I, idx.type == BI_INDEX_REGISTER); + pack_assert(I, idx.value < 128); + + return idx.value; +} + static unsigned va_pack_fau_special(const bi_instr *I, enum bir_fau fau) { @@ -124,6 +133,21 @@ va_pack_fau_64(const bi_instr *I, bi_index idx) return (0x7 << 5) | (va_pack_fau_special(I, idx.value) << 1); } +static unsigned +va_pack_fau_64_v15(const bi_instr *I, bi_index idx) +{ + pack_assert(I, idx.type == BI_INDEX_FAU); + + unsigned val = (idx.value & BITFIELD_MASK(6)); + + if (idx.value & BIR_FAU_IMMEDIATE) + return (0x7 << 6) | (val << 1); + else if (idx.value & BIR_FAU_UNIFORM) + return (0x2 << 7) | (val << 1); + else + return (0xf << 5) | (va_pack_fau_special(I, idx.value) << 1); +} + static unsigned va_pack_src(const bi_instr *I, unsigned s) { @@ -142,6 +166,33 @@ va_pack_src(const bi_instr *I, unsigned s) invalid_instruction(I, "type of source %u", s); } +static uint64_t +va_pack_src_v15(const bi_instr *I, unsigned s, unsigned loc) +{ + bi_index idx = I->src[s]; + + uint64_t hex = 0; + uint64_t regval = 0; + + if (idx.type == BI_INDEX_REGISTER) { + regval = va_pack_reg_v15(I, idx); + if (idx.discard) + regval |= (1 << 7); + } else if (idx.type == BI_INDEX_FAU) { + pack_assert(I, idx.offset <= 1); + regval = va_pack_fau_64_v15(I, idx) | idx.offset; + } else + invalid_instruction(I, "type of source %u", s); + + uint64_t low8 = regval & 0xff; + uint64_t high1 = (regval >> 8) & 0x1; + + 
hex |= (low8 << (8 * loc)); + hex |= (high1 << (48 + loc)); + + return hex; +} + static unsigned va_pack_wrmask(const bi_instr *I) { @@ -211,6 +262,20 @@ va_pack_dest(const bi_instr *I) return va_pack_reg(I, I->dest[0]) | (va_pack_wrmask(I) << 6); } +static unsigned +va_pack_dest_v15(const bi_instr *I) +{ + assert(I->nr_dests); + switch (I->op) { + case BI_OPCODE_SHADDX_S64: + case BI_OPCODE_SHADDX_U64: + /* 64 bit dest has a 0x0 wrmask */ + return va_pack_reg_v15(I, I->dest[0]); + default: + return va_pack_reg_v15(I, I->dest[0]) | (va_pack_wrmask(I) << 13); + } +} + static enum va_widen va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz) { @@ -452,6 +517,18 @@ va_pack_rhadd(const bi_instr *I) } } +static uint64_t +va_pack_clamp_special_round_v15(const bi_instr *I) +{ + pack_assert(I, I->special < 4); + if (I->special == BI_SPECIAL_N && I->round == BI_ROUND_RTZ) + return 0x4; + else if (I->special) + return 0x4 | I->special; + else + return I->clamp; +} + static uint64_t va_pack_alu(const bi_instr *I, unsigned arch) { @@ -465,25 +542,25 @@ va_pack_alu(const bi_instr *I, unsigned arch) case BI_OPCODE_FREXPM_F32: case BI_OPCODE_FREXPM_V2F16: if (I->sqrt) - hex |= 1ull << 24; + hex |= 1ull << ((arch >= 15) ? 30 : 24); if (I->log) - hex |= 1ull << 25; + hex |= 1ull << ((arch >= 15) ? 31 : 25); break; case BI_OPCODE_FLUSH_F32: case BI_OPCODE_FLUSH_V2F16: - hex |= I->nan_mode << 8; + hex |= I->nan_mode << ((arch >= 15) ? 30 : 8); if (I->ftz) - hex |= 1ull << 10; + hex |= 1ull << ((arch >= 15) ? 32 : 10); if (I->flush_inf) - hex |= 1ull << 11; + hex |= 1ull << ((arch >= 15) ? 33 : 11); break; /* Add mux type */ case BI_OPCODE_MUX_I32: case BI_OPCODE_MUX_V2I16: case BI_OPCODE_MUX_V4I8: - hex |= (uint64_t)I->mux << 32; + hex |= (uint64_t)I->mux << ((arch >= 15) ? 
34 : 32); break; /* Add .eq flag */ @@ -495,7 +572,7 @@ va_pack_alu(const bi_instr *I, unsigned arch) hex |= (1ull << 36); if (I->op == BI_OPCODE_BRANCHZI) - hex |= (0x1ull << 40); /* Absolute */ + hex |= (0x1ull << ((arch >= 15) ? 31 : 40)); /* Absolute */ else hex |= ((uint64_t)I->branch_offset & BITFIELD_MASK(27)) << 8; @@ -511,7 +588,46 @@ va_pack_alu(const bi_instr *I, unsigned arch) case BI_OPCODE_RSHIFT_XOR_I32: case BI_OPCODE_RSHIFT_XOR_V2I16: case BI_OPCODE_RSHIFT_XOR_V4I8: - hex |= (uint64_t)I->arithmetic << 34; + if (arch >= 15) { + /* Rewrite exact to ARSHIFT */ + if (I->arithmetic) { + switch (I->op) { + case BI_OPCODE_RSHIFT_AND_I32: + case BI_OPCODE_RSHIFT_AND_V2I16: + case BI_OPCODE_RSHIFT_AND_V4I8: { + uint64_t arshift_and_op = (0xcULL << 30); + /* Check that we can safely overwrite opcode */ + pack_assert(I, ((info.exact & (0xfULL << 30)) | + arshift_and_op) == arshift_and_op); + hex |= arshift_and_op; + break; + } + case BI_OPCODE_RSHIFT_OR_I32: + case BI_OPCODE_RSHIFT_OR_V2I16: + case BI_OPCODE_RSHIFT_OR_V4I8: { + uint64_t arshift_or_op = (0xdULL << 30); + /* Check that we can safely overwrite opcode */ + pack_assert(I, ((info.exact & (0xfULL << 30)) | arshift_or_op) == + arshift_or_op); + hex |= arshift_or_op; + break; + } + case BI_OPCODE_RSHIFT_XOR_I32: + case BI_OPCODE_RSHIFT_XOR_V2I16: + case BI_OPCODE_RSHIFT_XOR_V4I8: { + uint64_t arshift_xor_op = (0xbULL << 30); + /* Check that we can safely overwrite opcode */ + pack_assert(I, ((info.exact & (0xfULL << 30)) | + arshift_xor_op) == arshift_xor_op); + hex |= arshift_xor_op; + break; + } + default: + UNREACHABLE("RSHIFT->ARSHIFT"); + } + } + } else + hex |= (uint64_t)I->arithmetic << 34; break; case BI_OPCODE_LEA_BUF_IMM: @@ -562,8 +678,8 @@ va_pack_alu(const bi_instr *I, unsigned arch) } hex |= ((uint64_t)va_pack_source_format(I)) << 24; - hex |= ((uint64_t)I->update) << 36; - hex |= ((uint64_t)I->sample) << 38; + hex |= ((uint64_t)I->update) << ((arch >= 15) ? 
35 : 36); + hex |= ((uint64_t)I->sample) << ((arch >= 15) ? 37 : 38); break; case BI_OPCODE_LD_VAR_BUF_FLAT_IMM: @@ -601,20 +717,18 @@ va_pack_alu(const bi_instr *I, unsigned arch) break; } - /* FMA_RSCALE.f32 special modes treated as extra opcodes */ - if (I->op == BI_OPCODE_FMA_RSCALE_F32) { - pack_assert(I, I->special < 4); - hex |= ((uint64_t)I->special) << 48; - } - /* Add the normal destination or a placeholder. Staging destinations are * added elsewhere, as they require special handling for control fields. */ if (info.has_dest && info.nr_staging_dests == 0) { - hex |= (uint64_t)va_pack_dest(I) << 40; + if (arch >= 15) + hex |= (uint64_t)va_pack_dest_v15(I) << 40; + else + hex |= (uint64_t)va_pack_dest(I) << 40; } else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) { pack_assert(I, I->nr_dests == 0); - hex |= 0xC0ull << 40; /* Placeholder */ + if (arch < 15) + hex |= 0xC0ull << 40; /* Placeholder */ } bool swap12 = va_swap_12(I->op); @@ -629,7 +743,10 @@ va_pack_alu(const bi_instr *I, unsigned arch) enum va_size size = src_info.size; bi_index src = I->src[logical_i + src_offset]; - hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i); + if (arch >= 15) + hex |= va_pack_src_v15(I, logical_i + src_offset, i); + else + hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i); if (src_info.notted) { if (src.neg) @@ -638,10 +755,15 @@ va_pack_alu(const bi_instr *I, unsigned arch) unsigned neg_offs = 32 + 2 + ((2 - i) * 2); unsigned abs_offs = 33 + 2 + ((2 - i) * 2); - if (src.neg) - hex |= 1ull << neg_offs; - if (src.abs) - hex |= 1ull << abs_offs; + if (arch >= 15 && I->op == BI_OPCODE_FMA_RSCALE_F32 && i == 2) { + if (src.neg) + hex |= 1ull << (neg_offs + 1); + } else { + if (src.neg) + hex |= 1ull << neg_offs; + if (src.abs) + hex |= 1ull << abs_offs; + } } else { if (src.neg) invalid_instruction(I, "negate"); @@ -661,8 +783,8 @@ va_pack_alu(const bi_instr *I, unsigned arch) unsigned offs = (i == 1) ? 
26 : 36; hex |= (uint64_t)va_pack_widen(I, src.swizzle, src_info.size) << offs; } else if (src_info.lane) { - unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ? - ((i == 0) ? 38 : 36) : ((i == 0) ? 28 : 26); + unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36) + : ((i == 0) ? 28 : 26); if (src_info.size == VA_SIZE_16) { hex |= (src.swizzle == BI_SWIZZLE_H1 ? 1 : 0) << offs; @@ -675,7 +797,25 @@ va_pack_alu(const bi_instr *I, unsigned arch) } else if (src_info.lanes) { pack_assert(I, src_info.size == VA_SIZE_8); pack_assert(I, i == 1); - hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26; + if (arch >= 15 && I->op == BI_OPCODE_CLPER_I32) { + switch (src.swizzle) { + case BI_SWIZZLE_B00: + hex |= 0x0ULL << 28; + break; + case BI_SWIZZLE_B11: + hex |= 0x1ULL << 28; + break; + case BI_SWIZZLE_B22: + hex |= 0x2ULL << 28; + break; + case BI_SWIZZLE_B33: + hex |= 0x3ULL << 28; + break; + default: + invalid_instruction(I, "lane shift"); + } + } else + hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26; } else if (src_info.combine) { /* Treat as swizzle, subgroup ops not yet supported */ pack_assert(I, src_info.size == VA_SIZE_32); @@ -691,17 +831,33 @@ va_pack_alu(const bi_instr *I, unsigned arch) } if (info.saturate) - hex |= (uint64_t)I->saturate << 30; - if (info.rhadd) + hex |= (uint64_t)I->saturate << ((arch >= 15) ? 
25 : 30); + if (info.rhadd) { + pack_assert(I, arch < 15); hex |= va_pack_rhadd(I); - if (info.clamp) - hex |= (uint64_t)I->clamp << 32; - if (info.round_mode) - hex |= (uint64_t)I->round << 30; + } + /* FMA_RSCALE.f32 special modes treated as extra opcodes */ + if (I->op == BI_OPCODE_FMA_RSCALE_F32) { + if (arch >= 15) { + hex |= va_pack_clamp_special_round_v15(I) << 32; + } else { + pack_assert(I, I->special < 4); + hex |= ((uint64_t)I->special) << 48; + if (info.clamp) + hex |= (uint64_t)I->clamp << 32; + if (info.round_mode && I->round == BI_ROUND_RTZ) + hex |= (uint64_t)0x1 << 50; + } + } else { + if (info.clamp) + hex |= (uint64_t)I->clamp << ((arch >= 15) ? 30 : 32); + if (info.round_mode) + hex |= (uint64_t)I->round << ((arch >= 15) ? 32 : 30); + } if (info.condition) - hex |= (uint64_t)I->cmpf << 32; + hex |= (uint64_t)I->cmpf << ((arch >= 15) ? 33 : 32); if (info.result_type) - hex |= (uint64_t)I->result_type << 30; + hex |= (uint64_t)I->result_type << ((arch >= 15) ? 24 : 30); return hex; } @@ -768,6 +924,26 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor) return hex; } + +static uint64_t +va_pack_load_v15(const bi_instr *I, bool buffer_descriptor) +{ + /* This implicitly means identity: VA_LOAD_LANE_8_BIT_B0 for i8 (bits[28;27]) + * and VA_LOAD_LANE_16_BIT_H0 for i16 (bit[27]) */ + uint64_t hex = 0; + + if (!buffer_descriptor) + hex |= va_pack_byte_offset(I); + + hex |= va_pack_src_v15(I, 0, 0); + hex |= (uint64_t)I->mem_access << 24; + + if (buffer_descriptor) + hex |= va_pack_src_v15(I, 1, 1); + + return hex; +} + static uint64_t va_pack_store(const bi_instr *I) { @@ -782,6 +958,20 @@ va_pack_store(const bi_instr *I) return hex; } +static uint64_t +va_pack_store_v15(const bi_instr *I) +{ + uint64_t hex = 0; + + va_validate_register_pair(I, 1); + hex |= va_pack_src_v15(I, 1, 0); + hex |= I->mem_access << 24; + + hex |= va_pack_byte_offset(I); + + return hex; +} + static enum va_lod_mode va_pack_lod_mode(const bi_instr *I) { @@ -824,13 
+1014,45 @@ va_pack_register_format(const bi_instr *I) } } +static uint64_t +va_pack_src_null_v15(unsigned loc) +{ + uint64_t hex = 0; + uint64_t regval = 0x1c0; + + uint64_t low8 = regval & 0xff; + uint64_t high1 = (regval >> 8) & 0x1; + + hex |= (low8 << (8 * loc)); + hex |= (high1 << (48 + loc)); + + return hex; +} + +static unsigned +va_repack_sr_control_v15(unsigned sr_control) +{ + unsigned repacked = 0; + bool read = sr_control & 0x1; + bool write = sr_control & 0x2; + + if (read) { + repacked |= 0x2; + if (write) + repacked |= 0x1; + } + + return repacked; +} + uint64_t va_pack_instr(const bi_instr *I, unsigned arch) { struct va_opcode_info info = get_valhall_opcode(I->op, arch); - uint64_t hex = info.exact | (((uint64_t)I->flow) << 59); - hex |= ((uint64_t)va_select_fau_page(I)) << 57; + uint64_t hex = + info.exact | (((uint64_t)I->flow) << ((arch >= 15) ? 58 : 59)); + hex |= ((uint64_t)va_select_fau_page(I, arch)) << ((arch >= 15) ? 62 : 57); if (info.slot) hex |= ((uint64_t)I->slot << 30); @@ -842,14 +1064,60 @@ va_pack_instr(const bi_instr *I, unsigned arch) unsigned count = read ? bi_count_read_registers(I, 0) : bi_count_write_registers(I, 0); - hex |= ((uint64_t)count << 33); - hex |= (uint64_t)va_pack_reg(I, sr) << 40; - hex |= ((uint64_t)info.sr_control << 46); + hex |= ((uint64_t)count << ((arch >= 15) ? 
32 : 33)); + if (arch >= 15) { + hex |= (uint64_t)va_pack_reg_v15(I, sr) << 40; + hex |= ((uint64_t)va_repack_sr_control_v15(info.sr_control) << 38); + } else { + hex |= (uint64_t)va_pack_reg(I, sr) << 40; + hex |= ((uint64_t)info.sr_control << 46); + } + } + + /* On v15, some instructions require special sr_control values */ + if (arch >= 15) { + switch (I->op) { + case BI_OPCODE_BARRIER: { + unsigned sr_control = va_repack_sr_control_v15(info.sr_control); + pack_assert(I, sr_control == 0x0 || sr_control == 0x2); + hex |= (uint64_t)0x2 << 38; + break; + } + case BI_OPCODE_ATOM1_RETURN_I32: + case BI_OPCODE_ATOM1_RETURN_I64: { + unsigned sr_control = va_repack_sr_control_v15(info.sr_control); + pack_assert(I, sr_control == 0x0); + break; + } + case BI_OPCODE_ATOM_I32: + case BI_OPCODE_ATOM_I64: { + unsigned sr_control = va_repack_sr_control_v15(info.sr_control); + pack_assert(I, sr_control == 0x2); + break; + } + case BI_OPCODE_ATOM_RETURN_I32: + case BI_OPCODE_ATOM_RETURN_I64: + case BI_OPCODE_AXCHG_I32: + case BI_OPCODE_AXCHG_I64: + case BI_OPCODE_ACMPXCHG_I32: + case BI_OPCODE_ACMPXCHG_I64: { + unsigned sr_control = va_repack_sr_control_v15(info.sr_control); + pack_assert(I, sr_control == 0x0 || sr_control == 0x3); + hex |= (uint64_t)0x3 << 38; + break; + } + default: + break; + } } if (info.sr_write_count) { - hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) << 36; - hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16; + hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) + << ((arch >= 15) ? 
35 : 36); + if (arch >= 15) + hex |= ((uint64_t)va_pack_reg_v15(I, I->dest[0])) << 16; + else + hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16; } if (info.vecsize) @@ -867,7 +1135,10 @@ va_pack_instr(const bi_instr *I, unsigned arch) case BI_OPCODE_LOAD_I64: case BI_OPCODE_LOAD_I96: case BI_OPCODE_LOAD_I128: - hex |= va_pack_load(I, false); + if (arch >= 15) + hex |= va_pack_load_v15(I, false); + else + hex |= va_pack_load(I, false); break; case BI_OPCODE_LD_PKA_I8: @@ -878,7 +1149,10 @@ va_pack_instr(const bi_instr *I, unsigned arch) case BI_OPCODE_LD_PKA_I64: case BI_OPCODE_LD_PKA_I96: case BI_OPCODE_LD_PKA_I128: - hex |= va_pack_load(I, true); + if (arch >= 15) + hex |= va_pack_load_v15(I, true); + else + hex |= va_pack_load(I, true); break; case BI_OPCODE_STORE_I8: @@ -889,20 +1163,26 @@ va_pack_instr(const bi_instr *I, unsigned arch) case BI_OPCODE_STORE_I64: case BI_OPCODE_STORE_I96: case BI_OPCODE_STORE_I128: - hex |= va_pack_store(I); + if (arch >= 15) + hex |= va_pack_store_v15(I); + else + hex |= va_pack_store(I); break; case BI_OPCODE_ATOM1_RETURN_I64: /* Permit omitting the destination for plain ATOM1 */ - if (!bi_count_write_registers(I, 0)) { + if (arch < 15 && !bi_count_write_registers(I, 0)) { hex |= (0x40ull << 40); // fake read } /* 64-bit source */ va_validate_register_pair(I, 0); - hex |= (uint64_t)va_pack_src(I, 0) << 0; + if (arch >= 15) + hex |= va_pack_src_v15(I, 0, 0); + else + hex |= (uint64_t)va_pack_src(I, 0) << 0; hex |= va_pack_byte_offset_8(I); - hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22; + hex |= ((uint64_t)va_pack_atom_opc_1(I)) << ((arch >= 15) ? 
24 : 22); break; case BI_OPCODE_ACMPXCHG_I64: @@ -911,29 +1191,43 @@ va_pack_instr(const bi_instr *I, unsigned arch) case BI_OPCODE_ATOM_RETURN_I64: /* 64-bit source */ va_validate_register_pair(I, 1); - hex |= (uint64_t)va_pack_src(I, 1) << 0; + if (arch >= 15) + hex |= va_pack_src_v15(I, 1, 0); + else + hex |= (uint64_t)va_pack_src(I, 1) << 0; hex |= va_pack_byte_offset_8(I); - hex |= ((uint64_t)va_pack_atom_opc(I)) << 22; + hex |= ((uint64_t)va_pack_atom_opc(I)) << ((arch >= 15) ? 24 : 22); - if (I->op == BI_OPCODE_ATOM_RETURN_I64) - hex |= (0xc0ull << 40); // flags + if (arch >= 15) { + if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) { + /* Change bits [51;50] to be ACMPXCHG */ + pack_assert(I, ((hex >> 50) & 0b11) == 0b01); + hex ^= (0b11ull << 50); + } + } else { + if (I->op == BI_OPCODE_ATOM_RETURN_I64) + hex |= (0xc0ull << 40); // flags - if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) - hex |= (1 << 26); /* .compare */ + if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) + hex |= (1 << 26); /* .compare */ + } break; case BI_OPCODE_ATOM1_RETURN_I32: /* Permit omitting the destination for plain ATOM1 */ - if (!bi_count_write_registers(I, 0)) { + if (arch < 15 && !bi_count_write_registers(I, 0)) { hex |= (0x40ull << 40); // fake read } /* 64-bit source */ va_validate_register_pair(I, 0); - hex |= (uint64_t)va_pack_src(I, 0) << 0; + if (arch >= 15) + hex |= va_pack_src_v15(I, 0, 0); + else + hex |= (uint64_t)va_pack_src(I, 0) << 0; hex |= va_pack_byte_offset_8(I); - hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22; + hex |= ((uint64_t)va_pack_atom_opc_1(I)) << ((arch >= 15) ? 
24 : 22); break; case BI_OPCODE_ACMPXCHG_I32: @@ -942,41 +1236,67 @@ va_pack_instr(const bi_instr *I, unsigned arch) case BI_OPCODE_ATOM_RETURN_I32: /* 64-bit source */ va_validate_register_pair(I, 1); - hex |= (uint64_t)va_pack_src(I, 1) << 0; + if (arch >= 15) + hex |= va_pack_src_v15(I, 1, 0); + else + hex |= (uint64_t)va_pack_src(I, 1) << 0; hex |= va_pack_byte_offset_8(I); - hex |= ((uint64_t)va_pack_atom_opc(I)) << 22; + hex |= ((uint64_t)va_pack_atom_opc(I)) << ((arch >= 15) ? 24 : 22); - if (I->op == BI_OPCODE_ATOM_RETURN_I32) - hex |= (0xc0ull << 40); // flags + if (arch >= 15) { + if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) { + /* Change bits [51;50] to be ACMPXCHG */ + pack_assert(I, ((hex >> 50) & 0b11) == 0b01); + hex ^= (0b11ull << 50); + } + } else { + if (I->op == BI_OPCODE_ATOM_RETURN_I32) + hex |= (0xc0ull << 40); // flags - if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) - hex |= (1 << 26); /* .compare */ + if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) + hex |= (1 << 26); /* .compare */ + } break; case BI_OPCODE_LD_CVT: - hex |= (uint64_t)va_pack_src(I, 0); + if (arch >= 15) + hex |= va_pack_src_v15(I, 0, 0); + else + hex |= (uint64_t)va_pack_src(I, 0); hex |= va_pack_byte_offset(I); /* Conversion descriptor */ - hex |= (uint64_t)va_pack_src(I, 2) << 16; - hex |= (uint64_t)I->mem_access << 37; + if (arch >= 15) + hex |= va_pack_src_v15(I, 2, 2); + else + hex |= (uint64_t)va_pack_src(I, 2) << 16; + hex |= (uint64_t)I->mem_access << ((arch >= 15) ? 
35 : 37); break; case BI_OPCODE_ST_CVT: /* Staging read */ va_validate_register_pair(I, 1); - hex |= (uint64_t)va_pack_src(I, 1) << 0; + if (arch >= 15) + hex |= va_pack_src_v15(I, 1, 0); + else + hex |= (uint64_t)va_pack_src(I, 1) << 0; hex |= va_pack_byte_offset(I); /* Conversion descriptor */ - hex |= (uint64_t)va_pack_src(I, 3) << 16; - hex |= (uint64_t)I->mem_access << 37; + if (arch >= 15) + hex |= va_pack_src_v15(I, 3, 2); + else + hex |= (uint64_t)va_pack_src(I, 3) << 16; + hex |= (uint64_t)I->mem_access << ((arch >= 15) ? 35 : 37); break; case BI_OPCODE_BLEND: { /* Source 0 - Blend descriptor (64-bit) */ - hex |= ((uint64_t)va_pack_src(I, 2)) << 0; + if (arch >= 15) + hex |= va_pack_src_v15(I, 2, 0); + else + hex |= ((uint64_t)va_pack_src(I, 2)) << 0; va_validate_register_pair(I, 2); /* Target */ @@ -987,7 +1307,10 @@ va_pack_instr(const bi_instr *I, unsigned arch) hex |= ((I->branch_offset >> 3) << 8); /* Source 2 - coverage mask */ - hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16; + if (arch >= 15) + hex |= va_pack_src_v15(I, 1, 2); + else + hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16; /* Vector size */ unsigned vecsize = 4; @@ -997,7 +1320,7 @@ va_pack_instr(const bi_instr *I, unsigned arch) } case BI_OPCODE_LD_GCLK_U64: - hex |= va_pack_gclk(I); + hex |= va_pack_gclk(I) << ((arch >= 15) ? 8 : 0); break; case BI_OPCODE_TEX_GRADIENT: @@ -1005,7 +1328,10 @@ va_pack_instr(const bi_instr *I, unsigned arch) case BI_OPCODE_TEX_FETCH: case BI_OPCODE_TEX_GATHER: { /* Image to read from */ - hex |= ((uint64_t)va_pack_src(I, 1)) << 0; + if (arch >= 15) + hex |= va_pack_src_v15(I, 1, 0); + else + hex |= ((uint64_t)va_pack_src(I, 1)) << 0; if ((I->op == BI_OPCODE_TEX_FETCH || I->op == BI_OPCODE_TEX_GRADIENT) && I->shadow) @@ -1022,7 +1348,7 @@ va_pack_instr(const bi_instr *I, unsigned arch) if (I->skip) hex |= (1ull << 39); if (!bi_is_regfmt_16(I->register_format)) - hex |= (1ull << 46); + hex |= (1ull << ((arch >= 15) ? 
38 : 46)); if (I->op == BI_OPCODE_TEX_GRADIENT) { if (I->force_delta_enable) @@ -1044,20 +1370,35 @@ va_pack_instr(const bi_instr *I, unsigned arch) hex |= ((uint64_t)I->fetch_component) << 14; } - hex |= (I->write_mask << 22); + hex |= (I->write_mask << ((arch >= 15) ? 24 : 22)); hex |= ((uint64_t)I->dimension) << 28; break; } default: - if (!info.exact && I->op != BI_OPCODE_NOP) + if (!info.exact && (arch >= 15 || I->op != BI_OPCODE_NOP)) invalid_instruction(I, "opcode"); hex |= va_pack_alu(I, arch); break; } + /* On v15, some instructions require an encoded null src. */ + if (arch >= 15) { + switch (I->op) { + case BI_OPCODE_NOP: + case BI_OPCODE_LD_VAR_FLAT_IMM: + case BI_OPCODE_LD_VAR_BUF_FLAT_IMM: + case BI_OPCODE_LD_GCLK_U64: + case BI_OPCODE_BARRIER: + hex |= va_pack_src_null_v15(0); + break; + default: + break; + } + } + return hex; } diff --git a/src/panfrost/compiler/bifrost/valhall/va_validate.c b/src/panfrost/compiler/bifrost/valhall/va_validate.c index b597692eb00..da32405849d 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_validate.c +++ b/src/panfrost/compiler/bifrost/valhall/va_validate.c @@ -93,7 +93,8 @@ fau_state_uniform(struct fau_state *fau, bi_index idx, enum bi_opcode op) } static bool -fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op) +fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op, + unsigned arch) { for (unsigned i = 0; i < ARRAY_SIZE(fau->buffer); ++i) { bi_index buf = fau->buffer[i]; @@ -106,7 +107,7 @@ fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op) /* Instructions executed by the messaging unit should not encode WARP_ID or * anything from special page 3. 
*/ if (can_run_on_message_unit(op) && - (va_fau_page(idx.value) == 3 || idx.value == BIR_FAU_WARP_ID)) + (va_fau_page(idx.value, arch) == 3 || idx.value == BIR_FAU_WARP_ID)) return false; return fau->uniform_slot == -1 || can_use_two_fau_indices(op); @@ -114,7 +115,7 @@ fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op) static bool valid_src(struct fau_state *fau, unsigned fau_page, bi_index src, - enum bi_opcode op) + enum bi_opcode op, unsigned arch) { if (src.type != BI_INDEX_FAU) return true; @@ -128,42 +129,42 @@ valid_src(struct fau_state *fau, unsigned fau_page, bi_index src, return fau_state_buffer(fau, src); } - bool valid = (fau_page == va_fau_page(src.value)); + bool valid = (fau_page == va_fau_page(src.value, arch)); valid &= fau_state_buffer(fau, src); if (src.value & BIR_FAU_UNIFORM) valid &= fau_state_uniform(fau, src, op); else if (fau_is_special(src.value)) - valid &= fau_state_special(fau, src, op); + valid &= fau_state_special(fau, src, op, arch); return valid; } bool -va_validate_fau(bi_instr *I) +va_validate_fau(bi_instr *I, unsigned arch) { bool valid = true; struct fau_state fau = {.uniform_slot = -1}; - unsigned fau_page = va_select_fau_page(I); + unsigned fau_page = va_select_fau_page(I, arch); bi_foreach_src(I, s) { - valid &= valid_src(&fau, fau_page, I->src[s], I->op); + valid &= valid_src(&fau, fau_page, I->src[s], I->op, arch); } return valid; } void -va_repair_fau(bi_builder *b, bi_instr *I) +va_repair_fau(bi_builder *b, bi_instr *I, unsigned arch) { struct fau_state fau = {.uniform_slot = -1}; - unsigned fau_page = va_select_fau_page(I); + unsigned fau_page = va_select_fau_page(I, arch); bi_foreach_src(I, s) { struct fau_state push = fau; bi_index src = I->src[s]; - if (!valid_src(&fau, fau_page, src, I->op)) { + if (!valid_src(&fau, fau_page, src, I->op, arch)) { bi_replace_src(I, s, bi_mov_i32(b, bi_strip_index(src))); /* Rollback update. 
Since the replacement move doesn't affect FAU @@ -180,7 +181,7 @@ va_validate(FILE *fp, bi_context *ctx) bool errors = false; bi_foreach_instr_global(ctx, I) { - if (!va_validate_fau(I)) { + if (!va_validate_fau(I, ctx->arch)) { if (!errors) { fprintf(fp, "Validation failed, this is a bug. Shader:\n\n"); bi_print_shader(ctx, fp);