pan/va: Implement v15 encoding support

Update va_pack to support the new encodings required by v15.
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2026-03-12 11:29:46 +01:00
parent e1739f271a
commit 6a7aecaeec
8 changed files with 539 additions and 99 deletions

View file

@ -4531,7 +4531,7 @@ bi_compile_variant_nir(nir_shader *nir,
va_lower_constants(ctx, I, const_hist, min_count_for_fau);
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
va_repair_fau(&b, I);
va_repair_fau(&b, I, ctx->arch);
}
_mesa_hash_table_u64_destroy(const_hist);

View file

@ -597,6 +597,18 @@
<value desc="Slot #7">slot7</value>
</enum>
<enum name="Slot v15">
<desc>
Dependency slot set on a message-passing instruction that writes to
registers. Before reading the destination, a future instruction must wait
on the specified slot. Slot #7 is for `BARRIER` instructions only.
</desc>
<value desc="Slot #0">slot0</value>
<value desc="Slot #1">slot1</value>
<value desc="Slot #2">slot2</value>
<value desc="Slot #7">slot7</value>
</enum>
<enum name="Memory access">
<desc>Memory access hint for a `LOAD` or `STORE` instruction.</desc>
<value desc="No hint (global)" default="true">none</value>

View file

@ -48,10 +48,12 @@ TEST_F(ValhallPacking, Moves)
{
bi_instr *I = bi_mov_i32_to(b, bi_register(1), bi_register(2));
CASE_ARCH(I, 10, 0x0091c10000000002ULL);
CASE_ARCH(I, 15, 0x0060010000200002ULL);
I = bi_mov_i32_to(b, bi_register(1),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false));
CASE_ARCH(I, 10, 0x0091c1000000008aULL);
CASE_ARCH(I, 15, 0x006101000020000aULL);
}
TEST_F(ValhallPacking, Fadd)
@ -59,44 +61,55 @@ TEST_F(ValhallPacking, Fadd)
bi_instr *I =
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2));
CASE_ARCH(I, 10, 0x00a4c00000000201ULL);
CASE_ARCH(I, 15, 0x00f0000000000201ULL);
I =
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2)));
CASE_ARCH(I, 10, 0x00a4c02000000201ULL);
CASE_ARCH(I, 15, 0x00f0002000000201ULL);
I =
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2)));
CASE_ARCH(I, 10, 0x00a4c01000000201ULL);
CASE_ARCH(I, 15, 0x00f0001000000201ULL);
I = bi_fadd_v2f16_to(b, bi_register(0),
bi_swz_16(bi_register(1), false, false),
bi_swz_16(bi_register(0), true, true));
CASE_ARCH(I, 10, 0x00a5c0000c000001ULL);
CASE_ARCH(I, 15, 0x00f400000c000001ULL);
I = bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0));
CASE_ARCH(I, 10, 0x00a5c00028000001ULL);
CASE_ARCH(I, 15, 0x00f4000028000001ULL);
I = bi_fadd_v2f16_to(b, bi_register(0), bi_register(1),
bi_swz_16(bi_register(0), true, false));
CASE_ARCH(I, 10, 0x00a5c00024000001ULL);
CASE_ARCH(I, 15, 0x00f4000024000001ULL);
I = bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))),
bi_neg(zero));
CASE_ARCH(I, 10, 0x00a5c0902800c040ULL);
CASE_ARCH(I, 15, 0x00f600902800c080ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), zero);
CASE_ARCH(I, 10, 0x00a4c0000000c001ULL);
CASE_ARCH(I, 15, 0x00f200000000c001ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(zero));
CASE_ARCH(I, 10, 0x00a4c0100000c001ULL);
CASE_ARCH(I, 15, 0x00f200100000c001ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), true));
CASE_ARCH(I, 10, 0x00a4c00008000001ULL);
CASE_ARCH(I, 15, 0x00f0000008000001ULL);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), false));
CASE_ARCH(I, 10, 0x00a4c00004000001ULL);
CASE_ARCH(I, 15, 0x00f0000004000001ULL);
}
TEST_F(ValhallPacking, Clper)
@ -105,6 +118,7 @@ TEST_F(ValhallPacking, Clper)
bi_byte(n4567, 0), BI_INACTIVE_RESULT_F1,
BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16);
CASE_ARCH(I, 10, 0x00a0c030128fc900);
CASE_ARCH(I, 15, 0x00e20030028fc900);
}
TEST_F(ValhallPacking, Clamps)
@ -112,9 +126,11 @@ TEST_F(ValhallPacking, Clamps)
bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(bi_abs(bi_register(2))));
CASE_ARCH(I, 10, 0x00a4c03000000201ULL);
CASE_ARCH(I, 15, 0x00f0003000000201ULL);
I->clamp = BI_CLAMP_CLAMP_M1_1;
CASE_ARCH(I, 10, 0x00a4c03200000201ULL);
CASE_ARCH(I, 15, 0x00f0003080000201ULL);
}
TEST_F(ValhallPacking, Misc)
@ -123,18 +139,22 @@ TEST_F(ValhallPacking, Misc)
b, bi_register(1), bi_discard(bi_register(1)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 4), false), bi_neg(zero));
CASE_ARCH(I, 10, 0x00b2c10400c08841ULL);
CASE_ARCH(I, 15, 0x0166010400c00881ULL);
I = bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))),
BI_ROUND_RTN);
CASE_ARCH(I, 10, 0x0090c240800d0042ULL);
CASE_ARCH(I, 15, 0x00600242004d0082ULL);
I = bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0),
BI_ROUND_RTN);
CASE_ARCH(I, 10, 0x00904000a00f0000ULL);
/* Removed on v11 */
I = bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN);
CASE_ARCH(I, 10, 0x00904000900f0001ULL);
/* Removed on v11 */
}
TEST_F(ValhallPacking, FaddImm)
@ -142,10 +162,12 @@ TEST_F(ValhallPacking, FaddImm)
bi_instr *I = bi_fadd_imm_f32_to(b, bi_register(2),
bi_discard(bi_register(2)), 0x4847C6C0);
CASE_ARCH(I, 10, 0x0114C24847C6C042ULL);
CASE_ARCH(I, 15, 0x0064024847c6c082ULL);
I = bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)),
0x70AC6784);
CASE_ARCH(I, 10, 0x0115C270AC678442ULL);
CASE_ARCH(I, 15, 0x00620270ac678482ULL);
}
TEST_F(ValhallPacking, Comparions)
@ -155,12 +177,14 @@ TEST_F(ValhallPacking, Comparions)
bi_discard(bi_swz_16(bi_register(2), true, false)), zero, BI_CMPF_GT,
BI_RESULT_TYPE_M1);
CASE_ARCH(I, 10, 0x00f9c21184c04243);
CASE_ARCH(I, 15, 0x01e40212c6c08283);
I = bi_fcmp_or_v2f16_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), false, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1);
CASE_ARCH(I, 10, 0x00f5c20190c04243);
CASE_ARCH(I, 15, 0x01e4020352c08283);
}
TEST_F(ValhallPacking, Conversions)
@ -168,6 +192,7 @@ TEST_F(ValhallPacking, Conversions)
bi_instr *I =
bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)));
CASE_ARCH(I, 10, 0x0090c22000070042);
/* Removed on v11 */
}
TEST_F(ValhallPacking, BranchzI16)
@ -176,6 +201,7 @@ TEST_F(ValhallPacking, BranchzI16)
bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
I->branch_offset = 1;
CASE_ARCH(I, 10, 0x001fc03000000102);
CASE_ARCH(I, 15, 0x02b8003000000102);
}
TEST_F(ValhallPacking, BranchzI16Backwards)
@ -183,6 +209,7 @@ TEST_F(ValhallPacking, BranchzI16Backwards)
bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ);
I->branch_offset = -8;
CASE_ARCH(I, 10, 0x001fc017fffff8c0);
CASE_ARCH(I, 15, 0x02b90017fffff8c0);
}
TEST_F(ValhallPacking, Blend)
@ -192,6 +219,7 @@ TEST_F(ValhallPacking, Blend)
bi_fau(BIR_FAU_BLEND_0, false), bi_fau(BIR_FAU_BLEND_0, true),
bi_null(), BI_REGISTER_FORMAT_F16, 2, 0);
CASE_ARCH(I, 10, 0x007f4004333c00f0);
CASE_ARCH(I, 15, 0x031b0082333c00f0);
}
TEST_F(ValhallPacking, Mux)
@ -200,6 +228,7 @@ TEST_F(ValhallPacking, Mux)
b, bi_register(0), bi_discard(bi_register(0)), bi_discard(bi_register(4)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT);
CASE_ARCH(I, 10, 0x00b8c00300804440ull);
CASE_ARCH(I, 15, 0x017c000c80008480ull);
}
TEST_F(ValhallPacking, AtestFP16)
@ -208,6 +237,7 @@ TEST_F(ValhallPacking, AtestFP16)
bi_half(bi_register(1), true),
bi_fau(BIR_FAU_ATEST_PARAM, false));
CASE_ARCH(I, 10, 0x007dbc0208ea013c);
CASE_ARCH(I, 15, 0x03d43c0108ea013c);
}
TEST_F(ValhallPacking, AtestFP32)
@ -215,6 +245,7 @@ TEST_F(ValhallPacking, AtestFP32)
bi_instr *I = bi_atest_to(b, bi_register(60), bi_register(60), one,
bi_fau(BIR_FAU_ATEST_PARAM, false));
CASE_ARCH(I, 10, 0x007dbc0200ead03c);
CASE_ARCH(I, 15, 0x03d63c0100ead03c);
}
TEST_F(ValhallPacking, Transcendentals)
@ -222,18 +253,28 @@ TEST_F(ValhallPacking, Transcendentals)
bi_instr *I =
bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true);
CASE_ARCH(I, 10, 0x0099c10001000000);
CASE_ARCH(I, 15, 0x0060010041200000);
I = bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false,
true);
CASE_ARCH(I, 10, 0x0099c00001020040);
CASE_ARCH(I, 15, 0x0060000041220080);
I = bi_frsq_f32_to(b, bi_register(2), bi_register(1));
CASE_ARCH(I, 10, 0x009cc20000020001);
CASE_ARCH(I, 15, 0x0060020001820001);
I = bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(2)), bi_neg(zero),
bi_discard(bi_register(0)), BI_SPECIAL_LEFT);
CASE_ARCH(I, 10, 0x0162c00440c04241);
CASE_ARCH(I, 15, 0x0264000e80c08281);
I = bi_fma_rscale_f32_to(b, bi_register(0), bi_register(1), bi_register(2),
bi_neg(zero), bi_discard(bi_register(0)),
BI_SPECIAL_N);
CASE_ARCH(I, 10, 0x0161c00440c00201);
CASE_ARCH(I, 15, 0x0264000d80c00201);
}
TEST_F(ValhallPacking, Csel)
@ -243,18 +284,21 @@ TEST_F(ValhallPacking, Csel)
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_EQ);
CASE_ARCH(I, 10, 0x0150c10085844342);
CASE_ARCH(I, 15, 0x027c010005048382);
I = bi_csel_u32_to(
b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT);
CASE_ARCH(I, 10, 0x0150c10485844342);
CASE_ARCH(I, 15, 0x027c010805048382);
I = bi_csel_s32_to(
b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT);
CASE_ARCH(I, 10, 0x0158c10485844342);
CASE_ARCH(I, 15, 0x027c014805048382);
}
TEST_F(ValhallPacking, LdAttrImm)
@ -265,6 +309,7 @@ TEST_F(ValhallPacking, LdAttrImm)
I->table = 1;
CASE_ARCH(I, 10, 0x0066800433117d7c);
CASE_ARCH(I, 15, 0x038400023311bdbc);
}
TEST_F(ValhallPacking, LdVarBufImmF16)
@ -274,12 +319,14 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
BI_SAMPLE_CENTER, BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE, BI_VECSIZE_V4,
0);
CASE_ARCH(I, 10, 0x005d82143300003d);
CASE_ARCH(I, 15, 0x0310020a3f00003d);
I = bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE,
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 0);
CASE_ARCH(I, 10, 0x005d80843300003d);
CASE_ARCH(I, 15, 0x031000423f00003d);
I = bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
@ -287,6 +334,7 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
BI_VECSIZE_V4, 8);
CASE_ARCH(I, 10, 0x005d80443308003d);
CASE_ARCH(I, 11, 0x005d80443300083d);
CASE_ARCH(I, 15, 0x031000223f00083d);
}
TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
@ -294,10 +342,12 @@ TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
bi_instr *I = bi_ld_var_buf_flat_imm_to(
b, bi_register(0), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 0x12);
CASE_ARCH(I, 14, 0x0040800832001200);
CASE_ARCH(I, 15, 0x033900043a0012c0);
I = bi_ld_var_buf_flat_imm_to(b, bi_register(0), BI_REGISTER_FORMAT_F16,
BI_VECSIZE_V4, 0x12);
CASE_ARCH(I, 14, 0x0040800433001200);
CASE_ARCH(I, 15, 0x033900023b0012c0);
}
TEST_F(ValhallPacking, LdVarBufFlat)
@ -305,10 +355,12 @@ TEST_F(ValhallPacking, LdVarBufFlat)
bi_instr *I = bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
CASE_ARCH(I, 14, 0x005f80083200003d);
CASE_ARCH(I, 15, 0x031400043a00003d);
I = bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4);
CASE_ARCH(I, 14, 0x005f80043300003d);
CASE_ARCH(I, 15, 0x031400023b00003d);
}
TEST_F(ValhallPacking, LeaBufImm)
@ -316,6 +368,7 @@ TEST_F(ValhallPacking, LeaBufImm)
bi_instr *I =
bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59)));
CASE_ARCH(I, 10, 0x005e84040000007b);
CASE_ARCH(I, 15, 0x03080402000000bb);
}
TEST_F(ValhallPacking, StoreMemoryAccess)
@ -324,6 +377,7 @@ TEST_F(ValhallPacking, StoreMemoryAccess)
bi_discard(bi_register(5)), BI_SEG_NONE, 0);
I->mem_access = VA_MEMORY_ACCESS_ESTREAM;
CASE_ARCH(I, 10, 0x0061400632000044);
CASE_ARCH(I, 15, 0x0320009302000084);
}
TEST_F(ValhallPacking, Convert16To32)
@ -331,26 +385,32 @@ TEST_F(ValhallPacking, Convert16To32)
bi_instr *I = bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), false)));
CASE_ARCH(I, 10, 0x0090c20000140077);
CASE_ARCH(I, 15, 0x00600200005400b7);
I = bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), true)));
CASE_ARCH(I, 10, 0x0090c20010140077);
CASE_ARCH(I, 15, 0x00600200105400b7);
I = bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), false)));
CASE_ARCH(I, 10, 0x0090c20000150077);
/* Removed on v11 */
I = bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), true)));
CASE_ARCH(I, 10, 0x0090c20010150077);
/* Removed on v11 */
I = bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), false)));
CASE_ARCH(I, 10, 0x0090c20000040077);
CASE_ARCH(I, 15, 0x00600200004400b7);
I = bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_half(bi_register(55), true)));
CASE_ARCH(I, 10, 0x0090c20010040077);
CASE_ARCH(I, 15, 0x00600200104400b7);
}
TEST_F(ValhallPacking, Swizzle8)
@ -359,6 +419,7 @@ TEST_F(ValhallPacking, Swizzle8)
bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), zero,
zero, BI_CMPF_NE, BI_RESULT_TYPE_I1);
CASE_ARCH(I, 10, 0x00f2c14300c0c000);
/* Removed on v11 */
}
TEST_F(ValhallPacking, FauPage1)
@ -366,6 +427,7 @@ TEST_F(ValhallPacking, FauPage1)
bi_instr *I = bi_mov_i32_to(
b, bi_register(1), bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 32), false));
CASE_ARCH(I, 10, 0x0291c10000000080ULL);
CASE_ARCH(I, 15, 0x0061010000200040ULL);
}
TEST_F(ValhallPacking, LdTileV3F16)
@ -374,6 +436,7 @@ TEST_F(ValhallPacking, LdTileV3F16)
bi_register(60), bi_register(3),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V3);
CASE_ARCH(I, 10, 0x0078840423033c40);
CASE_ARCH(I, 15, 0x03c0040223033c80);
}
TEST_F(ValhallPacking, Rhadd8)
@ -381,4 +444,26 @@ TEST_F(ValhallPacking, Rhadd8)
bi_instr *I = bi_hadd_v4s8_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(0)), BI_ROUND_RTP);
CASE_ARCH(I, 10, 0x00aac000400b4041);
/* Removed on v11 */
}
/* Spot-check the packing of atomic instructions against known-good
 * encodings for both v10 and v15. */
TEST_F(ValhallPacking, Atomics)
{
   /* ATOM1.RETURN.i64 with opcode BI_ATOM_OPC_AINC */
   bi_instr *I =
      bi_atom1_return_i64_to(b, bi_register(0), bi_discard(bi_register(2)),
                             bi_register(3), BI_ATOM_OPC_AINC, 2);
   CASE_ARCH(I, 10, 0x0069800428000042);
   CASE_ARCH(I, 15, 0x0328000220000082);

   /* ATOM.RETURN.i32 with opcode BI_ATOM_OPC_AXCHG */
   I = bi_atom_return_i32_to(b, bi_register(0), bi_discard(bi_register(1)),
                             bi_register(2), bi_register(3), BI_ATOM_OPC_AXCHG,
                             1);
   CASE_ARCH(I, 10, 0x0120c1021bc00002);
   CASE_ARCH(I, 15, 0x032401c10f000002);

   /* ATOM.RETURN.i64 with opcode BI_ATOM_OPC_ACMPXCHG */
   I = bi_atom_return_i64_to(b, bi_register(0), bi_register(2), bi_register(6),
                             bi_register(7), BI_ATOM_OPC_ACMPXCHG, 2);
   CASE_ARCH(I, 10, 0x0120c2182fc00006);
   CASE_ARCH(I, 15, 0x032802cc2f000006);
}

View file

@ -9,9 +9,9 @@
#include <gtest/gtest.h>
#define CASE(instr, expected) \
#define CASE_ARCH(instr, arch, expected) \
do { \
if (va_validate_fau(instr) != expected) { \
if (va_validate_fau(instr, arch) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
@ -19,8 +19,8 @@
} \
} while (0)
#define VALID(instr) CASE(instr, true)
#define INVALID(instr) CASE(instr, false)
#define VALID(instr) CASE_ARCH(instr, 10, true)
#define INVALID(instr) CASE_ARCH(instr, 10, false)
class ValidateFau : public testing::Test {
protected:

View file

@ -13,9 +13,9 @@
extern "C" {
#endif
bool va_validate_fau(bi_instr *I);
bool va_validate_fau(bi_instr *I, unsigned arch);
void va_validate(FILE *fp, bi_context *ctx);
void va_repair_fau(bi_builder *b, bi_instr *I);
void va_repair_fau(bi_builder *b, bi_instr *I, unsigned arch);
void va_fuse_add_imm(bi_instr *I);
void va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, uint32_t min_fau_count);
void va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts);
@ -28,14 +28,15 @@ void va_gather_hsr_info(bi_context *ctx, struct pan_shader_info *info);
uint64_t va_pack_instr(const bi_instr *I, unsigned arch);
static inline unsigned
va_fau_page(enum bir_fau value)
va_fau_page(enum bir_fau value, unsigned arch)
{
/* Uniform slots of FAU have a 7-bit index. The top 2-bits are the page; the
* bottom 5-bits are specified in the source.
*/
if (value & BIR_FAU_UNIFORM) {
unsigned value_shift = arch >= 15 ? 6 : 5;
unsigned slot = value & ~BIR_FAU_UNIFORM;
unsigned page = slot >> 5;
unsigned page = slot >> value_shift;
assert(page <= 3);
return page;
@ -57,11 +58,11 @@ va_fau_page(enum bir_fau value)
}
static inline unsigned
va_select_fau_page(const bi_instr *I)
va_select_fau_page(const bi_instr *I, unsigned arch)
{
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_FAU)
return va_fau_page((enum bir_fau)I->src[s].value);
return va_fau_page((enum bir_fau)I->src[s].value, arch);
}
return 0;

View file

@ -520,7 +520,7 @@ va_assign_slots(bi_context *ctx)
bi_foreach_instr_global(ctx, I) {
if (I->op == BI_OPCODE_BARRIER) {
I->slot = 7;
I->slot = (ctx->arch >= 15) ? VA_SLOT_V15_SLOT7 : VA_SLOT_SLOT7;
} else if (I->op == BI_OPCODE_ZS_EMIT || I->op == BI_OPCODE_ATEST) {
I->slot = 0;
} else if (bi_get_opcode_props(I)->message) {

View file

@ -74,6 +74,15 @@ va_pack_reg(const bi_instr *I, bi_index idx)
return idx.value;
}
/*
 * Encode a register index for v15. Register indices on v15 are 7 bits wide,
 * so any value below 128 is accepted and passed through unchanged.
 */
static unsigned
va_pack_reg_v15(const bi_instr *I, bi_index idx)
{
   pack_assert(I, idx.type == BI_INDEX_REGISTER);
   pack_assert(I, idx.value < 128);
   return idx.value;
}
static unsigned
va_pack_fau_special(const bi_instr *I, enum bir_fau fau)
{
@ -124,6 +133,21 @@ va_pack_fau_64(const bi_instr *I, bi_index idx)
return (0x7 << 5) | (va_pack_fau_special(I, idx.value) << 1);
}
/*
 * Encode a 64-bit-aligned FAU source for v15. The low six bits of the FAU
 * value select the slot; the slot is shifted up by one so the caller can OR
 * in the word-select bit (idx.offset) at bit 0.
 */
static unsigned
va_pack_fau_64_v15(const bi_instr *I, bi_index idx)
{
   pack_assert(I, idx.type == BI_INDEX_FAU);

   unsigned slot = idx.value & BITFIELD_MASK(6);

   if (idx.value & BIR_FAU_IMMEDIATE)
      return (0x7 << 6) | (slot << 1);

   if (idx.value & BIR_FAU_UNIFORM)
      return (0x2 << 7) | (slot << 1);

   return (0xf << 5) | (va_pack_fau_special(I, idx.value) << 1);
}
static unsigned
va_pack_src(const bi_instr *I, unsigned s)
{
@ -142,6 +166,33 @@ va_pack_src(const bi_instr *I, unsigned s)
invalid_instruction(I, "type of source %u", s);
}
/*
 * Encode source s for v15. The low eight bits of the source encoding go in
 * the 8-bit source slot at byte `loc`; the ninth bit is gathered into
 * bit 48 + loc of the instruction word.
 */
static uint64_t
va_pack_src_v15(const bi_instr *I, unsigned s, unsigned loc)
{
   bi_index idx = I->src[s];
   uint64_t enc = 0;

   if (idx.type == BI_INDEX_REGISTER) {
      enc = va_pack_reg_v15(I, idx);

      /* Bit 7 of a register source marks it for discard */
      if (idx.discard)
         enc |= (1 << 7);
   } else if (idx.type == BI_INDEX_FAU) {
      pack_assert(I, idx.offset <= 1);
      enc = va_pack_fau_64_v15(I, idx) | idx.offset;
   } else {
      invalid_instruction(I, "type of source %u", s);
   }

   return ((enc & 0xff) << (8 * loc)) | (((enc >> 8) & 0x1) << (48 + loc));
}
static unsigned
va_pack_wrmask(const bi_instr *I)
{
@ -211,6 +262,20 @@ va_pack_dest(const bi_instr *I)
return va_pack_reg(I, I->dest[0]) | (va_pack_wrmask(I) << 6);
}
/*
 * Encode the destination register for v15, with the write mask at bit 13.
 * The 64-bit SHADDX destinations are the exception: they are encoded with
 * a 0x0 write mask.
 */
static unsigned
va_pack_dest_v15(const bi_instr *I)
{
   assert(I->nr_dests);

   unsigned reg = va_pack_reg_v15(I, I->dest[0]);

   bool wide_dest =
      I->op == BI_OPCODE_SHADDX_S64 || I->op == BI_OPCODE_SHADDX_U64;

   return wide_dest ? reg : reg | (va_pack_wrmask(I) << 13);
}
static enum va_widen
va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz)
{
@ -452,6 +517,18 @@ va_pack_rhadd(const bi_instr *I)
}
}
/*
 * Pack the shared clamp/special/round field for FMA_RSCALE.f32 on v15
 * (the caller places it at bit 32). When a special mode is active, bit 2
 * is set alongside the special mode bits; otherwise the plain clamp value
 * is returned.
 *
 * NOTE(review): the first branch returns a bare 0x4 for BI_SPECIAL_N with
 * RTZ rounding, which only differs from the generic `0x4 | special` path
 * if BI_SPECIAL_N's low bits are non-zero — confirm against the enum
 * values and the v15 encoding tables.
 */
static uint64_t
va_pack_clamp_special_round_v15(const bi_instr *I)
{
   pack_assert(I, I->special < 4);

   if (I->special == BI_SPECIAL_N && I->round == BI_ROUND_RTZ)
      return 0x4;
   else if (I->special)
      return 0x4 | I->special;
   else
      return I->clamp;
}
static uint64_t
va_pack_alu(const bi_instr *I, unsigned arch)
{
@ -465,25 +542,25 @@ va_pack_alu(const bi_instr *I, unsigned arch)
case BI_OPCODE_FREXPM_F32:
case BI_OPCODE_FREXPM_V2F16:
if (I->sqrt)
hex |= 1ull << 24;
hex |= 1ull << ((arch >= 15) ? 30 : 24);
if (I->log)
hex |= 1ull << 25;
hex |= 1ull << ((arch >= 15) ? 31 : 25);
break;
case BI_OPCODE_FLUSH_F32:
case BI_OPCODE_FLUSH_V2F16:
hex |= I->nan_mode << 8;
hex |= I->nan_mode << ((arch >= 15) ? 30 : 8);
if (I->ftz)
hex |= 1ull << 10;
hex |= 1ull << ((arch >= 15) ? 32 : 10);
if (I->flush_inf)
hex |= 1ull << 11;
hex |= 1ull << ((arch >= 15) ? 33 : 11);
break;
/* Add mux type */
case BI_OPCODE_MUX_I32:
case BI_OPCODE_MUX_V2I16:
case BI_OPCODE_MUX_V4I8:
hex |= (uint64_t)I->mux << 32;
hex |= (uint64_t)I->mux << ((arch >= 15) ? 34 : 32);
break;
/* Add .eq flag */
@ -495,7 +572,7 @@ va_pack_alu(const bi_instr *I, unsigned arch)
hex |= (1ull << 36);
if (I->op == BI_OPCODE_BRANCHZI)
hex |= (0x1ull << 40); /* Absolute */
hex |= (0x1ull << ((arch >= 15) ? 31 : 40)); /* Absolute */
else
hex |= ((uint64_t)I->branch_offset & BITFIELD_MASK(27)) << 8;
@ -511,7 +588,46 @@ va_pack_alu(const bi_instr *I, unsigned arch)
case BI_OPCODE_RSHIFT_XOR_I32:
case BI_OPCODE_RSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V4I8:
hex |= (uint64_t)I->arithmetic << 34;
if (arch >= 15) {
/* Rewrite exact to ARSHIFT */
if (I->arithmetic) {
switch (I->op) {
case BI_OPCODE_RSHIFT_AND_I32:
case BI_OPCODE_RSHIFT_AND_V2I16:
case BI_OPCODE_RSHIFT_AND_V4I8: {
uint64_t arshift_and_op = (0xcULL << 30);
/* Check that we can safely overwrite opcode */
pack_assert(I, ((info.exact & (0xfULL << 30)) |
arshift_and_op) == arshift_and_op);
hex |= arshift_and_op;
break;
}
case BI_OPCODE_RSHIFT_OR_I32:
case BI_OPCODE_RSHIFT_OR_V2I16:
case BI_OPCODE_RSHIFT_OR_V4I8: {
uint64_t arshift_or_op = (0xdULL << 30);
/* Check that we can safely overwrite opcode */
pack_assert(I, ((info.exact & (0xfULL << 30)) | arshift_or_op) ==
arshift_or_op);
hex |= arshift_or_op;
break;
}
case BI_OPCODE_RSHIFT_XOR_I32:
case BI_OPCODE_RSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V4I8: {
uint64_t arshift_xor_op = (0xbULL << 30);
/* Check that we can safely overwrite opcode */
pack_assert(I, ((info.exact & (0xfULL << 30)) |
arshift_xor_op) == arshift_xor_op);
hex |= arshift_xor_op;
break;
}
default:
UNREACHABLE("RSHIFT->ARSHIFT");
}
}
} else
hex |= (uint64_t)I->arithmetic << 34;
break;
case BI_OPCODE_LEA_BUF_IMM:
@ -562,8 +678,8 @@ va_pack_alu(const bi_instr *I, unsigned arch)
}
hex |= ((uint64_t)va_pack_source_format(I)) << 24;
hex |= ((uint64_t)I->update) << 36;
hex |= ((uint64_t)I->sample) << 38;
hex |= ((uint64_t)I->update) << ((arch >= 15) ? 35 : 36);
hex |= ((uint64_t)I->sample) << ((arch >= 15) ? 37 : 38);
break;
case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
@ -601,20 +717,18 @@ va_pack_alu(const bi_instr *I, unsigned arch)
break;
}
/* FMA_RSCALE.f32 special modes treated as extra opcodes */
if (I->op == BI_OPCODE_FMA_RSCALE_F32) {
pack_assert(I, I->special < 4);
hex |= ((uint64_t)I->special) << 48;
}
/* Add the normal destination or a placeholder. Staging destinations are
* added elsewhere, as they require special handling for control fields.
*/
if (info.has_dest && info.nr_staging_dests == 0) {
hex |= (uint64_t)va_pack_dest(I) << 40;
if (arch >= 15)
hex |= (uint64_t)va_pack_dest_v15(I) << 40;
else
hex |= (uint64_t)va_pack_dest(I) << 40;
} else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) {
pack_assert(I, I->nr_dests == 0);
hex |= 0xC0ull << 40; /* Placeholder */
if (arch < 15)
hex |= 0xC0ull << 40; /* Placeholder */
}
bool swap12 = va_swap_12(I->op);
@ -629,7 +743,10 @@ va_pack_alu(const bi_instr *I, unsigned arch)
enum va_size size = src_info.size;
bi_index src = I->src[logical_i + src_offset];
hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i);
if (arch >= 15)
hex |= va_pack_src_v15(I, logical_i + src_offset, i);
else
hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i);
if (src_info.notted) {
if (src.neg)
@ -638,10 +755,15 @@ va_pack_alu(const bi_instr *I, unsigned arch)
unsigned neg_offs = 32 + 2 + ((2 - i) * 2);
unsigned abs_offs = 33 + 2 + ((2 - i) * 2);
if (src.neg)
hex |= 1ull << neg_offs;
if (src.abs)
hex |= 1ull << abs_offs;
if (arch >= 15 && I->op == BI_OPCODE_FMA_RSCALE_F32 && i == 2) {
if (src.neg)
hex |= 1ull << (neg_offs + 1);
} else {
if (src.neg)
hex |= 1ull << neg_offs;
if (src.abs)
hex |= 1ull << abs_offs;
}
} else {
if (src.neg)
invalid_instruction(I, "negate");
@ -661,8 +783,8 @@ va_pack_alu(const bi_instr *I, unsigned arch)
unsigned offs = (i == 1) ? 26 : 36;
hex |= (uint64_t)va_pack_widen(I, src.swizzle, src_info.size) << offs;
} else if (src_info.lane) {
unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ?
((i == 0) ? 38 : 36) : ((i == 0) ? 28 : 26);
unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36)
: ((i == 0) ? 28 : 26);
if (src_info.size == VA_SIZE_16) {
hex |= (src.swizzle == BI_SWIZZLE_H1 ? 1 : 0) << offs;
@ -675,7 +797,25 @@ va_pack_alu(const bi_instr *I, unsigned arch)
} else if (src_info.lanes) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 1);
hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26;
if (arch >= 15 && I->op == BI_OPCODE_CLPER_I32) {
switch (src.swizzle) {
case BI_SWIZZLE_B00:
hex |= 0x0ULL << 28;
break;
case BI_SWIZZLE_B11:
hex |= 0x1ULL << 28;
break;
case BI_SWIZZLE_B22:
hex |= 0x2ULL << 28;
break;
case BI_SWIZZLE_B33:
hex |= 0x3ULL << 28;
break;
default:
invalid_instruction(I, "lane shift");
}
} else
hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26;
} else if (src_info.combine) {
/* Treat as swizzle, subgroup ops not yet supported */
pack_assert(I, src_info.size == VA_SIZE_32);
@ -691,17 +831,33 @@ va_pack_alu(const bi_instr *I, unsigned arch)
}
if (info.saturate)
hex |= (uint64_t)I->saturate << 30;
if (info.rhadd)
hex |= (uint64_t)I->saturate << ((arch >= 15) ? 25 : 30);
if (info.rhadd) {
pack_assert(I, arch < 15);
hex |= va_pack_rhadd(I);
if (info.clamp)
hex |= (uint64_t)I->clamp << 32;
if (info.round_mode)
hex |= (uint64_t)I->round << 30;
}
/* FMA_RSCALE.f32 special modes treated as extra opcodes */
if (I->op == BI_OPCODE_FMA_RSCALE_F32) {
if (arch >= 15) {
hex |= va_pack_clamp_special_round_v15(I) << 32;
} else {
pack_assert(I, I->special < 4);
hex |= ((uint64_t)I->special) << 48;
if (info.clamp)
hex |= (uint64_t)I->clamp << 32;
if (info.round_mode && I->round == BI_ROUND_RTZ)
hex |= (uint64_t)0x1 << 50;
}
} else {
if (info.clamp)
hex |= (uint64_t)I->clamp << ((arch >= 15) ? 30 : 32);
if (info.round_mode)
hex |= (uint64_t)I->round << ((arch >= 15) ? 32 : 30);
}
if (info.condition)
hex |= (uint64_t)I->cmpf << 32;
hex |= (uint64_t)I->cmpf << ((arch >= 15) ? 33 : 32);
if (info.result_type)
hex |= (uint64_t)I->result_type << 30;
hex |= (uint64_t)I->result_type << ((arch >= 15) ? 24 : 30);
return hex;
}
@ -768,6 +924,26 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
return hex;
}
/*
 * Pack the fields of a v15 load instruction: the address source, the
 * memory-access hint, and either the buffer descriptor (source 1) or the
 * immediate byte offset.
 *
 * This implicitly means identity: VA_LOAD_LANE_8_BIT_B0 for i8 (bits[28;27])
 * and VA_LOAD_LANE_16_BIT_H0 for i16 (bit[27]).
 */
static uint64_t
va_pack_load_v15(const bi_instr *I, bool buffer_descriptor)
{
   uint64_t hex = va_pack_src_v15(I, 0, 0);
   hex |= (uint64_t)I->mem_access << 24;

   if (buffer_descriptor)
      hex |= va_pack_src_v15(I, 1, 1);
   else
      hex |= va_pack_byte_offset(I);

   return hex;
}
static uint64_t
va_pack_store(const bi_instr *I)
{
@ -782,6 +958,20 @@ va_pack_store(const bi_instr *I)
return hex;
}
/*
 * Pack the fields of a v15 store instruction: the address (source 1, a
 * 64-bit register pair), the memory-access hint at bit 24, and the
 * immediate byte offset.
 */
static uint64_t
va_pack_store_v15(const bi_instr *I)
{
   va_validate_register_pair(I, 1);

   uint64_t hex = va_pack_src_v15(I, 1, 0);
   hex |= I->mem_access << 24;
   hex |= va_pack_byte_offset(I);

   return hex;
}
static enum va_lod_mode
va_pack_lod_mode(const bi_instr *I)
{
@ -824,13 +1014,45 @@ va_pack_register_format(const bi_instr *I)
}
}
/*
 * Encode a null source for v15. The null encoding is 0x1c0; like any v15
 * source, its low eight bits occupy the 8-bit slot at byte `loc` and the
 * ninth bit is stashed at bit 48 + loc.
 */
static uint64_t
va_pack_src_null_v15(unsigned loc)
{
   const uint64_t null_enc = 0x1c0;

   return ((null_enc & 0xff) << (8 * loc)) |
          (((null_enc >> 8) & 0x1) << (48 + loc));
}
/*
 * Translate a pre-v15 staging-register control field (bit 0 = read,
 * bit 1 = write) into the v15 encoding: the read flag moves to bit 1, and
 * the write flag (bit 0) is only reported when the registers are also read.
 */
static unsigned
va_repack_sr_control_v15(unsigned sr_control)
{
   switch (sr_control & 0x3) {
   case 0x1: /* read only */
      return 0x2;
   case 0x3: /* read and write */
      return 0x3;
   default: /* no read: nothing reported */
      return 0x0;
   }
}
uint64_t
va_pack_instr(const bi_instr *I, unsigned arch)
{
struct va_opcode_info info = get_valhall_opcode(I->op, arch);
uint64_t hex = info.exact | (((uint64_t)I->flow) << 59);
hex |= ((uint64_t)va_select_fau_page(I)) << 57;
uint64_t hex =
info.exact | (((uint64_t)I->flow) << ((arch >= 15) ? 58 : 59));
hex |= ((uint64_t)va_select_fau_page(I, arch)) << ((arch >= 15) ? 62 : 57);
if (info.slot)
hex |= ((uint64_t)I->slot << 30);
@ -842,14 +1064,60 @@ va_pack_instr(const bi_instr *I, unsigned arch)
unsigned count =
read ? bi_count_read_registers(I, 0) : bi_count_write_registers(I, 0);
hex |= ((uint64_t)count << 33);
hex |= (uint64_t)va_pack_reg(I, sr) << 40;
hex |= ((uint64_t)info.sr_control << 46);
hex |= ((uint64_t)count << ((arch >= 15) ? 32 : 33));
if (arch >= 15) {
hex |= (uint64_t)va_pack_reg_v15(I, sr) << 40;
hex |= ((uint64_t)va_repack_sr_control_v15(info.sr_control) << 38);
} else {
hex |= (uint64_t)va_pack_reg(I, sr) << 40;
hex |= ((uint64_t)info.sr_control << 46);
}
}
/* On v15, some instructions require special sr_control values */
if (arch >= 15) {
switch (I->op) {
case BI_OPCODE_BARRIER: {
unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
pack_assert(I, sr_control == 0x0 || sr_control == 0x2);
hex |= (uint64_t)0x2 << 38;
break;
}
case BI_OPCODE_ATOM1_RETURN_I32:
case BI_OPCODE_ATOM1_RETURN_I64: {
unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
pack_assert(I, sr_control == 0x0);
break;
}
case BI_OPCODE_ATOM_I32:
case BI_OPCODE_ATOM_I64: {
unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
pack_assert(I, sr_control == 0x2);
break;
}
case BI_OPCODE_ATOM_RETURN_I32:
case BI_OPCODE_ATOM_RETURN_I64:
case BI_OPCODE_AXCHG_I32:
case BI_OPCODE_AXCHG_I64:
case BI_OPCODE_ACMPXCHG_I32:
case BI_OPCODE_ACMPXCHG_I64: {
unsigned sr_control = va_repack_sr_control_v15(info.sr_control);
pack_assert(I, sr_control == 0x0 || sr_control == 0x3);
hex |= (uint64_t)0x3 << 38;
break;
}
default:
break;
}
}
if (info.sr_write_count) {
hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) << 36;
hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16;
hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1)
<< ((arch >= 15) ? 35 : 36);
if (arch >= 15)
hex |= ((uint64_t)va_pack_reg_v15(I, I->dest[0])) << 16;
else
hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16;
}
if (info.vecsize)
@ -867,7 +1135,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_LOAD_I64:
case BI_OPCODE_LOAD_I96:
case BI_OPCODE_LOAD_I128:
hex |= va_pack_load(I, false);
if (arch >= 15)
hex |= va_pack_load_v15(I, false);
else
hex |= va_pack_load(I, false);
break;
case BI_OPCODE_LD_PKA_I8:
@ -878,7 +1149,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_LD_PKA_I64:
case BI_OPCODE_LD_PKA_I96:
case BI_OPCODE_LD_PKA_I128:
hex |= va_pack_load(I, true);
if (arch >= 15)
hex |= va_pack_load_v15(I, true);
else
hex |= va_pack_load(I, true);
break;
case BI_OPCODE_STORE_I8:
@ -889,20 +1163,26 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_STORE_I64:
case BI_OPCODE_STORE_I96:
case BI_OPCODE_STORE_I128:
hex |= va_pack_store(I);
if (arch >= 15)
hex |= va_pack_store_v15(I);
else
hex |= va_pack_store(I);
break;
case BI_OPCODE_ATOM1_RETURN_I64:
/* Permit omitting the destination for plain ATOM1 */
if (!bi_count_write_registers(I, 0)) {
if (arch < 15 && !bi_count_write_registers(I, 0)) {
hex |= (0x40ull << 40); // fake read
}
/* 64-bit source */
va_validate_register_pair(I, 0);
hex |= (uint64_t)va_pack_src(I, 0) << 0;
if (arch >= 15)
hex |= va_pack_src_v15(I, 0, 0);
else
hex |= (uint64_t)va_pack_src(I, 0) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc_1(I)) << ((arch >= 15) ? 24 : 22);
break;
case BI_OPCODE_ACMPXCHG_I64:
@ -911,29 +1191,43 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_ATOM_RETURN_I64:
/* 64-bit source */
va_validate_register_pair(I, 1);
hex |= (uint64_t)va_pack_src(I, 1) << 0;
if (arch >= 15)
hex |= va_pack_src_v15(I, 1, 0);
else
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t)va_pack_atom_opc(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc(I)) << ((arch >= 15) ? 24 : 22);
if (I->op == BI_OPCODE_ATOM_RETURN_I64)
hex |= (0xc0ull << 40); // flags
if (arch >= 15) {
if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) {
/* Change bits [51;50] to be ACMPXCHG */
pack_assert(I, ((hex >> 50) & 0b11) == 0b01);
hex ^= (0b11ull << 50);
}
} else {
if (I->op == BI_OPCODE_ATOM_RETURN_I64)
hex |= (0xc0ull << 40); // flags
if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
hex |= (1 << 26); /* .compare */
if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
hex |= (1 << 26); /* .compare */
}
break;
case BI_OPCODE_ATOM1_RETURN_I32:
/* Permit omitting the destination for plain ATOM1 */
if (!bi_count_write_registers(I, 0)) {
if (arch < 15 && !bi_count_write_registers(I, 0)) {
hex |= (0x40ull << 40); // fake read
}
/* 64-bit source */
va_validate_register_pair(I, 0);
hex |= (uint64_t)va_pack_src(I, 0) << 0;
if (arch >= 15)
hex |= va_pack_src_v15(I, 0, 0);
else
hex |= (uint64_t)va_pack_src(I, 0) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc_1(I)) << ((arch >= 15) ? 24 : 22);
break;
case BI_OPCODE_ACMPXCHG_I32:
@ -942,41 +1236,67 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_ATOM_RETURN_I32:
/* 64-bit source */
va_validate_register_pair(I, 1);
hex |= (uint64_t)va_pack_src(I, 1) << 0;
if (arch >= 15)
hex |= va_pack_src_v15(I, 1, 0);
else
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t)va_pack_atom_opc(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc(I)) << ((arch >= 15) ? 24 : 22);
if (I->op == BI_OPCODE_ATOM_RETURN_I32)
hex |= (0xc0ull << 40); // flags
if (arch >= 15) {
if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) {
/* Change bits [51;50] to be ACMPXCHG */
pack_assert(I, ((hex >> 50) & 0b11) == 0b01);
hex ^= (0b11ull << 50);
}
} else {
if (I->op == BI_OPCODE_ATOM_RETURN_I32)
hex |= (0xc0ull << 40); // flags
if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
hex |= (1 << 26); /* .compare */
if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG)
hex |= (1 << 26); /* .compare */
}
break;
case BI_OPCODE_LD_CVT:
hex |= (uint64_t)va_pack_src(I, 0);
if (arch >= 15)
hex |= va_pack_src_v15(I, 0, 0);
else
hex |= (uint64_t)va_pack_src(I, 0);
hex |= va_pack_byte_offset(I);
/* Conversion descriptor */
hex |= (uint64_t)va_pack_src(I, 2) << 16;
hex |= (uint64_t)I->mem_access << 37;
if (arch >= 15)
hex |= va_pack_src_v15(I, 2, 2);
else
hex |= (uint64_t)va_pack_src(I, 2) << 16;
hex |= (uint64_t)I->mem_access << ((arch >= 15) ? 35 : 37);
break;
case BI_OPCODE_ST_CVT:
/* Staging read */
va_validate_register_pair(I, 1);
hex |= (uint64_t)va_pack_src(I, 1) << 0;
if (arch >= 15)
hex |= va_pack_src_v15(I, 1, 0);
else
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset(I);
/* Conversion descriptor */
hex |= (uint64_t)va_pack_src(I, 3) << 16;
hex |= (uint64_t)I->mem_access << 37;
if (arch >= 15)
hex |= va_pack_src_v15(I, 3, 2);
else
hex |= (uint64_t)va_pack_src(I, 3) << 16;
hex |= (uint64_t)I->mem_access << ((arch >= 15) ? 35 : 37);
break;
case BI_OPCODE_BLEND: {
/* Source 0 - Blend descriptor (64-bit) */
hex |= ((uint64_t)va_pack_src(I, 2)) << 0;
if (arch >= 15)
hex |= va_pack_src_v15(I, 2, 0);
else
hex |= ((uint64_t)va_pack_src(I, 2)) << 0;
va_validate_register_pair(I, 2);
/* Target */
@ -987,7 +1307,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
hex |= ((I->branch_offset >> 3) << 8);
/* Source 2 - coverage mask */
hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16;
if (arch >= 15)
hex |= va_pack_src_v15(I, 1, 2);
else
hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16;
/* Vector size */
unsigned vecsize = 4;
@ -997,7 +1320,7 @@ va_pack_instr(const bi_instr *I, unsigned arch)
}
case BI_OPCODE_LD_GCLK_U64:
hex |= va_pack_gclk(I);
hex |= va_pack_gclk(I) << ((arch >= 15) ? 8 : 0);
break;
case BI_OPCODE_TEX_GRADIENT:
@ -1005,7 +1328,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
/* Image to read from */
hex |= ((uint64_t)va_pack_src(I, 1)) << 0;
if (arch >= 15)
hex |= va_pack_src_v15(I, 1, 0);
else
hex |= ((uint64_t)va_pack_src(I, 1)) << 0;
if ((I->op == BI_OPCODE_TEX_FETCH || I->op == BI_OPCODE_TEX_GRADIENT) &&
I->shadow)
@ -1022,7 +1348,7 @@ va_pack_instr(const bi_instr *I, unsigned arch)
if (I->skip)
hex |= (1ull << 39);
if (!bi_is_regfmt_16(I->register_format))
hex |= (1ull << 46);
hex |= (1ull << ((arch >= 15) ? 38 : 46));
if (I->op == BI_OPCODE_TEX_GRADIENT) {
if (I->force_delta_enable)
@ -1044,20 +1370,35 @@ va_pack_instr(const bi_instr *I, unsigned arch)
hex |= ((uint64_t)I->fetch_component) << 14;
}
hex |= (I->write_mask << 22);
hex |= (I->write_mask << ((arch >= 15) ? 24 : 22));
hex |= ((uint64_t)I->dimension) << 28;
break;
}
default:
if (!info.exact && I->op != BI_OPCODE_NOP)
if (!info.exact && (arch >= 15 || I->op != BI_OPCODE_NOP))
invalid_instruction(I, "opcode");
hex |= va_pack_alu(I, arch);
break;
}
/* On v15, some instructions require an encoded null src. */
if (arch >= 15) {
switch (I->op) {
case BI_OPCODE_NOP:
case BI_OPCODE_LD_VAR_FLAT_IMM:
case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
case BI_OPCODE_LD_GCLK_U64:
case BI_OPCODE_BARRIER:
hex |= va_pack_src_null_v15(0);
break;
default:
break;
}
}
return hex;
}

View file

@ -93,7 +93,8 @@ fau_state_uniform(struct fau_state *fau, bi_index idx, enum bi_opcode op)
}
static bool
fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op)
fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op,
unsigned arch)
{
for (unsigned i = 0; i < ARRAY_SIZE(fau->buffer); ++i) {
bi_index buf = fau->buffer[i];
@ -106,7 +107,7 @@ fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op)
/* Instructions executed by the messaging unit should not encode WARP_ID or
* anything from special page 3. */
if (can_run_on_message_unit(op) &&
(va_fau_page(idx.value) == 3 || idx.value == BIR_FAU_WARP_ID))
(va_fau_page(idx.value, arch) == 3 || idx.value == BIR_FAU_WARP_ID))
return false;
return fau->uniform_slot == -1 || can_use_two_fau_indices(op);
@ -114,7 +115,7 @@ fau_state_special(struct fau_state *fau, bi_index idx, enum bi_opcode op)
static bool
valid_src(struct fau_state *fau, unsigned fau_page, bi_index src,
enum bi_opcode op)
enum bi_opcode op, unsigned arch)
{
if (src.type != BI_INDEX_FAU)
return true;
@ -128,42 +129,42 @@ valid_src(struct fau_state *fau, unsigned fau_page, bi_index src,
return fau_state_buffer(fau, src);
}
bool valid = (fau_page == va_fau_page(src.value));
bool valid = (fau_page == va_fau_page(src.value, arch));
valid &= fau_state_buffer(fau, src);
if (src.value & BIR_FAU_UNIFORM)
valid &= fau_state_uniform(fau, src, op);
else if (fau_is_special(src.value))
valid &= fau_state_special(fau, src, op);
valid &= fau_state_special(fau, src, op, arch);
return valid;
}
bool
va_validate_fau(bi_instr *I)
va_validate_fau(bi_instr *I, unsigned arch)
{
bool valid = true;
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);
unsigned fau_page = va_select_fau_page(I, arch);
bi_foreach_src(I, s) {
valid &= valid_src(&fau, fau_page, I->src[s], I->op);
valid &= valid_src(&fau, fau_page, I->src[s], I->op, arch);
}
return valid;
}
void
va_repair_fau(bi_builder *b, bi_instr *I)
va_repair_fau(bi_builder *b, bi_instr *I, unsigned arch)
{
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);
unsigned fau_page = va_select_fau_page(I, arch);
bi_foreach_src(I, s) {
struct fau_state push = fau;
bi_index src = I->src[s];
if (!valid_src(&fau, fau_page, src, I->op)) {
if (!valid_src(&fau, fau_page, src, I->op, arch)) {
bi_replace_src(I, s, bi_mov_i32(b, bi_strip_index(src)));
/* Rollback update. Since the replacement move doesn't affect FAU
@ -180,7 +181,7 @@ va_validate(FILE *fp, bi_context *ctx)
bool errors = false;
bi_foreach_instr_global(ctx, I) {
if (!va_validate_fau(I)) {
if (!va_validate_fau(I, ctx->arch)) {
if (!errors) {
fprintf(fp, "Validation failed, this is a bug. Shader:\n\n");
bi_print_shader(ctx, fp);