pan/va: Pick compiler table based on arch

Make v15 use the new tables added in a previous commit.
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2026-03-11 16:35:59 +01:00
parent 64504422ab
commit 1ba1f76146
16 changed files with 71 additions and 44 deletions

View file

@ -16,14 +16,14 @@
*/
static uint32_t
va_op_swizzles(enum bi_opcode op, unsigned src)
va_op_swizzles(enum bi_opcode op, unsigned src, unsigned arch)
{
/* This is a bifrost-only instruction that is lowered on valhall */
if (!valhall_opcodes[op].exact)
if (!get_valhall_opcode(op, arch).exact)
return bi_op_swizzles[op][src];
uint32_t swizzles = 0;
struct va_src_info info = va_src_info(op, src);
struct va_src_info info = va_src_info(op, src, arch);
if (info.swizzle) {
assert(info.size == VA_SIZE_16 || info.size == VA_SIZE_32);
@ -99,8 +99,8 @@ bool
bi_op_supports_swizzle(enum bi_opcode op, unsigned src,
enum bi_swizzle swizzle, unsigned arch)
{
uint32_t supported_swizzles = arch >= 9 ?
va_op_swizzles(op, src) : bi_op_swizzles[op][src];
uint32_t supported_swizzles =
arch >= 9 ? va_op_swizzles(op, src, arch) : bi_op_swizzles[op][src];
return supported_swizzles & BITFIELD_BIT(swizzle);
}

View file

@ -382,8 +382,8 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
bi_foreach_ssa_src(ins, s) {
if (bi_count_read_registers(ins, s) >= 2)
l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
else if (s < valhall_opcodes[ins->op].nr_srcs &&
va_src_info(ins->op, s).size > VA_SIZE_32)
else if (s < get_valhall_opcode(ins->op, arch).nr_srcs &&
va_src_info(ins->op, s, arch).size > VA_SIZE_32)
l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
}
}

View file

@ -4165,13 +4165,13 @@ va_count_stats(bi_context *ctx, unsigned nr_ins, unsigned size,
}
static unsigned
va_gather_stats_block(bi_block *block, struct va_stats *counts)
va_gather_stats_block(bi_block *block, unsigned arch, struct va_stats *counts)
{
unsigned nr_ins = 0;
bi_foreach_instr_in_block(block, I) {
nr_ins++;
va_count_instr_stats(I, counts);
va_count_instr_stats(I, arch, counts);
}
return nr_ins;
}
@ -4180,7 +4180,8 @@ va_gather_stats_block(bi_block *block, struct va_stats *counts)
* Gather stats for a minimum length path through the shader.
*/
static unsigned
va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
va_gather_min_path_stats(bi_block *block, unsigned arch,
struct va_stats *counts)
{
struct va_stats min_counts;
struct va_stats save_counts = *counts;
@ -4192,7 +4193,7 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
if (bi_block_dominates(next, block)) {
continue;
}
nr_ins = va_gather_min_path_stats(next, counts);
nr_ins = va_gather_min_path_stats(next, arch, counts);
if (min_ins == 0 || nr_ins < min_ins) {
min_ins = nr_ins;
min_counts = *counts;
@ -4202,7 +4203,7 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
if (min_ins != 0) {
*counts = min_counts;
}
nr_ins = min_ins + va_gather_stats_block(block, counts);
nr_ins = min_ins + va_gather_stats_block(block, arch, counts);
return nr_ins;
}
@ -4213,7 +4214,8 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
* bail out.
*/
static unsigned
va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *visited)
va_gather_max_path_stats(bi_block *block, unsigned arch,
struct va_stats *counts, BITSET_WORD *visited)
{
struct va_stats max_counts;
struct va_stats save_counts = *counts;
@ -4226,7 +4228,7 @@ va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *
if (BITSET_TEST(visited, next->index)) {
continue;
}
nr_ins = va_gather_max_path_stats(next, counts, visited);
nr_ins = va_gather_max_path_stats(next, arch, counts, visited);
if (nr_ins > max_ins) {
max_ins = nr_ins;
max_counts = *counts;
@ -4236,7 +4238,7 @@ va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *
if (max_ins != 0) {
*counts = max_counts;
}
nr_ins = max_ins + va_gather_stats_block(block, counts);
nr_ins = max_ins + va_gather_stats_block(block, arch, counts);
return nr_ins;
}
@ -4260,15 +4262,16 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out,
case GATHER_STATS_FULL:
bi_foreach_instr_global(ctx, I) {
nr_ins++;
va_count_instr_stats(I, &counts);
va_count_instr_stats(I, ctx->arch, &counts);
}
break;
case GATHER_STATS_MIN:
nr_ins = va_gather_min_path_stats(first_block, &counts);
nr_ins = va_gather_min_path_stats(first_block, ctx->arch, &counts);
break;
case GATHER_STATS_MAX:
visited = BITSET_RZALLOC(NULL, ctx->num_blocks);
nr_ins = va_gather_max_path_stats(first_block, &counts, visited);
nr_ins =
va_gather_max_path_stats(first_block, ctx->arch, &counts, visited);
ralloc_free(visited);
break;
}
@ -4630,7 +4633,7 @@ bi_compile_variant_nir(nir_shader *nir,
bifrost_debug & BIFROST_DBG_VERBOSE);
} else {
disassemble_valhall(stderr, binary->data + offset,
binary->size - offset,
binary->size - offset, ctx->arch,
bifrost_debug & BIFROST_DBG_VERBOSE);
}

View file

@ -48,7 +48,8 @@ disassemble(const char *filename)
}
if (pan_arch(gpu_id) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
disassemble_valhall(stdout, entrypoint, filesize, pan_arch(gpu_id),
verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);

View file

@ -249,7 +249,7 @@ static bool is_branch_v15(uint64_t instr)
}
void
disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose)
disassemble_valhall(FILE *fp, const void *code, size_t size, unsigned arch, bool verbose)
{
assert((size & 7) == 0);
@ -275,11 +275,18 @@ disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose)
fprintf(fp, " ");
}
va_disasm_instr(fp, instr);
bool instr_is_branch;
if (arch >= 15) {
va_disasm_instr_v15(fp, instr);
instr_is_branch = is_branch_v15(instr);
} else {
va_disasm_instr(fp, instr);
instr_is_branch = is_branch(instr);
}
fprintf(fp, "\n");
/* Separate blocks visually by inserting whitespace after branches */
if (is_branch(instr))
if (instr_is_branch)
fprintf(fp, "\n");
}

View file

@ -16,6 +16,7 @@
void va_disasm_instr(FILE *fp, uint64_t instr);
void va_disasm_instr_v15(FILE *fp, uint64_t instr);
void disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose);
void disassemble_valhall(FILE *fp, const void *code, size_t size, unsigned arch,
bool verbose);
#endif

View file

@ -77,7 +77,7 @@ struct va_stats {
unsigned nr_fau_uniforms;
};
void va_count_instr_stats(bi_instr *I, struct va_stats *stats);
void va_count_instr_stats(bi_instr *I, unsigned arch, struct va_stats *stats);
#ifdef __cplusplus
} /* extern C */

View file

@ -211,7 +211,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
static uint32_t
va_resolve_swizzles(bi_context *ctx, bi_instr *I, unsigned s)
{
struct va_src_info info = va_src_info(I->op, s);
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
uint32_t value = I->src[s].value;
enum bi_swizzle swz = I->src[s].swizzle;
@ -257,9 +257,10 @@ va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts,
/* abs(#c) is pointless, but -#c occurs in transcendental sequences */
assert(!I->src[s].abs && "redundant .abs modifier");
bool is_signed = valhall_opcodes[I->op].is_signed;
bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
struct va_src_info info = va_src_info(I->op, s);
bool is_signed = get_valhall_opcode(I->op, ctx->arch).is_signed;
bool staging =
(s < get_valhall_opcode(I->op, ctx->arch).nr_staging_srcs);
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
const uint32_t value = va_resolve_swizzles(ctx, I, s);
const uint32_t count = (uintptr_t)_mesa_hash_table_u64_search(counts, value);
@ -294,12 +295,13 @@ va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts)
if (I->src[s].type != BI_INDEX_CONSTANT)
continue;
const bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
const bool staging =
(s < get_valhall_opcode(I->op, ctx->arch).nr_staging_srcs);
if (staging)
continue;
bool is_signed = valhall_opcodes[I->op].is_signed;
struct va_src_info info = va_src_info(I->op, s);
bool is_signed = get_valhall_opcode(I->op, ctx->arch).is_signed;
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
uint32_t value = va_resolve_swizzles(ctx, I, s);
bi_index cons = va_lookup_constant(value, info, is_signed);

View file

@ -78,7 +78,7 @@ va_lower_split_64bit(bi_context *ctx)
if (bi_is_null(I->src[s]) || s >= 4)
continue;
struct va_src_info info = va_src_info(I->op, s);
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
/* Only split if the instruction expects 64-bit inputs as two separate
* sources. */

View file

@ -179,7 +179,7 @@ va_mark_last(bi_context *ctx)
break;
/* Only need to unmark split registers. */
if (va_src_info(I->op, s).size == VA_SIZE_64 &&
if (va_src_info(I->op, s, ctx->arch).size == VA_SIZE_64 &&
bi_count_read_registers(I, s) == 1) {
bool both_discard = I->src[s].discard && I->src[s + 1].discard;

View file

@ -286,7 +286,7 @@ va_fuse_cmp(bi_context *ctx, bi_instr **lut, const BITSET_WORD *multiple,
static bool
va_propagate_replicate_wide(bi_context *ctx, bi_instr **lut, bi_instr *I)
{
struct va_opcode_info info = valhall_opcodes[I->op];
struct va_opcode_info info = get_valhall_opcode(I->op, ctx->arch);
bool progress = false;
bi_foreach_ssa_src(I, s) {

View file

@ -455,7 +455,7 @@ va_pack_rhadd(const bi_instr *I)
static uint64_t
va_pack_alu(const bi_instr *I, unsigned arch)
{
struct va_opcode_info info = valhall_opcodes[I->op];
struct va_opcode_info info = get_valhall_opcode(I->op, arch);
uint64_t hex = 0;
switch (I->op) {
@ -750,7 +750,8 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
VA_LOAD_LANE_96_BIT_IDENTITY, VA_LOAD_LANE_128_BIT_IDENTITY,
};
unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7;
/* TODO hack */
unsigned memory_size = (get_valhall_opcode(I->op, 10).exact >> 27) & 0x7;
uint64_t hex = (uint64_t)load_lane_identity[memory_size] << 36;
// unsigned
@ -826,7 +827,7 @@ va_pack_register_format(const bi_instr *I)
uint64_t
va_pack_instr(const bi_instr *I, unsigned arch)
{
struct va_opcode_info info = valhall_opcodes[I->op];
struct va_opcode_info info = get_valhall_opcode(I->op, arch);
uint64_t hex = info.exact | (((uint64_t)I->flow) << 59);
hex |= ((uint64_t)va_select_fau_page(I)) << 57;

View file

@ -9,7 +9,7 @@
#include "valhall.h"
void
va_count_instr_stats(bi_instr *I, struct va_stats *stats)
va_count_instr_stats(bi_instr *I, unsigned arch, struct va_stats *stats)
{
/* Adjusted for 64-bit arithmetic */
unsigned words = bi_count_write_registers(I, 0);
@ -35,7 +35,7 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats)
}
}
}
switch (valhall_opcodes[I->op].unit) {
switch (get_valhall_opcode(I->op, arch).unit) {
/* Arithmetic is 2x slower for 64-bit than 32-bit */
case VA_UNIT_FMA:
stats->fma += words;

View file

@ -198,6 +198,16 @@ valhall_v15_opcodes[BI_NUM_OPCODES] = {
% endif
% endfor
};
/* Pick the opcode-info table matching the target architecture: arches
 * v9..v14 use the original valhall_opcodes table, v15+ uses the new
 * valhall_v15_opcodes table added in the previous commit. Returns the
 * entry by value. */
const struct va_opcode_info
get_valhall_opcode(enum bi_opcode op, unsigned arch)
{
/* Valhall begins at v9; callers must never pass a Bifrost arch here. */
assert(arch >= 9);
if (arch < 15)
return valhall_opcodes[op];
else
return valhall_v15_opcodes[op];
}
"""
# Exact value to be ORed in to every opcode

View file

@ -89,7 +89,8 @@ struct va_opcode_info {
unsigned sr_control : 2;
};
extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES];
const struct va_opcode_info get_valhall_opcode(enum bi_opcode op,
unsigned arch);
/* Bifrost specifies the source of bitwise operations as (A, B, shift), but
* Valhall specifies (A, shift, B). We follow Bifrost conventions in the
@ -130,10 +131,10 @@ va_swap_12(enum bi_opcode op)
}
static inline struct va_src_info
va_src_info(enum bi_opcode op, unsigned src)
va_src_info(enum bi_opcode op, unsigned src, unsigned arch)
{
unsigned idx = (va_swap_12(op) && (src == 1 || src == 2)) ? (3 - src) : src;
return valhall_opcodes[op].srcs[idx];
return get_valhall_opcode(op, arch).srcs[idx];
}
static inline bool

View file

@ -288,7 +288,8 @@ pan_disassemble(FILE *fp, const void *code, size_t size, uint64_t gpu_id,
bool verbose)
{
if (pan_arch(gpu_id) >= 9)
disassemble_valhall(fp, (const uint64_t *)code, size, verbose);
disassemble_valhall(fp, (const uint64_t *)code, size, pan_arch(gpu_id),
verbose);
else if (pan_arch(gpu_id) >= 6)
disassemble_bifrost(fp, code, size, verbose);
else