diff --git a/src/panfrost/compiler/bifrost/bi_lower_swizzle.c b/src/panfrost/compiler/bifrost/bi_lower_swizzle.c index d7e92803ba7..fc16cbe19d8 100644 --- a/src/panfrost/compiler/bifrost/bi_lower_swizzle.c +++ b/src/panfrost/compiler/bifrost/bi_lower_swizzle.c @@ -16,14 +16,14 @@ */ static uint32_t -va_op_swizzles(enum bi_opcode op, unsigned src) +va_op_swizzles(enum bi_opcode op, unsigned src, unsigned arch) { /* This is a bifrost-only instruction that is lowered on valhall */ - if (!valhall_opcodes[op].exact) + if (!get_valhall_opcode(op, arch).exact) return bi_op_swizzles[op][src]; uint32_t swizzles = 0; - struct va_src_info info = va_src_info(op, src); + struct va_src_info info = va_src_info(op, src, arch); if (info.swizzle) { assert(info.size == VA_SIZE_16 || info.size == VA_SIZE_32); @@ -99,8 +99,8 @@ bool bi_op_supports_swizzle(enum bi_opcode op, unsigned src, enum bi_swizzle swizzle, unsigned arch) { - uint32_t supported_swizzles = arch >= 9 ? - va_op_swizzles(op, src) : bi_op_swizzles[op][src]; + uint32_t supported_swizzles = + arch >= 9 ? va_op_swizzles(op, src, arch) : bi_op_swizzles[op][src]; return supported_swizzles & BITFIELD_BIT(swizzle); } diff --git a/src/panfrost/compiler/bifrost/bi_ra.c b/src/panfrost/compiler/bifrost/bi_ra.c index 7f058bf0d3f..6bd38fd2ef3 100644 --- a/src/panfrost/compiler/bifrost/bi_ra.c +++ b/src/panfrost/compiler/bifrost/bi_ra.c @@ -382,8 +382,8 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, bi_foreach_ssa_src(ins, s) { if (bi_count_read_registers(ins, s) >= 2) l->affinity[ins->src[s].value] &= EVEN_BITS_MASK; - else if (s < valhall_opcodes[ins->op].nr_srcs && - va_src_info(ins->op, s).size > VA_SIZE_32) + else if (s < get_valhall_opcode(ins->op, arch).nr_srcs && + va_src_info(ins->op, s, arch).size > VA_SIZE_32) l->affinity[ins->src[s].value] &= EVEN_BITS_MASK; } } diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 8f459a1d8e6..c0b551425e2 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -4165,13 +4165,13 @@ va_count_stats(bi_context *ctx, unsigned nr_ins, unsigned size, } static unsigned -va_gather_stats_block(bi_block *block, struct va_stats *counts) +va_gather_stats_block(bi_block *block, unsigned arch, struct va_stats *counts) { unsigned nr_ins = 0; bi_foreach_instr_in_block(block, I) { nr_ins++; - va_count_instr_stats(I, counts); + va_count_instr_stats(I, arch, counts); } return nr_ins; } @@ -4180,7 +4180,8 @@ va_gather_stats_block(bi_block *block, struct va_stats *counts) * Gather stats for a minimum length path through the shader. */ static unsigned -va_gather_min_path_stats(bi_block *block, struct va_stats *counts) +va_gather_min_path_stats(bi_block *block, unsigned arch, + struct va_stats *counts) { struct va_stats min_counts; struct va_stats save_counts = *counts; @@ -4192,7 +4193,7 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts) if (bi_block_dominates(next, block)) { continue; } - nr_ins = va_gather_min_path_stats(next, counts); + nr_ins = va_gather_min_path_stats(next, arch, counts); if (min_ins == 0 || nr_ins < min_ins) { min_ins = nr_ins; min_counts = *counts; @@ -4202,7 +4203,7 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts) if (min_ins != 0) { *counts = min_counts; } - nr_ins = min_ins + va_gather_stats_block(block, counts); + nr_ins = min_ins + va_gather_stats_block(block, arch, counts); return nr_ins; } @@ -4213,7 +4214,8 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts) * bail out. */ static unsigned -va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *visited) +va_gather_max_path_stats(bi_block *block, unsigned arch, + struct va_stats *counts, BITSET_WORD *visited) { struct va_stats max_counts; struct va_stats save_counts = *counts; @@ -4226,7 +4228,7 @@ va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD * if (BITSET_TEST(visited, next->index)) { continue; } - nr_ins = va_gather_max_path_stats(next, counts, visited); + nr_ins = va_gather_max_path_stats(next, arch, counts, visited); if (nr_ins > max_ins) { max_ins = nr_ins; max_counts = *counts; @@ -4236,7 +4238,7 @@ va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD * if (max_ins != 0) { *counts = max_counts; } - nr_ins = max_ins + va_gather_stats_block(block, counts); + nr_ins = max_ins + va_gather_stats_block(block, arch, counts); return nr_ins; } @@ -4260,15 +4262,16 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out, case GATHER_STATS_FULL: bi_foreach_instr_global(ctx, I) { nr_ins++; - va_count_instr_stats(I, &counts); + va_count_instr_stats(I, ctx->arch, &counts); } break; case GATHER_STATS_MIN: - nr_ins = va_gather_min_path_stats(first_block, &counts); + nr_ins = va_gather_min_path_stats(first_block, ctx->arch, &counts); break; case GATHER_STATS_MAX: visited = BITSET_RZALLOC(NULL, ctx->num_blocks); - nr_ins = va_gather_max_path_stats(first_block, &counts, visited); + nr_ins = + va_gather_max_path_stats(first_block, ctx->arch, &counts, visited); ralloc_free(visited); break; } @@ -4630,7 +4633,7 @@ bi_compile_variant_nir(nir_shader *nir, bifrost_debug & BIFROST_DBG_VERBOSE); } else { disassemble_valhall(stderr, binary->data + offset, - binary->size - offset, + binary->size - offset, ctx->arch, bifrost_debug & BIFROST_DBG_VERBOSE); } diff --git a/src/panfrost/compiler/bifrost/cmdline.c b/src/panfrost/compiler/bifrost/cmdline.c index b94b7efbefe..1b3c7ceccc6 100644 --- a/src/panfrost/compiler/bifrost/cmdline.c +++ b/src/panfrost/compiler/bifrost/cmdline.c @@ -48,7 +48,8 @@ disassemble(const char *filename) } if (pan_arch(gpu_id) >= 9) - disassemble_valhall(stdout, entrypoint, filesize, verbose); + disassemble_valhall(stdout, entrypoint, filesize, pan_arch(gpu_id), + verbose); else disassemble_bifrost(stdout, entrypoint, filesize, verbose); diff --git a/src/panfrost/compiler/bifrost/valhall/disasm.py b/src/panfrost/compiler/bifrost/valhall/disasm.py index 6ec53f3fafd..d744d6bf45e 100644 --- a/src/panfrost/compiler/bifrost/valhall/disasm.py +++ b/src/panfrost/compiler/bifrost/valhall/disasm.py @@ -249,7 +249,7 @@ static bool is_branch_v15(uint64_t instr) } void -disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose) +disassemble_valhall(FILE *fp, const void *code, size_t size, unsigned arch, bool verbose) { assert((size & 7) == 0); @@ -275,11 +275,18 @@ disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose) fprintf(fp, " "); } - va_disasm_instr(fp, instr); + bool instr_is_branch; + if (arch >= 15) { + va_disasm_instr_v15(fp, instr); + instr_is_branch = is_branch_v15(instr); + } else { + va_disasm_instr(fp, instr); + instr_is_branch = is_branch(instr); + } fprintf(fp, "\\n"); /* Separate blocks visually by inserting whitespace after branches */ - if (is_branch(instr)) + if (instr_is_branch) fprintf(fp, "\\n"); } diff --git a/src/panfrost/compiler/bifrost/valhall/disassemble.h b/src/panfrost/compiler/bifrost/valhall/disassemble.h index a7f73db52b8..05908b4d643 100644 --- a/src/panfrost/compiler/bifrost/valhall/disassemble.h +++ b/src/panfrost/compiler/bifrost/valhall/disassemble.h @@ -16,6 +16,7 @@ void va_disasm_instr(FILE *fp, uint64_t instr); void va_disasm_instr_v15(FILE *fp, uint64_t instr); -void disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose); +void disassemble_valhall(FILE *fp, const void *code, size_t size, unsigned arch, + bool verbose); #endif diff --git a/src/panfrost/compiler/bifrost/valhall/va_compiler.h b/src/panfrost/compiler/bifrost/valhall/va_compiler.h index 1d8a38a1f37..622ab81b302 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_compiler.h +++ b/src/panfrost/compiler/bifrost/valhall/va_compiler.h @@ -77,7 +77,7 @@ struct va_stats { unsigned nr_fau_uniforms; }; -void va_count_instr_stats(bi_instr *I, struct va_stats *stats); +void va_count_instr_stats(bi_instr *I, unsigned arch, struct va_stats *stats); #ifdef __cplusplus } /* extern C */ diff --git a/src/panfrost/compiler/bifrost/valhall/va_lower_constants.c b/src/panfrost/compiler/bifrost/valhall/va_lower_constants.c index 5646f682b1d..4ab718f6b35 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_lower_constants.c +++ b/src/panfrost/compiler/bifrost/valhall/va_lower_constants.c @@ -211,7 +211,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, static uint32_t va_resolve_swizzles(bi_context *ctx, bi_instr *I, unsigned s) { - struct va_src_info info = va_src_info(I->op, s); + struct va_src_info info = va_src_info(I->op, s, ctx->arch); uint32_t value = I->src[s].value; enum bi_swizzle swz = I->src[s].swizzle; @@ -257,9 +257,10 @@ va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts, /* abs(#c) is pointless, but -#c occurs in transcendental sequences */ assert(!I->src[s].abs && "redundant .abs modifier"); - bool is_signed = valhall_opcodes[I->op].is_signed; - bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs); - struct va_src_info info = va_src_info(I->op, s); + bool is_signed = get_valhall_opcode(I->op, ctx->arch).is_signed; + bool staging = + (s < get_valhall_opcode(I->op, ctx->arch).nr_staging_srcs); + struct va_src_info info = va_src_info(I->op, s, ctx->arch); const uint32_t value = va_resolve_swizzles(ctx, I, s); const uint32_t count = (uintptr_t)_mesa_hash_table_u64_search(counts, value); @@ -294,12 +295,13 @@ va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts) if (I->src[s].type != BI_INDEX_CONSTANT) continue; - const bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs); + const bool staging = + (s < get_valhall_opcode(I->op, ctx->arch).nr_staging_srcs); if (staging) continue; - bool is_signed = valhall_opcodes[I->op].is_signed; - struct va_src_info info = va_src_info(I->op, s); + bool is_signed = get_valhall_opcode(I->op, ctx->arch).is_signed; + struct va_src_info info = va_src_info(I->op, s, ctx->arch); uint32_t value = va_resolve_swizzles(ctx, I, s); bi_index cons = va_lookup_constant(value, info, is_signed); diff --git a/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c b/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c index 6b81346845c..ac72b35261d 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c +++ b/src/panfrost/compiler/bifrost/valhall/va_lower_split_64bit.c @@ -78,7 +78,7 @@ va_lower_split_64bit(bi_context *ctx) if (bi_is_null(I->src[s]) || s >= 4) continue; - struct va_src_info info = va_src_info(I->op, s); + struct va_src_info info = va_src_info(I->op, s, ctx->arch); /* Only split if the instruction expects 64-bit inputs as two separate * sources. */ diff --git a/src/panfrost/compiler/bifrost/valhall/va_mark_last.c b/src/panfrost/compiler/bifrost/valhall/va_mark_last.c index 0bfe93ce228..454fba92ef8 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_mark_last.c +++ b/src/panfrost/compiler/bifrost/valhall/va_mark_last.c @@ -179,7 +179,7 @@ va_mark_last(bi_context *ctx) break; /* Only need to unmark split registers. */ - if (va_src_info(I->op, s).size == VA_SIZE_64 && + if (va_src_info(I->op, s, ctx->arch).size == VA_SIZE_64 && bi_count_read_registers(I, s) == 1) { bool both_discard = I->src[s].discard && I->src[s + 1].discard; diff --git a/src/panfrost/compiler/bifrost/valhall/va_optimize.c b/src/panfrost/compiler/bifrost/valhall/va_optimize.c index a0609601d4e..41fc2721bbb 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_optimize.c +++ b/src/panfrost/compiler/bifrost/valhall/va_optimize.c @@ -286,7 +286,7 @@ va_fuse_cmp(bi_context *ctx, bi_instr **lut, const BITSET_WORD *multiple, static bool va_propagate_replicate_wide(bi_context *ctx, bi_instr **lut, bi_instr *I) { - struct va_opcode_info info = valhall_opcodes[I->op]; + struct va_opcode_info info = get_valhall_opcode(I->op, ctx->arch); bool progress = false; bi_foreach_ssa_src(I, s) { diff --git a/src/panfrost/compiler/bifrost/valhall/va_pack.c b/src/panfrost/compiler/bifrost/valhall/va_pack.c index d7f42168c7a..129512ce170 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_pack.c +++ b/src/panfrost/compiler/bifrost/valhall/va_pack.c @@ -455,7 +455,7 @@ va_pack_rhadd(const bi_instr *I) static uint64_t va_pack_alu(const bi_instr *I, unsigned arch) { - struct va_opcode_info info = valhall_opcodes[I->op]; + struct va_opcode_info info = get_valhall_opcode(I->op, arch); uint64_t hex = 0; switch (I->op) { @@ -750,7 +750,8 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor) VA_LOAD_LANE_96_BIT_IDENTITY, VA_LOAD_LANE_128_BIT_IDENTITY, }; - unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7; + /* TODO hack */ + unsigned memory_size = (get_valhall_opcode(I->op, 10).exact >> 27) & 0x7; uint64_t hex = (uint64_t)load_lane_identity[memory_size] << 36; // unsigned @@ -826,7 +827,7 @@ va_pack_register_format(const bi_instr *I) uint64_t va_pack_instr(const bi_instr *I, unsigned arch) { - struct va_opcode_info info = valhall_opcodes[I->op]; + struct va_opcode_info info = get_valhall_opcode(I->op, arch); uint64_t hex = info.exact | (((uint64_t)I->flow) << 59); hex |= ((uint64_t)va_select_fau_page(I)) << 57; diff --git a/src/panfrost/compiler/bifrost/valhall/va_perf.c b/src/panfrost/compiler/bifrost/valhall/va_perf.c index 5067a2fc58e..5272a5bd084 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_perf.c +++ b/src/panfrost/compiler/bifrost/valhall/va_perf.c @@ -9,7 +9,7 @@ #include "valhall.h" void -va_count_instr_stats(bi_instr *I, struct va_stats *stats) +va_count_instr_stats(bi_instr *I, unsigned arch, struct va_stats *stats) { /* Adjusted for 64-bit arithmetic */ unsigned words = bi_count_write_registers(I, 0); @@ -35,7 +35,7 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats) } } } - switch (valhall_opcodes[I->op].unit) { + switch (get_valhall_opcode(I->op, arch).unit) { /* Arithmetic is 2x slower for 64-bit than 32-bit */ case VA_UNIT_FMA: stats->fma += words; diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.c.py b/src/panfrost/compiler/bifrost/valhall/valhall.c.py index ae7a1b5a001..b8808bd30e4 100644 --- a/src/panfrost/compiler/bifrost/valhall/valhall.c.py +++ b/src/panfrost/compiler/bifrost/valhall/valhall.c.py @@ -198,6 +198,16 @@ valhall_v15_opcodes[BI_NUM_OPCODES] = { % endif % endfor }; + +const struct va_opcode_info +get_valhall_opcode(enum bi_opcode op, unsigned arch) +{ + assert(arch >= 9); + if (arch < 15) + return valhall_opcodes[op]; + else + return valhall_v15_opcodes[op]; +} """ # Exact value to be ORed in to every opcode diff --git a/src/panfrost/compiler/bifrost/valhall/valhall.h b/src/panfrost/compiler/bifrost/valhall/valhall.h index ae716c36ffd..763628cd1fb 100644 --- a/src/panfrost/compiler/bifrost/valhall/valhall.h +++ b/src/panfrost/compiler/bifrost/valhall/valhall.h @@ -89,7 +89,8 @@ struct va_opcode_info { unsigned sr_control : 2; }; -extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES]; +const struct va_opcode_info get_valhall_opcode(enum bi_opcode op, + unsigned arch); /* Bifrost specifies the source of bitwise operations as (A, B, shift), but * Valhall specifies (A, shift, B). We follow Bifrost conventions in the @@ -130,10 +131,10 @@ va_swap_12(enum bi_opcode op) } static inline struct va_src_info -va_src_info(enum bi_opcode op, unsigned src) +va_src_info(enum bi_opcode op, unsigned src, unsigned arch) { unsigned idx = (va_swap_12(op) && (src == 1 || src == 2)) ? (3 - src) : src; - return valhall_opcodes[op].srcs[idx]; + return get_valhall_opcode(op, arch).srcs[idx]; } static inline bool diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c index 3fd702227c8..9c27a36e6ee 100644 --- a/src/panfrost/compiler/pan_compiler.c +++ b/src/panfrost/compiler/pan_compiler.c @@ -288,7 +288,8 @@ pan_disassemble(FILE *fp, const void *code, size_t size, uint64_t gpu_id, bool verbose) { if (pan_arch(gpu_id) >= 9) - disassemble_valhall(fp, (const uint64_t *)code, size, verbose); + disassemble_valhall(fp, (const uint64_t *)code, size, pan_arch(gpu_id), + verbose); else if (pan_arch(gpu_id) >= 6) disassemble_bifrost(fp, code, size, verbose); else