mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
pan/va: Pick compiler table based on arch
Make v15 use the new tables added in a previous commit.
This commit is contained in:
parent
64504422ab
commit
1ba1f76146
16 changed files with 71 additions and 44 deletions
|
|
@ -16,14 +16,14 @@
|
|||
*/
|
||||
|
||||
static uint32_t
|
||||
va_op_swizzles(enum bi_opcode op, unsigned src)
|
||||
va_op_swizzles(enum bi_opcode op, unsigned src, unsigned arch)
|
||||
{
|
||||
/* This is a bifrost-only instruction that is lowered on valhall */
|
||||
if (!valhall_opcodes[op].exact)
|
||||
if (!get_valhall_opcode(op, arch).exact)
|
||||
return bi_op_swizzles[op][src];
|
||||
|
||||
uint32_t swizzles = 0;
|
||||
struct va_src_info info = va_src_info(op, src);
|
||||
struct va_src_info info = va_src_info(op, src, arch);
|
||||
|
||||
if (info.swizzle) {
|
||||
assert(info.size == VA_SIZE_16 || info.size == VA_SIZE_32);
|
||||
|
|
@ -99,8 +99,8 @@ bool
|
|||
/* Report whether `swizzle` is accepted for source `src` of opcode `op`.
 *
 * A bitmask of supported swizzles is selected first: for arch >= 9 it comes
 * from va_op_swizzles(), otherwise it is read from bi_op_swizzles[op][src].
 * The return value is that mask ANDed with BITFIELD_BIT(swizzle), i.e. it is
 * nonzero only when the bit for `swizzle` is set in the mask.
 *
 * NOTE(review): this is a rendered diff hunk, so the initialisation of
 * `supported_swizzles` appears twice below -- once calling va_op_swizzles()
 * without `arch` and once forwarding `arch` to it. */
bi_op_supports_swizzle(enum bi_opcode op, unsigned src,
|
||||
enum bi_swizzle swizzle, unsigned arch)
|
||||
{
|
||||
uint32_t supported_swizzles = arch >= 9 ?
|
||||
va_op_swizzles(op, src) : bi_op_swizzles[op][src];
|
||||
uint32_t supported_swizzles =
|
||||
arch >= 9 ? va_op_swizzles(op, src, arch) : bi_op_swizzles[op][src];
|
||||
return supported_swizzles & BITFIELD_BIT(swizzle);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -382,8 +382,8 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
|
|||
bi_foreach_ssa_src(ins, s) {
|
||||
if (bi_count_read_registers(ins, s) >= 2)
|
||||
l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
|
||||
else if (s < valhall_opcodes[ins->op].nr_srcs &&
|
||||
va_src_info(ins->op, s).size > VA_SIZE_32)
|
||||
else if (s < get_valhall_opcode(ins->op, arch).nr_srcs &&
|
||||
va_src_info(ins->op, s, arch).size > VA_SIZE_32)
|
||||
l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4165,13 +4165,13 @@ va_count_stats(bi_context *ctx, unsigned nr_ins, unsigned size,
|
|||
}
|
||||
|
||||
/* Accumulate statistics for every instruction of a single block.
 *
 * Each instruction in `block` is passed to va_count_instr_stats() together
 * with `counts`, and a running instruction count is kept alongside.
 *
 * Returns the number of instructions visited in `block`.
 *
 * NOTE(review): this is a rendered diff hunk, so the signature and the
 * va_count_instr_stats() call each appear twice -- without and with the
 * `arch` argument that is forwarded unchanged to va_count_instr_stats(). */
static unsigned
|
||||
va_gather_stats_block(bi_block *block, struct va_stats *counts)
|
||||
va_gather_stats_block(bi_block *block, unsigned arch, struct va_stats *counts)
|
||||
{
|
||||
unsigned nr_ins = 0;
|
||||
|
||||
bi_foreach_instr_in_block(block, I) {
|
||||
nr_ins++;
|
||||
va_count_instr_stats(I, counts);
|
||||
va_count_instr_stats(I, arch, counts);
|
||||
}
|
||||
return nr_ins;
|
||||
}
|
||||
|
|
@ -4180,7 +4180,8 @@ va_gather_stats_block(bi_block *block, struct va_stats *counts)
|
|||
* Gather stats for a minimum length path through the shader.
|
||||
*/
|
||||
static unsigned
|
||||
va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
|
||||
va_gather_min_path_stats(bi_block *block, unsigned arch,
|
||||
struct va_stats *counts)
|
||||
{
|
||||
struct va_stats min_counts;
|
||||
struct va_stats save_counts = *counts;
|
||||
|
|
@ -4192,7 +4193,7 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
|
|||
if (bi_block_dominates(next, block)) {
|
||||
continue;
|
||||
}
|
||||
nr_ins = va_gather_min_path_stats(next, counts);
|
||||
nr_ins = va_gather_min_path_stats(next, arch, counts);
|
||||
if (min_ins == 0 || nr_ins < min_ins) {
|
||||
min_ins = nr_ins;
|
||||
min_counts = *counts;
|
||||
|
|
@ -4202,7 +4203,7 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
|
|||
if (min_ins != 0) {
|
||||
*counts = min_counts;
|
||||
}
|
||||
nr_ins = min_ins + va_gather_stats_block(block, counts);
|
||||
nr_ins = min_ins + va_gather_stats_block(block, arch, counts);
|
||||
return nr_ins;
|
||||
}
|
||||
|
||||
|
|
@ -4213,7 +4214,8 @@ va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
|
|||
* bail out.
|
||||
*/
|
||||
static unsigned
|
||||
va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *visited)
|
||||
va_gather_max_path_stats(bi_block *block, unsigned arch,
|
||||
struct va_stats *counts, BITSET_WORD *visited)
|
||||
{
|
||||
struct va_stats max_counts;
|
||||
struct va_stats save_counts = *counts;
|
||||
|
|
@ -4226,7 +4228,7 @@ va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *
|
|||
if (BITSET_TEST(visited, next->index)) {
|
||||
continue;
|
||||
}
|
||||
nr_ins = va_gather_max_path_stats(next, counts, visited);
|
||||
nr_ins = va_gather_max_path_stats(next, arch, counts, visited);
|
||||
if (nr_ins > max_ins) {
|
||||
max_ins = nr_ins;
|
||||
max_counts = *counts;
|
||||
|
|
@ -4236,7 +4238,7 @@ va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *
|
|||
if (max_ins != 0) {
|
||||
*counts = max_counts;
|
||||
}
|
||||
nr_ins = max_ins + va_gather_stats_block(block, counts);
|
||||
nr_ins = max_ins + va_gather_stats_block(block, arch, counts);
|
||||
return nr_ins;
|
||||
}
|
||||
|
||||
|
|
@ -4260,15 +4262,16 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out,
|
|||
case GATHER_STATS_FULL:
|
||||
bi_foreach_instr_global(ctx, I) {
|
||||
nr_ins++;
|
||||
va_count_instr_stats(I, &counts);
|
||||
va_count_instr_stats(I, ctx->arch, &counts);
|
||||
}
|
||||
break;
|
||||
case GATHER_STATS_MIN:
|
||||
nr_ins = va_gather_min_path_stats(first_block, &counts);
|
||||
nr_ins = va_gather_min_path_stats(first_block, ctx->arch, &counts);
|
||||
break;
|
||||
case GATHER_STATS_MAX:
|
||||
visited = BITSET_RZALLOC(NULL, ctx->num_blocks);
|
||||
nr_ins = va_gather_max_path_stats(first_block, &counts, visited);
|
||||
nr_ins =
|
||||
va_gather_max_path_stats(first_block, ctx->arch, &counts, visited);
|
||||
ralloc_free(visited);
|
||||
break;
|
||||
}
|
||||
|
|
@ -4630,7 +4633,7 @@ bi_compile_variant_nir(nir_shader *nir,
|
|||
bifrost_debug & BIFROST_DBG_VERBOSE);
|
||||
} else {
|
||||
disassemble_valhall(stderr, binary->data + offset,
|
||||
binary->size - offset,
|
||||
binary->size - offset, ctx->arch,
|
||||
bifrost_debug & BIFROST_DBG_VERBOSE);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,8 @@ disassemble(const char *filename)
|
|||
}
|
||||
|
||||
if (pan_arch(gpu_id) >= 9)
|
||||
disassemble_valhall(stdout, entrypoint, filesize, verbose);
|
||||
disassemble_valhall(stdout, entrypoint, filesize, pan_arch(gpu_id),
|
||||
verbose);
|
||||
else
|
||||
disassemble_bifrost(stdout, entrypoint, filesize, verbose);
|
||||
|
||||
|
|
|
|||
|
|
@ -249,7 +249,7 @@ static bool is_branch_v15(uint64_t instr)
|
|||
}
|
||||
|
||||
void
|
||||
disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose)
|
||||
disassemble_valhall(FILE *fp, const void *code, size_t size, unsigned arch, bool verbose)
|
||||
{
|
||||
assert((size & 7) == 0);
|
||||
|
||||
|
|
@ -275,11 +275,18 @@ disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose)
|
|||
fprintf(fp, " ");
|
||||
}
|
||||
|
||||
va_disasm_instr(fp, instr);
|
||||
bool instr_is_branch;
|
||||
if (arch >= 15) {
|
||||
va_disasm_instr_v15(fp, instr);
|
||||
instr_is_branch = is_branch_v15(instr);
|
||||
} else {
|
||||
va_disasm_instr(fp, instr);
|
||||
instr_is_branch = is_branch(instr);
|
||||
}
|
||||
fprintf(fp, "\\n");
|
||||
|
||||
/* Separate blocks visually by inserting whitespace after branches */
|
||||
if (is_branch(instr))
|
||||
if (instr_is_branch)
|
||||
fprintf(fp, "\\n");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
void va_disasm_instr(FILE *fp, uint64_t instr);
|
||||
void va_disasm_instr_v15(FILE *fp, uint64_t instr);
|
||||
/* Disassemble the Valhall code at `code` and print it to `fp`.
 *
 * The definition shown elsewhere in this diff asserts that `size` is a
 * multiple of 8, decodes with va_disasm_instr_v15() when arch >= 15 and with
 * va_disasm_instr() otherwise, and prints an extra newline after branches.
 *
 * NOTE(review): this is a rendered diff hunk, so the prototype appears twice
 * below -- without and with the `arch` parameter. */
void disassemble_valhall(FILE *fp, const void *code, size_t size, bool verbose);
|
||||
void disassemble_valhall(FILE *fp, const void *code, size_t size, unsigned arch,
|
||||
bool verbose);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ struct va_stats {
|
|||
unsigned nr_fau_uniforms;
|
||||
};
|
||||
|
||||
/* Add the statistics of instruction `I` to `stats`.
 *
 * In the definition shown elsewhere in this diff, the execution unit that is
 * charged is looked up with get_valhall_opcode(I->op, arch).unit.
 *
 * NOTE(review): this is a rendered diff hunk, so the prototype appears twice
 * below -- without and with the `arch` parameter. */
void va_count_instr_stats(bi_instr *I, struct va_stats *stats);
|
||||
void va_count_instr_stats(bi_instr *I, unsigned arch, struct va_stats *stats);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
|
|||
static uint32_t
|
||||
va_resolve_swizzles(bi_context *ctx, bi_instr *I, unsigned s)
|
||||
{
|
||||
struct va_src_info info = va_src_info(I->op, s);
|
||||
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
|
||||
uint32_t value = I->src[s].value;
|
||||
enum bi_swizzle swz = I->src[s].swizzle;
|
||||
|
||||
|
|
@ -257,9 +257,10 @@ va_lower_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts,
|
|||
/* abs(#c) is pointless, but -#c occurs in transcendental sequences */
|
||||
assert(!I->src[s].abs && "redundant .abs modifier");
|
||||
|
||||
bool is_signed = valhall_opcodes[I->op].is_signed;
|
||||
bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
|
||||
struct va_src_info info = va_src_info(I->op, s);
|
||||
bool is_signed = get_valhall_opcode(I->op, ctx->arch).is_signed;
|
||||
bool staging =
|
||||
(s < get_valhall_opcode(I->op, ctx->arch).nr_staging_srcs);
|
||||
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
|
||||
const uint32_t value = va_resolve_swizzles(ctx, I, s);
|
||||
|
||||
const uint32_t count = (uintptr_t)_mesa_hash_table_u64_search(counts, value);
|
||||
|
|
@ -294,12 +295,13 @@ va_count_constants(bi_context *ctx, bi_instr *I, struct hash_table_u64 *counts)
|
|||
if (I->src[s].type != BI_INDEX_CONSTANT)
|
||||
continue;
|
||||
|
||||
const bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
|
||||
const bool staging =
|
||||
(s < get_valhall_opcode(I->op, ctx->arch).nr_staging_srcs);
|
||||
if (staging)
|
||||
continue;
|
||||
|
||||
bool is_signed = valhall_opcodes[I->op].is_signed;
|
||||
struct va_src_info info = va_src_info(I->op, s);
|
||||
bool is_signed = get_valhall_opcode(I->op, ctx->arch).is_signed;
|
||||
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
|
||||
uint32_t value = va_resolve_swizzles(ctx, I, s);
|
||||
|
||||
bi_index cons = va_lookup_constant(value, info, is_signed);
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ va_lower_split_64bit(bi_context *ctx)
|
|||
if (bi_is_null(I->src[s]) || s >= 4)
|
||||
continue;
|
||||
|
||||
struct va_src_info info = va_src_info(I->op, s);
|
||||
struct va_src_info info = va_src_info(I->op, s, ctx->arch);
|
||||
|
||||
/* Only split if the instruction expects 64-bit inputs as two separate
|
||||
* sources. */
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ va_mark_last(bi_context *ctx)
|
|||
break;
|
||||
|
||||
/* Only need to unmark split registers. */
|
||||
if (va_src_info(I->op, s).size == VA_SIZE_64 &&
|
||||
if (va_src_info(I->op, s, ctx->arch).size == VA_SIZE_64 &&
|
||||
bi_count_read_registers(I, s) == 1) {
|
||||
bool both_discard = I->src[s].discard && I->src[s + 1].discard;
|
||||
|
||||
|
|
|
|||
|
|
@ -286,7 +286,7 @@ va_fuse_cmp(bi_context *ctx, bi_instr **lut, const BITSET_WORD *multiple,
|
|||
static bool
|
||||
va_propagate_replicate_wide(bi_context *ctx, bi_instr **lut, bi_instr *I)
|
||||
{
|
||||
struct va_opcode_info info = valhall_opcodes[I->op];
|
||||
struct va_opcode_info info = get_valhall_opcode(I->op, ctx->arch);
|
||||
bool progress = false;
|
||||
|
||||
bi_foreach_ssa_src(I, s) {
|
||||
|
|
|
|||
|
|
@ -455,7 +455,7 @@ va_pack_rhadd(const bi_instr *I)
|
|||
static uint64_t
|
||||
va_pack_alu(const bi_instr *I, unsigned arch)
|
||||
{
|
||||
struct va_opcode_info info = valhall_opcodes[I->op];
|
||||
struct va_opcode_info info = get_valhall_opcode(I->op, arch);
|
||||
uint64_t hex = 0;
|
||||
|
||||
switch (I->op) {
|
||||
|
|
@ -750,7 +750,8 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
|
|||
VA_LOAD_LANE_96_BIT_IDENTITY, VA_LOAD_LANE_128_BIT_IDENTITY,
|
||||
};
|
||||
|
||||
unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7;
|
||||
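/* TODO hack: arch is hard-coded to 10 because va_pack_load() is not passed the target arch; plumb it through from the caller. */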
|
||||
unsigned memory_size = (get_valhall_opcode(I->op, 10).exact >> 27) & 0x7;
|
||||
uint64_t hex = (uint64_t)load_lane_identity[memory_size] << 36;
|
||||
|
||||
// unsigned
|
||||
|
|
@ -826,7 +827,7 @@ va_pack_register_format(const bi_instr *I)
|
|||
uint64_t
|
||||
va_pack_instr(const bi_instr *I, unsigned arch)
|
||||
{
|
||||
struct va_opcode_info info = valhall_opcodes[I->op];
|
||||
struct va_opcode_info info = get_valhall_opcode(I->op, arch);
|
||||
|
||||
uint64_t hex = info.exact | (((uint64_t)I->flow) << 59);
|
||||
hex |= ((uint64_t)va_select_fau_page(I)) << 57;
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
#include "valhall.h"
|
||||
|
||||
void
|
||||
va_count_instr_stats(bi_instr *I, struct va_stats *stats)
|
||||
va_count_instr_stats(bi_instr *I, unsigned arch, struct va_stats *stats)
|
||||
{
|
||||
/* Adjusted for 64-bit arithmetic */
|
||||
unsigned words = bi_count_write_registers(I, 0);
|
||||
|
|
@ -35,7 +35,7 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats)
|
|||
}
|
||||
}
|
||||
}
|
||||
switch (valhall_opcodes[I->op].unit) {
|
||||
switch (get_valhall_opcode(I->op, arch).unit) {
|
||||
/* Arithmetic is 2x slower for 64-bit than 32-bit */
|
||||
case VA_UNIT_FMA:
|
||||
stats->fma += words;
|
||||
|
|
|
|||
|
|
@ -198,6 +198,16 @@ valhall_v15_opcodes[BI_NUM_OPCODES] = {
|
|||
% endif
|
||||
% endfor
|
||||
};
|
||||
|
||||
/* Return the opcode-info entry for `op` from the table matching `arch`.
 *
 * arch must be at least 9 (asserted). For arch < 15 the entry is taken from
 * valhall_opcodes, otherwise from valhall_v15_opcodes.
 *
 * The entry is returned by value, so callers receive a copy of the table
 * element. `op` is used as the array index without a range check here. */
const struct va_opcode_info
|
||||
get_valhall_opcode(enum bi_opcode op, unsigned arch)
|
||||
{
|
||||
assert(arch >= 9);
|
||||
if (arch < 15)
|
||||
return valhall_opcodes[op];
|
||||
else
|
||||
return valhall_v15_opcodes[op];
}
|
||||
"""
|
||||
|
||||
# Exact value to be ORed in to every opcode
|
||||
|
|
|
|||
|
|
@ -89,7 +89,8 @@ struct va_opcode_info {
|
|||
unsigned sr_control : 2;
|
||||
};
|
||||
|
||||
extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES];
|
||||
/* Look up the opcode-info entry for `op` in the table that matches `arch`.
 * The definition shown elsewhere in this diff asserts arch >= 9 and selects
 * the v15 table for arch >= 15. The entry is returned by value. */
const struct va_opcode_info get_valhall_opcode(enum bi_opcode op,
|
||||
unsigned arch);
|
||||
|
||||
/* Bifrost specifies the source of bitwise operations as (A, B, shift), but
|
||||
* Valhall specifies (A, shift, B). We follow Bifrost conventions in the
|
||||
|
|
@ -130,10 +131,10 @@ va_swap_12(enum bi_opcode op)
|
|||
}
|
||||
|
||||
/* Return the source-info descriptor for source `src` of opcode `op`.
 *
 * When va_swap_12(op) is true and `src` is 1 or 2, the index used is 3 - src,
 * i.e. sources 1 and 2 are exchanged before the `srcs` array is indexed; all
 * other sources are looked up at their own index.
 * NOTE(review): presumably this accounts for the Bifrost/Valhall operand-order
 * difference described in the comment above va_swap_12() -- confirm there.
 *
 * NOTE(review): this is a rendered diff hunk, so the signature and the return
 * statement each appear twice -- one form indexes valhall_opcodes directly,
 * the other takes `arch` and goes through get_valhall_opcode(op, arch). */
static inline struct va_src_info
|
||||
va_src_info(enum bi_opcode op, unsigned src)
|
||||
va_src_info(enum bi_opcode op, unsigned src, unsigned arch)
|
||||
{
|
||||
unsigned idx = (va_swap_12(op) && (src == 1 || src == 2)) ? (3 - src) : src;
|
||||
return valhall_opcodes[op].srcs[idx];
|
||||
return get_valhall_opcode(op, arch).srcs[idx];
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
|
|||
|
|
@ -288,7 +288,8 @@ pan_disassemble(FILE *fp, const void *code, size_t size, uint64_t gpu_id,
|
|||
bool verbose)
|
||||
{
|
||||
if (pan_arch(gpu_id) >= 9)
|
||||
disassemble_valhall(fp, (const uint64_t *)code, size, verbose);
|
||||
disassemble_valhall(fp, (const uint64_t *)code, size, pan_arch(gpu_id),
|
||||
verbose);
|
||||
else if (pan_arch(gpu_id) >= 6)
|
||||
disassemble_bifrost(fp, code, size, verbose);
|
||||
else
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue