From e9170ef4521a2c843ed0bf4e494dc00de4acd05e Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 30 Jan 2023 20:11:57 -0600 Subject: [PATCH] nouveau/mme: Add an assembler and disassembler for the Turing+ MME Part-of: --- src/nouveau/meson.build | 1 + src/nouveau/mme/meson.build | 55 +++++ src/nouveau/mme/mme_tu104.c | 481 ++++++++++++++++++++++++++++++++++++ src/nouveau/mme/mme_tu104.h | 19 ++ 4 files changed, 556 insertions(+) create mode 100644 src/nouveau/mme/meson.build create mode 100644 src/nouveau/mme/mme_tu104.c diff --git a/src/nouveau/meson.build b/src/nouveau/meson.build index 29a18fc40db..6a7fa0b5924 100644 --- a/src/nouveau/meson.build +++ b/src/nouveau/meson.build @@ -28,5 +28,6 @@ endif subdir('codegen') if with_nouveau_vk subdir('nil') + subdir('mme') subdir('vulkan') endif diff --git a/src/nouveau/mme/meson.build b/src/nouveau/mme/meson.build new file mode 100644 index 00000000000..428570561ed --- /dev/null +++ b/src/nouveau/mme/meson.build @@ -0,0 +1,55 @@ +# Copyright © 2022 Collabora, Ltd. + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +mme_isa_depend_files = [ + 'mme_tu104.xml', + isaspec_py_deps +] + +mme_encode_h = custom_target( + 'mme-tu104-encode.h', + input: ['mme_tu104.xml'], + output: 'mme_tu104_encode.h', + command: [ + prog_isaspec_encode, '--xml', '@INPUT@', '--out-h', '@OUTPUT@' + ], + depend_files: mme_isa_depend_files, +) + +libnouveau_mme_files = files( + 'mme_tu104.c', + 'mme_tu104.h', +) + +_libnouveau_mme = static_library( + 'nouveau_mme', + [libnouveau_mme_files, mme_encode_h], + include_directories : [inc_include, inc_src], + gnu_symbol_visibility : 'hidden', + dependencies : [ + idep_mesautil, + idep_nvidia_headers, + ], +) + +idep_nouveau_mme = declare_dependency( + include_directories : include_directories('.'), + link_with : _libnouveau_mme, +) diff --git a/src/nouveau/mme/mme_tu104.c b/src/nouveau/mme/mme_tu104.c new file mode 100644 index 00000000000..e740cd953e9 --- /dev/null +++ b/src/nouveau/mme/mme_tu104.c @@ -0,0 +1,481 @@ +#include "mme_tu104.h" +#include "mme_tu104_encode.h" + +#include "util/u_math.h" + +#include + +#include "nvk_clc597.h" + +#define PRED_TO_STR(OP) [MME_TU104_PRED_##OP] = #OP +static const char *const pred_to_str[] = { /* read-only name table, indexed by enum mme_tu104_pred; file-local */ + PRED_TO_STR(UUUU), + PRED_TO_STR(TTTT), + PRED_TO_STR(FFFF), + PRED_TO_STR(TTUU), + PRED_TO_STR(FFUU), + PRED_TO_STR(TFUU), + PRED_TO_STR(TUUU), + PRED_TO_STR(FUUU), + PRED_TO_STR(UUTT), + PRED_TO_STR(UUTF), + PRED_TO_STR(UUTU), + PRED_TO_STR(UUFT), + PRED_TO_STR(UUFF), + PRED_TO_STR(UUFU), + PRED_TO_STR(UUUT), + PRED_TO_STR(UUUF), +}; +#undef PRED_TO_STR + +const char * +mme_tu104_pred_to_str(enum mme_tu104_pred pred) +{ + assert(pred < ARRAY_SIZE(pred_to_str)); /* debug-build bounds check before the table lookup */ + return pred_to_str[pred]; +} + +#define OP_TO_STR(OP) [MME_TU104_ALU_OP_##OP] = #OP +const char 
*alu_op_to_str[] = { + OP_TO_STR(ADD), + OP_TO_STR(ADDC), + OP_TO_STR(SUB), + OP_TO_STR(SUBB), + OP_TO_STR(MUL), + OP_TO_STR(MULH), + OP_TO_STR(MULU), + OP_TO_STR(EXTENDED), + OP_TO_STR(CLZ), + OP_TO_STR(SLL), + OP_TO_STR(SRL), + OP_TO_STR(SRA), + OP_TO_STR(AND), + OP_TO_STR(NAND), + OP_TO_STR(OR), + OP_TO_STR(XOR), + OP_TO_STR(MERGE), + OP_TO_STR(SLT), + OP_TO_STR(SLTU), + OP_TO_STR(SLE), + OP_TO_STR(SLEU), + OP_TO_STR(SEQ), + OP_TO_STR(STATE), + OP_TO_STR(LOOP), + OP_TO_STR(JAL), + OP_TO_STR(BLT), + OP_TO_STR(BLTU), + OP_TO_STR(BLE), + OP_TO_STR(BLEU), + OP_TO_STR(BEQ), + OP_TO_STR(DREAD), + OP_TO_STR(DWRITE), +}; +#undef OP_TO_STR + +const char * +mme_tu104_alu_op_to_str(enum mme_tu104_alu_op op) +{ + assert(op < ARRAY_SIZE(alu_op_to_str)); + return alu_op_to_str[op]; +} + +void +mme_tu104_encode(uint32_t *out, uint32_t inst_count, + const struct mme_tu104_inst *insts) +{ + for (uint32_t i = 0; i < inst_count; i++) { + bitmask_t enc = encode__instruction(NULL, NULL, insts[i]); + + /* Annoyingly, the words are reversed in the actual encoding */ + out[i * 3 + 0] = enc.bitset[2]; + out[i * 3 + 1] = enc.bitset[1]; + out[i * 3 + 2] = enc.bitset[0]; + } +} + +static uint64_t +unpack_field(bitmask_t bitmask, unsigned low, unsigned high, bool is_signed) +{ + bitmask_t field, mask; + + assert(high >= low); + + BITSET_ZERO(mask.bitset); + BITSET_SET_RANGE(mask.bitset, 0, high - low); + + BITSET_COPY(field.bitset, bitmask.bitset); + BITSET_SHR(field.bitset, low); + BITSET_AND(field.bitset, field.bitset, mask.bitset); + + uint64_t data = bitmask_to_uint64_t(field); + if (is_signed) + data = util_sign_extend(data, high - low + 1); + + return data; +} + +void +mme_tu104_decode(struct mme_tu104_inst *insts, + const uint32_t *in, uint32_t inst_count) +{ + for (uint32_t i = 0; i < inst_count; i++) { + /* Annoyingly, the words are reversed in the actual encoding */ + bitmask_t enc; + enc.bitset[0] = in[i * 3 + 2]; + enc.bitset[1] = in[i * 3 + 1]; + enc.bitset[2] = in[i * 3 + 0]; 
+ + insts[i].end_next = unpack_field(enc, 0, 0, false); + insts[i].pred_mode = unpack_field(enc, 1, 4, false); + insts[i].pred = unpack_field(enc, 5, 9, false); + + insts[i].alu[0].op = unpack_field(enc, 10, 14, false); + insts[i].alu[0].dst = unpack_field(enc, 15, 19, false); + insts[i].alu[0].src[0] = unpack_field(enc, 20, 24, false); + insts[i].alu[0].src[1] = unpack_field(enc, 25, 29, false); + insts[i].imm[0] = unpack_field(enc, 30, 45, false); + + insts[i].alu[1].op = unpack_field(enc, 46, 50, false); + insts[i].alu[1].dst = unpack_field(enc, 51, 55, false); + insts[i].alu[1].src[0] = unpack_field(enc, 56, 60, false); + insts[i].alu[1].src[1] = unpack_field(enc, 61, 65, false); + insts[i].imm[1] = unpack_field(enc, 66, 81, false); + + insts[i].out[0].mthd = unpack_field(enc, 82, 84, false); + insts[i].out[0].emit = unpack_field(enc, 85, 88, false); + + insts[i].out[1].mthd = unpack_field(enc, 89, 91, false); + insts[i].out[1].emit = unpack_field(enc, 92, 95, false); + } +} + +static void +print_indent(FILE *fp, unsigned depth) +{ + for (unsigned i = 0; i < depth; i++) + fprintf(fp, " "); +} + +static bool +mme_tu104_alu_src_is_imm(const struct mme_tu104_inst *inst, + unsigned alu_idx, unsigned src_idx, + uint32_t *imm) +{ + const enum mme_tu104_reg reg = inst->alu[alu_idx].src[src_idx]; + + switch (reg) { + case MME_TU104_REG_ZERO: + *imm = 0; + return true; + case MME_TU104_REG_IMM: + *imm = (int32_t)(int16_t)inst->imm[alu_idx]; + return true; + case MME_TU104_REG_IMMPAIR: + *imm = (int32_t)(int16_t)inst->imm[1 - alu_idx]; + return true; + case MME_TU104_REG_IMM32: + *imm = ((uint32_t)inst->imm[0] << 16) | inst->imm[1]; + return true; + default: + return false; + } +} + +static void +mme_tu104_print_alu_src(FILE *fp, const struct mme_tu104_inst *inst, + unsigned alu_idx, unsigned src_idx) +{ + const enum mme_tu104_reg reg = inst->alu[alu_idx].src[src_idx]; + if (reg <= MME_TU104_REG_R23) { + fprintf(fp, " $r%u", (unsigned)reg); + } else { + switch (reg) { + 
case MME_TU104_REG_ZERO: + fprintf(fp, " $zero"); + break; + case MME_TU104_REG_IMM: + fprintf(fp, " %d /* 0x%04x */", (int)(int16_t)inst->imm[alu_idx], + (unsigned)inst->imm[alu_idx]); + break; + case MME_TU104_REG_IMMPAIR: + fprintf(fp, " %d /* 0x%04x */", (int)(int16_t)inst->imm[1 - alu_idx], + (unsigned)inst->imm[1 - alu_idx]); + break; + case MME_TU104_REG_IMM32: + fprintf(fp, " 0x%x", ((uint32_t)inst->imm[0] << 16) | inst->imm[1]); + break; + case MME_TU104_REG_LOAD0: + fprintf(fp, " $load0"); + break; + case MME_TU104_REG_LOAD1: + fprintf(fp, " $load1"); + break; + default: + unreachable("Invalid ALU source register"); + } + } +} + +bool +mme_tu104_alu_op_has_side_effects(enum mme_tu104_alu_op op) +{ + switch (op) { + case MME_TU104_ALU_OP_EXTENDED: + case MME_TU104_ALU_OP_LOOP: + case MME_TU104_ALU_OP_JAL: + case MME_TU104_ALU_OP_BLT: + case MME_TU104_ALU_OP_BLTU: + case MME_TU104_ALU_OP_BLE: + case MME_TU104_ALU_OP_BLEU: + case MME_TU104_ALU_OP_BEQ: + case MME_TU104_ALU_OP_DWRITE: + return true; + default: + return false; + } +} + +static bool +mme_tu104_alu_is_branch(const struct mme_tu104_inst *inst, unsigned alu_idx, + int *then_offset, unsigned *else_offset) +{ + switch (inst->alu[alu_idx].op) { + case MME_TU104_ALU_OP_BLT: + case MME_TU104_ALU_OP_BLTU: + case MME_TU104_ALU_OP_BLE: + case MME_TU104_ALU_OP_BLEU: + case MME_TU104_ALU_OP_BEQ: + *then_offset = util_mask_sign_extend(inst->imm[alu_idx], 14); + *else_offset = (inst->imm[alu_idx] >> 14) & 0x3; + return true; + default: + return false; + } +} + +static void +mme_tu104_print_alu(FILE *fp, unsigned indent, + const struct mme_tu104_inst *inst, + unsigned alu_idx) +{ + const struct mme_tu104_alu *alu = &inst->alu[alu_idx]; + + const bool used_by_out = + inst->out[0].mthd == MME_TU104_OUT_OP_ALU0 + alu_idx || + inst->out[0].emit == MME_TU104_OUT_OP_ALU0 + alu_idx || + inst->out[1].mthd == MME_TU104_OUT_OP_ALU0 + alu_idx || + inst->out[1].emit == MME_TU104_OUT_OP_ALU0 + alu_idx; + + if (!used_by_out 
&& alu->dst == MME_TU104_REG_ZERO && + !mme_tu104_alu_op_has_side_effects(alu->op)) + return; + + print_indent(fp, indent); + + if (used_by_out || alu->dst != MME_TU104_REG_ZERO) { + if (used_by_out) + fprintf(fp, "$alu%u", alu_idx); + if (alu->dst <= MME_TU104_REG_R23) { + fprintf(fp, "%s$r%u", used_by_out ? ", " : "", (unsigned)alu->dst); + } else { + assert(alu->dst == MME_TU104_REG_ZERO); + } + fprintf(fp, " = "); + } + + switch (alu->op) { + case MME_TU104_ALU_OP_ADDC: + assert(alu_idx == 1); + fprintf(fp, "ADDC"); + mme_tu104_print_alu_src(fp, inst, alu_idx, 0); + mme_tu104_print_alu_src(fp, inst, alu_idx, 1); + fprintf(fp, " $carry"); + break; + case MME_TU104_ALU_OP_SUBB: + assert(alu_idx == 1); + fprintf(fp, "SUBB"); + mme_tu104_print_alu_src(fp, inst, alu_idx, 0); + mme_tu104_print_alu_src(fp, inst, alu_idx, 1); + fprintf(fp, " $borrow"); + break; + case MME_TU104_ALU_OP_MULH: + assert(alu_idx == 1); + assert(alu->src[0] == MME_TU104_REG_ZERO); + assert(alu->src[1] == MME_TU104_REG_ZERO); + fprintf(fp, "MULH $alu0"); + break; + case MME_TU104_ALU_OP_MERGE: { + uint16_t immed = inst->imm[alu_idx]; + uint32_t src_pos = (immed >> 0) & 0x1f; + uint32_t bits = (immed >> 5) & 0x1f; + uint32_t dst_pos = (immed >> 10) & 0x3f; + fprintf(fp, "MERGE"); + mme_tu104_print_alu_src(fp, inst, alu_idx, 0); + mme_tu104_print_alu_src(fp, inst, alu_idx, 1); + fprintf(fp, " (%u, %u, %u)", src_pos, bits, dst_pos); + break; + } + case MME_TU104_ALU_OP_STATE: { + fprintf(fp, "STATE"); + mme_tu104_print_alu_src(fp, inst, alu_idx, 0); + if (alu->src[1] != MME_TU104_REG_ZERO) { + fprintf(fp, " +"); + mme_tu104_print_alu_src(fp, inst, alu_idx, 1); + } + uint32_t imm; + if (mme_tu104_alu_src_is_imm(inst, alu_idx, 0, &imm)) { + uint32_t mthd = imm << 2; + fprintf(fp, " /* %s", P_PARSE_NVC597_MTHD(mthd)); + if (alu->src[1] != MME_TU104_REG_ZERO) { + fprintf(fp, " +"); + mme_tu104_print_alu_src(fp, inst, alu_idx, 1); + } + fprintf(fp, " */"); + } + break; + } + case 
MME_TU104_ALU_OP_JAL: + assert(alu->src[0] == MME_TU104_REG_ZERO); + assert(alu->src[1] == MME_TU104_REG_ZERO); + fprintf(fp, "JAL (0x%04x)", (unsigned)inst->imm[alu_idx]); + break; + case MME_TU104_ALU_OP_LOOP: + case MME_TU104_ALU_OP_DREAD: + fprintf(fp, "%s", mme_tu104_alu_op_to_str(alu->op)); + mme_tu104_print_alu_src(fp, inst, alu_idx, 0); + assert(alu->src[1] == MME_TU104_REG_ZERO); + break; + default: + fprintf(fp, "%s", mme_tu104_alu_op_to_str(alu->op)); + mme_tu104_print_alu_src(fp, inst, alu_idx, 0); + mme_tu104_print_alu_src(fp, inst, alu_idx, 1); + break; + } + + int then_offset; + unsigned else_offset; + if (mme_tu104_alu_is_branch(inst, alu_idx, &then_offset, &else_offset)) + fprintf(fp, " (%d, %u)", then_offset, else_offset); + + if (alu->op == MME_TU104_ALU_OP_LOOP) + fprintf(fp, " (%d)", (int)inst->imm[alu_idx]); /* %d takes an int; the ALU's immediate is printed after the LOOP source */ + + fprintf(fp, "\n"); +} + +static bool +mme_tu104_out_is_imm(const struct mme_tu104_inst *inst, + enum mme_tu104_out_op op, uint32_t *imm) +{ + switch (op) { + case MME_TU104_OUT_OP_IMM0: + case MME_TU104_OUT_OP_IMM1: + *imm = inst->imm[op - MME_TU104_OUT_OP_IMM0]; + return true; + case MME_TU104_OUT_OP_IMMHIGH0: + case MME_TU104_OUT_OP_IMMHIGH1: + *imm = inst->imm[op - MME_TU104_OUT_OP_IMMHIGH0] >> 12; + return true; + case MME_TU104_OUT_OP_IMM32: + *imm = ((uint32_t)inst->imm[0] << 16) | inst->imm[1]; + return true; + default: + return false; /* not an immediate selector; *imm is left unwritten */ + } +} + +static void +mme_tu104_print_out_src(FILE *fp, const struct mme_tu104_inst *inst, + enum mme_tu104_out_op op) +{ + switch (op) { + case MME_TU104_OUT_OP_ALU0: + case MME_TU104_OUT_OP_ALU1: + fprintf(fp, "$alu%u", (unsigned)(op - MME_TU104_OUT_OP_ALU0)); /* %u takes an unsigned; index is relative to ALU0 */ + break; + case MME_TU104_OUT_OP_LOAD0: + case MME_TU104_OUT_OP_LOAD1: + fprintf(fp, "$load%u", (unsigned)(op - MME_TU104_OUT_OP_LOAD0)); /* %u takes an unsigned; index is relative to LOAD0 */ + break; + case MME_TU104_OUT_OP_IMM0: + case MME_TU104_OUT_OP_IMM1: + fprintf(fp, "0x%x", (unsigned)inst->imm[op - MME_TU104_OUT_OP_IMM0]); + break; + case MME_TU104_OUT_OP_RESERVED: + fprintf(fp, "RESERVED"); 
+ break; + case MME_TU104_OUT_OP_IMMHIGH0: + case MME_TU104_OUT_OP_IMMHIGH1: + fprintf(fp, "%u", + (unsigned)(inst->imm[op - MME_TU104_OUT_OP_IMMHIGH0] >> 12)); + break; + case MME_TU104_OUT_OP_IMM32: + fprintf(fp, "0x%x", ((uint32_t)inst->imm[0] << 16) | inst->imm[1]); + break; + default: + unreachable("Invalid output source"); + } +} + +static void +mme_tu104_print_out(FILE *fp, unsigned indent, + const struct mme_tu104_inst *inst, + unsigned out_idx) +{ + const struct mme_tu104_out *out = &inst->out[out_idx]; + + if (out->mthd != MME_TU104_OUT_OP_NONE) { + print_indent(fp, indent); + fprintf(fp, "mthd("); + uint32_t imm; + if (mme_tu104_out_is_imm(inst, out->mthd, &imm)) { + uint32_t mthd = (imm & 0xfff) << 2; /* low 12 bits of the immediate, shifted left by 2, are printed as the method */ + uint32_t incr = imm >> 12; /* the bits above those 12 are printed as the second mthd() argument */ + fprintf(fp, "0x%04x, %u)", mthd, incr); + fprintf(fp, " /* %s */", P_PARSE_NVC597_MTHD(mthd)); + } else { + mme_tu104_print_out_src(fp, inst, out->mthd); + fprintf(fp, ")"); + } + fprintf(fp, "\n"); + } + if (out->emit != MME_TU104_OUT_OP_NONE) { + print_indent(fp, indent); + fprintf(fp, "emit("); + mme_tu104_print_out_src(fp, inst, out->emit); + fprintf(fp, ")\n"); + } +} + +void +mme_tu104_print_inst(FILE *fp, unsigned indent, + const struct mme_tu104_inst *inst) +{ + if (inst->pred_mode != MME_TU104_PRED_UUUU) { + print_indent(fp, indent); + fprintf(fp, "pred %s", mme_tu104_pred_to_str(inst->pred_mode)); + fprintf(fp, " $r%u {\n", (unsigned)inst->pred); + indent++; /* the ALU/output lines of a predicated instruction are printed one level deeper */ + } + + mme_tu104_print_alu(fp, indent, inst, 0); + mme_tu104_print_alu(fp, indent, inst, 1); + mme_tu104_print_out(fp, indent, inst, 0); + mme_tu104_print_out(fp, indent, inst, 1); + + if (inst->pred_mode != MME_TU104_PRED_UUUU) { + indent--; + print_indent(fp, indent); + fprintf(fp, "}\n"); + } +} + +void +mme_tu104_print(FILE *fp, const struct mme_tu104_inst *insts, + uint32_t inst_count) +{ + for (uint32_t i = 0; i < inst_count; i++) { + fprintf(fp, "%u:\n", i); /* each instruction is preceded by its index as a label */ + mme_tu104_print_inst(fp, 1, &insts[i]); + } +} diff --git a/src/nouveau/mme/mme_tu104.h 
b/src/nouveau/mme/mme_tu104.h index 0be706fd622..5b98f34dc32 100644 --- a/src/nouveau/mme/mme_tu104.h +++ b/src/nouveau/mme/mme_tu104.h @@ -3,6 +3,7 @@ #include #include +#include #include "util/macros.h" @@ -32,6 +33,8 @@ enum PACKED mme_tu104_pred { MME_TU104_PRED_UUUF, }; +const char *mme_tu104_pred_to_str(enum mme_tu104_pred pred); + enum PACKED mme_tu104_reg { MME_TU104_REG_R0, MME_TU104_REG_R1, @@ -101,6 +104,10 @@ enum PACKED mme_tu104_alu_op { MME_TU104_ALU_OP_DWRITE, }; +const char *mme_tu104_alu_op_to_str(enum mme_tu104_alu_op op); + +bool mme_tu104_alu_op_has_side_effects(enum mme_tu104_alu_op op); + enum PACKED mme_tu104_out_op { MME_TU104_OUT_OP_NONE, MME_TU104_OUT_OP_ALU0, @@ -161,6 +168,18 @@ struct mme_tu104_inst { { MME_TU104_OUT_DEFAULTS } \ }, +void mme_tu104_print_inst(FILE *fp, unsigned indent, + const struct mme_tu104_inst *inst); + +void mme_tu104_print(FILE *fp, const struct mme_tu104_inst *insts, + uint32_t inst_count); + +void mme_tu104_encode(uint32_t *out, uint32_t inst_count, + const struct mme_tu104_inst *insts); + +void mme_tu104_decode(struct mme_tu104_inst *insts, + const uint32_t *in, uint32_t inst_count); + #ifdef __cplusplus } #endif