diff --git a/src/intel/compiler/brw/brw_asm.c b/src/intel/compiler/brw/brw_asm.c deleted file mode 100644 index d34913d025c..00000000000 --- a/src/intel/compiler/brw/brw_asm.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#include "brw_asm.h" -#include "brw_asm_internal.h" -#include "brw_disasm_info.h" -#include "util/hash_table.h" -#include "util/u_dynarray.h" - -typedef struct { - char *name; - int offset; /* -1 for unset */ - struct util_dynarray jip_uses; - struct util_dynarray uip_uses; -} brw_asm_label; - -static brw_asm_label * -brw_asm_label_lookup(struct brw_asm_parser *parser, const char *name) -{ - uint32_t h = _mesa_hash_string(name); - struct hash_entry *entry = - _mesa_hash_table_search_pre_hashed(parser->labels, h, name); - if (!entry) { - void *mem_ctx = parser->labels; - brw_asm_label *label = rzalloc(mem_ctx, brw_asm_label); - label->name = ralloc_strdup(mem_ctx, name); - label->offset = -1; - util_dynarray_init(&label->jip_uses, mem_ctx); - util_dynarray_init(&label->uip_uses, mem_ctx); - entry = _mesa_hash_table_insert_pre_hashed(parser->labels, - h, name, label); - } - assert(entry); - return entry->data; -} - -void -brw_asm_label_set(struct brw_asm_parser *parser, const char *name) -{ - brw_asm_label *label = brw_asm_label_lookup(parser, name); - label->offset = parser->p->next_insn_offset; -} - -void -brw_asm_label_use_jip(struct brw_asm_parser *parser, const char *name) -{ - struct brw_codegen *p = parser->p; - brw_asm_label *label = brw_asm_label_lookup(parser, name); - int offset = p->next_insn_offset - sizeof(brw_eu_inst); - util_dynarray_append(&label->jip_uses, offset); - /* Will be patched later. 
*/ - brw_eu_inst_set_jip(p->devinfo, brw_last_inst, 0); -} - -void -brw_asm_label_use_uip(struct brw_asm_parser *parser, const char *name) -{ - struct brw_codegen *p = parser->p; - brw_asm_label *label = brw_asm_label_lookup(parser, name); - int offset = p->next_insn_offset - sizeof(brw_eu_inst); - util_dynarray_append(&label->uip_uses, offset); - /* Will be patched later. */ - brw_eu_inst_set_uip(p->devinfo, brw_last_inst, 0); -} - -static bool -brw_postprocess_labels(struct brw_asm_parser *parser) -{ - unsigned unknown = 0; - struct brw_codegen *p = parser->p; - void *store = p->store; - - hash_table_foreach(parser->labels, entry) { - brw_asm_label *label = entry->data; - - if (label->offset == -1) { - fprintf(stderr, "Unknown label '%s'\n", label->name); - unknown++; - continue; - } - - util_dynarray_foreach(&label->jip_uses, int, use_offset) { - brw_eu_inst *inst = store + *use_offset; - brw_eu_inst_set_jip(parser->devinfo, inst, label->offset - *use_offset); - } - - util_dynarray_foreach(&label->uip_uses, int, use_offset) { - brw_eu_inst *inst = store + *use_offset; - brw_eu_inst_set_uip(parser->devinfo, inst, label->offset - *use_offset); - } - } - - return unknown == 0; -} - -/* TODO: Would be nice to make this operate on string instead on a FILE. */ - -brw_assemble_result -brw_assemble(void *mem_ctx, const struct intel_device_info *devinfo, - FILE *f, const char *filename, brw_assemble_flags flags) -{ - brw_assemble_result result = {0}; - - struct brw_isa_info isa; - brw_init_isa_info(&isa, devinfo); - - /* This is allocated separatedly from the parser since will outlive - * the parser state. 
- */ - struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen); - brw_init_codegen(&isa, p, p); - - brw_asm_parser *parser = rzalloc(mem_ctx, brw_asm_parser); - parser->devinfo = devinfo; - parser->labels = _mesa_string_hash_table_create(parser); - parser->p = p; - parser->input_filename = filename; - parser->compaction_warning_given = false; - - parser->scanner = NULL; - brw_asm_lex_init_extra(parser, &parser->scanner); - brw_asm_restart(f, parser->scanner); - - int err = yyparse(parser); - brw_asm_lex_destroy(parser->scanner); - if (err || parser->errors) - goto end; - - if (!brw_postprocess_labels(parser)) - goto end; - - struct disasm_info *disasm_info = disasm_initialize(p->isa, NULL); - if (!disasm_info) { - ralloc_free(disasm_info); - fprintf(stderr, "Unable to initialize disasm_info struct instance\n"); - goto end; - } - - /* Add "inst groups" so validation errors can be recorded. */ - for (int i = 0; i <= p->next_insn_offset; i += 16) - disasm_new_inst_group(disasm_info, i); - - if (!brw_validate_instructions(p->isa, p->store, 0, - p->next_insn_offset, disasm_info)) { - dump_assembly(p->store, 0, p->next_insn_offset, disasm_info, NULL, stderr); - ralloc_free(disasm_info); - fprintf(stderr, "Invalid instructions.\n"); - goto end; - } - - if ((flags & BRW_ASSEMBLE_COMPACT) != 0) - brw_compact_instructions(p, 0, disasm_info); - - result.bin = p->store; - result.bin_size = p->next_insn_offset; - - if ((flags & BRW_ASSEMBLE_DUMP) != 0) - dump_assembly(p->store, 0, p->next_insn_offset, disasm_info, NULL, stderr); - - ralloc_free(disasm_info); - -end: - ralloc_free(parser); - - return result; -} - diff --git a/src/intel/compiler/brw/brw_asm.h b/src/intel/compiler/brw/brw_asm.h deleted file mode 100644 index 078cd988845..00000000000 --- a/src/intel/compiler/brw/brw_asm.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#pragma once - -#include -#include - -struct intel_device_info; - -typedef 
struct { - void *bin; - int bin_size; -} brw_assemble_result; - -typedef enum { - BRW_ASSEMBLE_COMPACT = 1 << 0, - BRW_ASSEMBLE_DUMP = 1 << 1, -} brw_assemble_flags; - -brw_assemble_result brw_assemble( - void *mem_ctx, const struct intel_device_info *devinfo, - FILE *f, const char *filename, brw_assemble_flags flags); diff --git a/src/intel/compiler/brw/brw_asm_internal.h b/src/intel/compiler/brw/brw_asm_internal.h deleted file mode 100644 index 23d430ca915..00000000000 --- a/src/intel/compiler/brw/brw_asm_internal.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#pragma once - -/* Assembler internal state and definitions used by the brw_gram/brw_lex. */ - -#include -#include -#include -#include - -#include "brw_eu.h" -#include "brw_eu_defines.h" -#include "brw_eu_inst.h" -#include "brw_reg.h" -#include "brw_reg_type.h" -#include "dev/intel_device_info.h" -#include "util/list.h" - -/* glibc < 2.27 defines OVERFLOW in /usr/include/math.h. */ -#undef OVERFLOW - -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T -typedef void *yyscan_t; -#endif - -typedef struct brw_asm_parser { - const struct intel_device_info *devinfo; - struct brw_codegen *p; - const char *input_filename; - int errors; - bool compaction_warning_given; - struct hash_table *labels; - - /* Lexer state. */ - yyscan_t scanner; - int saved_state; -} brw_asm_parser; - -/* A helper for accessing the last instruction emitted. This makes it easy - * to set various bits on an instruction without having to create temporary - * variable and assign the emitted instruction to those. 
- */ -#define brw_last_inst brw_eu_last_inst(parser->p) - -int yyparse(struct brw_asm_parser *parser); -char *brw_asm_get_text(yyscan_t scanner); - -int brw_asm_lex_init_extra(struct brw_asm_parser *parser, yyscan_t *scanner); -int brw_asm_lex_destroy(yyscan_t scanner); -void brw_asm_restart(FILE *input_file, yyscan_t scanner); - -struct condition { - unsigned cond_modifier:4; - unsigned flag_reg_nr:1; - unsigned flag_subreg_nr:1; -}; - -struct predicate { - unsigned pred_control:4; - unsigned pred_inv:1; - unsigned flag_reg_nr:1; - unsigned flag_subreg_nr:1; -}; - -enum instoption_type { - INSTOPTION_FLAG, - INSTOPTION_DEP_INFO, - INSTOPTION_CHAN_OFFSET, -}; - -struct instoption { - enum instoption_type type; - union { - unsigned uint_value; - gen_swsb depinfo_value; - }; -}; - -struct options { - uint8_t chan_offset; - unsigned access_mode:1; - unsigned compression_control:2; - unsigned thread_control:2; - unsigned branch_control:1; - unsigned no_dd_check:1; // Dependency control - unsigned no_dd_clear:1; // Dependency control - unsigned mask_control:1; - unsigned debug_control:1; - unsigned acc_wr_control:1; - unsigned end_of_thread:1; - unsigned compaction:1; - unsigned is_compr:1; - gen_swsb depinfo; -}; - -struct msgdesc { - unsigned ex_bso:1; - unsigned src1_len:5; -}; - -void brw_asm_label_set(struct brw_asm_parser *parser, const char *name); -void brw_asm_label_use_jip(struct brw_asm_parser *parser, const char *name); -void brw_asm_label_use_uip(struct brw_asm_parser *parser, const char *name); diff --git a/src/intel/compiler/brw/brw_asm_tool.c b/src/intel/compiler/brw/brw_asm_tool.c deleted file mode 100644 index f6b0d3adcb4..00000000000 --- a/src/intel/compiler/brw/brw_asm_tool.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include - -#include "util/ralloc.h" -#include "brw_eu_inst.h" -#include "dev/intel_device_info.h" - -#include "brw_asm.h" - -enum 
opt_output_type { - OPT_OUTPUT_HEX, - OPT_OUTPUT_C_LITERAL, - OPT_OUTPUT_BIN, -}; - -static enum opt_output_type output_type = OPT_OUTPUT_BIN; - -static void -print_help(const char *progname, FILE *file) -{ - fprintf(file, - "Usage: %s [OPTION] inputfile\n" - "Assemble i965 instructions from input file.\n\n" - " -h, --help display this help and exit\n" - " -t, --type=OUTPUT_TYPE OUTPUT_TYPE can be 'bin' (default if omitted),\n" - " 'c_literal', or 'hex'\n" - " -o, --output specify output file\n" - " --compact print compacted instructions\n" - " -g, --gen=platform assemble instructions for given \n" - " platform (3 letter platform name)\n" - "Example:\n" - " brw_asm -g kbl input.asm -t hex -o output\n", - progname); -} - -static uint32_t -get_dword(const brw_eu_inst *inst, int idx) -{ - uint32_t dword; - memcpy(&dword, (char *)inst + 4 * idx, sizeof(dword)); - return dword; -} - -static void -print_instruction(FILE *output, bool compact, const brw_eu_inst *instruction) -{ - int byte_limit; - - byte_limit = (compact == true) ? 
8 : 16; - - switch (output_type) { - case OPT_OUTPUT_HEX: { - fprintf(output, "%02x", ((unsigned char *)instruction)[0]); - - for (unsigned i = 1; i < byte_limit; i++) { - fprintf(output, " %02x", ((unsigned char *)instruction)[i]); - } - break; - } - case OPT_OUTPUT_C_LITERAL: { - fprintf(output, "\t0x%08x,", get_dword(instruction, 0)); - - for (unsigned i = 1; i < byte_limit / 4; i++) - fprintf(output, " 0x%08x,", get_dword(instruction, i)); - - break; - } - case OPT_OUTPUT_BIN: - fwrite(instruction, 1, byte_limit, output); - break; - } - - if (output_type != OPT_OUTPUT_BIN) { - fprintf(output, "\n"); - } -} - -static struct intel_device_info * -i965_asm_init(uint16_t pci_id) -{ - struct intel_device_info *devinfo; - - devinfo = malloc(sizeof *devinfo); - if (devinfo == NULL) - return NULL; - - if (!intel_get_device_info_from_pci_id(pci_id, devinfo)) { - fprintf(stderr, "can't find device information: pci_id=0x%x\n", - pci_id); - free(devinfo); - return NULL; - } - - if (devinfo->ver < 9) { - fprintf(stderr, "device has gfx version %d but must be >= 9, try elk_asm instead", - devinfo->ver); - exit(EXIT_FAILURE); - } - - return devinfo; -} - - - -int main(int argc, char **argv) -{ - void *mem_ctx = ralloc_context(NULL); - FILE *input_file = NULL; - char *output_file = NULL; - int c; - FILE *output = stdout; - bool help = false, compact = false; - uint64_t pci_id = 0; - struct intel_device_info *devinfo = NULL; - int result = EXIT_FAILURE; - - const struct option brw_asm_opts[] = { - { "help", no_argument, (int *) &help, true }, - { "type", required_argument, NULL, 't' }, - { "gen", required_argument, NULL, 'g' }, - { "output", required_argument, NULL, 'o' }, - { "compact", no_argument, (int *) &compact, true }, - { NULL, 0, NULL, 0 } - }; - - while ((c = getopt_long(argc, argv, ":t:g:o:h", brw_asm_opts, NULL)) != -1) { - switch (c) { - case 'g': { - const int id = intel_device_name_to_pci_device_id(optarg); - if (id < 0) { - fprintf(stderr, "can't parse gen: '%s', 
expected 3 letter " - "platform name\n", optarg); - goto end; - } else { - pci_id = id; - } - break; - } - case 'h': - help = true; - print_help(argv[0], stderr); - goto end; - case 't': { - if (strcmp(optarg, "hex") == 0) { - output_type = OPT_OUTPUT_HEX; - } else if (strcmp(optarg, "c_literal") == 0) { - output_type = OPT_OUTPUT_C_LITERAL; - } else if (strcmp(optarg, "bin") == 0) { - output_type = OPT_OUTPUT_BIN; - } else { - fprintf(stderr, "invalid value for --type: %s\n", optarg); - goto end; - } - break; - } - case 'o': - output_file = strdup(optarg); - break; - case 0: - break; - case ':': - fprintf(stderr, "%s: option `-%c' requires an argument\n", - argv[0], optopt); - goto end; - case '?': - default: - fprintf(stderr, "%s: option `-%c' is invalid: ignored\n", - argv[0], optopt); - goto end; - } - } - - if (help || !pci_id) { - print_help(argv[0], stderr); - goto end; - } - - if (optind == argc) { - fprintf(stderr, "Please specify input file\n"); - goto end; - } - - const char *filename = argv[optind]; - input_file = fopen(filename, "r"); - if (!input_file) { - fprintf(stderr, "Unable to read input file : %s\n", - filename); - goto end; - } - - if (output_file) { - output = fopen(output_file, "w"); - if (!output) { - fprintf(stderr, "Couldn't open output file\n"); - goto end; - } - } - - devinfo = i965_asm_init(pci_id); - if (!devinfo) { - fprintf(stderr, "Unable to allocate memory for " - "intel_device_info struct instance.\n"); - goto end; - } - - brw_assemble_result r = brw_assemble(mem_ctx, devinfo, input_file, filename, - compact ? 
BRW_ASSEMBLE_COMPACT : 0); - if (!r.bin) - goto end; - - if (output_type == OPT_OUTPUT_C_LITERAL) - fprintf(output, "{\n"); - - for (int offset = 0; offset < r.bin_size;) { - const brw_eu_inst *insn = r.bin + offset; - bool compacted = false; - - if (compact && brw_eu_inst_cmpt_control(devinfo, insn)) { - offset += 8; - compacted = true; - } else { - offset += 16; - } - - print_instruction(output, compacted, insn); - } - - if (output_type == OPT_OUTPUT_C_LITERAL) - fprintf(output, "}"); - - result = EXIT_SUCCESS; - goto end; - -end: - free(output_file); - - if (input_file) - fclose(input_file); - - if (output) - fclose(output); - - ralloc_free(mem_ctx); - - free(devinfo); - - exit(result); -} diff --git a/src/intel/compiler/brw/brw_disasm.c b/src/intel/compiler/brw/brw_disasm.c deleted file mode 100644 index 219af1188d7..00000000000 --- a/src/intel/compiler/brw/brw_disasm.c +++ /dev/null @@ -1,2793 +0,0 @@ -/* - * Copyright © 2008 Keith Packard - * Copyright © 2014 Intel Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that copyright - * notice and this permission notice appear in supporting documentation, and - * that the name of the copyright holders not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. The copyright holders make no representations - * about the suitability of this software for any purpose. It is provided "as - * is" without express or implied warranty. 
- * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "brw_disasm.h" -#include "brw_disasm_info.h" -#include "brw_eu_defines.h" -#include "brw_eu.h" -#include "brw_eu_inst.h" -#include "brw_isa_info.h" -#include "brw_reg.h" -#include "util/half_float.h" - -bool -brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode) -{ - return opcode == BRW_OPCODE_IF || - opcode == BRW_OPCODE_ELSE || - opcode == BRW_OPCODE_ENDIF || - opcode == BRW_OPCODE_WHILE || - opcode == BRW_OPCODE_BREAK || - opcode == BRW_OPCODE_CONTINUE || - opcode == BRW_OPCODE_HALT || - opcode == BRW_OPCODE_GOTO || - opcode == BRW_OPCODE_JOIN; -} - -bool -brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode) -{ - return opcode == BRW_OPCODE_IF || - opcode == BRW_OPCODE_ELSE || - opcode == BRW_OPCODE_BREAK || - opcode == BRW_OPCODE_CONTINUE || - opcode == BRW_OPCODE_HALT || - opcode == BRW_OPCODE_GOTO; -} - -bool -brw_has_branch_ctrl(const struct intel_device_info *devinfo, enum opcode opcode) -{ - switch (opcode) { - case BRW_OPCODE_IF: - case BRW_OPCODE_ELSE: - case BRW_OPCODE_GOTO: - case BRW_OPCODE_BREAK: - case BRW_OPCODE_CALL: - case BRW_OPCODE_CALLA: - case BRW_OPCODE_CONTINUE: - case BRW_OPCODE_ENDIF: - case BRW_OPCODE_HALT: - case BRW_OPCODE_JMPI: - case BRW_OPCODE_RET: - case BRW_OPCODE_WHILE: - case BRW_OPCODE_BRC: - case BRW_OPCODE_BRD: - /* TODO: "join" should also be here if added */ - return true; - default: - return false; - } -} - -static bool 
-is_logic_instruction(unsigned opcode) -{ - return opcode == BRW_OPCODE_AND || - opcode == BRW_OPCODE_NOT || - opcode == BRW_OPCODE_OR || - opcode == BRW_OPCODE_XOR; -} - -static bool -is_send(unsigned opcode) -{ - return opcode == BRW_OPCODE_SEND || - opcode == BRW_OPCODE_SENDC || - opcode == BRW_OPCODE_SENDS || - opcode == BRW_OPCODE_SENDSC; -} - -static bool -is_split_send(UNUSED const struct intel_device_info *devinfo, unsigned opcode) -{ - if (devinfo->ver >= 12) - return is_send(opcode); - else - return opcode == BRW_OPCODE_SENDS || - opcode == BRW_OPCODE_SENDSC; -} - -static bool -is_send_gather(const struct brw_isa_info *isa, - const struct brw_eu_inst *inst) -{ - return isa->devinfo->ver >= 30 && - is_split_send(isa->devinfo, brw_eu_inst_opcode(isa, inst)) && - brw_eu_inst_send_src0_reg_file(isa->devinfo, inst) == ARF; -} - -const char *const conditional_modifier[16] = { - [BRW_CONDITIONAL_NONE] = "", - [BRW_CONDITIONAL_Z] = ".z", - [BRW_CONDITIONAL_NZ] = ".nz", - [BRW_CONDITIONAL_G] = ".g", - [BRW_CONDITIONAL_GE] = ".ge", - [BRW_CONDITIONAL_L] = ".l", - [BRW_CONDITIONAL_LE] = ".le", - [BRW_CONDITIONAL_R] = ".r", - [BRW_CONDITIONAL_O] = ".o", - [BRW_CONDITIONAL_U] = ".u", -}; - -static const char *const m_negate[2] = { - [0] = "", - [1] = "-", -}; - -static const char *const _abs[2] = { - [0] = "", - [1] = "(abs)", -}; - -static const char *const m_bitnot[2] = { "", "~" }; - -static const char *const vert_stride[16] = { - [0] = "0", - [1] = "1", - [2] = "2", - [3] = "4", - [4] = "8", - [5] = "16", - [6] = "32", - [15] = "VxH", -}; - -static const char *const width[8] = { - [0] = "1", - [1] = "2", - [2] = "4", - [3] = "8", - [4] = "16", -}; - -static const char *const horiz_stride[4] = { - [0] = "0", - [1] = "1", - [2] = "2", - [3] = "4" -}; - -static const char *const chan_sel[4] = { - [0] = "x", - [1] = "y", - [2] = "z", - [3] = "w", -}; - -static const char *const debug_ctrl[2] = { - [0] = "", - [1] = ".breakpoint" -}; - -static const char *const 
saturate[2] = { - [0] = "", - [1] = ".sat" -}; - -static const char *const cmpt_ctrl[2] = { - [0] = "", - [1] = "compacted" -}; - -static const char *const accwr[2] = { - [0] = "", - [1] = "AccWrEnable" -}; - -static const char *const branch_ctrl[2] = { - [0] = "", - [1] = "BranchCtrl" -}; - -static const char *const fusion_ctrl[2] = { - [0] = "", - [1] = "FusionCtrl" -}; - -static const char *const wectrl[2] = { - [0] = "", - [1] = "WE_all" -}; - -static const char *const exec_size[8] = { - [0] = "1", - [1] = "2", - [2] = "4", - [3] = "8", - [4] = "16", - [5] = "32" -}; - -static const char *const pred_inv[2] = { - [0] = "+", - [1] = "-" -}; - -const char *const pred_ctrl_align16[16] = { - [1] = "", - [2] = ".x", - [3] = ".y", - [4] = ".z", - [5] = ".w", - [6] = ".any4h", - [7] = ".all4h", -}; - -static const char *const pred_ctrl_align1[16] = { - [BRW_PREDICATE_NORMAL] = "", - [BRW_PREDICATE_ALIGN1_ANYV] = ".anyv", - [BRW_PREDICATE_ALIGN1_ALLV] = ".allv", - [BRW_PREDICATE_ALIGN1_ANY2H] = ".any2h", - [BRW_PREDICATE_ALIGN1_ALL2H] = ".all2h", - [BRW_PREDICATE_ALIGN1_ANY4H] = ".any4h", - [BRW_PREDICATE_ALIGN1_ALL4H] = ".all4h", - [BRW_PREDICATE_ALIGN1_ANY8H] = ".any8h", - [BRW_PREDICATE_ALIGN1_ALL8H] = ".all8h", - [BRW_PREDICATE_ALIGN1_ANY16H] = ".any16h", - [BRW_PREDICATE_ALIGN1_ALL16H] = ".all16h", - [BRW_PREDICATE_ALIGN1_ANY32H] = ".any32h", - [BRW_PREDICATE_ALIGN1_ALL32H] = ".all32h", -}; - -static const char *const xe2_pred_ctrl[4] = { - [BRW_PREDICATE_NORMAL] = "", - [XE2_PREDICATE_ANY] = ".any", - [XE2_PREDICATE_ALL] = ".all", -}; - -static const char *const thread_ctrl[4] = { - [BRW_THREAD_NORMAL] = "", - [BRW_THREAD_ATOMIC] = "atomic", - [BRW_THREAD_SWITCH] = "switch", -}; - -static const char *const dep_ctrl[4] = { - [0] = "", - [1] = "NoDDClr", - [2] = "NoDDChk", - [3] = "NoDDClr,NoDDChk", -}; - -static const char *const access_mode[2] = { - [0] = "align1", - [1] = "align16", -}; - -static const char *const reg_file[4] = { - [ARF] = "A", - [FIXED_GRF] = 
"g", - [IMM] = "imm", -}; - -static const char *const writemask[16] = { - [0x0] = ".", - [0x1] = ".x", - [0x2] = ".y", - [0x3] = ".xy", - [0x4] = ".z", - [0x5] = ".xz", - [0x6] = ".yz", - [0x7] = ".xyz", - [0x8] = ".w", - [0x9] = ".xw", - [0xa] = ".yw", - [0xb] = ".xyw", - [0xc] = ".zw", - [0xd] = ".xzw", - [0xe] = ".yzw", - [0xf] = "", -}; - -static const char *const end_of_thread[2] = { - [0] = "", - [1] = "EOT" -}; - -static const char *const gen_sfid_names[16] = { - [GEN_SFID_NULL] = "null", - [GEN_SFID_SAMPLER] = "sampler", - [GEN_SFID_MESSAGE_GATEWAY] = "gateway", - [GEN_SFID_HDC2] = "hdc2", - [GEN_SFID_RENDER_CACHE] = "render", - [GEN_SFID_URB] = "urb", - [GEN_SFID_THREAD_SPAWNER] = "ts/btd", - [GEN_SFID_RAY_TRACE_ACCELERATOR] = "rt accel", - [GEN_SFID_HDC_READ_ONLY] = "hdc:ro", - [GEN_SFID_HDC0] = "hdc0", - [GEN_SFID_PIXEL_INTERPOLATOR] = "pi", - [GEN_SFID_HDC1] = "hdc1", - [GEN_SFID_SLM] = "slm", - [GEN_SFID_TGM] = "tgm", - [GEN_SFID_UGM] = "ugm", -}; - -static const char *const gfx7_gateway_subfuncid[8] = { - [GEN_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY] = "open", - [GEN_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY] = "close", - [GEN_MESSAGE_GATEWAY_SFID_FORWARD_MSG] = "forward msg", - [GEN_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP] = "get timestamp", - [GEN_MESSAGE_GATEWAY_SFID_BARRIER_MSG] = "barrier msg", - [GEN_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE] = "update state", - [GEN_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write", -}; - -static const char *const dp_rc_msg_type_gfx9[16] = { - [GEN_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", - [GEN_DATAPORT_RC_RENDER_TARGET_READ] = "RT read" -}; - -static const char *const * -dp_rc_msg_type(const struct intel_device_info *devinfo) -{ - return dp_rc_msg_type_gfx9; -} - -static const char *const m_rt_write_subtype[] = { - [0b000] = "SIMD16", - [0b001] = "SIMD16/RepData", - [0b010] = "SIMD8/DualSrcLow", - [0b011] = "SIMD8/DualSrcHigh", - [0b100] = "SIMD8", - [0b101] = "SIMD8/ImageWrite", /* Gfx6+ */ - [0b111] 
= "SIMD16/RepData-111", /* no idea how this is different than 1 */ -}; - -static const char *const m_rt_write_subtype_xe2[] = { - [0b000] = "SIMD16", - [0b001] = "SIMD32", - [0b010] = "SIMD16/DualSrc", - [0b011] = "invalid", - [0b100] = "invalid", - [0b101] = "invalid", - [0b111] = "invalid", -}; - -static const char *const dp_dc0_msg_type_gfx7[16] = { - [GEN_DATAPORT_DC_OWORD_BLOCK_READ] = "DC OWORD block read", - [GEN_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ] = - "DC unaligned OWORD block read", - [GEN_DATAPORT_DC_OWORD_DUAL_BLOCK_READ] = "DC OWORD dual block read", - [GEN_DATAPORT_DC_DWORD_SCATTERED_READ] = "DC DWORD scattered read", - [GEN_DATAPORT_DC_BYTE_SCATTERED_READ] = "DC byte scattered read", - [GEN_DATAPORT_DC_UNTYPED_SURFACE_READ] = "DC untyped surface read", - [GEN_DATAPORT_DC_UNTYPED_ATOMIC_OP] = "DC untyped atomic", - [GEN_DATAPORT_DC_MEMORY_FENCE] = "DC mfence", - [GEN_DATAPORT_DC_OWORD_BLOCK_WRITE] = "DC OWORD block write", - [GEN_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE] = "DC OWORD dual block write", - [GEN_DATAPORT_DC_DWORD_SCATTERED_WRITE] = "DC DWORD scatterd write", - [GEN_DATAPORT_DC_BYTE_SCATTERED_WRITE] = "DC byte scattered write", - [GEN_DATAPORT_DC_UNTYPED_SURFACE_WRITE] = "DC untyped surface write", -}; - -static const char *const dp_oword_block_rw[8] = { - [GEN_DATAPORT_OWORD_BLOCK_1_OWORDLOW] = "1-low", - [GEN_DATAPORT_OWORD_BLOCK_1_OWORDHIGH] = "1-high", - [GEN_DATAPORT_OWORD_BLOCK_2_OWORDS] = "2", - [GEN_DATAPORT_OWORD_BLOCK_4_OWORDS] = "4", - [GEN_DATAPORT_OWORD_BLOCK_8_OWORDS] = "8", -}; - -static const char *const dp_dc1_msg_type_hsw[32] = { - [GEN_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ] = "untyped surface read", - [GEN_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP] = "DC untyped atomic op", - [GEN_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2] = - "DC untyped 4x2 atomic op", - [GEN_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ] = "DC media block read", - [GEN_DATAPORT_DC_PORT1_TYPED_SURFACE_READ] = "DC typed surface read", - 
[GEN_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP] = "DC typed atomic", - [GEN_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2] = "DC typed 4x2 atomic op", - [GEN_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE] = "DC untyped surface write", - [GEN_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE] = "DC media block write", - [GEN_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP] = "DC atomic counter op", - [GEN_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2] = - "DC 4x2 atomic counter op", - [GEN_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write", - [GEN_DATAPORT_DC_PORT1_A64_SCATTERED_READ] = "DC A64 scattered read", - [GEN_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ] = "DC A64 untyped surface read", - [GEN_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP] = "DC A64 untyped atomic op", - [GEN_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ] = "DC A64 oword block read", - [GEN_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE] = "DC A64 oword block write", - [GEN_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE] = "DC A64 untyped surface write", - [GEN_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE] = "DC A64 scattered write", - [GEN_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] = - "DC untyped atomic float op", - [GEN_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] = - "DC A64 untyped atomic float op", - [GEN_GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP] = - "DC A64 untyped atomic half-integer op", - [GEN_GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP] = - "DC A64 untyped atomic half-float op", -}; - -static const char *const aop[16] = { - [GEN_AOP_AND] = "and", - [GEN_AOP_OR] = "or", - [GEN_AOP_XOR] = "xor", - [GEN_AOP_MOV] = "mov", - [GEN_AOP_INC] = "inc", - [GEN_AOP_DEC] = "dec", - [GEN_AOP_ADD] = "add", - [GEN_AOP_SUB] = "sub", - [GEN_AOP_REVSUB] = "revsub", - [GEN_AOP_IMAX] = "imax", - [GEN_AOP_IMIN] = "imin", - [GEN_AOP_UMAX] = "umax", - [GEN_AOP_UMIN] = "umin", - [GEN_AOP_CMPWR] = "cmpwr", - [GEN_AOP_PREDEC] = "predec", -}; - -static const char *const aop_float[5] = { - [GEN_AOP_FMAX] = "fmax", - [GEN_AOP_FMIN] = "fmin", 
- [GEN_AOP_FCMPWR] = "fcmpwr", - [GEN_AOP_FADD] = "fadd", -}; - -static const char * const pixel_interpolator_msg_types[4] = { - [GEN_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET] = "per_message_offset", - [GEN_PIXEL_INTERPOLATOR_LOC_SAMPLE] = "sample_position", - [GEN_PIXEL_INTERPOLATOR_LOC_CENTROID] = "centroid", - [GEN_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET] = "per_slot_offset", -}; - -static const char *const math_function[16] = { - [GEN_MATH_INV] = "inv", - [GEN_MATH_LOG] = "log", - [GEN_MATH_EXP] = "exp", - [GEN_MATH_SQRT] = "sqrt", - [GEN_MATH_RSQ] = "rsq", - [GEN_MATH_SIN] = "sin", - [GEN_MATH_COS] = "cos", - [GEN_MATH_FDIV] = "fdiv", - [GEN_MATH_POW] = "pow", - [GEN_MATH_INT_DIV_BOTH] = "intdivmod", - [GEN_MATH_INT_DIV_QUOTIENT] = "intdiv", - [GEN_MATH_INT_DIV_REMAINDER] = "intmod", - [GEN_MATH_INVM] = "invm", - [GEN_MATH_RSQRTM] = "rsqrtm", -}; - -static const char *const sync_function[16] = { - [TGL_SYNC_NOP] = "nop", - [TGL_SYNC_ALLRD] = "allrd", - [TGL_SYNC_ALLWR] = "allwr", - [TGL_SYNC_FENCE] = "fence", - [TGL_SYNC_BAR] = "bar", - [TGL_SYNC_HOST] = "host", -}; - -static const char *const gfx7_urb_opcode[] = { - [GEN_URB_OPCODE_ATOMIC_MOV] = "atomic mov", /* Gfx7+ */ - [GEN_URB_OPCODE_ATOMIC_INC] = "atomic inc", /* Gfx7+ */ - [GEN_URB_OPCODE_ATOMIC_ADD] = "atomic add", /* Gfx8+ */ - [GEN_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gfx8+ */ - [GEN_URB_OPCODE_SIMD8_READ] = "SIMD8 read", /* Gfx8+ */ - [GEN_GFX125_URB_OPCODE_FENCE] = "fence", /* Gfx12.5+ */ - /* [10-15] - reserved */ -}; - -static const char *const urb_swizzle[4] = { - [BRW_URB_SWIZZLE_NONE] = "", - [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", - [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", -}; - -static const char *const gfx5_sampler_msg_type[] = { - [GEN_SAMPLER_MESSAGE_SAMPLE] = "sample", - [GEN_SAMPLER_MESSAGE_SAMPLE_BIAS] = "sample_b", - [GEN_SAMPLER_MESSAGE_SAMPLE_LOD] = "sample_l", - [GEN_SAMPLER_MESSAGE_SAMPLE_COMPARE] = "sample_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_DERIVS] = "sample_d", - 
[GEN_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE] = "sample_b_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE] = "sample_l_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD] = "ld", - [GEN_SAMPLER_MESSAGE_SAMPLE_GATHER4] = "gather4", - [GEN_SAMPLER_MESSAGE_LOD] = "lod", - [GEN_SAMPLER_MESSAGE_SAMPLE_RESINFO] = "resinfo", - [GEN_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO] = "sampleinfo", - [GEN_SAMPLER_MESSAGE_SAMPLE_GATHER4_C] = "gather4_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po", - [GEN_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_LZ] = "sample_lz", - [GEN_SAMPLER_MESSAGE_SAMPLE_C_LZ] = "sample_c_lz", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD_LZ] = "ld_lz", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss", -}; - -static const char *const xe2_sampler_msg_type[] = { - [GEN_SAMPLER_MESSAGE_SAMPLE] = "sample", - [GEN_SAMPLER_MESSAGE_SAMPLE_BIAS] = "sample_b", - [GEN_SAMPLER_MESSAGE_SAMPLE_LOD] = "sample_l", - [GEN_SAMPLER_MESSAGE_SAMPLE_COMPARE] = "sample_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_DERIVS] = "sample_d", - [GEN_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE] = "sample_b_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE] = "sample_l_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD] = "ld", - [GEN_SAMPLER_MESSAGE_SAMPLE_GATHER4] = "gather4", - [GEN_SAMPLER_MESSAGE_LOD] = "lod", - [GEN_SAMPLER_MESSAGE_SAMPLE_RESINFO] = "resinfo", - [GEN_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO] = "sampleinfo", - [GEN_SAMPLER_MESSAGE_SAMPLE_GATHER4_C] = "gather4_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_MLOD] = "sample_mlod", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_COMPARE_MLOD] = "sample_c_mlod", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I] = "gather4_i", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L] = "gather4_l", - 
[GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B] = "gather4_b", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C] = "gather4_i_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C] = "gather4_l_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I] = "gather4_po_i", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L] = "gather4_po_l", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I_C] = "gather4_po_i_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L_C] = "gather4_po_l_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_B] = "gather4_po_b", - [GEN_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c", - [GEN_SAMPLER_MESSAGE_SAMPLE_LZ] = "sample_lz", - [GEN_SAMPLER_MESSAGE_SAMPLE_C_LZ] = "sample_c_lz", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD_LZ] = "ld_lz", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms", - [GEN_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO] = "sample_po", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_BIAS] = "sample_po_b", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD] = "sample_po_l", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_COMPARE] = "sample_po_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_DERIVS] = "sample_po_d", - [GEN_XE3_SAMPLER_MESSAGE_SAMPLE_PO_BIAS_COMPARE] = "sample_po_b_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD_COMPARE] = "sample_po_l_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_D_C] = "sample_po_d_c", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_LZ] = "sample_po_lz", - [GEN_XE2_SAMPLER_MESSAGE_SAMPLE_PO_C_LZ] = "sample_po_c_lz", -}; - -static const char *const gfx9_sampler_simd_mode[7] = { - [GEN_SAMPLER_SIMD_MODE_SIMD8D] = "SIMD8D", - [GEN_SAMPLER_SIMD_MODE_SIMD8] = "SIMD8", - [GEN_SAMPLER_SIMD_MODE_SIMD16] = "SIMD16", - [GEN_SAMPLER_SIMD_MODE_SIMD32_64] = "SIMD32/64", - [GEN_GFX11_SAMPLER_SIMD_MODE_SIMD8H] = "SIMD8H", - [GEN_GFX11_SAMPLER_SIMD_MODE_SIMD16H] = "SIMD16H", -}; - -static const char 
*const xe2_sampler_simd_mode[7] = { - [GEN_XE2_SAMPLER_SIMD_MODE_SIMD16] = "SIMD16", - [GEN_XE2_SAMPLER_SIMD_MODE_SIMD32] = "SIMD32", - [GEN_XE2_SAMPLER_SIMD_MODE_SIMD16H] = "SIMD16H", - [GEN_XE2_SAMPLER_SIMD_MODE_SIMD32H] = "SIMD32H", -}; - -static const char *const lsc_operation[] = { - [LSC_OP_LOAD] = "load", - [LSC_OP_LOAD_CMASK] = "load_cmask", - [LSC_OP_STORE] = "store", - [LSC_OP_STORE_CMASK] = "store_cmask", - [LSC_OP_FENCE] = "fence", - [LSC_OP_ATOMIC_INC] = "atomic_inc", - [LSC_OP_ATOMIC_DEC] = "atomic_dec", - [LSC_OP_ATOMIC_LOAD] = "atomic_load", - [LSC_OP_ATOMIC_STORE] = "atomic_store", - [LSC_OP_ATOMIC_ADD] = "atomic_add", - [LSC_OP_ATOMIC_SUB] = "atomic_sub", - [LSC_OP_ATOMIC_MIN] = "atomic_min", - [LSC_OP_ATOMIC_MAX] = "atomic_max", - [LSC_OP_ATOMIC_UMIN] = "atomic_umin", - [LSC_OP_ATOMIC_UMAX] = "atomic_umax", - [LSC_OP_ATOMIC_CMPXCHG] = "atomic_cmpxchg", - [LSC_OP_ATOMIC_FADD] = "atomic_fadd", - [LSC_OP_ATOMIC_FSUB] = "atomic_fsub", - [LSC_OP_ATOMIC_FMIN] = "atomic_fmin", - [LSC_OP_ATOMIC_FMAX] = "atomic_fmax", - [LSC_OP_ATOMIC_FCMPXCHG] = "atomic_fcmpxchg", - [LSC_OP_ATOMIC_AND] = "atomic_and", - [LSC_OP_ATOMIC_OR] = "atomic_or", - [LSC_OP_ATOMIC_XOR] = "atomic_xor", - [LSC_OP_LOAD_CMASK_MSRT] = "load_cmask_msrt", - [LSC_OP_STORE_CMASK_MSRT] = "store_cmask_msrt", -}; - -const char * -brw_lsc_op_to_string(unsigned op) -{ - assert(op < ARRAY_SIZE(lsc_operation)); - return lsc_operation[op]; -} - -static const char *const lsc_addr_surface_type[] = { - [LSC_ADDR_SURFTYPE_FLAT] = "flat", - [LSC_ADDR_SURFTYPE_BSS] = "bss", - [LSC_ADDR_SURFTYPE_SS] = "ss", - [LSC_ADDR_SURFTYPE_BTI] = "bti", -}; - -const char * -brw_lsc_addr_surftype_to_string(unsigned t) -{ - assert(t < ARRAY_SIZE(lsc_addr_surface_type)); - return lsc_addr_surface_type[t]; -} - -static const char* const lsc_fence_scope[] = { - [LSC_FENCE_THREADGROUP] = "threadgroup", - [LSC_FENCE_LOCAL] = "local", - [LSC_FENCE_TILE] = "tile", - [LSC_FENCE_GPU] = "gpu", - [LSC_FENCE_ALL_GPU] = "all_gpu", 
- [LSC_FENCE_SYSTEM_RELEASE] = "system_release", - [LSC_FENCE_SYSTEM_ACQUIRE] = "system_acquire", -}; - -static const char* const lsc_flush_type[] = { - [LSC_FLUSH_TYPE_NONE] = "none", - [LSC_FLUSH_TYPE_EVICT] = "evict", - [LSC_FLUSH_TYPE_INVALIDATE] = "invalidate", - [LSC_FLUSH_TYPE_DISCARD] = "discard", - [LSC_FLUSH_TYPE_CLEAN] = "clean", - [LSC_FLUSH_TYPE_L3ONLY] = "l3only", - [LSC_FLUSH_TYPE_NONE_6] = "none_6", -}; - -static const char* const lsc_addr_size[] = { - [LSC_ADDR_SIZE_A16] = "a16", - [LSC_ADDR_SIZE_A32] = "a32", - [LSC_ADDR_SIZE_A64] = "a64", -}; - -static const char* const lsc_backup_fence_routing[] = { - [LSC_NORMAL_ROUTING] = "normal_routing", - [LSC_ROUTE_TO_LSC] = "route_to_lsc", -}; - -static const char* const lsc_data_size[] = { - [LSC_DATA_SIZE_D8] = "d8", - [LSC_DATA_SIZE_D16] = "d16", - [LSC_DATA_SIZE_D32] = "d32", - [LSC_DATA_SIZE_D64] = "d64", - [LSC_DATA_SIZE_D8U32] = "d8u32", - [LSC_DATA_SIZE_D16U32] = "d16u32", - [LSC_DATA_SIZE_D16BF32] = "d16bf32", -}; - -const char * -brw_lsc_data_size_to_string(unsigned s) -{ - assert(s < ARRAY_SIZE(lsc_data_size)); - return lsc_data_size[s]; -} - -static const char* const lsc_vect_size_str[] = { - [LSC_VECT_SIZE_V1] = "V1", - [LSC_VECT_SIZE_V2] = "V2", - [LSC_VECT_SIZE_V3] = "V3", - [LSC_VECT_SIZE_V4] = "V4", - [LSC_VECT_SIZE_V8] = "V8", - [LSC_VECT_SIZE_V16] = "V16", - [LSC_VECT_SIZE_V32] = "V32", - [LSC_VECT_SIZE_V64] = "V64", -}; - -static const char* const lsc_cmask_str[] = { - [LSC_CMASK_X] = "x", - [LSC_CMASK_Y] = "y", - [LSC_CMASK_XY] = "xy", - [LSC_CMASK_Z] = "z", - [LSC_CMASK_XZ] = "xz", - [LSC_CMASK_YZ] = "yz", - [LSC_CMASK_XYZ] = "xyz", - [LSC_CMASK_W] = "w", - [LSC_CMASK_XW] = "xw", - [LSC_CMASK_YW] = "yw", - [LSC_CMASK_XYW] = "xyw", - [LSC_CMASK_ZW] = "zw", - [LSC_CMASK_XZW] = "xzw", - [LSC_CMASK_YZW] = "yzw", - [LSC_CMASK_XYZW] = "xyzw", -}; - -static const char* const lsc_cache_load[] = { - [LSC_CACHE_LOAD_L1STATE_L3MOCS] = "L1STATE_L3MOCS", - [LSC_CACHE_LOAD_L1UC_L3UC] = 
"L1UC_L3UC", - [LSC_CACHE_LOAD_L1UC_L3C] = "L1UC_L3C", - [LSC_CACHE_LOAD_L1C_L3UC] = "L1C_L3UC", - [LSC_CACHE_LOAD_L1C_L3C] = "L1C_L3C", - [LSC_CACHE_LOAD_L1S_L3UC] = "L1S_L3UC", - [LSC_CACHE_LOAD_L1S_L3C] = "L1S_L3C", - [LSC_CACHE_LOAD_L1IAR_L3C] = "L1IAR_L3C", -}; - -static const char* const lsc_cache_store[] = { - [LSC_CACHE_STORE_L1STATE_L3MOCS] = "L1STATE_L3MOCS", - [LSC_CACHE_STORE_L1UC_L3UC] = "L1UC_L3UC", - [LSC_CACHE_STORE_L1UC_L3WB] = "L1UC_L3WB", - [LSC_CACHE_STORE_L1WT_L3UC] = "L1WT_L3UC", - [LSC_CACHE_STORE_L1WT_L3WB] = "L1WT_L3WB", - [LSC_CACHE_STORE_L1S_L3UC] = "L1S_L3UC", - [LSC_CACHE_STORE_L1S_L3WB] = "L1S_L3WB", - [LSC_CACHE_STORE_L1WB_L3WB] = "L1WB_L3WB", -}; - -static const char* const xe2_lsc_cache_load[] = { - [XE2_LSC_CACHE_LOAD_L1STATE_L3MOCS] = "L1STATE_L3MOCS", - [XE2_LSC_CACHE_LOAD_L1UC_L3UC] = "L1UC_L3UC", - [XE2_LSC_CACHE_LOAD_L1UC_L3C] = "L1UC_L3C", - [XE2_LSC_CACHE_LOAD_L1UC_L3CC] = "L1UC_L3CC", - [XE2_LSC_CACHE_LOAD_L1C_L3UC] = "L1C_L3UC", - [XE2_LSC_CACHE_LOAD_L1C_L3C] = "L1C_L3C", - [XE2_LSC_CACHE_LOAD_L1C_L3CC] = "L1C_L3CC", - [XE2_LSC_CACHE_LOAD_L1S_L3UC] = "L1S_L3UC", - [XE2_LSC_CACHE_LOAD_L1S_L3C] = "L1S_L3C", - [XE2_LSC_CACHE_LOAD_L1IAR_L3IAR] = "L1IAR_L3IAR", -}; - -static const char* const xe2_lsc_cache_store[] = { - [XE2_LSC_CACHE_STORE_L1STATE_L3MOCS] = "L1STATE_L3MOCS", - [XE2_LSC_CACHE_STORE_L1UC_L3UC] = "L1UC_L3UC", - [XE2_LSC_CACHE_STORE_L1UC_L3WB] = "L1UC_L3WB", - [XE2_LSC_CACHE_STORE_L1WT_L3UC] = "L1WT_L3UC", - [XE2_LSC_CACHE_STORE_L1WT_L3WB] = "L1WT_L3WB", - [XE2_LSC_CACHE_STORE_L1S_L3UC] = "L1S_L3UC", - [XE2_LSC_CACHE_STORE_L1S_L3WB] = "L1S_L3WB", - [XE2_LSC_CACHE_STORE_L1WB_L3WB] = "L1WB_L3WB", -}; - -static const char* const dpas_systolic_depth[4] = { - [0] = "16", - [1] = "2", - [2] = "4", - [3] = "8" -}; - -static int column; - -static int -string(FILE *file, const char *string) -{ - fputs(string, file); - column += strlen(string); - return 0; -} - -static int -format(FILE *f, const char *format, ...) 
PRINTFLIKE(2, 3); - -static int -format(FILE *f, const char *format, ...) -{ - char buf[1024]; - va_list args; - va_start(args, format); - - vsnprintf(buf, sizeof(buf) - 1, format, args); - va_end(args); - string(f, buf); - return 0; -} - -static int -newline(FILE *f) -{ - putc('\n', f); - column = 0; - return 0; -} - -static int -pad(FILE *f, int c) -{ - do - string(f, " "); - while (column < c); - return 0; -} - -static int -control(FILE *file, const char *name, const char *const ctrl[], - unsigned id, int *space) -{ - if (!ctrl[id]) { - fprintf(file, "*** invalid %s value %d ", name, id); - return 1; - } - if (ctrl[id][0]) { - if (space && *space) - string(file, " "); - string(file, ctrl[id]); - if (space) - *space = 1; - } - return 0; -} - -static int -print_opcode(FILE *file, const struct brw_isa_info *isa, - enum opcode id) -{ - const struct opcode_desc *desc = brw_opcode_desc(isa, id); - if (!desc) { - format(file, "*** invalid opcode value %d ", id); - return 1; - } - string(file, desc->name); - return 0; -} - -static int -reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) -{ - int err = 0; - - if (_reg_file == ARF) { - switch (_reg_nr & 0xf0) { - case BRW_ARF_NULL: - string(file, "null"); - break; - case BRW_ARF_ADDRESS: - format(file, "a%d", _reg_nr & 0x0f); - break; - case BRW_ARF_ACCUMULATOR: - format(file, "acc%d", _reg_nr & 0x0f); - break; - case BRW_ARF_FLAG: - format(file, "f%d", _reg_nr & 0x0f); - break; - case BRW_ARF_MASK: - format(file, "mask%d", _reg_nr & 0x0f); - break; - case BRW_ARF_STATE: - format(file, "sr%d", _reg_nr & 0x0f); - break; - case BRW_ARF_SCALAR: - format(file, "s%d", _reg_nr & 0x0f); - break; - case BRW_ARF_CONTROL: - format(file, "cr%d", _reg_nr & 0x0f); - break; - case BRW_ARF_NOTIFICATION_COUNT: - format(file, "n%d", _reg_nr & 0x0f); - break; - case BRW_ARF_IP: - string(file, "ip"); - return -1; - break; - case BRW_ARF_TDR: - format(file, "tdr0"); - return -1; - case BRW_ARF_TIMESTAMP: - format(file, "tm%d", _reg_nr & 
0x0f); - break; - default: - format(file, "ARF%d", _reg_nr); - break; - } - } else { - err |= control(file, "src reg file", reg_file, _reg_file, NULL); - format(file, "%d", _reg_nr); - } - return err; -} - -static int -dest(FILE *file, const struct brw_isa_info *isa, const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - enum brw_reg_type type = brw_eu_inst_dst_type(devinfo, inst); - unsigned elem_size = brw_type_size_bytes(type); - int err = 0; - - if (is_split_send(devinfo, brw_eu_inst_opcode(isa, inst))) { - /* These are fixed for split sends */ - type = BRW_TYPE_UD; - elem_size = 4; - if (devinfo->ver >= 12) { - err |= reg(file, brw_eu_inst_send_dst_reg_file(devinfo, inst), - brw_eu_inst_dst_da_reg_nr(devinfo, inst)); - string(file, brw_reg_type_to_letters(type)); - } else if (brw_eu_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - err |= reg(file, brw_eu_inst_send_dst_reg_file(devinfo, inst), - brw_eu_inst_dst_da_reg_nr(devinfo, inst)); - unsigned subreg_nr = brw_eu_inst_dst_da16_subreg_nr(devinfo, inst); - if (subreg_nr) - format(file, ".%u", subreg_nr); - string(file, brw_reg_type_to_letters(type)); - } else { - string(file, "g[a0"); - if (brw_eu_inst_dst_ia_subreg_nr(devinfo, inst)) - format(file, ".%"PRIu64, brw_eu_inst_dst_ia_subreg_nr(devinfo, inst) / - elem_size); - if (brw_eu_inst_send_dst_ia16_addr_imm(devinfo, inst)) - format(file, " %d", brw_eu_inst_send_dst_ia16_addr_imm(devinfo, inst)); - string(file, "]<"); - string(file, brw_reg_type_to_letters(type)); - } - } else if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (brw_eu_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - err |= reg(file, brw_eu_inst_dst_reg_file(devinfo, inst), - brw_eu_inst_dst_da_reg_nr(devinfo, inst)); - if (err == -1) - return 0; - if (brw_eu_inst_dst_da1_subreg_nr(devinfo, inst)) - format(file, ".%"PRIu64, brw_eu_inst_dst_da1_subreg_nr(devinfo, inst) / - elem_size); - string(file, "<"); - err |= 
control(file, "horiz stride", horiz_stride, - brw_eu_inst_dst_hstride(devinfo, inst), NULL); - string(file, ">"); - string(file, brw_reg_type_to_letters(type)); - } else { - string(file, "g[a0"); - if (brw_eu_inst_dst_ia_subreg_nr(devinfo, inst)) - format(file, ".%"PRIu64, brw_eu_inst_dst_ia_subreg_nr(devinfo, inst) / - elem_size); - if (brw_eu_inst_dst_ia1_addr_imm(devinfo, inst)) - format(file, " %d", brw_eu_inst_dst_ia1_addr_imm(devinfo, inst)); - string(file, "]<"); - err |= control(file, "horiz stride", horiz_stride, - brw_eu_inst_dst_hstride(devinfo, inst), NULL); - string(file, ">"); - string(file, brw_reg_type_to_letters(type)); - } - } else { - if (brw_eu_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - err |= reg(file, brw_eu_inst_dst_reg_file(devinfo, inst), - brw_eu_inst_dst_da_reg_nr(devinfo, inst)); - if (err == -1) - return 0; - if (brw_eu_inst_dst_da16_subreg_nr(devinfo, inst)) - format(file, ".%u", 16 / elem_size); - string(file, "<1>"); - err |= control(file, "writemask", writemask, - brw_eu_inst_da16_writemask(devinfo, inst), NULL); - string(file, brw_reg_type_to_letters(type)); - } else { - err = 1; - string(file, "Indirect align16 address mode not supported"); - } - } - - return 0; -} - -static enum brw_horizontal_stride -hstride_from_align1_3src_dst_hstride(enum brw_align1_3src_dst_horizontal_stride hstride) -{ - switch (hstride) { - case BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1: return BRW_HORIZONTAL_STRIDE_1; - case BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_2: return BRW_HORIZONTAL_STRIDE_2; - default: - UNREACHABLE("not reached"); - } -} - -static int -dest_3src(FILE *file, const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - bool is_align1 = brw_eu_inst_3src_access_mode(devinfo, inst) == BRW_ALIGN_1; - int err = 0; - uint32_t reg_file; - unsigned subreg_nr; - enum brw_reg_type type; - - if (devinfo->ver < 10 && is_align1) - return 0; - - if (devinfo->ver >= 12 || is_align1) - reg_file = 
brw_eu_inst_3src_a1_dst_reg_file(devinfo, inst); - else - reg_file = FIXED_GRF; - - err |= reg(file, reg_file, brw_eu_inst_3src_dst_reg_nr(devinfo, inst)); - if (err == -1) - return 0; - - if (is_align1) { - type = brw_eu_inst_3src_a1_dst_type(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a1_dst_subreg_nr(devinfo, inst); - } else { - type = brw_eu_inst_3src_a16_dst_type(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a16_dst_subreg_nr(devinfo, inst); - } - subreg_nr /= brw_type_size_bytes(type); - - if (subreg_nr) - format(file, ".%u", subreg_nr); - string(file, "<"); - unsigned _horiz_stride = devinfo->ver == 9 ? BRW_HORIZONTAL_STRIDE_1 : - hstride_from_align1_3src_dst_hstride(brw_eu_inst_3src_a1_dst_hstride(devinfo, inst)); - err |= control(file, "horiz_stride", horiz_stride, _horiz_stride, NULL); - string(file, ">"); - - if (!is_align1) { - err |= control(file, "writemask", writemask, - brw_eu_inst_3src_a16_dst_writemask(devinfo, inst), NULL); - } - string(file, brw_reg_type_to_letters(type)); - - return 0; -} - -static int -dest_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - uint32_t reg_file = brw_eu_inst_dpas_3src_dst_reg_file(devinfo, inst); - - if (reg(file, reg_file, brw_eu_inst_dpas_3src_dst_reg_nr(devinfo, inst)) == -1) - return 0; - - enum brw_reg_type type = brw_eu_inst_dpas_3src_dst_type(devinfo, inst); - unsigned subreg_nr = brw_eu_inst_dpas_3src_dst_subreg_nr(devinfo, inst); - - if (subreg_nr) - format(file, ".%u", subreg_nr); - string(file, "<1>"); - - string(file, brw_reg_type_to_letters(type)); - - return 0; -} - -static int -src_align1_region(FILE *file, - unsigned _vert_stride, unsigned _width, - unsigned _horiz_stride) -{ - int err = 0; - string(file, "<"); - err |= control(file, "vert stride", vert_stride, _vert_stride, NULL); - string(file, ","); - err |= control(file, "width", width, _width, NULL); - string(file, ","); - err |= control(file, "horiz_stride", horiz_stride, _horiz_stride, NULL); 
- string(file, ">"); - return err; -} - -static int -src_da1(FILE *file, - const struct intel_device_info *devinfo, - unsigned opcode, - enum brw_reg_type type, unsigned _reg_file, - unsigned _vert_stride, unsigned _width, unsigned _horiz_stride, - unsigned reg_num, unsigned sub_reg_num, unsigned __abs, - unsigned _negate) -{ - int err = 0; - - if (is_logic_instruction(opcode)) - err |= control(file, "bitnot", m_bitnot, _negate, NULL); - else - err |= control(file, "negate", m_negate, _negate, NULL); - - err |= control(file, "abs", _abs, __abs, NULL); - - err |= reg(file, _reg_file, reg_num); - if (err == -1) - return 0; - if (sub_reg_num) { - unsigned elem_size = brw_type_size_bytes(type); - format(file, ".%d", sub_reg_num / elem_size); /* use formal style like spec */ - } - src_align1_region(file, _vert_stride, _width, _horiz_stride); - string(file, brw_reg_type_to_letters(type)); - return err; -} - -static int -src_ia1(FILE *file, - const struct intel_device_info *devinfo, - unsigned opcode, - enum brw_reg_type type, - int _addr_imm, - unsigned _addr_subreg_nr, - unsigned _negate, - unsigned __abs, - unsigned _horiz_stride, unsigned _width, unsigned _vert_stride) -{ - int err = 0; - - if (is_logic_instruction(opcode)) - err |= control(file, "bitnot", m_bitnot, _negate, NULL); - else - err |= control(file, "negate", m_negate, _negate, NULL); - - err |= control(file, "abs", _abs, __abs, NULL); - - string(file, "g[a0"); - if (_addr_subreg_nr) - format(file, ".%d", _addr_subreg_nr); - if (_addr_imm) - format(file, " %d", _addr_imm); - string(file, "]"); - src_align1_region(file, _vert_stride, _width, _horiz_stride); - string(file, brw_reg_type_to_letters(type)); - return err; -} - -static int -src_swizzle(FILE *file, unsigned swiz) -{ - unsigned x = BRW_GET_SWZ(swiz, BRW_CHANNEL_X); - unsigned y = BRW_GET_SWZ(swiz, BRW_CHANNEL_Y); - unsigned z = BRW_GET_SWZ(swiz, BRW_CHANNEL_Z); - unsigned w = BRW_GET_SWZ(swiz, BRW_CHANNEL_W); - int err = 0; - - if (x == y && x == z 
&& x == w) { - string(file, "."); - err |= control(file, "channel select", chan_sel, x, NULL); - } else if (swiz != BRW_SWIZZLE_XYZW) { - string(file, "."); - err |= control(file, "channel select", chan_sel, x, NULL); - err |= control(file, "channel select", chan_sel, y, NULL); - err |= control(file, "channel select", chan_sel, z, NULL); - err |= control(file, "channel select", chan_sel, w, NULL); - } - return err; -} - -static int -src_da16(FILE *file, - const struct intel_device_info *devinfo, - unsigned opcode, - enum brw_reg_type type, - unsigned _reg_file, - unsigned _vert_stride, - unsigned _reg_nr, - unsigned _subreg_nr, - unsigned __abs, - unsigned _negate, - unsigned swz_x, unsigned swz_y, unsigned swz_z, unsigned swz_w) -{ - int err = 0; - - if (is_logic_instruction(opcode)) - err |= control(file, "bitnot", m_bitnot, _negate, NULL); - else - err |= control(file, "negate", m_negate, _negate, NULL); - - err |= control(file, "abs", _abs, __abs, NULL); - - err |= reg(file, _reg_file, _reg_nr); - if (err == -1) - return 0; - if (_subreg_nr) { - unsigned elem_size = brw_type_size_bytes(type); - - /* bit4 for subreg number byte addressing. Make this same meaning as - in da1 case, so output looks consistent. 
*/ - format(file, ".%d", 16 / elem_size); - } - string(file, "<"); - err |= control(file, "vert stride", vert_stride, _vert_stride, NULL); - string(file, ">"); - err |= src_swizzle(file, BRW_SWIZZLE4(swz_x, swz_y, swz_z, swz_w)); - string(file, brw_reg_type_to_letters(type)); - return err; -} - -static enum brw_vertical_stride -vstride_from_align1_3src_vstride(const struct intel_device_info *devinfo, - enum brw_align1_3src_vertical_stride vstride) -{ - switch (vstride) { - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0: return BRW_VERTICAL_STRIDE_0; - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2: - if (devinfo->ver >= 12) - return BRW_VERTICAL_STRIDE_1; - else - return BRW_VERTICAL_STRIDE_2; - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4: return BRW_VERTICAL_STRIDE_4; - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8: return BRW_VERTICAL_STRIDE_8; - default: - UNREACHABLE("not reached"); - } -} - -static enum brw_horizontal_stride -hstride_from_align1_3src_hstride(enum brw_align1_3src_src_horizontal_stride hstride) -{ - switch (hstride) { - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0: return BRW_HORIZONTAL_STRIDE_0; - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1: return BRW_HORIZONTAL_STRIDE_1; - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2: return BRW_HORIZONTAL_STRIDE_2; - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4: return BRW_HORIZONTAL_STRIDE_4; - default: - UNREACHABLE("not reached"); - } -} - -static enum brw_vertical_stride -vstride_from_align1_3src_hstride(enum brw_align1_3src_src_horizontal_stride hstride) -{ - switch (hstride) { - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0: return BRW_VERTICAL_STRIDE_0; - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1: return BRW_VERTICAL_STRIDE_1; - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2: return BRW_VERTICAL_STRIDE_2; - case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4: return BRW_VERTICAL_STRIDE_4; - default: - UNREACHABLE("not reached"); - } -} - -/* From "GFX10 Regioning Rules for Align1 Ternary Operations" in the - * "Register Region 
Restrictions" documentation - */ -static enum brw_width -implied_width(enum brw_vertical_stride _vert_stride, - enum brw_horizontal_stride _horiz_stride) -{ - /* "1. Width is 1 when Vertical and Horizontal Strides are both zero." */ - if (_vert_stride == BRW_VERTICAL_STRIDE_0 && - _horiz_stride == BRW_HORIZONTAL_STRIDE_0) { - return BRW_WIDTH_1; - - /* "2. Width is equal to vertical stride when Horizontal Stride is zero." */ - } else if (_horiz_stride == BRW_HORIZONTAL_STRIDE_0) { - switch (_vert_stride) { - case BRW_VERTICAL_STRIDE_1: return BRW_WIDTH_1; - case BRW_VERTICAL_STRIDE_2: return BRW_WIDTH_2; - case BRW_VERTICAL_STRIDE_4: return BRW_WIDTH_4; - case BRW_VERTICAL_STRIDE_8: return BRW_WIDTH_8; - case BRW_VERTICAL_STRIDE_0: - default: - UNREACHABLE("not reached"); - } - - } else { - /* FINISHME: Implement these: */ - - /* "3. Width is equal to Vertical Stride/Horizontal Stride when both - * Strides are non-zero. - * - * 4. Vertical Stride must not be zero if Horizontal Stride is non-zero. - * This implies Vertical Stride is always greater than Horizontal - * Stride." - * - * Given these statements and the knowledge that the stride and width - * values are encoded in logarithmic form, we can perform the division - * by just subtracting. 
- */ - return _vert_stride - _horiz_stride; - } -} - -static int -src0_3src(FILE *file, const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - int err = 0; - unsigned reg_nr, subreg_nr; - enum brw_reg_file _file; - enum brw_reg_type type; - enum brw_vertical_stride _vert_stride; - enum brw_width _width; - enum brw_horizontal_stride _horiz_stride; - bool is_scalar_region; - bool is_align1 = brw_eu_inst_3src_access_mode(devinfo, inst) == BRW_ALIGN_1; - - if (devinfo->ver < 10 && is_align1) - return 0; - - if (is_align1) { - _file = brw_eu_inst_3src_a1_src0_reg_file(devinfo, inst); - if (_file == IMM) { - uint16_t imm_val = brw_eu_inst_3src_a1_src0_imm(devinfo, inst); - enum brw_reg_type type = brw_eu_inst_3src_a1_src0_type(devinfo, inst); - - if (type == BRW_TYPE_W) { - format(file, "%dW", imm_val); - } else if (type == BRW_TYPE_UW) { - format(file, "0x%04xUW", imm_val); - } else if (type == BRW_TYPE_HF) { - format(file, "0x%04xHF", imm_val); - } - return 0; - } - - reg_nr = brw_eu_inst_3src_src0_reg_nr(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a1_src0_subreg_nr(devinfo, inst); - type = brw_eu_inst_3src_a1_src0_type(devinfo, inst); - _vert_stride = vstride_from_align1_3src_vstride( - devinfo, brw_eu_inst_3src_a1_src0_vstride(devinfo, inst)); - _horiz_stride = hstride_from_align1_3src_hstride( - brw_eu_inst_3src_a1_src0_hstride(devinfo, inst)); - _width = implied_width(_vert_stride, _horiz_stride); - } else { - _file = FIXED_GRF; - reg_nr = brw_eu_inst_3src_src0_reg_nr(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a16_src0_subreg_nr(devinfo, inst); - type = brw_eu_inst_3src_a16_src_type(devinfo, inst); - - if (brw_eu_inst_3src_a16_src0_rep_ctrl(devinfo, inst)) { - _vert_stride = BRW_VERTICAL_STRIDE_0; - _width = BRW_WIDTH_1; - _horiz_stride = BRW_HORIZONTAL_STRIDE_0; - } else { - _vert_stride = BRW_VERTICAL_STRIDE_4; - _width = BRW_WIDTH_4; - _horiz_stride = BRW_HORIZONTAL_STRIDE_1; - } - } - is_scalar_region = _vert_stride == 
BRW_VERTICAL_STRIDE_0 && - _width == BRW_WIDTH_1 && - _horiz_stride == BRW_HORIZONTAL_STRIDE_0; - - subreg_nr /= brw_type_size_bytes(type); - - err |= control(file, "negate", m_negate, - brw_eu_inst_3src_src0_negate(devinfo, inst), NULL); - err |= control(file, "abs", _abs, brw_eu_inst_3src_src0_abs(devinfo, inst), NULL); - - err |= reg(file, _file, reg_nr); - if (err == -1) - return 0; - if (subreg_nr || is_scalar_region) - format(file, ".%d", subreg_nr); - src_align1_region(file, _vert_stride, _width, _horiz_stride); - if (!is_scalar_region && !is_align1) - err |= src_swizzle(file, brw_eu_inst_3src_a16_src0_swizzle(devinfo, inst)); - string(file, brw_reg_type_to_letters(type)); - return err; -} - -static int -src1_3src(FILE *file, const struct brw_isa_info *isa, const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - int err = 0; - unsigned reg_nr, subreg_nr; - enum brw_reg_file _file; - enum brw_reg_type type; - enum brw_vertical_stride _vert_stride; - enum brw_width _width; - enum brw_horizontal_stride _horiz_stride; - bool is_scalar_region; - bool is_align1 = brw_eu_inst_3src_access_mode(devinfo, inst) == BRW_ALIGN_1; - - if (devinfo->ver < 10 && is_align1) - return 0; - - if (is_align1) { - _file = brw_eu_inst_3src_a1_src1_reg_file(devinfo, inst); - reg_nr = brw_eu_inst_3src_src1_reg_nr(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a1_src1_subreg_nr(devinfo, inst); - type = brw_eu_inst_3src_a1_src1_type(devinfo, inst); - - _vert_stride = vstride_from_align1_3src_vstride( - devinfo, brw_eu_inst_3src_a1_src1_vstride(devinfo, inst)); - _horiz_stride = hstride_from_align1_3src_hstride( - brw_eu_inst_3src_a1_src1_hstride(devinfo, inst)); - _width = implied_width(_vert_stride, _horiz_stride); - } else { - _file = FIXED_GRF; - reg_nr = brw_eu_inst_3src_src1_reg_nr(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a16_src1_subreg_nr(devinfo, inst); - type = brw_eu_inst_3src_a16_src_type(devinfo, inst); - - if 
(brw_eu_inst_3src_a16_src1_rep_ctrl(devinfo, inst)) { - _vert_stride = BRW_VERTICAL_STRIDE_0; - _width = BRW_WIDTH_1; - _horiz_stride = BRW_HORIZONTAL_STRIDE_0; - } else { - _vert_stride = BRW_VERTICAL_STRIDE_4; - _width = BRW_WIDTH_4; - _horiz_stride = BRW_HORIZONTAL_STRIDE_1; - } - } - is_scalar_region = _vert_stride == BRW_VERTICAL_STRIDE_0 && - _width == BRW_WIDTH_1 && - _horiz_stride == BRW_HORIZONTAL_STRIDE_0; - - subreg_nr /= brw_type_size_bytes(type); - - if (brw_eu_inst_opcode(isa, inst) != BRW_OPCODE_BFN) { - err |= control(file, "negate", m_negate, - brw_eu_inst_3src_src1_negate(devinfo, inst), NULL); - err |= control(file, "abs", _abs, brw_eu_inst_3src_src1_abs(devinfo, inst), NULL); - } - - err |= reg(file, _file, reg_nr); - if (err == -1) - return 0; - if (subreg_nr || is_scalar_region) - format(file, ".%d", subreg_nr); - src_align1_region(file, _vert_stride, _width, _horiz_stride); - if (!is_scalar_region && !is_align1) - err |= src_swizzle(file, brw_eu_inst_3src_a16_src1_swizzle(devinfo, inst)); - string(file, brw_reg_type_to_letters(type)); - return err; -} - -static int -src2_3src(FILE *file, const struct brw_isa_info *isa, const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - int err = 0; - unsigned reg_nr, subreg_nr; - enum brw_reg_file _file; - enum brw_reg_type type; - enum brw_vertical_stride _vert_stride; - enum brw_width _width; - enum brw_horizontal_stride _horiz_stride; - bool is_scalar_region; - bool is_align1 = brw_eu_inst_3src_access_mode(devinfo, inst) == BRW_ALIGN_1; - - if (devinfo->ver < 10 && is_align1) - return 0; - - if (is_align1) { - _file = brw_eu_inst_3src_a1_src2_reg_file(devinfo, inst); - if (_file == IMM) { - uint16_t imm_val = brw_eu_inst_3src_a1_src2_imm(devinfo, inst); - enum brw_reg_type type = brw_eu_inst_3src_a1_src2_type(devinfo, inst); - - if (type == BRW_TYPE_W) { - format(file, "%dW", imm_val); - } else if (type == BRW_TYPE_UW) { - format(file, "0x%04xUW", imm_val); - } else if 
(type == BRW_TYPE_HF) { - format(file, "0x%04xHF", imm_val); - } - return 0; - } - - reg_nr = brw_eu_inst_3src_src2_reg_nr(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a1_src2_subreg_nr(devinfo, inst); - type = brw_eu_inst_3src_a1_src2_type(devinfo, inst); - /* FINISHME: No vertical stride on src2. Is using the hstride in place - * correct? Doesn't seem like it, since there's hstride=1 but - * no vstride=1. - */ - _vert_stride = vstride_from_align1_3src_hstride( - brw_eu_inst_3src_a1_src2_hstride(devinfo, inst)); - _horiz_stride = hstride_from_align1_3src_hstride( - brw_eu_inst_3src_a1_src2_hstride(devinfo, inst)); - _width = implied_width(_vert_stride, _horiz_stride); - } else { - _file = FIXED_GRF; - reg_nr = brw_eu_inst_3src_src2_reg_nr(devinfo, inst); - subreg_nr = brw_eu_inst_3src_a16_src2_subreg_nr(devinfo, inst); - type = brw_eu_inst_3src_a16_src_type(devinfo, inst); - - if (brw_eu_inst_3src_a16_src2_rep_ctrl(devinfo, inst)) { - _vert_stride = BRW_VERTICAL_STRIDE_0; - _width = BRW_WIDTH_1; - _horiz_stride = BRW_HORIZONTAL_STRIDE_0; - } else { - _vert_stride = BRW_VERTICAL_STRIDE_4; - _width = BRW_WIDTH_4; - _horiz_stride = BRW_HORIZONTAL_STRIDE_1; - } - } - is_scalar_region = _vert_stride == BRW_VERTICAL_STRIDE_0 && - _width == BRW_WIDTH_1 && - _horiz_stride == BRW_HORIZONTAL_STRIDE_0; - - subreg_nr /= brw_type_size_bytes(type); - - if (brw_eu_inst_opcode(isa, inst) != BRW_OPCODE_BFN) { - err |= control(file, "negate", m_negate, - brw_eu_inst_3src_src2_negate(devinfo, inst), NULL); - err |= control(file, "abs", _abs, brw_eu_inst_3src_src2_abs(devinfo, inst), NULL); - } - - err |= reg(file, _file, reg_nr); - if (err == -1) - return 0; - if (subreg_nr || is_scalar_region) - format(file, ".%d", subreg_nr); - src_align1_region(file, _vert_stride, _width, _horiz_stride); - if (!is_scalar_region && !is_align1) - err |= src_swizzle(file, brw_eu_inst_3src_a16_src2_swizzle(devinfo, inst)); - string(file, brw_reg_type_to_letters(type)); - return err; -} - -static 
int -src0_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - uint32_t reg_file = brw_eu_inst_dpas_3src_src0_reg_file(devinfo, inst); - - if (reg(file, reg_file, brw_eu_inst_dpas_3src_src0_reg_nr(devinfo, inst)) == -1) - return 0; - - unsigned subreg_nr = brw_eu_inst_dpas_3src_src0_subreg_nr(devinfo, inst); - enum brw_reg_type type = brw_eu_inst_dpas_3src_src0_type(devinfo, inst); - - if (subreg_nr) - format(file, ".%d", subreg_nr); - src_align1_region(file, - BRW_VERTICAL_STRIDE_1, - BRW_WIDTH_1, - BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0); - - string(file, brw_reg_type_to_letters(type)); - - return 0; -} - -static int -src1_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - uint32_t reg_file = brw_eu_inst_dpas_3src_src1_reg_file(devinfo, inst); - - if (reg(file, reg_file, brw_eu_inst_dpas_3src_src1_reg_nr(devinfo, inst)) == -1) - return 0; - - unsigned subreg_nr = brw_eu_inst_dpas_3src_src1_subreg_nr(devinfo, inst); - enum brw_reg_type type = brw_eu_inst_dpas_3src_src1_type(devinfo, inst); - - if (subreg_nr) - format(file, ".%d", subreg_nr); - src_align1_region(file, - BRW_VERTICAL_STRIDE_1, - BRW_WIDTH_1, - BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0); - - string(file, brw_reg_type_to_letters(type)); - - return 0; -} - -static int -src2_dpas_3src(FILE *file, const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - uint32_t reg_file = brw_eu_inst_dpas_3src_src2_reg_file(devinfo, inst); - - if (reg(file, reg_file, brw_eu_inst_dpas_3src_src2_reg_nr(devinfo, inst)) == -1) - return 0; - - unsigned subreg_nr = brw_eu_inst_dpas_3src_src2_subreg_nr(devinfo, inst); - enum brw_reg_type type = brw_eu_inst_dpas_3src_src2_type(devinfo, inst); - - if (subreg_nr) - format(file, ".%d", subreg_nr); - src_align1_region(file, - BRW_VERTICAL_STRIDE_1, - BRW_WIDTH_1, - BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0); - - string(file, brw_reg_type_to_letters(type)); - - return 0; -} - -static 
int -imm(FILE *file, const struct brw_isa_info *isa, enum brw_reg_type type, - const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - switch (type) { - case BRW_TYPE_UQ: - format(file, "0x%016"PRIx64"UQ", brw_eu_inst_imm_uq(devinfo, inst)); - break; - case BRW_TYPE_Q: - format(file, "0x%016"PRIx64"Q", brw_eu_inst_imm_uq(devinfo, inst)); - break; - case BRW_TYPE_UD: - format(file, "0x%08xUD", brw_eu_inst_imm_ud(devinfo, inst)); - break; - case BRW_TYPE_D: - format(file, "%dD", brw_eu_inst_imm_d(devinfo, inst)); - break; - case BRW_TYPE_UW: - format(file, "0x%04xUW", (uint16_t) brw_eu_inst_imm_ud(devinfo, inst)); - break; - case BRW_TYPE_W: - format(file, "%dW", (int16_t) brw_eu_inst_imm_d(devinfo, inst)); - break; - case BRW_TYPE_UV: - format(file, "0x%08xUV", brw_eu_inst_imm_ud(devinfo, inst)); - break; - case BRW_TYPE_VF: - format(file, "0x%"PRIx64"VF", brw_eu_inst_bits(inst, 127, 96)); - pad(file, 48); - format(file, "/* [%-gF, %-gF, %-gF, %-gF]VF */", - brw_vf_to_float(brw_eu_inst_imm_ud(devinfo, inst)), - brw_vf_to_float(brw_eu_inst_imm_ud(devinfo, inst) >> 8), - brw_vf_to_float(brw_eu_inst_imm_ud(devinfo, inst) >> 16), - brw_vf_to_float(brw_eu_inst_imm_ud(devinfo, inst) >> 24)); - break; - case BRW_TYPE_V: - format(file, "0x%08xV", brw_eu_inst_imm_ud(devinfo, inst)); - break; - case BRW_TYPE_F: - /* The DIM instruction's src0 uses an F type but contains a - * 64-bit immediate - */ - format(file, "0x%"PRIx64"F", brw_eu_inst_bits(inst, 127, 96)); - pad(file, 48); - format(file, " /* %-gF */", brw_eu_inst_imm_f(devinfo, inst)); - break; - case BRW_TYPE_DF: - format(file, "0x%016"PRIx64"DF", brw_eu_inst_imm_uq(devinfo, inst)); - pad(file, 48); - format(file, "/* %-gDF */", brw_eu_inst_imm_df(devinfo, inst)); - break; - case BRW_TYPE_HF: - format(file, "0x%04xHF", - (uint16_t) brw_eu_inst_imm_ud(devinfo, inst)); - pad(file, 48); - format(file, "/* %-gHF */", - _mesa_half_to_float((uint16_t) brw_eu_inst_imm_ud(devinfo, inst))); - 
break; - case BRW_TYPE_UB: - case BRW_TYPE_B: - default: - format(file, "*** invalid immediate type %d ", type); - } - return 0; -} - -static int -src_sends_da(FILE *file, - const struct intel_device_info *devinfo, - enum brw_reg_type type, - enum brw_reg_file _reg_file, - unsigned _reg_nr, - unsigned _reg_subnr) -{ - int err = 0; - - err |= reg(file, _reg_file, _reg_nr); - if (err == -1) - return 0; - if (_reg_subnr) - format(file, ".1"); - string(file, brw_reg_type_to_letters(type)); - - return err; -} - -static int -src_sends_ia(FILE *file, - const struct intel_device_info *devinfo, - enum brw_reg_type type, - int _addr_imm, - unsigned _addr_subreg_nr) -{ - string(file, "g[a0"); - if (_addr_subreg_nr) - format(file, ".1"); - if (_addr_imm) - format(file, " %d", _addr_imm); - string(file, "]"); - string(file, brw_reg_type_to_letters(type)); - - return 0; -} - -static int -src_send_desc_ia(FILE *file, - const struct intel_device_info *devinfo, - unsigned _addr_subreg_nr) -{ - string(file, "a0"); - if (_addr_subreg_nr) - format(file, ".%d", _addr_subreg_nr); - format(file, "<0>UD"); - - return 0; -} - -static int -src0(FILE *file, const struct brw_isa_info *isa, const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (is_split_send(devinfo, brw_eu_inst_opcode(isa, inst))) { - if (devinfo->ver >= 30 && - brw_eu_inst_send_src0_reg_file(devinfo, inst) == ARF) { - format(file, "r["); - reg(file, ARF, brw_eu_inst_src0_da_reg_nr(devinfo, inst)); - format(file, ".%u]", (unsigned)brw_eu_inst_send_src0_subreg_nr(devinfo, inst) * 2); - return 0; - } else if (devinfo->ver >= 12) { - return src_sends_da(file, - devinfo, - BRW_TYPE_UD, - brw_eu_inst_send_src0_reg_file(devinfo, inst), - brw_eu_inst_src0_da_reg_nr(devinfo, inst), - 0); - } else if (brw_eu_inst_send_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - return src_sends_da(file, - devinfo, - BRW_TYPE_UD, - FIXED_GRF, - brw_eu_inst_src0_da_reg_nr(devinfo, inst), - 
brw_eu_inst_src0_da16_subreg_nr(devinfo, inst)); - } else { - return src_sends_ia(file, - devinfo, - BRW_TYPE_UD, - brw_eu_inst_send_src0_ia16_addr_imm(devinfo, inst), - brw_eu_inst_src0_ia_subreg_nr(devinfo, inst)); - } - } else if (brw_eu_inst_src0_reg_file(devinfo, inst) == IMM) { - return imm(file, isa, brw_eu_inst_src0_type(devinfo, inst), inst); - } else if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (brw_eu_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - return src_da1(file, - devinfo, - brw_eu_inst_opcode(isa, inst), - brw_eu_inst_src0_type(devinfo, inst), - brw_eu_inst_src0_reg_file(devinfo, inst), - brw_eu_inst_src0_vstride(devinfo, inst), - brw_eu_inst_src0_width(devinfo, inst), - brw_eu_inst_src0_hstride(devinfo, inst), - brw_eu_inst_src0_da_reg_nr(devinfo, inst), - brw_eu_inst_src0_da1_subreg_nr(devinfo, inst), - brw_eu_inst_src0_abs(devinfo, inst), - brw_eu_inst_src0_negate(devinfo, inst)); - } else { - return src_ia1(file, - devinfo, - brw_eu_inst_opcode(isa, inst), - brw_eu_inst_src0_type(devinfo, inst), - brw_eu_inst_src0_ia1_addr_imm(devinfo, inst), - brw_eu_inst_src0_ia_subreg_nr(devinfo, inst), - brw_eu_inst_src0_negate(devinfo, inst), - brw_eu_inst_src0_abs(devinfo, inst), - brw_eu_inst_src0_hstride(devinfo, inst), - brw_eu_inst_src0_width(devinfo, inst), - brw_eu_inst_src0_vstride(devinfo, inst)); - } - } else { - if (brw_eu_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - return src_da16(file, - devinfo, - brw_eu_inst_opcode(isa, inst), - brw_eu_inst_src0_type(devinfo, inst), - brw_eu_inst_src0_reg_file(devinfo, inst), - brw_eu_inst_src0_vstride(devinfo, inst), - brw_eu_inst_src0_da_reg_nr(devinfo, inst), - brw_eu_inst_src0_da16_subreg_nr(devinfo, inst), - brw_eu_inst_src0_abs(devinfo, inst), - brw_eu_inst_src0_negate(devinfo, inst), - brw_eu_inst_src0_da16_swiz_x(devinfo, inst), - brw_eu_inst_src0_da16_swiz_y(devinfo, inst), - brw_eu_inst_src0_da16_swiz_z(devinfo, inst), - 
brw_eu_inst_src0_da16_swiz_w(devinfo, inst)); - } else { - string(file, "Indirect align16 address mode not supported"); - return 1; - } - } -} - -static int -src1(FILE *file, const struct brw_isa_info *isa, const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (is_split_send(devinfo, brw_eu_inst_opcode(isa, inst))) { - return src_sends_da(file, - devinfo, - BRW_TYPE_UD, - brw_eu_inst_send_src1_reg_file(devinfo, inst), - brw_eu_inst_send_src1_reg_nr(devinfo, inst), - 0 /* subreg_nr */); - } else if (brw_eu_inst_src1_reg_file(devinfo, inst) == IMM) { - return imm(file, isa, brw_eu_inst_src1_type(devinfo, inst), inst); - } else if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (brw_eu_inst_src1_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - return src_da1(file, - devinfo, - brw_eu_inst_opcode(isa, inst), - brw_eu_inst_src1_type(devinfo, inst), - brw_eu_inst_src1_reg_file(devinfo, inst), - brw_eu_inst_src1_vstride(devinfo, inst), - brw_eu_inst_src1_width(devinfo, inst), - brw_eu_inst_src1_hstride(devinfo, inst), - brw_eu_inst_src1_da_reg_nr(devinfo, inst), - brw_eu_inst_src1_da1_subreg_nr(devinfo, inst), - brw_eu_inst_src1_abs(devinfo, inst), - brw_eu_inst_src1_negate(devinfo, inst)); - } else { - return src_ia1(file, - devinfo, - brw_eu_inst_opcode(isa, inst), - brw_eu_inst_src1_type(devinfo, inst), - brw_eu_inst_src1_ia1_addr_imm(devinfo, inst), - brw_eu_inst_src1_ia_subreg_nr(devinfo, inst), - brw_eu_inst_src1_negate(devinfo, inst), - brw_eu_inst_src1_abs(devinfo, inst), - brw_eu_inst_src1_hstride(devinfo, inst), - brw_eu_inst_src1_width(devinfo, inst), - brw_eu_inst_src1_vstride(devinfo, inst)); - } - } else { - if (brw_eu_inst_src1_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { - return src_da16(file, - devinfo, - brw_eu_inst_opcode(isa, inst), - brw_eu_inst_src1_type(devinfo, inst), - brw_eu_inst_src1_reg_file(devinfo, inst), - brw_eu_inst_src1_vstride(devinfo, inst), - 
brw_eu_inst_src1_da_reg_nr(devinfo, inst), - brw_eu_inst_src1_da16_subreg_nr(devinfo, inst), - brw_eu_inst_src1_abs(devinfo, inst), - brw_eu_inst_src1_negate(devinfo, inst), - brw_eu_inst_src1_da16_swiz_x(devinfo, inst), - brw_eu_inst_src1_da16_swiz_y(devinfo, inst), - brw_eu_inst_src1_da16_swiz_z(devinfo, inst), - brw_eu_inst_src1_da16_swiz_w(devinfo, inst)); - } else { - string(file, "Indirect align16 address mode not supported"); - return 1; - } - } -} - -static int -qtr_ctrl(FILE *file, const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - int qtr_ctl = brw_eu_inst_qtr_control(devinfo, inst); - int exec_size = 1 << brw_eu_inst_exec_size(devinfo, inst); - const unsigned nib_ctl = devinfo->ver >= 20 ? 0 : - brw_eu_inst_nib_control(devinfo, inst); - - if (exec_size < 8 || nib_ctl) { - format(file, " %dN", qtr_ctl * 2 + nib_ctl + 1); - } else if (exec_size == 8) { - switch (qtr_ctl) { - case 0: - string(file, " 1Q"); - break; - case 1: - string(file, " 2Q"); - break; - case 2: - string(file, " 3Q"); - break; - case 3: - string(file, " 4Q"); - break; - } - } else if (exec_size == 16) { - if (qtr_ctl < 2) - string(file, " 1H"); - else - string(file, " 2H"); - } - return 0; -} - -static bool -inst_has_type(const struct brw_isa_info *isa, - const brw_eu_inst *inst, - enum brw_reg_type type) -{ - const struct intel_device_info *devinfo = isa->devinfo; - const unsigned num_sources = brw_num_sources_from_inst(isa, inst); - - if (brw_eu_inst_dst_type(devinfo, inst) == type) - return true; - - if (num_sources >= 3) { - if (brw_eu_inst_3src_access_mode(devinfo, inst) == BRW_ALIGN_1) - return brw_eu_inst_3src_a1_src0_type(devinfo, inst) == type || - brw_eu_inst_3src_a1_src1_type(devinfo, inst) == type || - brw_eu_inst_3src_a1_src2_type(devinfo, inst) == type; - else - return brw_eu_inst_3src_a16_src_type(devinfo, inst) == type; - } else if (num_sources == 2) { - return brw_eu_inst_src0_type(devinfo, inst) == type || - brw_eu_inst_src1_type(devinfo, inst) == 
type; - } else { - return brw_eu_inst_src0_type(devinfo, inst) == type; - } -} - -static int -swsb(FILE *file, const struct brw_isa_info *isa, const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - const enum opcode opcode = brw_eu_inst_opcode(isa, inst); - const uint32_t x = brw_eu_inst_swsb(devinfo, inst); - const bool is_unordered = - opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC || - opcode == BRW_OPCODE_MATH || opcode == BRW_OPCODE_DPAS || - (devinfo->has_64bit_float_via_math_pipe && - inst_has_type(isa, inst, BRW_TYPE_DF)); - const struct gen_swsb swsb = brw_swsb_decode(devinfo, is_unordered, x, opcode); - if (swsb.regdist) - format(file, " %s@%d", - (swsb.pipe == GEN_PIPE_FLOAT ? "F" : - swsb.pipe == GEN_PIPE_INT ? "I" : - swsb.pipe == GEN_PIPE_LONG ? "L" : - swsb.pipe == GEN_PIPE_ALL ? "A" : - swsb.pipe == GEN_PIPE_MATH ? "M" : - swsb.pipe == GEN_PIPE_SCALAR ? "S" : "" ), - swsb.regdist); - if (swsb.mode) - format(file, " $%d%s", swsb.sbid, - (swsb.mode & GEN_SBID_SET ? "" : - swsb.mode & GEN_SBID_DST ? 
".dst" : ".src")); - return 0; -} - -#if MESA_DEBUG -static __attribute__((__unused__)) int -brw_disassemble_imm(const struct brw_isa_info *isa, - uint32_t dw3, uint32_t dw2, uint32_t dw1, uint32_t dw0) -{ - brw_eu_inst inst; - inst.data[0] = (((uint64_t) dw1) << 32) | ((uint64_t) dw0); - inst.data[1] = (((uint64_t) dw3) << 32) | ((uint64_t) dw2); - return brw_disassemble_inst(stderr, isa, &inst, false, 0, NULL); -} -#endif - -static void -write_label(FILE *file, const struct intel_device_info *devinfo, - const struct brw_label *root_label, - int offset, int jump) -{ - if (root_label != NULL) { - int to_bytes_scale = sizeof(brw_eu_inst) / brw_jump_scale(devinfo); - const struct brw_label *label = - brw_find_label(root_label, offset + jump * to_bytes_scale); - if (label != NULL) { - format(file, " LABEL%d", label->number); - } - } -} - -static void -lsc_disassemble_ex_desc(const struct intel_device_info *devinfo, - uint32_t imm_desc, - uint32_t imm_ex_desc, - FILE *file) -{ - const gen_lsc_desc desc = gen_lsc_desc_decode(devinfo, imm_desc); - const gen_lsc_ex_desc ex_desc = - gen_lsc_ex_desc_decode(devinfo, desc.addr_type, imm_ex_desc, 0); - - switch (ex_desc.addr_type) { - case LSC_ADDR_SURFTYPE_FLAT: - format(file, " base_offset %d ", - ex_desc.flat.base_offset); - break; - case LSC_ADDR_SURFTYPE_BSS: - case LSC_ADDR_SURFTYPE_SS: - format(file, " surface_state_index %u ", - ex_desc.surface_state.surface_state_index); - break; - case LSC_ADDR_SURFTYPE_BTI: - format(file, " BTI %u ", - ex_desc.bti.index); - format(file, " base_offset %d ", - ex_desc.bti.base_offset); - break; - default: - format(file, "unsupported address surface type %d", ex_desc.addr_type); - break; - } -} - -static inline bool -gen_sfid_is_lsc(unsigned sfid) -{ - switch (sfid) { - case GEN_SFID_UGM: - case GEN_SFID_SLM: - case GEN_SFID_TGM: - return true; - default: - break; - } - - return false; -} - -int -brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, - const brw_eu_inst 
*inst, bool is_compacted, - int offset, const struct brw_label *root_label) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - int err = 0; - int space = 0; - - const enum opcode opcode = brw_eu_inst_opcode(isa, inst); - const struct opcode_desc *desc = brw_opcode_desc(isa, opcode); - - if (brw_eu_inst_pred_control(devinfo, inst)) { - string(file, "("); - err |= control(file, "predicate inverse", pred_inv, - brw_eu_inst_pred_inv(devinfo, inst), NULL); - format(file, "f%"PRIu64".%"PRIu64, - brw_eu_inst_flag_reg_nr(devinfo, inst), - brw_eu_inst_flag_subreg_nr(devinfo, inst)); - if (devinfo->ver >= 20) { - err |= control(file, "predicate control", xe2_pred_ctrl, - brw_eu_inst_pred_control(devinfo, inst), NULL); - } else if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - err |= control(file, "predicate control align1", pred_ctrl_align1, - brw_eu_inst_pred_control(devinfo, inst), NULL); - } else { - err |= control(file, "predicate control align16", pred_ctrl_align16, - brw_eu_inst_pred_control(devinfo, inst), NULL); - } - string(file, ") "); - } - - err |= print_opcode(file, isa, opcode); - - if (opcode == BRW_OPCODE_BFN) { - unsigned char table_byte = 0; - table_byte |= (inst->data[1] >> (84 - 64)) & 0xF; - table_byte |= ((inst->data[1] >> (92 - 64)) & 0xF) << 4; - format(file, "[0x%x]", table_byte); - } - - if (!is_send(opcode)) - err |= control(file, "saturate", saturate, brw_eu_inst_saturate(devinfo, inst), - NULL); - - err |= control(file, "debug control", debug_ctrl, - brw_eu_inst_debug_control(devinfo, inst), NULL); - - if (opcode == BRW_OPCODE_MATH) { - string(file, " "); - err |= control(file, "function", math_function, - brw_eu_inst_math_function(devinfo, inst), NULL); - - } else if (opcode == BRW_OPCODE_SYNC) { - string(file, " "); - err |= control(file, "function", sync_function, - brw_eu_inst_cond_modifier(devinfo, inst), NULL); - - } else if (opcode == BRW_OPCODE_DPAS) { - string(file, "."); - - err |= control(file, "systolic 
depth", dpas_systolic_depth, - brw_eu_inst_dpas_3src_sdepth(devinfo, inst), NULL); - - const unsigned rcount = brw_eu_inst_dpas_3src_rcount(devinfo, inst) + 1; - - format(file, "x%d", rcount); - } else if (opcode == BRW_OPCODE_BFN) { - unsigned cc; - - switch (brw_eu_inst_boolean_func_cond_modifier(devinfo, inst)) { - case 0: - cc = BRW_CONDITIONAL_NONE; - break; - case 1: - cc = BRW_CONDITIONAL_Z; - break; - case 2: - cc = BRW_CONDITIONAL_G; - break; - case 3: - cc = BRW_CONDITIONAL_L; - break; - } - - err |= control(file, "conditional modifier", conditional_modifier, - cc, NULL); - - /* If we're using the conditional modifier, print which flags reg is - * used for it. - */ - if (cc != BRW_CONDITIONAL_NONE) { - format(file, ".f%"PRIu64".%"PRIu64, - brw_eu_inst_flag_reg_nr(devinfo, inst), - brw_eu_inst_flag_subreg_nr(devinfo, inst)); - } - } else if (!is_send(opcode) && - (devinfo->ver < 12 || - brw_eu_inst_src0_reg_file(devinfo, inst) != IMM || - brw_type_size_bytes(brw_eu_inst_src0_type(devinfo, inst)) < 8)) { - err |= control(file, "conditional modifier", conditional_modifier, - brw_eu_inst_cond_modifier(devinfo, inst), NULL); - - /* If we're using the conditional modifier, print which flags reg is - * used for it. Note that on gfx6+, the embedded-condition SEL and - * control flow doesn't update flags. - */ - if (brw_eu_inst_cond_modifier(devinfo, inst) && - (opcode != BRW_OPCODE_SEL && - opcode != BRW_OPCODE_CSEL && - opcode != BRW_OPCODE_IF && - opcode != BRW_OPCODE_WHILE)) { - format(file, ".f%"PRIu64".%"PRIu64, - brw_eu_inst_flag_reg_nr(devinfo, inst), - brw_eu_inst_flag_subreg_nr(devinfo, inst)); - } - } - - if (opcode != BRW_OPCODE_NOP) { - string(file, "("); - err |= control(file, "execution size", exec_size, - brw_eu_inst_exec_size(devinfo, inst), NULL); - string(file, ")"); - } - - if (brw_has_uip(devinfo, opcode)) { - /* Instructions that have UIP also have JIP. 
*/ - pad(file, 16); - string(file, "JIP: "); - write_label(file, devinfo, root_label, offset, brw_eu_inst_jip(devinfo, inst)); - - pad(file, 38); - string(file, "UIP: "); - write_label(file, devinfo, root_label, offset, brw_eu_inst_uip(devinfo, inst)); - } else if (brw_has_jip(devinfo, opcode)) { - int jip = brw_eu_inst_jip(devinfo, inst); - - pad(file, 16); - string(file, "JIP: "); - write_label(file, devinfo, root_label, offset, jip); - } else if (opcode == BRW_OPCODE_JMPI) { - pad(file, 16); - err |= src1(file, isa, inst); - } else if (opcode == BRW_OPCODE_DPAS) { - pad(file, 16); - err |= dest_dpas_3src(file, devinfo, inst); - - pad(file, 32); - err |= src0_dpas_3src(file, devinfo, inst); - - pad(file, 48); - err |= src1_dpas_3src(file, devinfo, inst); - - pad(file, 64); - err |= src2_dpas_3src(file, devinfo, inst); - - } else if (desc && desc->nsrc == 3) { - pad(file, 16); - err |= dest_3src(file, devinfo, inst); - - pad(file, 32); - err |= src0_3src(file, devinfo, inst); - - pad(file, 48); - err |= src1_3src(file, isa, inst); - - pad(file, 64); - err |= src2_3src(file, isa, inst); - } else if (desc) { - if (desc->ndst > 0) { - pad(file, 16); - err |= dest(file, isa, inst); - } - - if (desc->nsrc > 0) { - pad(file, 32); - err |= src0(file, isa, inst); - } - - if (desc->nsrc > 1 && !is_send_gather(isa, inst)) { - pad(file, 48); - err |= src1(file, isa, inst); - } - } - - if (is_send(opcode)) { - enum gen_sfid sfid = brw_eu_inst_sfid(devinfo, inst); - - bool has_imm_desc = false, has_imm_ex_desc = false; - uint32_t imm_desc = 0, imm_ex_desc = 0; - if (is_split_send(devinfo, opcode)) { - pad(file, 64); - if (brw_eu_inst_send_sel_reg32_desc(devinfo, inst)) { - /* show the indirect descriptor source */ - err |= src_send_desc_ia(file, devinfo, 0); - } else { - has_imm_desc = true; - imm_desc = brw_eu_inst_send_desc(devinfo, inst); - fprintf(file, "0x%08"PRIx32, imm_desc); - } - - pad(file, 80); - if (brw_eu_inst_send_sel_reg32_ex_desc(devinfo, inst)) { - /* show the 
indirect descriptor source */ - err |= src_send_desc_ia(file, devinfo, - brw_eu_inst_send_ex_desc_ia_subreg_nr(devinfo, inst)); - if (devinfo->ver >= 20) { - imm_ex_desc |= - SET_BITS(brw_eu_inst_bits(inst, 127, 124), 31, 28) | - SET_BITS(brw_eu_inst_bits(inst, 97, 96), 27, 26) | - SET_BITS(brw_eu_inst_bits(inst, 65, 64), 25, 24) | - SET_BITS(brw_eu_inst_bits(inst, 47, 43), 23, 19) | - SET_BITS(brw_eu_inst_bits(inst, 39, 36), 15, 12); - } - } else { - has_imm_ex_desc = true; - imm_ex_desc = brw_eu_inst_sends_ex_desc(devinfo, inst, - is_send_gather(isa, inst)); - fprintf(file, "0x%08"PRIx32, imm_ex_desc); - } - } else { - if (brw_eu_inst_src1_reg_file(devinfo, inst) != IMM) { - /* show the indirect descriptor source */ - pad(file, 48); - err |= src1(file, isa, inst); - pad(file, 64); - } else { - has_imm_desc = true; - imm_desc = brw_eu_inst_send_desc(devinfo, inst); - pad(file, 48); - } - - /* Print message descriptor as immediate source */ - fprintf(file, "0x%08"PRIx64, inst->data[1] >> 32); - } - - newline(file); - pad(file, 16); - space = 0; - - err |= control(file, "SFID", gen_sfid_names, sfid, &space); - string(file, " MsgDesc:"); - - if (!has_imm_desc) { - format(file, " indirect"); - } else { - bool unsupported = false; - switch (sfid) { - case GEN_SFID_SAMPLER: - if (devinfo->ver >= 20) { - err |= control(file, "sampler message", xe2_sampler_msg_type, - brw_sampler_desc_msg_type(devinfo, imm_desc), - &space); - err |= control(file, "sampler simd mode", xe2_sampler_simd_mode, - brw_sampler_desc_simd_mode(devinfo, imm_desc), - &space); - if (brw_sampler_desc_return_format(devinfo, imm_desc)) { - string(file, " HP"); - } - format(file, " Surface = %u Sampler = %u", - brw_sampler_desc_binding_table_index(devinfo, imm_desc), - brw_sampler_desc_sampler(devinfo, imm_desc)); - } else { - err |= control(file, "sampler message", gfx5_sampler_msg_type, - brw_sampler_desc_msg_type(devinfo, imm_desc), - &space); - err |= control(file, "sampler simd mode", - devinfo->ver 
>= 20 ? xe2_sampler_simd_mode : - gfx9_sampler_simd_mode, - brw_sampler_desc_simd_mode(devinfo, imm_desc), - &space); - if (brw_sampler_desc_return_format(devinfo, imm_desc)) { - string(file, " HP"); - } - format(file, " Surface = %u Sampler = %u", - brw_sampler_desc_binding_table_index(devinfo, imm_desc), - brw_sampler_desc_sampler(devinfo, imm_desc)); - } - break; - case GEN_SFID_HDC2: - case GEN_SFID_HDC_READ_ONLY: - format(file, " (bti %u, msg_ctrl %u, msg_type %u)", - brw_dp_desc_binding_table_index(devinfo, imm_desc), - brw_dp_desc_msg_control(devinfo, imm_desc), - brw_dp_desc_msg_type(devinfo, imm_desc)); - break; - - case GEN_SFID_RENDER_CACHE: { - unsigned msg_type = brw_fb_desc_msg_type(devinfo, imm_desc); - - err |= control(file, "DP rc message type", - dp_rc_msg_type(devinfo), msg_type, &space); - - bool is_rt_write = msg_type == - GEN_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; - - if (is_rt_write) { - err |= control(file, "RT message type", - devinfo->ver >= 20 ? m_rt_write_subtype_xe2 : m_rt_write_subtype, - brw_eu_inst_rt_message_type(devinfo, inst), &space); - if (brw_eu_inst_rt_slot_group(devinfo, inst)) - string(file, " Hi"); - if (brw_fb_write_desc_last_render_target(devinfo, imm_desc)) - string(file, " LastRT"); - if (devinfo->ver >= 10 && - brw_fb_write_desc_coarse_write(devinfo, imm_desc)) - string(file, " CoarseWrite"); - } else { - format(file, " MsgCtrl = 0x%u", - brw_fb_desc_msg_control(devinfo, imm_desc)); - } - - format(file, " Surface = %u", - brw_fb_desc_binding_table_index(devinfo, imm_desc)); - break; - } - - case GEN_SFID_URB: { - if (devinfo->ver >= 20) { - format(file, " ("); - const gen_lsc_desc desc = gen_lsc_desc_decode(devinfo, imm_desc); - const enum lsc_opcode op = desc.op; - err |= control(file, "operation", lsc_operation, - op, &space); - format(file, ","); - err |= control(file, "addr_size", lsc_addr_size, - desc.addr_size, - &space); - - format(file, ","); - err |= control(file, "data_size", lsc_data_size, - 
desc.data_size, - &space); - format(file, ","); - if (lsc_opcode_has_cmask(op)) { - err |= control(file, "component_mask", - lsc_cmask_str, - desc.cmask, - &space); - } else { - err |= control(file, "vector_size", - lsc_vect_size_str, - desc.vect_size, - &space); - if (desc.transpose) - format(file, ", transpose"); - } - switch(op) { - case LSC_OP_LOAD_CMASK: - case LSC_OP_LOAD: - case LSC_OP_LOAD_CMASK_MSRT: - format(file, ","); - err |= control(file, "cache_load", - devinfo->ver >= 20 ? - xe2_lsc_cache_load : - lsc_cache_load, - desc.cache_ctrl, - &space); - break; - default: - format(file, ","); - err |= control(file, "cache_store", - devinfo->ver >= 20 ? - xe2_lsc_cache_store : - lsc_cache_store, - desc.cache_ctrl, - &space); - break; - } - - format(file, " dst_len = %u,", - brw_message_desc_rlen(devinfo, imm_desc) / reg_unit(devinfo)); - format(file, " src0_len = %u,", - brw_message_desc_mlen(devinfo, imm_desc) / reg_unit(devinfo)); - if (!is_send_gather(isa, inst)) - format(file, " src1_len = %d", - brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc) / reg_unit(devinfo)); - err |= control(file, "address_type", lsc_addr_surface_type, - desc.addr_type, &space); - format(file, " )"); - } else { - unsigned urb_opcode = brw_eu_inst_urb_opcode(devinfo, inst); - - format(file, " offset %"PRIu64, brw_eu_inst_urb_global_offset(devinfo, inst)); - - space = 1; - - err |= control(file, "urb opcode", - gfx7_urb_opcode, urb_opcode, &space); - - if (brw_eu_inst_urb_per_slot_offset(devinfo, inst)) { - string(file, " per-slot"); - } - - if (urb_opcode == GEN_URB_OPCODE_SIMD8_WRITE || - urb_opcode == GEN_URB_OPCODE_SIMD8_READ) { - if (brw_eu_inst_urb_channel_mask_present(devinfo, inst)) - string(file, " masked"); - } else if (urb_opcode != GEN_GFX125_URB_OPCODE_FENCE) { - err |= control(file, "urb swizzle", urb_swizzle, - brw_eu_inst_urb_swizzle_control(devinfo, inst), - &space); - } - } - break; - } - case GEN_SFID_THREAD_SPAWNER: - break; - - case GEN_SFID_MESSAGE_GATEWAY: - 
format(file, " (%s)", - gfx7_gateway_subfuncid[brw_eu_inst_gateway_subfuncid(devinfo, inst)]); - break; - - case GEN_SFID_SLM: - case GEN_SFID_TGM: - case GEN_SFID_UGM: { - assert(devinfo->has_lsc); - format(file, " ("); - const gen_lsc_desc desc = gen_lsc_desc_decode(devinfo, imm_desc); - const enum lsc_opcode op = desc.op; - err |= control(file, "operation", lsc_operation, - op, &space); - format(file, ","); - err |= control(file, "addr_size", lsc_addr_size, - desc.addr_size, - &space); - - if (op == LSC_OP_FENCE) { - format(file, ","); - err |= control(file, "scope", lsc_fence_scope, - desc.fence.scope, - &space); - format(file, ","); - err |= control(file, "flush_type", lsc_flush_type, - desc.fence.flush_type, - &space); - format(file, ","); - err |= control(file, "backup_mode_fence_routing", - lsc_backup_fence_routing, - desc.fence.route_to_lsc, - &space); - } else { - format(file, ","); - err |= control(file, "data_size", lsc_data_size, - desc.data_size, - &space); - format(file, ","); - if (lsc_opcode_has_cmask(op)) { - err |= control(file, "component_mask", - lsc_cmask_str, - desc.cmask, - &space); - } else { - err |= control(file, "vector_size", - lsc_vect_size_str, - desc.vect_size, - &space); - if (desc.transpose) - format(file, ", transpose"); - } - switch(op) { - case LSC_OP_LOAD_CMASK: - case LSC_OP_LOAD: - format(file, ","); - err |= control(file, "cache_load", - devinfo->ver >= 20 ? - xe2_lsc_cache_load : - lsc_cache_load, - desc.cache_ctrl, - &space); - break; - default: - format(file, ","); - err |= control(file, "cache_store", - devinfo->ver >= 20 ? 
- xe2_lsc_cache_store : - lsc_cache_store, - desc.cache_ctrl, - &space); - break; - } - } - format(file, " dst_len = %u,", - brw_message_desc_rlen(devinfo, imm_desc) / reg_unit(devinfo)); - format(file, " src0_len = %u,", - brw_message_desc_mlen(devinfo, imm_desc) / reg_unit(devinfo)); - - if (!brw_eu_inst_send_sel_reg32_ex_desc(devinfo, inst) && - !is_send_gather(isa, inst)) - format(file, " src1_len = %d", - brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc) / reg_unit(devinfo)); - - err |= control(file, "address_type", lsc_addr_surface_type, - desc.addr_type, &space); - format(file, " )"); - break; - } - - case GEN_SFID_HDC0: - format(file, " ("); - space = 0; - - err |= control(file, "DP DC0 message type", - dp_dc0_msg_type_gfx7, - brw_dp_desc_msg_type(devinfo, imm_desc), &space); - - format(file, ", bti %u, ", - brw_dp_desc_binding_table_index(devinfo, imm_desc)); - - switch (brw_eu_inst_dp_msg_type(devinfo, inst)) { - case GEN_DATAPORT_DC_UNTYPED_ATOMIC_OP: - control(file, "atomic op", aop, - brw_dp_desc_msg_control(devinfo, imm_desc) & 0xf, - &space); - break; - case GEN_DATAPORT_DC_OWORD_BLOCK_READ: - case GEN_DATAPORT_DC_OWORD_BLOCK_WRITE: { - unsigned msg_ctrl = brw_dp_desc_msg_control(devinfo, imm_desc); - assert(dp_oword_block_rw[msg_ctrl & 7]); - format(file, "owords = %s, aligned = %d", - dp_oword_block_rw[msg_ctrl & 7], (msg_ctrl >> 3) & 3); - break; - } - default: - format(file, "%u", - brw_dp_desc_msg_control(devinfo, imm_desc)); - } - format(file, ")"); - break; - - case GEN_SFID_HDC1: { - format(file, " ("); - space = 0; - - unsigned msg_ctrl = brw_dp_desc_msg_control(devinfo, imm_desc); - - err |= control(file, "DP DC1 message type", - dp_dc1_msg_type_hsw, - brw_dp_desc_msg_type(devinfo, imm_desc), &space); - - format(file, ", Surface = %u, ", - brw_dp_desc_binding_table_index(devinfo, imm_desc)); - - switch (brw_eu_inst_dp_msg_type(devinfo, inst)) { - case GEN_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP: - case GEN_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP: - 
case GEN_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP: - format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16); - FALLTHROUGH; - case GEN_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2: - case GEN_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: - case GEN_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2: - case GEN_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP: - case GEN_GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP: - control(file, "atomic op", aop, msg_ctrl & 0xf, &space); - break; - case GEN_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: - case GEN_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE: - case GEN_DATAPORT_DC_PORT1_TYPED_SURFACE_READ: - case GEN_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE: - case GEN_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE: - case GEN_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ: { - static const char *simd_modes[] = { "4x2", "16", "8" }; - format(file, "SIMD%s, Mask = 0x%x", - simd_modes[msg_ctrl >> 4], msg_ctrl & 0xf); - break; - } - case GEN_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: - case GEN_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP: - case GEN_GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP: - format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16); - control(file, "atomic float op", aop_float, msg_ctrl & 0xf, - &space); - break; - case GEN_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE: - case GEN_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ: - assert(dp_oword_block_rw[msg_ctrl & 7]); - format(file, "owords = %s, aligned = %d", - dp_oword_block_rw[msg_ctrl & 7], (msg_ctrl >> 3) & 3); - break; - default: - format(file, "0x%x", msg_ctrl); - } - format(file, ")"); - break; - } - - case GEN_SFID_PIXEL_INTERPOLATOR: - format(file, " (%s, %s, 0x%02"PRIx64")", - brw_eu_inst_pi_nopersp(devinfo, inst) ? 
"linear" : "persp", - pixel_interpolator_msg_types[brw_eu_inst_pi_message_type(devinfo, inst)], - brw_eu_inst_pi_message_data(devinfo, inst)); - break; - - case GEN_SFID_RAY_TRACE_ACCELERATOR: - if (devinfo->has_ray_tracing) { - format(file, " SIMD%d,", - brw_rt_trace_ray_desc_exec_size(devinfo, imm_desc)); - } else { - unsupported = true; - } - break; - - default: - unsupported = true; - break; - } - - if (unsupported) - format(file, "unsupported shared function ID %d", sfid); - - if (space) - string(file, " "); - } - if (devinfo->verx10 >= 125 && - brw_eu_inst_send_sel_reg32_ex_desc(devinfo, inst) && - brw_eu_inst_send_ex_bso(devinfo, inst)) { - format(file, " src1_len = %u", - (unsigned) brw_eu_inst_send_src1_len(devinfo, inst)); - - format(file, " ex_bso"); - } - if (gen_sfid_is_lsc(sfid) || - (sfid == GEN_SFID_URB && devinfo->ver >= 20)) { - lsc_disassemble_ex_desc(devinfo, imm_desc, imm_ex_desc, file); - } else { - if (has_imm_desc) - format(file, " mlen %u", brw_message_desc_mlen(devinfo, imm_desc) / reg_unit(devinfo)); - if (has_imm_ex_desc) { - format(file, " ex_mlen %u", - brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc) / reg_unit(devinfo)); - } - if (has_imm_desc) - format(file, " rlen %u", brw_message_desc_rlen(devinfo, imm_desc) / reg_unit(devinfo)); - } - } - pad(file, 64); - if (opcode != BRW_OPCODE_NOP) { - string(file, "{"); - space = 1; - err |= control(file, "access mode", access_mode, - brw_eu_inst_access_mode(devinfo, inst), &space); - err |= control(file, "write enable control", wectrl, - brw_eu_inst_mask_control(devinfo, inst), &space); - - if (devinfo->ver < 12) { - err |= control(file, "dependency control", dep_ctrl, - ((brw_eu_inst_no_dd_check(devinfo, inst) << 1) | - brw_eu_inst_no_dd_clear(devinfo, inst)), &space); - } - - err |= qtr_ctrl(file, devinfo, inst); - - if (devinfo->ver >= 12) - err |= swsb(file, isa, inst); - - err |= control(file, "compaction", cmpt_ctrl, is_compacted, &space); - err |= control(file, "thread control", 
thread_ctrl, - (devinfo->ver >= 12 ? brw_eu_inst_atomic_control(devinfo, inst) : - brw_eu_inst_thread_control(devinfo, inst)), - &space); - if (brw_has_branch_ctrl(devinfo, opcode)) { - err |= control(file, "branch ctrl", branch_ctrl, - brw_eu_inst_branch_control(devinfo, inst), &space); - } else if (devinfo->ver < 20) { - err |= control(file, "acc write control", accwr, - brw_eu_inst_acc_wr_control(devinfo, inst), &space); - } - - if (devinfo->ver == 12 && is_send(opcode)) { - err |= control(file, "fusion ctrl", fusion_ctrl, - brw_eu_inst_fusion_ctrl(devinfo, inst), &space); - } - - if (is_send(opcode)) - err |= control(file, "end of thread", end_of_thread, - brw_eu_inst_eot(devinfo, inst), &space); - if (space) - string(file, " "); - string(file, "}"); - } - string(file, ";"); - newline(file); - return err; -} - -int -brw_disassemble_find_end(const struct brw_isa_info *isa, - const void *assembly, int start) -{ - const struct intel_device_info *devinfo = isa->devinfo; - int offset = start; - - /* This loop exits when send-with-EOT or when opcode is 0 */ - while (true) { - const brw_eu_inst *insn = assembly + offset; - - if (brw_eu_inst_cmpt_control(devinfo, insn)) { - offset += 8; - } else { - offset += 16; - } - - /* Simplistic, but efficient way to terminate disasm */ - uint32_t opcode = brw_eu_inst_opcode(isa, insn); - if (opcode == 0 || (is_send(opcode) && brw_eu_inst_eot(devinfo, insn))) { - break; - } - } - - return offset; -} - -void -brw_disassemble_with_errors(const struct brw_isa_info *isa, - const void *assembly, int start, - int64_t *lineno_offset, FILE *out) -{ - int end = brw_disassemble_find_end(isa, assembly, start); - - /* Make a dummy disasm structure that brw_validate_instructions - * can work from. 
- */ - struct disasm_info *disasm_info = disasm_initialize(isa, NULL); - disasm_new_inst_group(disasm_info, start); - disasm_new_inst_group(disasm_info, end); - - brw_validate_instructions(isa, assembly, start, end, disasm_info); - - void *mem_ctx = ralloc_context(NULL); - const struct brw_label *root_label = - brw_label_assembly(isa, assembly, start, end, mem_ctx); - - brw_foreach_list_typed(struct inst_group, group, link, - &disasm_info->group_list) { - struct brw_exec_node *next_node = brw_exec_node_get_next(&group->link); - if (brw_exec_node_is_tail_sentinel(next_node)) - break; - - struct inst_group *next = - brw_exec_node_data(struct inst_group, next_node, link); - - int start_offset = group->offset; - int end_offset = next->offset; - - brw_disassemble(isa, assembly, start_offset, end_offset, - root_label, lineno_offset, out); - - if (group->error) { - fputs(group->error, out); - } - } - - ralloc_free(mem_ctx); - ralloc_free(disasm_info); -} - -void -brw_disassemble_with_lineno(const struct brw_isa_info *isa, uint32_t stage, - int dispatch_width, uint32_t src_hash, - const void *assembly, int start, - int64_t lineno_offset, FILE *out) -{ - fprintf(out, "\nDumping shader asm for %s", _mesa_shader_stage_to_abbrev(stage)); - if (dispatch_width > 0) - fprintf(out, " SIMD%i", dispatch_width); - fprintf(out, " (src_hash 0x%x):\n\n", src_hash); - brw_disassemble_with_errors(isa, assembly, start, &lineno_offset, out); -} diff --git a/src/intel/compiler/brw/brw_disasm.h b/src/intel/compiler/brw/brw_disasm.h deleted file mode 100644 index 54f54b33018..00000000000 --- a/src/intel/compiler/brw/brw_disasm.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2024 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#pragma once - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_isa_info; -struct brw_eu_inst; - -const struct brw_label *brw_find_label(const struct brw_label *root, int offset); -void brw_create_label(struct brw_label 
**labels, int offset, void *mem_ctx); -int brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, - const struct brw_eu_inst *inst, bool is_compacted, - int offset, const struct brw_label *root_label); -const struct -brw_label *brw_label_assembly(const struct brw_isa_info *isa, - const void *assembly, int start, int end, - void *mem_ctx); -void brw_disassemble_with_labels(const struct brw_isa_info *isa, - const void *assembly, int start, int end, FILE *out); -void brw_disassemble(const struct brw_isa_info *isa, - const void *assembly, int start, int end, - const struct brw_label *root_label, - int64_t *lineno_offset, FILE *out); -int brw_disassemble_find_end(const struct brw_isa_info *isa, - const void *assembly, int start); -void brw_disassemble_with_errors(const struct brw_isa_info *isa, - const void *assembly, int start, - int64_t *lineno_offset, FILE *out); -void brw_disassemble_with_lineno(const struct brw_isa_info *isa, uint32_t stage, - int dispatch_width, uint32_t src_hash, - const void *assembly, int start, - int64_t lineno_offset, FILE *out); - -const char *brw_lsc_op_to_string(unsigned op); -const char *brw_lsc_addr_surftype_to_string(unsigned t); -const char *brw_lsc_data_size_to_string(unsigned s); - -#ifdef __cplusplus -} /* extern "C" */ -#endif diff --git a/src/intel/compiler/brw/brw_disasm_info.cpp b/src/intel/compiler/brw/brw_disasm_info.cpp deleted file mode 100644 index 463b83b24cf..00000000000 --- a/src/intel/compiler/brw/brw_disasm_info.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#include "brw_cfg.h" -#include "brw_eu.h" -#include "brw_disasm.h" -#include "brw_disasm_info.h" -#include "dev/intel_debug.h" -#include "compiler/nir/nir.h" -#include "util/lut.h" - -static bool -is_do_block(struct bblock_t *block) -{ - return block->start()->opcode == BRW_OPCODE_DO; -} - -static bool -is_flow_block(struct bblock_t *block) -{ - return block->start()->opcode == 
SHADER_OPCODE_FLOW; -} - -static bool -should_omit_link(struct bblock_t *block, - struct bblock_link *link) -{ - return link->kind == bblock_link_physical && - (is_do_block(block) || is_do_block(link->block)); -} - -static void -print_successors_for_disasm(FILE *f, struct bblock_t *block) -{ - brw_foreach_list_typed(struct bblock_link, succ, link, - &block->children) { - if (should_omit_link(block, succ)) - continue; - if (is_do_block(succ->block) || is_flow_block(succ->block)) - print_successors_for_disasm(f, succ->block); - else - fprintf(f, " ->B%d", succ->block->num); - } -} - -static void -print_predecessors_for_disasm(FILE *f, struct bblock_t *block) -{ - brw_foreach_list_typed(struct bblock_link, pred, link, - &block->parents) { - if (should_omit_link(block, pred)) - continue; - if (is_do_block(pred->block) || is_flow_block(pred->block)) - print_predecessors_for_disasm(f, pred->block); - else - fprintf(f, " <-B%d", pred->block->num); - } -} - -void -dump_assembly(void *assembly, int start_offset, int end_offset, - struct disasm_info *disasm, const unsigned *block_latency, FILE *f) -{ - const struct brw_isa_info *isa = disasm->isa; - const char *last_annotation_string = NULL; - - void *mem_ctx = ralloc_context(NULL); - const struct brw_label *root_label = - brw_label_assembly(isa, assembly, start_offset, end_offset, mem_ctx); - - brw_foreach_list_typed(struct inst_group, group, link, &disasm->group_list) { - struct brw_exec_node *next_node = brw_exec_node_get_next(&group->link); - if (brw_exec_node_is_tail_sentinel(next_node)) - break; - - struct inst_group *next = - brw_exec_node_data(struct inst_group, next_node, link); - - int start_offset = group->offset; - int end_offset = next->offset; - - if (group->block_start) { - fprintf(f, " START B%d", group->block_start->num); - print_predecessors_for_disasm(f, group->block_start); - if (block_latency) - fprintf(f, " (%u cycles)", - block_latency[group->block_start->num]); - fprintf(f, "\n"); - } - - if 
(last_annotation_string != group->annotation) { - last_annotation_string = group->annotation; - if (last_annotation_string) - fprintf(f, " %s\n", last_annotation_string); - } - - brw_disassemble(isa, assembly, start_offset, end_offset, - root_label, NULL, f); - - if (group->error) { - fputs(group->error, f); - } - - if (group->block_end) { - fprintf(f, " END B%d", group->block_end->num); - print_successors_for_disasm(f, group->block_end); - fprintf(f, "\n"); - } - } - fprintf(f, "\n"); - - ralloc_free(mem_ctx); -} - -struct disasm_info * -disasm_initialize(const struct brw_isa_info *isa, - const struct cfg_t *cfg) -{ - struct disasm_info *disasm = ralloc(NULL, struct disasm_info); - brw_exec_list_make_empty(&disasm->group_list); - disasm->isa = isa; - disasm->cfg = cfg; - disasm->cur_block = 0; - disasm->use_tail = false; - return disasm; -} - -struct inst_group * -disasm_new_inst_group(struct disasm_info *disasm, int next_inst_offset) -{ - assert(next_inst_offset >= 0); - struct inst_group *tail = rzalloc(disasm, struct inst_group); - tail->offset = next_inst_offset; - brw_exec_list_push_tail(&disasm->group_list, &tail->link); - return tail; -} - -void -disasm_annotate(struct disasm_info *disasm, - brw_inst *inst, int offset) -{ - const struct cfg_t *cfg = disasm->cfg; - - struct inst_group *group; - if (!disasm->use_tail) { - group = disasm_new_inst_group(disasm, offset); - } else { - disasm->use_tail = false; - group = brw_exec_node_data(struct inst_group, - brw_exec_list_get_tail_raw(&disasm->group_list), link); - } - -#ifndef NDEBUG - if (INTEL_DEBUG(DEBUG_ANNOTATION)) { - group->annotation = inst->annotation; - - if (group->annotation == NULL && inst->opcode == BRW_OPCODE_BFN) - group->annotation = util_lut3_to_str[inst->src[3].ud & 0xff]; - } -#endif - - if (inst->opcode == BRW_OPCODE_DO || - inst->opcode == SHADER_OPCODE_FLOW) { - disasm->use_tail = true; - disasm->cur_block++; - return; - } - - if (cfg->blocks[disasm->cur_block]->start() == inst) { - 
group->block_start = cfg->blocks[disasm->cur_block]; - } - - if (cfg->blocks[disasm->cur_block]->end() == inst) { - group->block_end = cfg->blocks[disasm->cur_block]; - disasm->cur_block++; - } -} - -void -disasm_insert_error(struct disasm_info *disasm, int offset, - int inst_size, const char *error) -{ - brw_foreach_list_typed(struct inst_group, cur, link, &disasm->group_list) { - struct brw_exec_node *next_node = brw_exec_node_get_next(&cur->link); - if (brw_exec_node_is_tail_sentinel(next_node)) - break; - - struct inst_group *next = - brw_exec_node_data(struct inst_group, next_node, link); - - if (next->offset <= offset) - continue; - - if (offset + inst_size != next->offset) { - struct inst_group *new_group = ralloc(disasm, struct inst_group); - memcpy(new_group, cur, sizeof(struct inst_group)); - - cur->error = NULL; - cur->error_length = 0; - cur->block_end = NULL; - - new_group->offset = offset + inst_size; - new_group->block_start = NULL; - - brw_exec_node_insert_after(&cur->link, &new_group->link); - } - - if (cur->error) - ralloc_strcat(&cur->error, error); - else - cur->error = ralloc_strdup(disasm, error); - return; - } -} diff --git a/src/intel/compiler/brw/brw_disasm_info.h b/src/intel/compiler/brw/brw_disasm_info.h deleted file mode 100644 index e97e4827fe8..00000000000 --- a/src/intel/compiler/brw/brw_disasm_info.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#pragma once - -#include "compiler/brw_list.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct cfg_t; -struct brw_inst; -struct intel_device_info; - -struct inst_group { - struct brw_exec_node link; - - int offset; - - size_t error_length; - char *error; - - /* Pointers to the basic block in the CFG if the instruction group starts - * or ends a basic block. - */ - struct bblock_t *block_start; - struct bblock_t *block_end; - - /* Annotation for the generated IR. 
*/ - const char *annotation; -}; - -struct disasm_info { - struct brw_exec_list group_list; - - const struct brw_isa_info *isa; - const struct cfg_t *cfg; - - /** Block index in the cfg. */ - int cur_block; - bool use_tail; -}; - -void -dump_assembly(void *assembly, int start_offset, int end_offset, - struct disasm_info *disasm, const unsigned *block_latency, FILE *f); - -struct disasm_info * -disasm_initialize(const struct brw_isa_info *isa, - const struct cfg_t *cfg); - -struct inst_group * -disasm_new_inst_group(struct disasm_info *disasm, int offset); - -void -disasm_annotate(struct disasm_info *disasm, - struct brw_inst *inst, int offset); - -void -disasm_insert_error(struct disasm_info *disasm, int offset, - int inst_size, const char *error); - -#ifdef __cplusplus -} /* extern "C" */ -#endif diff --git a/src/intel/compiler/brw/brw_disasm_tool.c b/src/intel/compiler/brw/brw_disasm_tool.c deleted file mode 100644 index 0b54cd28b7a..00000000000 --- a/src/intel/compiler/brw/brw_disasm_tool.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include -#include - -#include "brw_disasm.h" -#include "brw_isa_info.h" -#include "dev/intel_device_info.h" -#include "util/u_dynarray.h" - -enum opt_input_type { - OPT_INPUT_BINARY, - OPT_INPUT_C_LITERAL, -}; - -static enum opt_input_type input_type = OPT_INPUT_BINARY; - -/* Return size of file in bytes pointed by fp */ -static long -i965_disasm_get_file_size(FILE *fp) -{ - long size; - - fseek(fp, 0L, SEEK_END); - size = ftell(fp); - fseek(fp, 0L, SEEK_SET); - - return size; -} - -/* Read hex file which should be in following format: - * for example : - * { 0x00000000, 0x00000000, 0x00000000, 0x00000000 } - */ -static void * -i965_disasm_read_c_literal_file(FILE *fp, size_t *end) -{ - struct util_dynarray assembly = {}; - uint32_t temp[2]; - - if (fscanf(fp, " { ") == EOF) { - fprintf(stderr, "Couldn't find opening `{`\n"); - return NULL; 
- } - - if (fscanf(fp, "0x%x , 0x%x", &temp[0], &temp[1]) == 2) { - util_dynarray_append(&assembly, temp[0]); - util_dynarray_append(&assembly, temp[1]); - } else { - fprintf(stderr, "Couldn't read hex values\n"); - return NULL; - } - - while (fscanf(fp, " , 0x%x , 0x%x ", &temp[0], &temp[1]) == 2) { - util_dynarray_append(&assembly, temp[0]); - util_dynarray_append(&assembly, temp[1]); - } - - if (fscanf(fp, "}") == EOF) { - fprintf(stderr, "Couldn't find closing `}`\n"); - return NULL; - } - - *end = assembly.size; - return assembly.data; -} - -static void * -i965_disasm_read_binary(FILE *fp, size_t *end) -{ - size_t size; - void *assembly; - - long sz = i965_disasm_get_file_size(fp); - if (sz < 0) - return NULL; - - *end = (size_t)sz; - if (!*end) - return NULL; - - assembly = malloc(*end + 1); - if (assembly == NULL) - return NULL; - - size = fread(assembly, *end, 1, fp); - if (!size) { - free(assembly); - return NULL; - } - return assembly; -} - -static void -print_help(const char *progname, FILE *file) -{ - fprintf(file, - "Usage: %s [OPTION]...\n" - "Disassemble i965 instructions from binary file.\n\n" - " --help display this help and exit\n" - " --input-path=PATH read binary file from binary file PATH\n" - " --type=INPUT_TYPE INPUT_TYPE can be 'bin' (default if omitted),\n" - " 'c_literal'.\n" - " --gen=platform disassemble instructions for given \n" - " platform (3 letter platform name)\n", - progname); -} - -int main(int argc, char *argv[]) -{ - FILE *fp = NULL; - void *assembly = NULL; - char *file_path = NULL; - size_t start = 0, end = 0; - uint16_t pci_id = 0; - int c; - int result = EXIT_FAILURE; - - bool help = false; - const struct option i965_disasm_opts[] = { - { "help", no_argument, (int *) &help, true }, - { "input-path", required_argument, NULL, 'i' }, - { "type", required_argument, NULL, 't' }, - { "gen", required_argument, NULL, 'g'}, - { NULL, 0, NULL, 0 } - }; - - while ((c = getopt_long(argc, argv, ":i:t:g:h", i965_disasm_opts, NULL)) != 
-1) { - switch (c) { - case 'g': { - const int id = intel_device_name_to_pci_device_id(optarg); - if (id < 0) { - fprintf(stderr, "can't parse gen: '%s', expected 3 letter " - "platform name\n", optarg); - goto end; - } else { - pci_id = id; - } - break; - } - case 'i': - file_path = strdup(optarg); - fp = fopen(file_path, "r"); - if (!fp) { - fprintf(stderr, "Unable to read input file : %s\n", - file_path); - goto end; - } - break; - case 't': - if (strcmp(optarg, "c_literal") == 0) { - input_type = OPT_INPUT_C_LITERAL; - } else if (strcmp(optarg, "bin") == 0) { - input_type = OPT_INPUT_BINARY; - } else { - fprintf(stderr, "invalid value for --type: %s\n", optarg); - goto end; - } - break; - case 'h': - help = true; - print_help(argv[0], stderr); - goto end; - case 0: - break; - case ':': - fprintf(stderr, "%s: option `-%c' requires an argument\n", - argv[0], optopt); - goto end; - case '?': - default: - fprintf(stderr, "%s: option `-%c' is invalid: ignored\n", - argv[0], optopt); - goto end; - } - } - - if (help || !file_path || !pci_id) { - print_help(argv[0], stderr); - exit(0); - } - - struct intel_device_info devinfo; - if (!intel_get_device_info_from_pci_id(pci_id, &devinfo)) { - fprintf(stderr, "can't find device information: pci_id=0x%x\n", pci_id); - exit(EXIT_FAILURE); - } - - if (devinfo.ver < 9) { - fprintf(stderr, "device has gfx version %d but must be >= 9, try elk_disasm instead", - devinfo.ver); - exit(EXIT_FAILURE); - } - - struct brw_isa_info isa; - brw_init_isa_info(&isa, &devinfo); - - if (input_type == OPT_INPUT_BINARY) - assembly = i965_disasm_read_binary(fp, &end); - else if (input_type == OPT_INPUT_C_LITERAL) - assembly = i965_disasm_read_c_literal_file(fp, &end); - - if (!assembly) { - if (end) - fprintf(stderr, "Unable to allocate buffer to read input file\n"); - else - fprintf(stderr, "Failed to read input file\n"); - - goto end; - } - - /* Disassemble i965 instructions from buffer assembly */ - brw_disassemble_with_labels(&isa, 
assembly, start, end, stdout); - - result = EXIT_SUCCESS; - -end: - if (fp) - fclose(fp); - - free(file_path); - free(assembly); - - exit(result); -} diff --git a/src/intel/compiler/brw/brw_eu.c b/src/intel/compiler/brw/brw_eu.c index e8e90629130..01e8a7e2cc9 100644 --- a/src/intel/compiler/brw/brw_eu.c +++ b/src/intel/compiler/brw/brw_eu.c @@ -10,7 +10,6 @@ #include #include -#include "brw_disasm.h" #include "brw_eu_defines.h" #include "brw_eu.h" #include "brw_private.h" @@ -109,171 +108,6 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz) } } -unsigned -brw_get_default_exec_size(struct brw_codegen *p) -{ - return p->current->exec_size; -} - -unsigned -brw_get_default_group(struct brw_codegen *p) -{ - return p->current->group; -} - -unsigned -brw_get_default_access_mode(struct brw_codegen *p) -{ - return p->current->access_mode; -} - -gen_swsb -brw_get_default_swsb(struct brw_codegen *p) -{ - return p->current->swsb; -} - -void -brw_set_default_exec_size(struct brw_codegen *p, unsigned value) -{ - p->current->exec_size = value; -} - -void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc) -{ - p->current->predicate = pc; -} - -void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse) -{ - p->current->pred_inv = predicate_inverse; -} - -void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg) -{ - assert(subreg < 2); - p->current->flag_subreg = reg * 2 + subreg; -} - -void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ) -{ - p->current->access_mode = access_mode; -} - -/** - * Apply the range of channel enable signals given by - * [group, group + exec_size) to the instruction passed as argument. 
- */ -void -brw_eu_inst_set_group(const struct intel_device_info *devinfo, - brw_eu_inst *inst, unsigned group) -{ - if (devinfo->ver >= 20) { - assert(group % 8 == 0 && group < 32); - brw_eu_inst_set_qtr_control(devinfo, inst, group / 8); - - } else { - assert(group % 4 == 0 && group < 32); - brw_eu_inst_set_qtr_control(devinfo, inst, group / 8); - brw_eu_inst_set_nib_control(devinfo, inst, (group / 4) % 2); - - } -} - -void -brw_set_default_group(struct brw_codegen *p, unsigned group) -{ - p->current->group = group; -} - -void brw_set_default_mask_control( struct brw_codegen *p, unsigned value ) -{ - p->current->mask_control = value; -} - -void brw_set_default_saturate( struct brw_codegen *p, bool enable ) -{ - p->current->saturate = enable; -} - -void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value) -{ - p->current->acc_wr_control = value; -} - -void brw_set_default_swsb(struct brw_codegen *p, gen_swsb value) -{ - p->current->swsb = value; -} - -void brw_push_insn_state( struct brw_codegen *p ) -{ - assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); - *(p->current + 1) = *p->current; - p->current++; -} - -void brw_pop_insn_state( struct brw_codegen *p ) -{ - assert(p->current != p->stack); - p->current--; -} - - -/*********************************************************************** - */ -void -brw_init_codegen(const struct brw_isa_info *isa, - struct brw_codegen *p, void *mem_ctx) -{ - memset(p, 0, sizeof(*p)); - - p->isa = isa; - p->devinfo = isa->devinfo; - /* - * Set the initial instruction store array size to 1024, if found that - * isn't enough, then it will double the store size at brw_next_insn() - * until out of memory. - */ - p->store_size = 1024; - p->store = rzalloc_array(mem_ctx, brw_eu_inst, p->store_size); - p->nr_insn = 0; - p->current = p->stack; - memset(p->current, 0, sizeof(p->current[0])); - - p->mem_ctx = mem_ctx; - - /* Some defaults? 
- */ - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ - brw_set_default_saturate(p, 0); - - /* Set up control flow stack */ - p->if_stack_depth = 0; - p->if_stack_array_size = 16; - p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size); - - p->loop_stack_depth = 0; - p->loop_stack_array_size = 16; - p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); -} - - -const unsigned *brw_get_program( struct brw_codegen *p, - unsigned *sz ) -{ - *sz = p->next_insn_offset; - return (const unsigned *)p->store; -} - -const struct intel_shader_reloc * -brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs) -{ - *num_relocs = p->num_relocs; - return p->relocs; -} - DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL); bool brw_should_dump_shader_bin(void) @@ -318,220 +152,6 @@ void brw_dump_shader_bin(void *assembly, int start_offset, int end_offset, close(fd); } -bool brw_try_override_assembly(struct brw_codegen *p, int start_offset, - const char *read_path, const char *identifier) -{ - char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier); - - int fd = open(name, O_RDONLY); - ralloc_free(name); - - if (fd == -1) { - return false; - } - - struct stat sb; - if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) { - close(fd); - return false; - } - - p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_eu_inst); - p->nr_insn += sb.st_size / sizeof(brw_eu_inst); - - p->next_insn_offset = start_offset + sb.st_size; - p->store_size = (start_offset + sb.st_size) / sizeof(brw_eu_inst); - p->store = (brw_eu_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset); - assert(p->store); - - ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size); - close(fd); - if (ret != sb.st_size) { - return false; - } - - ASSERTED bool valid = - brw_validate_instructions(p->isa, p->store, - start_offset, 
p->next_insn_offset, - NULL); - assert(valid); - - return true; -} - -const struct brw_label * -brw_find_label(const struct brw_label *root, int offset) -{ - const struct brw_label *curr = root; - - if (curr != NULL) - { - do { - if (curr->offset == offset) - return curr; - - curr = curr->next; - } while (curr != NULL); - } - - return curr; -} - -void -brw_create_label(struct brw_label **labels, int offset, void *mem_ctx) -{ - if (*labels != NULL) { - struct brw_label *curr = *labels; - struct brw_label *prev; - - do { - prev = curr; - - if (curr->offset == offset) - return; - - curr = curr->next; - } while (curr != NULL); - - curr = ralloc(mem_ctx, struct brw_label); - curr->offset = offset; - curr->number = prev->number + 1; - curr->next = NULL; - prev->next = curr; - } else { - struct brw_label *root = ralloc(mem_ctx, struct brw_label); - root->number = 0; - root->offset = offset; - root->next = NULL; - *labels = root; - } -} - -const struct brw_label * -brw_label_assembly(const struct brw_isa_info *isa, - const void *assembly, int start, int end, void *mem_ctx) -{ - const struct intel_device_info *const devinfo = isa->devinfo; - - struct brw_label *root_label = NULL; - - int to_bytes_scale = sizeof(brw_eu_inst) / brw_jump_scale(devinfo); - - for (int offset = start; offset < end;) { - const brw_eu_inst *inst = (const brw_eu_inst *) ((const char *) assembly + offset); - brw_eu_inst uncompacted; - - bool is_compact = brw_eu_inst_cmpt_control(devinfo, inst); - - if (is_compact) { - brw_eu_compact_inst *compacted = (brw_eu_compact_inst *)inst; - brw_uncompact_instruction(isa, &uncompacted, compacted); - inst = &uncompacted; - } - - if (brw_has_uip(devinfo, brw_eu_inst_opcode(isa, inst))) { - /* Instructions that have UIP also have JIP. 
*/ - brw_create_label(&root_label, - offset + brw_eu_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx); - brw_create_label(&root_label, - offset + brw_eu_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx); - } else if (brw_has_jip(devinfo, brw_eu_inst_opcode(isa, inst))) { - int jip = brw_eu_inst_jip(devinfo, inst); - - brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx); - } - - if (is_compact) { - offset += sizeof(brw_eu_compact_inst); - } else { - offset += sizeof(brw_eu_inst); - } - } - - return root_label; -} - -void -brw_disassemble_with_labels(const struct brw_isa_info *isa, - const void *assembly, int start, int end, FILE *out) -{ - void *mem_ctx = ralloc_context(NULL); - const struct brw_label *root_label = - brw_label_assembly(isa, assembly, start, end, mem_ctx); - - brw_disassemble(isa, assembly, start, end, root_label, NULL, out); - - ralloc_free(mem_ctx); -} - -void -brw_disassemble(const struct brw_isa_info *isa, - const void *assembly, int start, int end, - const struct brw_label *root_label, - int64_t *lineno_offset, FILE *out) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - bool dump_hex = INTEL_DEBUG(DEBUG_HEX); - - for (int offset = start; offset < end;) { - const brw_eu_inst *insn = (const brw_eu_inst *)((char *)assembly + offset); - brw_eu_inst uncompacted; - - if (root_label != NULL) { - const struct brw_label *label = brw_find_label(root_label, offset); - if (label != NULL) { - fprintf(out, "\nLABEL%d:\n", label->number); - } - } - - bool compacted = brw_eu_inst_cmpt_control(devinfo, insn); - if (lineno_offset) - fprintf(out, "0x%08" PRIx64 ": ", *lineno_offset + offset); - - if (compacted) { - brw_eu_compact_inst *compacted = (brw_eu_compact_inst *)insn; - if (dump_hex) { - unsigned char * insn_ptr = ((unsigned char *)&insn[0]); - const unsigned int blank_spaces = 24; - for (int i = 0 ; i < 8; i = i + 4) { - fprintf(out, "%02x %02x %02x %02x ", - insn_ptr[i], - insn_ptr[i + 1], - insn_ptr[i + 2], - 
insn_ptr[i + 3]); - } - /* Make compacted instructions hex value output vertically aligned - * with uncompacted instructions hex value - */ - fprintf(out, "%*c", blank_spaces, ' '); - } - - brw_uncompact_instruction(isa, &uncompacted, compacted); - insn = &uncompacted; - } else { - if (dump_hex) { - unsigned char * insn_ptr = ((unsigned char *)&insn[0]); - for (int i = 0 ; i < 16; i = i + 4) { - fprintf(out, "%02x %02x %02x %02x ", - insn_ptr[i], - insn_ptr[i + 1], - insn_ptr[i + 2], - insn_ptr[i + 3]); - } - } - } - - brw_disassemble_inst(out, isa, insn, compacted, offset, root_label); - - if (compacted) { - offset += sizeof(brw_eu_compact_inst); - } else { - offset += sizeof(brw_eu_inst); - } - } -} - static const struct opcode_desc opcode_descs[] = { /* IR, HW, name, nsrc, ndst, gfx_vers assuming Gfx9+ */ { BRW_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL }, @@ -679,41 +299,3 @@ brw_opcode_desc_from_hw(const struct brw_isa_info *isa, unsigned hw) { return hw < ARRAY_SIZE(isa->hw_to_descs) ? 
isa->hw_to_descs[hw] : NULL; } - -unsigned -brw_num_sources_from_inst(const struct brw_isa_info *isa, - const brw_eu_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - const struct opcode_desc *desc = - brw_opcode_desc(isa, brw_eu_inst_opcode(isa, inst)); - unsigned math_function; - - if (brw_eu_inst_opcode(isa, inst) == BRW_OPCODE_MATH) { - math_function = brw_eu_inst_math_function(devinfo, inst); - } else { - assert(desc->nsrc < 4); - return desc->nsrc; - } - - switch (math_function) { - case GEN_MATH_INV: - case GEN_MATH_LOG: - case GEN_MATH_EXP: - case GEN_MATH_SQRT: - case GEN_MATH_RSQ: - case GEN_MATH_SIN: - case GEN_MATH_COS: - case GEN_MATH_INVM: - case GEN_MATH_RSQRTM: - return 1; - case GEN_MATH_FDIV: - case GEN_MATH_POW: - case GEN_MATH_INT_DIV_BOTH: - case GEN_MATH_INT_DIV_QUOTIENT: - case GEN_MATH_INT_DIV_REMAINDER: - return 2; - default: - UNREACHABLE("not reached"); - } -} diff --git a/src/intel/compiler/brw/brw_eu.h b/src/intel/compiler/brw/brw_eu.h index 0c6748b0fd7..e65e0e1e9f6 100644 --- a/src/intel/compiler/brw/brw_eu.h +++ b/src/intel/compiler/brw/brw_eu.h @@ -10,7 +10,6 @@ #include #include -#include "brw_eu_inst.h" #include "brw_compiler.h" #include "brw_eu_defines.h" #include "brw_isa_info.h" @@ -25,219 +24,10 @@ extern "C" { #endif -struct disasm_info; - -#define BRW_EU_MAX_INSN_STACK 5 - -struct brw_insn_state { - /* One of BRW_EXECUTE_* */ - unsigned exec_size:3; - - /* Group in units of channels */ - unsigned group:5; - - /* One of BRW_MASK_* */ - unsigned mask_control:1; - - /* Scheduling info for Gfx12+ */ - gen_swsb swsb; - - bool saturate:1; - - /* One of BRW_ALIGN_* */ - unsigned access_mode:1; - - /* One of BRW_PREDICATE_* */ - enum brw_predicate predicate:4; - - bool pred_inv:1; - - /* Flag subreg. 
Bottom bit is subreg, top bits are reg */ - unsigned flag_subreg:3; - - bool acc_wr_control:1; -}; - -struct brw_codegen { - brw_eu_inst *store; - int store_size; - unsigned nr_insn; - unsigned int next_insn_offset; - - void *mem_ctx; - - /* Allow clients to push/pop instruction state: - */ - struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK]; - struct brw_insn_state *current; - - const struct brw_isa_info *isa; - const struct intel_device_info *devinfo; - - /* Control flow stacks: - * - if_stack contains IF and ELSE instructions which must be patched - * (and popped) once the matching ENDIF instruction is encountered. - * - * Just store the instruction pointer(an index). - */ - int *if_stack; - int if_stack_depth; - int if_stack_array_size; - - /** - * loop_stack contains the instruction pointers of the starts of loops which - * must be patched (and popped) once the matching WHILE instruction is - * encountered. - */ - int *loop_stack; - int loop_stack_depth; - int loop_stack_array_size; - - struct intel_shader_reloc *relocs; - int num_relocs; - int reloc_array_size; -}; - -struct brw_label { - int offset; - int number; - struct brw_label *next; -}; - -static inline brw_eu_inst * -brw_eu_last_inst(struct brw_codegen *p) -{ - return &p->store[p->nr_insn - 1]; -} - -void brw_pop_insn_state( struct brw_codegen *p ); -void brw_push_insn_state( struct brw_codegen *p ); -unsigned brw_get_default_exec_size(struct brw_codegen *p); -unsigned brw_get_default_group(struct brw_codegen *p); -unsigned brw_get_default_access_mode(struct brw_codegen *p); -gen_swsb brw_get_default_swsb(struct brw_codegen *p); -void brw_set_default_exec_size(struct brw_codegen *p, unsigned value); -void brw_set_default_mask_control( struct brw_codegen *p, unsigned value ); -void brw_set_default_saturate( struct brw_codegen *p, bool enable ); -void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ); -void brw_eu_inst_set_group(const struct intel_device_info *devinfo, - 
brw_eu_inst *inst, unsigned group); -void brw_set_default_group(struct brw_codegen *p, unsigned group); -void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc); -void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse); -void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg); -void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value); -void brw_set_default_swsb(struct brw_codegen *p, gen_swsb value); - -uint32_t brw_swsb_encode(const struct intel_device_info *devinfo, - gen_swsb swsb, enum opcode op); -gen_swsb brw_swsb_decode(const struct intel_device_info *devinfo, - bool is_unordered, uint32_t raw, enum opcode op); - -void brw_init_codegen(const struct brw_isa_info *isa, - struct brw_codegen *p, void *mem_ctx); -bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode); -bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode); -bool brw_has_branch_ctrl(const struct intel_device_info *devinfo, enum opcode opcode); -const struct intel_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p, - unsigned *num_relocs); -const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz ); - bool brw_should_dump_shader_bin(void); void brw_dump_shader_bin(void *assembly, int start_offset, int end_offset, const char *identifier); -bool brw_try_override_assembly(struct brw_codegen *p, int start_offset, - const char *read_path, const char *identifier); - -void brw_realign(struct brw_codegen *p, unsigned alignment); -int brw_append_data(struct brw_codegen *p, void *data, - unsigned size, unsigned alignment); -brw_eu_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode); -void brw_add_reloc(struct brw_codegen *p, uint32_t id, - enum intel_shader_reloc_type type, - uint32_t offset, uint32_t delta); -void brw_set_dest(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg dest); -void brw_set_src0(struct brw_codegen *p, 
brw_eu_inst *insn, struct brw_reg reg); - -brw_eu_inst *brw_alu1(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, - struct brw_reg src); -brw_eu_inst *brw_alu2(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, - struct brw_reg src0, struct brw_reg src1); -brw_eu_inst *brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, - struct brw_reg src0, struct brw_reg src1, struct brw_reg src2); - -/* Helpers for regular instructions: - */ -#define ALU1(OP) \ -brw_eu_inst *brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src0); - -#define ALU2(OP) \ -brw_eu_inst *brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1); - -#define ALU3(OP) \ -brw_eu_inst *brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1, \ - struct brw_reg src2); - -ALU1(MOV) -ALU2(SEL) -ALU1(NOT) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHR) -ALU2(SHL) -ALU1(DIM) -ALU2(ASR) -ALU2(ROL) -ALU2(ROR) -ALU3(CSEL) -ALU1(F32TO16) -ALU1(F16TO32) -ALU2(ADD) -ALU3(ADD3) -ALU2(AVG) -ALU2(MUL) -ALU1(FRC) -ALU1(RNDD) -ALU1(RNDE) -ALU1(RNDU) -ALU1(RNDZ) -ALU2(MAC) -ALU2(MACL) -ALU2(MACH) -ALU1(LZD) -ALU2(DP4) -ALU2(DPH) -ALU2(DP3) -ALU2(DP2) -ALU3(DP4A) -ALU2(LINE) -ALU2(PLN) -ALU3(MAD) -ALU3(LRP) -ALU1(BFREV) -ALU3(BFE) -ALU2(BFI1) -ALU3(BFI2) -ALU1(FBH) -ALU1(FBL) -ALU1(CBIT) -ALU2(ADDC) -ALU2(SUBB) - -#undef ALU1 -#undef ALU2 -#undef ALU3 - /* In Xe2+ each register is 64bytes/512bits long while older platforms it is * 32bytes/256bits long. */ @@ -1054,179 +844,9 @@ brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo, SET_BITS(simd_mode, 16, 16)); } -static inline enum gfx12_systolic_depth -translate_systolic_depth(unsigned d) -{ - /* Could also return (ffs(d) - 1) & 3. 
*/ - switch (d) { - case 2: return BRW_SYSTOLIC_DEPTH_2; - case 4: return BRW_SYSTOLIC_DEPTH_4; - case 8: return BRW_SYSTOLIC_DEPTH_8; - case 16: return BRW_SYSTOLIC_DEPTH_16; - default: UNREACHABLE("Invalid systolic depth."); - } -} - -void -brw_SEND(struct brw_codegen *p, - unsigned sfid, - struct brw_reg dst, - struct brw_reg payload0, - struct brw_reg payload1, - struct brw_reg desc, - struct brw_reg ex_desc, - uint32_t ex_desc_imm_inst, - unsigned ex_mlen, - bool ex_bso, - bool eot, - bool gather); - -void gfx6_math(struct brw_codegen *p, - struct brw_reg dest, - unsigned function, - struct brw_reg src0, - struct brw_reg src1); - -/** - * Return the generation-specific jump distance scaling factor. - * - * Given the number of instructions to jump, we need to scale by - * some number to obtain the actual jump distance to program in an - * instruction. - */ -static inline unsigned -brw_jump_scale(const struct intel_device_info *devinfo) -{ - /* Broadwell measures jump targets in bytes. */ - return 16; -} - -void brw_barrier(struct brw_codegen *p, struct brw_reg src); - -/* If/else/endif. Works by manipulating the execution flags on each - * channel. 
- */ -brw_eu_inst *brw_IF(struct brw_codegen *p, unsigned execute_size); - -void brw_ELSE(struct brw_codegen *p); -void brw_ENDIF(struct brw_codegen *p); - -brw_eu_inst *brw_BFN(struct brw_codegen *p, struct brw_reg dest, - struct brw_reg src0, struct brw_reg src1, - struct brw_reg src2, struct brw_reg table_byte); - -/* DO/WHILE loops: - */ -brw_eu_inst *brw_DO(struct brw_codegen *p, unsigned execute_size); - -brw_eu_inst *brw_WHILE(struct brw_codegen *p); - -brw_eu_inst *brw_BREAK(struct brw_codegen *p); -brw_eu_inst *brw_CONT(struct brw_codegen *p); -brw_eu_inst *brw_HALT(struct brw_codegen *p); - -/* Forward jumps: - */ -brw_eu_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index, - unsigned predicate_control); - -void brw_NOP(struct brw_codegen *p); - -void brw_WAIT(struct brw_codegen *p); - -void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func); - -/* Special case: there is never a destination, execution size will be - * taken from src0: - */ -void brw_CMP(struct brw_codegen *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1); - -void brw_CMPN(struct brw_codegen *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1); - -brw_eu_inst *brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth, - unsigned rcount, struct brw_reg dest, struct brw_reg src0, - struct brw_reg src1, struct brw_reg src2); - -brw_eu_inst *brw_SRND(struct brw_codegen *p, struct brw_reg dest, - struct brw_reg src0, struct brw_reg src1); - -void -brw_broadcast(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg idx); - -void -brw_float_controls_mode(struct brw_codegen *p, - unsigned mode, unsigned mask); - -void -brw_MOV_reloc_imm(struct brw_codegen *p, - struct brw_reg dst, - enum brw_reg_type src_type, - uint32_t id, uint32_t base); - -unsigned -brw_num_sources_from_inst(const struct brw_isa_info *isa, - const brw_eu_inst *inst); - -void 
brw_set_src1(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg reg); - -void brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *insn, - unsigned desc, unsigned ex_desc, bool gather); - -static inline void -brw_set_desc(struct brw_codegen *p, brw_eu_inst *insn, unsigned desc, bool gather) -{ - brw_set_desc_ex(p, insn, desc, 0, gather); -} - -void brw_set_uip_jip(struct brw_codegen *p, int start_offset, int final_halt_offset); - enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod); enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod); -/* brw_eu_compact.c */ -void brw_compact_instructions(struct brw_codegen *p, int start_offset, - struct disasm_info *disasm); -void brw_uncompact_instruction(const struct brw_isa_info *isa, - brw_eu_inst *dst, brw_eu_compact_inst *src); -bool brw_try_compact_instruction(const struct brw_isa_info *isa, - brw_eu_compact_inst *dst, const brw_eu_inst *src); - -void brw_debug_compact_uncompact(const struct brw_isa_info *isa, - brw_eu_inst *orig, brw_eu_inst *uncompacted); - -/* brw_eu_validate.c */ -bool brw_validate_instruction(const struct brw_isa_info *isa, - const brw_eu_inst *inst, int offset, - unsigned inst_size, - struct disasm_info *disasm); -bool brw_validate_instructions(const struct brw_isa_info *isa, - const void *assembly, int start_offset, int end_offset, - struct disasm_info *disasm); - -static inline int -next_offset(struct brw_codegen *p, void *store, int offset) -{ - const struct intel_device_info *devinfo = p->devinfo; - assert((char *)store + offset < (char *)p->store + p->next_insn_offset); - brw_eu_inst *insn = (brw_eu_inst *)((char *)store + offset); - - if (brw_eu_inst_cmpt_control(devinfo, insn)) - return offset + 8; - else - return offset + 16; -} - /** Maximum SEND message length */ #define BRW_MAX_MSG_LENGTH 15 diff --git a/src/intel/compiler/brw/brw_eu_compact.c b/src/intel/compiler/brw/brw_eu_compact.c deleted file mode 100644 index de4427980cb..00000000000 --- 
a/src/intel/compiler/brw/brw_eu_compact.c +++ /dev/null @@ -1,2499 +0,0 @@ -/* - * Copyright © 2012-2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -/** @file - * - * Instruction compaction is a feature of G45 and newer hardware that allows - * for a smaller instruction encoding. - * - * The instruction cache is on the order of 32KB, and many programs generate - * far more instructions than that. The instruction cache is built to barely - * keep up with instruction dispatch ability in cache hit cases -- L1 - * instruction cache misses that still hit in the next level could limit - * throughput by around 50%. - * - * The idea of instruction compaction is that most instructions use a tiny - * subset of the GPU functionality, so we can encode what would be a 16 byte - * instruction in 8 bytes using some lookup tables for various fields. - * - * - * Instruction compaction capabilities vary subtly by generation. - * - * G45's support for instruction compaction is very limited. Jump counts on - * this generation are in units of 16-byte uncompacted instructions. As such, - * all jump targets must be 16-byte aligned. Also, all instructions must be - * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned. - * A G45-only instruction, NENOP, must be used to provide padding to align - * uncompacted instructions. - * - * Gfx5 removes these restrictions and changes jump counts to be in units of - * 8-byte compacted instructions, allowing jump targets to be only 8-byte - * aligned. Uncompacted instructions can also be placed on 8-byte boundaries. - * - * Gfx6 adds the ability to compact instructions with a limited range of - * immediate values. Compactable immediates have 12 unrestricted bits, and a - * 13th bit that's replicated through the high 20 bits, to create the 32-bit - * value of DW3 in the uncompacted instruction word. 
- * - * On Gfx7 we can compact some control flow instructions with a small positive - * immediate in the low bits of DW3, like ENDIF with the JIP field. Other - * control flow instructions with UIP cannot be compacted, because of the - * replicated 13th bit. No control flow instructions can be compacted on Gfx6 - * since the jump count field is not in DW3. - * - * break JIP/UIP - * cont JIP/UIP - * halt JIP/UIP - * if JIP/UIP - * else JIP (plus UIP on BDW+) - * endif JIP - * while JIP (must be negative) - * - * Gen 8 adds support for compacting 3-src instructions. - * - * Gfx12 reduces the number of bits that available to compacted immediates from - * 13 to 12, but improves the compaction of floating-point immediates by - * allowing the high bits to be encoded (the sign, 8-bit exponent, and the - * three most significant bits of the mantissa), rather than the lowest bits of - * the mantissa. - */ - -#include "brw_eu.h" -#include "brw_disasm.h" -#include "brw_disasm_info.h" -#include "dev/intel_debug.h" - -static const uint16_t g45_subreg_table[32] = { - 0b000000000000000, - 0b000000010000000, - 0b000001000000000, - 0b000100000000000, - 0b000000000100000, - 0b100000000000000, - 0b000000000010000, - 0b001100000000000, - 0b001010000000000, - 0b000000100000000, - 0b001000000000000, - 0b000000000001000, - 0b000000001000000, - 0b000000000000001, - 0b000010000000000, - 0b000000010100000, - 0b000000000000111, - 0b000001000100000, - 0b011000000000000, - 0b000000110000000, - 0b000000000000010, - 0b000000000000100, - 0b000000001100000, - 0b000100000000010, - 0b001110011000110, - 0b001110100001000, - 0b000110011000110, - 0b000001000011000, - 0b000110010000100, - 0b001100000000110, - 0b000000010000110, - 0b000001000110000, -}; - -static const uint32_t gfx8_control_index_table[32] = { - 0b0000000000000000010, - 0b0000100000000000000, - 0b0000100000000000001, - 0b0000100000000000010, - 0b0000100000000000011, - 0b0000100000000000100, - 0b0000100000000000101, - 
0b0000100000000000111, - 0b0000100000000001000, - 0b0000100000000001001, - 0b0000100000000001101, - 0b0000110000000000000, - 0b0000110000000000001, - 0b0000110000000000010, - 0b0000110000000000011, - 0b0000110000000000100, - 0b0000110000000000101, - 0b0000110000000000111, - 0b0000110000000001001, - 0b0000110000000001101, - 0b0000110000000010000, - 0b0000110000100000000, - 0b0001000000000000000, - 0b0001000000000000010, - 0b0001000000000000100, - 0b0001000000100000000, - 0b0010110000000000000, - 0b0010110000000010000, - 0b0011000000000000000, - 0b0011000000100000000, - 0b0101000000000000000, - 0b0101000000100000000, -}; - -static const uint32_t gfx8_datatype_table[32] = { - 0b001000000000000000001, - 0b001000000000001000000, - 0b001000000000001000001, - 0b001000000000011000001, - 0b001000000000101011101, - 0b001000000010111011101, - 0b001000000011101000001, - 0b001000000011101000101, - 0b001000000011101011101, - 0b001000001000001000001, - 0b001000011000001000000, - 0b001000011000001000001, - 0b001000101000101000101, - 0b001000111000101000100, - 0b001000111000101000101, - 0b001011100011101011101, - 0b001011101011100011101, - 0b001011101011101011100, - 0b001011101011101011101, - 0b001011111011101011100, - 0b000000000010000001100, - 0b001000000000001011101, - 0b001000000000101000101, - 0b001000001000001000000, - 0b001000101000101000100, - 0b001000111000100000100, - 0b001001001001000001001, - 0b001010111011101011101, - 0b001011111011101011101, - 0b001001111001101001100, - 0b001001001001001001000, - 0b001001011001001001000, -}; - -static const uint16_t gfx8_subreg_table[32] = { - 0b000000000000000, - 0b000000000000001, - 0b000000000001000, - 0b000000000001111, - 0b000000000010000, - 0b000000010000000, - 0b000000100000000, - 0b000000110000000, - 0b000001000000000, - 0b000001000010000, - 0b000001010000000, - 0b001000000000000, - 0b001000000000001, - 0b001000010000001, - 0b001000010000010, - 0b001000010000011, - 0b001000010000100, - 0b001000010000111, - 0b001000010001000, - 
0b001000010001110, - 0b001000010001111, - 0b001000110000000, - 0b001000111101000, - 0b010000000000000, - 0b010000110000000, - 0b011000000000000, - 0b011110010000111, - 0b100000000000000, - 0b101000000000000, - 0b110000000000000, - 0b111000000000000, - 0b111000000011100, -}; - -static const uint16_t gfx8_src_index_table[32] = { - 0b000000000000, - 0b000000000010, - 0b000000010000, - 0b000000010010, - 0b000000011000, - 0b000000100000, - 0b000000101000, - 0b000001001000, - 0b000001010000, - 0b000001110000, - 0b000001111000, - 0b001100000000, - 0b001100000010, - 0b001100001000, - 0b001100010000, - 0b001100010010, - 0b001100100000, - 0b001100101000, - 0b001100111000, - 0b001101000000, - 0b001101000010, - 0b001101001000, - 0b001101010000, - 0b001101100000, - 0b001101101000, - 0b001101110000, - 0b001101110001, - 0b001101111000, - 0b010001101000, - 0b010001101001, - 0b010001101010, - 0b010110001000, -}; - -static const uint32_t gfx11_datatype_table[32] = { - 0b001000000000000000001, - 0b001000000000001000000, - 0b001000000000001000001, - 0b001000000000011000001, - 0b001000000000101100101, - 0b001000000101111100101, - 0b001000000100101000001, - 0b001000000100101000101, - 0b001000000100101100101, - 0b001000001000001000001, - 0b001000011000001000000, - 0b001000011000001000001, - 0b001000101000101000101, - 0b001000111000101000100, - 0b001000111000101000101, - 0b001100100100101100101, - 0b001100101100100100101, - 0b001100101100101100100, - 0b001100101100101100101, - 0b001100111100101100100, - 0b000000000010000001100, - 0b001000000000001100101, - 0b001000000000101000101, - 0b001000001000001000000, - 0b001000101000101000100, - 0b001000111000100000100, - 0b001001001001000001001, - 0b001101111100101100101, - 0b001100111100101100101, - 0b001001111001101001100, - 0b001001001001001001000, - 0b001001011001001001000, -}; - -static const uint32_t gfx12_control_index_table[32] = { - 0b000000000000000000100, /* (16|M0) */ - 0b000000000000000000011, /* (8|M0) */ - 0b000000010000000000000, 
/* (W) (1|M0) */ - 0b000000010000000000100, /* (W) (16|M0) */ - 0b000000010000000000011, /* (W) (8|M0) */ - 0b010000000000000000100, /* (16|M0) (ge)f0.0 */ - 0b000000000000000100100, /* (16|M16) */ - 0b010100000000000000100, /* (16|M0) (lt)f0.0 */ - 0b000000000000000000000, /* (1|M0) */ - 0b000010000000000000100, /* (16|M0) (sat) */ - 0b000000000000000010011, /* (8|M8) */ - 0b001100000000000000100, /* (16|M0) (gt)f0.0 */ - 0b000100000000000000100, /* (16|M0) (eq)f0.0 */ - 0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */ - 0b001000000000000000100, /* (16|M0) (ne)f0.0 */ - 0b000000000000100000100, /* (f0.0) (16|M0) */ - 0b010100000000000000011, /* (8|M0) (lt)f0.0 */ - 0b000000000000110000100, /* (f1.0) (16|M0) */ - 0b000000010000000000001, /* (W) (2|M0) */ - 0b000000000000101000100, /* (f0.1) (16|M0) */ - 0b000000000000111000100, /* (f1.1) (16|M0) */ - 0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */ - 0b000000000000000100011, /* (8|M16) */ - 0b000000000000000110011, /* (8|M24) */ - 0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */ - 0b010000000000000000011, /* (8|M0) (ge)f0.0 */ - 0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */ - 0b000010000000000000011, /* (8|M0) (sat) */ - 0b010100000000010000100, /* (16|M0) (lt)f1.0 */ - 0b000100000000000000011, /* (8|M0) (eq)f0.0 */ - 0b000001000000000000011, /* (8|M0) {AccWrEn} */ - 0b000000010000000100100, /* (W) (16|M16) */ -}; - -static const uint32_t gfx12_datatype_table[32] = { - 0b11010110100101010100, /* grf<1>:f grf:f grf:f */ - 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */ - 0b00000010101101010100, /* grf<1>:f imm:f arf:ub */ - 0b01010110110101010100, /* grf<1>:f grf:f imm:f */ - 0b11010100100101010100, /* arf<1>:f grf:f grf:f */ - 0b11010010100101010100, /* grf<1>:f arf:f grf:f */ - 0b01010100110101010100, /* arf<1>:f grf:f imm:f */ - 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */ - 0b11010000100101010100, /* arf<1>:f arf:f grf:f */ - 0b00101110110011001100, /* grf<1>:d grf:d imm:w */ - 
0b10110110100011001100, /* grf<1>:d grf:d grf:d */ - 0b01010010110101010100, /* grf<1>:f arf:f imm:f */ - 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */ - 0b01010000110101010100, /* arf<1>:f arf:f imm:f */ - 0b00110110110011001100, /* grf<1>:d grf:d imm:d */ - 0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */ - 0b00000111000101010100, /* grf<2>:f grf:f arf:ub */ - 0b00101100110011001100, /* arf<1>:d grf:d imm:w */ - 0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */ - 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */ - 0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */ - 0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */ - 0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */ - 0b00000110100101001100, /* grf<1>:d grf:f arf:ub */ - 0b10001100100011001100, /* arf<1>:d grf:d grf:uw */ - 0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */ - 0b00101110110001001100, /* grf<1>:d grf:ud imm:w */ - 0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */ - 0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */ - 0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */ - 0b00000110100011010100, /* grf<1>:f grf:d arf:ub */ - 0b00000010100101010100, /* grf<1>:f arf:f arf:ub */ -}; - -static const uint16_t gfx12_subreg_table[32] = { - 0b000000000000000, /* .0 .0 .0 */ - 0b100000000000000, /* .0 .0 .16 */ - 0b001000000000000, /* .0 .0 .4 */ - 0b011000000000000, /* .0 .0 .12 */ - 0b000000010000000, /* .0 .4 .0 */ - 0b010000000000000, /* .0 .0 .8 */ - 0b101000000000000, /* .0 .0 .20 */ - 0b000000000001000, /* .8 .0 .0 */ - 0b000000100000000, /* .0 .8 .0 */ - 0b110000000000000, /* .0 .0 .24 */ - 0b111000000000000, /* .0 .0 .28 */ - 0b000001000000000, /* .0 .16 .0 */ - 0b000000000000100, /* .4 .0 .0 */ - 0b000001100000000, /* .0 .24 .0 */ - 0b000001010000000, /* .0 .20 .0 */ - 0b000000110000000, /* .0 .12 .0 */ - 0b000001110000000, /* .0 .28 .0 */ - 0b000000000011100, /* .28 .0 .0 */ - 0b000000000010000, /* .16 .0 .0 */ - 0b000000000001100, /* .12 .0 .0 */ - 
0b000000000011000, /* .24 .0 .0 */ - 0b000000000010100, /* .20 .0 .0 */ - 0b000000000000010, /* .2 .0 .0 */ - 0b000000101000000, /* .0 .10 .0 */ - 0b000000001000000, /* .0 .2 .0 */ - 0b000000010000100, /* .4 .4 .0 */ - 0b000000001011100, /* .28 .2 .0 */ - 0b000000001000010, /* .2 .2 .0 */ - 0b000000110001100, /* .12 .12 .0 */ - 0b000000000100000, /* .0 .1 .0 */ - 0b000000001100000, /* .0 .3 .0 */ - 0b110001100000000, /* .0 .24 .24 */ -}; - -static const uint16_t gfx12_src0_index_table[16] = { - 0b010001100100, /* r<8;8,1> */ - 0b000000000000, /* r<0;1,0> */ - 0b010001100110, /* -r<8;8,1> */ - 0b010001100101, /* (abs)r<8;8,1> */ - 0b000000000010, /* -r<0;1,0> */ - 0b001000000000, /* r<2;1,0> */ - 0b001001000000, /* r<2;4,0> */ - 0b001101000000, /* r<4;4,0> */ - 0b001000100100, /* r<2;2,1> */ - 0b001100000000, /* r<4;1,0> */ - 0b001000100110, /* -r<2;2,1> */ - 0b001101000100, /* r<4;4,1> */ - 0b010001100111, /* -(abs)r<8;8,1> */ - 0b000100000000, /* r<1;1,0> */ - 0b000000000001, /* (abs)r<0;1,0> */ - 0b111100010000, /* r[a]<1,0> */ -}; - -static const uint16_t gfx12_src1_index_table[16] = { - 0b000100011001, /* r<8;8,1> */ - 0b000000000000, /* r<0;1,0> */ - 0b100100011001, /* -r<8;8,1> */ - 0b100000000000, /* -r<0;1,0> */ - 0b010100011001, /* (abs)r<8;8,1> */ - 0b100011010000, /* -r<4;4,0> */ - 0b000010000000, /* r<2;1,0> */ - 0b000010001001, /* r<2;2,1> */ - 0b100010001001, /* -r<2;2,1> */ - 0b000011010000, /* r<4;4,0> */ - 0b000011010001, /* r<4;4,1> */ - 0b000011000000, /* r<4;1,0> */ - 0b110100011001, /* -(abs)r<8;8,1> */ - 0b010000000000, /* (abs)r<0;1,0> */ - 0b110000000000, /* -(abs)r<0;1,0> */ - 0b100011010001, /* -r<4;4,1> */ -}; - -static const uint16_t xehp_src0_index_table[16] = { - 0b000100000000, /* r<1;1,0> */ - 0b000000000000, /* r<0;1,0> */ - 0b000100000010, /* -r<1;1,0> */ - 0b000100000001, /* (abs)r<1;1,0> */ - 0b000000000010, /* -r<0;1,0> */ - 0b001000000000, /* r<2;1,0> */ - 0b001001000000, /* r<2;4,0> */ - 0b001101000000, /* r<4;4,0> */ - 
0b001100000000, /* r<4;1,0> */ - 0b000100000011, /* -(abs)r<1;1,0> */ - 0b000000000001, /* (abs)r<0;1,0> */ - 0b111100010000, /* r[a]<1,0> */ - 0b010001100000, /* r<8;8,0> */ - 0b000101000000, /* r<1;4,0> */ - 0b010001001000, /* r<8;4,2> */ - 0b001000000010, /* -r<2;1,0> */ -}; - -static const uint16_t xehp_src1_index_table[16] = { - 0b000001000000, /* r<1;1,0> */ - 0b000000000000, /* r<0;1,0> */ - 0b100001000000, /* -r<1;1,0> */ - 0b100000000000, /* -r<0;1,0> */ - 0b010001000000, /* (abs)r<1;1,0> */ - 0b100011010000, /* -r<4;4,0> */ - 0b000010000000, /* r<2;1,0> */ - 0b000011010000, /* r<4;4,0> */ - 0b000011000000, /* r<4;1,0> */ - 0b110001000000, /* -(abs)r<1;1,0> */ - 0b010000000000, /* (abs)r<0;1,0> */ - 0b110000000000, /* -(abs)r<0;1,0> */ - 0b000100011000, /* r<8;8,0> */ - 0b100010000000, /* -r<2;1,0> */ - 0b100000001001, /* -r<0;2,1> */ - 0b100001000100, /* -r[a]<1;1,0> */ -}; - -static const uint32_t xe2_control_index_table[32] = { - 0b000000000000000100, /* (16|M0) */ - 0b000000100000000000, /* (W) (1|M0) */ - 0b000000000010000100, /* (16|M16) */ - 0b000000000000000000, /* (1|M0) */ - 0b000000100000000100, /* (W) (16|M0) */ - 0b010000000000000100, /* (16|M0) (.ge)f0.0 */ - 0b010100000000000100, /* (16|M0) (.lt)f0.0 */ - 0b000000100000000010, /* (W) (4|M0) */ - 0b000000000000000101, /* (32|M0) */ - 0b000000100000000011, /* (W) (8|M0) */ - 0b001100100000000000, /* (W) (1|M0) (.gt)f0.0 */ - 0b000010000000000100, /* (16|M0) (sat) */ - 0b000100000000000100, /* (16|M0) (.eq)f0.0 */ - 0b000000100000000001, /* (W) (2|M0) */ - 0b001100000000000100, /* (16|M0) (.gt)f0.0 */ - 0b000100100000000000, /* (W) (1|M0) (.eq)f0.0 */ - 0b010100100000000010, /* (W) (4|M0) (.lt)f0.0 */ - 0b010000100000000000, /* (W) (1|M0) (.ge)f0.0 */ - 0b010000100000000010, /* (W) (4|M0) (.ge)f0.0 */ - 0b010100100000000000, /* (W) (1|M0) (.lt)f0.0 */ - 0b001000000000000100, /* (16|M0) (.ne)f0.0 */ - 0b000000000100100100, /* (f2.0) (16|M0) */ - 0b010100100000000011, /* (W) (8|M0) (.lt)f0.0 */ - 
0b000000000100011100, /* (f1.1) (16|M0) */ - 0b010000100000000011, /* (W) (8|M0) (.ge)f0.0 */ - 0b000000000100001100, /* (f0.1) (16|M0) */ - 0b000000000100010100, /* (f1.0) (16|M0) */ - 0b000000000100110100, /* (f3.0) (16|M0) */ - 0b000000000100111100, /* (f3.1) (16|M0) */ - 0b000000000100101100, /* (f2.1) (16|M0) */ - 0b000000000100000100, /* (f0.0) (16|M0) */ - 0b010100000000100100, /* (16|M0) (.lt)f2.0 */ -}; - -static const uint32_t xe2_datatype_table[32] = { - 0b11010110100101010100, /* grf<1>:f grf:f grf:f */ - 0b11010100100101010100, /* arf<1>:f grf:f grf:f */ - 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */ - 0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */ - 0b01010110110101010100, /* grf<1>:f grf:f imm:f */ - 0b11010010100101010100, /* grf<1>:f arf:f grf:f */ - 0b10111110100011101110, /* grf<1>:q grf:q grf:q */ - 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */ - 0b01010110100101010100, /* grf<1>:f grf:f arf:f */ - 0b00000010101001000100, /* grf<1>:ud imm:ud */ - 0b00101110110011001100, /* grf<1>:d grf:d imm:w */ - 0b11010000100101010100, /* arf<1>:f arf:f grf:f */ - 0b01010100100101010100, /* arf<1>:f grf:f arf:f */ - 0b01010100110101010100, /* arf<1>:f grf:f imm:f */ - 0b00000010101101010100, /* grf<1>:f imm:f */ - 0b00000110100011001100, /* grf<1>:d grf:d arf:ub */ - 0b00101110110011101110, /* grf<1>:q grf:q imm:w */ - 0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */ - 0b01010000100101010100, /* arf<1>:f arf:f arf:f */ - 0b10110110100011001100, /* grf<1>:d grf:d grf:d */ - 0b01010010100101010100, /* grf<1>:f arf:f arf:f */ - 0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */ - 0b00110110110011001110, /* grf<1>:q grf:d imm:d */ - 0b00101100110011001100, /* arf<1>:d grf:d imm:w */ - 0b11011110100101110110, /* grf<1>:df grf:df grf:df */ - 0b01010010110101010100, /* grf<1>:f arf:f imm:f */ - 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */ - 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */ - 0b00001110110001000100, /* 
grf<1>:ud grf:ud imm:uw */ - 0b00000010101010101100, /* grf<1>:d imm:w */ - 0b01010000110101010100, /* arf<1>:f arf:f imm:f */ - 0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */ -}; - -static const uint16_t xe2_subreg_table[16] = { - 0b000000000000, /* .0 .0 */ - 0b000010000000, /* .0 .4 */ - 0b000000000100, /* .4 .0 */ - 0b010000000000, /* .0 .32 */ - 0b001000000000, /* .0 .16 */ - 0b000000001000, /* .8 .0 */ - 0b000100000000, /* .0 .8 */ - 0b010100000000, /* .0 .40 */ - 0b011000000000, /* .0 .48 */ - 0b000110000000, /* .0 .12 */ - 0b000000010000, /* .16 .0 */ - 0b011010000000, /* .0 .52 */ - 0b001100000000, /* .0 .24 */ - 0b011100000000, /* .0 .56 */ - 0b010110000000, /* .0 .44 */ - 0b010010000000, /* .0 .36 */ -}; - -static const uint16_t xe2_src0_index_table[8] = { - 0b00100000000, /* r<1;1,0> */ - 0b00000000000, /* r<0;1,0> */ - 0b01000000000, /* r<2;1,0> */ - 0b00100000010, /* -r<1;1,0> */ - 0b01100000000, /* r<4;1,0> */ - 0b00100000001, /* (abs)r<1;1,0> */ - 0b00000000010, /* -r<0;1,0> */ - 0b01001000000, /* r<2;4,0> */ -}; - -static const uint16_t xe2_src1_index_table[16] = { - 0b0000100000000000, /* r<1;1,0>.0 */ - 0b0000000000000000, /* r<0;1,0>.0 */ - 0b1000100000000000, /* -r<1;1,0>.0 */ - 0b0000000000010000, /* r<0;1,0>.8 */ - 0b0000000000001000, /* r<0;1,0>.4 */ - 0b0000000000011000, /* r<0;1,0>.12 */ - 0b0000000001010000, /* r<0;1,0>.40 */ - 0b0000000001000000, /* r<0;1,0>.32 */ - 0b0000000000100000, /* r<0;1,0>.16 */ - 0b0000000001111000, /* r<0;1,0>.60 */ - 0b0000000000111000, /* r<0;1,0>.28 */ - 0b0000000000101000, /* r<0;1,0>.20 */ - 0b0000000001011000, /* r<0;1,0>.44 */ - 0b0000000001001000, /* r<0;1,0>.36 */ - 0b0000000001110000, /* r<0;1,0>.56 */ - 0b0000000000110000, /* r<0;1,0>.24 */ -}; - -/* This is actually the control index table for Cherryview (26 bits), but the - * only difference from Broadwell (24 bits) is that it has two extra 0-bits at - * the start. - * - * The low 24 bits have the same mappings on both hardware. 
- */ -static const uint32_t gfx8_3src_control_index_table[4] = { - 0b00100000000110000000000001, - 0b00000000000110000000000001, - 0b00000000001000000000000001, - 0b00000000001000000000100001, -}; - -/* This is actually the control index table for Cherryview (49 bits), but the - * only difference from Broadwell (46 bits) is that it has three extra 0-bits - * at the start. - * - * The low 44 bits have the same mappings on both hardware, and since the high - * three bits on Broadwell are zero, we can reuse Cherryview's table. - */ -static const uint64_t gfx8_3src_source_index_table[4] = { - 0b0000001110010011100100111001000001111000000000000, - 0b0000001110010011100100111001000001111000000000010, - 0b0000001110010011100100111001000001111000000001000, - 0b0000001110010011100100111001000001111000000100000, -}; - -static const uint64_t gfx12_3src_control_index_table[32] = { - 0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */ - 0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */ - 0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */ - 0b000001001010010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */ - 0b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */ - 0b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */ - 0b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */ - 0b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */ - 0b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */ - 0b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */ - 0b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */ - 0b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */ - 0b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */ - 0b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */ - 0b000001001010010101000010000000000000, /* (W) (1|M0) 
grf<1>:f :f :f :f */ - 0b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */ - 0b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */ - 0b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */ - 0b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */ - 0b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */ - 0b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */ - 0b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */ - 0b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */ - 0b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */ - 0b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */ - 0b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */ - 0b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */ - 0b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */ - 0b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */ - 0b000001001010010101010000000000100100, /* (16|M16) (sat)grf<1>:f :f :f :f */ - 0b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */ - 0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */ -}; - -static const uint64_t xehp_3src_control_index_table[32] = { - 0b0000010010100010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */ - 0b0000010010100010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */ - 0b0000010010000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */ - 0b0000010010100010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */ - 0b0000010010000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */ - 0b0000010010000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */ - 0b0000010010100010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */ - 0b0000010010000010101000010000000010011, /* (W) (8|M8) arf<1>:f 
:f :f :f */ - 0b0000010010100010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */ - 0b0000010010100010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */ - 0b0000010010000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */ - 0b0000010010100010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */ - 0b0000010010100010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */ - 0b0000010010000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */ - 0b0000010010100010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */ - 0b0000010010100010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */ - 0b0000010010000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */ - 0b0000010010000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */ - 0b0000010010100010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */ - 0b0000010010100010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */ - 0b0000010010000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */ - 0b0000010010000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */ - 0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b */ - 0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub {Atomic} */ - 0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b {Atomic} */ - 0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub {Atomic} */ - 0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b */ - 0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub */ - 0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b {Atomic} */ - 0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub */ - 0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf {Atomic} */ - 0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */ -}; - 
-static const uint64_t xe2_3src_control_index_table[16] = { - 0b0000010010100010101000000000000100, /* (16|M0) grf<1>:f :f :f :f */ - 0b0000010010000010101000000000000100, /* (16|M0) arf<1>:f :f :f :f */ - 0b0000010010100010101000100000000100, /* (W)(16|M0) grf<1>:f :f :f :f */ - 0b0000010010000010101000100000000100, /* (W)(16|M0) arf<1>:f :f :f :f */ - 0b0000011011100011101100000000000100, /* (16|M0) grf<1>:df :df :df :df */ - 0b0000011011100011101100000010000100, /* (16|M16) grf<1>:df :df :df :df */ - 0b0000011011000011101100000000000100, /* (16|M0) arf<1>:df :df :df :df */ - 0b0000010010100010101000000000000101, /* (32|M0) grf<1>:f :f :f :f */ - 0b0000010010000010101000000000000101, /* (32|M0) arf<1>:f :f :f :f */ - 0b0000010010000010101010000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */ - 0b0000010010100010101010000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */ - 0b0000011011000011101100000010000100, /* (16|M16) arf<1>:df :df :df :df */ - 0b0000010010100010101000100000000000, /* (W)(1|M0) grf<1>:f :f :f :f */ - 0b0000010010100010001000000000000100, /* (16|M0) grf<1>:ud :ud :ud :ud */ - 0b0000110110100110011000000000000101, /* (32|M0) grf<1>:d :d :d :d */ - 0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df */ -}; - -static const uint64_t xe2_3src_dpas_control_index_table[16] = { - 0b0000000000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub Atomic */ - 0b0000000100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :b Atomic */ - 0b0000100000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :ub Atomic */ - 0b0000100100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b Atomic */ - 0b0000000000111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub */ - 0b0000100100111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b */ - 0b0000101101111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf Atomic */ - 0b0000101101111101101001000000000100, /* dpas.8x* (16|M0) grf:f :bf :bf :bf Atomic 
*/ - 0b0000101101111010110101000000000100, /* dpas.8x* (16|M0) grf:bf :f :bf :bf Atomic */ - 0b0000101101111101110101000000000100, /* dpas.8x* (16|M0) grf:bf :bf :bf :bf Atomic */ - 0b0000101101111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf */ - 0b0000001001111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf Atomic */ - 0b0000001001111001101001000000000100, /* dpas.8x* (16|M0) grf:f :hf :hf :hf Atomic */ - 0b0000001001111010100101000000000100, /* dpas.8x* (16|M0) grf:hf :f :hf :hf Atomic */ - 0b0000001001111001100101000000000100, /* dpas.8x* (16|M0) grf:hf :hf :hf :hf Atomic */ - 0b0000001001111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf */ -}; - -static const uint32_t gfx12_3src_source_index_table[32] = { - 0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */ - 0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */ - 0b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */ - 0b100101100001101000011, /* grf<8;1> grf<8;1> grf<0> */ - 0b101100000000101000011, /* grf<8;1> grf<0;0> grf<1> */ - 0b101101100001101001011, /* -grf<8;1> grf<8;1> grf<1> */ - 0b101001100001101000011, /* grf<8;1> arf<8;1> grf<1> */ - 0b100001100001100000000, /* grf<0;0> arf<8;1> grf<0> */ - 0b101101100001100000000, /* grf<0;0> grf<8;1> grf<1> */ - 0b101101100101101000011, /* grf<8;1> grf<8;1> -grf<1> */ - 0b101101110001101000011, /* grf<8;1> -grf<8;1> grf<1> */ - 0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */ - 0b100001100001101000011, /* grf<8;1> arf<8;1> grf<0> */ - 0b100101110001100000000, /* grf<0;0> -grf<8;1> grf<0> */ - 0b100101110001101000011, /* grf<8;1> -grf<8;1> grf<0> */ - 0b100101100001101001011, /* -grf<8;1> grf<8;1> grf<0> */ - 0b100100000000101000011, /* grf<8;1> grf<0;0> grf<0> */ - 0b100101100001100001000, /* -grf<0;0> grf<8;1> grf<0> */ - 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */ - 0b101101110001100000000, /* grf<0;0> -grf<8;1> grf<1> */ - 0b100101100101100000000, /* grf<0;0> grf<8;1> -grf<0> */ - 
0b101001100001100000000, /* grf<0;0> arf<8;1> grf<1> */ - 0b100101100101101000011, /* grf<8;1> grf<8;1> -grf<0> */ - 0b101101100101101001011, /* -grf<8;1> grf<8;1> -grf<1> */ - 0b101001100001101001011, /* -grf<8;1> arf<8;1> grf<1> */ - 0b101101110001101001011, /* -grf<8;1> -grf<8;1> grf<1> */ - 0b101100010000101000011, /* grf<8;1> -grf<0;0> grf<1> */ - 0b101100000100101000011, /* grf<8;1> grf<0;0> -grf<1> */ - 0b101101100001100001000, /* -grf<0;0> grf<8;1> grf<1> */ - 0b101101100101100000000, /* grf<0;0> grf<8;1> -grf<1> */ - 0b100100000100101000011, /* grf<8;1> grf<0;0> -grf<0> */ - 0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */ -}; - -static const uint32_t xehp_3src_source_index_table[32] = { - 0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */ - 0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */ - 0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */ - 0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */ - 0b101100000000100000001, /* grf<1;0> grf<0;0> grf<1> */ - 0b101100000001100001001, /* -grf<1;0> grf<1;0> grf<1> */ - 0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */ - 0b101100000001100000000, /* grf<0;0> grf<1;0> grf<1> */ - 0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */ - 0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */ - 0b101100010001100000001, /* grf<1;0> -grf<1;0> grf<1> */ - 0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */ - 0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */ - 0b100100010001100000000, /* grf<0;0> -grf<1;0> grf<0> */ - 0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */ - 0b100100000001100001001, /* -grf<1;0> grf<1;0> grf<0> */ - 0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */ - 0b100100000001100001000, /* -grf<0;0> grf<1;0> grf<0> */ - 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> - * dpas.*x1 grf:d grf:[ub,b] grf:[ub,b] - * dpas.*x1 grf:f grf:bf grf:bf - */ - 0b101100010001100000000, /* grf<0;0> -grf<1;0> grf<1> */ - 0b100100000101100000000, /* grf<0;0> 
grf<1;0> -grf<0> */ - 0b101000000001100000000, /* grf<0;0> arf<1;0> grf<1> */ - 0b100100000101100000001, /* grf<1;0> grf<1;0> -grf<0> */ - 0b101100000101100001001, /* -grf<1;0> grf<1;0> -grf<1> */ - 0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */ - 0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */ - 0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */ - 0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */ - 0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */ - 0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */ - 0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */ - 0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */ -}; - -static const uint32_t xe2_3src_source_index_table[16] = { - 0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */ - 0b101100000001000000001, /* arf<1;0> grf<1;0> grf<1> */ - 0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */ - 0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */ - 0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */ - 0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */ - 0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */ - 0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */ - 0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */ - 0b101000000001000000001, /* arf<1;0> arf<1;0> grf<1> */ - 0b100000000001000000001, /* arf<1;0> arf<1;0> grf<0> */ - 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */ - 0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */ - 0b101100000101000000001, /* arf<1;0> grf<1;0> -grf<1> */ - 0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */ - 0b100100010001000000001, /* arf<1;0> -grf<1;0> grf<0> */ -}; - -static const uint32_t xe2_3src_dpas_source_index_table[16] = { - 0b100100000000100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[ub,b] - * dpas.*x1 grf:[f,bf] grf:bf grf:bf - * dpas.*x1 grf:[f,hf] grf:hf grf:hf - */ - 
0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */ - 0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */ - 0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */ - 0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */ - 0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */ - 0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */ - 0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */ - 0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */ - 0b100100000000100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[ub,b] */ - 0b100100000010100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[u4,s4] */ - 0b100100001000100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[ub,b] */ - 0b100100001010100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[u4,s4] */ - 0b100100010100100000010, /* dpas.*x2 grf:d grf:[u2,s2] grf:[u2,s2] */ - 0b100100000000100001110, /* dpas.*x8 grf:d grf:[ub,b] grf:[ub,b] */ - 0b100100001010100001110, /* dpas.*x8 grf:d grf:[u4,s4] grf:[u4,s4] */ -}; - -static const uint32_t gfx12_3src_subreg_table[32] = { - 0b00000000000000000000, /* .0 .0 .0 .0 */ - 0b00100000000000000000, /* .0 .0 .0 .4 */ - 0b00000000000110000000, /* .0 .12 .0 .0 */ - 0b10100000000000000000, /* .0 .0 .0 .20 */ - 0b10000000001110000000, /* .0 .28 .0 .16 */ - 0b01100000000000000000, /* .0 .0 .0 .12 */ - 0b01000000000000000000, /* .0 .0 .0 .8 */ - 0b00000010000000000000, /* .0 .0 .8 .0 */ - 0b00000001000000000000, /* .0 .0 .4 .0 */ - 0b11000000000000000000, /* .0 .0 .0 .24 */ - 0b10000000000000000000, /* .0 .0 .0 .16 */ - 0b11100000000000000000, /* .0 .0 .0 .28 */ - 0b00000110000000000000, /* .0 .0 .24 .0 */ - 0b00000000000010000000, /* .0 .4 .0 .0 */ - 0b00000100000000000000, /* .0 .0 .16 .0 */ - 0b00000011000000000000, /* .0 .0 .12 .0 */ - 0b00000101000000000000, /* .0 .0 .20 .0 */ - 0b00000111000000000000, /* .0 .0 .28 .0 */ - 0b00000000000100000000, /* .0 .8 .0 .0 */ - 
0b00000000001000000000, /* .0 .16 .0 .0 */ - 0b00000000001100000000, /* .0 .24 .0 .0 */ - 0b00000000001010000000, /* .0 .20 .0 .0 */ - 0b00000000001110000000, /* .0 .28 .0 .0 */ - 0b11000000001110000000, /* .0 .28 .0 .24 */ - 0b00100000000100000000, /* .0 .8 .0 .4 */ - 0b00100000000110000000, /* .0 .12 .0 .4 */ - 0b01000000000110000000, /* .0 .12 .0 .8 */ - 0b10000000001100000000, /* .0 .24 .0 .16 */ - 0b10000000001010000000, /* .0 .20 .0 .16 */ - 0b01100000000010000000, /* .0 .4 .0 .12 */ - 0b10100000001110000000, /* .0 .28 .0 .20 */ - 0b01000000000010000000, /* .0 .4 .0 .8 */ -}; - -static const uint32_t xe2_3src_subreg_table[32] = { - 0b00000000000000000000, /* .0 .0 .0 .0 */ - 0b00100000000000000000, /* .0 .0 .0 .8 */ - 0b10000000000000000000, /* .0 .0 .0 .32 */ - 0b00010000000000000000, /* .0 .0 .0 .4 */ - 0b11100000000000000000, /* .0 .0 .0 .56 */ - 0b01010000000000000000, /* .0 .0 .0 .20 */ - 0b10110000000000000000, /* .0 .0 .0 .44 */ - 0b01000000000011000000, /* .0 .12 .0 .16 */ - 0b01100000000000000000, /* .0 .0 .0 .24 */ - 0b10100000000000000000, /* .0 .0 .0 .40 */ - 0b11000000000000000000, /* .0 .0 .0 .48 */ - 0b01000000000000000000, /* .0 .0 .0 .16 */ - 0b01110000000110000000, /* .0 .24 .0 .28 */ - 0b10100000001001000000, /* .0 .36 .0 .40 */ - 0b11010000001100000000, /* .0 .48 .0 .52 */ - 0b01110000000000000000, /* .0 .0 .0 .28 */ - 0b11110000000000000000, /* .0 .0 .0 .60 */ - 0b10010000000000000000, /* .0 .0 .0 .36 */ - 0b00110000000000000000, /* .0 .0 .0 .12 */ - 0b00100000000010000000, /* .0 .8 .0 .8 */ - 0b00010000000001000000, /* .0 .4 .0 .4 */ - 0b00110000000011000000, /* .0 .12 .0 .12 */ - 0b11010000000000000000, /* .0 .0 .0 .52 */ - 0b00000000000001000000, /* .0 .4 .0 .0 */ - 0b00000101100000000000, /* .0 .0 .44 .0 */ - 0b00000100000000000000, /* .0 .0 .32 .0 */ - 0b00000000000010000000, /* .0 .8 .0 .0 */ - 0b00000000001100000000, /* .0 .48 .0 .0 */ - 0b00000000001101000000, /* .0 .52 .0 .0 */ - 0b00000110100000000000, /* .0 .0 .52 .0 */ - 
0b00000000001000000000, /* .0 .32 .0 .0 */ - 0b00000000001111000000, /* .0 .60 .0 .0 */ -}; - -struct compaction_state { - const struct brw_isa_info *isa; - const uint32_t *control_index_table; - const uint32_t *datatype_table; - const uint16_t *subreg_table; - const uint16_t *src0_index_table; - const uint16_t *src1_index_table; -}; - -static void compaction_state_init(struct compaction_state *c, - const struct brw_isa_info *isa); - -static bool -set_control_index(const struct compaction_state *c, - brw_eu_compact_inst *dst, const brw_eu_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint32_t uncompacted; /* 19b/IVB+; 21b/TGL+ */ - - if (devinfo->ver >= 20) { - uncompacted = (brw_eu_inst_bits(src, 95, 92) << 14) | /* 4b */ - (brw_eu_inst_bits(src, 34, 34) << 13) | /* 1b */ - (brw_eu_inst_bits(src, 32, 32) << 12) | /* 1b */ - (brw_eu_inst_bits(src, 31, 31) << 11) | /* 1b */ - (brw_eu_inst_bits(src, 28, 28) << 10) | /* 1b */ - (brw_eu_inst_bits(src, 27, 26) << 8) | /* 2b */ - (brw_eu_inst_bits(src, 25, 24) << 6) | /* 2b */ - (brw_eu_inst_bits(src, 23, 21) << 3) | /* 3b */ - (brw_eu_inst_bits(src, 20, 18)); /* 3b */ - } else if (devinfo->ver >= 12) { - uncompacted = (brw_eu_inst_bits(src, 95, 92) << 17) | /* 4b */ - (brw_eu_inst_bits(src, 34, 34) << 16) | /* 1b */ - (brw_eu_inst_bits(src, 33, 33) << 15) | /* 1b */ - (brw_eu_inst_bits(src, 32, 32) << 14) | /* 1b */ - (brw_eu_inst_bits(src, 31, 31) << 13) | /* 1b */ - (brw_eu_inst_bits(src, 28, 28) << 12) | /* 1b */ - (brw_eu_inst_bits(src, 27, 24) << 8) | /* 4b */ - (brw_eu_inst_bits(src, 23, 22) << 6) | /* 2b */ - (brw_eu_inst_bits(src, 21, 19) << 3) | /* 3b */ - (brw_eu_inst_bits(src, 18, 16)); /* 3b */ - } else { - uncompacted = (brw_eu_inst_bits(src, 33, 31) << 16) | /* 3b */ - (brw_eu_inst_bits(src, 23, 12) << 4) | /* 12b */ - (brw_eu_inst_bits(src, 10, 9) << 2) | /* 2b */ - (brw_eu_inst_bits(src, 34, 34) << 1) | /* 1b */ - (brw_eu_inst_bits(src, 8, 8)); /* 1b */ - } - - for (int i = 
0; i < 32; i++) { - if (c->control_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_control_index(devinfo, dst, i); - return true; - } - } - - return false; -} - -static bool -set_datatype_index(const struct compaction_state *c, brw_eu_compact_inst *dst, - const brw_eu_inst *src, bool is_immediate) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */ - - if (devinfo->ver >= 12) { - uncompacted = (brw_eu_inst_bits(src, 91, 88) << 15) | /* 4b */ - (brw_eu_inst_bits(src, 66, 66) << 14) | /* 1b */ - (brw_eu_inst_bits(src, 50, 50) << 13) | /* 1b */ - (brw_eu_inst_bits(src, 49, 48) << 11) | /* 2b */ - (brw_eu_inst_bits(src, 47, 47) << 10) | /* 1b */ - (brw_eu_inst_bits(src, 46, 46) << 9) | /* 1b */ - (brw_eu_inst_bits(src, 43, 40) << 5) | /* 4b */ - (brw_eu_inst_bits(src, 39, 36) << 1) | /* 4b */ - (brw_eu_inst_bits(src, 35, 35)); /* 1b */ - - /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate - * is present - */ - if (!is_immediate) { - uncompacted |= brw_eu_inst_bits(src, 98, 98) << 19; /* 1b */ - } - } else { - uncompacted = (brw_eu_inst_bits(src, 63, 61) << 18) | /* 3b */ - (brw_eu_inst_bits(src, 94, 89) << 12) | /* 6b */ - (brw_eu_inst_bits(src, 46, 35)); /* 12b */ - } - - for (int i = 0; i < 32; i++) { - if (c->datatype_table[i] == uncompacted) { - brw_eu_compact_inst_set_datatype_index(devinfo, dst, i); - return true; - } - } - - return false; -} - -static bool -set_subreg_index(const struct compaction_state *c, brw_eu_compact_inst *dst, - const brw_eu_inst *src, bool is_immediate) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - const unsigned table_len = devinfo->ver >= 20 ? 
- ARRAY_SIZE(xe2_subreg_table) : ARRAY_SIZE(g45_subreg_table); - uint16_t uncompacted; /* 15b/G45+; 12b/Xe2+ */ - - if (devinfo->ver >= 20) { - uncompacted = (brw_eu_inst_bits(src, 33, 33) << 0) | /* 1b */ - (brw_eu_inst_bits(src, 55, 51) << 1) | /* 5b */ - (brw_eu_inst_bits(src, 71, 67) << 6) | /* 5b */ - (brw_eu_inst_bits(src, 87, 87) << 11); /* 1b */ - } else if (devinfo->ver >= 12) { - uncompacted = (brw_eu_inst_bits(src, 55, 51) << 0) | /* 5b */ - (brw_eu_inst_bits(src, 71, 67) << 5); /* 5b */ - - if (!is_immediate) - uncompacted |= brw_eu_inst_bits(src, 103, 99) << 10; /* 5b */ - } else { - uncompacted = (brw_eu_inst_bits(src, 52, 48) << 0) | /* 5b */ - (brw_eu_inst_bits(src, 68, 64) << 5); /* 5b */ - - if (!is_immediate) - uncompacted |= brw_eu_inst_bits(src, 100, 96) << 10; /* 5b */ - } - - for (int i = 0; i < table_len; i++) { - if (c->subreg_table[i] == uncompacted) { - brw_eu_compact_inst_set_subreg_index(devinfo, dst, i); - return true; - } - } - - return false; -} - -static bool -set_src0_index(const struct compaction_state *c, brw_eu_compact_inst *dst, - const brw_eu_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint16_t uncompacted; /* 12b/G45+; 11b/Xe2+ */ - int table_len; - - if (devinfo->ver >= 12) { - table_len = (devinfo->ver >= 20 ? ARRAY_SIZE(xe2_src0_index_table) : - ARRAY_SIZE(gfx12_src0_index_table)); - uncompacted = (devinfo->ver >= 20 ? 
0 : - brw_eu_inst_bits(src, 87, 87) << 11) | /* 1b */ - (brw_eu_inst_bits(src, 86, 84) << 8) | /* 3b */ - (brw_eu_inst_bits(src, 83, 81) << 5) | /* 3b */ - (brw_eu_inst_bits(src, 80, 80) << 4) | /* 1b */ - (brw_eu_inst_bits(src, 65, 64) << 2) | /* 2b */ - (brw_eu_inst_bits(src, 45, 44)); /* 2b */ - } else { - table_len = ARRAY_SIZE(gfx8_src_index_table); - uncompacted = brw_eu_inst_bits(src, 88, 77); /* 12b */ - } - - for (int i = 0; i < table_len; i++) { - if (c->src0_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_src0_index(devinfo, dst, i); - return true; - } - } - - return false; -} - -static bool -set_src1_index(const struct compaction_state *c, brw_eu_compact_inst *dst, - const brw_eu_inst *src, bool is_immediate, unsigned imm) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - if (is_immediate) { - if (devinfo->ver >= 12) { - /* src1 index takes the low 4 bits of the 12-bit compacted value */ - brw_eu_compact_inst_set_src1_index(devinfo, dst, imm & 0xf); - } else { - /* src1 index takes the high 5 bits of the 13-bit compacted value */ - brw_eu_compact_inst_set_src1_index(devinfo, dst, imm >> 8); - } - return true; - } else { - uint16_t uncompacted; /* 12b/G45+ 16b/Xe2+ */ - int table_len; - - if (devinfo->ver >= 20) { - table_len = ARRAY_SIZE(xe2_src1_index_table); - uncompacted = (brw_eu_inst_bits(src, 121, 120) << 14) | /* 2b */ - (brw_eu_inst_bits(src, 118, 116) << 11) | /* 3b */ - (brw_eu_inst_bits(src, 115, 113) << 8) | /* 3b */ - (brw_eu_inst_bits(src, 112, 112) << 7) | /* 1b */ - (brw_eu_inst_bits(src, 103, 99) << 2) | /* 5b */ - (brw_eu_inst_bits(src, 97, 96)); /* 2b */ - } else if (devinfo->ver >= 12) { - table_len = ARRAY_SIZE(gfx12_src0_index_table); - uncompacted = (brw_eu_inst_bits(src, 121, 120) << 10) | /* 2b */ - (brw_eu_inst_bits(src, 119, 116) << 6) | /* 4b */ - (brw_eu_inst_bits(src, 115, 113) << 3) | /* 3b */ - (brw_eu_inst_bits(src, 112, 112) << 2) | /* 1b */ - (brw_eu_inst_bits(src, 97, 96)); /* 2b */ - } else 
{ - table_len = ARRAY_SIZE(gfx8_src_index_table); - uncompacted = brw_eu_inst_bits(src, 120, 109); /* 12b */ - } - - for (int i = 0; i < table_len; i++) { - if (c->src1_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_src1_index(devinfo, dst, i); - return true; - } - } - } - - return false; -} - -static bool -set_3src_control_index(const struct intel_device_info *devinfo, - brw_eu_compact_inst *dst, const brw_eu_inst *src, - bool is_dpas) -{ - if (devinfo->ver >= 20) { - assert(is_dpas || !brw_eu_inst_bits(src, 49, 49)); - - const uint64_t uncompacted = /* 34b/Xe2+ */ - (brw_eu_inst_bits(src, 95, 92) << 30) | /* 4b */ - (brw_eu_inst_bits(src, 90, 88) << 27) | /* 3b */ - (brw_eu_inst_bits(src, 82, 80) << 24) | /* 3b */ - (brw_eu_inst_bits(src, 50, 50) << 23) | /* 1b */ - (brw_eu_inst_bits(src, 49, 48) << 21) | /* 2b */ - (brw_eu_inst_bits(src, 42, 40) << 18) | /* 3b */ - (brw_eu_inst_bits(src, 39, 39) << 17) | /* 1b */ - (brw_eu_inst_bits(src, 38, 36) << 14) | /* 3b */ - (brw_eu_inst_bits(src, 34, 34) << 13) | /* 1b */ - (brw_eu_inst_bits(src, 32, 32) << 12) | /* 1b */ - (brw_eu_inst_bits(src, 31, 31) << 11) | /* 1b */ - (brw_eu_inst_bits(src, 28, 28) << 10) | /* 1b */ - (brw_eu_inst_bits(src, 27, 26) << 8) | /* 2b */ - (brw_eu_inst_bits(src, 25, 24) << 6) | /* 2b */ - (brw_eu_inst_bits(src, 23, 21) << 3) | /* 3b */ - (brw_eu_inst_bits(src, 20, 18)); /* 3b */ - - /* The bits used to index the tables for 3src and 3src-dpas - * are the same, so just need to pick the right one. - */ - const uint64_t *table = is_dpas ? xe2_3src_dpas_control_index_table : - xe2_3src_control_index_table; - const unsigned size = is_dpas ? 
ARRAY_SIZE(xe2_3src_dpas_control_index_table) : - ARRAY_SIZE(xe2_3src_control_index_table); - for (unsigned i = 0; i < size; i++) { - if (table[i] == uncompacted) { - brw_eu_compact_inst_set_3src_control_index(devinfo, dst, i); - return true; - } - } - } else if (devinfo->verx10 >= 125) { - uint64_t uncompacted = /* 37b/XeHP+ */ - (brw_eu_inst_bits(src, 95, 92) << 33) | /* 4b */ - (brw_eu_inst_bits(src, 90, 88) << 30) | /* 3b */ - (brw_eu_inst_bits(src, 82, 80) << 27) | /* 3b */ - (brw_eu_inst_bits(src, 50, 50) << 26) | /* 1b */ - (brw_eu_inst_bits(src, 49, 48) << 24) | /* 2b */ - (brw_eu_inst_bits(src, 42, 40) << 21) | /* 3b */ - (brw_eu_inst_bits(src, 39, 39) << 20) | /* 1b */ - (brw_eu_inst_bits(src, 38, 36) << 17) | /* 3b */ - (brw_eu_inst_bits(src, 34, 34) << 16) | /* 1b */ - (brw_eu_inst_bits(src, 33, 33) << 15) | /* 1b */ - (brw_eu_inst_bits(src, 32, 32) << 14) | /* 1b */ - (brw_eu_inst_bits(src, 31, 31) << 13) | /* 1b */ - (brw_eu_inst_bits(src, 28, 28) << 12) | /* 1b */ - (brw_eu_inst_bits(src, 27, 24) << 8) | /* 4b */ - (brw_eu_inst_bits(src, 23, 23) << 7) | /* 1b */ - (brw_eu_inst_bits(src, 22, 22) << 6) | /* 1b */ - (brw_eu_inst_bits(src, 21, 19) << 3) | /* 3b */ - (brw_eu_inst_bits(src, 18, 16)); /* 3b */ - - for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) { - if (xehp_3src_control_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_3src_control_index(devinfo, dst, i); - return true; - } - } - } else if (devinfo->ver >= 12) { - uint64_t uncompacted = /* 36b/TGL+ */ - (brw_eu_inst_bits(src, 95, 92) << 32) | /* 4b */ - (brw_eu_inst_bits(src, 90, 88) << 29) | /* 3b */ - (brw_eu_inst_bits(src, 82, 80) << 26) | /* 3b */ - (brw_eu_inst_bits(src, 50, 50) << 25) | /* 1b */ - (brw_eu_inst_bits(src, 48, 48) << 24) | /* 1b */ - (brw_eu_inst_bits(src, 42, 40) << 21) | /* 3b */ - (brw_eu_inst_bits(src, 39, 39) << 20) | /* 1b */ - (brw_eu_inst_bits(src, 38, 36) << 17) | /* 3b */ - (brw_eu_inst_bits(src, 34, 34) << 16) | /* 1b */ - 
(brw_eu_inst_bits(src, 33, 33) << 15) | /* 1b */ - (brw_eu_inst_bits(src, 32, 32) << 14) | /* 1b */ - (brw_eu_inst_bits(src, 31, 31) << 13) | /* 1b */ - (brw_eu_inst_bits(src, 28, 28) << 12) | /* 1b */ - (brw_eu_inst_bits(src, 27, 24) << 8) | /* 4b */ - (brw_eu_inst_bits(src, 23, 23) << 7) | /* 1b */ - (brw_eu_inst_bits(src, 22, 22) << 6) | /* 1b */ - (brw_eu_inst_bits(src, 21, 19) << 3) | /* 3b */ - (brw_eu_inst_bits(src, 18, 16)); /* 3b */ - - for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) { - if (gfx12_3src_control_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_3src_control_index(devinfo, dst, i); - return true; - } - } - } else { - uint32_t uncompacted = /* 26b/SKL+ */ - (brw_eu_inst_bits(src, 36, 35) << 24) | /* 2b */ - (brw_eu_inst_bits(src, 34, 32) << 21) | /* 3b */ - (brw_eu_inst_bits(src, 28, 8)); /* 21b */ - - for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) { - if (gfx8_3src_control_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_3src_control_index(devinfo, dst, i); - return true; - } - } - } - - return false; -} - -static bool -set_3src_source_index(const struct intel_device_info *devinfo, - brw_eu_compact_inst *dst, const brw_eu_inst *src, - bool is_dpas) -{ - if (devinfo->ver >= 12) { - uint32_t uncompacted = /* 21b/TGL+ */ - (brw_eu_inst_bits(src, 114, 114) << 20) | /* 1b */ - (brw_eu_inst_bits(src, 113, 112) << 18) | /* 2b */ - (brw_eu_inst_bits(src, 98, 98) << 17) | /* 1b */ - (brw_eu_inst_bits(src, 97, 96) << 15) | /* 2b */ - (brw_eu_inst_bits(src, 91, 91) << 14) | /* 1b */ - (brw_eu_inst_bits(src, 87, 86) << 12) | /* 2b */ - (brw_eu_inst_bits(src, 85, 84) << 10) | /* 2b */ - (brw_eu_inst_bits(src, 83, 83) << 9) | /* 1b */ - (brw_eu_inst_bits(src, 66, 66) << 8) | /* 1b */ - (brw_eu_inst_bits(src, 65, 64) << 6) | /* 2b */ - (brw_eu_inst_bits(src, 47, 47) << 5) | /* 1b */ - (brw_eu_inst_bits(src, 46, 46) << 4) | /* 1b */ - (brw_eu_inst_bits(src, 45, 44) << 2) | /* 2b */ - 
(brw_eu_inst_bits(src, 43, 43) << 1) | /* 1b */ - (brw_eu_inst_bits(src, 35, 35)); /* 1b */ - - /* In Xe2, the bits used to index the tables for 3src and 3src-dpas - * are the same, so just need to pick the right one. - */ - const uint32_t *three_src_source_index_table = - devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table : - xe2_3src_source_index_table) : - devinfo->verx10 >= 125 ? xehp_3src_source_index_table : - gfx12_3src_source_index_table; - const uint32_t three_src_source_index_table_len = - devinfo->ver >= 20 ? (is_dpas ? ARRAY_SIZE(xe2_3src_dpas_source_index_table) : - ARRAY_SIZE(xe2_3src_source_index_table)) : - devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) : - ARRAY_SIZE(gfx12_3src_source_index_table); - - for (unsigned i = 0; i < three_src_source_index_table_len; i++) { - if (three_src_source_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_3src_source_index(devinfo, dst, i); - return true; - } - } - } else { - uint64_t uncompacted = /* 49b/SKL+ */ - (brw_eu_inst_bits(src, 126, 125) << 47) | /* 2b */ - (brw_eu_inst_bits(src, 105, 104) << 45) | /* 2b */ - (brw_eu_inst_bits(src, 84, 84) << 44) | /* 1b */ - (brw_eu_inst_bits(src, 83, 83) << 43) | /* 1b */ - (brw_eu_inst_bits(src, 114, 107) << 35) | /* 8b */ - (brw_eu_inst_bits(src, 93, 86) << 27) | /* 8b */ - (brw_eu_inst_bits(src, 72, 65) << 19) | /* 8b */ - (brw_eu_inst_bits(src, 55, 37)); /* 19b */ - - for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) { - if (gfx8_3src_source_index_table[i] == uncompacted) { - brw_eu_compact_inst_set_3src_source_index(devinfo, dst, i); - return true; - } - } - } - - return false; -} - -static bool -set_3src_subreg_index(const struct intel_device_info *devinfo, - brw_eu_compact_inst *dst, const brw_eu_inst *src) -{ - assert(devinfo->ver >= 12); - - uint32_t uncompacted = /* 20b/TGL+ */ - (brw_eu_inst_bits(src, 119, 115) << 15) | /* 5b */ - (brw_eu_inst_bits(src, 103, 99) << 10) | /* 5b */ - 
(brw_eu_inst_bits(src, 71, 67) << 5) | /* 5b */ - (brw_eu_inst_bits(src, 55, 51)); /* 5b */ - - const uint32_t *table = devinfo->ver >= 20 ? xe2_3src_subreg_table : - gfx12_3src_subreg_table; - const uint32_t len = - devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_subreg_table) : - ARRAY_SIZE(gfx12_3src_subreg_table); - - for (unsigned i = 0; i < len; i++) { - if (table[i] == uncompacted) { - brw_eu_compact_inst_set_3src_subreg_index(devinfo, dst, i); - return true; - } - } - - return false; -} - -static bool -has_unmapped_bits(const struct brw_isa_info *isa, const brw_eu_inst *src) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - /* EOT can only be mapped on a send if the src1 is an immediate */ - if ((brw_eu_inst_opcode(isa, src) == BRW_OPCODE_SENDC || - brw_eu_inst_opcode(isa, src) == BRW_OPCODE_SEND) && - brw_eu_inst_eot(devinfo, src)) - return true; - - /* Check for instruction bits that don't map to any of the fields of the - * compacted instruction. The instruction cannot be compacted if any of - * them are set. They overlap with: - * - NibCtrl (bit 11 on Gfx8) - * - Dst.AddrImm[9] (bit 47 on Gfx8) - * - Src0.AddrImm[9] (bit 95 on Gfx8) - * - Imm64[27:31] (bit 95 on Gfx8) - * - UIP[31] (bit 95 on Gfx8) - */ - if (devinfo->ver >= 12) { - assert(!brw_eu_inst_bits(src, 7, 7)); - return false; - } else { - assert(!brw_eu_inst_bits(src, 7, 7)); - return brw_eu_inst_bits(src, 95, 95) || - brw_eu_inst_bits(src, 47, 47) || - brw_eu_inst_bits(src, 11, 11); - } -} - -static bool -has_3src_unmapped_bits(const struct intel_device_info *devinfo, - const brw_eu_inst *src, bool is_dpas) -{ - /* Check for three-source instruction bits that don't map to any of the - * fields of the compacted instruction. All of them seem to be reserved - * bits currently. 
- */ - if (devinfo->ver >= 20) { - assert(is_dpas || !brw_eu_inst_bits(src, 49, 49)); - assert(!brw_eu_inst_bits(src, 33, 33)); - assert(!brw_eu_inst_bits(src, 7, 7)); - } else if (devinfo->ver >= 12) { - assert(is_dpas || !brw_eu_inst_bits(src, 49, 49)); - assert(!brw_eu_inst_bits(src, 7, 7)); - } else { - assert(!brw_eu_inst_bits(src, 127, 127) && - !brw_eu_inst_bits(src, 7, 7)); - } - - return false; -} - -static bool -brw_try_compact_3src_instruction(const struct brw_isa_info *isa, - brw_eu_compact_inst *dst, const brw_eu_inst *src) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - bool is_dpas = brw_eu_inst_opcode(isa, src) == BRW_OPCODE_DPAS; - if (has_3src_unmapped_bits(devinfo, src, is_dpas)) - return false; - -#define compact(field) \ - brw_eu_compact_inst_set_3src_##field(devinfo, dst, brw_eu_inst_3src_##field(devinfo, src)) -#define compact_a16(field) \ - brw_eu_compact_inst_set_3src_##field(devinfo, dst, brw_eu_inst_3src_a16_##field(devinfo, src)) - - compact(hw_opcode); - - if (!set_3src_control_index(devinfo, dst, src, is_dpas)) - return false; - - if (!set_3src_source_index(devinfo, dst, src, is_dpas)) - return false; - - if (devinfo->ver >= 12) { - if (!set_3src_subreg_index(devinfo, dst, src)) - return false; - - compact(swsb); - compact(debug_control); - compact(dst_reg_nr); - compact(src0_reg_nr); - compact(src1_reg_nr); - compact(src2_reg_nr); - } else { - compact(dst_reg_nr); - compact_a16(src0_rep_ctrl); - compact(debug_control); - compact(saturate); - compact_a16(src1_rep_ctrl); - compact_a16(src2_rep_ctrl); - compact(src0_reg_nr); - compact(src1_reg_nr); - compact(src2_reg_nr); - compact_a16(src0_subreg_nr); - compact_a16(src1_subreg_nr); - compact_a16(src2_subreg_nr); - } - brw_eu_compact_inst_set_3src_cmpt_control(devinfo, dst, true); - -#undef compact -#undef compact_a16 - - return true; -} - -/* On SNB through ICL, compacted instructions have 12-bits for immediate - * sources, and a 13th bit that's replicated through the 
high 20 bits. - * - * Effectively this means we get 12-bit integers, 0.0f, and some limited uses - * of packed vectors as compactable immediates. - * - * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded - * rather than the low 12-bits. For signed integer the 12th bit is replicated, - * while for unsigned integers it is not. - * - * Returns the compacted immediate, or -1 if immediate cannot be compacted - */ -static int -compact_immediate(const struct intel_device_info *devinfo, - enum brw_reg_type type, unsigned imm) -{ - if (devinfo->ver >= 12) { - /* 16-bit immediates need to be replicated through the 32-bit immediate - * field - */ - switch (type) { - case BRW_TYPE_W: - case BRW_TYPE_UW: - case BRW_TYPE_HF: - if ((imm >> 16) != (imm & 0xffff)) - return -1; - break; - default: - break; - } - - switch (type) { - case BRW_TYPE_F: - /* We get the high 12-bits as-is; rest must be zero */ - if ((imm & 0xfffff) == 0) - return (imm >> 20) & 0xfff; - break; - case BRW_TYPE_HF: - /* We get the high 12-bits as-is; rest must be zero */ - if ((imm & 0xf) == 0) - return (imm >> 4) & 0xfff; - break; - case BRW_TYPE_UD: - case BRW_TYPE_VF: - case BRW_TYPE_UV: - case BRW_TYPE_V: - /* We get the low 12-bits as-is; rest must be zero */ - if ((imm & 0xfffff000) == 0) - return imm & 0xfff; - break; - case BRW_TYPE_UW: - /* We get the low 12-bits as-is; rest must be zero */ - if ((imm & 0xf000) == 0) - return imm & 0xfff; - break; - case BRW_TYPE_D: - /* We get the low 11-bits as-is; 12th is replicated */ - if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1) - return imm & 0xfff; - break; - case BRW_TYPE_W: - /* We get the low 11-bits as-is; 12th is replicated */ - if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1) - return imm & 0xfff; - break; - case BRW_TYPE_DF: - case BRW_TYPE_Q: - case BRW_TYPE_UQ: - case BRW_TYPE_B: - case BRW_TYPE_UB: - default: - return -1; - } - } else { - /* We get the low 12 bits as-is; 13th is replicated */ - if 
(((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) { - return imm & 0x1fff; - } - } - - return -1; -} - -static int -uncompact_immediate(const struct intel_device_info *devinfo, - enum brw_reg_type type, unsigned compact_imm) -{ - if (devinfo->ver >= 12) { - switch (type) { - case BRW_TYPE_F: - return compact_imm << 20; - case BRW_TYPE_HF: - return (compact_imm << 20) | (compact_imm << 4); - case BRW_TYPE_UD: - case BRW_TYPE_VF: - case BRW_TYPE_UV: - case BRW_TYPE_V: - return compact_imm; - case BRW_TYPE_UW: - /* Replicate */ - return compact_imm << 16 | compact_imm; - case BRW_TYPE_D: - /* Extend the 12th bit into the high 20 bits */ - return (int)(compact_imm << 20) >> 20; - case BRW_TYPE_W: - /* Extend the 12th bit into the high 4 bits and replicate */ - return ((int)(compact_imm << 20) >> 4) | - ((unsigned short)((short)(compact_imm << 4) >> 4)); - case BRW_TYPE_DF: - case BRW_TYPE_Q: - case BRW_TYPE_UQ: - case BRW_TYPE_B: - case BRW_TYPE_UB: - UNREACHABLE("not reached"); - default: - UNREACHABLE("invalid type"); - } - } else { - /* Replicate the 13th bit into the high 19 bits */ - return (int)(compact_imm << 19) >> 19; - } - - UNREACHABLE("not reached"); -} - -static bool -has_immediate(const struct intel_device_info *devinfo, const brw_eu_inst *inst, - enum brw_reg_type *type) -{ - if (brw_eu_inst_src0_reg_file(devinfo, inst) == IMM) { - *type = brw_eu_inst_src0_type(devinfo, inst); - return *type != BRW_TYPE_INVALID; - } else if (brw_eu_inst_src1_reg_file(devinfo, inst) == IMM) { - *type = brw_eu_inst_src1_type(devinfo, inst); - return *type != BRW_TYPE_INVALID; - } - - return false; -} - -/** - * Applies some small changes to instruction types to increase chances of - * compaction. 
- */ -static brw_eu_inst -precompact(const struct brw_isa_info *isa, brw_eu_inst inst) -{ - if (is_3src(isa, brw_eu_inst_opcode(isa, &inst))) - return inst; - - const struct intel_device_info *devinfo = isa->devinfo; - - /* In XeHP the compaction tables removed the entries for source regions - * <8;8,1> giving preference to <1;1,0> as the way to indicate - * sequential elements, so convert to those before compacting. - */ - if (devinfo->verx10 >= 125) { - if (brw_eu_inst_src0_reg_file(devinfo, &inst) == FIXED_GRF && - brw_eu_inst_src0_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 && - brw_eu_inst_src0_vstride(devinfo, &inst) == (brw_eu_inst_src0_width(devinfo, &inst) + 1) && - brw_eu_inst_src0_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) { - brw_eu_inst_set_src0_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1); - brw_eu_inst_set_src0_width(devinfo, &inst, BRW_WIDTH_1); - brw_eu_inst_set_src0_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0); - } - - if (brw_eu_inst_src1_reg_file(devinfo, &inst) == FIXED_GRF && - brw_eu_inst_src1_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 && - brw_eu_inst_src1_vstride(devinfo, &inst) == (brw_eu_inst_src1_width(devinfo, &inst) + 1) && - brw_eu_inst_src1_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) { - brw_eu_inst_set_src1_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1); - brw_eu_inst_set_src1_width(devinfo, &inst, BRW_WIDTH_1); - brw_eu_inst_set_src1_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0); - } - } - - if (brw_eu_inst_src0_reg_file(devinfo, &inst) != IMM) - return inst; - - /* The Bspec's section titled "Non-present Operands" claims that if src0 - * is an immediate that src1's type must be the same as that of src0. - * - * The SNB+ DataTypeIndex instruction compaction tables contain mappings - * that do not follow this rule. 
E.g., from the IVB/HSW table: - * - * DataTypeIndex 18-Bit Mapping Mapped Meaning - * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir | - * - * And from the SNB table: - * - * DataTypeIndex 18-Bit Mapping Mapped Meaning - * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir | - * - * Neither of these cause warnings from the simulator when used, - * compacted or otherwise. In fact, all compaction mappings that have an - * immediate in src0 use a:ud for src1. - * - * Don't do any of this for 64-bit immediates, since the src1 fields - * overlap with the immediate and setting them would overwrite the - * immediate we set. - */ - if (!(brw_eu_inst_src0_type(devinfo, &inst) == BRW_TYPE_DF || - brw_eu_inst_src0_type(devinfo, &inst) == BRW_TYPE_UQ || - brw_eu_inst_src0_type(devinfo, &inst) == BRW_TYPE_Q)) { - brw_eu_inst_set_src1_reg_hw_type(devinfo, &inst, 0); - } - - /* Compacted instructions only have 12-bits (plus 1 for the other 20) - * for immediate values. Presumably the hardware engineers realized - * that the only useful floating-point value that could be represented - * in this format is 0.0, which can also be represented as a VF-typed - * immediate, so they gave us the previously mentioned mapping on IVB+. - * - * Strangely, we do have a mapping for imm:f in src1, so we don't need - * to do this there. - * - * If we see a 0.0:F, change the type to VF so that it can be compacted. - * - * Compaction of floating-point immediates is improved on Gfx12, thus - * removing the need for this. 
- */ - if (devinfo->ver < 12 && - brw_eu_inst_imm_ud(devinfo, &inst) == 0x0 && - brw_eu_inst_src0_type(devinfo, &inst) == BRW_TYPE_F && - brw_eu_inst_dst_type(devinfo, &inst) == BRW_TYPE_F && - brw_eu_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) { - enum brw_reg_file file = brw_eu_inst_src0_reg_file(devinfo, &inst); - brw_eu_inst_set_src0_file_type(devinfo, &inst, file, BRW_TYPE_VF); - } - - /* There are no mappings for dst:d | i:d, so if the immediate is suitable - * set the types to :UD so the instruction can be compacted. - * - * FINISHME: Use dst:f | imm:f on Gfx12 - */ - if (devinfo->ver < 12 && - compact_immediate(devinfo, BRW_TYPE_D, - brw_eu_inst_imm_ud(devinfo, &inst)) != -1 && - brw_eu_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE && - brw_eu_inst_src0_type(devinfo, &inst) == BRW_TYPE_D && - brw_eu_inst_dst_type(devinfo, &inst) == BRW_TYPE_D) { - enum brw_reg_file src_file = brw_eu_inst_src0_reg_file(devinfo, &inst); - enum brw_reg_file dst_file = brw_eu_inst_dst_reg_file(devinfo, &inst); - - brw_eu_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_TYPE_UD); - brw_eu_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_TYPE_UD); - } - - return inst; -} - -/** - * Tries to compact instruction src into dst. - * - * It doesn't modify dst unless src is compactable, which is relied on by - * brw_compact_instructions(). 
- */ -static bool -try_compact_instruction(const struct compaction_state *c, - brw_eu_compact_inst *dst, const brw_eu_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - brw_eu_compact_inst temp; - - assert(brw_eu_inst_cmpt_control(devinfo, src) == 0); - - if (is_3src(c->isa, brw_eu_inst_opcode(c->isa, src))) { - memset(&temp, 0, sizeof(temp)); - if (brw_try_compact_3src_instruction(c->isa, &temp, src)) { - *dst = temp; - return true; - } else { - return false; - } - } - - enum brw_reg_type type; - bool is_immediate = has_immediate(devinfo, src, &type); - - unsigned compacted_imm = 0; - - if (is_immediate) { - compacted_imm = compact_immediate(devinfo, type, - brw_eu_inst_imm_ud(devinfo, src)); - if (compacted_imm == -1) - return false; - } - - if (has_unmapped_bits(c->isa, src)) - return false; - - memset(&temp, 0, sizeof(temp)); - -#define compact(field) \ - brw_eu_compact_inst_set_##field(devinfo, &temp, brw_eu_inst_##field(devinfo, src)) -#define compact_reg(field) \ - brw_eu_compact_inst_set_##field##_reg_nr(devinfo, &temp, \ - brw_eu_inst_##field##_da_reg_nr(devinfo, src)) - - compact(hw_opcode); - compact(debug_control); - - if (!set_control_index(c, &temp, src)) - return false; - if (!set_datatype_index(c, &temp, src, is_immediate)) - return false; - if (!set_subreg_index(c, &temp, src, is_immediate)) - return false; - if (!set_src0_index(c, &temp, src)) - return false; - if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm)) - return false; - - if (devinfo->ver >= 12) { - compact(swsb); - compact_reg(dst); - compact_reg(src0); - - if (is_immediate) { - /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */ - brw_eu_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4); - } else { - compact_reg(src1); - } - } else { - compact(acc_wr_control); - - compact(cond_modifier); - - compact_reg(dst); - compact_reg(src0); - - if (is_immediate) { - /* src1 reg takes the low 8 bits (of the 13-bit compacted 
value) */ - brw_eu_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff); - } else { - compact_reg(src1); - } - } - brw_eu_compact_inst_set_cmpt_control(devinfo, &temp, true); - -#undef compact -#undef compact_reg - - *dst = temp; - - return true; -} - -bool -brw_try_compact_instruction(const struct brw_isa_info *isa, - brw_eu_compact_inst *dst, const brw_eu_inst *src) -{ - struct compaction_state c; - compaction_state_init(&c, isa); - return try_compact_instruction(&c, dst, src); -} - -static void -set_uncompacted_control(const struct compaction_state *c, brw_eu_inst *dst, - brw_eu_compact_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint32_t uncompacted = - c->control_index_table[brw_eu_compact_inst_control_index(devinfo, src)]; - - if (devinfo->ver >= 20) { - brw_eu_inst_set_bits(dst, 95, 92, (uncompacted >> 14) & 0xf); - brw_eu_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1); - brw_eu_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1); - brw_eu_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1); - brw_eu_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1); - brw_eu_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3); - brw_eu_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3); - brw_eu_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7); - brw_eu_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7); - } else if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(dst, 95, 92, (uncompacted >> 17)); - brw_eu_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1); - brw_eu_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1); - brw_eu_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1); - brw_eu_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1); - brw_eu_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1); - brw_eu_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf); - brw_eu_inst_set_bits(dst, 23, 22, (uncompacted >> 6) & 0x3); - brw_eu_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7); - 
brw_eu_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7); - } else { - brw_eu_inst_set_bits(dst, 33, 31, (uncompacted >> 16)); - brw_eu_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff); - brw_eu_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3); - brw_eu_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1); - brw_eu_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1); - } -} - -static void -set_uncompacted_datatype(const struct compaction_state *c, brw_eu_inst *dst, - brw_eu_compact_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint32_t uncompacted = - c->datatype_table[brw_eu_compact_inst_datatype_index(devinfo, src)]; - - if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(dst, 98, 98, (uncompacted >> 19)); - brw_eu_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf); - brw_eu_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1); - brw_eu_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1); - brw_eu_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3); - brw_eu_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1); - brw_eu_inst_set_bits(dst, 46, 46, (uncompacted >> 9) & 0x1); - brw_eu_inst_set_bits(dst, 43, 40, (uncompacted >> 5) & 0xf); - brw_eu_inst_set_bits(dst, 39, 36, (uncompacted >> 1) & 0xf); - brw_eu_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1); - } else { - brw_eu_inst_set_bits(dst, 63, 61, (uncompacted >> 18)); - brw_eu_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f); - brw_eu_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff); - } -} - -static void -set_uncompacted_subreg(const struct compaction_state *c, brw_eu_inst *dst, - brw_eu_compact_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint16_t uncompacted = - c->subreg_table[brw_eu_compact_inst_subreg_index(devinfo, src)]; - - if (devinfo->ver >= 20) { - brw_eu_inst_set_bits(dst, 33, 33, (uncompacted >> 0) & 0x1); - brw_eu_inst_set_bits(dst, 55, 51, (uncompacted >> 1) & 0x1f); - brw_eu_inst_set_bits(dst, 71, 
67, (uncompacted >> 6) & 0x1f); - brw_eu_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1); - } else if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(dst, 103, 99, (uncompacted >> 10)); - brw_eu_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f); - brw_eu_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f); - } else { - brw_eu_inst_set_bits(dst, 100, 96, (uncompacted >> 10)); - brw_eu_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f); - brw_eu_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f); - } -} - -static void -set_uncompacted_src0(const struct compaction_state *c, brw_eu_inst *dst, - brw_eu_compact_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint32_t compacted = brw_eu_compact_inst_src0_index(devinfo, src); - uint16_t uncompacted = c->src0_index_table[compacted]; - - if (devinfo->ver >= 12) { - if (devinfo->ver < 20) - brw_eu_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1); - brw_eu_inst_set_bits(dst, 86, 84, (uncompacted >> 8) & 0x7); - brw_eu_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7); - brw_eu_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1); - brw_eu_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3); - brw_eu_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3); - } else { - brw_eu_inst_set_bits(dst, 88, 77, uncompacted); - } -} - -static void -set_uncompacted_src1(const struct compaction_state *c, brw_eu_inst *dst, - brw_eu_compact_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - uint16_t uncompacted = - c->src1_index_table[brw_eu_compact_inst_src1_index(devinfo, src)]; - - if (devinfo->ver >= 20) { - brw_eu_inst_set_bits(dst, 121, 120, (uncompacted >> 14) & 0x3); - brw_eu_inst_set_bits(dst, 118, 116, (uncompacted >> 11) & 0x7); - brw_eu_inst_set_bits(dst, 115, 113, (uncompacted >> 8) & 0x7); - brw_eu_inst_set_bits(dst, 112, 112, (uncompacted >> 7) & 0x1); - brw_eu_inst_set_bits(dst, 103, 99, (uncompacted >> 2) & 0x1f); - brw_eu_inst_set_bits(dst, 97, 96, 
(uncompacted >> 0) & 0x3); - } else if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(dst, 121, 120, (uncompacted >> 10)); - brw_eu_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf); - brw_eu_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7); - brw_eu_inst_set_bits(dst, 112, 112, (uncompacted >> 2) & 0x1); - brw_eu_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3); - } else { - brw_eu_inst_set_bits(dst, 120, 109, uncompacted); - } -} - -static void -set_uncompacted_3src_control_index(const struct compaction_state *c, - brw_eu_inst *dst, brw_eu_compact_inst *src, - bool is_dpas) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - - if (devinfo->ver >= 20) { - uint64_t compacted = brw_eu_compact_inst_3src_control_index(devinfo, src); - uint64_t uncompacted = is_dpas ? xe2_3src_dpas_control_index_table[compacted] : - xe2_3src_control_index_table[compacted]; - - brw_eu_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf); - brw_eu_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7); - brw_eu_inst_set_bits(dst, 82, 80, (uncompacted >> 24) & 0x7); - brw_eu_inst_set_bits(dst, 50, 50, (uncompacted >> 23) & 0x1); - brw_eu_inst_set_bits(dst, 49, 48, (uncompacted >> 21) & 0x3); - brw_eu_inst_set_bits(dst, 42, 40, (uncompacted >> 18) & 0x7); - brw_eu_inst_set_bits(dst, 39, 39, (uncompacted >> 17) & 0x1); - brw_eu_inst_set_bits(dst, 38, 36, (uncompacted >> 14) & 0x7); - brw_eu_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1); - brw_eu_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1); - brw_eu_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1); - brw_eu_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1); - brw_eu_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3); - brw_eu_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3); - brw_eu_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7); - brw_eu_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7); - - } else if (devinfo->verx10 >= 125) { - uint64_t compacted = 
brw_eu_compact_inst_3src_control_index(devinfo, src); - uint64_t uncompacted = xehp_3src_control_index_table[compacted]; - - brw_eu_inst_set_bits(dst, 95, 92, (uncompacted >> 33)); - brw_eu_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7); - brw_eu_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7); - brw_eu_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1); - brw_eu_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3); - brw_eu_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7); - brw_eu_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1); - brw_eu_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7); - brw_eu_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1); - brw_eu_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1); - brw_eu_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1); - brw_eu_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1); - brw_eu_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1); - brw_eu_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf); - brw_eu_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1); - brw_eu_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1); - brw_eu_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7); - brw_eu_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7); - - } else if (devinfo->ver >= 12) { - uint64_t compacted = brw_eu_compact_inst_3src_control_index(devinfo, src); - uint64_t uncompacted = gfx12_3src_control_index_table[compacted]; - - brw_eu_inst_set_bits(dst, 95, 92, (uncompacted >> 32)); - brw_eu_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7); - brw_eu_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7); - brw_eu_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1); - brw_eu_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1); - brw_eu_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7); - brw_eu_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1); - brw_eu_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7); - brw_eu_inst_set_bits(dst, 34, 34, 
(uncompacted >> 16) & 0x1); - brw_eu_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1); - brw_eu_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1); - brw_eu_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1); - brw_eu_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1); - brw_eu_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf); - brw_eu_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1); - brw_eu_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1); - brw_eu_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7); - brw_eu_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7); - } else { - uint32_t compacted = brw_eu_compact_inst_3src_control_index(devinfo, src); - uint32_t uncompacted = gfx8_3src_control_index_table[compacted]; - - brw_eu_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7); - brw_eu_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff); - - brw_eu_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3); - } -} - -static void -set_uncompacted_3src_source_index(const struct intel_device_info *devinfo, - brw_eu_inst *dst, brw_eu_compact_inst *src, - bool is_dpas) -{ - uint32_t compacted = brw_eu_compact_inst_3src_source_index(devinfo, src); - - if (devinfo->ver >= 12) { - const uint32_t *three_src_source_index_table = - devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table : - xe2_3src_source_index_table) : - devinfo->verx10 >= 125 ? 
xehp_3src_source_index_table : - gfx12_3src_source_index_table; - uint32_t uncompacted = three_src_source_index_table[compacted]; - - brw_eu_inst_set_bits(dst, 114, 114, (uncompacted >> 20)); - brw_eu_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3); - brw_eu_inst_set_bits(dst, 98, 98, (uncompacted >> 17) & 0x1); - brw_eu_inst_set_bits(dst, 97, 96, (uncompacted >> 15) & 0x3); - brw_eu_inst_set_bits(dst, 91, 91, (uncompacted >> 14) & 0x1); - brw_eu_inst_set_bits(dst, 87, 86, (uncompacted >> 12) & 0x3); - brw_eu_inst_set_bits(dst, 85, 84, (uncompacted >> 10) & 0x3); - brw_eu_inst_set_bits(dst, 83, 83, (uncompacted >> 9) & 0x1); - brw_eu_inst_set_bits(dst, 66, 66, (uncompacted >> 8) & 0x1); - brw_eu_inst_set_bits(dst, 65, 64, (uncompacted >> 6) & 0x3); - brw_eu_inst_set_bits(dst, 47, 47, (uncompacted >> 5) & 0x1); - brw_eu_inst_set_bits(dst, 46, 46, (uncompacted >> 4) & 0x1); - brw_eu_inst_set_bits(dst, 45, 44, (uncompacted >> 2) & 0x3); - brw_eu_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1); - brw_eu_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1); - } else { - uint64_t uncompacted = gfx8_3src_source_index_table[compacted]; - - brw_eu_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1); - brw_eu_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff); - brw_eu_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff); - brw_eu_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff); - brw_eu_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff); - - brw_eu_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3); - brw_eu_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3); - brw_eu_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1); - } -} - -static void -set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo, - brw_eu_inst *dst, brw_eu_compact_inst *src) -{ - assert(devinfo->ver >= 12); - - uint32_t compacted = brw_eu_compact_inst_3src_subreg_index(devinfo, src); - uint32_t uncompacted = (devinfo->ver >= 20 ? 
xe2_3src_subreg_table[compacted]: - gfx12_3src_subreg_table[compacted]); - - brw_eu_inst_set_bits(dst, 119, 115, (uncompacted >> 15)); - brw_eu_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f); - brw_eu_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f); - brw_eu_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f); -} - -static void -brw_uncompact_3src_instruction(const struct compaction_state *c, - brw_eu_inst *dst, brw_eu_compact_inst *src, bool is_dpas) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - -#define uncompact(field) \ - brw_eu_inst_set_3src_##field(devinfo, dst, brw_eu_compact_inst_3src_##field(devinfo, src)) -#define uncompact_a16(field) \ - brw_eu_inst_set_3src_a16_##field(devinfo, dst, brw_eu_compact_inst_3src_##field(devinfo, src)) - - uncompact(hw_opcode); - - if (devinfo->ver >= 12) { - set_uncompacted_3src_control_index(c, dst, src, is_dpas); - set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas); - set_uncompacted_3src_subreg_index(devinfo, dst, src); - - uncompact(debug_control); - uncompact(swsb); - uncompact(dst_reg_nr); - uncompact(src0_reg_nr); - uncompact(src1_reg_nr); - uncompact(src2_reg_nr); - } else { - set_uncompacted_3src_control_index(c, dst, src, is_dpas); - set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas); - - uncompact(dst_reg_nr); - uncompact_a16(src0_rep_ctrl); - uncompact(debug_control); - uncompact(saturate); - uncompact_a16(src1_rep_ctrl); - uncompact_a16(src2_rep_ctrl); - uncompact(src0_reg_nr); - uncompact(src1_reg_nr); - uncompact(src2_reg_nr); - uncompact_a16(src0_subreg_nr); - uncompact_a16(src1_subreg_nr); - uncompact_a16(src2_subreg_nr); - } - brw_eu_inst_set_3src_cmpt_control(devinfo, dst, false); - -#undef uncompact -#undef uncompact_a16 -} - -static void -uncompact_instruction(const struct compaction_state *c, brw_eu_inst *dst, - brw_eu_compact_inst *src) -{ - const struct intel_device_info *devinfo = c->isa->devinfo; - memset(dst, 0, sizeof(*dst)); - - const 
enum opcode opcode = - brw_opcode_decode(c->isa, brw_eu_compact_inst_3src_hw_opcode(devinfo, src)); - if (is_3src(c->isa, opcode)) { - const bool is_dpas = opcode == BRW_OPCODE_DPAS; - brw_uncompact_3src_instruction(c, dst, src, is_dpas); - return; - } - -#define uncompact(field) \ - brw_eu_inst_set_##field(devinfo, dst, brw_eu_compact_inst_##field(devinfo, src)) -#define uncompact_reg(field) \ - brw_eu_inst_set_##field##_da_reg_nr(devinfo, dst, \ - brw_eu_compact_inst_##field##_reg_nr(devinfo, src)) - - uncompact(hw_opcode); - uncompact(debug_control); - - set_uncompacted_control(c, dst, src); - set_uncompacted_datatype(c, dst, src); - set_uncompacted_subreg(c, dst, src); - set_uncompacted_src0(c, dst, src); - - enum brw_reg_type type; - if (has_immediate(devinfo, dst, &type)) { - unsigned imm = uncompact_immediate(devinfo, type, - brw_eu_compact_inst_imm(devinfo, src)); - brw_eu_inst_set_imm_ud(devinfo, dst, imm); - } else { - set_uncompacted_src1(c, dst, src); - uncompact_reg(src1); - } - - if (devinfo->ver >= 12) { - uncompact(swsb); - uncompact_reg(dst); - uncompact_reg(src0); - } else { - uncompact(acc_wr_control); - - uncompact(cond_modifier); - - uncompact_reg(dst); - uncompact_reg(src0); - } - brw_eu_inst_set_cmpt_control(devinfo, dst, false); - -#undef uncompact -#undef uncompact_reg -} - -void -brw_uncompact_instruction(const struct brw_isa_info *isa, - brw_eu_inst *dst, brw_eu_compact_inst *src) -{ - struct compaction_state c; - compaction_state_init(&c, isa); - uncompact_instruction(&c, dst, src); -} - -void -brw_debug_compact_uncompact(const struct brw_isa_info *isa, - brw_eu_inst *orig, - brw_eu_inst *uncompacted) -{ - fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n", - isa->devinfo->ver); - - fprintf(stderr, " before: "); - brw_disassemble_inst(stderr, isa, orig, true, 0, NULL); - - fprintf(stderr, " after: "); - brw_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL); - - uint32_t *before_bits = (uint32_t *)orig; - uint32_t 
*after_bits = (uint32_t *)uncompacted; - fprintf(stderr, " changed bits:\n"); - for (int i = 0; i < 128; i++) { - uint32_t before = before_bits[i / 32] & (1 << (i & 31)); - uint32_t after = after_bits[i / 32] & (1 << (i & 31)); - - if (before != after) { - fprintf(stderr, " bit %d, %s to %s\n", i, - before ? "set" : "unset", - after ? "set" : "unset"); - } - } -} - -static int -compacted_between(int old_ip, int old_target_ip, int *compacted_counts) -{ - int this_compacted_count = compacted_counts[old_ip]; - int target_compacted_count = compacted_counts[old_target_ip]; - return target_compacted_count - this_compacted_count; -} - -static void -update_uip_jip(const struct brw_isa_info *isa, brw_eu_inst *insn, - int this_old_ip, int *compacted_counts, - unsigned num_compacted_counts) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - /* JIP and UIP are in units of bytes on Gfx8+. */ - int shift = 3; - - /* Even though the values are signed, we don't need the rounding behavior - * of integer division. The shifts are safe. 
- */ - assert(brw_eu_inst_jip(devinfo, insn) % 8 == 0 && - brw_eu_inst_uip(devinfo, insn) % 8 == 0); - - int32_t jip_compacted = brw_eu_inst_jip(devinfo, insn) >> shift; - - jip_compacted -= compacted_between(this_old_ip, - MIN2(this_old_ip + (jip_compacted / 2), - num_compacted_counts - 1), - compacted_counts); - brw_eu_inst_set_jip(devinfo, insn, (uint32_t)jip_compacted << shift); - - if (brw_eu_inst_opcode(isa, insn) == BRW_OPCODE_ENDIF || - brw_eu_inst_opcode(isa, insn) == BRW_OPCODE_WHILE || - brw_eu_inst_opcode(isa, insn) == BRW_OPCODE_JOIN) - return; - - int32_t uip_compacted = brw_eu_inst_uip(devinfo, insn) >> shift; - - uip_compacted -= compacted_between(this_old_ip, - MIN2(this_old_ip + (uip_compacted / 2), - num_compacted_counts - 1), - compacted_counts); - brw_eu_inst_set_uip(devinfo, insn, (uint32_t)uip_compacted << shift); -} - -static void -compaction_state_init(struct compaction_state *c, - const struct brw_isa_info *isa) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0); - assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0); - assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0); - assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0); - assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0); - assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0); - assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0); - assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0); - assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0); - assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0); - assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0); - assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0); - assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0); 
- assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0); - assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0); - assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0); - assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0); - assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0); - - c->isa = isa; - switch (devinfo->ver) { - case 20: - case 30: - c->control_index_table = xe2_control_index_table; - c->datatype_table = xe2_datatype_table; - c->subreg_table = xe2_subreg_table; - c->src0_index_table = xe2_src0_index_table; - c->src1_index_table = xe2_src1_index_table; - break; - case 12: - c->control_index_table = gfx12_control_index_table;; - c->datatype_table = gfx12_datatype_table; - c->subreg_table = gfx12_subreg_table; - if (devinfo->verx10 >= 125) { - c->src0_index_table = xehp_src0_index_table; - c->src1_index_table = xehp_src1_index_table; - } else { - c->src0_index_table = gfx12_src0_index_table; - c->src1_index_table = gfx12_src1_index_table; - } - break; - case 11: - c->control_index_table = gfx8_control_index_table; - c->datatype_table = gfx11_datatype_table; - c->subreg_table = gfx8_subreg_table; - c->src0_index_table = gfx8_src_index_table; - c->src1_index_table = gfx8_src_index_table; - break; - case 9: - c->control_index_table = gfx8_control_index_table; - c->datatype_table = gfx8_datatype_table; - c->subreg_table = gfx8_subreg_table; - c->src0_index_table = gfx8_src_index_table; - c->src1_index_table = gfx8_src_index_table; - break; - default: - UNREACHABLE("unknown generation"); - } -} - -void -brw_compact_instructions(struct brw_codegen *p, int start_offset, - struct disasm_info *disasm) -{ - if (INTEL_DEBUG(DEBUG_NO_COMPACTION)) - return; - - const struct intel_device_info *devinfo = p->devinfo; - - void *store = p->store + start_offset / 16; - /* For an instruction at byte offset 16*i before compaction, this is the - * number of compacted instructions minus the 
number of padding NOP/NENOPs - * that preceded it. - */ - unsigned num_compacted_counts = - (p->next_insn_offset - start_offset) / sizeof(brw_eu_inst); - int *compacted_counts = - calloc(num_compacted_counts, sizeof(*compacted_counts)); - - /* For an instruction at byte offset 8*i after compaction, this was its IP - * (in 16-byte units) before compaction. - */ - unsigned num_old_ip = - (p->next_insn_offset - start_offset) / sizeof(brw_eu_compact_inst) + 1; - int *old_ip = calloc(num_old_ip, sizeof(*old_ip)); - - struct compaction_state c; - compaction_state_init(&c, p->isa); - - int offset = 0; - int compacted_count = 0; - for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset; - src_offset += sizeof(brw_eu_inst)) { - brw_eu_inst *src = store + src_offset; - void *dst = store + offset; - - old_ip[offset / sizeof(brw_eu_compact_inst)] = src_offset / sizeof(brw_eu_inst); - compacted_counts[src_offset / sizeof(brw_eu_inst)] = compacted_count; - - brw_eu_inst inst = precompact(p->isa, *src); - brw_eu_inst saved = inst; - - if (try_compact_instruction(&c, dst, &inst)) { - compacted_count++; - -#ifndef NDEBUG - brw_eu_inst uncompacted; - uncompact_instruction(&c, &uncompacted, dst); - if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) { - brw_debug_compact_uncompact(p->isa, &saved, &uncompacted); - assert(false); - } -#endif - - offset += sizeof(brw_eu_compact_inst); - } else { - /* If we didn't compact this instruction, we need to move it down into - * place. - */ - if (offset != src_offset) { - memmove(dst, src, sizeof(brw_eu_inst)); - } - offset += sizeof(brw_eu_inst); - } - } - - /* Add an entry for the ending offset of the program. This greatly - * simplifies the linked list walk at the end of the function. - */ - old_ip[offset / sizeof(brw_eu_compact_inst)] = - (p->next_insn_offset - start_offset) / sizeof(brw_eu_inst); - - /* Fix up control flow offsets. 
*/ - p->next_insn_offset = start_offset + offset; - for (offset = 0; offset < p->next_insn_offset - start_offset; - offset = next_offset(p, store, offset)) { - brw_eu_inst *insn = store + offset; - int this_old_ip = old_ip[offset / sizeof(brw_eu_compact_inst)]; - int this_compacted_count = compacted_counts[this_old_ip]; - - switch (brw_eu_inst_opcode(p->isa, insn)) { - case BRW_OPCODE_BREAK: - case BRW_OPCODE_CONTINUE: - case BRW_OPCODE_HALT: - update_uip_jip(p->isa, insn, this_old_ip, compacted_counts, num_compacted_counts); - break; - - case BRW_OPCODE_IF: - case BRW_OPCODE_ELSE: - case BRW_OPCODE_ENDIF: - case BRW_OPCODE_WHILE: - case BRW_OPCODE_GOTO: - case BRW_OPCODE_JOIN: - if (brw_eu_inst_cmpt_control(devinfo, insn)) { - brw_eu_inst uncompacted; - uncompact_instruction(&c, &uncompacted, - (brw_eu_compact_inst *)insn); - - update_uip_jip(p->isa, &uncompacted, this_old_ip, - compacted_counts, num_compacted_counts); - - bool ret = try_compact_instruction(&c, (brw_eu_compact_inst *)insn, - &uncompacted); - assert(ret); (void)ret; - } else { - update_uip_jip(p->isa, insn, this_old_ip, compacted_counts, - num_compacted_counts); - } - break; - - case BRW_OPCODE_ADD: - /* Add instructions modifying the IP register use an immediate src1, - * and Gens that use this cannot compact instructions with immediate - * operands. 
- */ - if (brw_eu_inst_cmpt_control(devinfo, insn)) - break; - - if (brw_eu_inst_dst_reg_file(devinfo, insn) == ARF && - brw_eu_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) { - assert(brw_eu_inst_src1_reg_file(devinfo, insn) == IMM); - - int shift = 3; - int jump_compacted = brw_eu_inst_imm_d(devinfo, insn) >> shift; - - int target_old_ip = this_old_ip + (jump_compacted / 2); - int target_compacted_count = compacted_counts[target_old_ip]; - jump_compacted -= (target_compacted_count - this_compacted_count); - brw_eu_inst_set_imm_ud(devinfo, insn, jump_compacted << shift); - } - break; - - default: - break; - } - } - - /* p->nr_insn is counting the number of uncompacted instructions still, so - * divide. We do want to be sure there's a valid instruction in any - * alignment padding, so that the next compression pass (for the FS 8/16 - * compile passes) parses correctly. - */ - if (p->next_insn_offset & sizeof(brw_eu_compact_inst)) { - brw_eu_compact_inst *align = store + offset; - memset(align, 0, sizeof(*align)); - brw_eu_compact_inst_set_hw_opcode( - devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NOP)); - brw_eu_compact_inst_set_cmpt_control(devinfo, align, true); - p->next_insn_offset += sizeof(brw_eu_compact_inst); - } - p->nr_insn = p->next_insn_offset / sizeof(brw_eu_inst); - - for (int i = 0; i < p->num_relocs; i++) { - if (p->relocs[i].offset < (uint32_t)start_offset) - continue; - - assert(p->relocs[i].offset % 16 == 0); - unsigned idx = (p->relocs[i].offset - start_offset) / 16; - p->relocs[i].offset -= compacted_counts[idx] * 8; - } - - /* Update the instruction offsets for each group. 
*/ - if (disasm) { - int offset = 0; - - brw_foreach_list_typed(struct inst_group, group, link, &disasm->group_list) { - while (start_offset + old_ip[offset / sizeof(brw_eu_compact_inst)] * - sizeof(brw_eu_inst) != group->offset) { - assert(start_offset + old_ip[offset / sizeof(brw_eu_compact_inst)] * - sizeof(brw_eu_inst) < group->offset); - offset = next_offset(p, store, offset); - } - - group->offset = start_offset + offset; - } - } - - free(compacted_counts); - free(old_ip); -} diff --git a/src/intel/compiler/brw/brw_eu_emit.c b/src/intel/compiler/brw/brw_eu_emit.c deleted file mode 100644 index c3b35fadaaa..00000000000 --- a/src/intel/compiler/brw/brw_eu_emit.c +++ /dev/null @@ -1,1574 +0,0 @@ -/* - * Copyright © 2006 Intel Corporation - * SPDX-License-Identifier: MIT - * - * Intel funded Tungsten Graphics to develop this 3D driver. - * File originally authored by: Keith Whitwell - */ - -#include "brw_eu_defines.h" -#include "brw_eu.h" - -#include "util/ralloc.h" -#include "compiler/gen/gen.h" - -void -brw_set_dest(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg dest) -{ - const struct intel_device_info *devinfo = p->devinfo; - - if (dest.file == FIXED_GRF) - assert(dest.nr < XE3_MAX_GRF); - - /* The hardware has a restriction where a destination of size Byte with - * a stride of 1 is only allowed for a packed byte MOV. For any other - * instruction, the stride must be at least 2, even when the destination - * is the NULL register. 
- */ - if (brw_reg_is_arf(dest, BRW_ARF_NULL) && - brw_type_size_bytes(dest.type) == 1 && - dest.hstride == BRW_HORIZONTAL_STRIDE_1) { - dest.hstride = BRW_HORIZONTAL_STRIDE_2; - } - - const enum opcode opcode = brw_eu_inst_opcode(p->isa, inst); - - if (devinfo->ver >= 12 && - (opcode == BRW_OPCODE_SEND || - opcode == BRW_OPCODE_SENDC)) { - assert(dest.file == FIXED_GRF || - dest.file == ADDRESS || - dest.file == ARF); - assert(dest.address_mode == BRW_ADDRESS_DIRECT); - assert(dest.subnr == 0); - assert(brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 || - (dest.hstride == BRW_HORIZONTAL_STRIDE_1 && - dest.vstride == dest.width + 1)); - assert(!dest.negate && !dest.abs); - brw_eu_inst_set_dst_reg_file(devinfo, inst, phys_file(dest)); - brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest)); - - } else if (opcode == BRW_OPCODE_SENDS || - opcode == BRW_OPCODE_SENDSC) { - assert(devinfo->ver < 12); - assert(dest.file == FIXED_GRF || - dest.file == ADDRESS || - dest.file == ARF); - assert(dest.address_mode == BRW_ADDRESS_DIRECT); - assert(dest.subnr % 16 == 0); - assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && - dest.vstride == dest.width + 1); - assert(!dest.negate && !dest.abs); - brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest)); - brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_eu_inst_set_send_dst_reg_file(devinfo, inst, phys_file(dest)); - } else { - brw_eu_inst_set_dst_file_type(devinfo, inst, phys_file(dest), dest.type); - brw_eu_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - - if (dest.address_mode == BRW_ADDRESS_DIRECT) { - brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest)); - - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_eu_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest)); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_eu_inst_set_dst_hstride(devinfo, inst, 
dest.hstride); - } else { - brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_eu_inst_set_da16_writemask(devinfo, inst, dest.writemask); - if (dest.file == FIXED_GRF) { - assert(dest.writemask != 0); - } - /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: - * Although Dst.HorzStride is a don't care for Align16, HW needs - * this to be programmed as "01". - */ - brw_eu_inst_set_dst_hstride(devinfo, inst, 1); - } - } else { - brw_eu_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest)); - - /* These are different sizes in align1 vs align16: - */ - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_eu_inst_set_dst_ia1_addr_imm(devinfo, inst, - dest.indirect_offset); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride); - } else { - brw_eu_inst_set_dst_ia16_addr_imm(devinfo, inst, - dest.indirect_offset); - /* even ignored in da16, still need to set as '01' */ - brw_eu_inst_set_dst_hstride(devinfo, inst, 1); - } - } - } -} - -void -brw_set_src0(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg) -{ - const struct intel_device_info *devinfo = p->devinfo; - - if (reg.file == FIXED_GRF) - assert(reg.nr < XE3_MAX_GRF); - - const enum opcode opcode = brw_eu_inst_opcode(p->isa, inst); - - if (opcode == BRW_OPCODE_SEND || - opcode == BRW_OPCODE_SENDC || - opcode == BRW_OPCODE_SENDS || - opcode == BRW_OPCODE_SENDSC) { - /* Any source modifiers or regions will be ignored, since this just - * identifies the GRF to start reading the message contents from. - * Check for some likely failures. 
- */ - assert(!reg.negate); - assert(!reg.abs); - assert(reg.address_mode == BRW_ADDRESS_DIRECT); - } - - if (devinfo->ver >= 12 && - (opcode == BRW_OPCODE_SEND || - opcode == BRW_OPCODE_SENDC)) { - assert(reg.file == ARF || reg.file == FIXED_GRF); - assert(reg.address_mode == BRW_ADDRESS_DIRECT); - assert(has_scalar_region(reg) || - (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && - reg.vstride == reg.width + 1)); - assert(!reg.negate && !reg.abs); - - brw_eu_inst_set_send_src0_reg_file(devinfo, inst, phys_file(reg)); - brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg)); - - if (brw_reg_is_arf(reg, BRW_ARF_SCALAR)) { - assert(reg.subnr % 2 == 0); - brw_eu_inst_set_send_src0_subreg_nr(devinfo, inst, reg.subnr / 2); - } else { - assert(reg.subnr == 0); - } - } else if (opcode == BRW_OPCODE_SENDS || - opcode == BRW_OPCODE_SENDSC) { - assert(reg.file == FIXED_GRF); - assert(reg.address_mode == BRW_ADDRESS_DIRECT); - assert(reg.subnr % 16 == 0); - assert(has_scalar_region(reg) || - (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && - reg.vstride == reg.width + 1)); - assert(!reg.negate && !reg.abs); - brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg)); - brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } else { - brw_eu_inst_set_src0_file_type(devinfo, inst, phys_file(reg), reg.type); - brw_eu_inst_set_src0_abs(devinfo, inst, reg.abs); - brw_eu_inst_set_src0_negate(devinfo, inst, reg.negate); - brw_eu_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); - - if (reg.file == IMM) { - if (reg.type == BRW_TYPE_DF) - brw_eu_inst_set_imm_df(devinfo, inst, reg.df); - else if (reg.type == BRW_TYPE_UQ || - reg.type == BRW_TYPE_Q) - brw_eu_inst_set_imm_uq(devinfo, inst, reg.u64); - else - brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud); - - if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) { - brw_eu_inst_set_src1_reg_file(devinfo, inst, - ARF); - brw_eu_inst_set_src1_reg_hw_type(devinfo, inst, - 
brw_eu_inst_src0_reg_hw_type(devinfo, inst)); - } - } else { - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg)); - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_eu_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg)); - } else { - brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } - } else { - brw_eu_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg)); - - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_eu_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); - } else { - brw_eu_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); - } - } - - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_eu_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_eu_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); - brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_eu_inst_set_src0_hstride(devinfo, inst, reg.hstride); - brw_eu_inst_set_src0_width(devinfo, inst, reg.width); - brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_eu_inst_set_src0_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_eu_inst_set_src0_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_eu_inst_set_src0_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_eu_inst_set_src0_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else { - brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride); - } - } - } - } -} - - -void 
-brw_set_src1(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg) -{ - const struct intel_device_info *devinfo = p->devinfo; - - if (reg.file == FIXED_GRF) - assert(reg.nr < XE3_MAX_GRF); - - const enum opcode opcode = brw_eu_inst_opcode(p->isa, inst); - - if (opcode == BRW_OPCODE_SENDS || - opcode == BRW_OPCODE_SENDSC || - (devinfo->ver >= 12 && - (opcode == BRW_OPCODE_SEND || - opcode == BRW_OPCODE_SENDC))) { - assert(reg.file == FIXED_GRF || - reg.file == ARF || - reg.file == ADDRESS); - assert(reg.address_mode == BRW_ADDRESS_DIRECT); - assert(reg.subnr == 0); - assert(has_scalar_region(reg) || - (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && - reg.vstride == reg.width + 1)); - assert(!reg.negate && !reg.abs); - brw_eu_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg)); - brw_eu_inst_set_send_src1_reg_file(devinfo, inst, phys_file(reg)); - } else { - /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: - * - * "Accumulator registers may be accessed explicitly as src0 - * operands only." - * - * Bspec 47251 (r48459) says for [ACM, ACMPLUS, ATS, PVC, RLT, MAR, MTL, - * ARL]: - * - * Accumulator registers may be accessed explicitly on src0 and src1 - * operand. - */ - assert(devinfo->verx10 >= 125 || - !brw_reg_is_arf(reg, BRW_ARF_ACCUMULATOR)); - - brw_eu_inst_set_src1_file_type(devinfo, inst, phys_file(reg), reg.type); - brw_eu_inst_set_src1_abs(devinfo, inst, reg.abs); - brw_eu_inst_set_src1_negate(devinfo, inst, reg.negate); - - /* Only src1 can be immediate in two-argument instructions. 
- */ - assert(brw_eu_inst_src0_reg_file(devinfo, inst) != IMM); - - if (reg.file == IMM) { - /* two-argument instructions can only use 32-bit immediates */ - assert(brw_type_size_bytes(reg.type) < 8); - brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud); - } else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: - */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - /* assert (reg.file == FIXED_GRF); */ - - brw_eu_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg)); - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_eu_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg)); - } else { - brw_eu_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } - - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_eu_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_eu_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); - brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_eu_inst_set_src1_hstride(devinfo, inst, reg.hstride); - brw_eu_inst_set_src1_width(devinfo, inst, reg.width); - brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_eu_inst_set_src1_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_eu_inst_set_src1_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_eu_inst_set_src1_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_eu_inst_set_src1_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else { - brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride); - } - } - } - 
} -} - -/** - * Specify the descriptor and extended descriptor immediate for a SEND(C) - * message instruction. - */ -void -brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *inst, - unsigned desc, unsigned ex_desc, bool gather) -{ - const struct intel_device_info *devinfo = p->devinfo; - assert(!gather || devinfo->ver >= 30); - assert(brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND || - brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC); - if (devinfo->ver < 12) - brw_eu_inst_set_src1_file_type(devinfo, inst, - IMM, BRW_TYPE_UD); - brw_eu_inst_set_send_desc(devinfo, inst, desc); - if (devinfo->ver >= 9) - brw_eu_inst_set_send_ex_desc(devinfo, inst, ex_desc, gather); -} - -static void -brw_eu_inst_set_state(const struct brw_isa_info *isa, - brw_eu_inst *insn, - const struct brw_insn_state *state) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - brw_eu_inst_set_exec_size(devinfo, insn, state->exec_size); - brw_eu_inst_set_group(devinfo, insn, state->group); - brw_eu_inst_set_access_mode(devinfo, insn, state->access_mode); - brw_eu_inst_set_mask_control(devinfo, insn, state->mask_control); - if (devinfo->ver >= 12) - brw_eu_inst_set_swsb(devinfo, insn, - brw_swsb_encode(devinfo, state->swsb, - brw_eu_inst_opcode(isa, insn))); - brw_eu_inst_set_saturate(devinfo, insn, state->saturate); - brw_eu_inst_set_pred_control(devinfo, insn, state->predicate); - brw_eu_inst_set_pred_inv(devinfo, insn, state->pred_inv); - - if (is_3src(isa, brw_eu_inst_opcode(isa, insn)) && - state->access_mode == BRW_ALIGN_16) { - brw_eu_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2); - brw_eu_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); - } else { - brw_eu_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2); - brw_eu_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); - } - - if (devinfo->ver < 20) - brw_eu_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control); -} - -static brw_eu_inst * 
-brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment) -{ - assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst))); - assert(util_is_power_of_two_or_zero(alignment)); - const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1); - const unsigned start_insn = align(p->nr_insn, align_insn); - const unsigned new_nr_insn = start_insn + nr_insn; - - if (p->store_size < new_nr_insn) { - p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_eu_inst)); - p->store = reralloc(p->mem_ctx, p->store, brw_eu_inst, p->store_size); - } - - /* Memset any padding due to alignment to 0. We don't want to be hashing - * or caching a bunch of random bits we got from a memory allocation. - */ - if (p->nr_insn < start_insn) { - memset(&p->store[p->nr_insn], 0, - (start_insn - p->nr_insn) * sizeof(brw_eu_inst)); - } - - assert(p->next_insn_offset == p->nr_insn * sizeof(brw_eu_inst)); - p->nr_insn = new_nr_insn; - p->next_insn_offset = new_nr_insn * sizeof(brw_eu_inst); - - return &p->store[start_insn]; -} - -void -brw_realign(struct brw_codegen *p, unsigned alignment) -{ - brw_append_insns(p, 0, alignment); -} - -int -brw_append_data(struct brw_codegen *p, void *data, - unsigned size, unsigned alignment) -{ - unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_eu_inst)); - void *dst = brw_append_insns(p, nr_insn, alignment); - memcpy(dst, data, size); - - /* If it's not a whole number of instructions, memset the end */ - if (size < nr_insn * sizeof(brw_eu_inst)) - memset(dst + size, 0, nr_insn * sizeof(brw_eu_inst) - size); - - return dst - (void *)p->store; -} - -#define next_insn brw_next_insn -brw_eu_inst * -brw_next_insn(struct brw_codegen *p, unsigned opcode) -{ - brw_eu_inst *insn = brw_append_insns(p, 1, sizeof(brw_eu_inst)); - - memset(insn, 0, sizeof(*insn)); - brw_eu_inst_set_opcode(p->isa, insn, opcode); - - /* Apply the default instruction state */ - brw_eu_inst_set_state(p->isa, insn, p->current); - - return insn; -} - -void 
-brw_add_reloc(struct brw_codegen *p, uint32_t id, - enum intel_shader_reloc_type type, - uint32_t offset, uint32_t delta) -{ - if (p->num_relocs + 1 > p->reloc_array_size) { - p->reloc_array_size = MAX2(16, p->reloc_array_size * 2); - p->relocs = reralloc(p->mem_ctx, p->relocs, - struct intel_shader_reloc, p->reloc_array_size); - } - - p->relocs[p->num_relocs++] = (struct intel_shader_reloc) { - .id = id, - .type = type, - .offset = offset, - .delta = delta, - }; -} - -brw_eu_inst * -brw_alu1(struct brw_codegen *p, unsigned opcode, - struct brw_reg dest, struct brw_reg src) -{ - brw_eu_inst *insn = next_insn(p, opcode); - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src); - return insn; -} - -brw_eu_inst * -brw_alu2(struct brw_codegen *p, unsigned opcode, - struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) -{ - /* 64-bit immediates are only supported on 1-src instructions */ - assert(src0.file != IMM || - brw_type_size_bytes(src0.type) <= 4); - assert(src1.file != IMM || - brw_type_size_bytes(src1.type) <= 4); - - brw_eu_inst *insn = next_insn(p, opcode); - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, src1); - return insn; -} - -static enum brw_align1_3src_vertical_stride -to_3src_align1_vstride(const struct intel_device_info *devinfo, - enum brw_vertical_stride vstride) -{ - switch (vstride) { - case BRW_VERTICAL_STRIDE_0: - return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0; - case BRW_VERTICAL_STRIDE_1: - assert(devinfo->ver >= 12); - return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1; - case BRW_VERTICAL_STRIDE_2: - assert(devinfo->ver < 12); - return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2; - case BRW_VERTICAL_STRIDE_4: - return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4; - case BRW_VERTICAL_STRIDE_8: - case BRW_VERTICAL_STRIDE_16: - return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8; - default: - UNREACHABLE("invalid vstride"); - } -} - -static enum brw_align1_3src_dst_horizontal_stride -to_3src_align1_dst_hstride(enum brw_horizontal_stride 
hstride) -{ - switch (hstride) { - case BRW_HORIZONTAL_STRIDE_1: - return BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1; - case BRW_HORIZONTAL_STRIDE_2: - return BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_2; - default: - UNREACHABLE("invalid hstride"); - } -} - -static enum brw_align1_3src_src_horizontal_stride -to_3src_align1_hstride(enum brw_horizontal_stride hstride) -{ - switch (hstride) { - case BRW_HORIZONTAL_STRIDE_0: - return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0; - case BRW_HORIZONTAL_STRIDE_1: - return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1; - case BRW_HORIZONTAL_STRIDE_2: - return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2; - case BRW_HORIZONTAL_STRIDE_4: - return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4; - default: - UNREACHABLE("invalid hstride"); - } -} - -brw_eu_inst * -brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, - struct brw_reg src0, struct brw_reg src1, struct brw_reg src2) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *inst = next_insn(p, opcode); - - assert(dest.nr < XE3_MAX_GRF); - - if (devinfo->ver <= 9) { - assert(src0.file != IMM && src2.file != IMM); - } else if (devinfo->ver <= 11) { - /* On Ice Lake, BFE and CSEL cannot have any immediate sources. */ - assert((opcode != BRW_OPCODE_BFE && opcode != BRW_OPCODE_CSEL) || - (src0.file != IMM && src2.file != IMM)); - - /* On Ice Lake, DP4A and MAD can only have one immediate source. */ - assert((opcode != BRW_OPCODE_DP4A && opcode != BRW_OPCODE_MAD) || - !(src0.file == IMM && src2.file == IMM)); - } else { - /* Having two immediate sources is allowed, but this should have been - * converted to a regular ADD by brw_opt_algebraic. - */ - assert(opcode != BRW_OPCODE_ADD3 || - !(src0.file == IMM && src2.file == IMM)); - } - - /* BFI2 cannot have any immediate sources on any platform. 
*/ - assert(opcode != BRW_OPCODE_BFI2 || - (src0.file != IMM && src2.file != IMM)); - - assert(src0.file == IMM || src0.nr < XE3_MAX_GRF); - assert(src1.file != IMM && src1.nr < XE3_MAX_GRF); - assert(src2.file == IMM || src2.nr < XE3_MAX_GRF); - assert(dest.address_mode == BRW_ADDRESS_DIRECT); - assert(src0.address_mode == BRW_ADDRESS_DIRECT); - assert(src1.address_mode == BRW_ADDRESS_DIRECT); - assert(src2.address_mode == BRW_ADDRESS_DIRECT); - - if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - assert(dest.file == FIXED_GRF || - brw_reg_is_arf(dest, BRW_ARF_ACCUMULATOR) || - brw_reg_is_arf(dest, BRW_ARF_NULL)); - - brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest)); - brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest)); - brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest)); - brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst, - to_3src_align1_dst_hstride(dest.hstride)); - - if (brw_type_is_float_or_bfloat(dest.type)) { - brw_eu_inst_set_3src_a1_exec_type(devinfo, inst, - BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); - } else { - brw_eu_inst_set_3src_a1_exec_type(devinfo, inst, - BRW_ALIGN1_3SRC_EXEC_TYPE_INT); - } - - brw_eu_inst_set_3src_a1_dst_type(devinfo, inst, dest.type); - brw_eu_inst_set_3src_a1_src0_type(devinfo, inst, src0.type); - brw_eu_inst_set_3src_a1_src1_type(devinfo, inst, src1.type); - brw_eu_inst_set_3src_a1_src2_type(devinfo, inst, src2.type); - - if (src0.file == IMM) { - brw_eu_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud); - } else { - brw_eu_inst_set_3src_a1_src0_vstride( - devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride)); - brw_eu_inst_set_3src_a1_src0_hstride(devinfo, inst, - to_3src_align1_hstride(src0.hstride)); - brw_eu_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0)); - brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0)); - brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs); - 
brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate); - } - brw_eu_inst_set_3src_a1_src1_vstride( - devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride)); - brw_eu_inst_set_3src_a1_src1_hstride(devinfo, inst, - to_3src_align1_hstride(src1.hstride)); - - brw_eu_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1)); - brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1)); - brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs); - brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate); - - if (src2.file == IMM) { - brw_eu_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud); - } else { - brw_eu_inst_set_3src_a1_src2_hstride(devinfo, inst, - to_3src_align1_hstride(src2.hstride)); - /* no vstride on src2 */ - brw_eu_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2)); - brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2)); - brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs); - brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate); - } - - if (devinfo->ver >= 12) { - if (src0.file == IMM) { - brw_eu_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1); - } else { - brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0)); - } - - brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1)); - - if (src2.file == IMM) { - brw_eu_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1); - } else { - brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2)); - } - } else { - brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0)); - brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1)); - brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2)); - } - - } else { - assert(dest.file == FIXED_GRF); - assert(dest.type == BRW_TYPE_F || - dest.type == BRW_TYPE_DF || - dest.type == BRW_TYPE_D || - dest.type == BRW_TYPE_UD || - dest.type == BRW_TYPE_HF); - brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); - 
brw_eu_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4); - brw_eu_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); - - assert(src0.file == FIXED_GRF); - brw_eu_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr); - brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); - brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs); - brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate); - - /* From "Instruction Fields": - * - * ChanSel does not apply when Replicate Control is set. - * - * In the code ChanSel is swizzle. Also apply to the src1 and src2. - */ - if (src0.vstride == BRW_VERTICAL_STRIDE_0) - brw_eu_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, 1); - else - brw_eu_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle); - - assert(src1.file == FIXED_GRF); - brw_eu_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr); - brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); - brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs); - brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate); - if (src1.vstride == BRW_VERTICAL_STRIDE_0) - brw_eu_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst, 1); - else - brw_eu_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle); - - assert(src2.file == FIXED_GRF); - brw_eu_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr); - brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); - brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs); - brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate); - if (src2.vstride == BRW_VERTICAL_STRIDE_0) - brw_eu_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst, 1); - else - brw_eu_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle); - - /* Set both the source and destination types based on dest.type, - * ignoring the source register types. The MAD and LRP emitters ensure - * that all four types are float. 
The BFE and BFI2 emitters, however, - * may send us mixed D and UD types and want us to ignore that and use - * the destination type. - */ - brw_eu_inst_set_3src_a16_src_type(devinfo, inst, dest.type); - brw_eu_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); - - /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: - * - * "Three source instructions can use operands with mixed-mode - * precision. When SrcType field is set to :f or :hf it defines - * precision for source 0 only, and fields Src1Type and Src2Type - * define precision for other source operands: - * - * 0b = :f. Single precision Float (32-bit). - * 1b = :hf. Half precision Float (16-bit)." - */ - if (src1.type == BRW_TYPE_HF) - brw_eu_inst_set_3src_a16_src1_type(devinfo, inst, 1); - - if (src2.type == BRW_TYPE_HF) - brw_eu_inst_set_3src_a16_src2_type(devinfo, inst, 1); - } - - return inst; -} - -static brw_eu_inst * -brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode, - enum gfx12_systolic_depth sdepth, unsigned rcount, struct brw_reg dest, - struct brw_reg src0, struct brw_reg src1, struct brw_reg src2) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *inst = next_insn(p, opcode); - - assert(dest.file == FIXED_GRF); - brw_eu_inst_set_dpas_3src_dst_reg_file(devinfo, inst, - FIXED_GRF); - brw_eu_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest)); - brw_eu_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest)); - - if (brw_type_is_float_or_bfloat(dest.type)) { - brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst, - BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); - } else { - brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst, - BRW_ALIGN1_3SRC_EXEC_TYPE_INT); - } - - brw_eu_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth); - brw_eu_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1); - - brw_eu_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type); - brw_eu_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type); - 
brw_eu_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type); - brw_eu_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type); - - assert(src0.file == FIXED_GRF || - brw_reg_is_arf(src0, BRW_ARF_NULL)); - - brw_eu_inst_set_dpas_3src_src0_reg_file(devinfo, inst, phys_file(src0)); - brw_eu_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0)); - brw_eu_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0)); - - assert(src1.file == FIXED_GRF); - - brw_eu_inst_set_dpas_3src_src1_reg_file(devinfo, inst, phys_file(src1)); - brw_eu_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1)); - brw_eu_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1)); - brw_eu_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE); - - assert(src2.file == FIXED_GRF); - - brw_eu_inst_set_dpas_3src_src2_reg_file(devinfo, inst, phys_file(src2)); - brw_eu_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2)); - brw_eu_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2)); - brw_eu_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE); - - return inst; -} - -/*********************************************************************** - * Convenience routines. 
- */ -#define ALU1(OP) \ -brw_eu_inst *brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src0) \ -{ \ - return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ -} - -#define ALU2(OP) \ -brw_eu_inst *brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1) \ -{ \ - return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ -} - -#define ALU3(OP) \ -brw_eu_inst *brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1, \ - struct brw_reg src2) \ -{ \ - return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ -} - -ALU2(SEL) -ALU1(NOT) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHR) -ALU2(SHL) -ALU2(ASR) -ALU2(ROL) -ALU2(ROR) -ALU3(CSEL) -ALU1(FRC) -ALU1(RNDD) -ALU1(RNDE) -ALU1(RNDU) -ALU1(RNDZ) -ALU2(MAC) -ALU2(MACL) -ALU2(MACH) -ALU1(LZD) -ALU2(DP4) -ALU2(DPH) -ALU2(DP3) -ALU2(DP2) -ALU3(DP4A) -ALU3(MAD) -ALU3(LRP) -ALU1(BFREV) -ALU3(BFE) -ALU2(BFI1) -ALU3(BFI2) -ALU1(FBH) -ALU1(FBL) -ALU1(CBIT) -ALU2(ADDC) -ALU2(SUBB) -ALU3(ADD3) -ALU1(MOV) -ALU2(MUL) -ALU2(AVG) -ALU2(ADD) -ALU2(SRND) -ALU2(LINE) -ALU2(PLN) - -brw_eu_inst * -brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth, - unsigned rcount, struct brw_reg dest, struct brw_reg src0, - struct brw_reg src1, struct brw_reg src2) -{ - return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0, - src1, src2); -} - -void brw_NOP(struct brw_codegen *p) -{ - brw_eu_inst *insn = next_insn(p, BRW_OPCODE_NOP); - memset(insn, 0, sizeof(*insn)); - brw_eu_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP); -} - -void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func) -{ - brw_eu_inst *insn = next_insn(p, BRW_OPCODE_SYNC); - brw_eu_inst_set_saturate(p->devinfo, insn, 0); - brw_eu_inst_set_cond_modifier(p->devinfo, insn, func); -} - -/*********************************************************************** - * Comparisons, if/else/endif - */ - -brw_eu_inst * -brw_BFN(struct 
brw_codegen *p, struct brw_reg dest, - struct brw_reg src0, struct brw_reg src1, struct brw_reg src2, - struct brw_reg table_byte) -{ - brw_eu_inst *inst = brw_alu3(p, BRW_OPCODE_BFN, dest, src0, src1, src2); - brw_eu_inst_set_boolean_func_ctrl(p->devinfo, inst, table_byte.ud); - return inst; -} - -brw_eu_inst * -brw_JMPI(struct brw_codegen *p, struct brw_reg index, - unsigned predicate_control) -{ - const struct intel_device_info *devinfo = p->devinfo; - struct brw_reg ip = brw_ip_reg(); - brw_eu_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index); - - brw_eu_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1); - brw_eu_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE); - brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE); - brw_eu_inst_set_pred_control(devinfo, inst, predicate_control); - - return inst; -} - -static void -push_if_stack(struct brw_codegen *p, brw_eu_inst *inst) -{ - p->if_stack[p->if_stack_depth] = inst - p->store; - - p->if_stack_depth++; - if (p->if_stack_array_size <= p->if_stack_depth) { - p->if_stack_array_size *= 2; - p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, - p->if_stack_array_size); - } -} - -static brw_eu_inst * -pop_if_stack(struct brw_codegen *p) -{ - p->if_stack_depth--; - return &p->store[p->if_stack[p->if_stack_depth]]; -} - -static void -push_loop_stack(struct brw_codegen *p, brw_eu_inst *inst) -{ - if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) { - p->loop_stack_array_size *= 2; - p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, - p->loop_stack_array_size); - } - - p->loop_stack[p->loop_stack_depth] = inst - p->store; - p->loop_stack_depth++; -} - -static brw_eu_inst * -get_inner_do_insn(struct brw_codegen *p) -{ - return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; -} - -/* EU takes the value from the flag register and pushes it onto some - * sort of a stack (presumably merging with any flag value already on - * the stack). 
Within an if block, the flags at the top of the stack - * control execution on each channel of the unit, eg. on each of the - * 16 pixel values in our wm programs. - * - * When the matching 'else' instruction is reached (presumably by - * countdown of the instruction count patched in by our ELSE/ENDIF - * functions), the relevant flags are inverted. - * - * When the matching 'endif' instruction is reached, the flags are - * popped off. If the stack is now empty, normal execution resumes. - */ -brw_eu_inst * -brw_IF(struct brw_codegen *p, unsigned execute_size) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn; - - insn = next_insn(p, BRW_OPCODE_IF); - - /* UIP and JIP set by patch_IF_ELSE(). */ - - brw_eu_inst_set_exec_size(devinfo, insn, execute_size); - brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL); - brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); - - push_if_stack(p, insn); - return insn; -} - -/** - * Patch IF and ELSE instructions with appropriate jump targets. 
- */ -static void -patch_IF_ELSE(struct brw_codegen *p, - brw_eu_inst *if_inst, brw_eu_inst *else_inst, brw_eu_inst *endif_inst) -{ - const struct intel_device_info *devinfo = p->devinfo; - - assert(if_inst != NULL && brw_eu_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF); - assert(endif_inst != NULL); - assert(else_inst == NULL || brw_eu_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE); - - unsigned br = brw_jump_scale(devinfo); - - assert(brw_eu_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF); - brw_eu_inst_set_exec_size(devinfo, endif_inst, brw_eu_inst_exec_size(devinfo, if_inst)); - - if (else_inst == NULL) { - /* Patch IF -> ENDIF */ - brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); - brw_eu_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst)); - } else { - brw_eu_inst_set_exec_size(devinfo, else_inst, brw_eu_inst_exec_size(devinfo, if_inst)); - - /* Patch ELSE -> ENDIF */ - /* The IF instruction's JIP should point just past the ELSE */ - brw_eu_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1)); - /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ - brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); - - if (devinfo->ver < 11) { - /* Set the ELSE instruction to use branch_ctrl with a join - * jump target pointing at the NOP inserted right before - * the ENDIF instruction in order to make sure it is - * executed in all cases, since attempting to do the same - * as on other generations could cause the EU to jump at - * the instruction immediately after the ENDIF due to - * Wa_220160235, which could cause the program to continue - * running with all channels disabled. 
- */ - brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1)); - brw_eu_inst_set_branch_control(devinfo, else_inst, true); - } else { - brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst)); - } - - /* Since we don't set branch_ctrl on Gfx11+, the ELSE's - * JIP and UIP both should point to ENDIF on those - * platforms. - */ - brw_eu_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst)); - } -} - -void -brw_ELSE(struct brw_codegen *p) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn; - - insn = next_insn(p, BRW_OPCODE_ELSE); - - /* UIP and JIP set by patch_IF_ELSE(). */ - - brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); - - push_if_stack(p, insn); -} - -void -brw_ENDIF(struct brw_codegen *p) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn = NULL; - brw_eu_inst *else_inst = NULL; - brw_eu_inst *if_inst = NULL; - brw_eu_inst *tmp; - - assert(p->if_stack_depth > 0); - - if (devinfo->ver < 11 && - brw_eu_inst_opcode(p->isa, &p->store[p->if_stack[ - p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) { - /* Insert a NOP to be specified as join instruction within the - * ELSE block, which is valid for an ELSE instruction with - * branch_ctrl on. The ELSE instruction will be set to jump - * here instead of to the ENDIF instruction, since attempting to - * do the latter would prevent the ENDIF from being executed in - * some cases due to Wa_220160235, which could cause the program - * to continue running with all channels disabled. 
- */ - brw_NOP(p); - } - - /* - * A single next_insn() may change the base address of instruction store - * memory(p->store), so call it first before referencing the instruction - * store pointer from an index - */ - insn = next_insn(p, BRW_OPCODE_ENDIF); - - /* Pop the IF and (optional) ELSE instructions from the stack */ - tmp = pop_if_stack(p); - if (brw_eu_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) { - else_inst = tmp; - tmp = pop_if_stack(p); - } - if_inst = tmp; - - brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); - - brw_eu_inst_set_jip(devinfo, insn, 2); - brw_eu_inst_set_unused_uip(devinfo, insn); - - patch_IF_ELSE(p, if_inst, else_inst, insn); -} - -brw_eu_inst * -brw_BREAK(struct brw_codegen *p) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn; - - insn = next_insn(p, BRW_OPCODE_BREAK); - - /* UIP and JIP set by brw_set_uip_jip(). */ - - brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); - - return insn; -} - -brw_eu_inst * -brw_CONT(struct brw_codegen *p) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn; - - insn = next_insn(p, BRW_OPCODE_CONTINUE); - - /* UIP and JIP set by brw_set_uip_jip(). */ - - brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); - return insn; -} - -brw_eu_inst * -brw_HALT(struct brw_codegen *p) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn; - - insn = next_insn(p, BRW_OPCODE_HALT); - - /* UIP and JIP set by brw_set_uip_jip(). 
*/ - - brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); - return insn; -} - -/* DO/WHILE loop: - * - * The DO/WHILE is just an unterminated loop -- break or continue are - * used for control within the loop. We have a few ways they can be - * done. - * - * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, - * jip and no DO instruction. - * - * For gfx6, there's no more mask stack, so no need for DO. WHILE - * just points back to the first instruction of the loop. - */ -brw_eu_inst * -brw_DO(struct brw_codegen *p, unsigned execute_size) -{ - push_loop_stack(p, &p->store[p->nr_insn]); - return &p->store[p->nr_insn]; -} - -brw_eu_inst * -brw_WHILE(struct brw_codegen *p) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn, *do_insn; - unsigned br = brw_jump_scale(devinfo); - - insn = next_insn(p, BRW_OPCODE_WHILE); - do_insn = get_inner_do_insn(p); - - brw_eu_inst_set_jip(devinfo, insn, br * (do_insn - insn)); - brw_eu_inst_set_unused_uip(devinfo, insn); - - brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); - - brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - - p->loop_stack_depth--; - - return insn; -} - -void brw_CMP(struct brw_codegen *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMP); - - brw_eu_inst_set_cond_modifier(devinfo, insn, conditional); - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, src1); -} - -void brw_CMPN(struct brw_codegen *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMPN); - - brw_eu_inst_set_cond_modifier(devinfo, insn, 
conditional); - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, src1); -} - -/*********************************************************************** - * Helpers for the various SEND message types: - */ - -void gfx6_math(struct brw_codegen *p, - struct brw_reg dest, - unsigned function, - struct brw_reg src0, - struct brw_reg src1) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_eu_inst *insn = next_insn(p, BRW_OPCODE_MATH); - - assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - - /* This workaround says that we cannot use scalar broadcast with HF types. - * However, for is_scalar values, all 16 elements contain the same value, so - * we can replace a <0,1,0> region with <16,16,1> without ill effect. - */ - if (intel_needs_workaround(devinfo, 22016140776)) { - if (src0.is_scalar && src0.type == BRW_TYPE_HF) { - src0.vstride = BRW_VERTICAL_STRIDE_16; - src0.width = BRW_WIDTH_16; - src0.hstride = BRW_HORIZONTAL_STRIDE_1; - src0.swizzle = BRW_SWIZZLE_XYZW; - } - - if (src1.is_scalar && src1.type == BRW_TYPE_HF) { - src1.vstride = BRW_VERTICAL_STRIDE_16; - src1.width = BRW_WIDTH_16; - src1.hstride = BRW_HORIZONTAL_STRIDE_1; - src1.swizzle = BRW_SWIZZLE_XYZW; - } - } - - brw_eu_inst_set_math_function(devinfo, insn, function); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, src1); -} - -void -brw_SEND(struct brw_codegen *p, - unsigned sfid, - struct brw_reg dst, - struct brw_reg payload0, - struct brw_reg payload1, - struct brw_reg desc, - struct brw_reg ex_desc, - uint32_t ex_desc_imm_inst, - unsigned ex_mlen, - bool ex_bso, - bool eot, - bool gather) -{ - const struct intel_device_info *devinfo = p->devinfo; - struct brw_eu_inst *send; - - dst = retype(dst, BRW_TYPE_UW); - - assert(desc.type == BRW_TYPE_UD); - - send = next_insn(p, devinfo->ver >= 12 ? 
BRW_OPCODE_SEND : BRW_OPCODE_SENDS); - brw_set_dest(p, send, dst); - brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD)); - brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD)); - - if (desc.file == IMM) { - brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 0); - brw_eu_inst_set_send_desc(devinfo, send, desc.ud); - } else { - assert(desc.file == ADDRESS); - assert(desc.subnr == 0); - brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 1); - } - - if (ex_desc.file == IMM) { - assert(ex_desc_imm_inst == 0); - brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); - brw_eu_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud, gather); - } else { - assert(ex_desc.file == ADDRESS); - assert(util_is_aligned(ex_desc.subnr, 4)); - brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); - brw_eu_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2); - - if (ex_desc_imm_inst) { - /* Write the immediate extended descriptor immediate value, but only - * the part used for encoding an offset. This matches to bits - * 12:15-19:31 as described in BSpec 70586 (extended descriptor - * format) & BSpec 56890 (SEND instruction format). - */ - assert(devinfo->ver >= 20); - brw_eu_inst_set_bits(send, 127, 124, GET_BITS(ex_desc_imm_inst, 31, 28)); - brw_eu_inst_set_bits(send, 97, 96, GET_BITS(ex_desc_imm_inst, 27, 26)); - brw_eu_inst_set_bits(send, 65, 64, GET_BITS(ex_desc_imm_inst, 25, 24)); - brw_eu_inst_set_bits(send, 47, 43, GET_BITS(ex_desc_imm_inst, 23, 19)); - brw_eu_inst_set_bits(send, 39, 36, GET_BITS(ex_desc_imm_inst, 15, 12)); - } - if (devinfo->ver >= 20 && sfid == GEN_SFID_UGM) - brw_eu_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo)); - } - - if (ex_bso) { - /* The send instruction ExBSO field does not exist with UGM on Gfx20+, - * it is assumed. 
- * - * BSpec 56890 - */ - if (devinfo->ver < 20 || sfid != GEN_SFID_UGM) - brw_eu_inst_set_send_ex_bso(devinfo, send, true); - brw_eu_inst_set_send_src1_len(devinfo, send, ex_mlen / reg_unit(devinfo)); - } - brw_eu_inst_set_sfid(devinfo, send, sfid); - brw_eu_inst_set_eot(devinfo, send, eot); -} - -void -brw_broadcast(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg idx) -{ - const struct intel_device_info *devinfo = p->devinfo; - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - - assert(src.file == FIXED_GRF && - src.address_mode == BRW_ADDRESS_DIRECT); - assert(!src.abs && !src.negate); - assert(brw_type_is_uint(src.type)); - assert(src.type == dst.type); - - if ((src.vstride == 0 && src.hstride == 0) || - idx.file == IMM) { - /* Trivial, the source is already uniform or the index is a constant. - * We will typically not get here if the optimizer is doing its job, but - * asserting would be mean. - */ - const unsigned i = (src.vstride == 0 && src.hstride == 0) ? 0 : idx.ud; - src = stride(suboffset(src, i), 0, 1, 0); - - if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) { - brw_MOV(p, subscript(dst, BRW_TYPE_D, 0), - subscript(src, BRW_TYPE_D, 0)); - brw_set_default_swsb(p, gen_swsb_null()); - brw_MOV(p, subscript(dst, BRW_TYPE_D, 1), - subscript(src, BRW_TYPE_D, 1)); - } else { - brw_MOV(p, dst, src); - } - } else { - /* From the Haswell PRM section "Register Region Restrictions": - * - * "The lower bits of the AddressImmediate must not overflow to - * change the register address. The lower 5 bits of Address - * Immediate when added to lower 5 bits of address register gives - * the sub-register offset. The upper bits of Address Immediate - * when added to upper bits of address register gives the register - * address. Any overflow from sub-register offset is dropped." 
- * - * Fortunately, for broadcast, we never have a sub-register offset so - * this isn't an issue. - */ - assert(src.subnr == 0); - - const struct brw_reg addr = - retype(brw_address_reg(0), BRW_TYPE_UD); - unsigned offset = src.nr * REG_SIZE + src.subnr; - /* Limit in bytes of the signed indirect addressing immediate. */ - const unsigned limit = 512; - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_flag_reg(p, 0, 0); - - /* Take into account the component size and horizontal stride. */ - assert(src.vstride == src.hstride + src.width); - brw_SHL(p, addr, vec1(idx), - brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) + - src.hstride - 1)); - - /* We can only address up to limit bytes using the indirect - * addressing immediate, account for the difference if the source - * register is above this limit. - */ - if (offset >= limit) { - brw_set_default_swsb(p, gen_swsb_regdist(1)); - brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit)); - offset = offset % limit; - } - - brw_pop_insn_state(p); - - brw_set_default_swsb(p, gen_swsb_regdist(1)); - - /* Use indirect addressing to fetch the specified component. */ - if (brw_type_size_bytes(src.type) > 4 && - (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) { - /* From the Cherryview PRM Vol 7. "Register Region Restrictions": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, indirect addressing must not be - * used." - * - * We may also not support Q/UQ types. - * - * To work around both of these, we do two integer MOVs instead - * of one 64-bit MOV. Because no double value should ever cross - * a register boundary, it's safe to use the immediate offset in - * the indirect here to handle adding 4 bytes to the offset and - * avoid the extra ADD to the register file. 
- */ - brw_MOV(p, subscript(dst, BRW_TYPE_D, 0), - retype(brw_vec1_indirect(addr.subnr, offset), - BRW_TYPE_D)); - brw_set_default_swsb(p, gen_swsb_null()); - brw_MOV(p, subscript(dst, BRW_TYPE_D, 1), - retype(brw_vec1_indirect(addr.subnr, offset + 4), - BRW_TYPE_D)); - } else { - brw_MOV(p, dst, - retype(brw_vec1_indirect(addr.subnr, offset), src.type)); - } - } - - brw_pop_insn_state(p); -} - - -/** - * Emit the SEND message for a barrier - */ -void -brw_barrier(struct brw_codegen *p, struct brw_reg src) -{ - const struct intel_device_info *devinfo = p->devinfo; - - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_SEND(p, GEN_SFID_MESSAGE_GATEWAY, - retype(brw_null_reg(), BRW_TYPE_UW), src, - brw_null_reg(), - brw_imm_ud(brw_message_desc(devinfo, - 1 * reg_unit(devinfo), 0, - false)), - brw_imm_ud(0), 0, 0, false, - false, false); - - brw_eu_inst *inst = brw_eu_last_inst(p); - brw_eu_inst_set_gateway_subfuncid(devinfo, inst, - GEN_MESSAGE_GATEWAY_SFID_BARRIER_MSG); - brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE); - brw_pop_insn_state(p); -} - - -/** - * Emit the wait instruction for a barrier - */ -void -brw_WAIT(struct brw_codegen *p) -{ - const struct intel_device_info *devinfo = p->devinfo; - struct brw_eu_inst *insn; - - struct brw_reg src = brw_notification_reg(); - - insn = next_insn(p, BRW_OPCODE_WAIT); - brw_set_dest(p, insn, src); - brw_set_src0(p, insn, src); - brw_set_src1(p, insn, brw_null_reg()); - - brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); - brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); -} - -void -brw_float_controls_mode(struct brw_codegen *p, - unsigned mode, unsigned mask) -{ - assert(p->current->mask_control == BRW_MASK_DISABLE); - - /* From the Skylake PRM, Volume 7, page 760: - * "Implementation Restriction on Register Access: When the control - * register is used as an explicit source and/or destination, hardware - * does not ensure execution pipeline coherency. 
Software must set the - * thread control field to ‘switch’ for an instruction that uses - * control register as an explicit operand." - * - * On Gfx12+ this is implemented in terms of SWSB annotations instead. - */ - brw_set_default_swsb(p, gen_swsb_regdist(1)); - - brw_eu_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), - brw_imm_ud(~mask)); - brw_eu_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); - if (p->devinfo->ver < 12) - brw_eu_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); - - if (mode) { - brw_eu_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), - brw_imm_ud(mode)); - brw_eu_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1); - if (p->devinfo->ver < 12) - brw_eu_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH); - } - - if (p->devinfo->ver >= 12) - brw_SYNC(p, TGL_SYNC_NOP); -} - -/* A default value for constants that will be patched at run-time. - * We pick an arbitrary value that prevents instruction compaction. - */ -#define DEFAULT_PATCH_IMM 0x4a7cc037 - -void -brw_MOV_reloc_imm(struct brw_codegen *p, - struct brw_reg dst, - enum brw_reg_type src_type, - uint32_t id, - uint32_t base) -{ - assert(brw_type_size_bytes(src_type) == 4); - assert(brw_type_size_bytes(dst.type) == 4); - - brw_add_reloc(p, id, INTEL_SHADER_RELOC_TYPE_MOV_IMM, - p->next_insn_offset, base); - - brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type)); -} diff --git a/src/intel/compiler/brw/brw_eu_inst.h b/src/intel/compiler/brw/brw_eu_inst.h deleted file mode 100644 index 6a2b2afa3df..00000000000 --- a/src/intel/compiler/brw/brw_eu_inst.h +++ /dev/null @@ -1,1467 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -/** - * @file - * - * A representation of i965 EU assembly instructions, with helper methods to - * get and set various fields. This is the actual hardware format. 
- */ - -#pragma once - -#include -#include - -#include "brw_eu_defines.h" -#include "brw_isa_info.h" -#include "brw_reg_type.h" -#include "dev/intel_device_info.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* brw_context.h has a forward declaration of brw_eu_inst, so name the struct. */ -typedef struct brw_eu_inst { - uint64_t data[2]; -} brw_eu_inst; - -static inline uint64_t brw_eu_inst_bits(const brw_eu_inst *inst, - unsigned high, unsigned low); -static inline void brw_eu_inst_set_bits(brw_eu_inst *inst, - unsigned high, unsigned low, - uint64_t value); - -#define FC(name, hi9, lo9, hi12, lo12, assertions) \ -static inline void \ -brw_eu_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, uint64_t v) \ -{ \ - assert(assertions); \ - if (devinfo->ver >= 12) \ - brw_eu_inst_set_bits(inst, hi12, lo12, v); \ - else \ - brw_eu_inst_set_bits(inst, hi9, lo9, v); \ -} \ -static inline uint64_t \ -brw_eu_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - assert(assertions); \ - if (devinfo->ver >= 12) \ - return brw_eu_inst_bits(inst, hi12, lo12); \ - else \ - return brw_eu_inst_bits(inst, hi9, lo9); \ -} - -/* A simple macro for fields which stay in the same place on all generations, - * except for Gfx12! - */ -#define F(name, hi9, lo9, hi12, lo12) FC(name, hi9, lo9, hi12, lo12, true) - -/* A simple macro for fields which stay in the same place on all generations, - * except for Gfx12 and Gfx20. 
- */ -#define F20(name, hi9, lo9, hi12, lo12, hi20, lo20) \ - static inline void \ - brw_eu_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, uint64_t v) \ - { \ - if (devinfo->ver >= 20) \ - brw_eu_inst_set_bits(inst, hi20, lo20, v); \ - else if (devinfo->ver >= 12) \ - brw_eu_inst_set_bits(inst, hi12, lo12, v); \ - else \ - brw_eu_inst_set_bits(inst, hi9, lo9, v); \ - } \ - static inline uint64_t \ - brw_eu_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ - { \ - if (devinfo->ver >= 20) \ - return brw_eu_inst_bits(inst, hi20, lo20); \ - else if (devinfo->ver >= 12) \ - return brw_eu_inst_bits(inst, hi12, lo12); \ - else \ - return brw_eu_inst_bits(inst, hi9, lo9); \ - } - -#define FV20(name, hi9, lo9, hi12, lo12, hi20, lo20) \ - static inline void \ - brw_eu_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, uint64_t v) \ - { \ - if (devinfo->ver >= 20) \ - brw_eu_inst_set_bits(inst, hi20, lo20, v & 0x7); \ - else if (devinfo->ver >= 12) \ - brw_eu_inst_set_bits(inst, hi12, lo12, v); \ - else \ - brw_eu_inst_set_bits(inst, hi9, lo9, v); \ - } \ - static inline uint64_t \ - brw_eu_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ - { \ - if (devinfo->ver >= 20) \ - return brw_eu_inst_bits(inst, hi20, lo20) == 0x7 ? 
0xF : \ - brw_eu_inst_bits(inst, hi20, lo20); \ - else if (devinfo->ver >= 12) \ - return brw_eu_inst_bits(inst, hi12, lo12); \ - else \ - return brw_eu_inst_bits(inst, hi9, lo9); \ - } - -#define FD20(name, hi9, lo9, hi12, lo12, hi20, lo20, zero20) \ - static inline void \ - brw_eu_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, uint64_t v) \ - { \ - if (devinfo->ver >= 20) { \ - brw_eu_inst_set_bits(inst, hi20, lo20, v >> 1); \ - if (zero20 == -1) \ - assert((v & 1) == 0); \ - else \ - brw_eu_inst_set_bits(inst, zero20, zero20, v & 1); \ - } else if (devinfo->ver >= 12) \ - brw_eu_inst_set_bits(inst, hi12, lo12, v); \ - else \ - brw_eu_inst_set_bits(inst, hi9, lo9, v); \ - } \ - static inline uint64_t \ - brw_eu_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ - { \ - if (devinfo->ver >= 20) \ - return (brw_eu_inst_bits(inst, hi20, lo20) << 1) | \ - (zero20 == -1 ? 0 : \ - brw_eu_inst_bits(inst, zero20, zero20)); \ - else if (devinfo->ver >= 12) \ - return brw_eu_inst_bits(inst, hi12, lo12); \ - else \ - return brw_eu_inst_bits(inst, hi9, lo9); \ - } - -/* Macro for fields that gained extra discontiguous MSBs in Gfx12 (specified - * by hi12ex-lo12ex). 
- */ -#define FFDC(name, hi9, lo9, hi12ex, lo12ex, hi12, lo12, assertions) \ -static inline void \ -brw_eu_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, uint64_t value) \ -{ \ - assert(assertions); \ - if (devinfo->ver >= 12) { \ - const unsigned k = hi12 - lo12 + 1; \ - if (hi12ex != -1 && lo12ex != -1) \ - brw_eu_inst_set_bits(inst, hi12ex, lo12ex, value >> k); \ - brw_eu_inst_set_bits(inst, hi12, lo12, value & ((1ull << k) - 1)); \ - } else { \ - brw_eu_inst_set_bits(inst, hi9, lo9, value); \ - } \ -} \ -static inline uint64_t \ -brw_eu_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - assert(assertions); \ - if (devinfo->ver >= 12) { \ - const unsigned k = hi12 - lo12 + 1; \ - return (hi12ex == -1 || lo12ex == -1 ? 0 : \ - brw_eu_inst_bits(inst, hi12ex, lo12ex) << k) | \ - brw_eu_inst_bits(inst, hi12, lo12); \ - } else { \ - return brw_eu_inst_bits(inst, hi9, lo9); \ - } \ -} - -#define FD(name, hi9, lo9, hi12ex, lo12ex, hi12, lo12) \ - FFDC(name, hi9, lo9, hi12ex, lo12ex, hi12, lo12, true) - -/* Macro for fields that didn't move across generations until Gfx12, and then - * gained extra discontiguous bits. - */ -#define FDC(name, hi9, lo9, hi12ex, lo12ex, hi12, lo12, assertions) \ - FFDC(name, hi9, lo9, hi12ex, lo12ex, hi12, lo12, assertions) - -static inline uint64_t -brw_reg_file_to_hw_reg_file(enum brw_reg_file file) -{ - switch (file) { - case ARF: return 0x0; - case FIXED_GRF: return 0x1; - default: /* Fallthrough. */ - case IMM: return 0x3; - } -} - -static inline enum brw_reg_file -hw_reg_file_to_brw_reg_file(uint64_t v) -{ - switch (v) { - case 0x0: return ARF; - case 0x1: return FIXED_GRF; - default: return IMM; - } -} - -/* Macro for storing register file field. See variant FF below for no - * assertions. - * - * In Gfx12+, either a single bit is available (ARF or GRF) or two bits are - * available. 
In that case the register file is stored as the variable length - * combination of an IsImm (hi12) bit and an additional file (lo12) bit. - * - * For some instructions in Gfx11, the encoding uses 0 for GRF, and 1 for - * either ARF (for accumulator) or IMM. - */ -#define FFC(name, hi9, lo9, hi12, lo12, assertions, ...) \ -static inline void \ -brw_eu_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, enum brw_reg_file file) \ -{ \ - assert(assertions); \ - const struct { \ - bool _; /* Exists to avoid empty initializer. */ \ - bool grf_or_imm; \ - bool grf_or_acc; \ - } args = { ._ = false, __VA_ARGS__ }; \ - uint64_t value = brw_reg_file_to_hw_reg_file(file); \ - if (devinfo->ver < 12) { \ - if (devinfo->ver == 11 && args.grf_or_imm) { \ - assert(file == FIXED_GRF || file == IMM); \ - value = file == FIXED_GRF ? 0 : 1; \ - } else if (devinfo->ver == 11 && args.grf_or_acc) { \ - assert(file == FIXED_GRF || file == ARF); \ - value = file == FIXED_GRF ? 0 : 1; \ - } \ - brw_eu_inst_set_bits(inst, hi9, lo9, value); \ - } else if (hi12 == lo12) { \ - brw_eu_inst_set_bits(inst, hi12, lo12, value); \ - } else { \ - brw_eu_inst_set_bits(inst, hi12, hi12, value >> 1); \ - if ((value >> 1) == 0) \ - brw_eu_inst_set_bits(inst, lo12, lo12, value & 1); \ - } \ -} \ -static inline uint64_t \ -brw_eu_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - assert(assertions); \ - const struct { \ - bool _; /* Exists to avoid empty initializer. */ \ - bool grf_or_imm; \ - bool grf_or_acc; \ - } args = { ._ = false, __VA_ARGS__ }; \ - uint64_t value; \ - if (devinfo->ver < 12) { \ - value = brw_eu_inst_bits(inst, hi9, lo9); \ - if (devinfo->ver == 11 && args.grf_or_imm) \ - return value ? IMM : FIXED_GRF; \ - else if (devinfo->ver == 11 && args.grf_or_acc) \ - return value ? 
ARF : FIXED_GRF; \ - } else if (hi12 == lo12) { \ - value = brw_eu_inst_bits(inst, hi12, lo12); \ - } else { \ - value = (brw_eu_inst_bits(inst, hi12, hi12) << 1) | \ - (brw_eu_inst_bits(inst, hi12, hi12) == 0 ? \ - brw_eu_inst_bits(inst, lo12, lo12) : 1); \ - } \ - return hw_reg_file_to_brw_reg_file(value); \ -} - -#define FF(name, hi9, lo9, hi12, lo12, ...) FFC(name, hi9, lo9, hi12, lo12, true, __VA_ARGS__) - -/* Macro for fields that become a constant in Gfx12+ not actually represented - * in the instruction. - */ -#define FK(name, hi9, lo9, const12) \ -static inline void \ -brw_eu_inst_set_##name(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, uint64_t v) \ -{ \ - if (devinfo->ver >= 12) \ - assert(v == (const12)); \ - else \ - brw_eu_inst_set_bits(inst, hi9, lo9, v); \ -} \ -static inline uint64_t \ -brw_eu_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - if (devinfo->ver >= 12) \ - return (const12); \ - else \ - return brw_eu_inst_bits(inst, hi9, lo9); \ -} - -FV20(src1_vstride, /* 9+ */ 120, 117, /* 12+ */ 119, 116, /* 20+ */ 118, 116) -F(src1_width, /* 9+ */ 116, 114, /* 12+ */ 115, 113) -F(src1_da16_swiz_w, /* 9+ */ 115, 114, /* 12+ */ -1, -1) -F(src1_da16_swiz_z, /* 9+ */ 113, 112, /* 12+ */ -1, -1) -F(src1_hstride, /* 9+ */ 113, 112, /* 12+ */ 97, 96) -F(src1_address_mode, /* 9+ */ 111, 111, /* 12+ */ 112, 112) -/** Src1.SrcMod @{ */ -F(src1_negate, /* 9+ */ 110, 110, /* 12+ */ 121, 121) -F(src1_abs, /* 9+ */ 109, 109, /* 12+ */ 120, 120) -/** @} */ -F(src1_ia_subreg_nr, /* 9+ */ 108, 105, /* 12+ */ 111, 108) -F(src1_da_reg_nr, /* 9+ */ 108, 101, /* 12+ */ 111, 104) -F(src1_da16_subreg_nr, /* 9+ */ 100, 100, /* 12+ */ -1, -1) -FD20(src1_da1_subreg_nr, /* 9+ */ 100, 96, /* 12+ */ 103, 99, /* 20+ */ 103, 99, -1) -F(src1_da16_swiz_y, /* 9+ */ 99, 98, /* 12+ */ -1, -1) -F(src1_da16_swiz_x, /* 9+ */ 97, 96, /* 12+ */ -1, -1) -F(src1_reg_hw_type, /* 9+ */ 94, 91, /* 12+ */ 91, 88) -FF(src1_reg_file, /* 
9+ */ 90, 89, /* 12+ */ 47, 98) -F(src1_is_imm, /* 9+ */ -1, -1, /* 12+ */ 47, 47) -FV20(src0_vstride, /* 9+ */ 88, 85, /* 12+ */ 87, 84, /* 20+ */ 86, 84) -F(src0_width, /* 9+ */ 84, 82, /* 12+ */ 83, 81) -F(src0_da16_swiz_w, /* 9+ */ 83, 82, /* 12+ */ -1, -1) -F(src0_da16_swiz_z, /* 9+ */ 81, 80, /* 12+ */ -1, -1) -F(src0_hstride, /* 9+ */ 81, 80, /* 12+ */ 65, 64) -F(src0_address_mode, /* 9+ */ 79, 79, /* 12+ */ 80, 80) -/** Src0.SrcMod @{ */ -F(src0_negate, /* 9+ */ 78, 78, /* 12+ */ 45, 45) -F(src0_abs, /* 9+ */ 77, 77, /* 12+ */ 44, 44) -/** @} */ -F(src0_ia_subreg_nr, /* 9+ */ 76, 73, /* 12+ */ 79, 76) -F(src0_da_reg_nr, /* 9+ */ 76, 69, /* 12+ */ 79, 72) -F(src0_da16_subreg_nr, /* 9+ */ 68, 68, /* 12+ */ -1, -1) -FD20(src0_da1_subreg_nr, /* 9+ */ 68, 64, /* 12+ */ 71, 67, /* 20+ */ 71, 67, 87) -F(src0_da16_swiz_y, /* 9+ */ 67, 66, /* 12+ */ -1, -1) -F(src0_da16_swiz_x, /* 9+ */ 65, 64, /* 12+ */ -1, -1) -F(dst_address_mode, /* 9+ */ 63, 63, /* 12+ */ 35, 35) -F(dst_hstride, /* 9+ */ 62, 61, /* 12+ */ 49, 48) -F(dst_ia_subreg_nr, /* 9+ */ 60, 57, /* 12+ */ 63, 60) -F(dst_da_reg_nr, /* 9+ */ 60, 53, /* 12+ */ 63, 56) -F(dst_da16_subreg_nr, /* 9+ */ 52, 52, /* 12+ */ -1, -1) -FD20(dst_da1_subreg_nr, /* 9+ */ 52, 48, /* 12+ */ 55, 51, /* 20+ */ 55, 51, 33) -F(da16_writemask, /* 9+ */ 51, 48, /* 12+ */ -1, -1) /* Dst.ChanEn */ -F(src0_reg_hw_type, /* 9+ */ 46, 43, /* 12+ */ 43, 40) -FF(src0_reg_file, /* 9+ */ 42, 41, /* 12+ */ 46, 66) -F(src0_is_imm, /* 9+ */ -1, -1, /* 12+ */ 46, 46) -F(dst_reg_hw_type, /* 9+ */ 40, 37, /* 12+ */ 39, 36) -FF(dst_reg_file, /* 9+ */ 36, 35, /* 12+ */ 50, 50) -F(mask_control, /* 9+ */ 34, 34, /* 12+ */ 31, 31) -F20(flag_reg_nr, /* 9+ */ 33, 33, /* 12+ */ 23, 23, /* 20+ */ 23, 22) -F20(flag_subreg_nr, /* 9+ */ 32, 32, /* 12+ */ 22, 22, /* 20+ */ 21, 21) -F(saturate, /* 9+ */ 31, 31, /* 12+ */ 34, 34) -F(debug_control, /* 9+ */ 30, 30, /* 12+ */ 30, 30) -F(cmpt_control, /* 9+ */ 29, 29, /* 12+ */ 29, 29) -F(branch_control, /* 9+ */ 
28, 28, /* 12+ */ 33, 33) -FC(acc_wr_control, /* 9+ */ 28, 28, /* 12+ */ 33, 33, devinfo->ver < 20) -F(cond_modifier, /* 9+ */ 27, 24, /* 12+ */ 95, 92) -F(math_function, /* 9+ */ 27, 24, /* 12+ */ 95, 92) -F20(exec_size, /* 9+ */ 23, 21, /* 12+ */ 18, 16, /* 20+ */ 20, 18) -F(pred_inv, /* 9+ */ 20, 20, /* 12+ */ 28, 28) -F20(pred_control, /* 9+ */ 19, 16, /* 12+ */ 27, 24, /* 20+ */ 27, 26) -F(thread_control, /* 9+ */ 15, 14, /* 12+ */ -1, -1) -F(atomic_control, /* 9+ */ -1, -1, /* 12+ */ 32, 32) -F20(qtr_control, /* 9+ */ 13, 12, /* 12+ */ 21, 20, /* 20+ */ 25, 24) -F20(nib_control, /* 9+ */ 11, 11, /* 12+ */ 19, 19, /* 20+ */ -1, -1) -F(no_dd_check, /* 9+ */ 10, 10, /* 12+ */ -1, -1) -F(no_dd_clear, /* 9+ */ 9, 9, /* 12+ */ -1, -1) -F20(swsb, /* 9+ */ -1, -1, /* 12+ */ 15, 8, /* 20+ */ 17, 8) -FK(access_mode, /* 9+ */ 8, 8, /* 12+ */ BRW_ALIGN_1) -/* Bit 7 is Reserved (for future Opcode expansion) */ -F(hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0) - -/** - * Three-source instructions: - * @{ - */ -F(3src_src2_reg_nr, /* 9+ */ 125, 118, /* 12+ */ 127, 120) /* same in align1 */ -F(3src_a16_src2_swizzle, /* 9+ */ 114, 107, /* 12+ */ -1, -1) -F(3src_a16_src2_rep_ctrl, /* 9+ */ 106, 106, /* 12+ */ -1, -1) -F(3src_src1_reg_nr, /* 9+ */ 104, 97, /* 12+ */ 111, 104) /* same in align1 */ -F(3src_a16_src1_swizzle, /* 9+ */ 93, 86, /* 12+ */ -1, -1) -F(3src_a16_src1_rep_ctrl, /* 9+ */ 85, 85, /* 12+ */ -1, -1) -F(3src_src0_reg_nr, /* 9+ */ 83, 76, /* 12+ */ 79, 72) /* same in align1 */ -F(3src_a16_src0_swizzle, /* 9+ */ 72, 65, /* 12+ */ -1, -1) -F(3src_a16_src0_rep_ctrl, /* 9+ */ 64, 64, /* 12+ */ -1, -1) -F(3src_dst_reg_nr, /* 9+ */ 63, 56, /* 12+ */ 63, 56) /* same in align1 */ -F(3src_a16_dst_subreg_nr, /* 9+ */ 55, 53, /* 12+ */ -1, -1) -F(3src_a16_dst_writemask, /* 9+ */ 52, 49, /* 12+ */ -1, -1) -F(3src_a16_nib_ctrl, /* 9+ */ 11, 11, /* 12+ */ -1, -1) /* only exists on IVB+ */ -F(3src_a16_dst_hw_type, /* 9+ */ 48, 46, /* 12+ */ -1, -1) /* only exists on IVB+ */ 
-F(3src_a16_src_hw_type, /* 9+ */ 45, 43, /* 12+ */ -1, -1) -F(3src_src2_negate, /* 9+ */ 42, 42, /* 12+ */ 85, 85) -F(3src_src2_abs, /* 9+ */ 41, 41, /* 12+ */ 84, 84) -F(3src_src1_negate, /* 9+ */ 40, 40, /* 12+ */ 87, 87) -F(3src_src1_abs, /* 9+ */ 39, 39, /* 12+ */ 86, 86) -F(3src_src0_negate, /* 9+ */ 38, 38, /* 12+ */ 45, 45) -F(3src_src0_abs, /* 9+ */ 37, 37, /* 12+ */ 44, 44) -F(3src_a16_src1_type, /* 9+ */ 36, 36, /* 12+ */ -1, -1) -F(3src_a16_src2_type, /* 9+ */ 35, 35, /* 12+ */ -1, -1) -F(3src_a16_flag_reg_nr, /* 9+ */ 33, 33, /* 12+ */ -1, -1) -F(3src_a16_flag_subreg_nr, /* 9+ */ 32, 32, /* 12+ */ -1, -1) -F(3src_saturate, /* 9+ */ 31, 31, /* 12+ */ 34, 34) -F(3src_debug_control, /* 9+ */ 30, 30, /* 12+ */ 30, 30) -F(3src_cmpt_control, /* 9+ */ 29, 29, /* 12+ */ 29, 29) -FC(3src_acc_wr_control, /* 9+ */ 28, 28, /* 12+ */ 33, 33, devinfo->ver < 20) -F(3src_cond_modifier, /* 9+ */ 27, 24, /* 12+ */ 95, 92) -F(3src_pred_inv, /* 9+ */ 20, 20, /* 12+ */ 28, 28) -F20(3src_pred_control, /* 9+ */ 19, 16, /* 12+ */ 27, 24, /* 20+ */ 27, 26) -F(3src_thread_control, /* 9+ */ 15, 14, /* 12+ */ -1, -1) -F(3src_atomic_control, /* 9+ */ -1, -1, /* 12+ */ 32, 32) -F20(3src_qtr_control, /* 9+ */ 13, 12, /* 12+ */ 21, 20, /* 20+ */ 25, 24) -F(3src_no_dd_check, /* 9+ */ 10, 10, /* 12+ */ -1, -1) -F(3src_no_dd_clear, /* 9+ */ 9, 9, /* 12+ */ -1, -1) -F(3src_mask_control, /* 9+ */ 34, 34, /* 12+ */ 31, 31) -FK(3src_access_mode, /* 9+ */ 8, 8, /* 12+ */ BRW_ALIGN_1) -F20(3src_swsb, /* 9+ */ -1, -1, /* 12+ */ 15, 8, /* 20+ */ 17, 8) -/* Bit 7 is Reserved (for future Opcode expansion) */ -F(3src_hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0) -/** @} */ - -#define F_3SRC_A16_SUBREG_NR(srcN, src_base) \ -static inline void \ -brw_eu_inst_set_3src_a16_##srcN##_subreg_nr(const struct \ - intel_device_info *devinfo, \ - brw_eu_inst *inst, \ - unsigned value) \ -{ \ - assert(devinfo->ver == 9); \ - assert((value & ~0b11110) == 0); \ - brw_eu_inst_set_bits(inst, src_base + 11, src_base + 
9, value >> 2); \ - brw_eu_inst_set_bits(inst, src_base + 20, src_base + 20, (value >> 1) & 1); \ -} \ -static inline unsigned \ -brw_eu_inst_3src_a16_##srcN##_subreg_nr(const struct \ - intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - assert(devinfo->ver == 9); \ - return brw_eu_inst_bits(inst, src_base + 11, src_base + 9) << 2 | \ - brw_eu_inst_bits(inst, src_base + 20, src_base + 20) << 1; \ -} - -F_3SRC_A16_SUBREG_NR(src0, 64) -F_3SRC_A16_SUBREG_NR(src1, 85) -F_3SRC_A16_SUBREG_NR(src2, 106) -#undef F_3SRC_A16_SUBREG_NR - -#define REG_TYPE(reg) \ -static inline void \ -brw_eu_inst_set_3src_a16_##reg##_type(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, \ - enum brw_reg_type type) \ -{ \ - unsigned hw_type = brw_type_encode_for_3src(devinfo, type); \ - brw_eu_inst_set_3src_a16_##reg##_hw_type(devinfo, inst, hw_type); \ -} \ - \ -static inline enum brw_reg_type \ -brw_eu_inst_3src_a16_##reg##_type(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - unsigned hw_type = brw_eu_inst_3src_a16_##reg##_hw_type(devinfo, inst); \ - return brw_type_decode_for_3src(devinfo, hw_type, 0); \ -} - -REG_TYPE(dst) -REG_TYPE(src) -#undef REG_TYPE - -/** - * Three-source align1 instructions: - * @{ - */ -/* Reserved 127:126 */ -/* src2_reg_nr same in align16 */ -FD20(3src_a1_src2_subreg_nr,/* 9+ */ 117, 113, /* 12+ */ 119, 115, /* 20+ */ 119, 115, -1) -FC(3src_a1_src2_hstride, /* 9+ */ 112, 111, /* 12+ */ 113, 112, devinfo->ver >= 10) -/* Reserved 110:109. 
src2 vstride is an implied parameter */ -FC(3src_a1_src2_hw_type, /* 9+ */ 108, 106, /* 12+ */ 82, 80, devinfo->ver >= 10) -/* Reserved 105 */ -/* src1_reg_nr same in align16 */ -FD20(3src_a1_src1_subreg_nr, /* 9+ */ 96, 92, /* 12+ */ 103, 99, /* 20+ */ 103, 99, -1) -FC(3src_a1_src1_hstride, /* 9+ */ 91, 90, /* 12+ */ 97, 96, devinfo->ver >= 10) -FDC(3src_a1_src1_vstride, /* 9+ */ 89, 88, /* 12+ */ 91, 91, 83, 83, devinfo->ver >= 10) -FC(3src_a1_src1_hw_type, /* 9+ */ 87, 85, /* 12+ */ 90, 88, devinfo->ver >= 10) -/* Reserved 84 */ -/* src0_reg_nr same in align16 */ -FD20(3src_a1_src0_subreg_nr, /* 9+ */ 75, 71, /* 12+ */ 71, 67, /* 20+ */ 71, 67, -1) -FC(3src_a1_src0_hstride, /* 9+ */ 70, 69, /* 12+ */ 65, 64, devinfo->ver >= 10) -FDC(3src_a1_src0_vstride, /* 9+ */ 68, 67, /* 12+ */ 43, 43, 35, 35, devinfo->ver >= 10) -FC(3src_a1_src0_hw_type, /* 9+ */ 66, 64, /* 12+ */ 42, 40, devinfo->ver >= 10) -/* dst_reg_nr same in align16 */ -FD20(3src_a1_dst_subreg_nr, /* 9+ */ 55, 51, /* 12+ */ 55, 51, /* 20+ */ 55, 51, -1) -FC(3src_a1_special_acc, /* 9+ */ 55, 52, /* 12+ */ 54, 51, devinfo->ver >= 10) /* aliases dst_subreg_nr */ -/* Reserved 51:50 */ -FC(3src_a1_dst_hstride, /* 9+ */ 49, 49, /* 12+ */ 48, 48, devinfo->ver >= 10) -FC(3src_a1_dst_hw_type, /* 9+ */ 48, 46, /* 12+ */ 38, 36, devinfo->ver >= 10) -FF(3src_a1_src2_reg_file, /* 9+ */ 45, 45, /* 12+ */ 47, 114, .grf_or_imm = true) -FFC(3src_a1_src1_reg_file, /* 9+ */ 44, 44, /* 12+ */ 98, 98, devinfo->ver >= 10, .grf_or_acc = true) -FF(3src_a1_src0_reg_file, /* 9+ */ 43, 43, /* 12+ */ 46, 66, .grf_or_imm = true) - -F(3src_a1_src2_is_imm, /* 9+ */ -1, -1, /* 12+ */ 47, 47) -F(3src_a1_src0_is_imm, /* 9+ */ -1, -1, /* 12+ */ 46, 46) - -FDC(boolean_func_ctrl, /* 9+ */ -1, -1, /* 12+ */ 95, 92, 87, 84, devinfo->verx10 >= 125) -F(boolean_func_cond_modifier,/* 9+ */ -1, -1, /* 12+ */ 45, 44) - -/* Source Modifier fields same in align16 */ -FFC(3src_a1_dst_reg_file, /* 9+ */ 36, 36, /* 12+ */ 50, 50, devinfo->ver >= 10, 
.grf_or_acc = true) -FC(3src_a1_exec_type, /* 9+ */ 35, 35, /* 12+ */ 39, 39, devinfo->ver >= 10) -/* Fields below this same in align16 */ -/** @} */ - -#define REG_TYPE(reg) \ -static inline void \ -brw_eu_inst_set_3src_a1_##reg##_type(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, \ - enum brw_reg_type type) \ -{ \ - UNUSED enum brw_align1_3src_exec_type exec_type = \ - (enum brw_align1_3src_exec_type) \ - brw_eu_inst_3src_a1_exec_type(devinfo, inst); \ - if (brw_type_is_float_or_bfloat(type)) { \ - assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); \ - } else { \ - assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_INT); \ - } \ - unsigned hw_type = brw_type_encode_for_3src(devinfo, type); \ - brw_eu_inst_set_3src_a1_##reg##_hw_type(devinfo, inst, hw_type); \ -} \ - \ -static inline enum brw_reg_type \ -brw_eu_inst_3src_a1_##reg##_type(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - enum brw_align1_3src_exec_type exec_type = \ - (enum brw_align1_3src_exec_type) \ - brw_eu_inst_3src_a1_exec_type(devinfo, inst); \ - unsigned hw_type = brw_eu_inst_3src_a1_##reg##_hw_type(devinfo, inst); \ - return brw_type_decode_for_3src(devinfo, hw_type, exec_type); \ -} - -REG_TYPE(dst) -REG_TYPE(src0) -REG_TYPE(src1) -REG_TYPE(src2) -#undef REG_TYPE - -/** - * Three-source align1 instruction immediates: - * @{ - */ -static inline uint16_t -brw_eu_inst_3src_a1_src0_imm(ASSERTED const struct intel_device_info *devinfo, - const brw_eu_inst *insn) -{ - assert(devinfo->ver >= 10); - if (devinfo->ver >= 12) - return brw_eu_inst_bits(insn, 79, 64); - else - return brw_eu_inst_bits(insn, 82, 67); -} - -static inline uint16_t -brw_eu_inst_3src_a1_src2_imm(ASSERTED const struct intel_device_info *devinfo, - const brw_eu_inst *insn) -{ - assert(devinfo->ver >= 10); - if (devinfo->ver >= 12) - return brw_eu_inst_bits(insn, 127, 112); - else - return brw_eu_inst_bits(insn, 124, 109); -} - -static inline void 
-brw_eu_inst_set_3src_a1_src0_imm(ASSERTED const struct intel_device_info *devinfo, - brw_eu_inst *insn, uint16_t value) -{ - assert(devinfo->ver >= 10); - if (devinfo->ver >= 12) - brw_eu_inst_set_bits(insn, 79, 64, value); - else - brw_eu_inst_set_bits(insn, 82, 67, value); -} - -static inline void -brw_eu_inst_set_3src_a1_src2_imm(ASSERTED const struct intel_device_info *devinfo, - brw_eu_inst *insn, uint16_t value) -{ - assert(devinfo->ver >= 10); - if (devinfo->ver >= 12) - brw_eu_inst_set_bits(insn, 127, 112, value); - else - brw_eu_inst_set_bits(insn, 124, 109, value); -} -/** @} */ - -/** - * Three-source systolic instructions: - * @{ - */ -F(dpas_3src_src2_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 127, 120) -F(dpas_3src_src2_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 119, 115) -FF(dpas_3src_src2_reg_file, /* 9+ */ -1, -1, /* 12+ */ 114, 114) -F(dpas_3src_src1_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 111, 104) -F(dpas_3src_src1_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 103, 99) -FF(dpas_3src_src1_reg_file, /* 9+ */ -1, -1, /* 12+ */ 98, 98) -F(dpas_3src_src1_hw_type, /* 9+ */ -1, -1, /* 12+ */ 90, 88) -F(dpas_3src_src1_subbyte, /* 9+ */ -1, -1, /* 12+ */ 87, 86) -F(dpas_3src_src2_subbyte, /* 9+ */ -1, -1, /* 12+ */ 85, 84) -F(dpas_3src_src2_hw_type, /* 9+ */ -1, -1, /* 12+ */ 82, 80) -F(dpas_3src_src0_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 79, 72) -F(dpas_3src_src0_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 71, 67) -FF(dpas_3src_src0_reg_file, /* 9+ */ -1, -1, /* 12+ */ 66, 66) -F(dpas_3src_dst_reg_nr, /* 9+ */ -1, -1, /* 12+ */ 63, 56) -F(dpas_3src_dst_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 55, 51) -FF(dpas_3src_dst_reg_file, /* 9+ */ -1, -1, /* 12+ */ 50, 50) -F(dpas_3src_sdepth, /* 9+ */ -1, -1, /* 12+ */ 49, 48) -F(dpas_3src_rcount, /* 9+ */ -1, -1, /* 12+ */ 45, 43) -F(dpas_3src_src0_hw_type, /* 9+ */ -1, -1, /* 12+ */ 42, 40) -F(dpas_3src_exec_type, /* 9+ */ -1, -1, /* 12+ */ 39, 39) -F(dpas_3src_dst_hw_type, /* 9+ */ -1, -1, /* 12+ */ 38, 36) -/** @} */ - -#define REG_TYPE(reg) \ 
-static inline void \ -brw_eu_inst_set_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, \ - enum brw_reg_type type) \ -{ \ - UNUSED enum brw_align1_3src_exec_type exec_type = \ - (enum brw_align1_3src_exec_type) \ - brw_eu_inst_dpas_3src_exec_type(devinfo, inst); \ - if (brw_type_is_float_or_bfloat(type)) { \ - assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); \ - } else { \ - assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_INT); \ - } \ - unsigned hw_type = brw_type_encode_for_3src(devinfo, type); \ - brw_eu_inst_set_dpas_3src_##reg##_hw_type(devinfo, inst, hw_type); \ -} \ - \ -static inline enum brw_reg_type \ -brw_eu_inst_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - enum brw_align1_3src_exec_type exec_type = \ - (enum brw_align1_3src_exec_type) \ - brw_eu_inst_dpas_3src_exec_type(devinfo, inst); \ - unsigned hw_type = brw_eu_inst_dpas_3src_##reg##_hw_type(devinfo, inst); \ - return brw_type_decode_for_3src(devinfo, hw_type, exec_type); \ -} - -REG_TYPE(dst) -REG_TYPE(src0) -REG_TYPE(src1) -REG_TYPE(src2) -#undef REG_TYPE - -/** - * SEND instructions: - * @{ - */ -F(send_ex_desc_ia_subreg_nr, /* 9+ */ 82, 80, /* 12+ */ 42, 40) -F(send_src0_address_mode, /* 9+ */ 79, 79, /* 12+ */ -1, -1) -F(send_sel_reg32_desc, /* 9+ */ 77, 77, /* 12+ */ 48, 48) -F(send_sel_reg32_ex_desc, /* 9+ */ 61, 61, /* 12+ */ 49, 49) -FF(send_src0_reg_file, /* 9+ */ 42, 41, /* 12+ */ 66, 66) -F(send_src1_reg_nr, /* 9+ */ 51, 44, /* 12+ */ 111, 104) -FC(send_src1_len, /* 9+ */ -1, -1, /* 12+ */ 103, 99, devinfo->verx10 >= 125) -FF(send_src1_reg_file, /* 9+ */ 36, 36, /* 12+ */ 98, 98) -FF(send_dst_reg_file, /* 9+ */ 35, 35, /* 12+ */ 50, 50) -FC(send_ex_bso, /* 9+ */ -1, -1, /* 12+ */ 39, 39, devinfo->verx10 >= 125) - -/* When using scalar register for src0, this replaces src1_len, which is - * always zero. 
- */ -FC(send_src0_subreg_nr, /* 9+ */ -1, -1, /* 12+ */ 103, 99, devinfo->verx10 >= 300) -/** @} */ - -/* Message descriptor bits */ -#define MD(x) ((x) + 96) -#define MD12(x) ((x) >= 30 ? (x) - 30 + 122 : \ - (x) >= 25 ? (x) - 25 + 67 : \ - (x) >= 20 ? (x) - 20 + 51 : \ - (x) >= 11 ? (x) - 11 + 113 : \ - (x) - 0 + 81) - -/** - * Set the SEND(C) message descriptor immediate. - * - * This doesn't include the SFID nor the EOT field that were considered to be - * part of the message descriptor by ancient versions of the BSpec, because - * they are present in the instruction even if the message descriptor is - * provided indirectly in the address register, so we want to specify them - * separately. - */ -static inline void -brw_eu_inst_set_send_desc(const struct intel_device_info *devinfo, - brw_eu_inst *inst, uint32_t value) -{ - if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(inst, 123, 122, GET_BITS(value, 31, 30)); - brw_eu_inst_set_bits(inst, 71, 67, GET_BITS(value, 29, 25)); - brw_eu_inst_set_bits(inst, 55, 51, GET_BITS(value, 24, 20)); - brw_eu_inst_set_bits(inst, 121, 113, GET_BITS(value, 19, 11)); - brw_eu_inst_set_bits(inst, 91, 81, GET_BITS(value, 10, 0)); - } else { - brw_eu_inst_set_bits(inst, 126, 96, value); - assert(value >> 31 == 0); - } -} - -/** - * Get the SEND(C) message descriptor immediate. - * - * \sa brw_eu_inst_set_send_desc(). - */ -static inline uint32_t -brw_eu_inst_send_desc(const struct intel_device_info *devinfo, - const brw_eu_inst *inst) -{ - if (devinfo->ver >= 12) { - return (brw_eu_inst_bits(inst, 123, 122) << 30 | - brw_eu_inst_bits(inst, 71, 67) << 25 | - brw_eu_inst_bits(inst, 55, 51) << 20 | - brw_eu_inst_bits(inst, 121, 113) << 11 | - brw_eu_inst_bits(inst, 91, 81)); - } else { - return brw_eu_inst_bits(inst, 126, 96); - } -} - -/** - * Set the SEND(C) message extended descriptor immediate. 
- * - * This doesn't include the SFID nor the EOT field that were considered to be - * part of the extended message descriptor by some versions of the BSpec, - * because they are present in the instruction even if the extended message - * descriptor is provided indirectly in a register, so we want to specify them - * separately. - */ -static inline void -brw_eu_inst_set_send_ex_desc(const struct intel_device_info *devinfo, - brw_eu_inst *inst, uint32_t value, bool gather) -{ - assert(!gather || devinfo->ver >= 30); - - if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(inst, 127, 124, GET_BITS(value, 31, 28)); - brw_eu_inst_set_bits(inst, 97, 96, GET_BITS(value, 27, 26)); - brw_eu_inst_set_bits(inst, 65, 64, GET_BITS(value, 25, 24)); - brw_eu_inst_set_bits(inst, 47, 35, GET_BITS(value, 23, 11)); - - /* SEND gather uses these bits for src0 subreg nr, so they - * are not part of the ex_desc. - */ - if (gather) { - assert(devinfo->ver >= 30); - assert(GET_BITS(value, 10, 6) == 0); - } else { - brw_eu_inst_set_bits(inst, 103, 99, GET_BITS(value, 10, 6)); - } - - assert(GET_BITS(value, 5, 0) == 0); - } else { - assert(devinfo->ver >= 9); - brw_eu_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28)); - brw_eu_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24)); - brw_eu_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20)); - brw_eu_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16)); - assert(GET_BITS(value, 15, 0) == 0); - } -} - -/** - * Set the SENDS(C) message extended descriptor immediate. - * - * This doesn't include the SFID nor the EOT field that were considered to be - * part of the extended message descriptor by some versions of the BSpec, - * because they are present in the instruction even if the extended message - * descriptor is provided indirectly in a register, so we want to specify them - * separately. 
- */ -static inline void -brw_eu_inst_set_sends_ex_desc(const struct intel_device_info *devinfo, - brw_eu_inst *inst, uint32_t value, bool gather) -{ - if (devinfo->ver >= 12) { - brw_eu_inst_set_send_ex_desc(devinfo, inst, value, gather); - } else { - brw_eu_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16)); - assert(GET_BITS(value, 15, 10) == 0); - brw_eu_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6)); - assert(GET_BITS(value, 5, 0) == 0); - } -} - -/** - * Get the SEND(C) message extended descriptor immediate. - * - * \sa brw_eu_inst_set_send_ex_desc(). - */ -static inline uint32_t -brw_eu_inst_send_ex_desc(const struct intel_device_info *devinfo, - const brw_eu_inst *inst, bool gather) -{ - assert(!gather || devinfo->ver >= 30); - - if (devinfo->ver >= 12) { - return (brw_eu_inst_bits(inst, 127, 124) << 28 | - brw_eu_inst_bits(inst, 97, 96) << 26 | - brw_eu_inst_bits(inst, 65, 64) << 24 | - brw_eu_inst_bits(inst, 47, 35) << 11 | - (!gather ? brw_eu_inst_bits(inst, 103, 99) << 6 : 0)); - } else { - assert(devinfo->ver >= 9); - return (brw_eu_inst_bits(inst, 94, 91) << 28 | - brw_eu_inst_bits(inst, 88, 85) << 24 | - brw_eu_inst_bits(inst, 83, 80) << 20 | - brw_eu_inst_bits(inst, 67, 64) << 16); - } -} - -/** - * Get the SENDS(C) message extended descriptor immediate. - * - * \sa brw_eu_inst_set_send_ex_desc(). 
- */ -static inline uint32_t -brw_eu_inst_sends_ex_desc(const struct intel_device_info *devinfo, - const brw_eu_inst *inst, bool gather) -{ - if (devinfo->ver >= 12) { - return brw_eu_inst_send_ex_desc(devinfo, inst, gather); - } else { - assert(!gather); - return (brw_eu_inst_bits(inst, 95, 80) << 16 | - brw_eu_inst_bits(inst, 67, 64) << 6); - } -} - -/** - * Fields for SEND messages: - * @{ - */ -F(eot, /* 9+ */ 127, 127, /* 12+ */ 34, 34) -F(fusion_ctrl, /* 9+ */ -1, -1, /* 12+ */ 33, 33) -F(mlen, /* 9+ */ 124, 121, /* 12+ */ MD12(28), MD12(25)) -F(rlen, /* 9+ */ 120, 116, /* 12+ */ MD12(24), MD12(20)) -F(header_present, /* 9+ */ 115, 115, /* 12+ */ MD12(19), MD12(19)) -F(gateway_notify, /* 9+ */ MD(16), MD(15), /* 12+ */ -1, -1) -FD(function_control, /* 9+ */ 114, 96, /* 12+ */ MD12(18), MD12(11), MD12(10), MD12(0)) -F(gateway_subfuncid, /* 9+ */ MD(2), MD(0), /* 12+ */ MD12(2), MD12(0)) -F(sfid, /* 9+ */ 27, 24, /* 12+ */ 95, 92) -F(null_rt, /* 9+ */ 80, 80, /* 12+ */ 44, 44) /* actually only Gfx11+ */ -F(send_rta_index, /* 9+ */ -1, -1, /* 12+ */ 38, 36) -/** @} */ - -/** - * URB message function control bits: - * @{ - */ -F(urb_per_slot_offset, /* 9+ */ MD(17), MD(17), /* 12+ */ MD12(17), MD12(17)) -F(urb_channel_mask_present, /* 9+ */ MD(15), MD(15), /* 12+ */ MD12(15), MD12(15)) -F(urb_swizzle_control, /* 9+ */ MD(15), MD(15), /* 12+ */ -1, -1) -FD(urb_global_offset, /* 9+ */ MD(14), MD(4), /* 12+ */ MD12(14), MD12(11), MD12(10), MD12(4)) -F(urb_opcode, /* 9+ */ MD( 3), MD(0), /* 12+ */ MD12(3), MD12(0)) -/** @} */ - -/** - * Sampler message function control bits: - * @{ - */ -F(sampler_simd_mode, /* 9+ */ MD(18), MD(17), /* 12+ */ MD12(18), MD12(17)) -F(sampler_msg_type, /* 9+ */ MD(16), MD(12), /* 12+ */ MD12(16), MD12(12)) -FD(sampler, /* 9+ */ MD(11), MD(8), /* 12+ */ MD12(11), MD12(11), MD12(10), MD12(8)) -F(binding_table_index, /* 9+ */ MD(7), MD(0), /* 12+ */ MD12(7), MD12(0)) /* also used by other messages */ -/** @} */ - -/** - * Data port message 
function control bits: - * @{ - */ -F(dp_category, /* 9+ */ MD(18), MD(18), /* 12+ */ MD12(18), MD12(18)) - -F(dp_read_msg_type, /* 9+ */ MD(17), MD(14), /* 12+ */ MD12(17), MD12(14)) -F(dp_write_msg_type, /* 9+ */ MD(17), MD(14), /* 12+ */ MD12(17), MD12(14)) -FD(dp_read_msg_control, /* 9+ */ MD(13), MD( 8), /* 12+ */ MD12(13), MD12(11), MD12(10), MD12(8)) -FD(dp_write_msg_control, /* 9+ */ MD(13), MD( 8), /* 12+ */ MD12(13), MD12(11), MD12(10), MD12(8)) - -F(dp_msg_type, /* 9+ */ MD(18), MD(14), /* 12+ */ MD12(18), MD12(14)) -FD(dp_msg_control, /* 9+ */ MD(13), MD( 8), /* 12+ */ MD12(13), MD12(11), MD12(10), MD12(8)) -/** @} */ - -/** - * Scratch message bits: - * @{ - */ -F(scratch_read_write, /* 9+ */ MD(17), MD(17), /* 12+ */ MD12(17), MD12(17)) /* 0 = read, 1 = write */ -F(scratch_type, /* 9+ */ MD(16), MD(16), /* 12+ */ -1, -1) /* 0 = OWord, 1 = DWord */ -F(scratch_invalidate_after_read, /* 9+ */ MD(15), MD(15), /* 12+ */ MD12(15), MD12(15)) -F(scratch_block_size, /* 9+ */ MD(13), MD(12), /* 12+ */ MD12(13), MD12(12)) -FD(scratch_addr_offset, - /* 9: */ MD(11), MD(0), - /* 12: */ MD12(11), MD12(11), MD12(10), MD12(0)) -/** @} */ - -/** - * Render Target message function control bits: - * @{ - */ -F(rt_last, /* 9+ */ MD(12), MD(12), /* 12+ */ MD12(12), MD12(12)) -F(rt_slot_group, /* 9+ */ MD(11), MD(11), /* 12+ */ MD12(11), MD12(11)) -F(rt_message_type, /* 9+ */ MD(10), MD( 8), /* 12+ */ MD12(10), MD12(8)) -/** @} */ - -/** - * Pixel Interpolator message function control bits: - * @{ - */ -F(pi_simd_mode, /* 9+ */ MD(16), MD(16), /* 12+ */ MD12(16), MD12(16)) -F(pi_nopersp, /* 9+ */ MD(14), MD(14), /* 12+ */ MD12(14), MD12(14)) -F(pi_message_type, /* 9+ */ MD(13), MD(12), /* 12+ */ MD12(13), MD12(12)) -F(pi_slot_group, /* 9+ */ MD(11), MD(11), /* 12+ */ MD12(11), MD12(11)) -F(pi_message_data, /* 9+ */ MD(7), MD(0), /* 12+ */ MD12(7), MD12(0)) -/** @} */ - -/** - * Immediates: - * @{ - */ -static inline int -brw_eu_inst_imm_d(const struct intel_device_info 
*devinfo, const brw_eu_inst *insn) -{ - (void) devinfo; - return brw_eu_inst_bits(insn, 127, 96); -} - -static inline unsigned -brw_eu_inst_imm_ud(const struct intel_device_info *devinfo, const brw_eu_inst *insn) -{ - (void) devinfo; - return brw_eu_inst_bits(insn, 127, 96); -} - -static inline uint64_t -brw_eu_inst_imm_uq(const struct intel_device_info *devinfo, - const brw_eu_inst *insn) -{ - if (devinfo->ver >= 12) { - return brw_eu_inst_bits(insn, 95, 64) << 32 | - brw_eu_inst_bits(insn, 127, 96); - } else { - return brw_eu_inst_bits(insn, 127, 64); - } -} - -static inline float -brw_eu_inst_imm_f(const struct intel_device_info *devinfo, const brw_eu_inst *insn) -{ - union { - float f; - uint32_t u; - } ft; - (void) devinfo; - ft.u = brw_eu_inst_bits(insn, 127, 96); - return ft.f; -} - -static inline double -brw_eu_inst_imm_df(const struct intel_device_info *devinfo, const brw_eu_inst *insn) -{ - union { - double d; - uint64_t u; - } dt; - dt.u = brw_eu_inst_imm_uq(devinfo, insn); - return dt.d; -} - -static inline void -brw_eu_inst_set_imm_d(const struct intel_device_info *devinfo, - brw_eu_inst *insn, int value) -{ - (void) devinfo; - return brw_eu_inst_set_bits(insn, 127, 96, value); -} - -static inline void -brw_eu_inst_set_imm_ud(const struct intel_device_info *devinfo, - brw_eu_inst *insn, unsigned value) -{ - (void) devinfo; - return brw_eu_inst_set_bits(insn, 127, 96, value); -} - -static inline void -brw_eu_inst_set_imm_f(const struct intel_device_info *devinfo, - brw_eu_inst *insn, float value) -{ - union { - float f; - uint32_t u; - } ft; - (void) devinfo; - ft.f = value; - brw_eu_inst_set_bits(insn, 127, 96, ft.u); -} - -static inline void -brw_eu_inst_set_imm_df(const struct intel_device_info *devinfo, - brw_eu_inst *insn, double value) -{ - union { - double d; - uint64_t u; - } dt; - (void) devinfo; - dt.d = value; - - if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(insn, 95, 64, dt.u >> 32); - brw_eu_inst_set_bits(insn, 127, 96, dt.u & 
0xFFFFFFFF); - } else { - brw_eu_inst_set_bits(insn, 127, 64, dt.u); - } -} - -static inline void -brw_eu_inst_set_imm_uq(const struct intel_device_info *devinfo, - brw_eu_inst *insn, uint64_t value) -{ - (void) devinfo; - if (devinfo->ver >= 12) { - brw_eu_inst_set_bits(insn, 95, 64, value >> 32); - brw_eu_inst_set_bits(insn, 127, 96, value & 0xFFFFFFFF); - } else { - brw_eu_inst_set_bits(insn, 127, 64, value); - } -} - -/** @} */ - -#define REG_TYPE(reg) \ -static inline void \ -brw_eu_inst_set_##reg##_file_type(const struct intel_device_info *devinfo, \ - brw_eu_inst *inst, enum brw_reg_file file, \ - enum brw_reg_type type) \ -{ \ - assert(file <= IMM); \ - unsigned hw_type = brw_type_encode(devinfo, file, type); \ - brw_eu_inst_set_##reg##_reg_file(devinfo, inst, file); \ - brw_eu_inst_set_##reg##_reg_hw_type(devinfo, inst, hw_type); \ -} \ - \ -static inline enum brw_reg_type \ -brw_eu_inst_##reg##_type(const struct intel_device_info *devinfo, \ - const brw_eu_inst *inst) \ -{ \ - unsigned file = __builtin_strcmp("dst", #reg) == 0 ? 
\ - (unsigned) FIXED_GRF : \ - brw_eu_inst_##reg##_reg_file(devinfo, inst); \ - unsigned hw_type = brw_eu_inst_##reg##_reg_hw_type(devinfo, inst); \ - return brw_type_decode(devinfo, (enum brw_reg_file)file, hw_type); \ -} - -REG_TYPE(dst) -REG_TYPE(src0) -REG_TYPE(src1) -#undef REG_TYPE - - -/** - * Flow control instruction bits: - * @{ - */ -static inline void -brw_eu_inst_set_uip(const struct intel_device_info *devinfo, - brw_eu_inst *inst, int32_t value) -{ - if (devinfo->ver >= 12) - brw_eu_inst_set_src1_is_imm(devinfo, inst, 1); - else - brw_eu_inst_set_src1_file_type(devinfo, inst, IMM, BRW_TYPE_D); - - brw_eu_inst_set_bits(inst, 95, 64, (uint32_t)value); -} - -static inline int32_t -brw_eu_inst_uip(const struct intel_device_info *devinfo, const brw_eu_inst *inst) -{ - return brw_eu_inst_bits(inst, 95, 64); -} - -static inline void -brw_eu_inst_set_unused_uip(const struct intel_device_info *devinfo, - brw_eu_inst *inst) -{ - if (devinfo->ver < 12) { - /* When Src1 is not used, old versions required its file to be ARF and - * its type to match Src0 type. See "Non-present Operands" in PRM for - * SKL, vol 7 "3D-Media-GPGPU". - * - * Note: in BRW we only use immediate sources for the branching - * instructions. 
- */ - brw_eu_inst_set_src1_file_type(devinfo, inst, ARF, BRW_TYPE_D); - assert(brw_eu_inst_src0_reg_hw_type(devinfo, inst) == - brw_eu_inst_src1_reg_hw_type(devinfo, inst)); - } -} - -static inline void -brw_eu_inst_set_jip(const struct intel_device_info *devinfo, - brw_eu_inst *inst, int32_t value) -{ - if (devinfo->ver >= 12) - brw_eu_inst_set_src0_is_imm(devinfo, inst, 1); - else - brw_eu_inst_set_src0_file_type(devinfo, inst, IMM, BRW_TYPE_D); - - brw_eu_inst_set_bits(inst, 127, 96, (uint32_t)value); -} - -static inline int32_t -brw_eu_inst_jip(const struct intel_device_info *devinfo, const brw_eu_inst *inst) -{ - return brw_eu_inst_bits(inst, 127, 96); -} -/** @} */ - - -/* The AddrImm fields are split into two discontiguous sections on Gfx9+ */ -#define BRW_IA1_ADDR_IMM(reg, g9_nine, g9_high, g9_low, \ - g12_high, g12_low, g20_high, g20_low, g20_zero) \ -static inline void \ -brw_eu_inst_set_##reg##_ia1_addr_imm(const struct \ - intel_device_info *devinfo, \ - brw_eu_inst *inst, \ - unsigned value) \ -{ \ - if (devinfo->ver >= 20) { \ - assert((value & ~0x7ff) == 0); \ - brw_eu_inst_set_bits(inst, g20_high, g20_low, value >> 1); \ - if (g20_zero == -1) \ - assert((value & 1) == 0); \ - else \ - brw_eu_inst_set_bits(inst, g20_zero, g20_zero, value & 1); \ - } else if (devinfo->ver >= 12) { \ - assert((value & ~0x3ff) == 0); \ - brw_eu_inst_set_bits(inst, g12_high, g12_low, value); \ - } else { \ - assert((value & ~0x3ff) == 0); \ - brw_eu_inst_set_bits(inst, g9_high, g9_low, value & 0x1ff); \ - brw_eu_inst_set_bits(inst, g9_nine, g9_nine, value >> 9); \ - } \ -} \ -static inline unsigned \ -brw_eu_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo,\ - const brw_eu_inst *inst) \ -{ \ - if (devinfo->ver >= 20) { \ - return brw_eu_inst_bits(inst, g20_high, g20_low) << 1 | \ - (g20_zero == -1 ? 
0 : \ - brw_eu_inst_bits(inst, g20_zero, g20_zero)); \ - } else if (devinfo->ver >= 12) { \ - return brw_eu_inst_bits(inst, g12_high, g12_low); \ - } else { \ - return brw_eu_inst_bits(inst, g9_high, g9_low) | \ - (brw_eu_inst_bits(inst, g9_nine, g9_nine) << 9); \ - } \ -} - -/* AddrImm for Align1 Indirect Addressing */ -/* ----Gfx9---- -Gfx12- ---Gfx20--- */ -BRW_IA1_ADDR_IMM(src1, 121, 104, 96, 107, 98, 107, 98, -1) -BRW_IA1_ADDR_IMM(src0, 95, 72, 64, 75, 66, 75, 66, 87) -BRW_IA1_ADDR_IMM(dst, 47, 56, 48, 59, 50, 59, 50, 33) - -#define BRW_IA16_ADDR_IMM(reg, g9_nine, g9_high, g9_low) \ -static inline void \ -brw_eu_inst_set_##reg##_ia16_addr_imm(const struct \ - intel_device_info *devinfo, \ - brw_eu_inst *inst, unsigned value) \ -{ \ - assert(devinfo->ver < 12); \ - assert((value & ~0x3ff) == 0); \ - assert(GET_BITS(value, 3, 0) == 0); \ - brw_eu_inst_set_bits(inst, g9_high, g9_low, GET_BITS(value, 8, 4)); \ - brw_eu_inst_set_bits(inst, g9_nine, g9_nine, GET_BITS(value, 9, 9)); \ -} \ -static inline unsigned \ -brw_eu_inst_##reg##_ia16_addr_imm(const struct intel_device_info *devinfo,\ - const brw_eu_inst *inst) \ -{ \ - assert(devinfo->ver < 12); \ - return (brw_eu_inst_bits(inst, g9_high, g9_low) << 4) | \ - (brw_eu_inst_bits(inst, g9_nine, g9_nine) << 9); \ -} - -/* AddrImm[9:0] for Align16 Indirect Addressing: - * Compared to Align1, these are missing the low 4 bits. - * ----Gfx9---- - */ -BRW_IA16_ADDR_IMM(src1, 121, 104, 100) -BRW_IA16_ADDR_IMM(src0, 95, 72, 68) -BRW_IA16_ADDR_IMM(dst, 47, 56, 52) -BRW_IA16_ADDR_IMM(send_src0, 78, 72, 68) -BRW_IA16_ADDR_IMM(send_dst, 62, 56, 52) - -/** - * Fetch a set of contiguous bits from the instruction. - * - * Bits indices range from 0..127; fields may not cross 64-bit boundaries. - */ -static inline uint64_t -brw_eu_inst_bits(const brw_eu_inst *inst, unsigned high, unsigned low) -{ - assume(high < 128); - assume(high >= low); - /* We assume the field doesn't cross 64-bit boundaries. 
*/ - const unsigned word = high / 64; - assert(word == low / 64); - - high %= 64; - low %= 64; - - const uint64_t mask = (~0ull >> (64 - (high - low + 1))); - - return (inst->data[word] >> low) & mask; -} - -/** - * Set bits in the instruction, with proper shifting and masking. - * - * Bits indices range from 0..127; fields may not cross 64-bit boundaries. - */ -static inline void -brw_eu_inst_set_bits(brw_eu_inst *inst, unsigned high, unsigned low, uint64_t value) -{ - assume(high < 128); - assume(high >= low); - const unsigned word = high / 64; - assert(word == low / 64); - - high %= 64; - low %= 64; - - const uint64_t mask = (~0ull >> (64 - (high - low + 1))) << low; - - /* Make sure the supplied value actually fits in the given bitfield. */ - assert((value & (mask >> low)) == value); - - inst->data[word] = (inst->data[word] & ~mask) | (value << low); -} - -#undef BRW_IA16_ADDR_IMM -#undef BRW_IA1_ADDR_IMM -#undef MD -#undef F -#undef FC -#undef F20 -#undef FD20 - -typedef struct { - uint64_t data; -} brw_eu_compact_inst; - -/** - * Fetch a set of contiguous bits from the compacted instruction. - * - * Bits indices range from 0..63. - */ -static inline unsigned -brw_eu_compact_inst_bits(const brw_eu_compact_inst *inst, unsigned high, unsigned low) -{ - assume(high < 64); - assume(high >= low); - const uint64_t mask = (1ull << (high - low + 1)) - 1; - - return (inst->data >> low) & mask; -} - -/** - * Set bits in the compacted instruction. - * - * Bits indices range from 0..63. - */ -static inline void -brw_eu_compact_inst_set_bits(brw_eu_compact_inst *inst, unsigned high, unsigned low, - uint64_t value) -{ - assume(high < 64); - assume(high >= low); - const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low; - - /* Make sure the supplied value actually fits in the given bitfield. 
*/ - assert((value & (mask >> low)) == value); - - inst->data = (inst->data & ~mask) | (value << low); -} - -#define FC(name, hi9, lo9, hi12, lo12, assertions) \ -static inline void \ -brw_eu_compact_inst_set_##name(const struct \ - intel_device_info *devinfo, \ - brw_eu_compact_inst *inst, \ - unsigned v) \ -{ \ - assert(assertions); \ - if (devinfo->ver >= 12) \ - brw_eu_compact_inst_set_bits(inst, hi12, lo12, v); \ - else \ - brw_eu_compact_inst_set_bits(inst, hi9, lo9, v); \ -} \ -static inline unsigned \ -brw_eu_compact_inst_##name(const struct intel_device_info *devinfo,\ - const brw_eu_compact_inst *inst) \ -{ \ - assert(assertions); \ - if (devinfo->ver >= 12) \ - return brw_eu_compact_inst_bits(inst, hi12, lo12); \ - else \ - return brw_eu_compact_inst_bits(inst, hi9, lo9); \ -} - -/* A simple macro for fields which stay in the same place on all generations - * except for Gfx12. - */ -#define F(name, hi9, lo9, hi12, lo12) FC(name, hi9, lo9, hi12, lo12, true) - -/* A macro for fields which moved to several different locations - * across generations. 
- */ -#define F20(name, hi9, lo9, hi12, lo12, hi20, lo20) \ -static inline void \ -brw_eu_compact_inst_set_##name(const struct \ - intel_device_info *devinfo, \ - brw_eu_compact_inst *inst, \ - unsigned v) \ -{ \ - if (devinfo->ver >= 20) \ - brw_eu_compact_inst_set_bits(inst, hi20, lo20, v); \ - else if (devinfo->ver >= 12) \ - brw_eu_compact_inst_set_bits(inst, hi12, lo12, v); \ - else \ - brw_eu_compact_inst_set_bits(inst, hi9, lo9, v); \ -} \ -static inline unsigned \ -brw_eu_compact_inst_##name(const struct intel_device_info *devinfo,\ - const brw_eu_compact_inst *inst) \ -{ \ - if (devinfo->ver >= 20) \ - return brw_eu_compact_inst_bits(inst, hi20, lo20); \ - else if (devinfo->ver >= 12) \ - return brw_eu_compact_inst_bits(inst, hi12, lo12); \ - else \ - return brw_eu_compact_inst_bits(inst, hi9, lo9); \ -} - -/* A macro for fields which gained extra discontiguous bits in Gfx20 - * (specified by hi20ex-lo20ex). - */ -#define FD20(name, hi9, lo9, hi12, lo12, \ - hi20, lo20, hi20ex, lo20ex) \ - static inline void \ -brw_eu_compact_inst_set_##name(const struct \ - intel_device_info *devinfo, \ - brw_eu_compact_inst *inst, unsigned v) \ -{ \ - if (devinfo->ver >= 20) { \ - const unsigned k = hi20 - lo20 + 1; \ - brw_eu_compact_inst_set_bits(inst, hi20ex, lo20ex, v >> k); \ - brw_eu_compact_inst_set_bits(inst, hi20, lo20, v & ((1u << k) - 1)); \ - } else if (devinfo->ver >= 12) { \ - brw_eu_compact_inst_set_bits(inst, hi12, lo12, v); \ - } else { \ - brw_eu_compact_inst_set_bits(inst, hi9, lo9, v); \ - } \ -} \ -static inline unsigned \ -brw_eu_compact_inst_##name(const struct intel_device_info *devinfo, \ - const brw_eu_compact_inst *inst) \ -{ \ - if (devinfo->ver >= 20) { \ - const unsigned k = hi20 - lo20 + 1; \ - return (brw_eu_compact_inst_bits(inst, hi20ex, lo20ex) << k | \ - brw_eu_compact_inst_bits(inst, hi20, lo20)); \ - } else if (devinfo->ver >= 12) { \ - return brw_eu_compact_inst_bits(inst, hi12, lo12); \ - } else { \ - return 
brw_eu_compact_inst_bits(inst, hi9, lo9); \ - } \ -} - -F(src1_reg_nr, /* 9+ */ 63, 56, /* 12+ */ 63, 56) -F(src0_reg_nr, /* 9+ */ 55, 48, /* 12+ */ 47, 40) -F20(dst_reg_nr, /* 9+ */ 47, 40, /* 12+ */ 23, 16, /* 20+ */ 39, 32) -F(src1_index, /* 9+ */ 39, 35, /* 12+ */ 55, 52) -F20(src0_index, /* 9+ */ 34, 30, /* 12+ */ 51, 48, /* 20+ */ 25, 23) -F(cmpt_control, /* 9+ */ 29, 29, /* 12+ */ 29, 29) /* Same location as brw_eu_inst */ -F(cond_modifier, /* 9+ */ 27, 24, /* 12+ */ -1, -1) /* Same location as brw_eu_inst */ -F(acc_wr_control, /* 9+ */ 23, 23, /* 12+ */ -1, -1) -F20(subreg_index, /* 9+ */ 22, 18, /* 12+ */ 39, 35, /* 20+ */ 51, 48) -FD20(datatype_index, /* 9+ */ 17, 13, /* 12+ */ 34, 30, /* 20+ */ 28, 26, 31, 30) -F20(control_index, /* 9+ */ 12, 8, /* 12+ */ 28, 24, /* 20+ */ 22, 18) -F20(swsb, /* 9+ */ -1, -1, /* 12+ */ 15, 8, /* 20+ */ 17, 8) -F(debug_control, /* 9+ */ 7, 7, /* 12+ */ 7, 7) -F(hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0) /* Same location as brw_eu_inst */ - -static inline unsigned -brw_eu_compact_inst_imm(const struct intel_device_info *devinfo, - const brw_eu_compact_inst *inst) -{ - if (devinfo->ver >= 12) { - return brw_eu_compact_inst_bits(inst, 63, 52); - } else { - return (brw_eu_compact_inst_bits(inst, 39, 35) << 8) | - (brw_eu_compact_inst_bits(inst, 63, 56)); - } -} - -/** - * Compacted three-source instructions: - * @{ - */ -F(3src_src2_reg_nr, /* 9+ */ 63, 57, /* 12+ */ 55, 48) -F(3src_src1_reg_nr, /* 9+ */ 56, 50, /* 12+ */ 63, 56) -F(3src_src0_reg_nr, /* 9+ */ 49, 43, /* 12+ */ 47, 40) -F(3src_src2_subreg_nr, /* 9+ */ 42, 40, /* 12+ */ -1, -1) -F(3src_src1_subreg_nr, /* 9+ */ 39, 37, /* 12+ */ -1, -1) -F(3src_src0_subreg_nr, /* 9+ */ 36, 34, /* 12+ */ -1, -1) -F(3src_src2_rep_ctrl, /* 9+ */ 33, 33, /* 12+ */ -1, -1) -F(3src_src1_rep_ctrl, /* 9+ */ 32, 32, /* 12+ */ -1, -1) -F(3src_saturate, /* 9+ */ 31, 31, /* 12+ */ -1, -1) -F(3src_debug_control, /* 9+ */ 30, 30, /* 12+ */ 7, 7) -F(3src_cmpt_control, /* 9+ */ 29, 29, /* 12+ */ 
29, 29) -F(3src_src0_rep_ctrl, /* 9+ */ 28, 28, /* 12+ */ -1, -1) -/* Reserved */ -F20(3src_dst_reg_nr, /* 9+ */ 18, 12, /* 12+ */ 23, 16, /* 20+ */ 39, 32) -F20(3src_source_index, /* 9+ */ 11, 10, /* 12+ */ 34, 30, /* 20+ */ 25, 22) -FD20(3src_subreg_index, /* 9+ */ -1, -1, /* 12+ */ 39, 35, /* 20+ */ 28, 26, 31, 30) -F20(3src_control_index, /* 9+ */ 9, 8, /* 12+ */ 28, 24, /* 20+ */ 21, 18) -F20(3src_swsb, /* 9+ */ -1, -1, /* 12+ */ 15, 8, /* 20+ */ 17, 8) -/* Bit 7 is Reserved (for future Opcode expansion) */ -F(3src_hw_opcode, /* 9+ */ 6, 0, /* 12+ */ 6, 0) -/** @} */ - -#undef F - -static inline void -brw_eu_inst_set_opcode(const struct brw_isa_info *isa, - struct brw_eu_inst *inst, enum opcode opcode) -{ - brw_eu_inst_set_hw_opcode(isa->devinfo, inst, brw_opcode_encode(isa, opcode)); -} - -static inline enum opcode -brw_eu_inst_opcode(const struct brw_isa_info *isa, - const struct brw_eu_inst *inst) -{ - return brw_opcode_decode(isa, brw_eu_inst_hw_opcode(isa->devinfo, inst)); -} - -#ifdef __cplusplus -} -#endif diff --git a/src/intel/compiler/brw/brw_eu_validate.c b/src/intel/compiler/brw/brw_eu_validate.c deleted file mode 100644 index 543f7ae1538..00000000000 --- a/src/intel/compiler/brw/brw_eu_validate.c +++ /dev/null @@ -1,3149 +0,0 @@ -/* - * Copyright © 2015-2019 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -/** @file - * - * This file implements a pass that validates shader assembly. - * - * The restrictions implemented herein are intended to verify that instructions - * in shader assembly do not violate restrictions documented in the graphics - * programming reference manuals. - * - * The restrictions are difficult for humans to quickly verify due to their - * complexity and abundance. - * - * It is critical that this code is thoroughly unit tested because false - * results will lead developers astray, which is worse than having no validator - * at all. 
Functional changes to this file without corresponding unit tests (in - * test_eu_validate.cpp) will be rejected. - */ - -#include "brw_eu.h" -#include "brw_disasm_info.h" - -enum brw_hw_instr_format { - FORMAT_BASIC, - FORMAT_BASIC_THREE_SRC, - FORMAT_BFN_THREE_SRC, - FORMAT_DPAS_THREE_SRC, - FORMAT_SEND, - FORMAT_BRANCH, - FORMAT_ILLEGAL, - FORMAT_NOP, -}; - -typedef struct brw_hw_decoded_inst { - const brw_eu_inst *raw; - - enum brw_hw_instr_format format; - - enum opcode opcode; - - unsigned exec_size; - unsigned access_mode; - - enum brw_conditional_mod cond_modifier; - enum brw_predicate pred_control; - bool saturate; - - bool has_dst; - struct { - enum brw_reg_file file; - enum brw_reg_type type; - unsigned address_mode; - - /* These are already physical register numbers. */ - unsigned nr; - unsigned subnr; - - unsigned hstride; - } dst; - - unsigned num_sources; - struct { - enum brw_reg_file file; - enum brw_reg_type type; - unsigned address_mode; - bool negate; - bool abs; - - /* These are already physical register numbers. */ - unsigned nr; - unsigned subnr; - - unsigned vstride; - unsigned width; - unsigned hstride; - } src[3]; -} brw_hw_decoded_inst; - -struct error { - char *msg; -}; - -static void -report_error(struct error *error, const char *msg) -{ - assert(error); - - if (error->msg) { - /* Ignore duplicate reports. */ - if (strstr(error->msg, msg)) - return; - ralloc_asprintf_append(&error->msg, "\tERROR: %s\n", msg); - } else { - error->msg = ralloc_asprintf(NULL, "\tERROR: %s\n", msg); - } -} - -#define ERROR(msg) ERROR_IF(true, msg) -#define ERROR_IF(cond, msg) \ - do { \ - if ((cond)) \ - report_error(error, msg); \ - } while(0) - -#define RETURN_ERROR(msg) RETURN_ERROR_IF(true, msg) -#define RETURN_ERROR_IF(cond, msg) \ - do { \ - if ((cond)) { \ - report_error(error, msg); \ - return; \ - } \ - } while(0) - -#define STRIDE(stride) (stride != 0 ? 
1 << ((stride) - 1) : 0) -#define WIDTH(width) (1 << (width)) - -static bool -inst_is_send(const brw_hw_decoded_inst *inst) -{ - switch (inst->opcode) { - case BRW_OPCODE_SEND: - case BRW_OPCODE_SENDC: - case BRW_OPCODE_SENDS: - case BRW_OPCODE_SENDSC: - return true; - default: - return false; - } -} - -static bool -inst_is_split_send(const struct brw_isa_info *isa, const brw_hw_decoded_inst *inst) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (devinfo->ver >= 12) { - return inst_is_send(inst); - } else { - switch (inst->opcode) { - case BRW_OPCODE_SENDS: - case BRW_OPCODE_SENDSC: - return true; - default: - return false; - } - } -} - -static unsigned -signed_type(unsigned type) -{ - return brw_type_is_uint(type) ? (type | BRW_TYPE_BASE_SINT) : type; -} - -static bool -inst_is_raw_move(const brw_hw_decoded_inst *inst) -{ - unsigned dst_type = signed_type(inst->dst.type); - unsigned src_type = signed_type(inst->src[0].type); - - if (inst->src[0].file == IMM) { - /* FIXME: not strictly true */ - if (brw_type_is_vector_imm(inst->src[0].type)) - return false; - } else if (inst->src[0].negate || inst->src[0].abs) { - return false; - } - - return inst->opcode == BRW_OPCODE_MOV && - !inst->saturate && - dst_type == src_type; -} - -static bool -dst_is_null(const brw_hw_decoded_inst *inst) -{ - return inst->dst.file == ARF && inst->dst.nr == BRW_ARF_NULL; -} - -static bool -src0_is_null(const brw_hw_decoded_inst *inst) -{ - return inst->src[0].address_mode == BRW_ADDRESS_DIRECT && - inst->src[0].file == ARF && - inst->src[0].nr == BRW_ARF_NULL; -} - -static bool -src1_is_null(const brw_hw_decoded_inst *inst) -{ - assert(inst->src[1].address_mode == BRW_ADDRESS_DIRECT); - return inst->src[1].file == ARF && - inst->src[1].nr == BRW_ARF_NULL; -} - -static bool -src0_is_acc(const brw_hw_decoded_inst *inst) -{ - return inst->src[0].address_mode == BRW_ADDRESS_DIRECT && - inst->src[0].file == ARF && - (inst->src[0].nr & 0xF0) == BRW_ARF_ACCUMULATOR; -} - 
-static bool -src1_is_acc(const brw_hw_decoded_inst *inst) -{ - assert(inst->src[1].address_mode == BRW_ADDRESS_DIRECT); - return inst->src[1].file == ARF && - (inst->src[1].nr & 0xF0) == BRW_ARF_ACCUMULATOR; -} - -static bool -src_has_scalar_region(const brw_hw_decoded_inst *inst, int src) -{ - return inst->src[src].vstride == 0 && - inst->src[src].width == 1 && - inst->src[src].hstride == 0; -} - -static void -invalid_values(const struct brw_isa_info *isa, const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (devinfo->ver >= 12) { - unsigned qtr_ctrl = brw_eu_inst_qtr_control(devinfo, inst->raw); - unsigned nib_ctrl = - devinfo->ver == 12 ? brw_eu_inst_nib_control(devinfo, inst->raw) : 0; - - unsigned chan_off = (qtr_ctrl * 2 + nib_ctrl) << 2; - ERROR_IF(chan_off % inst->exec_size != 0, - "The execution size must be a factor of the chosen offset"); - } -} - -static void -sources_not_null(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - /* Nothing to test. 3-src instructions can only have GRF sources, and - * there's no bit to control the file. - */ - if (inst->num_sources == 3) - return; - - /* Nothing to test. Split sends can only encode a file in sources that are - * allowed to be NULL. 
- */ - if (inst_is_split_send(isa, inst)) - return; - - if (inst->num_sources >= 1 && inst->opcode != BRW_OPCODE_SYNC) - ERROR_IF(src0_is_null(inst), "src0 is null"); - - if (inst->num_sources == 2) - ERROR_IF(src1_is_null(inst), "src1 is null"); -} - -static bool -inst_uses_src_acc(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst) -{ - /* Check instructions that use implicit accumulator sources */ - switch (inst->opcode) { - case BRW_OPCODE_MAC: - case BRW_OPCODE_MACH: - return true; - default: - break; - } - - /* FIXME: support 3-src instructions */ - assert(inst->num_sources < 3); - - return src0_is_acc(inst) || (inst->num_sources > 1 && src1_is_acc(inst)); -} - -static void -send_restrictions(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (inst_is_split_send(isa, inst)) { - ERROR_IF(inst->src[1].file == ARF && - inst->src[1].nr != BRW_ARF_NULL, - "src1 of split send must be a GRF or NULL"); - - if (devinfo->ver < 20) { - ERROR_IF(brw_eu_inst_eot(devinfo, inst->raw) && - inst->src[0].nr < 112, - "send with EOT must use g112-g127"); - ERROR_IF(brw_eu_inst_eot(devinfo, inst->raw) && - inst->src[1].file == FIXED_GRF && - inst->src[1].nr < 112, - "send with EOT must use g112-g127"); - } - - if (inst->src[0].file == FIXED_GRF && inst->src[1].file == FIXED_GRF) { - /* Assume minimums if we don't know */ - unsigned mlen = 1; - if (!brw_eu_inst_send_sel_reg32_desc(devinfo, inst->raw)) { - const uint32_t desc = brw_eu_inst_send_desc(devinfo, inst->raw); - mlen = brw_message_desc_mlen(devinfo, desc) / reg_unit(devinfo); - } - - unsigned ex_mlen = 1; - if (!brw_eu_inst_send_sel_reg32_ex_desc(devinfo, inst->raw)) { - const uint32_t ex_desc = brw_eu_inst_sends_ex_desc(devinfo, inst->raw, false); - ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc) / - reg_unit(devinfo); - } - const unsigned src0_reg_nr = inst->src[0].nr; - const unsigned 
src1_reg_nr = inst->src[1].nr; - ERROR_IF((src0_reg_nr <= src1_reg_nr && - src1_reg_nr < src0_reg_nr + mlen) || - (src1_reg_nr <= src0_reg_nr && - src0_reg_nr < src1_reg_nr + ex_mlen), - "split send payloads must not overlap"); - } - } else if (inst_is_send(inst)) { - ERROR_IF(inst->src[0].address_mode != BRW_ADDRESS_DIRECT, - "send must use direct addressing"); - - ERROR_IF(inst->src[0].file != FIXED_GRF, - "send from non-GRF"); - ERROR_IF(brw_eu_inst_eot(devinfo, inst->raw) && - inst->src[0].nr < 112, - "send with EOT must use g112-g127"); - - if (devinfo->ver < 10) { - ERROR_IF(!dst_is_null(inst) && - (inst->dst.nr + brw_eu_inst_rlen(devinfo, inst->raw) > 127) && - (inst->src[0].nr + brw_eu_inst_mlen(devinfo, inst->raw) > inst->dst.nr), - "r127 must not be used for return address when there is " - "a src and dest overlap"); - } - } -} - -static bool -is_unsupported_inst(const struct brw_isa_info *isa, - const brw_eu_inst *inst) -{ - return brw_eu_inst_opcode(isa, inst) == BRW_OPCODE_ILLEGAL; -} - -/** - * Returns whether a combination of two types would qualify as mixed float - * operation mode - */ -static inline bool -types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1) -{ - return (t0 == BRW_TYPE_F && t1 == BRW_TYPE_HF) || - (t1 == BRW_TYPE_F && t0 == BRW_TYPE_HF); -} - -static enum brw_reg_type -execution_type_for_type(enum brw_reg_type type) -{ - switch (type) { - case BRW_TYPE_DF: - case BRW_TYPE_F: - case BRW_TYPE_HF: - case BRW_TYPE_BF8: - case BRW_TYPE_HF8: - return type; - - case BRW_TYPE_VF: - case BRW_TYPE_BF: - return BRW_TYPE_F; - - case BRW_TYPE_Q: - case BRW_TYPE_UQ: - return BRW_TYPE_Q; - - case BRW_TYPE_D: - case BRW_TYPE_UD: - return BRW_TYPE_D; - - case BRW_TYPE_W: - case BRW_TYPE_UW: - case BRW_TYPE_B: - case BRW_TYPE_UB: - case BRW_TYPE_V: - case BRW_TYPE_UV: - return BRW_TYPE_W; - - default: - return BRW_TYPE_INVALID; - } -} - -/** - * Returns the execution type of an instruction \p inst - */ -static enum brw_reg_type 
-execution_type(const brw_hw_decoded_inst *inst) -{ - enum brw_reg_type src0_exec_type, src1_exec_type; - - /* Execution data type is independent of destination data type, except in - * mixed F/HF instructions. - */ - enum brw_reg_type dst_exec_type = inst->dst.type; - - src0_exec_type = execution_type_for_type(inst->src[0].type); - if (inst->num_sources == 1) { - if (src0_exec_type == BRW_TYPE_HF) - return dst_exec_type; - return src0_exec_type; - } - - src1_exec_type = execution_type_for_type(inst->src[1].type); - if (types_are_mixed_float(src0_exec_type, src1_exec_type) || - types_are_mixed_float(src0_exec_type, dst_exec_type) || - types_are_mixed_float(src1_exec_type, dst_exec_type)) { - return BRW_TYPE_F; - } - - if (src0_exec_type == src1_exec_type) - return src0_exec_type; - - if (src0_exec_type == BRW_TYPE_Q || - src1_exec_type == BRW_TYPE_Q) - return BRW_TYPE_Q; - - if (src0_exec_type == BRW_TYPE_D || - src1_exec_type == BRW_TYPE_D) - return BRW_TYPE_D; - - if (src0_exec_type == BRW_TYPE_W || - src1_exec_type == BRW_TYPE_W) - return BRW_TYPE_W; - - if (src0_exec_type == BRW_TYPE_DF || - src1_exec_type == BRW_TYPE_DF) - return BRW_TYPE_DF; - - UNREACHABLE("not reached"); -} - -/** - * Returns whether a region is packed - * - * A region is packed if its elements are adjacent in memory, with no - * intervening space, no overlap, and no replicated values. - */ -static bool -is_packed(unsigned vstride, unsigned width, unsigned hstride) -{ - if (vstride == width) { - if (vstride == 1) { - return hstride == 0; - } else { - return hstride == 1; - } - } - - return false; -} - -/** - * Returns whether a region is linear - * - * A region is linear if its elements do not overlap and are not replicated. - * Unlike a packed region, intervening space (i.e. strided values) is allowed. 
- */ -static bool -is_linear(unsigned vstride, unsigned width, unsigned hstride) -{ - return vstride == width * hstride || - (hstride == 0 && width == 1); -} - -/** - * Returns whether an instruction is an explicit or implicit conversion - * to/from half-float. - */ -static bool -is_half_float_conversion(const brw_hw_decoded_inst *inst) -{ - enum brw_reg_type dst_type = inst->dst.type; - - enum brw_reg_type src0_type = inst->src[0].type; - - if (dst_type != src0_type && - (dst_type == BRW_TYPE_HF || src0_type == BRW_TYPE_HF)) { - return true; - } else if (inst->num_sources > 1) { - enum brw_reg_type src1_type = inst->src[1].type; - return dst_type != src1_type && - (dst_type == BRW_TYPE_HF || - src1_type == BRW_TYPE_HF); - } - - return false; -} - -/* - * Returns whether an instruction is using mixed float operation mode - */ -static bool -is_mixed_float(const brw_hw_decoded_inst *inst) -{ - if (inst_is_send(inst)) - return false; - - if (!inst->has_dst) - return false; - - /* FIXME: support 3-src instructions */ - assert(inst->num_sources < 3); - - enum brw_reg_type dst_type = inst->dst.type; - enum brw_reg_type src0_type = inst->src[0].type; - - if (inst->num_sources == 1) - return types_are_mixed_float(src0_type, dst_type); - - enum brw_reg_type src1_type = inst->src[1].type; - - return types_are_mixed_float(src0_type, src1_type) || - types_are_mixed_float(src0_type, dst_type) || - types_are_mixed_float(src1_type, dst_type); -} - -static bool -is_pure_bfloat(const brw_hw_decoded_inst *inst) -{ - if (inst_is_send(inst)) - return false; - - if (inst->num_sources == 0 && !inst->has_dst) - return false; - - for (int i = 0; i < inst->num_sources; i++) { - if (!brw_type_is_bfloat(inst->src[i].type)) - return false; - } - - if (inst->has_dst && !brw_type_is_bfloat(inst->dst.type)) - return false; - - return true; -} - -static bool -is_mixed_bfloat(const brw_hw_decoded_inst *inst) -{ - if (inst_is_send(inst)) - return false; - - const int operands = inst->num_sources + 
inst->has_dst; - - int bfloat = 0; - for (int i = 0; i < inst->num_sources; i++) - bfloat += brw_type_is_bfloat(inst->src[i].type); - if (inst->has_dst) - bfloat += brw_type_is_bfloat(inst->dst.type); - - return bfloat > 0 && bfloat != operands; -} - -/** - * Returns whether an instruction is an explicit or implicit conversion - * to/from byte. - */ -static bool -is_byte_conversion(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst) -{ - enum brw_reg_type dst_type = inst->dst.type; - - enum brw_reg_type src0_type = inst->src[0].type; - - if (dst_type != src0_type && - (brw_type_size_bytes(dst_type) == 1 || - brw_type_size_bytes(src0_type) == 1)) { - return true; - } else if (inst->num_sources > 1) { - enum brw_reg_type src1_type = inst->src[1].type; - return dst_type != src1_type && - (brw_type_size_bytes(dst_type) == 1 || - brw_type_size_bytes(src1_type) == 1); - } - - return false; -} - -/** - * Checks restrictions listed in "General Restrictions Based on Operand Types" - * in the "Register Region Restrictions" section. - */ -static void -general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (inst_is_send(inst)) - return; - - if (devinfo->ver >= 11) { - /* A register type of B or UB for DPAS actually means 4 bytes packed into - * a D or UD, so it is allowed. - */ - if (inst->num_sources == 3 && inst->opcode != BRW_OPCODE_DPAS) { - ERROR_IF(brw_type_size_bytes(inst->src[1].type) == 1 || - brw_type_size_bytes(inst->src[2].type) == 1, - "Byte data type is not supported for src1/2 register regioning. This includes " - "byte broadcast as well."); - } - if (inst->num_sources == 2) { - ERROR_IF(brw_type_size_bytes(inst->src[1].type) == 1, - "Byte data type is not supported for src1 register regioning. 
This includes " - "byte broadcast as well."); - } - } - - if (devinfo->ver >= 20) { - if (inst->opcode == BRW_OPCODE_SRND) { - bool valid = false; - if (inst->dst.type == BRW_TYPE_HF && - inst->src[0].type == BRW_TYPE_F && - inst->src[1].type == BRW_TYPE_F) - valid = true; - ERROR_IF(!valid, "Invalid type combination for SRND."); - } - } - - enum brw_reg_type dst_type = inst->dst.type; - - ERROR_IF(brw_type_is_bfloat(dst_type) && - !devinfo->has_bfloat16, - "Bfloat destination, but platform does not support it"); - - ERROR_IF(dst_type == BRW_TYPE_DF && - !devinfo->has_64bit_float, - "64-bit float destination, but platform does not support it"); - - ERROR_IF((dst_type == BRW_TYPE_Q || - dst_type == BRW_TYPE_UQ) && - !devinfo->has_64bit_int, - "64-bit int destination, but platform does not support it"); - - for (unsigned s = 0; s < inst->num_sources; s++) { - enum brw_reg_type src_type = inst->src[s].type; - - ERROR_IF(brw_type_is_bfloat(src_type) && - !devinfo->has_bfloat16, - "Bfloat source, but platform does not support it"); - - ERROR_IF(src_type == BRW_TYPE_DF && - !devinfo->has_64bit_float, - "64-bit float source, but platform does not support it"); - - ERROR_IF((src_type == BRW_TYPE_Q || - src_type == BRW_TYPE_UQ) && - !devinfo->has_64bit_int, - "64-bit int source, but platform does not support it"); - if (inst->access_mode == BRW_ALIGN_16 && - inst->num_sources == 3 && brw_type_size_bytes(src_type) > 4) { - /* From the Broadwell PRM, Volume 7 "3D Media GPGPU", page 944: - * - * "This is applicable to 32b datatypes and 16b datatype. 64b - * datatypes cannot use the replicate control." 
- */ - switch (s) { - case 0: - ERROR_IF(brw_eu_inst_3src_a16_src0_rep_ctrl(devinfo, inst->raw), - "RepCtrl must be zero for 64-bit source 0"); - break; - case 1: - ERROR_IF(brw_eu_inst_3src_a16_src1_rep_ctrl(devinfo, inst->raw), - "RepCtrl must be zero for 64-bit source 1"); - break; - case 2: - ERROR_IF(brw_eu_inst_3src_a16_src2_rep_ctrl(devinfo, inst->raw), - "RepCtrl must be zero for 64-bit source 2"); - break; - default: UNREACHABLE("invalid src"); - } - } - } - - if (inst->num_sources == 3) - return; - - if (inst->exec_size == 1) - return; - - if (!inst->has_dst) - return; - - if (inst->opcode == BRW_OPCODE_MATH && - intel_needs_workaround(devinfo, 22016140776)) { - /* Wa_22016140776: - * - * Scalar broadcast on HF math (packed or unpacked) must not be - * used. Compiler must use a mov instruction to expand the scalar - * value to a vector before using in a HF (packed or unpacked) - * math operation. - */ - ERROR_IF(inst->src[0].type == BRW_TYPE_HF && - src_has_scalar_region(inst, 0), - "Scalar broadcast on HF math (packed or unpacked) must not " - "be used."); - - if (inst->num_sources > 1) { - ERROR_IF(inst->src[1].type == BRW_TYPE_HF && - src_has_scalar_region(inst, 1), - "Scalar broadcast on HF math (packed or unpacked) must not " - "be used."); - } - } - - /* The PRMs say: - * - * Where n is the largest element size in bytes for any source or - * destination operand type, ExecSize * n must be <= 64. - * - * But we do not attempt to enforce it, because it is implied by other - * rules: - * - * - that the destination stride must match the execution data type - * - sources may not span more than two adjacent GRF registers - * - destination may not span more than two adjacent GRF registers - * - * In fact, checking it would weaken testing of the other rules. 
- */ - - unsigned dst_stride = inst->dst.hstride; - bool dst_type_is_byte = - inst->dst.type == BRW_TYPE_B || - inst->dst.type == BRW_TYPE_UB; - - if (dst_type_is_byte) { - if (is_packed(inst->exec_size * dst_stride, inst->exec_size, dst_stride)) { - ERROR_IF(!inst_is_raw_move(inst), - "Only raw MOV supports a packed-byte destination"); - } - } - - unsigned exec_type = execution_type(inst); - unsigned exec_type_size = brw_type_size_bytes(exec_type); - unsigned dst_type_size = brw_type_size_bytes(dst_type); - - if (is_byte_conversion(isa, inst)) { - /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: - * - * "There is no direct conversion from B/UB to DF or DF to B/UB. - * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB." - * - * Even if these restrictions are listed for the MOV instruction, we - * validate this more generally, since there is the possibility - * of implicit conversions from other instructions. - */ - enum brw_reg_type src0_type = inst->src[0].type; - enum brw_reg_type src1_type = inst->num_sources > 1 ? - inst->src[1].type : 0; - - ERROR_IF(brw_type_size_bytes(dst_type) == 1 && - (brw_type_size_bytes(src0_type) == 8 || - (inst->num_sources > 1 && brw_type_size_bytes(src1_type) == 8)), - "There are no direct conversions between 64-bit types and B/UB"); - - ERROR_IF(brw_type_size_bytes(dst_type) == 8 && - (brw_type_size_bytes(src0_type) == 1 || - (inst->num_sources > 1 && brw_type_size_bytes(src1_type) == 1)), - "There are no direct conversions between 64-bit types and B/UB"); - } - - if (is_half_float_conversion(inst)) { - /** - * A helper to validate used in the validation of the following restriction - * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: - * - * "There is no direct conversion from HF to DF or DF to HF. - * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." 
- * - * Even if these restrictions are listed for the MOV instruction, we - * validate this more generally, since there is the possibility - * of implicit conversions from other instructions, such us implicit - * conversion from integer to HF with the ADD instruction in SKL+. - */ - enum brw_reg_type src0_type = inst->src[0].type; - enum brw_reg_type src1_type = inst->num_sources > 1 ? - inst->src[1].type : 0; - ERROR_IF(dst_type == BRW_TYPE_HF && - (brw_type_size_bytes(src0_type) == 8 || - (inst->num_sources > 1 && brw_type_size_bytes(src1_type) == 8)), - "There are no direct conversions between 64-bit types and HF"); - - ERROR_IF(brw_type_size_bytes(dst_type) == 8 && - (src0_type == BRW_TYPE_HF || - (inst->num_sources > 1 && src1_type == BRW_TYPE_HF)), - "There are no direct conversions between 64-bit types and HF"); - - /* From the BDW+ PRM: - * - * "Conversion between Integer and HF (Half Float) must be - * DWord-aligned and strided by a DWord on the destination." - * - * Also, the above restrictions seems to be expanded on CHV and SKL+ by: - * - * "There is a relaxed alignment rule for word destinations. When - * the destination type is word (UW, W, HF), destination data types - * can be aligned to either the lowest word or the second lowest - * word of the execution channel. This means the destination data - * words can be either all in the even word locations or all in the - * odd word locations." - * - * We do not implement the second rule as is though, since empirical - * testing shows inconsistencies: - * - It suggests that packed 16-bit is not allowed, which is not true. - * - It suggests that conversions from Q/DF to W (which need to be - * 64-bit aligned on the destination) are not possible, which is - * not true. 
- * - * So from this rule we only validate the implication that conversions - * from F to HF need to be DWord strided (except in Align1 mixed - * float mode where packed fp16 destination is allowed so long as the - * destination is oword-aligned). - * - * Finally, we only validate this for Align1 because Align16 always - * requires packed destinations, so these restrictions can't possibly - * apply to Align16 mode. - */ - if (inst->access_mode == BRW_ALIGN_1) { - if ((dst_type == BRW_TYPE_HF && - (brw_type_is_int(src0_type) || - (inst->num_sources > 1 && brw_type_is_int(src1_type)))) || - (brw_type_is_int(dst_type) && - (src0_type == BRW_TYPE_HF || - (inst->num_sources > 1 && src1_type == BRW_TYPE_HF)))) { - ERROR_IF(dst_stride * dst_type_size != 4, - "Conversions between integer and half-float must be " - "strided by a DWord on the destination"); - - ERROR_IF(inst->dst.subnr % 4 != 0, - "Conversions between integer and half-float must be " - "aligned to a DWord on the destination"); - } else if (dst_type == BRW_TYPE_HF) { - ERROR_IF(dst_stride != 2 && - !(is_mixed_float(inst) && - dst_stride == 1 && inst->dst.subnr % 16 == 0), - "Conversions to HF must have either all words in even " - "word locations or all words in odd word locations or " - "be mixed-float with Oword-aligned packed destination"); - } - } - } - - /* There are special regioning rules for mixed-float mode in CHV and SKL that - * override the general rule for the ratio of sizes of the destination type - * and the execution type. We will add validation for those in a later patch. 
- */ - bool validate_dst_size_and_exec_size_ratio = !is_mixed_float(inst) && !is_mixed_bfloat(inst); - - if (validate_dst_size_and_exec_size_ratio && - exec_type_size > dst_type_size) { - if (!(dst_type_is_byte && inst_is_raw_move(inst))) { - ERROR_IF(dst_stride * dst_type_size != exec_type_size, - "Destination stride must be equal to the ratio of the sizes " - "of the execution data type to the destination type"); - } - - unsigned subreg = inst->dst.subnr; - - if (inst->access_mode == BRW_ALIGN_1 && - inst->dst.address_mode == BRW_ADDRESS_DIRECT) { - /* The i965 PRM says: - * - * Implementation Restriction: The relaxed alignment rule for byte - * destination (#10.5) is not supported. - */ - if (dst_type_is_byte) { - ERROR_IF(subreg % exec_type_size != 0 && - subreg % exec_type_size != 1, - "Destination subreg must be aligned to the size of the " - "execution data type (or to the next lowest byte for byte " - "destinations)"); - } else { - ERROR_IF(subreg % exec_type_size != 0, - "Destination subreg must be aligned to the size of the " - "execution data type"); - } - } - } -} - -/** - * Checks restrictions listed in "General Restrictions on Regioning Parameters" - * in the "Register Region Restrictions" section. - * - * Three-source instructions and split sends return without any check. In - * Align16 mode only the destination HorzStride and the source VertStride are - * checked; for every other access mode each non-immediate source region is - * checked, followed by the destination HorzStride. - */ -static void -general_restrictions_on_region_parameters(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (inst->num_sources == 3) - return; - - /* Split sends don't have the bits in the instruction to encode regions so - * there's nothing to check.
- */ - if (inst_is_split_send(isa, inst)) - return; - - if (inst->access_mode == BRW_ALIGN_16) { - if (inst->has_dst && !dst_is_null(inst)) - ERROR_IF(inst->dst.hstride != 1, - "Destination Horizontal Stride must be 1"); - - if (inst->num_sources >= 1) { - ERROR_IF(inst->src[0].file != IMM && - inst->src[0].vstride != 0 && - inst->src[0].vstride != 2 && - inst->src[0].vstride != 4, - "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); - } - - if (inst->num_sources == 2) { - ERROR_IF(inst->src[1].file != IMM && - inst->src[1].vstride != 0 && - inst->src[1].vstride != 2 && - inst->src[1].vstride != 4, - "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); - } - - return; - } - - for (unsigned i = 0; i < inst->num_sources; i++) { - if (inst->src[i].file == IMM) - continue; - - enum brw_reg_type type = inst->src[i].type; - unsigned element_size = brw_type_size_bytes(type); - unsigned subreg = inst->src[i].subnr; - unsigned vstride = inst->src[i].vstride; - unsigned width = inst->src[i].width; - unsigned hstride = inst->src[i].hstride; - - /* ExecSize must be greater than or equal to Width. */ - ERROR_IF(inst->exec_size < width, "ExecSize must be greater than or equal " - "to Width"); - - /* If Width = 1, HorzStride must be 0 regardless of the values of - * ExecSize and VertStride. - */ - if (width == 1) { - ERROR_IF(hstride != 0, - "If Width = 1, HorzStride must be 0 regardless " - "of the values of ExecSize and VertStride"); - } - /* Every check below in this loop body reads vstride; all of them are - * skipped when it equals STRIDE(BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL). - */ - if (vstride == STRIDE(BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL)) - continue; - - /* If ExecSize = Width and HorzStride ≠ 0, - * VertStride must be set to Width * HorzStride. - */ - if (inst->exec_size == width && hstride != 0) { - ERROR_IF(vstride != width * hstride, - "If ExecSize = Width and HorzStride ≠ 0, " - "VertStride must be set to Width * HorzStride"); - } - - /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0.
*/ - if (inst->exec_size == 1 && width == 1) { - ERROR_IF(vstride != 0 || hstride != 0, - "If ExecSize = Width = 1, both VertStride " - "and HorzStride must be 0"); - } - - /* If VertStride = HorzStride = 0, Width must be 1 regardless of the - * value of ExecSize. - */ - if (vstride == 0 && hstride == 0) { - ERROR_IF(width != 1, - "If VertStride = HorzStride = 0, Width must be " - "1 regardless of the value of ExecSize"); - } - - /* VertStride must be used to cross GRF register boundaries. This rule - * implies that elements within a 'Width' cannot cross GRF boundaries. - */ - if (inst->src[i].file == FIXED_GRF) { - unsigned rowbase = subreg; - assert(util_is_power_of_two_nonzero(reg_unit(devinfo))); - unsigned grf_size_shift = ffs(REG_SIZE * reg_unit(devinfo)) - 1; - - for (int y = 0; y < inst->exec_size / width; y++) { - bool spans_grfs = false; - unsigned offset = rowbase; - unsigned first_grf = offset >> grf_size_shift; - - for (int x = 0; x < width; x++) { - const unsigned end_byte = offset + (element_size - 1); - const unsigned end_grf = end_byte >> grf_size_shift; - spans_grfs = end_grf != first_grf; - if (spans_grfs) - break; - offset += hstride * element_size; - } - - rowbase += vstride * element_size; - - if (spans_grfs) { - ERROR("VertStride must be used to cross GRF register boundaries"); - break; - } - } - } - } - - /* Dst.HorzStride must not be 0. */ - if (inst->has_dst && !dst_is_null(inst)) { - ERROR_IF(inst->dst.hstride == 0, - "Destination Horizontal Stride must not be 0"); - } -} - -static bool -is_multiplier_instruction(const brw_hw_decoded_inst *inst) -{ - /* TODO: Complete this list.
*/ - switch (inst->opcode) { - case BRW_OPCODE_MUL: - case BRW_OPCODE_MAC: - case BRW_OPCODE_MACH: - case BRW_OPCODE_MAD: - return true; - default: - return false; - } -} - -static void -special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - /* See instruction_restrictions() for DPAS operand type validation. */ - if (inst->opcode == BRW_OPCODE_DPAS) - return; - - ERROR_IF(is_pure_bfloat(inst), - "Instructions with pure bfloat16 operands are not supported."); - - if (is_mixed_bfloat(inst)) { - ERROR_IF(devinfo->ver < 20 && inst->exec_size > 8, - "Execution size must not be greater than 8 in Gfx12."); - ERROR_IF(devinfo->ver >= 20 && inst->exec_size > 16, - "Execution size must not be greater than 8 in Gfx20+."); - - for (int i = 0; i < inst->num_sources; i++) { - ERROR_IF(brw_type_is_bfloat(inst->src[i].type) && - src_has_scalar_region(inst, i), - "Broadcast of bfloat16 scalar is not supported."); - } - - if (is_multiplier_instruction(inst)) { - if (inst->num_sources == 2) { - ERROR_IF(brw_type_is_bfloat(inst->src[1].type), - "Bfloat16 not allowed in Src1 of 2-source instructions involving multiplier."); - } else if (inst->num_sources == 3) { - ERROR_IF(brw_type_is_bfloat(inst->src[2].type), - "Bfloat16 not allowed in Src2 of 3-source instructions involving multiplier."); - } - } - - const unsigned half_offset = REG_SIZE * reg_unit(devinfo) / 2; - - if (inst->has_dst && brw_type_is_bfloat(inst->dst.type)) { - unsigned dst_stride = inst->dst.hstride; - bool dst_is_packed = is_packed(inst->exec_size * dst_stride, inst->exec_size, dst_stride); - - if (dst_is_packed) { - ERROR_IF(inst->dst.subnr != 0 && inst->dst.subnr != half_offset, - "Packed bfloat16 destination must have register offset 0 or half of GRF register."); - } else { - /* Offset in the restriction text is in terms of elements. 
*/ - const unsigned elem_size = brw_type_size_bytes(inst->dst.type); - ERROR_IF(dst_stride != 2 || (inst->dst.subnr != 0 && - inst->dst.subnr != 1 * elem_size), - "Unpacked bfloat16 destination must have stride 2 and register offset 0 or 1."); - } - } - - for (int i = 0; i < inst->num_sources; i++) { - if (brw_type_is_bfloat(inst->src[i].type)) { - bool src_is_packed = is_packed(inst->src[i].vstride, inst->src[i].width, inst->src[i].hstride); - ERROR_IF(!src_is_packed, - "Bfloat16 source must be packed"); - ERROR_IF(inst->src[i].subnr != 0 && inst->src[i].subnr != half_offset, - "Bfloat16 source must have register offset 0 or half of GRF register."); - } - } - } - - const unsigned opcode = inst->opcode; - if (inst->num_sources >= 3) - return; - - if (!is_mixed_float(inst)) - return; - - bool is_align16 = inst->access_mode == BRW_ALIGN_16; - - enum brw_reg_type src0_type = inst->src[0].type; - enum brw_reg_type src1_type = inst->num_sources > 1 ? - inst->src[1].type : 0; - enum brw_reg_type dst_type = inst->dst.type; - - unsigned dst_stride = inst->dst.hstride; - bool dst_is_packed = is_packed(inst->exec_size * dst_stride, inst->exec_size, dst_stride); - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "Indirect addressing on source is not supported when source and - * destination data types are mixed float." - */ - ERROR_IF(inst->src[0].address_mode != BRW_ADDRESS_DIRECT || - (inst->num_sources > 1 && - inst->src[1].address_mode != BRW_ADDRESS_DIRECT), - "Indirect addressing on source is not supported when source and " - "destination data types are mixed float"); - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "No SIMD16 in mixed mode when destination is f32. Instruction - * execution size must be no more than 8." 
- */ - ERROR_IF(inst->exec_size > 8 && devinfo->ver < 20 && - dst_type == BRW_TYPE_F && - opcode != BRW_OPCODE_MOV, - "Mixed float mode with 32-bit float destination is limited " - "to SIMD8"); - - if (is_align16) { - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "In Align16 mode, when half float and float data types are mixed - * between source operands OR between source and destination operands, - * the register content are assumed to be packed." - * - * Since Align16 doesn't have a concept of horizontal stride (or width), - * it means that vertical stride must always be 4, since 0 and 2 would - * lead to replicated data, and any other value is disallowed in Align16. - */ - ERROR_IF(inst->src[0].vstride != 4, - "Align16 mixed float mode assumes packed data (vstride must be 4"); - - ERROR_IF(inst->num_sources >= 2 && - inst->src[1].vstride != 4, - "Align16 mixed float mode assumes packed data (vstride must be 4"); - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "For Align16 mixed mode, both input and output packed f16 data - * must be oword aligned, no oword crossing in packed f16." - * - * The previous rule requires that Align16 operands are always packed, - * and since there is only one bit for Align16 subnr, which represents - * offsets 0B and 16B, this rule is always enforced and we don't need to - * validate it. - */ - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "No SIMD16 in mixed mode when destination is packed f16 for both - * Align1 and Align16." - * - * And: - * - * "In Align16 mode, when half float and float data types are mixed - * between source operands OR between source and destination operands, - * the register content are assumed to be packed." - * - * Which implies that SIMD16 is not available in Align16. 
This is further - * confirmed by: - * - * "For Align16 mixed mode, both input and output packed f16 data - * must be oword aligned, no oword crossing in packed f16" - * - * Since oword-aligned packed f16 data would cross oword boundaries when - * the execution size is larger than 8. - */ - ERROR_IF(inst->exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "No accumulator read access for Align16 mixed float." - */ - ERROR_IF(inst_uses_src_acc(isa, inst), - "No accumulator read access for Align16 mixed float"); - } else { - assert(!is_align16); - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "No SIMD16 in mixed mode when destination is packed f16 for both - * Align1 and Align16." - */ - ERROR_IF(inst->exec_size > 8 && dst_is_packed && - dst_type == BRW_TYPE_HF && - opcode != BRW_OPCODE_MOV, - "Align1 mixed float mode is limited to SIMD8 when destination " - "is packed half-float"); - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "Math operations for mixed mode: - * - In Align1, f16 inputs need to be strided" - */ - if (opcode == BRW_OPCODE_MATH) { - if (src0_type == BRW_TYPE_HF) { - ERROR_IF(inst->src[0].hstride <= 1, - "Align1 mixed mode math needs strided half-float inputs"); - } - - if (inst->num_sources >= 2 && src1_type == BRW_TYPE_HF) { - ERROR_IF(inst->src[1].hstride <= 1, - "Align1 mixed mode math needs strided half-float inputs"); - } - } - - if (dst_type == BRW_TYPE_HF && dst_stride == 1) { - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "In Align1, destination stride can be smaller than execution - * type. When destination is stride of 1, 16 bit packed data is - * updated on the destination. However, output packed f16 data - * must be oword aligned, no oword crossing in packed f16." 
- * - * The requirement of not crossing oword boundaries for 16-bit oword - * aligned data means that execution size is limited to 8. - */ - ERROR_IF(inst->dst.subnr % 16 != 0, - "Align1 mixed mode packed half-float output must be " - "oword aligned"); - ERROR_IF(inst->exec_size > 8, - "Align1 mixed mode packed half-float output must not " - "cross oword boundaries (max exec size is 8)"); - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "When source is float or half float from accumulator register and - * destination is half float with a stride of 1, the source must - * register aligned. i.e., source must have offset zero." - * - * Align16 mixed float mode doesn't allow accumulator access on sources, - * so we only need to check this for Align1. - */ - if (src0_is_acc(inst) && - (src0_type == BRW_TYPE_F || - src0_type == BRW_TYPE_HF)) { - ERROR_IF(inst->src[0].subnr != 0, - "Mixed float mode requires register-aligned accumulator " - "source reads when destination is packed half-float"); - - } - - if (inst->num_sources > 1 && - src1_is_acc(inst) && - (src1_type == BRW_TYPE_F || - src1_type == BRW_TYPE_HF)) { - ERROR_IF(inst->src[1].subnr != 0, - "Mixed float mode requires register-aligned accumulator " - "source reads when destination is packed half-float"); - } - } - - /* From the SKL PRM, Special Restrictions for Handling Mixed Mode - * Float Operations: - * - * "No swizzle is allowed when an accumulator is used as an implicit - * source or an explicit source in an instruction. i.e. when - * destination is half float with an implicit accumulator source, - * destination stride needs to be 2." - * - * FIXME: it is not quite clear what the first sentence actually means - * or its link to the implication described after it, so we only - * validate the explicit implication, which is clearly described. 
- */ - if (dst_type == BRW_TYPE_HF && - inst_uses_src_acc(isa, inst)) { - ERROR_IF(dst_stride != 2, - "Mixed float mode with implicit/explicit accumulator " - "source and half-float destination requires a stride " - "of 2 on the destination"); - } - } -} - -/** - * Creates a \p grf_access_mask for an \p exec_size, \p element_size, and a - * region - * - * A \p grf_access_mask is a 32-element array of uint8_t, where each uint8_t - * is a bitmask of grfs accessed by the region. - * - * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4 - * instruction would be - * - * access_mask[0] = 0x01 (bytes 7-4 of the 1st grf) - * access_mask[1] = 0x01 (bytes 15-12 of the 1st grf) - * access_mask[2] = 0x01 (bytes 23-20 of the 1st grf) - * access_mask[3] = 0x01 (bytes 31-28 of the 1st grf) - * access_mask[4-31] = 0 - * - * Before Xe2, gX<1,1,0>F in an exec_size == 16 would yield: - * - * access_mask[0] = 0x01 (bytes 3-0 of the 1st grf) - * access_mask[1] = 0x01 (bytes 7-4 of the 1st grf) - * ... - * access_mask[7] = 0x01 (bytes 31-28 of the 1st grf) - * access_mask[8] = 0x02 (bytes 3-0 of the 2nd grf) - * ... - * access_mask[15] = 0x02 (bytes 31-28 of the 2nd grf) - * access_mask[16-31] = 0 - * - * Whereas on Xe2, gX<1,1,0>F in an exec_size of 16 would yield: - * - * access_mask[0] = 0x01 (bytes 3-0 of the 1st grf) - * access_mask[1] = 0x01 (bytes 7-4 of the 1st grf) - * ... - * access_mask[7] = 0x01 (bytes 31-28 of the 1st grf) - * access_mask[8] = 0x01 (bytes 35-32 of the 1st grf) - * ... 
- * access_mask[15] = 0x01 (bytes 63-60 of the 1st grf) - * access_mask[16-31] = 0 - * - */ -static void -grfs_accessed(const struct intel_device_info *devinfo, - uint8_t grf_access_mask[static 32], - unsigned exec_size, unsigned element_size, unsigned subreg, - unsigned vstride, unsigned width, unsigned hstride) -{ - unsigned rowbase = subreg; - unsigned element = 0; - assert(util_is_power_of_two_nonzero(reg_unit(devinfo))); - unsigned grf_size_shift = (5 - 1) + ffs(reg_unit(devinfo)); - - for (int y = 0; y < exec_size / width; y++) { - unsigned offset = rowbase; - - for (int x = 0; x < width; x++) { - const unsigned start_grf = (offset >> grf_size_shift) % 8; - const unsigned end_byte = offset + (element_size - 1); - const unsigned end_grf = (end_byte >> grf_size_shift) % 8; - grf_access_mask[element++] = (1 << start_grf) | (1 << end_grf); - offset += hstride * element_size; - } - - rowbase += vstride * element_size; - } - - assert(element == 0 || element == exec_size); -} - -/** - * Returns the number of registers accessed according to the access mask - * \p grfs_accessed, i.e. util_bitcount() of the OR of all 32 entries. - */ -static int -registers_read(const uint8_t grfs_accessed[static 32]) -{ - uint8_t all_read = 0; - - for (unsigned i = 0; i < 32; i++) - all_read |= grfs_accessed[i]; - - return util_bitcount(all_read); -} - -/** - * Checks restrictions listed in "Region Alignment Rules" in the "Register - * Region Restrictions" section. - */ -static void -region_alignment_rules(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - uint8_t dst_access_mask[32] = {}, src_access_mask[2][32] = {}; - - if (inst->num_sources == 3) - return; - - if (inst->access_mode == BRW_ALIGN_16) - return; - - if (inst_is_send(inst)) - return; - - bool skip_detailed_grf_checks = false; - - for (unsigned i = 0; i < inst->num_sources; i++) { - /* In Direct Addressing mode, a source cannot span more than 2 adjacent - * GRF registers.
- */ - if (inst->src[i].file != FIXED_GRF || - inst->src[i].address_mode != BRW_ADDRESS_DIRECT) - continue; - - enum brw_reg_type type = inst->src[i].type; - unsigned element_size = brw_type_size_bytes(type); - unsigned subreg = inst->src[i].subnr; - unsigned vstride = inst->src[i].vstride; - unsigned width = inst->src[i].width; - unsigned hstride = inst->src[i].hstride; - /* NOTE(review): src_access_mask is filled in here but is not read - * anywhere else in this function. - */ - grfs_accessed(devinfo, src_access_mask[i], - inst->exec_size, element_size, subreg, - vstride, width, hstride); - - unsigned num_vstride = inst->exec_size / width; - unsigned num_hstride = width; - unsigned vstride_elements = (num_vstride - 1) * vstride; - unsigned hstride_elements = (num_hstride - 1) * hstride; - unsigned offset = (vstride_elements + hstride_elements) * element_size + - subreg; - - if (offset >= 64 * reg_unit(devinfo)) { - ERROR("A source cannot span more than 2 adjacent GRF registers"); - skip_detailed_grf_checks = true; - } - } - - if (!inst->has_dst || dst_is_null(inst)) - return; - - unsigned stride = inst->dst.hstride; - enum brw_reg_type dst_type = inst->dst.type; - unsigned element_size = brw_type_size_bytes(dst_type); - unsigned subreg = inst->dst.subnr; - unsigned offset = ((inst->exec_size - 1) * stride * element_size) + subreg; - if (offset >= 64 * reg_unit(devinfo)) { - ERROR("A destination cannot span more than 2 adjacent GRF registers"); - skip_detailed_grf_checks = true; - } - - if (skip_detailed_grf_checks) - return; - - grfs_accessed(devinfo, dst_access_mask, inst->exec_size, element_size, subreg, - inst->exec_size == 1 ? 0 : inst->exec_size * stride, - inst->exec_size == 1 ? 1 : inst->exec_size, - inst->exec_size == 1 ? 0 : stride); - - unsigned dst_regs = registers_read(dst_access_mask); - - /* The SKL PRM says: - * - * When destination of MATH instruction spans two registers, the - * destination elements must be evenly split between the two registers. - * - * It is not known whether this restriction applies to KBL or other Gens after - * SKL.
- */ - if (inst->opcode == BRW_OPCODE_MATH) { - if (dst_regs == 2) { - unsigned upper_reg_writes = 0, lower_reg_writes = 0; - - for (unsigned i = 0; i < inst->exec_size; i++) { - if (dst_access_mask[i] == 2) { - upper_reg_writes++; - } else { - assert(dst_access_mask[i] == 1); - lower_reg_writes++; - } - } - - ERROR_IF(upper_reg_writes != lower_reg_writes, - "Writes must be evenly split between the two " - "destination registers"); - } - } -} - -static void -vector_immediate_restrictions(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - - if (inst->num_sources == 3 || inst->num_sources == 0 || - (devinfo->ver >= 12 && inst_is_send(inst))) - return; - - unsigned file = inst->src[inst->num_sources == 1 ? 0 : 1].file; - if (file != IMM) - return; - - enum brw_reg_type dst_type = inst->dst.type; - unsigned dst_type_size = brw_type_size_bytes(dst_type); - unsigned dst_subreg = inst->dst.subnr; - unsigned dst_stride = inst->dst.hstride; - enum brw_reg_type type = inst->src[inst->num_sources == 1 ? 0 : 1].type; - - /* The PRMs say: - * - * When an immediate vector is used in an instruction, the destination - * must be 128-bit aligned with destination horizontal stride equivalent - * to a word for an immediate integer vector (v) and equivalent to a - * DWord for an immediate float vector (vf). - * - * The text has not been updated for the addition of the immediate unsigned - * integer vector type (uv) on SNB, but presumably the same restriction - * applies.
- */ - switch (type) { - case BRW_TYPE_V: - case BRW_TYPE_UV: - case BRW_TYPE_VF: - ERROR_IF(dst_subreg % (128 / 8) != 0, - "Destination must be 128-bit aligned in order to use immediate " - "vector types"); - - if (type == BRW_TYPE_VF) { - ERROR_IF(dst_type_size * dst_stride != 4, - "Destination must have stride equivalent to dword in order " - "to use the VF type"); - } else { - ERROR_IF(dst_type_size * dst_stride != 2, - "Destination must have stride equivalent to word in order " - "to use the V or UV type"); - } - break; - default: - break; - } -} -/** - * Checks the restrictions that apply when the destination or execution type - * is 8 bytes wide or the instruction is a MUL with D/UD sources (see - * is_double_precision below). The verx10 >= 125 checks additionally cover - * floating-point destinations and float, bfloat or 8-byte source types. - * - * Instructions with zero or three sources and split sends return without - * any check. - */ -static void -special_requirements_for_handling_double_precision_data_types( - const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (inst->num_sources == 3 || inst->num_sources == 0) - return; - - /* Split sends don't have types so there's no doubles there. */ - if (inst_is_split_send(isa, inst)) - return; - - enum brw_reg_type exec_type = execution_type(inst); - unsigned exec_type_size = brw_type_size_bytes(exec_type); - - enum brw_reg_type dst_type = inst->dst.type; - unsigned dst_type_size = brw_type_size_bytes(dst_type); - unsigned dst_hstride = inst->dst.hstride; - unsigned dst_reg = inst->dst.nr; - unsigned dst_subreg = inst->dst.subnr; - unsigned dst_address_mode = inst->dst.address_mode; - - bool is_integer_dword_multiply = - inst->opcode == BRW_OPCODE_MUL && - (inst->src[0].type == BRW_TYPE_D || inst->src[0].type == BRW_TYPE_UD) && - (inst->src[1].type == BRW_TYPE_D || inst->src[1].type == BRW_TYPE_UD); - - const bool is_double_precision = - dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply; - - for (unsigned i = 0; i < inst->num_sources; i++) { - enum brw_reg_file file = inst->src[i].file; - if (file == IMM) - continue; - - enum brw_reg_type type = inst->src[i].type; - unsigned type_size = brw_type_size_bytes(type); - unsigned address_mode = inst->src[i].address_mode; - unsigned reg
= inst->src[i].nr; - unsigned subreg = inst->src[i].subnr; - bool is_scalar_region = src_has_scalar_region(inst, i); - unsigned vstride = inst->src[i].vstride; - unsigned width = inst->src[i].width; - unsigned hstride = inst->src[i].hstride; - /* Source stride in bytes; vstride is used when hstride is 0. */ - const unsigned src_stride = (hstride ? hstride : vstride) * type_size; - const unsigned dst_stride = dst_hstride * dst_type_size; - - /* The PRMs say that for CHV, BXT: - * - * When source or destination datatype is 64b or operation is integer - * DWord multiply, regioning in Align1 must follow these rules: - * - * 1. Source and Destination horizontal stride must be aligned to the - * same qword. - * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. - * 3. Source and Destination offset must be the same, except the case - * of scalar source. - * - * We assume that the restriction applies to GLK as well. - */ - if (is_double_precision && - inst->access_mode == BRW_ALIGN_1 && - intel_device_info_is_9lp(devinfo)) { - ERROR_IF(!is_scalar_region && - (src_stride % 8 != 0 || - dst_stride % 8 != 0 || - src_stride != dst_stride), - "Source and destination horizontal stride must equal and a " - "multiple of a qword when the execution type is 64-bit"); - - ERROR_IF(vstride != width * hstride, - "Vstride must be Width * Hstride when the execution type is " - "64-bit"); - - ERROR_IF(!is_scalar_region && dst_subreg != subreg, - "Source and destination offset must be the same when the " - "execution type is 64-bit"); - } - - /* The PRMs say that for CHV, BXT: - * - * When source or destination datatype is 64b or operation is integer - * DWord multiply, indirect addressing must not be used. - * - * We assume that the restriction applies to GLK as well.
- */ - if (is_double_precision && - intel_device_info_is_9lp(devinfo)) { - ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode || - BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode, - "Indirect addressing is not allowed when the execution type " - "is 64-bit"); - } - - /* The PRMs say that for CHV, BXT: - * - * ARF registers must never be used with 64b datatype or when - * operation is integer DWord multiply. - * - * We assume that the restriction applies to GLK as well. - * - * We assume that the restriction does not apply to the null register. - */ - if (is_double_precision && - intel_device_info_is_9lp(devinfo)) { - ERROR_IF(inst->opcode == BRW_OPCODE_MAC || - brw_eu_inst_acc_wr_control(devinfo, inst->raw) || - (ARF == file && - reg != BRW_ARF_NULL) || - (ARF == inst->dst.file && - dst_reg != BRW_ARF_NULL), - "Architecture registers cannot be used when the execution " - "type is 64-bit"); - } - - /* From the hardware spec section "Register Region Restrictions": - * - * There are two rules: - * - * "In case of all floating point data types used in destination:" and - * - * "In case where source or destination datatype is 64b or operation is - * integer DWord multiply:" - * - * both of which list the same restrictions: - * - * "1. Register Regioning patterns where register data bit location - * of the LSB of the channels are changed between source and - * destination are not supported on Src0 and Src1 except for - * broadcast of a scalar. - * - * 2. Explicit ARF registers except null and accumulator must not be - * used."
- */ - if (devinfo->verx10 >= 125 && - (brw_type_is_float(dst_type) || - is_double_precision)) { - ERROR_IF(!brw_type_is_bfloat(type) && - !is_scalar_region && - BRW_ADDRESS_REGISTER_INDIRECT_REGISTER != address_mode && - (!is_linear(vstride, width, hstride) || - src_stride != dst_stride || - subreg != dst_subreg), - "Register Regioning patterns where register data bit " - "location of the LSB of the channels are changed between " - "source and destination are not supported except for " - "broadcast of a scalar."); - - /* NOTE: Expanded this to include Scalar. See documentation issue - * open in https://gfxspecs.intel.com/Predator/Home/Index/56640. - */ - ERROR_IF((address_mode == BRW_ADDRESS_DIRECT && file == ARF && - reg != BRW_ARF_SCALAR && - reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) || - (inst->dst.file == ARF && - dst_reg != BRW_ARF_SCALAR && - dst_reg != BRW_ARF_NULL && (dst_reg & 0xF0) != BRW_ARF_ACCUMULATOR), - "Explicit ARF registers except null, accumulator, and scalar must not " - "be used."); - } - - /* From the hardware spec section "Register Region Restrictions": - * - * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and - * Quad-Word data must not be used." - * - * and - * - * "Vx1 and VxH indirect addressing for BFloat16 (...) data - * must not be used." - */ - if (devinfo->verx10 >= 125 && - (brw_type_is_float_or_bfloat(type) || brw_type_size_bytes(type) == 8)) { - ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER && - vstride == STRIDE(BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL), - "Vx1 and VxH indirect addressing for Float, Half-Float, " - "Double-Float, Quad-Word, and Bfloat16 data must not be used"); - } - } - - /* The PRMs say that for BDW, SKL: - * - * If Align16 is required for an operation with QW destination and non-QW - * source datatypes, the execution size cannot exceed 2. - * - * We assume that the restriction applies to all Gfx8+ parts.
- */ - if (is_double_precision) { - enum brw_reg_type src0_type = inst->src[0].type; - enum brw_reg_type src1_type = - inst->num_sources > 1 ? inst->src[1].type : src0_type; - unsigned src0_type_size = brw_type_size_bytes(src0_type); - unsigned src1_type_size = brw_type_size_bytes(src1_type); - - ERROR_IF(inst->access_mode == BRW_ALIGN_16 && - dst_type_size == 8 && - (src0_type_size != 8 || src1_type_size != 8) && - inst->exec_size > 2, - "In Align16 exec size cannot exceed 2 with a QWord destination " - "and a non-QWord source"); - } - - /* The PRMs say that for CHV, BXT: - * - * When source or destination datatype is 64b or operation is integer - * DWord multiply, DepCtrl must not be used. - * - * We assume that the restriction applies to GLK as well. - */ - if (is_double_precision && - intel_device_info_is_9lp(devinfo)) { - ERROR_IF(brw_eu_inst_no_dd_check(devinfo, inst->raw) || - brw_eu_inst_no_dd_clear(devinfo, inst->raw), - "DepCtrl is not allowed when the execution type is 64-bit"); - } -} - -static void -instruction_restrictions(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - /* From Wa_1604601757: - * - * "When multiplying a DW and any lower precision integer, source modifier - * is not supported."
- */ - if (devinfo->ver >= 12 && - inst->opcode == BRW_OPCODE_MUL) { - enum brw_reg_type exec_type = execution_type(inst); - const bool src0_valid = - brw_type_size_bytes(inst->src[0].type) == 4 || - inst->src[0].file == IMM || - !(inst->src[0].negate || inst->src[0].abs); - const bool src1_valid = - brw_type_size_bytes(inst->src[1].type) == 4 || - inst->src[1].file == IMM || - !(inst->src[1].negate || inst->src[1].abs); - - ERROR_IF(!brw_type_is_float(exec_type) && - brw_type_size_bytes(exec_type) == 4 && - !(src0_valid && src1_valid), - "When multiplying a DW and any lower precision integer, source " - "modifier is not supported."); - } - - if (inst->opcode == BRW_OPCODE_CMP || - inst->opcode == BRW_OPCODE_CMPN) { - ERROR_IF(inst->cond_modifier == BRW_CONDITIONAL_NONE, - "CMP (or CMPN) must have a condition."); - } - - if (inst->opcode == BRW_OPCODE_SEL) { - ERROR_IF((inst->cond_modifier != BRW_CONDITIONAL_NONE) == - (inst->pred_control != BRW_PREDICATE_NONE), - "SEL must either be predicated or have a condition modifiers"); - } - - if (inst->opcode == BRW_OPCODE_MUL) { - const enum brw_reg_type src0_type = inst->src[0].type; - const enum brw_reg_type src1_type = inst->src[1].type; - const enum brw_reg_type dst_type = inst->dst.type; - - ERROR_IF(brw_type_is_float(dst_type) && - (brw_type_is_int(src0_type) || - brw_type_is_int(src1_type)), - "MUL can't mix floats and integer sources."); - - /* Page 971 (page 987 of the PDF), section "Accumulator - * Restrictions," of the Broadwell PRM volume 7 says: - * - * Integer source operands cannot be accumulators. - * - * The Skylake and Ice Lake PRMs contain the same text. - */ - ERROR_IF((brw_type_is_int(src0_type) && src0_is_acc(inst)) || - (brw_type_is_int(src1_type) && src1_is_acc(inst)), - "In MUL, Integer source operands cannot be accumulators."); - - /* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says: - * - * When multiplying a DW and any lower precision integer, the DW - * operand must on src0. 
- * - * Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same - * text. - */ - ERROR_IF(brw_type_is_int(src1_type) && - brw_type_size_bytes(src0_type) < 4 && - brw_type_size_bytes(src1_type) == 4, - "When multiplying a DW and any lower precision integer, the " - "DW operand must be src0."); - - /* Page 935 (page 951 of the PDF) of the Ice Lake PRM volume 2a says: - * - * When multiplying integer data types, if one of the sources is a - * DW, the resulting full precision data is stored in the - * accumulator. However, if the destination data type is either W or - * DW, the low bits of the result are written to the destination - * register and the remaining high bits are discarded. This results - * in undefined Overflow and Sign flags. Therefore, conditional - * modifiers and saturation (.sat) cannot be used in this case. - * - * Similar text appears in every version of the PRM. - * - * The wording of the last sentence is not very clear. It could either - * be interpreted as "conditional modifiers combined with saturation - * cannot be used" or "neither conditional modifiers nor saturation can - * be used." I have interpreted it as the latter primarily because that - * is the more restrictive interpretation. 
- */ - ERROR_IF((src0_type == BRW_TYPE_UD || - src0_type == BRW_TYPE_D || - src1_type == BRW_TYPE_UD || - src1_type == BRW_TYPE_D) && - (dst_type == BRW_TYPE_UD || - dst_type == BRW_TYPE_D || - dst_type == BRW_TYPE_UW || - dst_type == BRW_TYPE_W) && - (inst->saturate || inst->cond_modifier != BRW_CONDITIONAL_NONE), - "Neither Saturate nor conditional modifier allowed with DW " - "integer multiply."); - } - - if (inst->opcode == BRW_OPCODE_MATH) { - unsigned math_function = brw_eu_inst_math_function(devinfo, inst->raw); - switch (math_function) { - case GEN_MATH_INT_DIV_BOTH: - case GEN_MATH_INT_DIV_QUOTIENT: - case GEN_MATH_INT_DIV_REMAINDER: { - ERROR_IF(devinfo->verx10 >= 125, - "INT DIV functions not supported in Gfx125+."); - - /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says: - * INT DIV function does not support source modifiers. - * Bspec 6647 extends it back to Ivy Bridge. - */ - bool src0_valid = !inst->src[0].negate && !inst->src[0].abs; - bool src1_valid = !inst->src[1].negate && !inst->src[1].abs; - ERROR_IF(!src0_valid || !src1_valid, - "INT DIV function does not support source modifiers."); - - ERROR_IF(inst->src[0].type != BRW_TYPE_D && - inst->src[0].type != BRW_TYPE_UD, - "INT DIV function need D or UD source type."); - ERROR_IF(inst->src[0].type != inst->src[0].type || - inst->src[0].type != inst->dst.type, - "INT DIV function need all operand types to match."); - break; - } - - default: { - ERROR_IF(devinfo->verx10 >= 125 && - (math_function == GEN_MATH_POW || - math_function == GEN_MATH_FDIV), - "POW/FDIV not supported in Gfx125+."); - - const bool ieee_macro = - math_function == GEN_MATH_INVM || - math_function == GEN_MATH_RSQRTM; - - if (ieee_macro && devinfo->ver >= 125) { - ERROR_IF(inst->src[0].type != BRW_TYPE_F && - inst->src[0].type != BRW_TYPE_HF && - inst->src[0].type != BRW_TYPE_DF, - "MATH IEEE macros source type must be F, HF or DF (for Gfx125+)."); - } else { - ERROR_IF(inst->src[0].type != BRW_TYPE_F && - 
inst->src[0].type != BRW_TYPE_HF, - "MATH source type must be F or HF."); - } - - const bool two_srcs = - math_function == GEN_MATH_INVM || - math_function == GEN_MATH_POW || - math_function == GEN_MATH_FDIV; - - if (devinfo->ver >= 125) { - ERROR_IF(inst->src[0].type != inst->dst.type, - "Math function source and destination types must match on Gfx125+."); - ERROR_IF(two_srcs && - inst->src[0].type != inst->src[1].type, - "Math function need both source types to match on Gfx125+."); - } else { - ERROR_IF(inst->dst.type != BRW_TYPE_F && - inst->dst.type != BRW_TYPE_HF, - "Math function destination must be F or HF before Gfx125."); - ERROR_IF(two_srcs && - inst->src[1].type != BRW_TYPE_F && - inst->src[1].type != BRW_TYPE_HF, - "Math function source 1 type must be F or HF before Gfx125."); - } - - ERROR_IF(inst->dst.file != FIXED_GRF, - "The math instruction must use GRF as destination."); - - ERROR_IF((devinfo->ver >= 20 || !ieee_macro) && - (src0_is_acc(inst) || (two_srcs && src1_is_acc(inst))), - "Accumulator register access is only supported for Gfx125 and earlier, " - "and only for IEEE macro functions (INVM/RSQRTM)."); - - break; - } - } - } - - if (inst->opcode == BRW_OPCODE_DP4A) { - /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says: - * - * Only one of src0 or src1 operand may be an the (sic) accumulator - * register (acc#). 
- */ - ERROR_IF(src0_is_acc(inst) && src1_is_acc(inst), - "Only one of src0 or src1 operand may be an accumulator " - "register (acc#)."); - - } - - if (inst->opcode == BRW_OPCODE_ADD3) { - const enum brw_reg_type dst_type = inst->dst.type; - - ERROR_IF(dst_type != BRW_TYPE_D && - dst_type != BRW_TYPE_UD && - dst_type != BRW_TYPE_W && - dst_type != BRW_TYPE_UW, - "Destination must be integer D, UD, W, or UW type."); - - for (unsigned i = 0; i < 3; i++) { - enum brw_reg_type src_type = inst->src[i].type; - - ERROR_IF(src_type != BRW_TYPE_D && - src_type != BRW_TYPE_UD && - src_type != BRW_TYPE_W && - src_type != BRW_TYPE_UW, - "Source must be integer D, UD, W, or UW type."); - - ERROR_IF(inst->src[i].file == IMM && - src_type != BRW_TYPE_W && - src_type != BRW_TYPE_UW, - "Immediate source must be integer W or UW type."); - } - } - - if (inst->opcode == BRW_OPCODE_BFN) { - ERROR_IF(inst->saturate, - "BFN cannot have saturate modifier"); - - for (unsigned i = 0; i < 3; i++) { - ERROR_IF(inst->src[i].type != BRW_TYPE_UD && - inst->src[i].type != BRW_TYPE_UW, - "BFN source must be UD or UW type."); - - ERROR_IF(inst->src[i].abs || inst->src[i].negate, - "BFN does not support source modifiers."); - } - } - - if (inst->opcode == BRW_OPCODE_OR || - inst->opcode == BRW_OPCODE_AND || - inst->opcode == BRW_OPCODE_XOR || - inst->opcode == BRW_OPCODE_NOT) { - /* While the behavior of the negate source modifier is defined as - * logical not, the behavior of abs source modifier is not - * defined. Disallow it to be safe. - */ - ERROR_IF(inst->src[0].abs, - "Behavior of abs source modifier in logic ops is undefined."); - ERROR_IF(inst->opcode != BRW_OPCODE_NOT && - inst->src[1].file != IMM && - inst->src[1].abs, - "Behavior of abs source modifier in logic ops is undefined."); - - /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says: - * - * Source modifier is not allowed if source is an accumulator. 
- * - * The same text also appears for OR, NOT, and XOR instructions. - */ - ERROR_IF((inst->src[0].abs || inst->src[0].negate) && - src0_is_acc(inst), - "Source modifier is not allowed if source is an accumulator."); - ERROR_IF(inst->num_sources > 1 && - (inst->src[1].abs || inst->src[1].negate) && - src1_is_acc(inst), - "Source modifier is not allowed if source is an accumulator."); - - /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says: - * - * This operation does not produce sign or overflow conditions. Only - * the .e/.z or .ne/.nz conditional modifiers should be used. - * - * The same text also appears for OR, NOT, and XOR instructions. - * - * Per the comment around nir_op_imod in brw_from_nir.cpp, we have - * determined this to not be true. The only conditions that seem - * absolutely sketchy are O, R, and U. Some OpenGL shaders from Doom - * 2016 have been observed to generate and.g and operate correctly. - */ - const enum brw_conditional_mod cmod = inst->cond_modifier; - ERROR_IF(cmod == BRW_CONDITIONAL_O || - cmod == BRW_CONDITIONAL_R || - cmod == BRW_CONDITIONAL_U, - "O, R, and U conditional modifiers should not be used."); - } - - if (inst->opcode == BRW_OPCODE_BFI2) { - ERROR_IF(inst->cond_modifier != BRW_CONDITIONAL_NONE, - "BFI2 cannot have conditional modifier"); - - ERROR_IF(inst->saturate, - "BFI2 cannot have saturate modifier"); - - enum brw_reg_type dst_type = inst->dst.type; - - ERROR_IF(dst_type != BRW_TYPE_D && - dst_type != BRW_TYPE_UD, - "BFI2 destination type must be D or UD"); - - for (unsigned s = 0; s < 3; s++) { - enum brw_reg_type src_type = inst->src[s].type; - - ERROR_IF(src_type != dst_type, - "BFI2 source type must match destination type"); - } - } - - if (inst->opcode == BRW_OPCODE_CSEL) { - ERROR_IF(inst->pred_control != BRW_PREDICATE_NONE, - "CSEL cannot be predicated"); - - /* CSEL is CMP and SEL fused into one. 
The condition modifier, which - * does not actually modify the flags, controls the built-in comparison. - */ - ERROR_IF(inst->cond_modifier == BRW_CONDITIONAL_NONE, - "CSEL must have a condition."); - - enum brw_reg_type dst_type = inst->dst.type; - - if (devinfo->ver == 9) { - ERROR_IF(dst_type != BRW_TYPE_F, - "CSEL destination type must be F"); - } else { - ERROR_IF(dst_type != BRW_TYPE_F && - dst_type != BRW_TYPE_HF && - dst_type != BRW_TYPE_D && - dst_type != BRW_TYPE_W && - dst_type != BRW_TYPE_UD && - dst_type != BRW_TYPE_UW, - "CSEL destination type must be F, HF, *D, or *W"); - } - - for (unsigned s = 0; s < 3; s++) { - enum brw_reg_type src_type = inst->src[s].type; - - if (devinfo->ver == 9) { - ERROR_IF(src_type != BRW_TYPE_F, - "CSEL source type must be F"); - } else { - ERROR_IF(src_type != BRW_TYPE_F && src_type != BRW_TYPE_HF && - src_type != BRW_TYPE_D && src_type != BRW_TYPE_UD && - src_type != BRW_TYPE_W && src_type != BRW_TYPE_UW, - "CSEL source type must be F, HF, *D, or *W"); - - ERROR_IF(brw_type_is_float(src_type) != brw_type_is_float(dst_type), - "CSEL cannot mix float and integer types."); - - ERROR_IF(brw_type_size_bytes(src_type) != - brw_type_size_bytes(dst_type), - "CSEL cannot mix different type sizes."); - } - } - } - - if (inst->opcode == BRW_OPCODE_DPAS) { - ERROR_IF(brw_eu_inst_dpas_3src_sdepth(devinfo, inst->raw) != BRW_SYSTOLIC_DEPTH_8, - "Systolic depth must be 8."); - - const unsigned sdepth = 8; - - const enum brw_reg_type dst_type = inst->dst.type; - const enum brw_reg_type src0_type = inst->src[0].type; - const enum brw_reg_type src1_type = inst->src[1].type; - const enum brw_reg_type src2_type = inst->src[2].type; - - const enum gfx12_sub_byte_precision src1_sub_byte = - brw_eu_inst_dpas_3src_src1_subbyte(devinfo, inst->raw); - - if (src1_type != BRW_TYPE_B && src1_type != BRW_TYPE_UB) { - ERROR_IF(src1_sub_byte != BRW_SUB_BYTE_PRECISION_NONE, - "Sub-byte precision must be None for source type larger than Byte."); - } else 
{ - ERROR_IF(src1_sub_byte != BRW_SUB_BYTE_PRECISION_NONE && - src1_sub_byte != BRW_SUB_BYTE_PRECISION_4BIT && - src1_sub_byte != BRW_SUB_BYTE_PRECISION_2BIT, - "Invalid sub-byte precision."); - } - - const enum gfx12_sub_byte_precision src2_sub_byte = - brw_eu_inst_dpas_3src_src2_subbyte(devinfo, inst->raw); - - if (src2_type != BRW_TYPE_B && src2_type != BRW_TYPE_UB) { - ERROR_IF(src2_sub_byte != BRW_SUB_BYTE_PRECISION_NONE, - "Sub-byte precision must be None."); - } else { - ERROR_IF(src2_sub_byte != BRW_SUB_BYTE_PRECISION_NONE && - src2_sub_byte != BRW_SUB_BYTE_PRECISION_4BIT && - src2_sub_byte != BRW_SUB_BYTE_PRECISION_2BIT, - "Invalid sub-byte precision."); - } - - const unsigned src1_bits_per_element = - brw_type_size_bits(src1_type) >> - brw_eu_inst_dpas_3src_src1_subbyte(devinfo, inst->raw); - - const unsigned src2_bits_per_element = - brw_type_size_bits(src2_type) >> - brw_eu_inst_dpas_3src_src2_subbyte(devinfo, inst->raw); - - /* The MAX2(1, ...) is just to prevent possible division by 0 later. */ - const unsigned ops_per_chan = - MAX2(1, 32 / MAX2(src1_bits_per_element, src2_bits_per_element)); - - if (devinfo->ver < 20) { - ERROR_IF(inst->exec_size != 8, "DPAS execution size must be 8."); - } else { - ERROR_IF(inst->exec_size != 16, "DPAS execution size must be 16."); - } - - const unsigned dst_subnr = inst->dst.subnr; - const unsigned src0_subnr = inst->src[0].subnr; - const unsigned src1_subnr = inst->src[1].subnr; - const unsigned src2_subnr = inst->src[2].subnr; - - /* Until HF is supported as dst type, this is effectively subnr == 0. */ - ERROR_IF(dst_subnr % inst->exec_size != 0, - "Destination subregister offset must be a multiple of ExecSize."); - - /* Until HF is supported as src0 type, this is effectively subnr == 0. 
*/ - ERROR_IF(src0_subnr % inst->exec_size != 0, - "Src0 subregister offset must be a multiple of ExecSize."); - - ERROR_IF(src1_subnr != 0, - "Src1 subregister offsets must be 0."); - - /* In nearly all cases, this effectively requires that src2.subnr be - * 0. It is only when src1 is 8 bits and src2 is 2 or 4 bits that the - * ops_per_chan value can allow non-zero src2.subnr. - */ - ERROR_IF(src2_subnr % (sdepth * ops_per_chan) != 0, - "Src2 subregister offset must be a multiple of SystolicDepth " - "times OPS_PER_CHAN."); - - ERROR_IF(dst_subnr * brw_type_size_bytes(dst_type) >= REG_SIZE, - "Destination subregister specifies next register."); - - ERROR_IF(src0_subnr * brw_type_size_bytes(src0_type) >= REG_SIZE, - "Src0 subregister specifies next register."); - - ERROR_IF((src1_subnr * brw_type_size_bytes(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE, - "Src1 subregister specifies next register."); - - ERROR_IF((src2_subnr * brw_type_size_bytes(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE, - "Src2 subregister specifies next register."); - - if (brw_eu_inst_3src_atomic_control(devinfo, inst->raw)) { - /* FINISHME: When we start emitting DPAS with Atomic set, figure out - * a way to validate it. Also add a test in test_eu_validate.cpp. 
- */ - ERROR_IF(true, - "When instruction option Atomic is used it must be follwed by a " - "DPAS instruction."); - } - - if (brw_eu_inst_dpas_3src_exec_type(devinfo, inst->raw) == - BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT) { - ERROR_IF(src1_type != BRW_TYPE_HF && - src1_type != BRW_TYPE_BF, - "DPAS src1 type must be HF or BF."); - ERROR_IF(src2_type != BRW_TYPE_HF && - src2_type != BRW_TYPE_BF, - "DPAS src2 type must be HF or BF."); - ERROR_IF(src1_type != src2_type, - "DPAS src1 and src2 with types must match when using float types."); - - if (devinfo->ver < 20) { - ERROR_IF(dst_type != BRW_TYPE_F, - "DPAS destination type must be F in Gfx12."); - ERROR_IF(src0_type != BRW_TYPE_F, - "DPAS src0 type must be F in Gfx12."); - } else { - ERROR_IF(dst_type != BRW_TYPE_F && - dst_type != src1_type, - "DPAS destination type must be F or match Src1/Src2 in Gfx20+."); - ERROR_IF(src0_type != BRW_TYPE_F && - src0_type != src1_type, - "DPAS src0 type must be F or match Src1/Src2 in Gfx20+."); - } - } else { - ERROR_IF(dst_type != BRW_TYPE_D && - dst_type != BRW_TYPE_UD, - "DPAS destination type must be D or UD."); - ERROR_IF(src0_type != BRW_TYPE_D && - src0_type != BRW_TYPE_UD, - "DPAS src0 type must be D or UD."); - ERROR_IF(src1_type != BRW_TYPE_B && - src1_type != BRW_TYPE_UB, - "DPAS src1 base type must be B or UB."); - ERROR_IF(src2_type != BRW_TYPE_B && - src2_type != BRW_TYPE_UB, - "DPAS src2 base type must be B or UB."); - - if (brw_type_is_uint(dst_type)) { - ERROR_IF(!brw_type_is_uint(src0_type) || - !brw_type_is_uint(src1_type) || - !brw_type_is_uint(src2_type), - "If any source datatype is signed, destination datatype " - "must be signed."); - } - } - - /* FINISHME: Additional restrictions mentioned in the Bspec that are not - * yet enforced here: - * - * - General Accumulator registers access is not supported. This is - * currently enforced in brw_dpas_three_src (brw_eu_emit.c). 
- * - * - Given any combination of datatypes in the sources of a DPAS - * instructions, the boundaries of a register should not be crossed. - */ - } - - if (inst->opcode == BRW_OPCODE_AVG) { - ERROR_IF(!brw_type_is_int(inst->dst.type) || - !brw_type_is_int(inst->src[0].type) || - !brw_type_is_int(inst->src[1].type), - "AVG performs integer average. Float types not supported."); - ERROR_IF(brw_type_size_bytes(inst->dst.type) > 4 || - brw_type_size_bytes(inst->src[0].type) > 4 || - brw_type_size_bytes(inst->src[1].type) > 4, - "AVG does not support 64-bit types."); - } - - if (inst->opcode == BRW_OPCODE_ADD) { - ERROR_IF(brw_type_is_int(inst->src[0].type) != - brw_type_is_int(inst->src[1].type), - "ADD can't mix float and non-float sources."); - } - - if (inst->opcode == BRW_OPCODE_LINE || - inst->opcode == BRW_OPCODE_PLN) { - ERROR_IF(!src_has_scalar_region(inst, 0), - "LINE/PLN source 0 must be a scalar."); - } - - if (inst->opcode == BRW_OPCODE_ROR || - inst->opcode == BRW_OPCODE_ROL) { - ERROR_IF(inst->dst.type != BRW_TYPE_UD && - inst->dst.type != BRW_TYPE_UW, - "ROR/ROL dst type must be either UD or UW."); - ERROR_IF(inst->dst.type != inst->src[0].type, - "ROR/ROL src0 and dst must be of same datatype precision."); - } - - if (inst->opcode == BRW_OPCODE_LRP) { - ERROR_IF(inst->dst.type != BRW_TYPE_F || - inst->src[0].type != BRW_TYPE_F || - inst->src[1].type != BRW_TYPE_F || - inst->src[2].type != BRW_TYPE_F, - "LRP dst and sources must be of type F."); - } -} - -static void -send_descriptor_restrictions(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - if (inst_is_split_send(isa, inst)) { - /* We can only validate immediate descriptors */ - if (brw_eu_inst_send_sel_reg32_desc(devinfo, inst->raw)) - return; - } else if (inst_is_send(inst)) { - /* We can only validate immediate descriptors */ - if (inst->src[1].file != IMM) - return; - } else { - return; - } 
- - const uint32_t desc = brw_eu_inst_send_desc(devinfo, inst->raw); - - switch (brw_eu_inst_sfid(devinfo, inst->raw)) { - case GEN_SFID_URB: - if (devinfo->ver < 20) - break; - FALLTHROUGH; - case GEN_SFID_TGM: - case GEN_SFID_SLM: - case GEN_SFID_UGM: - ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC"); - - ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) && - lsc_msg_desc_transpose(devinfo, desc) && - inst->exec_size != 1, - "Transposed vectors are restricted to Exec_Mask = 1."); - break; - - default: - break; - } - - if (brw_eu_inst_sfid(devinfo, inst->raw) == GEN_SFID_URB && devinfo->ver < 20) { - ERROR_IF(!brw_eu_inst_header_present(devinfo, inst->raw), - "Header must be present for all URB messages."); - - switch (brw_eu_inst_urb_opcode(devinfo, inst->raw)) { - case GEN_URB_OPCODE_ATOMIC_INC: - case GEN_URB_OPCODE_ATOMIC_MOV: - case GEN_URB_OPCODE_ATOMIC_ADD: - case GEN_URB_OPCODE_SIMD8_WRITE: - break; - - case GEN_URB_OPCODE_SIMD8_READ: - ERROR_IF(brw_eu_inst_rlen(devinfo, inst->raw) == 0, - "URB SIMD8 read message must read some data."); - break; - - case GEN_GFX125_URB_OPCODE_FENCE: - ERROR_IF(devinfo->verx10 < 125, - "URB fence message only valid on gfx >= 12.5"); - break; - - default: - ERROR_IF(true, "Invalid URB message"); - break; - } - } -} - -static void -register_region_special_restrictions(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - bool format_uses_regions = inst->format == FORMAT_BASIC || - inst->format == FORMAT_BASIC_THREE_SRC; - - /* "Src0 Restrictions" in "Special Restrictions" in Bspec 56640 (r57070). 
*/ - if (devinfo->ver >= 20 && - format_uses_regions && - inst->num_sources > 0 && - inst->src[0].file == FIXED_GRF) { - const unsigned v = inst->src[0].vstride; - const unsigned w = inst->src[0].width; - const unsigned h = inst->src[0].hstride; - - const bool multi_indirect = - inst->src[0].address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER && - inst->src[0].vstride == STRIDE(BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL); - const bool is_Vx1 = multi_indirect && w != 1; - const bool is_VxH = multi_indirect && w == 1; - - const unsigned src0_stride = w == 1 ? v : h; - const unsigned src0_uniform_stride = (w == 1) || (h * w == v) || is_Vx1; - const unsigned dst_stride = inst->dst.hstride; - - const unsigned src0_size = brw_type_size_bytes(inst->src[0].type); - const unsigned dst_size = brw_type_size_bytes(inst->dst.type); - const unsigned src0_subnr = inst->src[0].subnr / src0_size; - const unsigned dst_subnr = inst->dst.subnr / dst_size; - - const bool dst_dword_aligned = (dst_size >= 4) || - (dst_size == 2 && (dst_subnr % 2 == 0)) || - (dst_size == 1 && (dst_subnr % 4 == 0)); - - /* The section below follows the pseudo-code in the spec to make - * easier to verify. - */ - bool allowed = false; - if ((dst_size >= 4) || - (src0_size >= 4) || - (dst_size == 2 && dst_stride > 1) || - (dst_size == 1 && dst_stride > 2) || - is_VxH) { - /* One element per DWord channel. 
*/ - allowed = true; - - } else if (src0_uniform_stride || dst_dword_aligned) { - if (src0_size == 2 && dst_size == 2) { - if ((src0_stride < 2) || - (src0_stride == 2 && src0_uniform_stride && (dst_subnr % 16 == src0_subnr / 2))) - allowed = true; - - } else if (src0_size == 2 && dst_size == 1 && dst_stride == 2) { - if ((src0_stride < 2) || - (src0_stride == 2 && src0_uniform_stride && (dst_subnr % 32 == src0_subnr))) - allowed = true; - - } else if (src0_size == 1 && dst_size == 2) { - if ((src0_stride < 4) || - (src0_stride == 4 && src0_uniform_stride && ((2 * dst_subnr) % 16 == src0_subnr / 2)) || - (src0_stride == 8 && src0_uniform_stride && ((2 * dst_subnr) % 8 == src0_subnr / 4))) - allowed = true; - - } else if (src0_size == 1 && dst_size == 1 && dst_stride == 2) { - if ((src0_stride < 4) || - (src0_stride == 4 && src0_uniform_stride && (dst_subnr % 32 == src0_subnr / 2)) || - (src0_stride == 8 && src0_uniform_stride && (dst_subnr % 16 == src0_subnr / 4))) - allowed = true; - - } else if (src0_size == 1 && dst_size == 1 && dst_stride == 1 && w != 2) { - if ((src0_stride < 2) || - (src0_stride == 2 && src0_uniform_stride && (dst_subnr % 32 == src0_subnr / 2)) || - (src0_stride == 4 && src0_uniform_stride && (dst_subnr % 16 == src0_subnr / 4))) - allowed = true; - - } else if (src0_size == 1 && dst_size == 1 && dst_stride == 1 && w == 2) { - if ((h == 0 && v < 4) || - (h == 1 && v < 4) || - (h == 2 && v < 2) || - (h == 1 && v == 4 && (dst_subnr % 32 == 2 * (src0_subnr / 4)) && (src0_subnr % 2 == 0)) || - (h == 2 && v == 4 && (dst_subnr % 32 == src0_subnr / 2)) || - (h == 4 && v == 8 && (dst_subnr % 32 == src0_subnr / 4))) - allowed = true; - } - } - - ERROR_IF(!allowed, - "Invalid register region for source 0. See special restrictions section."); - } - - /* "Src1 Restrictions" in "Special Restrictions" in Bspec 56640 (r57070). 
*/ - if (devinfo->ver >= 20 && - format_uses_regions && - inst->num_sources > 1 && - inst->src[1].file == FIXED_GRF) { - const unsigned v = inst->src[1].vstride; - const unsigned w = inst->src[1].width; - const unsigned h = inst->src[1].hstride; - - const bool multi_indirect = - inst->src[1].address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER && - inst->src[1].vstride == STRIDE(BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL); - const bool is_Vx1 = multi_indirect && w != 1; - - const unsigned src1_stride = w == 1 ? v : h; - const unsigned src1_uniform_stride = (w == 1) || (h * w == v) || is_Vx1; - const unsigned dst_stride = inst->dst.hstride; - - const unsigned src1_size = brw_type_size_bytes(inst->src[1].type); - const unsigned dst_size = brw_type_size_bytes(inst->dst.type); - const unsigned src1_subnr = inst->src[1].subnr / src1_size; - const unsigned dst_subnr = inst->dst.subnr / dst_size; - - const bool dst_dword_aligned = (dst_size >= 4) || - (dst_size == 2 && (dst_subnr % 2 == 0)) || - (dst_size == 1 && (dst_subnr % 4 == 0)); - - /* The section below follows the pseudo-code in the spec to make - * easier to verify. - */ - bool allowed = false; - if ((dst_size >= 4) || - (src1_size >= 4) || - (dst_size == 2 && dst_stride > 1) || - (dst_size == 1 && dst_stride > 2)) { - /* One element per DWord channel. */ - allowed = true; - - } else if (src1_uniform_stride || dst_dword_aligned) { - if (src1_size == 2 && dst_size == 2) { - if ((src1_stride < 2) || - (src1_stride == 2 && src1_uniform_stride && (dst_subnr % 16 == src1_subnr / 2))) - allowed = true; - - } else if (src1_size == 2 && dst_size == 1 && dst_stride == 2) { - if ((src1_stride < 2) || - (src1_stride == 2 && src1_uniform_stride && (dst_subnr % 32 == src1_subnr))) - allowed = true; - } - } - - ERROR_IF(!allowed, - "Invalid register region for source 1. 
See special restrictions section."); - } -} - -static void -scalar_register_restrictions(const struct brw_isa_info *isa, - const brw_hw_decoded_inst *inst, - struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - /* Restrictions from BSpec 71168 (r55736). */ - - if (devinfo->ver >= 30) { - if (inst->dst.file == ARF && inst->dst.nr == BRW_ARF_SCALAR) { - switch (inst->opcode) { - case BRW_OPCODE_MOV: { - unsigned dst_size_bits = brw_type_size_bits(inst->dst.type); - ERROR_IF(inst->dst.type != inst->src[0].type, - "When destination is scalar register, " - "source and destination data-types must be the same."); - ERROR_IF(!brw_type_is_int(inst->dst.type) || (dst_size_bits != 16 && - dst_size_bits != 32 && - dst_size_bits != 64), - "When destination is scalar register, " - "it must be an integer with size 16, 32, or 64 bits."); - if (inst->src[0].file == IMM) { - ERROR_IF(inst->exec_size != 1, - "When destination is scalar register with immediate source, " - "execution size must be 1."); - ERROR_IF(inst->cond_modifier != BRW_CONDITIONAL_NONE, - "When destination is scalar register with immediate source, " - "conditional modifier must not be used."); - } - ERROR_IF((inst->dst.subnr / 32) != ((inst->dst.subnr + brw_type_size_bytes(inst->dst.type)) / 32), - "When destination is scalar register, it must not span across " - "the lower to upper 8 dword boundary of the register."); - break; - } - - default: - ERROR("When destination is scalar register, opcode must be MOV."); - break; - } - } - - if (inst->src[0].file == ARF && inst->src[0].nr == BRW_ARF_SCALAR) { - switch (inst->opcode) { - case BRW_OPCODE_MOV: { - ERROR_IF(inst->dst.file == ARF && inst->dst.nr == BRW_ARF_SCALAR, - "When source is a scalar register, destination must not be a scalar register."); - ERROR_IF(!src_has_scalar_region(inst, 0), - "When source is a scalar register and opcode is MOV, the scalar (broadcast) regioning must be used."); - break; - } - - case BRW_OPCODE_SEND: 
- case BRW_OPCODE_SENDC: { - ERROR_IF(!src1_is_null(inst), - "When source is a scalar and opcode is a SEND or SENDC, Src1 must be NULL."); - break; - } - - default: - ERROR("When source is a scalar register, opcode must be MOV, SEND, or SENDC."); - break; - } - } - - if ((inst->src[1].file == ARF && inst->src[1].nr == BRW_ARF_SCALAR) || - (inst->src[2].file == ARF && inst->src[2].nr == BRW_ARF_SCALAR)) { - ERROR("When source is a scalar register, it must be on Source 0."); - } - } else { - assert(devinfo->ver < 30); - if ((inst->dst.file == ARF && inst->dst.nr == BRW_ARF_SCALAR) || - (inst->src[0].file == ARF && inst->src[0].nr == BRW_ARF_SCALAR) || - (inst->src[1].file == ARF && inst->src[1].nr == BRW_ARF_SCALAR) || - (inst->src[2].file == ARF && inst->src[2].nr == BRW_ARF_SCALAR)) - ERROR("Scalar register not available before Gfx30."); - } -} - -static unsigned -DST_STRIDE_3SRC(unsigned hstride) -{ - switch (hstride) { - case BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1: return 1; - case BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_2: return 2; - } - UNREACHABLE("invalid hstride"); -} - -static unsigned -VSTRIDE_3SRC(unsigned vstride) -{ - switch (vstride) { - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0: return 0; - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1: return 1; - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4: return 4; - case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8: return 8; - } - UNREACHABLE("invalid vstride"); -} - -static inline unsigned -brw_implied_width_for_3src_a1(unsigned v, unsigned h) -{ - /* "Regioning Rules for Align1 Ternary Operations" */ - - /* TODO: Add remaining rules and de-duplicate with brw_disasm.c */ - - if (v == 0) return 1; - if (h == 0) return v; - return v/h; -} - -static void -brw_hw_decode_inst(const struct brw_isa_info *isa, - brw_hw_decoded_inst *inst, - const brw_eu_inst *raw, struct error *error) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - inst->raw = raw; - inst->opcode = brw_eu_inst_opcode(isa, raw); - inst->num_sources = 
brw_num_sources_from_inst(isa, raw); - - const struct opcode_desc *desc = brw_opcode_desc(isa, inst->opcode); - assert(desc->ndst == 0 || desc->ndst == 1); - inst->has_dst = desc->ndst == 1; - - enum brw_execution_size exec_size = brw_eu_inst_exec_size(devinfo, raw); - switch (exec_size) { - case BRW_EXECUTE_1: - case BRW_EXECUTE_2: - case BRW_EXECUTE_4: - case BRW_EXECUTE_8: - case BRW_EXECUTE_16: - case BRW_EXECUTE_32: - inst->exec_size = 1 << exec_size; - break; - default: - RETURN_ERROR("invalid execution size"); - break; - } - - inst->access_mode = brw_eu_inst_access_mode(devinfo, raw); - inst->pred_control = brw_eu_inst_pred_control(devinfo, raw); - - RETURN_ERROR_IF(inst->num_sources == 3 && inst->access_mode == BRW_ALIGN_1 && devinfo->ver == 9, - "Align1 mode not allowed on Gfx9 for 3-src instructions"); - - RETURN_ERROR_IF(inst->access_mode == BRW_ALIGN_16 && devinfo->ver >= 11, - "Align16 mode doesn't exist on Gfx11+"); - - switch (inst->opcode) { - case BRW_OPCODE_BFN: - inst->format = FORMAT_BFN_THREE_SRC; - break; - - case BRW_OPCODE_DPAS: - inst->format = FORMAT_DPAS_THREE_SRC; - break; - - case BRW_OPCODE_SEND: - case BRW_OPCODE_SENDC: - inst->format = devinfo->ver >= 12 ? 
FORMAT_SEND : FORMAT_BASIC; - break; - - case BRW_OPCODE_SENDS: - case BRW_OPCODE_SENDSC: - inst->format = FORMAT_SEND; - break; - - case BRW_OPCODE_DO: - case BRW_OPCODE_WHILE: - case BRW_OPCODE_IF: - case BRW_OPCODE_ELSE: - case BRW_OPCODE_ENDIF: - case BRW_OPCODE_BREAK: - case BRW_OPCODE_CONTINUE: - case BRW_OPCODE_JMPI: - case BRW_OPCODE_BRD: - case BRW_OPCODE_BRC: - case BRW_OPCODE_HALT: - case BRW_OPCODE_CALLA: - case BRW_OPCODE_CALL: - case BRW_OPCODE_GOTO: - case BRW_OPCODE_JOIN: - inst->format = FORMAT_BRANCH; - break; - - case BRW_OPCODE_NOP: - inst->format = FORMAT_NOP; - break; - - case BRW_OPCODE_ILLEGAL: - inst->format = FORMAT_ILLEGAL; - break; - - default: - if (inst->num_sources == 3) { - inst->format = FORMAT_BASIC_THREE_SRC; - } else { - inst->format = FORMAT_BASIC; - } - break; - } - - switch (inst->format) { - case FORMAT_BASIC: { - assert(inst->num_sources == 1 || - inst->num_sources == 2 || - inst->opcode == BRW_OPCODE_WAIT); - assert(inst->has_dst || - inst->opcode == BRW_OPCODE_SYNC); - - if (inst->has_dst) { - inst->dst.file = brw_eu_inst_dst_reg_file(devinfo, raw); - inst->dst.type = brw_eu_inst_dst_type(devinfo, raw); - inst->dst.address_mode = brw_eu_inst_dst_address_mode(devinfo, raw); - if (inst->dst.address_mode == BRW_ADDRESS_DIRECT) { - inst->dst.nr = brw_eu_inst_dst_da_reg_nr(devinfo, raw); - if (inst->access_mode == BRW_ALIGN_1) { - inst->dst.subnr = brw_eu_inst_dst_da1_subreg_nr(devinfo, raw); - } else { - inst->dst.subnr = brw_eu_inst_dst_da16_subreg_nr(devinfo, raw); - } - } else { - inst->dst.subnr = brw_eu_inst_dst_ia_subreg_nr(devinfo, raw); - } - inst->dst.hstride = STRIDE(brw_eu_inst_dst_hstride(devinfo, raw)); - } - - inst->src[0].file = brw_eu_inst_src0_reg_file(devinfo, raw); - inst->src[0].type = brw_eu_inst_src0_type(devinfo, raw); - inst->src[0].address_mode = brw_eu_inst_src0_address_mode(devinfo, raw); - inst->src[0].negate = brw_eu_inst_src0_negate(devinfo, raw); - inst->src[0].abs = brw_eu_inst_src0_abs(devinfo, 
raw); - if (inst->src[0].file != IMM) { - if (inst->src[0].address_mode == BRW_ADDRESS_DIRECT) { - inst->src[0].nr = brw_eu_inst_src0_da_reg_nr(devinfo, raw); - if (inst->access_mode == BRW_ALIGN_1) { - inst->src[0].subnr = brw_eu_inst_src0_da1_subreg_nr(devinfo, raw); - } else { - inst->src[0].subnr = brw_eu_inst_src0_da16_subreg_nr(devinfo, raw) * 16; - } - } else { - inst->src[0].subnr = brw_eu_inst_src0_ia_subreg_nr(devinfo, raw); - } - - inst->src[0].vstride = STRIDE(brw_eu_inst_src0_vstride(devinfo, raw)); - if (inst->access_mode == BRW_ALIGN_1) { - inst->src[0].width = WIDTH(brw_eu_inst_src0_width(devinfo, raw)); - inst->src[0].hstride = STRIDE(brw_eu_inst_src0_hstride(devinfo, raw)); - } - } - - if (inst->num_sources > 1) { - inst->src[1].file = brw_eu_inst_src1_reg_file(devinfo, raw); - inst->src[1].type = brw_eu_inst_src1_type(devinfo, raw); - inst->src[1].negate = brw_eu_inst_src1_negate(devinfo, raw); - inst->src[1].abs = brw_eu_inst_src1_abs(devinfo, raw); - if (inst->src[1].file != IMM) { - if (inst->src[1].address_mode == BRW_ADDRESS_DIRECT) { - inst->src[1].nr = brw_eu_inst_src1_da_reg_nr(devinfo, raw); - if (inst->access_mode == BRW_ALIGN_1) { - inst->src[1].subnr = brw_eu_inst_src1_da1_subreg_nr(devinfo, raw); - } else { - inst->src[1].subnr = brw_eu_inst_src1_da16_subreg_nr(devinfo, raw) * 16; - } - } else { - inst->src[1].subnr = brw_eu_inst_src1_ia_subreg_nr(devinfo, raw); - } - - inst->src[1].vstride = STRIDE(brw_eu_inst_src1_vstride(devinfo, raw)); - if (inst->access_mode == BRW_ALIGN_1) { - inst->src[1].width = WIDTH(brw_eu_inst_src1_width(devinfo, raw)); - inst->src[1].hstride = STRIDE(brw_eu_inst_src1_hstride(devinfo, raw)); - } - } - } - - break; - } - - case FORMAT_BASIC_THREE_SRC: { - assert(inst->num_sources == 3); - assert(inst->has_dst); - - if (inst->access_mode == BRW_ALIGN_1) { - inst->dst.file = brw_eu_inst_3src_a1_dst_reg_file(devinfo, raw); - inst->dst.type = brw_eu_inst_3src_a1_dst_type(devinfo, raw); - inst->dst.nr = 
brw_eu_inst_3src_dst_reg_nr(devinfo, raw); - inst->dst.subnr = brw_eu_inst_3src_a1_dst_subreg_nr(devinfo, raw); - inst->dst.hstride = DST_STRIDE_3SRC(brw_eu_inst_3src_a1_dst_hstride(devinfo, raw)); - - inst->src[0].file = brw_eu_inst_3src_a1_src0_reg_file(devinfo, raw); - inst->src[0].type = brw_eu_inst_3src_a1_src0_type(devinfo, raw); - inst->src[0].negate = brw_eu_inst_3src_src0_negate(devinfo, raw); - inst->src[0].abs = brw_eu_inst_3src_src0_abs(devinfo, raw); - if (inst->src[0].file != IMM) { - inst->src[0].nr = brw_eu_inst_3src_src0_reg_nr(devinfo, raw); - inst->src[0].subnr = brw_eu_inst_3src_a1_src0_subreg_nr(devinfo, raw); - inst->src[0].vstride = VSTRIDE_3SRC(brw_eu_inst_3src_a1_src0_vstride(devinfo, raw)); - inst->src[0].hstride = STRIDE(brw_eu_inst_3src_a1_src0_hstride(devinfo, raw)); - inst->src[0].width = brw_implied_width_for_3src_a1(inst->src[0].vstride, inst->src[0].hstride); - } - - inst->src[1].file = brw_eu_inst_3src_a1_src1_reg_file(devinfo, raw); - inst->src[1].type = brw_eu_inst_3src_a1_src1_type(devinfo, raw); - inst->src[1].negate = brw_eu_inst_3src_src1_negate(devinfo, raw); - inst->src[1].abs = brw_eu_inst_3src_src1_abs(devinfo, raw); - inst->src[1].nr = brw_eu_inst_3src_src1_reg_nr(devinfo, raw); - inst->src[1].subnr = brw_eu_inst_3src_a1_src1_subreg_nr(devinfo, raw); - inst->src[1].vstride = VSTRIDE_3SRC(brw_eu_inst_3src_a1_src1_vstride(devinfo, raw)); - inst->src[1].hstride = STRIDE(brw_eu_inst_3src_a1_src1_hstride(devinfo, raw)); - inst->src[1].width = brw_implied_width_for_3src_a1(inst->src[1].vstride, inst->src[1].hstride); - - inst->src[2].file = brw_eu_inst_3src_a1_src2_reg_file(devinfo, raw); - inst->src[2].type = brw_eu_inst_3src_a1_src2_type(devinfo, raw); - inst->src[2].negate = brw_eu_inst_3src_src2_negate(devinfo, raw); - inst->src[2].abs = brw_eu_inst_3src_src2_abs(devinfo, raw); - if (inst->src[2].file != IMM) { - inst->src[2].nr = brw_eu_inst_3src_src2_reg_nr(devinfo, raw); - inst->src[2].subnr = 
brw_eu_inst_3src_a1_src2_subreg_nr(devinfo, raw); - inst->src[2].hstride = STRIDE(brw_eu_inst_3src_a1_src2_hstride(devinfo, raw)); - inst->src[2].width = brw_implied_width_for_3src_a1(inst->src[2].vstride, inst->src[2].hstride); - } - - } else { - inst->dst.file = FIXED_GRF; - inst->dst.type = brw_eu_inst_3src_a16_dst_type(devinfo, raw); - inst->dst.nr = brw_eu_inst_3src_dst_reg_nr(devinfo, raw); - inst->dst.subnr = brw_eu_inst_3src_a16_dst_subreg_nr(devinfo, raw) * 4; - - enum brw_reg_type src_type = brw_eu_inst_3src_a16_src_type(devinfo, raw); - - inst->src[0].file = FIXED_GRF; - inst->src[0].type = src_type; - inst->src[0].nr = brw_eu_inst_3src_src0_reg_nr(devinfo, raw); - inst->src[0].subnr = brw_eu_inst_3src_a16_src0_subreg_nr(devinfo, raw) * 4; - - inst->src[1].file = FIXED_GRF; - inst->src[1].type = src_type; - inst->src[1].nr = brw_eu_inst_3src_src1_reg_nr(devinfo, raw); - inst->src[1].subnr = brw_eu_inst_3src_a16_src1_subreg_nr(devinfo, raw) * 4; - - inst->src[2].file = FIXED_GRF; - inst->src[2].type = src_type; - inst->src[2].nr = brw_eu_inst_3src_src2_reg_nr(devinfo, raw); - inst->src[2].subnr = brw_eu_inst_3src_a16_src2_subreg_nr(devinfo, raw) * 4; - } - break; - } - - case FORMAT_BFN_THREE_SRC: { - assert(inst->num_sources == 3); - assert(inst->has_dst); - - inst->dst.file = brw_eu_inst_3src_a1_dst_reg_file(devinfo, raw); - inst->dst.type = brw_eu_inst_3src_a1_dst_type(devinfo, raw); - inst->dst.nr = brw_eu_inst_3src_dst_reg_nr(devinfo, raw); - inst->dst.subnr = brw_eu_inst_3src_a1_dst_subreg_nr(devinfo, raw) * 8; - inst->dst.hstride = DST_STRIDE_3SRC(brw_eu_inst_3src_a1_dst_hstride(devinfo, raw)); - - inst->src[0].file = brw_eu_inst_3src_a1_src0_reg_file(devinfo, raw); - inst->src[0].type = brw_eu_inst_3src_a1_src0_type(devinfo, raw); - if (inst->src[0].file != IMM) { - inst->src[0].nr = brw_eu_inst_3src_src0_reg_nr(devinfo, raw); - inst->src[0].subnr = brw_eu_inst_3src_a1_src0_subreg_nr(devinfo, raw); - inst->src[0].vstride = 
VSTRIDE_3SRC(brw_eu_inst_3src_a1_src0_vstride(devinfo, raw)); - inst->src[0].hstride = STRIDE(brw_eu_inst_3src_a1_src0_hstride(devinfo, raw)); - inst->src[0].width = brw_implied_width_for_3src_a1(inst->src[0].vstride, inst->src[0].hstride); - } - - inst->src[1].file = brw_eu_inst_3src_a1_src1_reg_file(devinfo, raw); - inst->src[1].type = brw_eu_inst_3src_a1_src1_type(devinfo, raw); - inst->src[1].nr = brw_eu_inst_3src_src1_reg_nr(devinfo, raw); - inst->src[1].subnr = brw_eu_inst_3src_a1_src1_subreg_nr(devinfo, raw); - inst->src[1].vstride = VSTRIDE_3SRC(brw_eu_inst_3src_a1_src1_vstride(devinfo, raw)); - inst->src[1].hstride = STRIDE(brw_eu_inst_3src_a1_src1_hstride(devinfo, raw)); - inst->src[1].width = brw_implied_width_for_3src_a1(inst->src[1].vstride, inst->src[1].hstride); - - inst->src[2].file = brw_eu_inst_3src_a1_src2_reg_file(devinfo, raw); - inst->src[2].type = brw_eu_inst_3src_a1_src2_type(devinfo, raw); - if (inst->src[2].file != IMM) { - inst->src[2].nr = brw_eu_inst_3src_src2_reg_nr(devinfo, raw); - inst->src[2].subnr = brw_eu_inst_3src_a1_src2_subreg_nr(devinfo, raw); - inst->src[2].hstride = STRIDE(brw_eu_inst_3src_a1_src2_hstride(devinfo, raw)); - inst->src[2].width = brw_implied_width_for_3src_a1(inst->src[2].vstride, inst->src[2].hstride); - } - - switch (brw_eu_inst_boolean_func_cond_modifier(devinfo, raw)) { - case 0: - inst->cond_modifier = BRW_CONDITIONAL_NONE; - break; - case 1: - inst->cond_modifier = BRW_CONDITIONAL_Z; - break; - case 2: - inst->cond_modifier = BRW_CONDITIONAL_G; - break; - case 3: - inst->cond_modifier = BRW_CONDITIONAL_L; - break; - } - break; - } - - case FORMAT_DPAS_THREE_SRC: { - assert(inst->num_sources == 3); - assert(inst->has_dst); - - inst->dst.file = brw_eu_inst_dpas_3src_dst_reg_file(devinfo, raw); - inst->dst.type = brw_eu_inst_dpas_3src_dst_type(devinfo, raw); - inst->dst.nr = brw_eu_inst_dpas_3src_dst_reg_nr(devinfo, raw); - inst->dst.subnr = brw_eu_inst_dpas_3src_dst_subreg_nr(devinfo, raw); - - 
inst->src[0].file = brw_eu_inst_dpas_3src_src0_reg_file(devinfo, raw); - inst->src[0].type = brw_eu_inst_dpas_3src_src0_type(devinfo, raw); - inst->src[0].nr = brw_eu_inst_dpas_3src_src0_reg_nr(devinfo, raw); - inst->src[0].subnr = brw_eu_inst_dpas_3src_src0_subreg_nr(devinfo, raw); - - inst->src[1].file = brw_eu_inst_dpas_3src_src1_reg_file(devinfo, raw); - inst->src[1].type = brw_eu_inst_dpas_3src_src1_type(devinfo, raw); - inst->src[1].nr = brw_eu_inst_dpas_3src_src1_reg_nr(devinfo, raw); - inst->src[1].subnr = brw_eu_inst_dpas_3src_src1_subreg_nr(devinfo, raw); - - inst->src[2].file = brw_eu_inst_dpas_3src_src2_reg_file(devinfo, raw); - inst->src[2].type = brw_eu_inst_dpas_3src_src2_type(devinfo, raw); - inst->src[2].nr = brw_eu_inst_dpas_3src_src2_reg_nr(devinfo, raw); - inst->src[2].subnr = brw_eu_inst_dpas_3src_src2_subreg_nr(devinfo, raw); - break; - } - - case FORMAT_SEND: { - if (inst->opcode == BRW_OPCODE_SENDS || inst->opcode == BRW_OPCODE_SENDSC) { - assert(devinfo->ver < 12); - - inst->dst.file = brw_eu_inst_send_dst_reg_file(devinfo, raw); - inst->dst.type = BRW_TYPE_D; - inst->dst.nr = brw_eu_inst_dst_da_reg_nr(devinfo, raw); - inst->dst.subnr = brw_eu_inst_dst_da16_subreg_nr(devinfo, raw) * 16; - - inst->src[0].file = FIXED_GRF; - inst->src[0].type = BRW_TYPE_D; - inst->src[0].nr = brw_eu_inst_src0_da_reg_nr(devinfo, raw); - inst->src[0].subnr = brw_eu_inst_src0_da16_subreg_nr(devinfo, raw) * 16; - - if (inst->num_sources > 1) { - inst->src[1].file = brw_eu_inst_send_src1_reg_file(devinfo, raw); - inst->src[1].type = BRW_TYPE_D; - inst->src[1].nr = brw_eu_inst_send_src1_reg_nr(devinfo, raw); - } - } else { - assert(devinfo->ver >= 12); - - inst->dst.file = brw_eu_inst_dst_reg_file(devinfo, raw); - inst->dst.type = BRW_TYPE_D; - inst->dst.nr = brw_eu_inst_dst_da_reg_nr(devinfo, raw); - - inst->src[0].file = brw_eu_inst_send_src0_reg_file(devinfo, raw); - inst->src[0].type = BRW_TYPE_D; - inst->src[0].nr = brw_eu_inst_src0_da_reg_nr(devinfo, raw); - 
- if (inst->num_sources > 1) { - inst->src[1].file = brw_eu_inst_send_src1_reg_file(devinfo, raw); - inst->src[1].type = BRW_TYPE_D; - inst->src[1].nr = brw_eu_inst_send_src1_reg_nr(devinfo, raw); - } - } - break; - } - - case FORMAT_BRANCH: { - assert(!inst->has_dst); - break; - } - - case FORMAT_ILLEGAL: - case FORMAT_NOP: { - assert(!inst->has_dst); - assert(inst->num_sources == 0); - break; - } - } - - if (inst->has_dst) { - ERROR_IF(inst->dst.type == BRW_TYPE_INVALID, - "Invalid destination register type encoding."); - } - - for (unsigned i = 0; i < inst->num_sources; i++) { - ERROR_IF(inst->src[i].type == BRW_TYPE_INVALID, - "Invalid source register type encoding."); - } - - if ((inst->format == FORMAT_BASIC || - inst->format == FORMAT_BASIC_THREE_SRC || - inst->format == FORMAT_BFN_THREE_SRC || - inst->format == FORMAT_DPAS_THREE_SRC) && - !inst_is_send(inst)) { - inst->saturate = brw_eu_inst_saturate(devinfo, raw); - - if (inst->opcode != BRW_OPCODE_BFN && - (inst->num_sources > 1 || - devinfo->ver < 12 || - inst->src[0].file != IMM || - brw_type_size_bytes(inst->src[0].type) < 8)) { - inst->cond_modifier = brw_eu_inst_cond_modifier(devinfo, raw); - } - } -} - -bool -brw_validate_instruction(const struct brw_isa_info *isa, - const brw_eu_inst *inst, int offset, - unsigned inst_size, - struct disasm_info *disasm) -{ - struct error error = {}; - - if (is_unsupported_inst(isa, inst)) { - report_error(&error, "Instruction not supported on this Gfx version"); - } else { - brw_hw_decoded_inst decoded = {}; - brw_hw_decode_inst(isa, &decoded, inst, &error); - -#define CHECK(func, args...) 
func(isa, &decoded, &error, ##args); - - if (!error.msg) - CHECK(invalid_values); - - if (!error.msg) { - CHECK(sources_not_null); - CHECK(send_restrictions); - CHECK(general_restrictions_based_on_operand_types); - CHECK(general_restrictions_on_region_parameters); - CHECK(special_restrictions_for_mixed_float_mode); - CHECK(region_alignment_rules); - CHECK(vector_immediate_restrictions); - CHECK(special_requirements_for_handling_double_precision_data_types); - CHECK(instruction_restrictions); - CHECK(send_descriptor_restrictions); - CHECK(register_region_special_restrictions); - CHECK(scalar_register_restrictions); - } - -#undef CHECK - } - - if (error.msg) { - if (disasm) - disasm_insert_error(disasm, offset, inst_size, error.msg); - ralloc_free(error.msg); - return false; - } else { - return true; - } -} - -bool -brw_validate_instructions(const struct brw_isa_info *isa, - const void *assembly, int start_offset, int end_offset, - struct disasm_info *disasm) -{ - const struct intel_device_info *devinfo = isa->devinfo; - bool valid = true; - - for (int src_offset = start_offset; src_offset < end_offset;) { - const brw_eu_inst *inst = assembly + src_offset; - bool is_compact = brw_eu_inst_cmpt_control(devinfo, inst); - unsigned inst_size = is_compact ? 
sizeof(brw_eu_compact_inst) - : sizeof(brw_eu_inst); - brw_eu_inst uncompacted; - - if (is_compact) { - brw_eu_compact_inst *compacted = (void *)inst; - brw_uncompact_instruction(isa, &uncompacted, compacted); - inst = &uncompacted; - } - - bool v = brw_validate_instruction(isa, inst, src_offset, - inst_size, disasm); - valid = valid && v; - - src_offset += inst_size; - } - - return valid; -} diff --git a/src/intel/compiler/brw/brw_generator.cpp b/src/intel/compiler/brw/brw_generator.cpp deleted file mode 100644 index 3d4a29903ce..00000000000 --- a/src/intel/compiler/brw/brw_generator.cpp +++ /dev/null @@ -1,1828 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -/** @file - * - * This file supports generating code from the FS LIR to the actual - * native instructions. - */ - -#include -#include - -#include "brw_eu.h" -#include "brw_disasm_info.h" -#include "brw_shader.h" -#include "brw_generator.h" -#include "brw_cfg.h" -#include "dev/intel_debug.h" -#include "util/mesa-blake3.h" -#include "util/half_float.h" - -namespace old { - -static gen_opcode -brw_opcode_to_gen(enum opcode op) -{ - switch (op) { - case BRW_OPCODE_ILLEGAL: return GEN_OP_ILLEGAL; - - case BRW_OPCODE_ADD: return GEN_OP_ADD; - case BRW_OPCODE_ADD3: return GEN_OP_ADD3; - case BRW_OPCODE_ADDC: return GEN_OP_ADDC; - case BRW_OPCODE_AND: return GEN_OP_AND; - case BRW_OPCODE_ASR: return GEN_OP_ASR; - case BRW_OPCODE_AVG: return GEN_OP_AVG; - case BRW_OPCODE_BFE: return GEN_OP_BFE; - case BRW_OPCODE_BFI1: return GEN_OP_BFI1; - case BRW_OPCODE_BFI2: return GEN_OP_BFI2; - case BRW_OPCODE_BFN: return GEN_OP_BFN; - case BRW_OPCODE_BFREV: return GEN_OP_BFREV; - case BRW_OPCODE_BRC: return GEN_OP_BRC; - case BRW_OPCODE_BRD: return GEN_OP_BRD; - case BRW_OPCODE_BREAK: return GEN_OP_BREAK; - case BRW_OPCODE_CALL: return GEN_OP_CALL; - case BRW_OPCODE_CALLA: return GEN_OP_CALLA; - case BRW_OPCODE_CBIT: return GEN_OP_CBIT; - case BRW_OPCODE_CMP: return GEN_OP_CMP; - case 
BRW_OPCODE_CMPN: return GEN_OP_CMPN; - case BRW_OPCODE_CONTINUE: return GEN_OP_CONTINUE; - case BRW_OPCODE_CSEL: return GEN_OP_CSEL; - case BRW_OPCODE_DP2: return GEN_OP_DP2; - case BRW_OPCODE_DP3: return GEN_OP_DP3; - case BRW_OPCODE_DP4: return GEN_OP_DP4; - case BRW_OPCODE_DP4A: return GEN_OP_DP4A; - case BRW_OPCODE_DPAS: return GEN_OP_DPAS; - case BRW_OPCODE_DPH: return GEN_OP_DPH; - case BRW_OPCODE_ELSE: return GEN_OP_ELSE; - case BRW_OPCODE_ENDIF: return GEN_OP_ENDIF; - case BRW_OPCODE_FBH: return GEN_OP_FBH; - case BRW_OPCODE_FBL: return GEN_OP_FBL; - case BRW_OPCODE_FRC: return GEN_OP_FRC; - case BRW_OPCODE_GOTO: return GEN_OP_GOTO; - case BRW_OPCODE_HALT: return GEN_OP_HALT; - case BRW_OPCODE_IF: return GEN_OP_IF; - case BRW_OPCODE_JMPI: return GEN_OP_JMPI; - case BRW_OPCODE_JOIN: return GEN_OP_JOIN; - case BRW_OPCODE_LINE: return GEN_OP_LINE; - case BRW_OPCODE_LRP: return GEN_OP_LRP; - case BRW_OPCODE_LZD: return GEN_OP_LZD; - case BRW_OPCODE_MAC: return GEN_OP_MAC; - case BRW_OPCODE_MACH: return GEN_OP_MACH; - case BRW_OPCODE_MACL: return GEN_OP_MACL; - case BRW_OPCODE_MAD: return GEN_OP_MAD; - case BRW_OPCODE_MADM: return GEN_OP_MADM; - case BRW_OPCODE_MATH: return GEN_OP_MATH; - case BRW_OPCODE_MOV: return GEN_OP_MOV; - case BRW_OPCODE_MOVI: return GEN_OP_MOVI; - case BRW_OPCODE_MUL: return GEN_OP_MUL; - case BRW_OPCODE_NOP: return GEN_OP_NOP; - case BRW_OPCODE_NOT: return GEN_OP_NOT; - case BRW_OPCODE_OR: return GEN_OP_OR; - case BRW_OPCODE_PLN: return GEN_OP_PLN; - case BRW_OPCODE_RET: return GEN_OP_RET; - case BRW_OPCODE_RNDD: return GEN_OP_RNDD; - case BRW_OPCODE_RNDE: return GEN_OP_RNDE; - case BRW_OPCODE_RNDU: return GEN_OP_RNDU; - case BRW_OPCODE_RNDZ: return GEN_OP_RNDZ; - case BRW_OPCODE_ROL: return GEN_OP_ROL; - case BRW_OPCODE_ROR: return GEN_OP_ROR; - case BRW_OPCODE_SEL: return GEN_OP_SEL; - case BRW_OPCODE_SEND: return GEN_OP_SEND; - case BRW_OPCODE_SENDC: return GEN_OP_SENDC; - case BRW_OPCODE_SENDS: return GEN_OP_SENDS; - case 
BRW_OPCODE_SENDSC: return GEN_OP_SENDSC; - case BRW_OPCODE_SHL: return GEN_OP_SHL; - case BRW_OPCODE_SHR: return GEN_OP_SHR; - case BRW_OPCODE_SMOV: return GEN_OP_SMOV; - case BRW_OPCODE_SRND: return GEN_OP_SRND; - case BRW_OPCODE_SUBB: return GEN_OP_SUBB; - case BRW_OPCODE_SYNC: return GEN_OP_SYNC; - case BRW_OPCODE_WAIT: return GEN_OP_WAIT; - case BRW_OPCODE_WHILE: return GEN_OP_WHILE; - case BRW_OPCODE_XOR: return GEN_OP_XOR; - - default: UNREACHABLE("invalid gen opcode"); - } -} - -uint32_t -brw_swsb_encode(const struct intel_device_info *devinfo, - gen_swsb swsb, enum opcode op) -{ - return gen_swsb_encode(devinfo, swsb, brw_opcode_to_gen(op)); -} - -gen_swsb -brw_swsb_decode(const struct intel_device_info *devinfo, - bool is_unordered, uint32_t raw, enum opcode op) -{ - return gen_swsb_decode(devinfo, is_unordered, raw, brw_opcode_to_gen(op)); -} - -static uint32_t -brw_math_function(enum opcode op) -{ - switch (op) { - case SHADER_OPCODE_RCP: - return GEN_MATH_INV; - case SHADER_OPCODE_RSQ: - return GEN_MATH_RSQ; - case SHADER_OPCODE_SQRT: - return GEN_MATH_SQRT; - case SHADER_OPCODE_EXP2: - return GEN_MATH_EXP; - case SHADER_OPCODE_LOG2: - return GEN_MATH_LOG; - case SHADER_OPCODE_POW: - return GEN_MATH_POW; - case SHADER_OPCODE_SIN: - return GEN_MATH_SIN; - case SHADER_OPCODE_COS: - return GEN_MATH_COS; - case SHADER_OPCODE_INT_QUOTIENT: - return GEN_MATH_INT_DIV_QUOTIENT; - case SHADER_OPCODE_INT_REMAINDER: - return GEN_MATH_INT_DIV_REMAINDER; - default: - UNREACHABLE("not reached: unknown math function"); - } -} - -static struct brw_reg -normalize_brw_reg_for_encoding(brw_reg *reg) -{ - struct brw_reg brw_reg; - - switch (reg->file) { - case ADDRESS: - case ARF: - case FIXED_GRF: - case IMM: - assert(reg->offset == 0); - brw_reg = *reg; - break; - case BAD_FILE: - /* Probably unused. 
*/ - brw_reg = brw_null_reg(); - break; - case VGRF: - case ATTR: - case UNIFORM: - UNREACHABLE("not reached"); - } - - return brw_reg; -} - -brw_generator::brw_generator(const struct brw_compiler *compiler, - const struct brw_compile_params *params, - struct brw_stage_prog_data *prog_data, - mesa_shader_stage stage) - - : compiler(compiler), params(params), - devinfo(compiler->devinfo), - prog_data(prog_data), dispatch_width(0), - debug_flag(false), - shader_name(NULL), stage(stage), mem_ctx(params->mem_ctx) -{ - p = rzalloc(mem_ctx, struct brw_codegen); - brw_init_codegen(&compiler->isa, p, mem_ctx); -} - -brw_generator::~brw_generator() -{ -} - -void -brw_generator::generate_send(brw_send_inst *inst, - struct brw_reg dst, - struct brw_reg desc, - struct brw_reg ex_desc, - struct brw_reg payload, - struct brw_reg payload2, - bool ex_bso) -{ - const bool gather = inst->opcode == SHADER_OPCODE_SEND_GATHER; - if (gather) { - assert(payload.file == ARF); - assert(payload.nr == BRW_ARF_SCALAR); - assert(payload2.file == ARF); - assert(payload2.nr == BRW_ARF_NULL); - } - - brw_SEND(p, inst->sfid, dst, payload, payload2, - desc, ex_desc, - inst->ex_desc_imm ? inst->offset : 0, - inst->ex_mlen, ex_bso, - inst->eot, gather); - - if (inst->check_tdr) - brw_eu_inst_set_opcode(p->isa, brw_eu_last_inst(p), - devinfo->ver >= 12 ? 
BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC); - - /* Serialize messages if needed */ - if (devinfo->ver == 12 && inst->fused_eu_disable) - brw_eu_inst_set_fusion_ctrl(devinfo, brw_eu_last_inst(p), true); -} - -void -brw_generator::generate_mov_indirect(brw_inst *inst, - struct brw_reg dst, - struct brw_reg reg, - struct brw_reg indirect_byte_offset) -{ - assert(indirect_byte_offset.type == BRW_TYPE_UD); - assert(indirect_byte_offset.file == FIXED_GRF); - assert(!reg.abs && !reg.negate); - assert(brw_type_is_uint(reg.type)); - assert(reg.type == dst.type); - - unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; - - if (indirect_byte_offset.file == IMM) { - imm_byte_offset += indirect_byte_offset.ud; - - reg.nr = imm_byte_offset / REG_SIZE; - reg.subnr = imm_byte_offset % REG_SIZE; - if (brw_type_size_bytes(reg.type) > 4 && !devinfo->has_64bit_int) { - brw_MOV(p, subscript(dst, BRW_TYPE_D, 0), - subscript(reg, BRW_TYPE_D, 0)); - brw_set_default_swsb(p, gen_swsb_null()); - brw_MOV(p, subscript(dst, BRW_TYPE_D, 1), - subscript(reg, BRW_TYPE_D, 1)); - } else { - brw_MOV(p, dst, reg); - } - } else { - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ - struct brw_reg addr = vec8(brw_address_reg(0)); - - /* Whether we can use destination dependency control without running the - * risk of a hang if an instruction gets shot down. - */ - const bool use_dep_ctrl = !inst->predicate && - inst->exec_size == dispatch_width; - brw_eu_inst *insn; - - /* The destination stride of an instruction (in bytes) must be greater - * than or equal to the size of the rest of the instruction. Since the - * address register is of type UW, we can't use a D-type instruction. - * In order to get around this, re retype to UW and use a stride. - */ - indirect_byte_offset = - retype(spread(indirect_byte_offset, 2), BRW_TYPE_UW); - - /* There are a number of reasons why we don't use the base offset here. 
- * One reason is that the field is only 9 bits which means we can only - * use it to access the first 16 GRFs. Also, from the Haswell PRM - * section "Register Region Restrictions": - * - * "The lower bits of the AddressImmediate must not overflow to - * change the register address. The lower 5 bits of Address - * Immediate when added to lower 5 bits of address register gives - * the sub-register offset. The upper bits of Address Immediate - * when added to upper bits of address register gives the register - * address. Any overflow from sub-register offset is dropped." - * - * Since the indirect may cause us to cross a register boundary, this - * makes the base offset almost useless. We could try and do something - * clever where we use a actual base offset if base_offset % 32 == 0 but - * that would mean we were generating different code depending on the - * base offset. Instead, for the sake of consistency, we'll just do the - * add ourselves. This restriction is only listed in the Haswell PRM - * but empirical testing indicates that it applies on all older - * generations and is lifted on Broadwell. - * - * In the end, while base_offset is nice to look at in the generated - * code, using it saves us 0 instructions and would require quite a bit - * of case-by-case work. It's just not worth it. - * - * Due to a hardware bug some platforms (particularly Gfx11+) seem to - * require the address components of all channels to be valid whether or - * not they're active, which causes issues if we use VxH addressing - * under non-uniform control-flow. We can easily work around that by - * initializing the whole address register with a pipelined NoMask MOV - * instruction. 
- */ - insn = brw_MOV(p, addr, brw_imm_uw(imm_byte_offset)); - brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); - brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); - if (devinfo->ver >= 12) - brw_set_default_swsb(p, gen_swsb_null()); - else - brw_eu_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); - - insn = brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); - if (devinfo->ver >= 12) - brw_set_default_swsb(p, gen_swsb_regdist(1)); - else - brw_eu_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); - - if (brw_type_size_bytes(reg.type) > 4 && - (devinfo->ver != 9 || intel_device_info_is_9lp(devinfo))) { - /* From the Cherryview PRM Vol 7. "Register Region Restrictions": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, indirect addressing must not be used." - * - * Later platforms either don't support Q/UQ types or have a - * restriction in "Register Region Restrictions" similar to - * - * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and - * Quad-Word data must not be used." - * - * Which means effectively all platforms except non-LP Gfx9 will - * need to lower this MOV. - * - * To work around both of these, we do two integer MOVs instead - * of one 64-bit MOV. Because no double value should ever cross - * a register boundary, it's safe to use the immediate offset in - * the indirect here to handle adding 4 bytes to the offset and - * avoid the extra ADD to the register file. 
- */ - brw_MOV(p, subscript(dst, BRW_TYPE_D, 0), - retype(brw_VxH_indirect(0, 0), BRW_TYPE_D)); - brw_set_default_swsb(p, gen_swsb_null()); - brw_MOV(p, subscript(dst, BRW_TYPE_D, 1), - retype(brw_VxH_indirect(0, 4), BRW_TYPE_D)); - } else { - struct brw_reg ind_src = brw_VxH_indirect(0, 0); - - brw_MOV(p, dst, retype(ind_src, reg.type)); - } - } -} - -void -brw_generator::generate_shuffle(brw_inst *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg idx) -{ - assert(src.file == FIXED_GRF); - assert(!src.abs && !src.negate); - - /* Ivy bridge has some strange behavior that makes this a real pain to - * implement for 64-bit values so we just don't bother. - */ - assert(devinfo->has_64bit_float || brw_type_size_bytes(src.type) <= 4); - assert(brw_type_is_uint(src.type)); - assert(src.type == dst.type); - - /* Because we're using the address register, we're limited to 16-wide - * by the address register file and 8-wide for 64-bit types. We could try - * and make this instruction splittable higher up in the compiler but that - * gets weird because it reads all of the channels regardless of execution - * size. It's easier just to split it here. - */ - unsigned lower_width = MIN2(16, inst->exec_size); - if (devinfo->ver < 20 && (element_sz(src) > 4 || element_sz(dst) > 4)) { - lower_width = 8; - } - - brw_set_default_exec_size(p, cvt(lower_width) - 1); - for (unsigned group = 0; group < inst->exec_size; group += lower_width) { - brw_set_default_group(p, group); - - if ((src.vstride == 0 && src.hstride == 0) || - idx.file == IMM) { - /* Trivial, the source is already uniform or the index is a constant. - * We will typically not get here if the optimizer is doing its job, - * but asserting would be mean. - */ - const unsigned i = idx.file == IMM ? 
idx.ud : 0; - struct brw_reg group_src = stride(suboffset(src, i), 0, 1, 0); - struct brw_reg group_dst = suboffset(dst, group << (dst.hstride - 1)); - brw_MOV(p, group_dst, group_src); - } else { - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ - struct brw_reg addr = vec8(brw_address_reg(0)); - - struct brw_reg group_idx = idx.is_scalar || is_uniform(idx) ? - component(idx, 0) : suboffset(idx, group); - - if (lower_width == 8 && group_idx.width == BRW_WIDTH_16) { - /* Things get grumpy if the register is too wide. */ - group_idx.width--; - group_idx.vstride--; - } - - assert(brw_type_size_bytes(group_idx.type) <= 4); - if (brw_type_size_bytes(group_idx.type) == 4) { - /* The destination stride of an instruction (in bytes) must be - * greater than or equal to the size of the rest of the - * instruction. Since the address register is of type UW, we - * can't use a D-type instruction. In order to get around this, - * re retype to UW and use a stride. - */ - group_idx = retype(spread(group_idx, 2), BRW_TYPE_W); - } - - uint32_t src_start_offset = src.nr * REG_SIZE + src.subnr; - - /* From the Haswell PRM: - * - * "When a sequence of NoDDChk and NoDDClr are used, the last - * instruction that completes the scoreboard clear must have a - * non-zero execution mask. This means, if any kind of predication - * can change the execution mask or channel enable of the last - * instruction, the optimization must be avoided. This is to - * avoid instructions being shot down the pipeline when no writes - * are required." - * - * Whenever predication is enabled or the instructions being emitted - * aren't the full width, it's possible that it will be run with zero - * channels enabled so we can't use dependency control without - * running the risk of a hang if an instruction gets shot down. 
- */ - const bool use_dep_ctrl = !inst->predicate && - lower_width == dispatch_width; - brw_eu_inst *insn; - - /* Due to a hardware bug some platforms (particularly Gfx11+) seem - * to require the address components of all channels to be valid - * whether or not they're active, which causes issues if we use VxH - * addressing under non-uniform control-flow. We can easily work - * around that by initializing the whole address register with a - * pipelined NoMask MOV instruction. - */ - insn = brw_MOV(p, addr, brw_imm_uw(src_start_offset)); - brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); - brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); - if (devinfo->ver >= 12) - brw_set_default_swsb(p, gen_swsb_null()); - else - brw_eu_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); - - /* Take into account the component size and horizontal stride. */ - assert(src.vstride == src.hstride + src.width); - insn = brw_SHL(p, addr, group_idx, - brw_imm_uw(util_logbase2(brw_type_size_bytes(src.type)) + - src.hstride - 1)); - if (devinfo->ver >= 12) - brw_set_default_swsb(p, gen_swsb_regdist(1)); - else - brw_eu_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); - - /* Add on the register start offset */ - brw_ADD(p, addr, addr, brw_imm_uw(src_start_offset)); - brw_MOV(p, suboffset(dst, group << (dst.hstride - 1)), - retype(brw_VxH_indirect(0, 0), src.type)); - } - - brw_set_default_swsb(p, gen_swsb_null()); - } -} - -void -brw_generator::generate_quad_swizzle(const brw_inst *inst, - struct brw_reg dst, struct brw_reg src, - unsigned swiz) -{ - /* Requires a quad. 
*/ - assert(inst->exec_size >= 4); - - if (src.file == IMM || - has_scalar_region(src)) { - /* The value is uniform across all channels */ - brw_MOV(p, dst, src); - - } else if (devinfo->ver < 11 && brw_type_size_bytes(src.type) == 4) { - /* This only works on 8-wide 32-bit values */ - assert(inst->exec_size == 8); - assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src.vstride == src.width + 1); - brw_set_default_access_mode(p, BRW_ALIGN_16); - struct brw_reg swiz_src = stride(src, 4, 4, 1); - swiz_src.swizzle = swiz; - brw_MOV(p, dst, swiz_src); - - } else { - assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src.vstride == src.width + 1); - const struct brw_reg src_0 = suboffset(src, BRW_GET_SWZ(swiz, 0)); - - switch (swiz) { - case BRW_SWIZZLE_XXXX: - case BRW_SWIZZLE_YYYY: - case BRW_SWIZZLE_ZZZZ: - case BRW_SWIZZLE_WWWW: - brw_MOV(p, dst, stride(src_0, 4, 4, 0)); - break; - - case BRW_SWIZZLE_XXZZ: - case BRW_SWIZZLE_YYWW: - brw_MOV(p, dst, stride(src_0, 2, 2, 0)); - break; - - case BRW_SWIZZLE_XYXY: - case BRW_SWIZZLE_ZWZW: - assert(inst->exec_size == 4); - brw_MOV(p, dst, stride(src_0, 0, 2, 1)); - break; - - default: - assert(inst->force_writemask_all); - brw_set_default_exec_size(p, cvt(inst->exec_size / 4) - 1); - - for (unsigned c = 0; c < 4; c++) { - brw_eu_inst *insn = brw_MOV( - p, stride(suboffset(dst, c), - 4 * inst->dst.stride, 1, 4 * inst->dst.stride), - stride(suboffset(src, BRW_GET_SWZ(swiz, c)), 4, 1, 0)); - - if (devinfo->ver < 12) { - brw_eu_inst_set_no_dd_clear(devinfo, insn, c < 3); - brw_eu_inst_set_no_dd_check(devinfo, insn, c > 0); - } - - brw_set_default_swsb(p, gen_swsb_null()); - } - - break; - } - } -} - -void -brw_generator::generate_barrier(brw_inst *, struct brw_reg src) -{ - brw_barrier(p, src); - if (devinfo->ver >= 12) { - brw_set_default_swsb(p, gen_swsb_null()); - brw_SYNC(p, TGL_SYNC_BAR); - } else { - brw_WAIT(p); - } -} - -/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input - * looking 
like: - * - * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br - * - * Ideally, we want to produce: - * - * DDX DDY - * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) - * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) - * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) - * (ss0.br - ss0.bl) (ss0.tr - ss0.br) - * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) - * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) - * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) - * (ss1.br - ss1.bl) (ss1.tr - ss1.br) - * - * and add another set of two more subspans if in 16-pixel dispatch mode. - * - * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result - * for each pair, and vertstride = 2 jumps us 2 elements after processing a - * pair. But the ideal approximation may impose a huge performance cost on - * sample_d. On at least Haswell, sample_d instruction does some - * optimizations if the same LOD is used for all pixels in the subspan. - * - * For DDY, we need to use ALIGN16 mode since it's capable of doing the - * appropriate swizzling. - */ -void -brw_generator::generate_ddx(const brw_inst *inst, - struct brw_reg dst, struct brw_reg src) -{ - unsigned vstride, width; - - if (inst->opcode == FS_OPCODE_DDX_FINE) { - /* produce accurate derivatives */ - vstride = BRW_VERTICAL_STRIDE_2; - width = BRW_WIDTH_2; - } else { - /* replicate the derivative at the top-left pixel to other pixels */ - vstride = BRW_VERTICAL_STRIDE_4; - width = BRW_WIDTH_4; - } - - struct brw_reg src0 = byte_offset(src, brw_type_size_bytes(src.type));; - struct brw_reg src1 = src; - - src0.vstride = vstride; - src0.width = width; - src0.hstride = BRW_HORIZONTAL_STRIDE_0; - src1.vstride = vstride; - src1.width = width; - src1.hstride = BRW_HORIZONTAL_STRIDE_0; - - brw_ADD(p, dst, src0, negate(src1)); -} - -/* The negate_value boolean is used to negate the derivative computation for - * FBOs, since they place the origin at the upper left instead of the lower - * left. 
- */ -void -brw_generator::generate_ddy(const brw_inst *inst, - struct brw_reg dst, struct brw_reg src) -{ - const uint32_t type_size = brw_type_size_bytes(src.type); - - if (inst->opcode == FS_OPCODE_DDY_FINE) { - /* produce accurate derivatives. - * - * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU) - * "Register Region Restrictions", Section "1. Special Restrictions": - * - * "In Align16 mode, the channel selects and channel enables apply to - * a pair of half-floats, because these parameters are defined for - * DWord elements ONLY. This is applicable when both source and - * destination are half-floats." - * - * So for half-float operations we use the Gfx11+ Align1 path. CHV - * inherits its FP16 hardware from SKL, so it is not affected. - */ - if (devinfo->ver >= 11) { - src = stride(src, 0, 2, 1); - - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_4); - for (uint32_t g = 0; g < inst->exec_size; g += 4) { - brw_set_default_group(p, inst->group + g); - brw_ADD(p, byte_offset(dst, g * type_size), - negate(byte_offset(src, g * type_size)), - byte_offset(src, (g + 2) * type_size)); - brw_set_default_swsb(p, gen_swsb_null()); - } - brw_pop_insn_state(p); - } else { - struct brw_reg src0 = stride(src, 4, 4, 1); - struct brw_reg src1 = stride(src, 4, 4, 1); - src0.swizzle = BRW_SWIZZLE_XYXY; - src1.swizzle = BRW_SWIZZLE_ZWZW; - - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, dst, negate(src0), src1); - brw_pop_insn_state(p); - } - } else { - /* replicate the derivative at the top-left pixel to other pixels */ - struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size); - struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size); - - brw_ADD(p, dst, negate(src0), src1); - } -} - -DEBUG_GET_ONCE_OPTION(shader_bin_override_path, "INTEL_SHADER_ASM_READ_PATH", - NULL); - -/* The A32 messages take a buffer base address in header.5:[31:0] (See - * MH1_A32_PSM for typed messages or 
MH_A32_GO for byte/dword scattered - * and OWord block messages in the SKL PRM Vol. 2d for more details.) - * Unfortunately, there are a number of subtle differences: - * - * For the block read/write messages: - * - * - We always stomp header.2 to fill in the actual scratch address (in - * units of OWORDs) so we don't care what's in there. - * - * - They rely on per-thread scratch space value in header.3[3:0] to do - * bounds checking so that needs to be valid. The upper bits of - * header.3 are ignored, though, so we can copy all of g0.3. - * - * - They ignore header.5[9:0] and assumes the address is 1KB aligned. - * - * - * For the byte/dword scattered read/write messages: - * - * - We want header.2 to be zero because that gets added to the per-channel - * offset in the non-header portion of the message. - * - * - Contrary to what the docs claim, they don't do any bounds checking so - * the value of header.3[3:0] doesn't matter. - * - * - They consider all of header.5 for the base address and header.5[9:0] - * are not ignored. This means that we can't copy g0.5 verbatim because - * g0.5[9:0] contains the FFTID on most platforms. Instead, we have to - * use an AND to mask off the bottom 10 bits. - * - * - * For block messages, just copying g0 gives a valid header because all the - * garbage gets ignored except for header.2 which we stomp as part of message - * setup. For byte/dword scattered messages, we can just zero out the header - * and copy over the bits we need from g0.5. This opcode, however, tries to - * satisfy the requirements of both by starting with 0 and filling out the - * information required by either set of opcodes. 
- */ -void -brw_generator::generate_scratch_header(brw_inst *inst, - struct brw_reg dst, - struct brw_reg src) -{ - assert(inst->exec_size == 8 && inst->force_writemask_all); - assert(dst.file == FIXED_GRF); - assert(src.file == FIXED_GRF); - assert(src.type == BRW_TYPE_UD); - - dst.type = BRW_TYPE_UD; - - brw_eu_inst *insn = brw_MOV(p, dst, brw_imm_ud(0)); - if (devinfo->ver >= 12) - brw_set_default_swsb(p, gen_swsb_null()); - else - brw_eu_inst_set_no_dd_clear(p->devinfo, insn, true); - - /* Copy the per-thread scratch space size from g0.3[3:0] */ - brw_set_default_exec_size(p, BRW_EXECUTE_1); - insn = brw_AND(p, suboffset(dst, 3), component(src, 3), - brw_imm_ud(INTEL_MASK(3, 0))); - if (devinfo->ver < 12) { - brw_eu_inst_set_no_dd_clear(p->devinfo, insn, true); - brw_eu_inst_set_no_dd_check(p->devinfo, insn, true); - } - - /* Copy the scratch base address from g0.5[31:10] */ - insn = brw_AND(p, suboffset(dst, 5), component(src, 5), - brw_imm_ud(INTEL_MASK(31, 10))); - if (devinfo->ver < 12) - brw_eu_inst_set_no_dd_check(p->devinfo, insn, true); -} - -void -brw_generator::enable_debug(const char *shader_name) -{ - debug_flag = true; - this->shader_name = shader_name; -} - -int -brw_generator::generate_code(const brw_shader &s, - struct genisa_stats *stats) -{ - const int dispatch_width = s.dispatch_width; - struct brw_shader_stats shader_stats = s.shader_stats; - const brw_performance &perf = s.performance_analysis.require(); - - /* align to 64 byte boundary. 
*/ - brw_realign(p, 64); - - this->dispatch_width = dispatch_width; - this->final_halt_offset = -1; - this->needs_final_halt = false; - - int start_offset = p->next_insn_offset; - - int loop_count = 0, send_count = 0, nop_count = 0, sync_nop_count = 0; - bool is_accum_used = false; - - struct disasm_info *disasm_info = disasm_initialize(p->isa, s.cfg); - const bool annotate = debug_flag || params->archiver; - - brw_inst *prev_inst = NULL; - foreach_block_and_inst (block, brw_inst, inst, s.cfg) { - if (inst->opcode == SHADER_OPCODE_UNDEF) - continue; - - struct brw_reg src[4], dst; - unsigned int last_insn_offset = p->next_insn_offset; - bool multiple_instructions_emitted = false; - gen_swsb swsb = inst->sched; - - /* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the - * "Register Region Restrictions" section: for BDW, SKL: - * - * "A POW/FDIV operation must not be followed by an instruction - * that requires two destination registers." - * - * The documentation is often lacking annotations for Atom parts, - * and empirically this affects CHV as well. - */ - if (devinfo->ver <= 9 && - p->nr_insn > 1 && - brw_eu_inst_opcode(p->isa, brw_eu_last_inst(p)) == BRW_OPCODE_MATH && - brw_eu_inst_math_function(devinfo, brw_eu_last_inst(p)) == GEN_MATH_POW && - inst->dst.component_size(inst->exec_size) > REG_SIZE) { - brw_NOP(p); - last_insn_offset = p->next_insn_offset; - - /* In order to avoid spurious instruction count differences when the - * instruction schedule changes, keep track of the number of inserted - * NOPs. - */ - nop_count++; - } - - /* Wa_14010017096: - * - * Clear accumulator register before end of thread. 
- */ - if (inst->eot && is_accum_used && - intel_needs_workaround(devinfo, 14010017096)) { - brw_set_default_exec_size(p, BRW_EXECUTE_16); - brw_set_default_group(p, 0); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_predicate_inverse(p, false); - brw_set_default_flag_reg(p, 0, 0); - brw_set_default_swsb(p, gen_swsb_src_dep(swsb)); - brw_MOV(p, brw_acc_reg(8), brw_imm_f(0.0f)); - last_insn_offset = p->next_insn_offset; - swsb = gen_swsb_dst_dep(swsb, 1); - } - - if (!is_accum_used && !inst->eot) { - is_accum_used = inst->writes_accumulator_implicitly(devinfo) || - inst->dst.is_accumulator(); - } - - /* Wa_14013672992: - * - * Always use @1 SWSB for EOT. - */ - if (inst->eot && intel_needs_workaround(devinfo, 14013672992)) { - if (gen_swsb_src_dep(swsb).mode) { - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_set_default_group(p, 0); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_flag_reg(p, 0, 0); - brw_set_default_swsb(p, gen_swsb_src_dep(swsb)); - brw_SYNC(p, TGL_SYNC_NOP); - last_insn_offset = p->next_insn_offset; - } - - swsb = gen_swsb_dst_dep(swsb, 1); - } - - if (unlikely(annotate)) - disasm_annotate(disasm_info, inst, p->next_insn_offset); - - if (devinfo->ver >= 20 && inst->group % 8 != 0) { - assert(inst->force_writemask_all); - assert(!inst->predicate && !inst->conditional_mod); - assert(!inst->writes_accumulator_implicitly(devinfo) && - !inst->reads_accumulator_implicitly()); - assert(inst->opcode != SHADER_OPCODE_SEL_EXEC); - brw_set_default_group(p, 0); - } else { - brw_set_default_group(p, inst->group); - } - - /* For SEND_GATHER, the payload sources are represented inside the - * scalar register in src[2], so we can skip them. - */ - const unsigned num_sources = - inst->opcode == SHADER_OPCODE_SEND_GATHER ? 
3 : inst->sources; - assert(num_sources <= ARRAY_SIZE(src)); - - for (unsigned int i = 0; i < num_sources; i++) { - src[i] = normalize_brw_reg_for_encoding(&inst->src[i]); - /* The accumulator result appears to get used for the - * conditional modifier generation. When negating a UD - * value, there is a 33rd bit generated for the sign in the - * accumulator value, so now you can't check, for example, - * equality with a 32-bit value. See piglit fs-op-neg-uvec4. - */ - assert(!inst->conditional_mod || - inst->src[i].type != BRW_TYPE_UD || - !inst->src[i].negate); - } - dst = normalize_brw_reg_for_encoding(&inst->dst); - - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_set_default_predicate_control(p, inst->predicate); - brw_set_default_predicate_inverse(p, inst->predicate_inverse); - /* On gfx7 and above, hardware automatically adds the group onto the - * flag subregister number. - */ - const unsigned flag_subreg = inst->flag_subreg; - brw_set_default_flag_reg(p, flag_subreg / 2, flag_subreg % 2); - brw_set_default_saturate(p, inst->saturate); - brw_set_default_mask_control(p, inst->force_writemask_all); - if (devinfo->ver >= 20 && inst->writes_accumulator) { - assert(inst->dst.is_accumulator() || - inst->opcode == BRW_OPCODE_ADDC || - inst->opcode == BRW_OPCODE_MACH || - inst->opcode == BRW_OPCODE_SUBB); - } else { - brw_set_default_acc_write_control(p, inst->writes_accumulator); - } - brw_set_default_swsb(p, swsb); - - unsigned exec_size = inst->exec_size; - - brw_set_default_exec_size(p, cvt(exec_size) - 1); - - assert(inst->force_writemask_all || inst->exec_size >= 4); - assert(inst->force_writemask_all || inst->group % inst->exec_size == 0); - if (const brw_send_inst *send = inst->as_send()) - assert(send->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo)); - - switch (inst->opcode) { - case BRW_OPCODE_NOP: - brw_NOP(p); - break; - case BRW_OPCODE_SYNC: - assert(src[0].file == IMM); - brw_SYNC(p, tgl_sync_function(src[0].ud)); - - if 
(tgl_sync_function(src[0].ud) == TGL_SYNC_NOP) - ++sync_nop_count; - - break; - - case BRW_OPCODE_MOV: - case BRW_OPCODE_FRC: - case BRW_OPCODE_RNDD: - case BRW_OPCODE_RNDE: - case BRW_OPCODE_RNDZ: - case BRW_OPCODE_NOT: - case BRW_OPCODE_LZD: - brw_alu1(p, inst->opcode, dst, src[0]); - break; - - case BRW_OPCODE_ADD: - case BRW_OPCODE_MUL: - case BRW_OPCODE_AVG: - case BRW_OPCODE_MACH: - case BRW_OPCODE_AND: - case BRW_OPCODE_OR: - case BRW_OPCODE_XOR: - case BRW_OPCODE_ASR: - case BRW_OPCODE_SHR: - case BRW_OPCODE_SHL: - case BRW_OPCODE_SEL: - case BRW_OPCODE_ADDC: - case BRW_OPCODE_SUBB: - case BRW_OPCODE_MAC: - case BRW_OPCODE_BFI1: - case BRW_OPCODE_PLN: - case BRW_OPCODE_SRND: - case BRW_OPCODE_ROL: - case BRW_OPCODE_ROR: - assert(inst->opcode != BRW_OPCODE_SRND || devinfo->ver >= 20); - assert(inst->opcode != BRW_OPCODE_ROL || devinfo->ver >= 11); - assert(inst->opcode != BRW_OPCODE_ROR || devinfo->ver >= 11); - - brw_alu2(p, inst->opcode, dst, src[0], src[1]); - break; - - case BRW_OPCODE_MAD: - case BRW_OPCODE_CSEL: - case BRW_OPCODE_BFE: - case BRW_OPCODE_BFI2: - case BRW_OPCODE_DP4A: - case BRW_OPCODE_LRP: - case BRW_OPCODE_ADD3: - assert(inst->opcode != BRW_OPCODE_DP4A || devinfo->ver >= 12); - assert(inst->opcode != BRW_OPCODE_LRP || devinfo->ver == 9); - assert(inst->opcode != BRW_OPCODE_ADD3 || devinfo->verx10 >= 125); - - if (devinfo->ver == 9) - brw_set_default_access_mode(p, BRW_ALIGN_16); - brw_alu3(p, inst->opcode, dst, src[0], src[1], src[2]); - break; - - case BRW_OPCODE_DPAS: { - assert(devinfo->verx10 >= 125); - const brw_dpas_inst *dpas = inst->as_dpas(); - brw_DPAS(p, translate_systolic_depth(dpas->sdepth), dpas->rcount, - dst, src[0], src[1], src[2]); - break; - } - - case BRW_OPCODE_BFN: - brw_BFN(p, dst, src[0], src[1], src[2], src[3]); - break; - - case BRW_OPCODE_CMP: - brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); - break; - case BRW_OPCODE_CMPN: - brw_CMPN(p, dst, inst->conditional_mod, src[0], src[1]); - break; - - case 
BRW_OPCODE_BFREV: - case BRW_OPCODE_FBL: - case BRW_OPCODE_CBIT: - brw_alu1(p, inst->opcode, retype(dst, BRW_TYPE_UD), retype(src[0], BRW_TYPE_UD)); - break; - - case BRW_OPCODE_FBH: - brw_FBH(p, retype(dst, src[0].type), src[0]); - break; - - case BRW_OPCODE_IF: - brw_IF(p, brw_get_default_exec_size(p)); - break; - - case BRW_OPCODE_ELSE: - brw_ELSE(p); - break; - case BRW_OPCODE_ENDIF: - brw_ENDIF(p); - break; - - case BRW_OPCODE_DO: - brw_DO(p, brw_get_default_exec_size(p)); - break; - - case SHADER_OPCODE_FLOW: - /* Do nothing. */ - break; - - case BRW_OPCODE_BREAK: - brw_BREAK(p); - break; - case BRW_OPCODE_CONTINUE: - brw_CONT(p); - break; - - case BRW_OPCODE_WHILE: - /* Workaround for an issue with branch prediction for WHILE - * instructions that may lead to misrendering or GPU hangs. - * See HSDs 22020521218 and 16026360541. - */ - if (devinfo->ver >= 20 && prev_inst && - unlikely(prev_inst->is_control_flow())) - brw_NOP(p); - - brw_WHILE(p); - loop_count++; - break; - - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - gfx6_math(p, dst, brw_math_function(inst->opcode), - src[0], retype(brw_null_reg(), src[0].type)); - break; - case SHADER_OPCODE_INT_QUOTIENT: - case SHADER_OPCODE_INT_REMAINDER: - case SHADER_OPCODE_POW: - assert(devinfo->verx10 < 125); - assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8); - gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); - break; - case FS_OPCODE_PIXEL_X: - assert(src[0].type == BRW_TYPE_UW); - assert(src[1].type == BRW_TYPE_UW); - src[0].subnr = 0 * brw_type_size_bytes(src[0].type); - if (src[1].file == IMM) { - assert(src[1].ud == 0); - brw_MOV(p, dst, stride(src[0], 8, 4, 1)); - } else { - /* Coarse pixel case */ - brw_ADD(p, dst, 
stride(src[0], 8, 4, 1), src[1]); - } - break; - case FS_OPCODE_PIXEL_Y: - assert(src[0].type == BRW_TYPE_UW); - assert(src[1].type == BRW_TYPE_UW); - src[0].subnr = 4 * brw_type_size_bytes(src[0].type); - if (src[1].file == IMM) { - assert(src[1].ud == 0); - brw_MOV(p, dst, stride(src[0], 8, 4, 1)); - } else { - /* Coarse pixel case */ - brw_ADD(p, dst, stride(src[0], 8, 4, 1), src[1]); - } - break; - - case SHADER_OPCODE_SEND: - generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC], - src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2], - inst->as_send()->bindless_surface && - intel_has_extended_bindless(devinfo)); - send_count++; - break; - - case SHADER_OPCODE_SEND_GATHER: - generate_send(inst->as_send(), dst, - src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC], - src[SEND_GATHER_SRC_SCALAR], brw_null_reg(), - inst->as_send()->bindless_surface && - intel_has_extended_bindless(devinfo)); - send_count++; - break; - - case FS_OPCODE_DDX_COARSE: - case FS_OPCODE_DDX_FINE: - generate_ddx(inst, dst, src[0]); - break; - case FS_OPCODE_DDY_COARSE: - case FS_OPCODE_DDY_FINE: - generate_ddy(inst, dst, src[0]); - break; - - case SHADER_OPCODE_SCRATCH_HEADER: - generate_scratch_header(inst, dst, src[0]); - break; - - case SHADER_OPCODE_MOV_INDIRECT: - generate_mov_indirect(inst, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_MOV_RELOC_IMM: - assert(src[0].file == IMM); - assert(src[1].file == IMM); - brw_MOV_reloc_imm(p, dst, dst.type, src[0].ud, src[1].ud); - break; - - case BRW_OPCODE_HALT: - /* This HALT will be patched by brw_set_uip_jip(). */ - this->needs_final_halt = true; - brw_HALT(p); - break; - - case FS_OPCODE_SCHEDULING_FENCE: - if (inst->sources == 0 && swsb.regdist == 0 && - swsb.mode == GEN_SBID_NULL) { - if (unlikely(annotate)) - disasm_info->use_tail = true; - break; - } - - if (devinfo->ver >= 12) { - /* Use the available SWSB information to stall. 
A single SYNC is - * sufficient since if there were multiple dependencies, the - * scoreboard algorithm already injected other SYNCs before this - * instruction. - */ - brw_SYNC(p, TGL_SYNC_NOP); - } else { - for (unsigned i = 0; i < inst->sources; i++) { - /* Emit a MOV to force a stall until the instruction producing the - * registers finishes. - */ - brw_MOV(p, retype(brw_null_reg(), BRW_TYPE_UW), - retype(src[i], BRW_TYPE_UW)); - } - - if (inst->sources > 1) - multiple_instructions_emitted = true; - } - - break; - - case SHADER_OPCODE_FIND_LIVE_CHANNEL: - case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: - case SHADER_OPCODE_LOAD_LIVE_CHANNELS: - UNREACHABLE("Should be lowered by lower_find_live_channel()"); - break; - - case FS_OPCODE_LOAD_LIVE_CHANNELS: { - assert(inst->force_writemask_all && inst->group == 0); - assert(inst->dst.file == BAD_FILE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_set_default_swsb(p, gen_swsb_dst_dep(swsb, 1)); - brw_MOV(p, retype(brw_flag_subreg(inst->flag_subreg), BRW_TYPE_UD), - retype(brw_mask_reg(0), BRW_TYPE_UD)); - /* Reading certain ARF registers (like 'ce', the mask register) on - * Gfx12+ requires requires a dependency on all pipes on the read - * instruction and the next instructions - */ - if (devinfo->ver >= 12) - brw_SYNC(p, TGL_SYNC_NOP); - break; - } - case SHADER_OPCODE_BROADCAST: - assert(inst->force_writemask_all); - brw_broadcast(p, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_SHUFFLE: - generate_shuffle(inst, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_SEL_EXEC: - assert(inst->force_writemask_all); - assert(devinfo->has_64bit_float || brw_type_size_bytes(dst.type) <= 4); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, dst, src[1]); - brw_set_default_mask_control(p, BRW_MASK_ENABLE); - brw_set_default_swsb(p, gen_swsb_null()); - brw_MOV(p, dst, src[0]); - break; - - case SHADER_OPCODE_QUAD_SWIZZLE: - assert(src[1].file == IMM); - assert(src[1].type == BRW_TYPE_UD); - 
generate_quad_swizzle(inst, dst, src[0], src[1].ud); - break; - - case SHADER_OPCODE_CLUSTER_BROADCAST: { - assert((!intel_device_info_is_9lp(devinfo) && - devinfo->has_64bit_float) || brw_type_size_bytes(src[0].type) <= 4); - assert(!src[0].negate && !src[0].abs); - assert(src[1].file == IMM); - assert(src[1].type == BRW_TYPE_UD); - assert(src[2].file == IMM); - assert(src[2].type == BRW_TYPE_UD); - const unsigned component = src[1].ud; - const unsigned cluster_size = src[2].ud; - assert(inst->src[0].file != ARF); - - unsigned s; - if (inst->src[0].file == FIXED_GRF) { - s = inst->src[0].hstride ? 1 << (inst->src[0].hstride - 1) : 0; - } else { - s = inst->src[0].stride; - } - unsigned vstride = cluster_size * s; - unsigned width = cluster_size; - - /* The maximum exec_size is 32, but the maximum width is only 16. */ - if (inst->exec_size == width) { - vstride = 0; - width = 1; - } - - struct brw_reg strided = stride(suboffset(src[0], component * s), - vstride, width, 0); - brw_MOV(p, dst, strided); - break; - } - - case SHADER_OPCODE_HALT_TARGET: - /* This is the place where the final HALT needs to be inserted if - * we've emitted any discards. If not, this will emit no code. - */ - if (!this->needs_final_halt) { - disasm_info->use_tail = true; - break; - } - - /* HALT temporarily disables channels, and the same instruction - * is used to re-enable them: once all channels are - * disabled, then they are re-enabled again immediately. - * - * So put a HALT right before the "epilogue" of the shader to make - * sure all channels get HALTed, so that this last HALT will re-enable - * them again. 
- */ - final_halt_offset = p->next_insn_offset; - brw_HALT(p); - - if (devinfo->ver >= 12) { - /* This works around synchronization issues consequence of the - * HALT instruction not being considered a control flow - * instruction by the back-end -- The fact that it doesn't - * cause the CFG pass to introduce an edge in the graph means - * that the software scoreboard pass is completely blind to the - * effect of discard jumps on control flow, so it doesn't - * introduce the required annotations to avoid data hazards - * when the discard path of the CFG is taken. Note that - * because of the very limited set of instructions that can - * follow the HALT target in a fragment shader this was very - * unlikely to lead to issues in practice, but starting on xe3 - * it appears to have become far more likely due to the use of - * SENDG, since SENDG requires the scalar register to be set - * prior to the submission of the render target write payloads, - * which can easily lead to a WaR hazard if there was another - * SENDG before the HALT jump that wasn't done reading out its - * payload from the GRF. - * - * In an ideal world this would be avoided by having HALT be a - * normal control flow instruction represented as an edge in - * the control flow graph -- But unfortunately that would - * prevent the optimizations we currently do that take - * advantage of the ability of reordering code past the HALT - * instruction, so it would have a pretty large performance - * cost. Instead this simply adds a SYNC.ALLWR instruction - * after the HALT target to guarantee that all pending SEND - * messages have finished execution -- That may also seem - * costly, however its cost in practice appears to be minimal - * since at the point of the program when the target HALT is - * executed there is almost nothing left to do other than send - * out the render target write payloads, so any pending - * operations had to be waited on at roughly this point of the - * program regardless. 
- */ - brw_SYNC(p, TGL_SYNC_ALLWR); - } - break; - - case SHADER_OPCODE_BARRIER: - generate_barrier(inst, src[0]); - send_count++; - break; - - case SHADER_OPCODE_RND_MODE: { - assert(src[0].file == IMM); - /* - * Changes the floating point rounding mode updating the control - * register field defined at cr0.0[5-6] bits. - */ - enum brw_rnd_mode mode = - (enum brw_rnd_mode) (src[0].d << BRW_CR0_RND_MODE_SHIFT); - brw_float_controls_mode(p, mode, BRW_CR0_RND_MODE_MASK); - } - break; - - case SHADER_OPCODE_FLOAT_CONTROL_MODE: - assert(src[0].file == IMM); - assert(src[1].file == IMM); - brw_float_controls_mode(p, src[0].d, src[1].d); - break; - - case SHADER_OPCODE_READ_ARCH_REG: - if (devinfo->ver >= 12) { - /* There is a SWSB restriction that requires that any time sr0 is - * accessed both the instruction doing the access and the next one - * have SWSB set to RegDist(1). - */ - if (brw_get_default_swsb(p).mode != GEN_SBID_NULL) - brw_SYNC(p, TGL_SYNC_NOP); - brw_set_default_swsb(p, gen_swsb_regdist(1)); - brw_MOV(p, dst, src[0]); - brw_set_default_swsb(p, gen_swsb_regdist(1)); - brw_AND(p, dst, dst, brw_imm_ud(0xffffffff)); - } else { - brw_MOV(p, dst, src[0]); - } - break; - - default: - UNREACHABLE("Unsupported opcode"); - - case SHADER_OPCODE_LOAD_PAYLOAD: - UNREACHABLE("Should be lowered by lower_load_payload()"); - } - prev_inst = inst; - - if (multiple_instructions_emitted) - continue; - - if (inst->conditional_mod) { - assert(p->next_insn_offset == last_insn_offset + 16 || - !"conditional_mod for IR " - "emitting more than 1 instruction"); - - brw_eu_inst *last = &p->store[last_insn_offset / 16]; - - if (inst->conditional_mod) { - if (inst->opcode != BRW_OPCODE_BFN) { - brw_eu_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod); - } else { - unsigned cc; - - switch (inst->conditional_mod) { - case BRW_CONDITIONAL_NONE: - cc = 0; - break; - case BRW_CONDITIONAL_Z: - cc = 1; - break; - case BRW_CONDITIONAL_G: - cc = 2; - break; - case 
BRW_CONDITIONAL_L: - cc = 3; - break; - default: - UNREACHABLE("Invalid cmod for BFN."); - } - - brw_eu_inst_set_boolean_func_cond_modifier(p->devinfo, last, cc); - } - } - } - - /* When enabled, insert sync NOP after every instruction and make sure - * that current instruction depends on the previous instruction. - */ - if (INTEL_DEBUG(DEBUG_SWSB_STALL) && devinfo->ver >= 12) { - brw_set_default_swsb(p, gen_swsb_regdist(1)); - brw_SYNC(p, TGL_SYNC_NOP); - } - } - - brw_set_uip_jip(p, start_offset, final_halt_offset); - - /* end of program sentinel */ - disasm_new_inst_group(disasm_info, p->next_insn_offset); - - /* `send_count` explicitly does not include spills or fills, as we'd - * like to use it as a metric for intentional memory access or other - * shared function use. Otherwise, subtle changes to scheduling or - * register allocation could cause it to fluctuate wildly - and that - * effect is already counted in spill/fill counts. - */ - send_count -= shader_stats.spill_count; - send_count -= shader_stats.fill_count; - -#ifndef NDEBUG - bool validated = -#else - if (unlikely(debug_flag)) -#endif - brw_validate_instructions(&compiler->isa, p->store, - start_offset, - p->next_insn_offset, - disasm_info); - - int before_size = p->next_insn_offset - start_offset; - brw_compact_instructions(p, start_offset, disasm_info); - int after_size = p->next_insn_offset - start_offset; - - bool dump_shader_bin = brw_should_dump_shader_bin(); - unsigned char blake3[BLAKE3_KEY_LEN + 1]; - char blake3buf[BLAKE3_HEX_LEN]; - - auto override_path = debug_get_option_shader_bin_override_path(); - if (unlikely(debug_flag || dump_shader_bin || override_path != NULL || - params->archiver)) { - _mesa_blake3_compute(p->store + start_offset / sizeof(brw_eu_inst), - after_size, blake3); - _mesa_blake3_format(blake3buf, blake3); - } - - if (unlikely(dump_shader_bin)) - brw_dump_shader_bin(p->store, start_offset, p->next_insn_offset, - blake3buf); - - if (unlikely(override_path != NULL && - 
brw_try_override_assembly(p, start_offset, override_path, - blake3buf))) { - fprintf(stderr, "Successfully overrode shader with blake3 %s\n", blake3buf); - /* disasm_info and stats are no longer valid as we gathered - * them based on the original shader. - */ - if (debug_flag) { - fprintf(stderr, "Skipping disassembly and statistics " - "output for this shader.\n\n"); - } - ralloc_free(disasm_info); - return start_offset; - } - - if (unlikely(debug_flag || params->archiver)) { - FILE *files[2] = { NULL, NULL }; - - if (debug_flag && (!intel_shader_dump_filter || - (intel_shader_dump_filter && intel_shader_dump_filter == params->source_hash))) - files[0] = stderr; - - if (params->archiver) { - const char *filename = - ralloc_asprintf(mem_ctx, "ASM%d/0", dispatch_width); - files[1] = debug_archiver_start_file(params->archiver, filename); - } - - for (unsigned i = 0; i < ARRAY_SIZE(files); i++) { - if (!files[i]) continue; - fprintf(files[i], "Native code for %s (src_hash 0x%016" PRIx64 ") (blake3 %s)\n" - "SIMD%d shader: %d instructions. %d loops. %u cycles. " - "%d:%d spills:fills, %u sends, " - "scheduled with mode %s. " - "Promoted %u constants. " - "GRF registers: %u. " - "Non-SSA regs (after NIR): %u. 
" - "Compacted %d to %d bytes (%.0f%%)\n", - shader_name, params->source_hash, blake3buf, - dispatch_width, - before_size / 16 - nop_count - sync_nop_count, - loop_count, perf.latency, - shader_stats.spill_count, - shader_stats.fill_count, - send_count, - shader_stats.scheduler_mode, - shader_stats.promoted_constants, - s.grf_used, - shader_stats.non_ssa_registers_after_nir, - before_size, after_size, - 100.0f * (before_size - after_size) / before_size); - dump_assembly(p->store, start_offset, p->next_insn_offset, - disasm_info, perf.block_latency, files[i]); - } - - if (params->archiver) { - debug_archiver_finish_file(params->archiver); - } - } - - ralloc_free(disasm_info); - -#ifndef NDEBUG - if (!validated && !debug_flag) { - fprintf(stderr, - "Validation failed. Rerun with INTEL_DEBUG=shaders to get more information.\n"); - } -#endif - brw_shader_debug_log(compiler, params->log_data, - "%s SIMD%d shader: %d inst, %d loops, %u cycles, " - "%d:%d spills:fills, %u sends, " - "scheduled with mode %s, " - "Promoted %u constants, " - "compacted %d to %d bytes.\n", - _mesa_shader_stage_to_abbrev(stage), - dispatch_width, - before_size / 16 - nop_count - sync_nop_count, - loop_count, perf.latency, - shader_stats.spill_count, - shader_stats.fill_count, - send_count, - shader_stats.scheduler_mode, - shader_stats.promoted_constants, - before_size, after_size); - assert(validated); - - if (stats) { - stats->dispatch_width = dispatch_width; - stats->max_polygons = s.max_polygons; - stats->instrs = before_size / 16 - nop_count - sync_nop_count; - stats->code_size = after_size; - stats->sends = send_count; - stats->loops = loop_count; - stats->cycles = perf.latency; - stats->spills = shader_stats.spill_count; - stats->fills = shader_stats.fill_count; - stats->max_live_registers = shader_stats.max_register_pressure; - stats->non_ssa_regs_after_nir = shader_stats.non_ssa_registers_after_nir; - stats->source_hash = prog_data->source_hash; - stats->grf_registers = devinfo->ver >= 
30 ? s.grf_used : 0; - stats->scheduler_mode = shader_stats.scheduler_mode; - - switch (stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_FRAGMENT: - stats->push_constant_ranges = 0; - stats->push_constant_registers = 0; - for (uint32_t i = 0; i < 4; i++) { - stats->push_constant_ranges += prog_data->push_sizes[i] != 0; - stats->push_constant_registers += - DIV_ROUND_UP(prog_data->push_sizes[i], reg_unit(devinfo) * REG_SIZE); - } - break; - - case MESA_SHADER_COMPUTE: - case MESA_SHADER_KERNEL: - /* Pre Gfx12.5, there is only one push constant buffer for compute - * shaders, post Gfx12.5 the shader has to pull the constant data. - */ - stats->push_constant_ranges = - devinfo->verx10 < 125 ? (prog_data->push_sizes[0] != 0) : 0; - stats->push_constant_registers = - devinfo->verx10 < 125 ? - DIV_ROUND_UP(prog_data->push_sizes[0], reg_unit(devinfo) * REG_SIZE) : 0; - break; - - case MESA_SHADER_MESH: - case MESA_SHADER_TASK: - case MESA_SHADER_RAYGEN: - case MESA_SHADER_ANY_HIT: - case MESA_SHADER_CLOSEST_HIT: - case MESA_SHADER_MISS: - case MESA_SHADER_INTERSECTION: - case MESA_SHADER_CALLABLE: - stats->push_constant_ranges = 0; - stats->push_constant_registers = 0; - break; - - default: - UNREACHABLE("invalid stage"); - } - - /* Report the max dispatch width only on the smallest SIMD variant. - * - * XXX: SIMD8 is not the smallest on Xe2. This logic should be adjusted. 
- */ - if (stage != MESA_SHADER_FRAGMENT || dispatch_width == 8) - stats->max_dispatch_width = dispatch_width; - else - stats->max_dispatch_width = 0; - - if (mesa_shader_stage_uses_workgroup(stage)) - stats->workgroup_memory_size = prog_data->total_shared; - else - stats->workgroup_memory_size = 0; - } - - return start_offset; -} - -void -brw_generator::add_const_data(void *data, unsigned size) -{ - assert(prog_data->const_data_size == 0); - if (size > 0) { - prog_data->const_data_size = size; - prog_data->const_data_offset = brw_append_data(p, data, size, 32); - } -} - -void -brw_generator::add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt) -{ - assert(brw_shader_stage_is_bindless(stage)); - struct brw_bs_prog_data *bs_prog_data = brw_bs_prog_data(prog_data); - if (num_resume_shaders > 0) { - bs_prog_data->resume_sbt_offset = - brw_append_data(p, sbt, num_resume_shaders * sizeof(uint64_t), 32); - for (unsigned i = 0; i < num_resume_shaders; i++) { - size_t offset = bs_prog_data->resume_sbt_offset + i * sizeof(*sbt); - assert(offset <= UINT32_MAX); - brw_add_reloc(p, INTEL_SHADER_RELOC_SHADER_START_OFFSET, - INTEL_SHADER_RELOC_TYPE_U32, - (uint32_t)offset, (uint32_t)sbt[i]); - } - } -} - -const unsigned * -brw_generator::get_assembly() -{ - prog_data->relocs = brw_get_shader_relocs(p, &prog_data->num_relocs); - - return brw_get_program(p, &prog_data->program_size); -} - -} /* namespace old */ - -/* After program generation, go back and update the UIP and JIP of - * BREAK, CONT, ENDIF and HALT instructions to their correct locations. - */ -void -brw_set_uip_jip(struct brw_codegen *p, int start_offset, int final_halt_offset) -{ - const struct intel_device_info *devinfo = p->devinfo; - const int end_offset = p->next_insn_offset; - brw_eu_inst *store = p->store; - - struct branch_info { - enum opcode opcode; - int offset; - - /* For loop headers. 
*/ - int loop_end_offset; - }; - - /* Collect information about the control flow instructions and any - * instruction that are loop headers. There might be multiple entries - * for instructions that act as loop header for multiple loops and/or that - * are control flow instruction themselves (e.g. IF as the loop header). - */ - std::vector infos; - for (int offset = start_offset; offset < end_offset; offset += 16) { - brw_eu_inst *insn = store + (offset / 16); - assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0); - - const enum opcode opcode = brw_eu_inst_opcode(p->isa, insn); - switch (opcode) { - case BRW_OPCODE_IF: - case BRW_OPCODE_ELSE: - case BRW_OPCODE_ENDIF: - case BRW_OPCODE_HALT: - case BRW_OPCODE_BREAK: - case BRW_OPCODE_CONTINUE: - case BRW_OPCODE_WHILE: - infos.push_back({ - .opcode = opcode, - .offset = offset, - }); - if (opcode == BRW_OPCODE_WHILE) { - /* Also add an entry for the loop header. */ - const int jip = brw_eu_inst_jip(devinfo, insn); - assert(jip < 0); - infos.push_back({ - /* Use NOP to indicate this is a loop header entry. */ - .opcode = BRW_OPCODE_NOP, - .offset = offset + jip, - .loop_end_offset = offset, - }); - } - break; - - default: - /* Nothing to do. */ - break; - } - } - - /* Sort in scope order. */ - std::sort(infos.begin(), infos.end(), [](const auto &a, const auto &b) { - if (a.offset != b.offset) - return a.offset < b.offset; - /* Note the flipped comparison: want to see the largest scope first, - * since it contains the other. - */ - return a.loop_end_offset > b.loop_end_offset; - }); - - struct scope { - int end_offset; - - /* End of current loop if exists. */ - int loop_end_offset; - }; - - std::vector scopes; - scopes.push_back({-1, -1}); - - /* Walk backwards keeping track of the scopes. This make easy to - * get the innermost end of scope and the innermost end of loop. 
- */ - for (int i = infos.size() - 1; i >= 0; i--) { - const branch_info &info = infos[i]; - - brw_eu_inst *insn = store + (info.offset / 16); - - switch (info.opcode) { - case BRW_OPCODE_NOP: - case BRW_OPCODE_IF: - /* Pop the scope. NOP here is a stand in for loop headers. */ - scopes.pop_back(); - break; - - case BRW_OPCODE_ELSE: - /* For instructions before the ELSE in the conditional (i.e. the - * then-part of the loop), the scope ends here. - */ - scopes.back().end_offset = info.offset; - break; - - case BRW_OPCODE_ENDIF: { - const int innermost_end_offset = scopes.back().end_offset; - int jip_offset; - - if (innermost_end_offset != -1) - jip_offset = innermost_end_offset; - else if (final_halt_offset != -1) - jip_offset = final_halt_offset + 16; - else - jip_offset = info.offset + 16; - - brw_eu_inst_set_jip(devinfo, insn, jip_offset - info.offset); - - scopes.push_back({ - .end_offset = info.offset, - .loop_end_offset = scopes.back().loop_end_offset, - }); - break; - } - - case BRW_OPCODE_WHILE: - scopes.push_back({ - .end_offset = info.offset, - .loop_end_offset = info.offset, - }); - break; - - case BRW_OPCODE_BREAK: - case BRW_OPCODE_CONTINUE: { - const int innermost_end_offset = scopes.back().end_offset; - brw_eu_inst_set_jip(devinfo, insn, innermost_end_offset - info.offset); - - const int loop_end_offset = scopes.back().loop_end_offset; - assert(loop_end_offset != -1); - assert(loop_end_offset > info.offset); - brw_eu_inst_set_uip(devinfo, insn, loop_end_offset - info.offset); - break; - } - - case BRW_OPCODE_HALT: { - /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): - * - * "In case of the halt instruction not inside any conditional - * code block, the value of and should be the - * same. In case of the halt instruction inside conditional code - * block, the should be the end of the program, and the - * should be end of the most inner conditional code block." 
- */ - const int innermost_end_offset = scopes.back().end_offset; - - /* If present, use the final HALT to infer the "end of the program". - * - * See also SHADER_OPCODE_HALT_TARGET. - */ - if (final_halt_offset != -1) { - if (final_halt_offset == info.offset) - assert(innermost_end_offset == -1); - - const int uip_offset = final_halt_offset + 16; - brw_eu_inst_set_uip(devinfo, insn, uip_offset - info.offset); - } - - if (innermost_end_offset != -1) - brw_eu_inst_set_jip(devinfo, insn, innermost_end_offset - info.offset); - else - brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn)); - break; - } - - default: - /* Nothing to do. */ - break; - } - } -} diff --git a/src/intel/compiler/brw/brw_generator.h b/src/intel/compiler/brw/brw_generator.h deleted file mode 100644 index ebd9280b330..00000000000 --- a/src/intel/compiler/brw/brw_generator.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#pragma once - -#include "brw_shader.h" - -namespace old { - -/* Translates BRW IR to actual EU assembly code. 
*/ -class brw_generator -{ -public: - brw_generator(const struct brw_compiler *compiler, - const struct brw_compile_params *params, - struct brw_stage_prog_data *prog_data, - mesa_shader_stage stage); - ~brw_generator(); - - void enable_debug(const char *shader_name); - int generate_code(const brw_shader &s, - struct genisa_stats *stats); - void add_const_data(void *data, unsigned size); - void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt); - const unsigned *get_assembly(); - -private: - void generate_send(brw_send_inst *inst, - struct brw_reg dst, - struct brw_reg desc, - struct brw_reg ex_desc, - struct brw_reg payload, - struct brw_reg payload2, - bool ex_bso); - void generate_barrier(brw_inst *inst, struct brw_reg src); - void generate_ddx(const brw_inst *inst, - struct brw_reg dst, struct brw_reg src); - void generate_ddy(const brw_inst *inst, - struct brw_reg dst, struct brw_reg src); - void generate_scratch_header(brw_inst *inst, - struct brw_reg dst, struct brw_reg src); - - void generate_mov_indirect(brw_inst *inst, - struct brw_reg dst, - struct brw_reg reg, - struct brw_reg indirect_byte_offset); - - void generate_shuffle(brw_inst *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg idx); - - void generate_quad_swizzle(const brw_inst *inst, - struct brw_reg dst, struct brw_reg src, - unsigned swiz); - - bool patch_halt_jumps(); - - const struct brw_compiler *compiler; - const struct brw_compile_params *params; - - const struct intel_device_info *devinfo; - - struct brw_codegen *p; - struct brw_stage_prog_data * const prog_data; - - unsigned dispatch_width; /**< 8, 16 or 32 */ - - int final_halt_offset; - bool needs_final_halt; - - bool debug_flag; - const char *shader_name; - mesa_shader_stage stage; - void *mem_ctx; -}; - -} /* namespace old */ diff --git a/src/intel/compiler/brw/brw_gram.y b/src/intel/compiler/brw/brw_gram.y deleted file mode 100644 index 77752741e8a..00000000000 --- a/src/intel/compiler/brw/brw_gram.y +++ 
/dev/null @@ -1,2252 +0,0 @@ -%{ -/* - * Copyright © 2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#include -#include -#include -#include -#include -#include "brw_asm_internal.h" - -#undef ALIGN16 - -#define YYLTYPE YYLTYPE -typedef struct YYLTYPE -{ - int first_line; - int first_column; - int last_line; - int last_column; -} YYLTYPE; - -void yyerror (YYLTYPE *, struct brw_asm_parser *parser, const char *); - -enum message_level { - WARN, - ERROR, -}; - -static void -message(struct brw_asm_parser *parser, enum message_level level, - YYLTYPE *location, const char *fmt, ...) -{ - static const char *level_str[] = { "warning", "error" }; - va_list args; - - if (location) - fprintf(stderr, "%s:%d:%d: %s: ", parser->input_filename, - location->first_line, - location->first_column, level_str[level]); - else - fprintf(stderr, "%s:%s: ", parser->input_filename, level_str[level]); - - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); -} - -#define warn(flag, l, fmt, ...) \ - do { \ - if (warning_flags & WARN_ ## flag) \ - message(parser, WARN, l, fmt, ## __VA_ARGS__); \ - } while (0) - -#define error(l, fmt, ...) 
\ - do { \ - message(parser, ERROR, l, fmt, ## __VA_ARGS__); \ - } while (0) - -static bool -isPowerofTwo(unsigned int x) -{ - return x && (!(x & (x - 1))); -} - -static struct brw_reg -set_direct_src_operand(struct brw_reg *reg, int type) -{ - return brw_make_reg(reg->file, - reg->nr, - reg->subnr, - 0, // negate - 0, // abs - type, - 0, // vstride - 0, // width - 0, // hstride - BRW_SWIZZLE_NOOP, - WRITEMASK_XYZW); -} - -static void -i965_asm_unary_instruction(int opcode, struct brw_codegen *p, - struct brw_reg dest, struct brw_reg src0) -{ - switch (opcode) { - case BRW_OPCODE_BFREV: - brw_BFREV(p, dest, src0); - break; - case BRW_OPCODE_CBIT: - brw_CBIT(p, dest, src0); - break; - case BRW_OPCODE_MOV: - brw_MOV(p, dest, src0); - break; - case BRW_OPCODE_FBL: - brw_FBL(p, dest, src0); - break; - case BRW_OPCODE_FRC: - brw_FRC(p, dest, src0); - break; - case BRW_OPCODE_FBH: - brw_FBH(p, dest, src0); - break; - case BRW_OPCODE_NOT: - brw_NOT(p, dest, src0); - break; - case BRW_OPCODE_RNDE: - brw_RNDE(p, dest, src0); - break; - case BRW_OPCODE_RNDZ: - brw_RNDZ(p, dest, src0); - break; - case BRW_OPCODE_RNDD: - brw_RNDD(p, dest, src0); - break; - case BRW_OPCODE_LZD: - brw_LZD(p, dest, src0); - break; - case BRW_OPCODE_RNDU: - fprintf(stderr, "Opcode BRW_OPCODE_RNDU unhandled\n"); - break; - default: - fprintf(stderr, "Unsupported unary opcode\n"); - } -} - -static void -i965_asm_binary_instruction(int opcode, - struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) -{ - switch (opcode) { - case BRW_OPCODE_ADDC: - brw_ADDC(p, dest, src0, src1); - break; - case BRW_OPCODE_BFI1: - brw_BFI1(p, dest, src0, src1); - break; - case BRW_OPCODE_DP2: - brw_DP2(p, dest, src0, src1); - break; - case BRW_OPCODE_DP3: - brw_DP3(p, dest, src0, src1); - break; - case BRW_OPCODE_DP4: - brw_DP4(p, dest, src0, src1); - break; - case BRW_OPCODE_DPH: - brw_DPH(p, dest, src0, src1); - break; - case BRW_OPCODE_LINE: - brw_LINE(p, dest, src0, src1); - 
break; - case BRW_OPCODE_MAC: - brw_MAC(p, dest, src0, src1); - break; - case BRW_OPCODE_MACH: - brw_MACH(p, dest, src0, src1); - break; - case BRW_OPCODE_PLN: - brw_PLN(p, dest, src0, src1); - break; - case BRW_OPCODE_ROL: - brw_ROL(p, dest, src0, src1); - break; - case BRW_OPCODE_ROR: - brw_ROR(p, dest, src0, src1); - break; - case BRW_OPCODE_SUBB: - brw_SUBB(p, dest, src0, src1); - break; - case BRW_OPCODE_ADD: - brw_ADD(p, dest, src0, src1); - break; - case BRW_OPCODE_CMP: - /* Third parameter is conditional modifier - * which gets updated later - */ - brw_CMP(p, dest, 0, src0, src1); - break; - case BRW_OPCODE_AND: - brw_AND(p, dest, src0, src1); - break; - case BRW_OPCODE_ASR: - brw_ASR(p, dest, src0, src1); - break; - case BRW_OPCODE_AVG: - brw_AVG(p, dest, src0, src1); - break; - case BRW_OPCODE_OR: - brw_OR(p, dest, src0, src1); - break; - case BRW_OPCODE_SEL: - brw_SEL(p, dest, src0, src1); - break; - case BRW_OPCODE_SHL: - brw_SHL(p, dest, src0, src1); - break; - case BRW_OPCODE_SHR: - brw_SHR(p, dest, src0, src1); - break; - case BRW_OPCODE_XOR: - brw_XOR(p, dest, src0, src1); - break; - case BRW_OPCODE_MUL: - brw_MUL(p, dest, src0, src1); - break; - case BRW_OPCODE_SRND: - brw_SRND(p, dest, src0, src1); - break; - default: - fprintf(stderr, "Unsupported binary opcode\n"); - } -} - -static void -i965_asm_ternary_instruction(int opcode, - struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2) -{ - switch (opcode) { - case BRW_OPCODE_MAD: - brw_MAD(p, dest, src0, src1, src2); - break; - case BRW_OPCODE_CSEL: - brw_CSEL(p, dest, src0, src1, src2); - break; - case BRW_OPCODE_LRP: - brw_LRP(p, dest, src0, src1, src2); - break; - case BRW_OPCODE_BFE: - brw_BFE(p, dest, src0, src1, src2); - break; - case BRW_OPCODE_BFI2: - brw_BFI2(p, dest, src0, src1, src2); - break; - case BRW_OPCODE_DP4A: - brw_DP4A(p, dest, src0, src1, src2); - break; - case BRW_OPCODE_ADD3: - brw_ADD3(p, dest, src0, src1, src2); 
- break; - default: - fprintf(stderr, "Unsupported ternary opcode\n"); - } -} - -static void -i965_asm_set_instruction_options(struct brw_asm_parser *parser, - struct options options) -{ - const struct intel_device_info *devinfo = parser->devinfo; - const struct brw_isa_info *isa = parser->p->isa; - - brw_eu_inst_set_access_mode(devinfo, brw_last_inst, - options.access_mode); - brw_eu_inst_set_mask_control(devinfo, brw_last_inst, - options.mask_control); - if (devinfo->ver < 12) { - brw_eu_inst_set_thread_control(devinfo, brw_last_inst, - options.thread_control); - brw_eu_inst_set_no_dd_check(devinfo, brw_last_inst, - options.no_dd_check); - brw_eu_inst_set_no_dd_clear(devinfo, brw_last_inst, - options.no_dd_clear); - } else { - enum opcode opcode = brw_eu_inst_opcode(isa, brw_last_inst); - brw_eu_inst_set_swsb(devinfo, brw_last_inst, - brw_swsb_encode(devinfo, options.depinfo, opcode)); - } - brw_eu_inst_set_debug_control(devinfo, brw_last_inst, - options.debug_control); - if (brw_has_branch_ctrl(devinfo, brw_eu_inst_opcode(isa, brw_last_inst))) { - if (options.acc_wr_control) - error(NULL, "Instruction does not support AccWrEnable\n"); - - brw_eu_inst_set_branch_control(devinfo, brw_last_inst, - options.branch_control); - } else if (options.branch_control) { - error(NULL, "Instruction does not support BranchCtrl\n"); - } else if (devinfo->ver < 20) { - brw_eu_inst_set_acc_wr_control(devinfo, brw_last_inst, - options.acc_wr_control); - } - brw_eu_inst_set_cmpt_control(devinfo, brw_last_inst, - options.compaction); -} - -%} - -%pure-parser -%lex-param { struct brw_asm_parser *parser } -%parse-param { struct brw_asm_parser *parser } - -%locations - -%start ROOT - -%union { - char *string; - double number; - int integer; - unsigned long long int llint; - struct brw_reg reg; - enum brw_reg_type reg_type; - struct brw_codegen *program; - struct predicate predicate; - struct condition condition; - struct options options; - struct instoption instoption; - struct msgdesc 
msgdesc; - gen_swsb depinfo; - struct { int sdepth; int rcount; } dpas_params; - brw_eu_inst *instruction; -} - -%code { -int brw_asm_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, - yyscan_t yyscanner); - -static int -yylex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, - struct brw_asm_parser *parser) -{ - return brw_asm_lex(yylval_param, yylloc_param, parser->scanner); -} -} - -%token ABS -%token COLON -%token COMMA -%token DOT -%token LANGLE RANGLE -%token LCURLY RCURLY -%token LPAREN RPAREN -%token LSQUARE RSQUARE -%token PLUS MINUS -%token SEMICOLON -%token ASSIGN - -/* datatypes */ -%token TYPE_B TYPE_UB -%token TYPE_W TYPE_UW -%token TYPE_D TYPE_UD -%token TYPE_Q TYPE_UQ -%token TYPE_V TYPE_UV -%token TYPE_F TYPE_HF TYPE_HF8 -%token TYPE_BF TYPE_BF8 -%token TYPE_DF -%token TYPE_VF - -/* label */ -%token JUMP_LABEL -%token JUMP_LABEL_TARGET -%token JIP UIP - -/* opcodes */ -%token ADD ADD3 ADDC AND ASR AVG -%token BFE BFI1 BFI2 BFB BFREV BRC BRD BREAK -%token CALL CALLA CASE CBIT CMP CMPN CONT CSEL -%token DIM DO DPAS DP2 DP3 DP4 DP4A DPH -%token ELSE ENDIF FBH FBL FORK FRC -%token GOTO -%token HALT -%token IF ILLEGAL -%token JMPI JOIN -%token LINE LRP LZD -%token MAC MACH MAD MADM MATH MOV MOVI MUL MREST MSAVE -%token NENOP NOP NOT -%token OR -%token PLN POP PUSH -%token RET RNDD RNDE RNDU RNDZ ROL ROR -%token SEL SENDS SENDSC SHL SHR SMOV SRND SUBB SYNC -%token SEND_GFX4 SENDC_GFX4 SEND_GFX12 SENDC_GFX12 -%token WAIT WHILE -%token XOR - -/* extended math functions */ -%token COS EXP FDIV INV INVM INTDIV INTDIVMOD INTMOD LOG POW RSQ -%token RSQRTM SIN SINCOS SQRT - -/* sync instruction */ -%token ALLRD ALLWR FENCE BAR HOST -%type sync_function -%type sync_arg - -/* shared functions for send */ -%token HDC0 HDC1 HDC2 HDC_RO GATEWAY PIXEL_INTERP RENDER SAMPLER -%token TS_BTD URB RT_ACCEL SLM TGM UGM - -/* message details for send */ -%token MSGDESC_BEGIN SRC1_LEN EX_BSO MSGDESC_END -%type msgdesc msgdesc_parts; - -/* Conditional modifiers */ -%token EQUAL 
GREATER GREATER_EQUAL LESS LESS_EQUAL NOT_EQUAL -%token NOT_ZERO OVERFLOW UNORDERED ZERO - -/* register Access Modes */ -%token ALIGN1 ALIGN16 - -/* accumulator write control */ -%token ACCWREN - -/* compaction control */ -%token CMPTCTRL - -/* mask control (WeCtrl) */ -%token WECTRL - -/* debug control */ -%token BREAKPOINT - -/* dependency control */ -%token NODDCLR NODDCHK - -/* end of thread */ -%token EOT - -/* mask control */ -%token MASK_DISABLE; - -/* predicate control */ -%token ANYV ALLV ANY2H ALL2H ANY4H ALL4H ANY8H ALL8H ANY16H ALL16H -%token ANY32H ALL32H - -/* round instructions */ -%token ROUND_INCREMENT - -/* staturation */ -%token SATURATE - -/* thread control */ -%token ATOMIC SWITCH - -/* branch control */ -%token BRANCH_CTRL - -/* quater control */ -%token QTR_2Q QTR_3Q QTR_4Q QTR_2H QTR_2N QTR_3N QTR_4N QTR_5N -%token QTR_6N QTR_7N QTR_8N - -/* channels */ -%token X Y Z W - -/* reg files */ -%token GENREGFILE - -/* vertical stride in register region */ -%token VxH - -/* register type */ -%token GENREG ADDRREG ACCREG FLAGREG NOTIFYREG STATEREG -%token CONTROLREG IPREG PERFORMANCEREG THREADREG CHANNELENABLEREG -%token MASKREG SCALARREG - -%token INTEGER -%token LONG -%token NULL_TOKEN - -%nonassoc SUBREGNUM -%left PLUS MINUS -%nonassoc DOT -%nonassoc EMPTYEXECSIZE -%nonassoc LPAREN - -%type execsize exp -%type exp2 - -/* predicate control */ -%type predctrl predstate -%type predicate - -/* conditional modifier */ -%type cond_mod -%type condModifiers - -/* instruction options */ -%type instoptions instoption_list -%type instoption - -/* writemask */ -%type writemask_x writemask_y writemask_z writemask_w -%type writemask - -/* dst operand */ -%type dst dstoperand dstoperandex dstoperandex_typed dstreg -%type dstregion - -%type saturate -%type relativelocation2 - -/* src operand */ -%type directsrcoperand directsrcaccoperand indirectsrcoperand srcacc -%type srcarcoperandex srcaccimm srcarcoperandex_typed srcimm -%type indirectgenreg indirectregion 
-%type immreg src reg32 payload directgenreg_list addrparam region -%type region_wh directgenreg -%type desc ex_desc reg32a -%type swizzle - -/* registers */ -%type accreg addrreg channelenablereg controlreg flagreg ipreg scalarreg -%type notifyreg nullreg performancereg threadcontrolreg statereg maskreg -%type subregnum - -/* register types */ -%type reg_type imm_type - -/* immediate values */ -%type immval - -/* instruction opcodes */ -%type unaryopcodes binaryopcodes binaryaccopcodes ternaryopcodes -%type sendop sendsop -%type sendopcode sendsopcode - -%type negate abs chansel math_function sharedfunction - -%type jumplabeltarget - -/* SWSB */ -%token REG_DIST_CURRENT -%token REG_DIST_FLOAT -%token REG_DIST_INT -%token REG_DIST_LONG -%token REG_DIST_ALL -%token REG_DIST_MATH -%token REG_DIST_SCALAR -%token SBID_ALLOC -%token SBID_WAIT_SRC -%token SBID_WAIT_DST - -%type depinfo - -/* DPAS */ -%token DPAS_PARAMS - -%code { - -static void -add_instruction_option(struct brw_asm_parser *parser, - struct options *options, struct instoption opt) -{ - if (opt.type == INSTOPTION_DEP_INFO) { - if (opt.depinfo_value.regdist) { - options->depinfo.regdist = opt.depinfo_value.regdist; - options->depinfo.pipe = opt.depinfo_value.pipe; - } else { - options->depinfo.sbid = opt.depinfo_value.sbid; - options->depinfo.mode = opt.depinfo_value.mode; - } - return; - } - if (opt.type == INSTOPTION_CHAN_OFFSET) { - options->chan_offset = opt.uint_value; - return; - } - switch (opt.uint_value) { - case ALIGN1: - options->access_mode = BRW_ALIGN_1; - break; - case ALIGN16: - options->access_mode = BRW_ALIGN_16; - break; - case SWITCH: - options->thread_control |= BRW_THREAD_SWITCH; - break; - case ATOMIC: - options->thread_control |= BRW_THREAD_ATOMIC; - break; - case BRANCH_CTRL: - options->branch_control = true; - break; - case NODDCHK: - options->no_dd_check = true; - break; - case NODDCLR: - options->no_dd_clear = true; - break; - case MASK_DISABLE: - options->mask_control |= 
BRW_MASK_DISABLE; - break; - case BREAKPOINT: - options->debug_control = BRW_DEBUG_BREAKPOINT; - break; - case WECTRL: - options->mask_control |= BRW_WE_ALL; - break; - case CMPTCTRL: - /* Don't set the compaction flag to true, we're just reading - * text assembly, not instruction bits. The code that will - * assemble things later will set the flag if it decides to - * compact instructions. - */ - if (!parser->compaction_warning_given) { - parser->compaction_warning_given = true; - fprintf(stderr, "%s: ignoring 'compacted' " - "annotations for text assembly " - "instructions\n", parser->input_filename); - } - break; - case ACCWREN: - options->acc_wr_control = true; - break; - case EOT: - options->end_of_thread = true; - break; - } -} -} -%% - -ROOT: - instrseq - ; - -instrseq: - instrseq instruction SEMICOLON - | instrseq relocatableinstruction SEMICOLON - | instruction SEMICOLON - | relocatableinstruction SEMICOLON - | instrseq jumplabeltarget - | jumplabeltarget - ; - -/* Instruction Group */ -instruction: - unaryinstruction - | binaryinstruction - | binaryaccinstruction - | mathinstruction - | nopinstruction - | waitinstruction - | ternaryinstruction - | sendinstruction - | illegalinstruction - | syncinstruction - ; - -relocatableinstruction: - jumpinstruction - | branchinstruction - | breakinstruction - | loopinstruction - | joininstruction - ; - -illegalinstruction: - ILLEGAL execsize instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $1); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $2); - i965_asm_set_instruction_options(parser, $3); - (void) yynerrs; - } - ; - -/* Unary instruction */ -unaryinstruction: - predicate unaryopcodes saturate cond_mod execsize dst srcaccimm instoptions - { - struct brw_codegen *p = parser->p; - brw_set_default_access_mode(p, $8.access_mode); - i965_asm_unary_instruction($2, p, $6, $7); - brw_pop_insn_state(p); - i965_asm_set_instruction_options(parser, $8); - if ($4.cond_modifier) { - 
brw_eu_inst_set_cond_modifier(p->devinfo, - brw_last_inst, - $4.cond_modifier); - } - - if (!brw_eu_inst_flag_reg_nr(p->devinfo, brw_last_inst)) { - brw_eu_inst_set_flag_reg_nr(p->devinfo, - brw_last_inst, - $4.flag_reg_nr); - brw_eu_inst_set_flag_subreg_nr(p->devinfo, - brw_last_inst, - $4.flag_subreg_nr); - } - - if ($7.file != IMM) { - brw_eu_inst_set_src0_vstride(p->devinfo, brw_last_inst, - $7.vstride); - } - brw_eu_inst_set_saturate(p->devinfo, brw_last_inst, $3); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $5); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $8.chan_offset); - } - ; - -unaryopcodes: - BFREV - | CBIT - | DIM - | FBH - | FBL - | FRC - | LZD - | MOV - | NOT - | RNDD - | RNDE - | RNDU - | RNDZ - ; - -/* Binary instruction */ -binaryinstruction: - predicate binaryopcodes saturate cond_mod execsize dst srcimm srcimm instoptions - { - struct brw_codegen *p = parser->p; - brw_set_default_access_mode(p, $9.access_mode); - i965_asm_binary_instruction($2, p, $6, $7, $8); - i965_asm_set_instruction_options(parser, $9); - if ($4.cond_modifier) { - brw_eu_inst_set_cond_modifier(p->devinfo, - brw_last_inst, - $4.cond_modifier); - } - - if (!brw_eu_inst_flag_reg_nr(p->devinfo, brw_last_inst)) { - brw_eu_inst_set_flag_reg_nr(p->devinfo, brw_last_inst, - $4.flag_reg_nr); - brw_eu_inst_set_flag_subreg_nr(p->devinfo, brw_last_inst, - $4.flag_subreg_nr); - } - - brw_eu_inst_set_saturate(p->devinfo, brw_last_inst, $3); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $5); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $9.chan_offset); - - brw_pop_insn_state(p); - } - ; - -binaryopcodes: - ADDC - | BFI1 - | DP2 - | DP3 - | DP4 - | DPH - | LINE - | MAC - | MACH - | MUL - | PLN - | ROL - | ROR - | SUBB - | SRND - ; - -/* Binary acc instruction */ -binaryaccinstruction: - predicate binaryaccopcodes saturate cond_mod execsize dst srcacc srcimm instoptions - { - struct brw_codegen *p = parser->p; - brw_set_default_access_mode(p, $9.access_mode); 
- i965_asm_binary_instruction($2, p, $6, $7, $8); - brw_pop_insn_state(p); - i965_asm_set_instruction_options(parser, $9); - if ($4.cond_modifier) { - brw_eu_inst_set_cond_modifier(p->devinfo, - brw_last_inst, - $4.cond_modifier); - } - - if (!brw_eu_inst_flag_reg_nr(p->devinfo, brw_last_inst)) { - brw_eu_inst_set_flag_reg_nr(p->devinfo, - brw_last_inst, - $4.flag_reg_nr); - brw_eu_inst_set_flag_subreg_nr(p->devinfo, - brw_last_inst, - $4.flag_subreg_nr); - } - - brw_eu_inst_set_saturate(p->devinfo, brw_last_inst, $3); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $5); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $9.chan_offset); - } - ; - -binaryaccopcodes: - ADD - | AND - | ASR - | AVG - | CMP - | CMPN - | OR - | SEL - | SHL - | SHR - | XOR - ; - -/* Math instruction */ -mathinstruction: - predicate MATH saturate math_function execsize dst src srcimm instoptions - { - struct brw_codegen *p = parser->p; - brw_set_default_access_mode(p, $9.access_mode); - gfx6_math(p, $6, $4, $7, $8); - i965_asm_set_instruction_options(parser, $9); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $5); - brw_eu_inst_set_saturate(p->devinfo, brw_last_inst, $3); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $9.chan_offset); - brw_pop_insn_state(p); - } - ; - -math_function: - COS - | EXP - | FDIV - | INV - | INVM - | INTDIV - | INTDIVMOD - | INTMOD - | LOG - | POW - | RSQ - | RSQRTM - | SIN - | SQRT - | SINCOS - ; - -/* NOP instruction */ -nopinstruction: - NOP - { - struct brw_codegen *p = parser->p; - brw_NOP(p); - } - ; - -/* Ternary operand instruction */ -ternaryinstruction: - predicate ternaryopcodes saturate cond_mod execsize dst srcimm src srcimm instoptions - { - struct brw_codegen *p = parser->p; - brw_set_default_access_mode(p, $10.access_mode); - i965_asm_ternary_instruction($2, p, $6, $7, $8, $9); - brw_pop_insn_state(p); - i965_asm_set_instruction_options(parser, $10); - if ($4.cond_modifier) { - brw_eu_inst_set_cond_modifier(p->devinfo, - 
brw_last_inst, - $4.cond_modifier); - } - - if (p->devinfo->ver < 12) { - brw_eu_inst_set_3src_a16_flag_reg_nr(p->devinfo, brw_last_inst, - $4.flag_reg_nr); - brw_eu_inst_set_3src_a16_flag_subreg_nr(p->devinfo, brw_last_inst, - $4.flag_subreg_nr); - } - - brw_eu_inst_set_saturate(p->devinfo, brw_last_inst, $3); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $5); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $10.chan_offset); - } - | - predicate DPAS DPAS_PARAMS saturate cond_mod execsize dst src src src instoptions - { - struct brw_codegen *p = parser->p; - assert(p->devinfo->verx10 >= 125); - - brw_set_default_access_mode(p, $11.access_mode); - - brw_DPAS(p, translate_systolic_depth($3.sdepth), $3.rcount, $7, $8, $9, $10); - brw_pop_insn_state(p); - i965_asm_set_instruction_options(parser, $11); - if ($5.cond_modifier) { - brw_eu_inst_set_cond_modifier(p->devinfo, - brw_last_inst, - $5.cond_modifier); - } - - brw_eu_inst_set_saturate(p->devinfo, brw_last_inst, $4); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $6); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $11.chan_offset); - } - ; - -ternaryopcodes: - CSEL - | BFE - | BFI2 - | LRP - | MAD - | DP4A - | ADD3 - ; - -/* Wait instruction */ -waitinstruction: - WAIT execsize dst instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $1); - i965_asm_set_instruction_options(parser, $4); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $2); - brw_set_default_access_mode(p, $4.access_mode); - struct brw_reg dest = $3; - dest.swizzle = brw_swizzle_for_mask(dest.writemask); - if (dest.file != ARF || dest.nr != BRW_ARF_NOTIFICATION_COUNT) - error(&@1, "WAIT must use the notification register\n"); - brw_set_dest(p, brw_last_inst, dest); - brw_set_src0(p, brw_last_inst, dest); - brw_set_src1(p, brw_last_inst, brw_null_reg()); - brw_eu_inst_set_mask_control(p->devinfo, brw_last_inst, BRW_MASK_DISABLE); - } - ; - -/* Send instruction */ -sendinstruction: - predicate sendopcode 
execsize dst payload exp2 sharedfunction msgdesc instoptions - { - struct brw_codegen *p = parser->p; - - i965_asm_set_instruction_options(parser, $9); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - brw_set_dest(p, brw_last_inst, $4); - brw_set_src0(p, brw_last_inst, $5); - brw_eu_inst_set_bits(brw_last_inst, 127, 96, $6); - brw_eu_inst_set_src1_file_type(p->devinfo, brw_last_inst, - IMM, BRW_TYPE_UD); - brw_eu_inst_set_sfid(p->devinfo, brw_last_inst, $7); - brw_eu_inst_set_eot(p->devinfo, brw_last_inst, $9.end_of_thread); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $9.chan_offset); - - brw_pop_insn_state(p); - } - | predicate sendopcode execsize dst payload payload exp2 sharedfunction msgdesc instoptions - { - struct brw_codegen *p = parser->p; - assert(p->devinfo->ver < 12); - - i965_asm_set_instruction_options(parser, $10); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - brw_set_dest(p, brw_last_inst, $4); - brw_set_src0(p, brw_last_inst, $5); - if ($6.file != ARF && - $6.nr != BRW_ARF_ADDRESS && - $6.subnr != 0) { - error(&@2, "SEND with indirect desc must use a0.0\n"); - } - brw_eu_inst_set_send_sel_reg32_desc(p->devinfo, brw_last_inst, 1); - brw_eu_inst_set_bits(brw_last_inst, 127, 96, $7); - brw_eu_inst_set_sfid(p->devinfo, brw_last_inst, $8); - brw_eu_inst_set_eot(p->devinfo, brw_last_inst, $10.end_of_thread); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $10.chan_offset); - - brw_pop_insn_state(p); - } - | predicate sendsopcode execsize dst payload payload desc ex_desc sharedfunction msgdesc instoptions - { - struct brw_codegen *p = parser->p; - - i965_asm_set_instruction_options(parser, $11); - brw_eu_inst_set_exec_size(parser->devinfo, brw_last_inst, $3); - brw_set_dest(p, brw_last_inst, $4); - brw_set_src0(p, brw_last_inst, $5); - brw_set_src1(p, brw_last_inst, $6); - - if ($7.file == IMM) { - brw_eu_inst_set_send_sel_reg32_desc(p->devinfo, brw_last_inst, 0); - brw_eu_inst_set_send_desc(p->devinfo, brw_last_inst, 
$7.ud); - } else { - brw_eu_inst_set_send_sel_reg32_desc(p->devinfo, brw_last_inst, 1); - } - - if ($8.file == IMM) { - brw_eu_inst_set_send_sel_reg32_ex_desc(p->devinfo, brw_last_inst, 0); - brw_eu_inst_set_sends_ex_desc(p->devinfo, brw_last_inst, $8.ud, false); - } else { - brw_eu_inst_set_send_sel_reg32_ex_desc(p->devinfo, brw_last_inst, 1); - brw_eu_inst_set_send_ex_desc_ia_subreg_nr(p->devinfo, brw_last_inst, $8.subnr >> 2); - } - - brw_eu_inst_set_sfid(p->devinfo, brw_last_inst, $9); - brw_eu_inst_set_eot(p->devinfo, brw_last_inst, $11.end_of_thread); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $11.chan_offset); - - if (p->devinfo->verx10 >= 125 && $10.ex_bso) { - brw_eu_inst_set_send_ex_bso(p->devinfo, brw_last_inst, 1); - brw_eu_inst_set_send_src1_len(p->devinfo, brw_last_inst, $10.src1_len); - } - - brw_pop_insn_state(p); - } - | predicate sendsopcode execsize dst GENREGFILE LSQUARE scalarreg RSQUARE desc ex_desc sharedfunction msgdesc instoptions - { - struct brw_codegen *p = parser->p; - assert(p->devinfo->ver >= 30); - - i965_asm_set_instruction_options(parser, $13); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - brw_set_dest(p, brw_last_inst, $4); - brw_set_src0(p, brw_last_inst, $7); - brw_set_src1(p, brw_last_inst, brw_null_reg()); - - if ($9.file == IMM) { - brw_eu_inst_set_send_sel_reg32_desc(p->devinfo, brw_last_inst, 0); - brw_eu_inst_set_send_desc(p->devinfo, brw_last_inst, $9.ud); - } else { - brw_eu_inst_set_send_sel_reg32_desc(p->devinfo, brw_last_inst, 1); - } - - if ($10.file == IMM) { - brw_eu_inst_set_send_sel_reg32_ex_desc(p->devinfo, brw_last_inst, 0); - brw_eu_inst_set_sends_ex_desc(p->devinfo, brw_last_inst, $10.ud, true); - } else { - brw_eu_inst_set_send_sel_reg32_ex_desc(p->devinfo, brw_last_inst, 1); - brw_eu_inst_set_send_ex_desc_ia_subreg_nr(p->devinfo, brw_last_inst, $10.subnr >> 2); - } - - brw_eu_inst_set_sfid(p->devinfo, brw_last_inst, $11); - brw_eu_inst_set_eot(p->devinfo, brw_last_inst, 
$13.end_of_thread); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $13.chan_offset); - - if ($12.ex_bso) { - brw_eu_inst_set_send_ex_bso(p->devinfo, brw_last_inst, 1); - /* Not settings src1 length, as its implied zero. */ - } - - brw_pop_insn_state(p); - } - ; - -sendop: - SEND_GFX4 - | SENDC_GFX4 - ; - -sendsop: - SEND_GFX12 - | SENDC_GFX12 - | SENDS - | SENDSC - ; - -sendopcode: - sendop { $$ = brw_next_insn(parser->p, $1); } - ; - -sendsopcode: - sendsop { $$ = brw_next_insn(parser->p, $1); } - ; - -sharedfunction: - NULL_TOKEN { $$ = GEN_SFID_NULL; } - | GATEWAY { $$ = GEN_SFID_MESSAGE_GATEWAY; } - | URB { $$ = GEN_SFID_URB; } - | TS_BTD { $$ = GEN_SFID_BINDLESS_THREAD_DISPATCH; } - | RENDER { $$ = GEN_SFID_RENDER_CACHE; } - | HDC_RO { $$ = GEN_SFID_HDC_READ_ONLY; } - | HDC0 { $$ = GEN_SFID_HDC0; } - | PIXEL_INTERP { $$ = GEN_SFID_PIXEL_INTERPOLATOR; } - | HDC1 { $$ = GEN_SFID_HDC1; } - | SAMPLER { $$ = GEN_SFID_SAMPLER; } - | HDC2 { $$ = GEN_SFID_HDC2; } - | RT_ACCEL { $$ = GEN_SFID_RAY_TRACE_ACCELERATOR; } - | SLM { $$ = GEN_SFID_SLM; } - | TGM { $$ = GEN_SFID_TGM; } - | UGM { $$ = GEN_SFID_UGM; } - ; - -exp2: - LONG { $$ = $1; } - | MINUS LONG { $$ = -$2; } - ; - -desc: - reg32a - | exp2 - { - $$ = brw_imm_ud($1); - } - ; - -ex_desc: - reg32a - | exp2 - { - $$ = brw_imm_ud($1); - } - ; - -reg32a: - addrreg region reg_type - { - $$ = set_direct_src_operand(&$1, $3); - $$ = stride($$, $2.vstride, $2.width, $2.hstride); - } - ; - - -/* Jump instruction */ -jumpinstruction: - predicate JMPI execsize relativelocation2 instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - i965_asm_set_instruction_options(parser, $5); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - brw_set_dest(p, brw_last_inst, brw_ip_reg()); - brw_set_src0(p, brw_last_inst, brw_ip_reg()); - brw_set_src1(p, brw_last_inst, $4); - brw_eu_inst_set_pred_control(p->devinfo, brw_last_inst, - brw_eu_inst_pred_control(p->devinfo, brw_last_inst)); - 
brw_pop_insn_state(p); - } - ; - -/* branch instruction */ -branchinstruction: - predicate ENDIF execsize JIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - brw_asm_label_use_jip(parser, $5); - brw_eu_inst_set_unused_uip(p->devinfo, brw_last_inst); - i965_asm_set_instruction_options(parser, $6); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - | ELSE execsize JIP JUMP_LABEL UIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $1); - brw_asm_label_use_jip(parser, $4); - brw_asm_label_use_uip(parser, $6); - i965_asm_set_instruction_options(parser, $7); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $2); - } - | predicate IF execsize JIP JUMP_LABEL UIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - i965_asm_set_instruction_options(parser, $8); - brw_asm_label_use_jip(parser, $5); - brw_asm_label_use_uip(parser, $7); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - | predicate GOTO execsize JIP JUMP_LABEL UIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - brw_asm_label_use_jip(parser, $5); - brw_asm_label_use_uip(parser, $7); - i965_asm_set_instruction_options(parser, $8); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - ; - -joininstruction: - predicate JOIN execsize JIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - brw_asm_label_use_jip(parser, $5); - brw_eu_inst_set_unused_uip(p->devinfo, brw_last_inst); - i965_asm_set_instruction_options(parser, $6); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - ; - -/* break instruction */ -breakinstruction: - predicate BREAK execsize JIP JUMP_LABEL UIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - - brw_next_insn(p, $2); - 
brw_asm_label_use_jip(parser, $5); - brw_asm_label_use_uip(parser, $7); - i965_asm_set_instruction_options(parser, $8); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - | predicate HALT execsize JIP JUMP_LABEL UIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - brw_asm_label_use_jip(parser, $5); - brw_asm_label_use_uip(parser, $7); - i965_asm_set_instruction_options(parser, $8); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - | predicate CONT execsize JIP JUMP_LABEL UIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - brw_asm_label_use_jip(parser, $5); - brw_asm_label_use_uip(parser, $7); - i965_asm_set_instruction_options(parser, $8); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - ; - -/* loop instruction */ -loopinstruction: - predicate WHILE execsize JIP JUMP_LABEL instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $2); - brw_asm_label_use_jip(parser, $5); - brw_eu_inst_set_unused_uip(p->devinfo, brw_last_inst); - i965_asm_set_instruction_options(parser, $6); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $3); - - brw_pop_insn_state(p); - } - | DO execsize instoptions - { - struct brw_codegen *p = parser->p; - brw_next_insn(p, $1); - } - ; - -/* sync instruction */ -syncinstruction: - predicate SYNC sync_function execsize sync_arg instoptions - { - struct brw_codegen *p = parser->p; - if (p->devinfo->ver < 12) { - error(&@2, "sync instruction is supported only on gfx12+\n"); - } - - if ($5.file == IMM && - $3 != TGL_SYNC_ALLRD && - $3 != TGL_SYNC_ALLWR) { - error(&@2, "Only allrd and allwr support immediate argument\n"); - } - - brw_set_default_access_mode(p, $6.access_mode); - brw_SYNC(p, $3); - i965_asm_set_instruction_options(parser, $6); - brw_eu_inst_set_exec_size(p->devinfo, brw_last_inst, $4); - brw_set_src0(p, 
brw_last_inst, $5); - brw_eu_inst_set_eot(p->devinfo, brw_last_inst, $6.end_of_thread); - brw_eu_inst_set_group(p->devinfo, brw_last_inst, $6.chan_offset); - - brw_pop_insn_state(p); - } - ; - -sync_function: - NOP { $$ = TGL_SYNC_NOP; } - | ALLRD - | ALLWR - | FENCE - | BAR - | HOST - ; - -sync_arg: - nullreg region reg_type - { - $$ = $1; - $$.vstride = $2.vstride; - $$.width = $2.width; - $$.hstride = $2.hstride; - $$.type = $3; - } - | immreg - ; - -/* Relative location */ -relativelocation2: - immreg - | reg32 - ; - -jumplabeltarget: - JUMP_LABEL_TARGET - { - brw_asm_label_set(parser, $1); - } - ; - -/* Destination register */ -dst: - dstoperand - | dstoperandex - ; - -dstoperand: - dstreg dstregion writemask reg_type - { - $$ = $1; - $$.vstride = BRW_VERTICAL_STRIDE_1; - $$.width = BRW_WIDTH_1; - $$.hstride = $2; - $$.type = $4; - $$.writemask = $3; - $$.swizzle = BRW_SWIZZLE_NOOP; - $$.subnr = $$.subnr * brw_type_size_bytes($4); - } - ; - -dstoperandex: - dstoperandex_typed dstregion writemask reg_type - { - $$ = $1; - $$.hstride = $2; - $$.type = $4; - $$.writemask = $3; - $$.subnr = $$.subnr * brw_type_size_bytes($4); - } - /* BSpec says "When the conditional modifier is present, updates - * to the selected flag register also occur. In this case, the - * register region fields of the ‘null’ operand are valid." 
- */ - | nullreg dstregion writemask reg_type - { - $$ = $1; - $$.vstride = BRW_VERTICAL_STRIDE_1; - $$.width = BRW_WIDTH_1; - $$.hstride = $2; - $$.writemask = $3; - $$.type = $4; - } - | threadcontrolreg - { - $$ = $1; - $$.hstride = 1; - $$.type = BRW_TYPE_UW; - } - ; - -dstoperandex_typed: - accreg - | addrreg - | channelenablereg - | controlreg - | flagreg - | ipreg - | maskreg - | notifyreg - | performancereg - | statereg - | scalarreg - ; - -dstreg: - directgenreg - { - $$ = $1; - $$.address_mode = BRW_ADDRESS_DIRECT; - } - | indirectgenreg - { - $$ = $1; - $$.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; - } - ; - -/* Source register */ -srcaccimm: - srcacc - | immreg - ; - -immreg: - immval imm_type - { - switch ($2) { - case BRW_TYPE_UD: - $$ = brw_imm_ud($1); - break; - case BRW_TYPE_D: - $$ = brw_imm_d($1); - break; - case BRW_TYPE_UW: - $$ = brw_imm_uw($1 | ($1 << 16)); - break; - case BRW_TYPE_W: - $$ = brw_imm_w($1); - break; - case BRW_TYPE_F: - $$ = brw_imm_reg(BRW_TYPE_F); - /* Set u64 instead of ud since DIM uses a 64-bit F-typed imm */ - $$.u64 = $1; - break; - case BRW_TYPE_V: - $$ = brw_imm_v($1); - break; - case BRW_TYPE_UV: - $$ = brw_imm_uv($1); - break; - case BRW_TYPE_VF: - $$ = brw_imm_vf($1); - break; - case BRW_TYPE_Q: - $$ = brw_imm_q($1); - break; - case BRW_TYPE_UQ: - $$ = brw_imm_uq($1); - break; - case BRW_TYPE_DF: - $$ = brw_imm_reg(BRW_TYPE_DF); - $$.d64 = $1; - break; - case BRW_TYPE_HF: - $$ = brw_imm_reg(BRW_TYPE_HF); - $$.ud = $1 | ($1 << 16); - break; - default: - error(&@2, "Unknown immediate type %s\n", - brw_reg_type_to_letters($2)); - } - } - ; - -reg32: - directgenreg region reg_type - { - $$ = set_direct_src_operand(&$1, $3); - $$ = stride($$, $2.vstride, $2.width, $2.hstride); - } - ; - -payload: - directsrcoperand - ; - -src: - directsrcoperand - | indirectsrcoperand - ; - -srcacc: - directsrcaccoperand - | indirectsrcoperand - ; - -srcimm: - directsrcoperand - | indirectsrcoperand - | immreg - ; - 
-directsrcaccoperand: - directsrcoperand - | negate abs accreg region reg_type - { - $$ = set_direct_src_operand(&$3, $5); - $$.negate = $1; - $$.abs = $2; - $$.vstride = $4.vstride; - $$.width = $4.width; - $$.hstride = $4.hstride; - } - ; - -srcarcoperandex: - srcarcoperandex_typed region reg_type - { - $$ = brw_make_reg($1.file, - $1.nr, - $1.subnr, - 0, - 0, - $3, - $2.vstride, - $2.width, - $2.hstride, - BRW_SWIZZLE_NOOP, - WRITEMASK_XYZW); - } - | nullreg region reg_type - { - $$ = set_direct_src_operand(&$1, $3); - $$.vstride = $2.vstride; - $$.width = $2.width; - $$.hstride = $2.hstride; - } - | threadcontrolreg - { - $$ = set_direct_src_operand(&$1, BRW_TYPE_UW); - } - ; - -srcarcoperandex_typed: - channelenablereg - | controlreg - | flagreg - | ipreg - | maskreg - | statereg - | scalarreg - ; - -indirectsrcoperand: - negate abs indirectgenreg indirectregion swizzle reg_type - { - $$ = brw_make_reg($3.file, - 0, - $3.subnr, - $1, // negate - $2, // abs - $6, - $4.vstride, - $4.width, - $4.hstride, - $5, - WRITEMASK_X); - - $$.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; - // brw_reg set indirect_offset to 0 so set it to valid value - $$.indirect_offset = $3.indirect_offset; - } - ; - -directgenreg_list: - directgenreg - | notifyreg - | addrreg - | performancereg - ; - -directsrcoperand: - negate abs directgenreg_list region swizzle reg_type - { - $$ = brw_make_reg($3.file, - $3.nr, - $3.subnr, - $1, - $2, - $6, - $4.vstride, - $4.width, - $4.hstride, - $5, - WRITEMASK_X); - } - | srcarcoperandex - ; - -/* Address register */ -addrparam: - addrreg exp - { - memset(&$$, '\0', sizeof($$)); - $$.subnr = $1.subnr; - $$.indirect_offset = $2; - } - | addrreg - ; - -/* Register files and register numbers */ -exp: - INTEGER { $$ = $1; } - | LONG { $$ = $1; } - ; - -subregnum: - DOT exp { $$ = $2; } - | /* empty */ %prec SUBREGNUM { $$ = 0; } - ; - -directgenreg: - GENREG subregnum - { - memset(&$$, '\0', sizeof($$)); - $$.file = FIXED_GRF; - $$.nr = $1 * 
reg_unit(parser->devinfo); - $$.subnr = $2; - } - ; - -indirectgenreg: - GENREGFILE LSQUARE addrparam RSQUARE - { - memset(&$$, '\0', sizeof($$)); - $$.file = FIXED_GRF; - $$.subnr = $3.subnr; - $$.indirect_offset = $3.indirect_offset; - } - ; - -addrreg: - ADDRREG subregnum - { - int subnr = 16; - - if ($2 > subnr) - error(&@2, "Address sub register number %d" - "out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_ADDRESS; - $$.subnr = $2; - } - ; - -accreg: - ACCREG subregnum - { - int nr_reg = 10; - - if ($1 > nr_reg) - error(&@1, "Accumulator register number %d" - " out of range\n", $1); - - memset(&$$, '\0', sizeof($$)); - $$.file = ARF; - $$.nr = BRW_ARF_ACCUMULATOR + $1; - $$.subnr = $2; - } - ; - -flagreg: - FLAGREG subregnum - { - // 2 flag reg - int nr_reg = 2; - int subnr = nr_reg; - - if ($1 > nr_reg) - error(&@1, "Flag register number %d" - " out of range \n", $1); - if ($2 > subnr) - error(&@2, "Flag subregister number %d" - " out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_FLAG | $1; - $$.subnr = $2; - } - ; - -maskreg: - MASKREG subregnum - { - if ($1 > 0) - error(&@1, "Mask register number %d" - " out of range\n", $1); - - $$.file = ARF; - $$.nr = BRW_ARF_MASK; - $$.subnr = $2; - } - ; - -notifyreg: - NOTIFYREG subregnum - { - int subnr = (parser->devinfo->ver >= 11) ? 
2 : 3; - if ($2 > subnr) - error(&@2, "Notification sub register number %d" - " out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_NOTIFICATION_COUNT; - $$.subnr = $2; - } - ; - -scalarreg: - SCALARREG subregnum - { - if ($2 > 31) - error(&@2, "Scalar sub register number %d" - " out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_SCALAR; - $$.subnr = $2; - } - -statereg: - STATEREG subregnum - { - if ($1 > 2) - error(&@1, "State register number %d" - " out of range\n", $1); - - if ($2 > 4) - error(&@2, "State sub register number %d" - " out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_STATE; - $$.subnr = $2; - } - ; - -controlreg: - CONTROLREG subregnum - { - if ($2 > 3) - error(&@2, "control sub register number %d" - " out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_CONTROL; - $$.subnr = $2; - } - ; - -ipreg: - IPREG { $$ = brw_ip_reg(); } - ; - -nullreg: - NULL_TOKEN { $$ = brw_null_reg(); } - ; - -threadcontrolreg: - THREADREG subregnum - { - if ($2 > 7) - error(&@2, "Thread control sub register number %d" - " out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_TDR; - $$.subnr = $2; - } - ; - -performancereg: - PERFORMANCEREG subregnum - { - int subnr; - if (parser->devinfo->ver >= 10) - subnr = 5; - else - subnr = 4; - - if ($2 > subnr) - error(&@2, "Performance sub register number %d" - " out of range\n", $2); - - $$.file = ARF; - $$.nr = BRW_ARF_TIMESTAMP; - $$.subnr = $2; - } - ; - -channelenablereg: - CHANNELENABLEREG subregnum - { - if ($1 > 0) - error(&@1, "Channel enable register number %d" - " out of range\n", $1); - - $$.file = ARF; - $$.nr = BRW_ARF_MASK; - $$.subnr = $2; - } - ; - -/* Immediate values */ -immval: - exp2 - { - $$ = $1; - } - | LSQUARE exp2 COMMA exp2 COMMA exp2 COMMA exp2 RSQUARE - { - $$ = ($2 << 0) | ($4 << 8) | ($6 << 16) | ($8 << 24); - } - ; - -/* Regions */ -dstregion: - /* empty */ - { - $$ = BRW_HORIZONTAL_STRIDE_1; - } - | LANGLE exp RANGLE - { - if ($2 != 0 && ($2 > 4 || 
!isPowerofTwo($2))) - error(&@2, "Invalid Horizontal stride %d\n", $2); - - $$ = ffs($2); - } - ; - -indirectregion: - region - | region_wh - ; - -region: - /* empty */ - { - $$ = stride($$, 0, 1, 0); - } - | LANGLE exp RANGLE - { - if ($2 != 0 && ($2 > 32 || !isPowerofTwo($2))) - error(&@2, "Invalid VertStride %d\n", $2); - - $$ = stride($$, $2, 1, 0); - } - | LANGLE exp COMMA exp COMMA exp RANGLE - { - - if ($2 != 0 && ($2 > 32 || !isPowerofTwo($2))) - error(&@2, "Invalid VertStride %d\n", $2); - - if ($4 > 16 || !isPowerofTwo($4)) - error(&@4, "Invalid width %d\n", $4); - - if ($6 != 0 && ($6 > 4 || !isPowerofTwo($6))) - error(&@6, "Invalid Horizontal stride in" - " region_wh %d\n", $6); - - $$ = stride($$, $2, $4, $6); - } - | LANGLE exp SEMICOLON exp COMMA exp RANGLE - { - if ($2 != 0 && ($2 > 32 || !isPowerofTwo($2))) - error(&@2, "Invalid VertStride %d\n", $2); - - if ($4 > 16 || !isPowerofTwo($4)) - error(&@4, "Invalid width %d\n", $4); - - if ($6 != 0 && ($6 > 4 || !isPowerofTwo($6))) - error(&@6, "Invalid Horizontal stride in" - " region_wh %d\n", $6); - - $$ = stride($$, $2, $4, $6); - } - | LANGLE VxH COMMA exp COMMA exp RANGLE - { - if ($4 > 16 || !isPowerofTwo($4)) - error(&@4, "Invalid width %d\n", $4); - - if ($6 != 0 && ($6 > 4 || !isPowerofTwo($6))) - error(&@6, "Invalid Horizontal stride in" - " region_wh %d\n", $6); - - $$ = brw_VxH_indirect(0, 0); - } - ; - -region_wh: - LANGLE exp COMMA exp RANGLE - { - if ($2 > 16 || !isPowerofTwo($2)) - error(&@2, "Invalid width %d\n", $2); - - if ($4 != 0 && ($4 > 4 || !isPowerofTwo($4))) - error(&@4, "Invalid Horizontal stride in" - " region_wh %d\n", $4); - - $$ = stride($$, 0, $2, $4); - $$.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL; - } - ; - -reg_type: - TYPE_F { $$ = BRW_TYPE_F; } - | TYPE_UD { $$ = BRW_TYPE_UD; } - | TYPE_D { $$ = BRW_TYPE_D; } - | TYPE_UW { $$ = BRW_TYPE_UW; } - | TYPE_W { $$ = BRW_TYPE_W; } - | TYPE_UB { $$ = BRW_TYPE_UB; } - | TYPE_B { $$ = BRW_TYPE_B; } - | TYPE_DF { $$ = 
BRW_TYPE_DF; } - | TYPE_UQ { $$ = BRW_TYPE_UQ; } - | TYPE_Q { $$ = BRW_TYPE_Q; } - | TYPE_HF { $$ = BRW_TYPE_HF; } - | TYPE_BF { $$ = BRW_TYPE_BF; } - | TYPE_HF8 { $$ = BRW_TYPE_HF8; } - | TYPE_BF8 { $$ = BRW_TYPE_BF8; } - ; - -imm_type: - reg_type { $$ = $1; } - | TYPE_V { $$ = BRW_TYPE_V; } - | TYPE_VF { $$ = BRW_TYPE_VF; } - | TYPE_UV { $$ = BRW_TYPE_UV; } - ; - -writemask: - /* empty */ - { - $$ = WRITEMASK_XYZW; - } - | DOT writemask_x writemask_y writemask_z writemask_w - { - $$ = $2 | $3 | $4 | $5; - } - ; - -writemask_x: - /* empty */ { $$ = 0; } - | X { $$ = 1 << BRW_CHANNEL_X; } - ; - -writemask_y: - /* empty */ { $$ = 0; } - | Y { $$ = 1 << BRW_CHANNEL_Y; } - ; - -writemask_z: - /* empty */ { $$ = 0; } - | Z { $$ = 1 << BRW_CHANNEL_Z; } - ; - -writemask_w: - /* empty */ { $$ = 0; } - | W { $$ = 1 << BRW_CHANNEL_W; } - ; - -swizzle: - /* empty */ - { - $$ = BRW_SWIZZLE_NOOP; - } - | DOT chansel - { - $$ = BRW_SWIZZLE4($2, $2, $2, $2); - } - | DOT chansel chansel chansel chansel - { - $$ = BRW_SWIZZLE4($2, $3, $4, $5); - } - ; - -chansel: - X - | Y - | Z - | W - ; - -/* Instruction prediction and modifiers */ -predicate: - /* empty */ - { - struct brw_codegen *p = parser->p; - brw_push_insn_state(p); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_flag_reg(p, 0, 0); - brw_set_default_predicate_inverse(p, false); - } - | LPAREN predstate flagreg predctrl RPAREN - { - struct brw_codegen *p = parser->p; - brw_push_insn_state(p); - brw_set_default_predicate_inverse(p, $2); - brw_set_default_flag_reg(p, $3.nr, $3.subnr); - brw_set_default_predicate_control(p, $4); - } - ; - -predstate: - /* empty */ { $$ = 0; } - | PLUS { $$ = 0; } - | MINUS { $$ = 1; } - ; - -predctrl: - /* empty */ { $$ = BRW_PREDICATE_NORMAL; } - | DOT X { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_X; } - | DOT Y { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_Y; } - | DOT Z { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_Z; } - | DOT W { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_W; } - | 
ANYV - | ALLV - | ANY2H - | ALL2H - | ANY4H - | ALL4H - | ANY8H - | ALL8H - | ANY16H - | ALL16H - | ANY32H - | ALL32H - ; - -/* Source Modification */ -negate: - /* empty */ { $$ = 0; } - | MINUS { $$ = 1; } - ; - -abs: - /* empty */ { $$ = 0; } - | ABS { $$ = 1; } - ; - -/* Flag (Conditional) Modifier */ -cond_mod: - condModifiers - { - $$.cond_modifier = $1; - $$.flag_reg_nr = 0; - $$.flag_subreg_nr = 0; - } - | condModifiers DOT flagreg - { - $$.cond_modifier = $1; - $$.flag_reg_nr = $3.nr; - $$.flag_subreg_nr = $3.subnr; - } - ; - -condModifiers: - /* empty */ { $$ = BRW_CONDITIONAL_NONE; } - | ZERO - | EQUAL - | NOT_ZERO - | NOT_EQUAL - | GREATER - | GREATER_EQUAL - | LESS - | LESS_EQUAL - | OVERFLOW - | ROUND_INCREMENT - | UNORDERED - ; - -/* message details for send */ -msgdesc: - MSGDESC_BEGIN msgdesc_parts MSGDESC_END { $$ = $2; } - ; - -msgdesc_parts: - SRC1_LEN ASSIGN INTEGER msgdesc_parts - { - $$ = $4; - $$.src1_len = $3; - } - | EX_BSO msgdesc_parts - { - $$ = $2; - $$.ex_bso = 1; - } - | INTEGER msgdesc_parts { $$ = $2; } - | ASSIGN msgdesc_parts { $$ = $2; } - | /* empty */ - { - memset(&$$, 0, sizeof($$)); - } - ; - -saturate: - /* empty */ { $$ = BRW_INSTRUCTION_NORMAL; } - | SATURATE { $$ = BRW_INSTRUCTION_SATURATE; } - ; - -/* Execution size */ -execsize: - /* empty */ %prec EMPTYEXECSIZE - { - $$ = 0; - } - | LPAREN exp2 RPAREN - { - if ($2 > 32 || !isPowerofTwo($2)) - error(&@2, "Invalid execution size %llu\n", $2); - - $$ = cvt($2) - 1; - } - ; - -/* Instruction options */ -instoptions: - /* empty */ - { - memset(&$$, 0, sizeof($$)); - } - | LCURLY instoption_list RCURLY - { - memset(&$$, 0, sizeof($$)); - $$ = $2; - } - ; - -instoption_list: - instoption_list COMMA instoption - { - memset(&$$, 0, sizeof($$)); - $$ = $1; - add_instruction_option(parser, &$$, $3); - } - | instoption_list instoption - { - memset(&$$, 0, sizeof($$)); - $$ = $1; - add_instruction_option(parser, &$$, $2); - } - | /* empty */ - { - memset(&$$, 0, sizeof($$)); - } - 
; - -depinfo: - REG_DIST_CURRENT - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = GEN_PIPE_NONE; - } - | REG_DIST_FLOAT - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = GEN_PIPE_FLOAT; - } - | REG_DIST_INT - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = GEN_PIPE_INT; - } - | REG_DIST_LONG - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = GEN_PIPE_LONG; - } - | REG_DIST_ALL - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = GEN_PIPE_ALL; - } - | REG_DIST_MATH - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = GEN_PIPE_MATH; - } - | REG_DIST_SCALAR - { - memset(&$$, 0, sizeof($$)); - $$.regdist = $1; - $$.pipe = GEN_PIPE_SCALAR; - } - | SBID_ALLOC - { - memset(&$$, 0, sizeof($$)); - $$.sbid = $1; - $$.mode = GEN_SBID_SET; - } - | SBID_WAIT_SRC - { - memset(&$$, 0, sizeof($$)); - $$.sbid = $1; - $$.mode = GEN_SBID_SRC; - } - | SBID_WAIT_DST - { - memset(&$$, 0, sizeof($$)); - $$.sbid = $1; - $$.mode = GEN_SBID_DST; - } - -instoption: - ALIGN1 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN1;} - | ALIGN16 { $$.type = INSTOPTION_FLAG; $$.uint_value = ALIGN16; } - | ACCWREN - { - if (parser->devinfo->ver >= 20) - error(&@1, "AccWrEnable not supported in Xe2+\n"); - $$.type = INSTOPTION_FLAG; - $$.uint_value = ACCWREN; - } - | BREAKPOINT { $$.type = INSTOPTION_FLAG; $$.uint_value = BREAKPOINT; } - | NODDCLR { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCLR; } - | NODDCHK { $$.type = INSTOPTION_FLAG; $$.uint_value = NODDCHK; } - | MASK_DISABLE { $$.type = INSTOPTION_FLAG; $$.uint_value = MASK_DISABLE; } - | EOT { $$.type = INSTOPTION_FLAG; $$.uint_value = EOT; } - | SWITCH { $$.type = INSTOPTION_FLAG; $$.uint_value = SWITCH; } - | ATOMIC { $$.type = INSTOPTION_FLAG; $$.uint_value = ATOMIC; } - | BRANCH_CTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = BRANCH_CTRL; } - | CMPTCTRL { $$.type = INSTOPTION_FLAG; $$.uint_value = CMPTCTRL; } - | WECTRL { $$.type = INSTOPTION_FLAG; 
$$.uint_value = WECTRL; } - | QTR_2Q { $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 8; } - | QTR_3Q { $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 16; } - | QTR_4Q { $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 24; } - | QTR_2H { $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 16; } - | QTR_2N - { - if (parser->devinfo->ver >= 20) - error(&@1, "Channel offset must be multiple of 8 in Xe2+\n"); - $$.type = INSTOPTION_CHAN_OFFSET; - $$.uint_value = 4; - } - | QTR_3N { $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 8; } - | QTR_4N - { - if (parser->devinfo->ver >= 20) - error(&@1, "Channel offset must be multiple of 8 in Xe2+\n"); - $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 12; - } - | QTR_5N { $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 16; } - | QTR_6N - { - if (parser->devinfo->ver >= 20) - error(&@1, "Channel offset must be multiple of 8 in Xe2+\n"); - $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 20; - } - | QTR_7N { $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 24; } - | QTR_8N - { - if (parser->devinfo->ver >= 20) - error(&@1, "Channel offset must be multiple of 8 in Xe2+\n"); - $$.type = INSTOPTION_CHAN_OFFSET; $$.uint_value = 28; - } - | depinfo { $$.type = INSTOPTION_DEP_INFO; $$.depinfo_value = $1; } - ; - -%% - -void -yyerror(YYLTYPE *loc, struct brw_asm_parser *parser, const char *msg) -{ - fprintf(stderr, "%s: %d: %s at \"%s\"\n", - parser->input_filename, loc->first_line, msg, - brw_asm_get_text(parser->scanner)); - ++parser->errors; -} diff --git a/src/intel/compiler/brw/brw_lex.l b/src/intel/compiler/brw/brw_lex.l deleted file mode 100644 index aab53a05735..00000000000 --- a/src/intel/compiler/brw/brw_lex.l +++ /dev/null @@ -1,452 +0,0 @@ -%option yylineno -%option nounput -%option bison-bridge bison-locations reentrant noyywrap -%option extra-type="struct brw_asm_parser *" -%option prefix="brw_asm_" -%{ -/* - * Copyright © 2018 Intel Corporation - * SPDX-License-Identifier: MIT - */ -#include -#include 
"brw_asm_internal.h" -#undef ALIGN16 -#include "brw_gram.tab.h" - -#define YY_NO_INPUT - -#define YY_USER_ACTION \ - yylloc->first_line = yylloc->last_line = yylineno; \ - yylloc->first_column = yycolumn; \ - yylloc->last_column = yycolumn + yyleng - 1; \ - yycolumn += yyleng; - -#define YY_USER_INIT \ - do { \ - yylineno = 1; \ - yycolumn = 1; \ - } while (0) -%} - -%x BLOCK_COMMENT -%x FILENAME -%x CHANNEL -%x REG -%x DOTSEL -%x LABEL -%x MSGDESC -%% - - struct brw_asm_parser *parser = yyextra; - struct brw_codegen *p = parser->p; - - /* eat up single line comment */ -\/\/.*[\r\n] { yycolumn = 1; } - - /* eat up multiline comment */ -\/\* { parser->saved_state = YYSTATE; BEGIN(BLOCK_COMMENT); } - -\*\/ { BEGIN(parser->saved_state); } - -. { } -[\r\n] { } - -\"[^\"]+\" { - char *name = malloc(yyleng - 1); - memmove(name, yytext + 1, yyleng - 2); - name[yyleng-1] = '\0'; - parser->input_filename = name; - } - - /* null register */ -null { BEGIN(REG); return NULL_TOKEN; } - - /* Opcodes */ -add { yylval->integer = BRW_OPCODE_ADD; return ADD; } -add3 { yylval->integer = BRW_OPCODE_ADD3; return ADD3; } -addc { yylval->integer = BRW_OPCODE_ADDC; return ADDC; } -and { yylval->integer = BRW_OPCODE_AND; return AND; } -asr { yylval->integer = BRW_OPCODE_ASR; return ASR; } -avg { yylval->integer = BRW_OPCODE_AVG; return AVG; } -bfe { yylval->integer = BRW_OPCODE_BFE; return BFE; } -bfi1 { yylval->integer = BRW_OPCODE_BFI1; return BFI1; } -bfi2 { yylval->integer = BRW_OPCODE_BFI2; return BFI2; } -bfrev { yylval->integer = BRW_OPCODE_BFREV; return BFREV; } -brc { yylval->integer = BRW_OPCODE_BRC; return BRC; } -brd { yylval->integer = BRW_OPCODE_BRD; return BRD; } -break { yylval->integer = BRW_OPCODE_BREAK; return BREAK; } -call { yylval->integer = BRW_OPCODE_CALL; return CALL; } -calla { yylval->integer = BRW_OPCODE_CALLA; return CALLA; } -cbit { yylval->integer = BRW_OPCODE_CBIT; return CBIT; } -cmp { yylval->integer = BRW_OPCODE_CMP; return CMP; } -cmpn { yylval->integer 
= BRW_OPCODE_CMPN; return CMPN; } -cont { yylval->integer = BRW_OPCODE_CONTINUE; return CONT; } -csel { yylval->integer = BRW_OPCODE_CSEL; return CSEL; } -do { yylval->integer = BRW_OPCODE_DO; return DO; } -dp2 { yylval->integer = BRW_OPCODE_DP2; return DP2; } -dp3 { yylval->integer = BRW_OPCODE_DP3; return DP3; } -dp4 { yylval->integer = BRW_OPCODE_DP4; return DP4; } -dp4a { yylval->integer = BRW_OPCODE_DP4A; return DP4A; } -dpas { yylval->integer = BRW_OPCODE_DPAS; return DPAS; } -dph { yylval->integer = BRW_OPCODE_DPH; return DPH; } -else { yylval->integer = BRW_OPCODE_ELSE; return ELSE; } -endif { yylval->integer = BRW_OPCODE_ENDIF; return ENDIF; } -fbh { yylval->integer = BRW_OPCODE_FBH; return FBH; } -fbl { yylval->integer = BRW_OPCODE_FBL; return FBL; } -frc { yylval->integer = BRW_OPCODE_FRC; return FRC; } -goto { yylval->integer = BRW_OPCODE_GOTO; return GOTO; } -halt { yylval->integer = BRW_OPCODE_HALT; return HALT; } -if { yylval->integer = BRW_OPCODE_IF; return IF; } -illegal { yylval->integer = BRW_OPCODE_ILLEGAL; return ILLEGAL; } -join { yylval->integer = BRW_OPCODE_JOIN; return JOIN; } -jmpi { yylval->integer = BRW_OPCODE_JMPI; return JMPI; } -line { yylval->integer = BRW_OPCODE_LINE; return LINE; } -lrp { yylval->integer = BRW_OPCODE_LRP; return LRP; } -lzd { yylval->integer = BRW_OPCODE_LZD; return LZD; } -mac { yylval->integer = BRW_OPCODE_MAC; return MAC; } -mach { yylval->integer = BRW_OPCODE_MACH; return MACH; } -mad { yylval->integer = BRW_OPCODE_MAD; return MAD; } -madm { yylval->integer = BRW_OPCODE_MADM; return MADM; } -mov { yylval->integer = BRW_OPCODE_MOV; return MOV; } -movi { yylval->integer = BRW_OPCODE_MOVI; return MOVI; } -mul { yylval->integer = BRW_OPCODE_MUL; return MUL; } -nop { yylval->integer = BRW_OPCODE_NOP; return NOP; } -not { yylval->integer = BRW_OPCODE_NOT; return NOT; } -or { yylval->integer = BRW_OPCODE_OR; return OR; } -pln { yylval->integer = BRW_OPCODE_PLN; return PLN; } -ret { yylval->integer = BRW_OPCODE_RET; 
return RET; } -rndd { yylval->integer = BRW_OPCODE_RNDD; return RNDD; } -rnde { yylval->integer = BRW_OPCODE_RNDE; return RNDE; } -rndu { yylval->integer = BRW_OPCODE_RNDU; return RNDU; } -rndz { yylval->integer = BRW_OPCODE_RNDZ; return RNDZ; } -rol { yylval->integer = BRW_OPCODE_ROL; return ROL; } -ror { yylval->integer = BRW_OPCODE_ROR; return ROR; } -sel { yylval->integer = BRW_OPCODE_SEL; return SEL; } -send { - yylval->integer = BRW_OPCODE_SEND; - return parser->devinfo->ver < 12 ? SEND_GFX4 : SEND_GFX12; - } -sendc { - yylval->integer = BRW_OPCODE_SENDC; - return parser->devinfo->ver < 12 ? SENDC_GFX4 : SENDC_GFX12; - } -sends { yylval->integer = BRW_OPCODE_SENDS; return SENDS; } -sendsc { yylval->integer = BRW_OPCODE_SENDSC; return SENDSC; } -shl { yylval->integer = BRW_OPCODE_SHL; return SHL; } -shr { yylval->integer = BRW_OPCODE_SHR; return SHR; } -smov { yylval->integer = BRW_OPCODE_SMOV; return SMOV; } -srnd { yylval->integer = BRW_OPCODE_SRND; return SRND; } -subb { yylval->integer = BRW_OPCODE_SUBB; return SUBB; } -wait { yylval->integer = BRW_OPCODE_WAIT; return WAIT; } -while { yylval->integer = BRW_OPCODE_WHILE; return WHILE; } -xor { yylval->integer = BRW_OPCODE_XOR; return XOR; } -sync { yylval->integer = BRW_OPCODE_SYNC; return SYNC; } -math { yylval->integer = BRW_OPCODE_MATH; return MATH; } - - /* extended math functions */ -cos { yylval->integer = GEN_MATH_COS; return COS; } -exp { yylval->integer = GEN_MATH_EXP; return EXP; } -fdiv { yylval->integer = GEN_MATH_FDIV; return FDIV; } -inv { yylval->integer = GEN_MATH_INV; return INV; } -invm { yylval->integer = GEN_MATH_INVM; return INVM; } -intdiv { - yylval->integer = GEN_MATH_INT_DIV_QUOTIENT; - return INTDIV; - } -intdivmod { - yylval->integer = - GEN_MATH_INT_DIV_BOTH; - return INTDIVMOD; - } -intmod { - yylval->integer = GEN_MATH_INT_DIV_REMAINDER; - return INTMOD; - } -log { yylval->integer = GEN_MATH_LOG; return LOG; } -pow { yylval->integer = GEN_MATH_POW; return POW; } -rsq { 
yylval->integer = GEN_MATH_RSQ; return RSQ; } -rsqrtm { yylval->integer = GEN_MATH_RSQRTM; return RSQRTM; } -sin { yylval->integer = GEN_MATH_SIN; return SIN; } -sqrt { yylval->integer = GEN_MATH_SQRT; return SQRT; } - - /* sync instruction */ -allrd { yylval->integer = TGL_SYNC_ALLRD; return ALLRD; } -allwr { yylval->integer = TGL_SYNC_ALLWR; return ALLWR; } -fence { yylval->integer = TGL_SYNC_FENCE; return FENCE; } -bar { yylval->integer = TGL_SYNC_BAR; return BAR; } -host { yylval->integer = TGL_SYNC_HOST; return HOST; } - - /* shared functions for send instruction */ -gateway { return GATEWAY; } -hdc0 { return HDC0; } -hdc1 { return HDC1; } -hdc2 { return HDC2; } -"hdc:ro" { return HDC_RO; } -pi { return PIXEL_INTERP; } -render { return RENDER; } -"rt accel" { return RT_ACCEL; } -sampler { return SAMPLER; } -"ts/btd" { return TS_BTD; } -urb { return URB; } -slm { return SLM; } -tgm { return TGM; } -ugm { return UGM; } - -";" { return SEMICOLON; } -":" { return COLON; } -"(" { return LPAREN; } -")" { return RPAREN; } -"{" { return LCURLY; } -"}" { return RCURLY; } -"[" { return LSQUARE; } -"]" { return RSQUARE; } -"<" { return LANGLE; } -">" { return RANGLE; } -"," { return COMMA; } -"." { return DOT; } -"+" { return PLUS; } -"-" { return MINUS; } -"~" { return MINUS; } -"(abs)" { return ABS; } - - -"VxH" { return VxH; } -"<" { return LANGLE; } -[0-9][0-9]* { - yylval->integer = strtoul(yytext, NULL, 10); - return INTEGER; - } -">" { return RANGLE; } -"," { return COMMA; } -"." { BEGIN(DOTSEL); return DOT; } -";" { return SEMICOLON; } - -"x" { yylval->integer = BRW_CHANNEL_X; return X; } -"y" { yylval->integer = BRW_CHANNEL_Y; return Y; } -"z" { yylval->integer = BRW_CHANNEL_Z; return Z; } -"w" { yylval->integer = BRW_CHANNEL_W; return W; } -[0-9][0-9]* { - yylval->integer = strtoul(yytext, NULL, 10); - BEGIN(REG); - return INTEGER; - } -. { yyless(0); BEGIN(INITIAL); } -. 
{ yyless(0); BEGIN(INITIAL); }
-
- /*
-  * NOTE(review): BEGIN(CHANNEL), BEGIN(REG), BEGIN(LABEL) and BEGIN(MSGDESC)
-  * are used below, yet no rule in this section carries a start-condition
-  * prefix, and "[0-9][0-9]*" and "." each appear more than once.  Presumably
-  * the prefixes were lost from this text -- confirm against the original
-  * lexer before relying on the rule order shown here.
-  */
-
- /* Access mode */
-"align1" { return ALIGN1; }
-"align16" { return ALIGN16; }
-
- /* Accumulator write control */
-AccWrEnable { return ACCWREN; }
-
- /* Mask control (formerly WECtrl/Write Enable Control) */
-"WE_all" { return WECTRL; }
-
- /* Compaction control */
-compacted { return CMPTCTRL; }
-
- /* Debug control */
-breakpoint { return BREAKPOINT; }
-
- /* Dependency control */
-NoDDClr { return NODDCLR; }
-NoDDChk { return NODDCHK; }
-
- /* End of thread */
-EOT { return EOT; }
-
- /* Mask control */
-nomask { return MASK_DISABLE; }
-
- /* Channel: each channel letter carries its BRW_CHANNEL_* value. */
-"x" { yylval->integer = BRW_CHANNEL_X; return X; }
-"y" { yylval->integer = BRW_CHANNEL_Y; return Y; }
-"z" { yylval->integer = BRW_CHANNEL_Z; return Z; }
-"w" { yylval->integer = BRW_CHANNEL_W; return W; }
-[0-9][0-9]* {
-                yylval->integer = strtoul(yytext, NULL, 10);
-                return INTEGER;
-             }
-"." { return DOT; }
- /* Anything else: push the character back and rescan it in INITIAL. */
-. { yyless(0); BEGIN(INITIAL); }
-
-
- /* Predicate Control */
-".anyv" { yylval->integer = BRW_PREDICATE_ALIGN1_ANYV; return ANYV; }
-".allv" { yylval->integer = BRW_PREDICATE_ALIGN1_ALLV; return ALLV; }
-".any2h" { yylval->integer = BRW_PREDICATE_ALIGN1_ANY2H; return ANY2H; }
-".all2h" { yylval->integer = BRW_PREDICATE_ALIGN1_ALL2H; return ALL2H; }
-".any4h" { yylval->integer = BRW_PREDICATE_ALIGN16_ANY4H; return ANY4H; }
-".all4h" { yylval->integer = BRW_PREDICATE_ALIGN16_ALL4H; return ALL4H; }
-".any8h" { yylval->integer = BRW_PREDICATE_ALIGN1_ANY8H; return ANY8H; }
-".all8h" { yylval->integer = BRW_PREDICATE_ALIGN1_ALL8H; return ALL8H; }
-".any16h" { yylval->integer = BRW_PREDICATE_ALIGN1_ANY16H; return ANY16H; }
-".all16h" { yylval->integer = BRW_PREDICATE_ALIGN1_ALL16H; return ALL16H; }
-".any32h" { yylval->integer = BRW_PREDICATE_ALIGN1_ANY32H; return ANY32H; }
-".all32h" { yylval->integer = BRW_PREDICATE_ALIGN1_ALL32H; return ALL32H; }
-
- /* Saturation */
-".sat" { return SATURATE; }
-
- /* Thread control */
-atomic { return ATOMIC; }
-switch { return SWITCH; }
-
- /* Branch control */
-BranchCtrl { return BRANCH_CTRL; }
-
- /* Quarter Control: 1H, 1N and 1Q are consumed without returning a token. */
-1[HNQ] { }
-"2Q" { return QTR_2Q; }
-"3Q" { return QTR_3Q; }
-"4Q" { return QTR_4Q; }
-"2H" { return QTR_2H; }
-"2N" { return QTR_2N; }
-"3N" { return QTR_3N; }
-"4N" { return QTR_4N; }
-"5N" { return QTR_5N; }
-"6N" { return QTR_6N; }
-"7N" { return QTR_7N; }
-"8N" { return QTR_8N; }
-
- /* data types: the leading ':' is optional. */
-:?B { return TYPE_B; }
-:?BF { return TYPE_BF; }
-:?BF8 { return TYPE_BF8; }
-:?D { return TYPE_D; }
-:?DF { return TYPE_DF; }
-:?F { return TYPE_F; }
-:?HF { return TYPE_HF; }
-:?HF8 { return TYPE_HF8; }
-:?Q { return TYPE_Q; }
-:?UB { return TYPE_UB; }
-:?UD { return TYPE_UD; }
-:?UW { return TYPE_UW; }
-:?UQ { return TYPE_UQ; }
-:?UV { return TYPE_UV; }
-:?V { return TYPE_V; }
-:?VF { return TYPE_VF; }
-:?W { return TYPE_W; }
-
- /* Address registers */
-"a0" { return ADDRREG; }
-
- /* accumulator registers: the number is the digits after "acc". */
-"acc"[0-9]+ { yylval->integer = atoi(yytext + 3); return ACCREG; }
-
- /* channel enable registers */
-"ce0" { return CHANNELENABLEREG; }
-
- /* control registers */
-"cr0" { return CONTROLREG; }
-
- /*
-  * flag registers: f0 or f1.  '|' is a literal inside a character class, so
-  * the class has to be spelled [01]; writing [0|1] would also accept "f|".
-  * A match switches the scanner to the CHANNEL start condition.
-  */
-"f"[01] { BEGIN(CHANNEL); yylval->integer = atoi(yytext + 1); return FLAGREG; }
-
- /* scalar register */
-"s0" { return SCALARREG; }
-
- /* state register: the number is the digits after "sr". */
-sr[0-9]+ { yylval->integer = atoi(yytext + 2); return STATEREG; }
-
- /* notification registers: switches to the REG start condition. */
-"n0" { BEGIN(REG); return NOTIFYREG; }
-
- /* IP register */
-"ip" { return IPREG; }
-
- /* Thread control register */
-"tdr0" { return THREADREG; }
-
- /* performance register: switches to the REG start condition. */
-"tm0" { BEGIN(REG); return PERFORMANCEREG; }
-
- /* 'g' or 'r' followed by a number: numbered general register, enters REG. */
-[gr][0-9]+ {
-               yylval->integer = atoi(yytext + 1);
-               BEGIN(REG); return GENREG;
-            }
- /* A bare 'g' or 'r' only names the register file. */
-[gr] { return GENREGFILE; }
-"mask"[0-9]+ { yylval->integer = atoi(yytext + 4); return MASKREG; }
-
- /* Conditional modifiers (".e"/".z" share Z, ".ne"/".nz" share NZ). */
-".e" { yylval->integer = BRW_CONDITIONAL_Z; return EQUAL; }
-".g" { yylval->integer = BRW_CONDITIONAL_G; return GREATER; }
-".ge" { yylval->integer = BRW_CONDITIONAL_GE; return GREATER_EQUAL; }
-".l" { yylval->integer = BRW_CONDITIONAL_L; return LESS; }
-".le" { yylval->integer = BRW_CONDITIONAL_LE; return LESS_EQUAL; }
-".ne" { yylval->integer = BRW_CONDITIONAL_NZ; return NOT_EQUAL; }
-".nz" { yylval->integer = BRW_CONDITIONAL_NZ; return NOT_ZERO; }
-".o" { yylval->integer = BRW_CONDITIONAL_O; return OVERFLOW; }
-".r" { yylval->integer = BRW_CONDITIONAL_R; return ROUND_INCREMENT; }
-".u" { yylval->integer = BRW_CONDITIONAL_U; return UNORDERED; }
-".z" { yylval->integer = BRW_CONDITIONAL_Z; return ZERO; }
-
- /* Jump targets: both prefixes switch to the LABEL start condition. */
-"JIP: " { BEGIN(LABEL); return JIP; }
-"UIP: " { BEGIN(LABEL); return UIP; }
-[ \t]+ { }
-
- /* Message descriptor: switches to the MSGDESC start condition. */
-"MsgDesc: " { BEGIN(MSGDESC); return MSGDESC_BEGIN; }
-ex_bso { return EX_BSO; }
-src1_len { return SRC1_LEN; }
-"=" { return ASSIGN; }
-[0-9][0-9]* {
-                yylval->integer = strtoul(yytext, NULL, 10);
-                return INTEGER;
-             }
- /* '{' ends the descriptor; it is pushed back so INITIAL rescans it. */
-"{" { yyless(0); BEGIN(INITIAL); return MSGDESC_END; }
- /* Any other character is discarded. */
-. { }
- /* Restart column counting on every newline. */
-\n { yycolumn = 1; }
-
- /* Hex literal: lowercase digits only; the "0x" prefix is skipped. */
-"0x"[0-9a-f][0-9a-f]* {
-                          yylval->llint = strtoull(yytext + 2, NULL, 16);
-                          return LONG;
-                       }
- /* Decimal literal. */
-[0-9][0-9]* {
-                yylval->llint = strtoll(yytext, NULL, 10);
-                return LONG;
-             }
-
- /* jump label target */
-[a-zA-Z_][0-9a-zA-Z_]*":" {
-        yylval->string = ralloc_strdup(p->mem_ctx, yytext);
-        /* Stomp the trailing ':' */
-        yylval->string[yyleng - 1] = '\0';
-        return JUMP_LABEL_TARGET;
-}
-
- /* jump label */
-