diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index b4d704c20f8..3588e1135b7 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -776,10 +776,6 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) { int err = 0; - /* Clear the Compr4 instruction compression bit. */ - if (_reg_file == BRW_MESSAGE_REGISTER_FILE) - _reg_nr &= ~BRW_MRF_COMPR4; - if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 0bc55628da0..9278bc68e70 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1510,12 +1510,6 @@ void gfx6_math(struct brw_codegen *p, struct brw_reg src0, struct brw_reg src1); -void brw_oword_block_read(struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg mrf, - uint32_t offset, - uint32_t bind_table_index); - unsigned brw_scratch_surface_idx(const struct brw_codegen *p); void gfx7_block_read_scratch(struct brw_codegen *p, @@ -1726,12 +1720,6 @@ next_offset(const struct intel_device_info *devinfo, void *store, int offset) /** Maximum SEND message length */ #define BRW_MAX_MSG_LENGTH 15 -/** First MRF register used by pull loads */ -#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13) - -/** First MRF register used by spills */ -#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 
16 : 13) - #ifdef __cplusplus } #endif diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 36ef004c6b9..38e192591bd 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -840,12 +840,10 @@ enum ENUM_PACKED brw_predicate { enum ENUM_PACKED brw_reg_file { BRW_ARCHITECTURE_REGISTER_FILE = 0, BRW_GENERAL_REGISTER_FILE = 1, - BRW_MESSAGE_REGISTER_FILE = 2, BRW_IMMEDIATE_VALUE = 3, ARF = BRW_ARCHITECTURE_REGISTER_FILE, FIXED_GRF = BRW_GENERAL_REGISTER_FILE, - MRF = BRW_MESSAGE_REGISTER_FILE, IMM = BRW_IMMEDIATE_VALUE, /* These are not hardware values */ @@ -885,8 +883,6 @@ enum ENUM_PACKED gfx10_align1_3src_exec_type { #define BRW_ARF_TDR 0xB0 #define BRW_ARF_TIMESTAMP 0xC0 -#define BRW_MRF_COMPR4 (1 << 7) - #define BRW_AMASK 0 #define BRW_IMASK 1 #define BRW_LMASK 2 diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 80013cb8c41..ef6de5c0785 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -35,31 +35,12 @@ #include "util/ralloc.h" -static void -gfx7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg) -{ - /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"): - * "The send with EOT should use register space R112-R127 for <src>. This is - * to enable loading of a new thread into the same slot while the message - * with EOT for current thread is pending dispatch." - * - * Since we're pretending to have 16 MRFs anyway, we may as well use the - * registers required for messages with EOT. 
- */ - if (reg->file == BRW_MESSAGE_REGISTER_FILE) { - reg->file = BRW_GENERAL_REGISTER_FILE; - reg->nr += GFX7_MRF_HACK_START; - } -} - void brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) { const struct intel_device_info *devinfo = p->devinfo; - if (dest.file == BRW_MESSAGE_REGISTER_FILE) - assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver)); - else if (dest.file == BRW_GENERAL_REGISTER_FILE) + if (dest.file == BRW_GENERAL_REGISTER_FILE) assert(dest.nr < XE2_MAX_GRF); /* The hardware has a restriction where a destination of size Byte with @@ -74,8 +55,6 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) dest.hstride = BRW_HORIZONTAL_STRIDE_2; } - gfx7_convert_mrf_to_grf(p, &dest); - if (devinfo->ver >= 12 && (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND || brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) { @@ -118,8 +97,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) } else { brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); - if (dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE) { + if (dest.file == BRW_GENERAL_REGISTER_FILE) { assert(dest.writemask != 0); } /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: @@ -173,19 +151,15 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) { const struct intel_device_info *devinfo = p->devinfo; - if (reg.file == BRW_MESSAGE_REGISTER_FILE) - assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver)); - else if (reg.file == BRW_GENERAL_REGISTER_FILE) + if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < XE2_MAX_GRF); - gfx7_convert_mrf_to_grf(p, &reg); - if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND || brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC || brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS || brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) { /* Any source 
modifiers or regions will be ignored, since this just - * identifies the MRF/GRF to start reading the message contents from. + * identifies the GRF to start reading the message contents from. * Check for some likely failures. */ assert(!reg.negate); @@ -323,9 +297,6 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || reg.nr != BRW_ARF_ACCUMULATOR); - gfx7_convert_mrf_to_grf(p, &reg); - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); brw_inst_set_src1_abs(devinfo, inst, reg.abs); brw_inst_set_src1_negate(devinfo, inst, reg.negate); @@ -631,8 +602,6 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, const struct intel_device_info *devinfo = p->devinfo; brw_inst *inst = next_insn(p, opcode); - gfx7_convert_mrf_to_grf(p, &dest); - assert(dest.nr < XE2_MAX_GRF); if (devinfo->ver >= 10) @@ -765,8 +734,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, } } else { - assert(dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE); + assert(dest.file == BRW_GENERAL_REGISTER_FILE); assert(dest.type == BRW_REGISTER_TYPE_F || dest.type == BRW_REGISTER_TYPE_DF || dest.type == BRW_REGISTER_TYPE_D || @@ -1474,8 +1442,7 @@ void gfx6_math(struct brw_codegen *p, const struct intel_device_info *devinfo = p->devinfo; brw_inst *insn = next_insn(p, BRW_OPCODE_MATH); - assert(dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE); + assert(dest.file == BRW_GENERAL_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); @@ -1552,70 +1519,6 @@ gfx7_block_read_scratch(struct brw_codegen *p, true); /* header present */ } -/** - * Read float[4] vectors from the data port constant cache. - * Location (in buffer) should be a multiple of 16. - * Used for fetching shader constants. 
- */ -void brw_oword_block_read(struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg mrf, - uint32_t offset, - uint32_t bind_table_index) -{ - const struct intel_device_info *devinfo = p->devinfo; - const unsigned target_cache = GFX6_SFID_DATAPORT_CONSTANT_CACHE; - const unsigned exec_size = 1 << brw_get_default_exec_size(p); - const struct tgl_swsb swsb = brw_get_default_swsb(p); - - /* On newer hardware, offset is in units of owords. */ - offset /= 16; - - mrf = retype(mrf, BRW_REGISTER_TYPE_UD); - - brw_push_insn_state(p); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_flag_reg(p, 0, 0); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); - brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - - /* set message header global offset field (reg 0, element 2) */ - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_set_default_swsb(p, tgl_swsb_null()); - brw_MOV(p, - retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - mrf.nr, - 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(offset)); - brw_pop_insn_state(p); - - brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); - - brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); - - brw_inst_set_sfid(devinfo, insn, target_cache); - - /* cast dest to a uword[8] vector */ - dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, mrf); - - brw_set_desc(p, insn, - brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) | - brw_dp_read_desc(devinfo, bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - BRW_DATAPORT_READ_TARGET_DATA_CACHE)); - - brw_pop_insn_state(p); -} - brw_inst * gfx9_fb_READ(struct brw_codegen *p, struct brw_reg dst, diff --git a/src/intel/compiler/brw_eu_validate.c 
b/src/intel/compiler/brw_eu_validate.c index 1084a3e68ff..ad9fc99b6f0 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -259,21 +259,6 @@ invalid_values(const struct brw_isa_info *isa, const brw_inst *inst) if (inst_is_send(isa, inst)) return error_msg; - if (num_sources == 3) { - /* Nothing to test: - * No 3-src instructions on Gfx4-5 - * No reg file bits on Gfx6-10 (align16) - * No invalid encodings on Gfx10-12 (align1) - */ - } else { - ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF || - (num_sources > 0 && - brw_inst_src0_reg_file(devinfo, inst) == MRF) || - (num_sources > 1 && - brw_inst_src1_reg_file(devinfo, inst) == MRF), - "invalid register file encoding"); - } - if (error_msg.str) return error_msg; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index e15495a62c3..d7406afa27a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -62,7 +62,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, this->dst = dst; this->sources = sources; this->exec_size = exec_size; - this->base_mrf = -1; assert(dst.file != IMM && dst.file != UNIFORM); @@ -75,7 +74,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, case VGRF: case ARF: case FIXED_GRF: - case MRF: case ATTR: this->size_written = dst.component_size(exec_size); break; @@ -538,7 +536,6 @@ fs_reg::is_contiguous() const case FIXED_GRF: return hstride == BRW_HORIZONTAL_STRIDE_1 && vstride == width + hstride; - case MRF: case VGRF: case ATTR: return stride == 1; @@ -948,8 +945,6 @@ fs_inst::size_read(int arg) const case VGRF: case ATTR: return components_read(arg) * src[arg].component_size(exec_size); - case MRF: - unreachable("MRF registers are not allowed as sources"); } return 0; } @@ -1018,59 +1013,6 @@ fs_inst::flags_written(const intel_device_info *devinfo) const } } -/** - * Returns how many MRFs an FS opcode will write over. 
- * - * Note that this is not the 0 or 1 implied writes in an actual gen - * instruction -- the FS opcodes often generate MOVs in addition. - */ -unsigned -fs_inst::implied_mrf_writes() const -{ - if (mlen == 0) - return 0; - - if (base_mrf == -1) - return 0; - - switch (opcode) { - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - return 1 * exec_size / 8; - case SHADER_OPCODE_POW: - case SHADER_OPCODE_INT_QUOTIENT: - case SHADER_OPCODE_INT_REMAINDER: - return 2 * exec_size / 8; - case SHADER_OPCODE_TEX: - case FS_OPCODE_TXB: - case SHADER_OPCODE_TXD: - case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_MCS: - case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: - case SHADER_OPCODE_TG4_BIAS: - case SHADER_OPCODE_TG4_EXPLICIT_LOD: - case SHADER_OPCODE_TG4_IMPLICIT_LOD: - case SHADER_OPCODE_TG4_OFFSET_LOD: - case SHADER_OPCODE_TG4_OFFSET_BIAS: - case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXS: - case SHADER_OPCODE_LOD: - case SHADER_OPCODE_SAMPLEINFO: - return 1; - case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - return mlen; - default: - unreachable("not reached"); - } -} - bool fs_inst::has_sampler_residency() const { @@ -2274,9 +2216,6 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f case FIXED_GRF: fprintf(file, "g%d", inst->dst.nr); break; - case MRF: - fprintf(file, "m%d", inst->dst.nr); - break; case BAD_FILE: fprintf(file, "(null)"); break; @@ -2333,9 +2272,6 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f case FIXED_GRF: fprintf(file, "g%d", inst->src[i].nr); break; - case MRF: - fprintf(file, "***m%d***", inst->src[i].nr); - break; case ATTR: fprintf(file, "attr%d", inst->src[i].nr); break; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 6fea62c7c2d..98404753a5c 100644 --- 
a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -322,8 +322,6 @@ public: fs_reg outputs[VARYING_SLOT_MAX]; fs_reg dual_src_output; int first_non_payload_grf; - /** Either BRW_MAX_GRF or GFX7_MRF_HACK_START */ - unsigned max_grf; bool failed; char *fail_msg; @@ -462,9 +460,6 @@ private: void generate_ddy(const fs_inst *inst, struct brw_reg dst, struct brw_reg src); void generate_scratch_header(fs_inst *inst, struct brw_reg dst); - void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); void generate_halt(fs_inst *inst); diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index a32c878757b..00f38fbf50a 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -565,16 +565,6 @@ namespace { (is_grf(inst->src[i]) && inst->eot)) constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true; } - - /* The location of the Gfx7 MRF hack registers is hard-coded in the - * rest of the compiler back-end. Don't attempt to move them around. - */ - assert(inst->dst.file != MRF); - - for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { - const unsigned reg = GFX7_MRF_HACK_START + inst->base_mrf + i; - constrained[p.atom_of_reg(reg)] = true; - } } return constrained; diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 3f0ebe31382..55daf49e176 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -619,16 +619,6 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type, * The following restrictions apply for align1 mode: Scalar source is * supported. Source and destination horizontal stride must be the * same. - * - * From the Haswell PRM Volume 2b "Command Reference - Instructions", page - * 134 ("Extended Math Function"): - * - * Scalar source is supported. 
Source and destination horizontal stride - * must be 1. - * - * and similar language exists for IVB and SNB. Pre-SNB, math instructions - * are sends, so the sources are moved to MRF's and there are no - * restrictions. */ if (inst->is_math()) return stride == inst->dst.stride || stride == 0; diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 296c517362c..102914333ea 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -196,7 +196,6 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) a->sfid == b->sfid && a->desc == b->desc && a->size_written == b->size_written && - a->base_mrf == b->base_mrf && a->check_tdr == b->check_tdr && a->send_has_side_effects == b->send_has_side_effects && a->eot == b->eot && diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 34c81b80068..6a7026c3622 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -44,8 +44,6 @@ brw_file_from_reg(fs_reg *reg) case FIXED_GRF: case VGRF: return BRW_GENERAL_REGISTER_FILE; - case MRF: - return BRW_MESSAGE_REGISTER_FILE; case IMM: return BRW_IMMEDIATE_VALUE; case BAD_FILE: @@ -63,9 +61,6 @@ brw_reg_from_fs_reg(const struct intel_device_info *devinfo, fs_inst *inst, struct brw_reg brw_reg; switch (reg->file) { - case MRF: - assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver)); - FALLTHROUGH; case VGRF: if (reg->stride == 0) { brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0); @@ -865,27 +860,6 @@ fs_generator::generate_scratch_header(fs_inst *inst, struct brw_reg dst) brw_inst_set_no_dd_check(p->devinfo, insn, true); } -void -fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - assert(type_sz(dst.type) == 4); - assert(inst->mlen != 0); - - assert(index.file == BRW_IMMEDIATE_VALUE && - index.type == BRW_REGISTER_TYPE_UD); - 
uint32_t surf_index = index.ud; - - assert(offset.file == BRW_IMMEDIATE_VALUE && - offset.type == BRW_REGISTER_TYPE_UD); - uint32_t read_offset = offset.ud; - - brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), - read_offset, surf_index); -} - void fs_generator::enable_debug(const char *shader_name) { @@ -1070,7 +1044,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, assert(inst->force_writemask_all || inst->exec_size >= 4); assert(inst->force_writemask_all || inst->group % inst->exec_size == 0); - assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->ver)); assert(inst->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo)); switch (inst->opcode) { @@ -1338,14 +1311,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_MOV_reloc_imm(p, dst, dst.type, src[0].ud); break; - case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - assert(inst->force_writemask_all); - generate_uniform_pull_constant_load(inst, dst, - src[PULL_UNIFORM_CONSTANT_SRC_SURFACE], - src[PULL_UNIFORM_CONSTANT_SRC_OFFSET]); - send_count++; - break; - case FS_OPCODE_FB_READ: generate_fb_read(inst, dst, src[0]); send_count++; diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index 52481d40ac3..5cd51a62590 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -88,14 +88,10 @@ brw_fs_lower_load_payload(fs_visitor &s) if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) continue; - assert(inst->dst.file == MRF || inst->dst.file == VGRF); + assert(inst->dst.file == VGRF); assert(inst->saturate == false); fs_reg dst = inst->dst; - /* Get rid of COMPR4. 
We'll add it back in if we need it */ - if (dst.file == MRF) - dst.nr = dst.nr & ~BRW_MRF_COMPR4; - const fs_builder ibld(&s, block, inst); const fs_builder ubld = ibld.exec_all(); @@ -116,59 +112,6 @@ brw_fs_lower_load_payload(fs_visitor &s) i += n; } - if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) && - inst->exec_size > 8) { - /* In this case, the payload portion of the LOAD_PAYLOAD isn't - * a straightforward copy. Instead, the result of the - * LOAD_PAYLOAD is treated as interleaved and the first four - * non-header sources are unpacked as: - * - * m + 0: r0 - * m + 1: g0 - * m + 2: b0 - * m + 3: a0 - * m + 4: r1 - * m + 5: g1 - * m + 6: b1 - * m + 7: a1 - * - * This is used for gen <= 5 fb writes. - */ - assert(inst->exec_size == 16); - assert(inst->header_size + 4 <= inst->sources); - for (uint8_t i = inst->header_size; i < inst->header_size + 4; i++) { - if (inst->src[i].file != BAD_FILE) { - if (s.devinfo->has_compr4) { - fs_reg compr4_dst = retype(dst, inst->src[i].type); - compr4_dst.nr |= BRW_MRF_COMPR4; - ibld.MOV(compr4_dst, inst->src[i]); - } else { - /* Platform doesn't have COMPR4. We have to fake it */ - fs_reg mov_dst = retype(dst, inst->src[i].type); - ibld.quarter(0).MOV(mov_dst, quarter(inst->src[i], 0)); - mov_dst.nr += 4; - ibld.quarter(1).MOV(mov_dst, quarter(inst->src[i], 1)); - } - } - - dst.nr++; - } - - /* The loop above only ever incremented us through the first set - * of 4 registers. However, thanks to the magic of COMPR4, we - * actually wrote to the first 8 registers, so we need to take - * that into account now. - */ - dst.nr += 4; - - /* The COMPR4 code took care of the first 4 sources. We'll let - * the regular path handle any remaining sources. Yes, we are - * modifying the instruction but we're about to delete it so - * this really doesn't hurt anything. 
- */ - inst->header_size += 4; - } - for (uint8_t i = inst->header_size; i < inst->sources; i++) { dst.type = inst->src[i].type; if (inst->src[i].file != BAD_FILE) { @@ -592,7 +535,7 @@ brw_fs_lower_sends_overlapping_payload(fs_visitor &s) } /** - * Three source instruction must have a GRF/MRF destination register. + * Three source instruction must have a GRF destination register. * ARF NULL is not allowed. Fix that up by allocating a temporary GRF. */ bool diff --git a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp index 1968a305ca9..6f7bbff9034 100644 --- a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp +++ b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp @@ -215,7 +215,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) * register. */ fs_reg low = inst->dst; - if (orig_dst.is_null() || orig_dst.file == MRF || + if (orig_dst.is_null() || regions_overlap(inst->dst, inst->size_written, inst->src[0], inst->size_read(0)) || regions_overlap(inst->dst, inst->size_written, diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index db6385443f1..ed40d841b6d 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -217,7 +217,6 @@ namespace { case UNIFORM: case IMM: case VGRF: - case MRF: case ATTR: return reg.stride * type_sz(reg.type); case ARF: diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index fd391e04a9a..35aa48fd9ef 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -69,9 +69,9 @@ fs_visitor::assign_regs_trivial() } } - if (this->grf_used >= max_grf) { + if (this->grf_used >= BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - this->grf_used, max_grf); + this->grf_used, BRW_MAX_GRF); } else { 
this->alloc.count = this->grf_used; } diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp index 09c804b9ad5..8996c3cbd0a 100644 --- a/src/intel/compiler/brw_fs_scoreboard.cpp +++ b/src/intel/compiler/brw_fs_scoreboard.cpp @@ -764,7 +764,6 @@ namespace { reg_offset(r) / REG_SIZE); return (r.file == VGRF || r.file == FIXED_GRF ? &grf_deps[reg] : - r.file == MRF ? &grf_deps[GFX7_MRF_HACK_START + reg] : r.file == ARF && reg >= BRW_ARF_ADDRESS && reg < BRW_ARF_ACCUMULATOR ? &addr_dep : r.file == ARF && reg >= BRW_ARF_ACCUMULATOR && @@ -1044,13 +1043,6 @@ namespace { if (inst->reads_accumulator_implicitly()) sb.set(brw_acc_reg(8), dependency(TGL_REGDIST_SRC, jp, exec_all)); - if (is_send(inst) && inst->base_mrf != -1) { - const dependency rd_dep = dependency(TGL_SBID_SRC, ip, exec_all); - - for (unsigned j = 0; j < inst->mlen; j++) - sb.set(brw_uvec_mrf(8, inst->base_mrf + j, 0), rd_dep); - } - /* Track any destination registers of this instruction. */ const dependency wr_dep = is_unordered(devinfo, inst) ? 
dependency(TGL_SBID_DST, ip, exec_all) : @@ -1173,12 +1165,6 @@ namespace { add_dependency(ids, deps[ip], dep); } - if (is_send(inst) && inst->base_mrf != -1) { - for (unsigned j = 0; j < inst->mlen; j++) - add_dependency(ids, deps[ip], dependency_for_read( - sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0)))); - } - if (is_unordered(devinfo, inst) && !inst->eot) add_dependency(ids, deps[ip], dependency(TGL_SBID_SET, ip, exec_all)); @@ -1204,12 +1190,6 @@ namespace { if (dep.ordered && !is_single_pipe(dep.jp, p)) add_dependency(ids, deps[ip], dep); } - - if (is_send(inst) && inst->base_mrf != -1) { - for (unsigned j = 0; j < inst->implied_mrf_writes(); j++) - add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst, - sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0)))); - } } update_inst_scoreboard(shader, jps, inst, ip, sb); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 55f3ded446c..9fe9bd31d61 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -1140,7 +1140,6 @@ fs_visitor::init() this->source_depth_to_render_target = false; this->runtime_check_aads_emit = false; this->first_non_payload_grf = 0; - this->max_grf = GFX7_MRF_HACK_START; this->uniforms = 0; this->last_scratch = 0; diff --git a/src/intel/compiler/brw_ir.h b/src/intel/compiler/brw_ir.h index 3b4b19c244a..8a78861b62c 100644 --- a/src/intel/compiler/brw_ir.h +++ b/src/intel/compiler/brw_ir.h @@ -45,14 +45,14 @@ struct backend_reg : private brw_reg const brw_reg &as_brw_reg() const { - assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM); + assert(file == ARF || file == FIXED_GRF || file == IMM); assert(offset == 0); return static_cast<const brw_reg &>(*this); } brw_reg &as_brw_reg() { - assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM); + assert(file == ARF || file == FIXED_GRF || file == IMM); assert(offset == 0); return static_cast<brw_reg &>(*this); } @@ -159,7 +159,6 @@ struct 
backend_instruction { uint32_t offset; /**< spill/unspill offset or texture offset bitfield */ uint8_t mlen; /**< SEND message length */ uint8_t ex_mlen; /**< SENDS extended message length */ - int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ uint8_t target; /**< MRT target. */ uint8_t sfid; /**< SFID for SEND instructions */ uint32_t desc; /**< SEND[S] message descriptor immediate */ diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index dc335d7e4e2..c4524269d43 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -80,12 +80,6 @@ byte_offset(fs_reg reg, unsigned delta) case UNIFORM: reg.offset += delta; break; - case MRF: { - const unsigned suboffset = reg.offset + delta; - reg.nr += suboffset / REG_SIZE; - reg.offset = suboffset % REG_SIZE; - break; - } case ARF: case FIXED_GRF: { const unsigned suboffset = reg.subnr + delta; @@ -113,7 +107,6 @@ horiz_offset(const fs_reg &reg, unsigned delta) */ return reg; case VGRF: - case MRF: case ATTR: return byte_offset(reg, delta * reg.stride * type_sz(reg.type)); case ARF: @@ -144,7 +137,6 @@ offset(fs_reg reg, unsigned width, unsigned delta) break; case ARF: case FIXED_GRF: - case MRF: case VGRF: case ATTR: case UNIFORM: @@ -212,31 +204,6 @@ reg_padding(const fs_reg &r) return (MAX2(1, stride) - 1) * type_sz(r.type); } -/* Do not call this directly. Call regions_overlap() instead. */ -static inline bool -regions_overlap_MRF(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) -{ - if (r.nr & BRW_MRF_COMPR4) { - fs_reg t = r; - t.nr &= ~BRW_MRF_COMPR4; - /* COMPR4 regions are translated by the hardware during decompression - * into two separate half-regions 4 MRFs apart from each other. - * - * Note: swapping s and t in this parameter list eliminates one possible - * level of recursion (since the s in the called versions of - * regions_overlap_MRF can't be COMPR4), and that makes the compiled - * code a lot smaller. 
- */ - return regions_overlap_MRF(s, ds, t, dr / 2) || - regions_overlap_MRF(s, ds, byte_offset(t, 4 * REG_SIZE), dr / 2); - } else if (s.nr & BRW_MRF_COMPR4) { - return regions_overlap_MRF(s, ds, r, dr); - } - - return !((r.nr * REG_SIZE + r.offset + dr) <= (s.nr * REG_SIZE + s.offset) || - (s.nr * REG_SIZE + s.offset + ds) <= (r.nr * REG_SIZE + r.offset)); -} - /** * Return whether the register region starting at \p r and spanning \p dr * bytes could potentially overlap the register region starting at \p s and @@ -251,11 +218,9 @@ regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) if (r.file == VGRF) { return r.nr == s.nr && !(r.offset + dr <= s.offset || s.offset + ds <= r.offset); - } else if (r.file != MRF) { + } else { return !(reg_offset(r) + dr <= reg_offset(s) || reg_offset(s) + ds <= reg_offset(r)); - } else { - return regions_overlap_MRF(r, dr, s, ds); } } @@ -392,7 +357,6 @@ public: bool can_do_cmod(); bool can_change_types() const; bool has_source_and_destination_hazard() const; - unsigned implied_mrf_writes() const; /** * Return whether \p arg is a control source of a virtual instruction which diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 24abee2a471..c91b5c12d39 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -70,10 +70,8 @@ namespace { enum intel_eu_dependency_id { /* Register part of the GRF. */ EU_DEPENDENCY_ID_GRF0 = 0, - /* Register part of the MRF. Only used on Gfx4-6. */ - EU_DEPENDENCY_ID_MRF0 = EU_DEPENDENCY_ID_GRF0 + XE2_MAX_GRF, /* Address register part of the ARF. */ - EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_MRF0 + 24, + EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_GRF0 + XE2_MAX_GRF, /* Accumulator register part of the ARF. */ EU_DEPENDENCY_ID_ACCUM0 = EU_DEPENDENCY_ID_ADDR0 + 1, /* Flag register part of the ARF. 
*/ @@ -807,18 +805,12 @@ namespace { { if (r.file == VGRF) { const unsigned i = r.nr + r.offset / REG_SIZE + delta; - assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0); + assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0); return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i); } else if (r.file == FIXED_GRF) { const unsigned i = r.nr + delta; - assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0); - return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i); - - } else if (r.file == MRF) { - const unsigned i = GFX7_MRF_HACK_START + - r.nr + r.offset / REG_SIZE + delta; - assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0); + assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0); return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i); } else if (r.file == ARF && r.nr >= BRW_ARF_ADDRESS && @@ -922,13 +914,6 @@ namespace { st, reg_dependency_id(devinfo, brw_acc_reg(8), j)); } - if (is_send(inst) && inst->base_mrf != -1) { - for (unsigned j = 0; j < inst->mlen; j++) - stall_on_dependency( - st, reg_dependency_id( - devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j)); - } - if (const unsigned mask = inst->flags_read(devinfo)) { for (unsigned i = 0; i < sizeof(mask) * CHAR_BIT; i++) { if (mask & (1 << i)) @@ -980,12 +965,6 @@ namespace { } } - if (is_send(inst) && inst->base_mrf != -1) { - for (unsigned j = 0; j < inst->mlen; j++) - mark_read_dependency(st, perf, - reg_dependency_id(devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j)); - } - /* Mark any destination dependencies. 
*/ if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) { for (unsigned j = 0; j < regs_written(inst); j++) { diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index f3818096792..f7a24915e81 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -310,9 +310,6 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, assert(inst->target != 0 || src0_alpha.file == BAD_FILE); - /* We can potentially have a message length of up to 15, so we have to set - * base_mrf to either 0 or 1 in order to fit in m0..m15. - */ fs_reg sources[15]; int header_size = 2, payload_header_size; unsigned length = 0; diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 7e2243c4625..4f1bcc6b899 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -61,17 +61,6 @@ struct intel_device_info; #define BRW_MAX_GRF 128 #define XE2_MAX_GRF 256 -/** - * First GRF used for the MRF hack. - * - * On gfx7, MRFs are no longer used, and contiguous GRFs are used instead. We - * haven't converted our compiler to be aware of this, so it asks for MRFs and - * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The - * register allocators have to be careful of this to avoid corrupting the "MRF"s - * with actual GRF allocations. - */ -#define GFX7_MRF_HACK_START 112 - /** * BRW hardware swizzles. * Only defines XYZW to ensure it can be contained in 2 bits @@ -81,9 +70,6 @@ struct intel_device_info; #define BRW_SWIZZLE_Z 2 #define BRW_SWIZZLE_W 3 -/** Number of message register file registers */ -#define BRW_MAX_MRF(gen) (gen == 6 ? 
24 : 16) - #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) @@ -454,10 +440,6 @@ brw_reg(enum brw_reg_file file, assert(nr < XE2_MAX_GRF); else if (file == BRW_ARCHITECTURE_REGISTER_FILE) assert(nr <= BRW_ARF_TIMESTAMP); - /* Asserting on the MRF register number requires to know the hardware gen - * (gfx6 has 24 MRF registers), which we don't know here, so we assert - * for that in the generators and in brw_eu_emit.c - */ reg.type = type; reg.file = file; @@ -1050,19 +1032,6 @@ brw_mask_stack_depth_reg(unsigned subnr) BRW_ARF_MASK_STACK_DEPTH, subnr); } -static inline struct brw_reg -brw_message_reg(unsigned nr) -{ - return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); -} - -static inline struct brw_reg -brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr) -{ - return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr), - BRW_REGISTER_TYPE_UD); -} - /* This is almost always called with a numeric constant argument, so * make things easy to evaluate at compile time: */ diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index c699adcfc01..2932ccf23f8 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1174,7 +1174,6 @@ fs_instruction_scheduler::calculate_deps() * After register allocation, reg_offsets are gone and we track individual * GRF registers. */ - schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->ver)]; schedule_node *last_conditional_mod[8] = {}; schedule_node *last_accumulator_write = NULL; /* Fixed HW registers are assumed to be separate from the virtual @@ -1184,8 +1183,6 @@ fs_instruction_scheduler::calculate_deps() */ schedule_node *last_fixed_grf_write = NULL; - memset(last_mrf_write, 0, sizeof(last_mrf_write)); - /* top-to-bottom dependencies: RAW and WAW. 
*/ for (schedule_node *n = current.start; n < current.end; n++) { fs_inst *inst = (fs_inst *)n->inst; @@ -1223,16 +1220,6 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->base_mrf != -1) { - for (int i = 0; i < inst->mlen; i++) { - /* It looks like the MRF regs are released in the send - * instruction once it's sent, not when the result comes - * back. - */ - add_dep(last_mrf_write[inst->base_mrf + i], n); - } - } - if (const unsigned mask = inst->flags_read(v->devinfo)) { assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); @@ -1261,19 +1248,6 @@ fs_instruction_scheduler::calculate_deps() inst->dst.offset / REG_SIZE + r] = n; } } - } else if (inst->dst.file == MRF) { - int reg = inst->dst.nr & ~BRW_MRF_COMPR4; - - add_dep(last_mrf_write[reg], n); - last_mrf_write[reg] = n; - if (is_compressed(inst)) { - if (inst->dst.nr & BRW_MRF_COMPR4) - reg += 4; - else - reg++; - add_dep(last_mrf_write[reg], n); - last_mrf_write[reg] = n; - } } else if (inst->dst.file == FIXED_GRF) { if (post_reg_alloc) { for (unsigned r = 0; r < regs_written(inst); r++) { @@ -1291,13 +1265,6 @@ fs_instruction_scheduler::calculate_deps() add_barrier_deps(n); } - if (inst->mlen > 0 && inst->base_mrf != -1) { - for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { - add_dep(last_mrf_write[inst->base_mrf + i], n); - last_mrf_write[inst->base_mrf + i] = n; - } - } - if (const unsigned mask = inst->flags_written(v->devinfo)) { assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); @@ -1319,7 +1286,6 @@ fs_instruction_scheduler::calculate_deps() clear_last_grf_write(); /* bottom-to-top dependencies: WAR */ - memset(last_mrf_write, 0, sizeof(last_mrf_write)); memset(last_conditional_mod, 0, sizeof(last_conditional_mod)); last_accumulator_write = NULL; last_fixed_grf_write = NULL; @@ -1353,16 +1319,6 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->base_mrf != -1) { - for (int i = 0; i < inst->mlen; i++) { - /* It looks like the MRF regs are released in the send - * 
instruction once it's sent, not when the result comes - * back. - */ - add_dep(n, last_mrf_write[inst->base_mrf + i], 2); - } - } - if (const unsigned mask = inst->flags_read(v->devinfo)) { assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); @@ -1389,19 +1345,6 @@ fs_instruction_scheduler::calculate_deps() inst->dst.offset / REG_SIZE + r] = n; } } - } else if (inst->dst.file == MRF) { - int reg = inst->dst.nr & ~BRW_MRF_COMPR4; - - last_mrf_write[reg] = n; - - if (is_compressed(inst)) { - if (inst->dst.nr & BRW_MRF_COMPR4) - reg += 4; - else - reg++; - - last_mrf_write[reg] = n; - } } else if (inst->dst.file == FIXED_GRF) { if (post_reg_alloc) { for (unsigned r = 0; r < regs_written(inst); r++) @@ -1415,12 +1358,6 @@ fs_instruction_scheduler::calculate_deps() add_barrier_deps(n); } - if (inst->mlen > 0 && inst->base_mrf != -1) { - for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { - last_mrf_write[inst->base_mrf + i] = n; - } - } - if (const unsigned mask = inst->flags_written(v->devinfo)) { assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index bac5c98c92c..50de3006812 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -222,29 +222,6 @@ TEST_P(validation_test, invalid_exec_size_encoding) } } -TEST_P(validation_test, invalid_file_encoding) -{ - /* Register file on Gfx12 is only one bit */ - if (devinfo.ver >= 12) - return; - - brw_MOV(p, g0, g0); - brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F); - - if (devinfo.ver > 6) { - EXPECT_FALSE(validate(p)); - } else { - EXPECT_TRUE(validate(p)); - } - - clear_instructions(p); - - gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null); - brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F); - - EXPECT_FALSE(validate(p)); -} - TEST_P(validation_test, invalid_type_encoding) { 
enum brw_reg_file files[2] = {