intel/brw: Remove MRF type

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
Caio Oliveira 2024-02-21 21:21:20 -08:00 committed by Marge Bot
parent 5c93a0e125
commit 8f3c52c1da
24 changed files with 17 additions and 531 deletions

View file

@ -776,10 +776,6 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
{
int err = 0;
/* Clear the Compr4 instruction compression bit. */
if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
_reg_nr &= ~BRW_MRF_COMPR4;
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:

View file

@ -1510,12 +1510,6 @@ void gfx6_math(struct brw_codegen *p,
struct brw_reg src0,
struct brw_reg src1);
void brw_oword_block_read(struct brw_codegen *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index);
unsigned brw_scratch_surface_idx(const struct brw_codegen *p);
void gfx7_block_read_scratch(struct brw_codegen *p,
@ -1726,12 +1720,6 @@ next_offset(const struct intel_device_info *devinfo, void *store, int offset)
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15
/** First MRF register used by spills (m21 when gen == 6, m13 otherwise) */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
/** First MRF register used by pull loads (m16 when gen == 6, m13 otherwise) */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
#ifdef __cplusplus
}
#endif

View file

@ -840,12 +840,10 @@ enum ENUM_PACKED brw_predicate {
enum ENUM_PACKED brw_reg_file {
BRW_ARCHITECTURE_REGISTER_FILE = 0,
BRW_GENERAL_REGISTER_FILE = 1,
BRW_MESSAGE_REGISTER_FILE = 2,
BRW_IMMEDIATE_VALUE = 3,
ARF = BRW_ARCHITECTURE_REGISTER_FILE,
FIXED_GRF = BRW_GENERAL_REGISTER_FILE,
MRF = BRW_MESSAGE_REGISTER_FILE,
IMM = BRW_IMMEDIATE_VALUE,
/* These are not hardware values */
@ -885,8 +883,6 @@ enum ENUM_PACKED gfx10_align1_3src_exec_type {
#define BRW_ARF_TDR 0xB0
#define BRW_ARF_TIMESTAMP 0xC0
#define BRW_MRF_COMPR4 (1 << 7)
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2

View file

@ -35,31 +35,12 @@
#include "util/ralloc.h"
/**
 * Rewrite a message-register reference in place so that it names a general
 * register instead.  Registers in any other file are left unchanged.
 */
static void
gfx7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
{
   /* Only MRF operands need translating. */
   if (reg->file != BRW_MESSAGE_REGISTER_FILE)
      return;

   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
    * "The send with EOT should use register space R112-R127 for <src>. This is
    * to enable loading of a new thread into the same slot while the message
    * with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   reg->nr += GFX7_MRF_HACK_START;
   reg->file = BRW_GENERAL_REGISTER_FILE;
}
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
const struct intel_device_info *devinfo = p->devinfo;
if (dest.file == BRW_MESSAGE_REGISTER_FILE)
assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
else if (dest.file == BRW_GENERAL_REGISTER_FILE)
if (dest.file == BRW_GENERAL_REGISTER_FILE)
assert(dest.nr < XE2_MAX_GRF);
/* The hardware has a restriction where a destination of size Byte with
@ -74,8 +55,6 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
dest.hstride = BRW_HORIZONTAL_STRIDE_2;
}
gfx7_convert_mrf_to_grf(p, &dest);
if (devinfo->ver >= 12 &&
(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
@ -118,8 +97,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
} else {
brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
if (dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_MESSAGE_REGISTER_FILE) {
if (dest.file == BRW_GENERAL_REGISTER_FILE) {
assert(dest.writemask != 0);
}
/* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
@ -173,19 +151,15 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
const struct intel_device_info *devinfo = p->devinfo;
if (reg.file == BRW_MESSAGE_REGISTER_FILE)
assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
else if (reg.file == BRW_GENERAL_REGISTER_FILE)
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < XE2_MAX_GRF);
gfx7_convert_mrf_to_grf(p, &reg);
if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
/* Any source modifiers or regions will be ignored, since this just
* identifies the MRF/GRF to start reading the message contents from.
* identifies the GRF to start reading the message contents from.
* Check for some likely failures.
*/
assert(!reg.negate);
@ -323,9 +297,6 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
reg.nr != BRW_ARF_ACCUMULATOR);
gfx7_convert_mrf_to_grf(p, &reg);
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
brw_inst_set_src1_abs(devinfo, inst, reg.abs);
brw_inst_set_src1_negate(devinfo, inst, reg.negate);
@ -631,8 +602,6 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *inst = next_insn(p, opcode);
gfx7_convert_mrf_to_grf(p, &dest);
assert(dest.nr < XE2_MAX_GRF);
if (devinfo->ver >= 10)
@ -765,8 +734,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
}
} else {
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_MESSAGE_REGISTER_FILE);
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(dest.type == BRW_REGISTER_TYPE_F ||
dest.type == BRW_REGISTER_TYPE_DF ||
dest.type == BRW_REGISTER_TYPE_D ||
@ -1474,8 +1442,7 @@ void gfx6_math(struct brw_codegen *p,
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_MESSAGE_REGISTER_FILE);
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
@ -1552,70 +1519,6 @@ gfx7_block_read_scratch(struct brw_codegen *p,
true); /* header present */
}
/**
* Read float[4] vectors from the data port constant cache.
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*
* \param p                 codegen context the instructions are emitted into
* \param dest              destination of the SEND; reinterpreted below as a
*                          UW vec8 before being set on the instruction
* \param mrf               register used to build the message header; retyped
*                          to UD and passed as src0 of the SEND
* \param offset            location in the buffer; divided by 16 before being
*                          written into the header
* \param bind_table_index  forwarded to brw_dp_read_desc()
*/
void brw_oword_block_read(struct brw_codegen *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index)
{
const struct intel_device_info *devinfo = p->devinfo;
const unsigned target_cache = GFX6_SFID_DATAPORT_CONSTANT_CACHE;
/* Default exec size is stored as an exponent, hence the shift. */
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
/* Remember the caller's SWSB so it can be split across the instructions
 * emitted below.
 */
const struct tgl_swsb swsb = brw_get_default_swsb(p);
/* On newer hardware, offset is in units of owords. */
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
/* Outer state scope: no predication, flag register 0.0, no compression,
 * execution mask disabled.  Restored by the final pop.
 */
brw_push_insn_state(p);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_flag_reg(p, 0, 0);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
/* Inner state scope covering only the two header-building MOVs. */
brw_push_insn_state(p);
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Seed the header with the contents of brw_vec8_grf(0, 0). */
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
brw_pop_insn_state(p);
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
brw_inst_set_sfid(devinfo, insn, target_cache);
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, mrf);
/* NOTE(review): the brw_message_desc() arguments are presumably
 * (message length, response length, header present) -- confirm against its
 * declaration.
 */
brw_set_desc(p, insn,
brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
brw_dp_read_desc(devinfo, bind_table_index,
BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
BRW_DATAPORT_READ_TARGET_DATA_CACHE));
brw_pop_insn_state(p);
}
brw_inst *
gfx9_fb_READ(struct brw_codegen *p,
struct brw_reg dst,

View file

@ -259,21 +259,6 @@ invalid_values(const struct brw_isa_info *isa, const brw_inst *inst)
if (inst_is_send(isa, inst))
return error_msg;
if (num_sources == 3) {
/* Nothing to test:
* No 3-src instructions on Gfx4-5
* No reg file bits on Gfx6-10 (align16)
* No invalid encodings on Gfx10-12 (align1)
*/
} else {
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
(num_sources > 0 &&
brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
(num_sources > 1 &&
brw_inst_src1_reg_file(devinfo, inst) == MRF),
"invalid register file encoding");
}
if (error_msg.str)
return error_msg;

View file

@ -62,7 +62,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
this->dst = dst;
this->sources = sources;
this->exec_size = exec_size;
this->base_mrf = -1;
assert(dst.file != IMM && dst.file != UNIFORM);
@ -75,7 +74,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
case VGRF:
case ARF:
case FIXED_GRF:
case MRF:
case ATTR:
this->size_written = dst.component_size(exec_size);
break;
@ -538,7 +536,6 @@ fs_reg::is_contiguous() const
case FIXED_GRF:
return hstride == BRW_HORIZONTAL_STRIDE_1 &&
vstride == width + hstride;
case MRF:
case VGRF:
case ATTR:
return stride == 1;
@ -948,8 +945,6 @@ fs_inst::size_read(int arg) const
case VGRF:
case ATTR:
return components_read(arg) * src[arg].component_size(exec_size);
case MRF:
unreachable("MRF registers are not allowed as sources");
}
return 0;
}
@ -1018,59 +1013,6 @@ fs_inst::flags_written(const intel_device_info *devinfo) const
}
}
/**
 * Number of MRFs this FS opcode will write over.
 *
 * This is not the 0 or 1 implied writes of an actual gen instruction: the
 * FS opcodes often generate MOVs in addition.
 *
 * \return 0 when the instruction has no message length or no base MRF,
 *         otherwise an opcode-dependent register count.
 */
unsigned
fs_inst::implied_mrf_writes() const
{
   /* Nothing is written without a message payload placed in MRFs. */
   if (mlen == 0 || base_mrf == -1)
      return 0;

   switch (opcode) {
   /* Math opcodes whose count scales as exec_size / 8. */
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      return exec_size / 8;

   /* Math opcodes whose count scales as 2 * exec_size / 8. */
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_INT_QUOTIENT:
   case SHADER_OPCODE_INT_REMAINDER:
      return 2 * exec_size / 8;

   /* Sampler-style opcodes: a single register. */
   case SHADER_OPCODE_TEX:
   case FS_OPCODE_TXB:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TG4:
   case SHADER_OPCODE_TG4_OFFSET:
   case SHADER_OPCODE_TG4_BIAS:
   case SHADER_OPCODE_TG4_EXPLICIT_LOD:
   case SHADER_OPCODE_TG4_IMPLICIT_LOD:
   case SHADER_OPCODE_TG4_OFFSET_LOD:
   case SHADER_OPCODE_TG4_OFFSET_BIAS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
   case SHADER_OPCODE_LOD:
   case SHADER_OPCODE_SAMPLEINFO:
      return 1;

   /* Pull constant loads write the whole message. */
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return mlen;

   default:
      unreachable("not reached");
   }
}
bool
fs_inst::has_sampler_residency() const
{
@ -2274,9 +2216,6 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f
case FIXED_GRF:
fprintf(file, "g%d", inst->dst.nr);
break;
case MRF:
fprintf(file, "m%d", inst->dst.nr);
break;
case BAD_FILE:
fprintf(file, "(null)");
break;
@ -2333,9 +2272,6 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f
case FIXED_GRF:
fprintf(file, "g%d", inst->src[i].nr);
break;
case MRF:
fprintf(file, "***m%d***", inst->src[i].nr);
break;
case ATTR:
fprintf(file, "attr%d", inst->src[i].nr);
break;

View file

@ -322,8 +322,6 @@ public:
fs_reg outputs[VARYING_SLOT_MAX];
fs_reg dual_src_output;
int first_non_payload_grf;
/** Either BRW_MAX_GRF or GFX7_MRF_HACK_START */
unsigned max_grf;
bool failed;
char *fail_msg;
@ -462,9 +460,6 @@ private:
void generate_ddy(const fs_inst *inst,
struct brw_reg dst, struct brw_reg src);
void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_halt(fs_inst *inst);

View file

@ -565,16 +565,6 @@ namespace {
(is_grf(inst->src[i]) && inst->eot))
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
}
/* The location of the Gfx7 MRF hack registers is hard-coded in the
* rest of the compiler back-end. Don't attempt to move them around.
*/
assert(inst->dst.file != MRF);
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
const unsigned reg = GFX7_MRF_HACK_START + inst->base_mrf + i;
constrained[p.atom_of_reg(reg)] = true;
}
}
return constrained;

View file

@ -619,16 +619,6 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type,
* The following restrictions apply for align1 mode: Scalar source is
* supported. Source and destination horizontal stride must be the
* same.
*
* From the Haswell PRM Volume 2b "Command Reference - Instructions", page
* 134 ("Extended Math Function"):
*
* Scalar source is supported. Source and destination horizontal stride
* must be 1.
*
* and similar language exists for IVB and SNB. Pre-SNB, math instructions
* are sends, so the sources are moved to MRF's and there are no
* restrictions.
*/
if (inst->is_math())
return stride == inst->dst.stride || stride == 0;

View file

@ -196,7 +196,6 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
a->sfid == b->sfid &&
a->desc == b->desc &&
a->size_written == b->size_written &&
a->base_mrf == b->base_mrf &&
a->check_tdr == b->check_tdr &&
a->send_has_side_effects == b->send_has_side_effects &&
a->eot == b->eot &&

View file

@ -44,8 +44,6 @@ brw_file_from_reg(fs_reg *reg)
case FIXED_GRF:
case VGRF:
return BRW_GENERAL_REGISTER_FILE;
case MRF:
return BRW_MESSAGE_REGISTER_FILE;
case IMM:
return BRW_IMMEDIATE_VALUE;
case BAD_FILE:
@ -63,9 +61,6 @@ brw_reg_from_fs_reg(const struct intel_device_info *devinfo, fs_inst *inst,
struct brw_reg brw_reg;
switch (reg->file) {
case MRF:
assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
FALLTHROUGH;
case VGRF:
if (reg->stride == 0) {
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
@ -865,27 +860,6 @@ fs_generator::generate_scratch_header(fs_inst *inst, struct brw_reg dst)
brw_inst_set_no_dd_check(p->devinfo, insn, true);
}
/**
 * Emit an OWord block read for a uniform pull constant load.
 *
 * \param inst    instruction being generated; supplies base_mrf and must have
 *                a non-zero mlen
 * \param dst     destination register, must have a 4-byte type
 * \param index   UD immediate holding the surface index
 * \param offset  UD immediate holding the read offset
 */
void
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(type_sz(dst.type) == 4);
   assert(inst->mlen != 0);

   /* Both the surface and the offset have to be compile-time constants. */
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   assert(offset.file == BRW_IMMEDIATE_VALUE &&
          offset.type == BRW_REGISTER_TYPE_UD);

   const uint32_t binding = index.ud;
   const uint32_t byte_offset_imm = offset.ud;
   const struct brw_reg header = brw_message_reg(inst->base_mrf);

   brw_oword_block_read(p, dst, header, byte_offset_imm, binding);
}
void
fs_generator::enable_debug(const char *shader_name)
{
@ -1070,7 +1044,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
assert(inst->force_writemask_all || inst->exec_size >= 4);
assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->ver));
assert(inst->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo));
switch (inst->opcode) {
@ -1338,14 +1311,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_MOV_reloc_imm(p, dst, dst.type, src[0].ud);
break;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
assert(inst->force_writemask_all);
generate_uniform_pull_constant_load(inst, dst,
src[PULL_UNIFORM_CONSTANT_SRC_SURFACE],
src[PULL_UNIFORM_CONSTANT_SRC_OFFSET]);
send_count++;
break;
case FS_OPCODE_FB_READ:
generate_fb_read(inst, dst, src[0]);
send_count++;

View file

@ -88,14 +88,10 @@ brw_fs_lower_load_payload(fs_visitor &s)
if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
continue;
assert(inst->dst.file == MRF || inst->dst.file == VGRF);
assert(inst->dst.file == VGRF);
assert(inst->saturate == false);
fs_reg dst = inst->dst;
/* Get rid of COMPR4. We'll add it back in if we need it */
if (dst.file == MRF)
dst.nr = dst.nr & ~BRW_MRF_COMPR4;
const fs_builder ibld(&s, block, inst);
const fs_builder ubld = ibld.exec_all();
@ -116,59 +112,6 @@ brw_fs_lower_load_payload(fs_visitor &s)
i += n;
}
if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) &&
inst->exec_size > 8) {
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
* a straightforward copy. Instead, the result of the
* LOAD_PAYLOAD is treated as interleaved and the first four
* non-header sources are unpacked as:
*
* m + 0: r0
* m + 1: g0
* m + 2: b0
* m + 3: a0
* m + 4: r1
* m + 5: g1
* m + 6: b1
* m + 7: a1
*
* This is used for gen <= 5 fb writes.
*/
assert(inst->exec_size == 16);
assert(inst->header_size + 4 <= inst->sources);
for (uint8_t i = inst->header_size; i < inst->header_size + 4; i++) {
if (inst->src[i].file != BAD_FILE) {
if (s.devinfo->has_compr4) {
fs_reg compr4_dst = retype(dst, inst->src[i].type);
compr4_dst.nr |= BRW_MRF_COMPR4;
ibld.MOV(compr4_dst, inst->src[i]);
} else {
/* Platform doesn't have COMPR4. We have to fake it */
fs_reg mov_dst = retype(dst, inst->src[i].type);
ibld.quarter(0).MOV(mov_dst, quarter(inst->src[i], 0));
mov_dst.nr += 4;
ibld.quarter(1).MOV(mov_dst, quarter(inst->src[i], 1));
}
}
dst.nr++;
}
/* The loop above only ever incremented us through the first set
* of 4 registers. However, thanks to the magic of COMPR4, we
* actually wrote to the first 8 registers, so we need to take
* that into account now.
*/
dst.nr += 4;
/* The COMPR4 code took care of the first 4 sources. We'll let
* the regular path handle any remaining sources. Yes, we are
* modifying the instruction but we're about to delete it so
* this really doesn't hurt anything.
*/
inst->header_size += 4;
}
for (uint8_t i = inst->header_size; i < inst->sources; i++) {
dst.type = inst->src[i].type;
if (inst->src[i].file != BAD_FILE) {
@ -592,7 +535,7 @@ brw_fs_lower_sends_overlapping_payload(fs_visitor &s)
}
/**
* Three source instruction must have a GRF/MRF destination register.
* Three source instruction must have a GRF destination register.
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
*/
bool

View file

@ -215,7 +215,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
* register.
*/
fs_reg low = inst->dst;
if (orig_dst.is_null() || orig_dst.file == MRF ||
if (orig_dst.is_null() ||
regions_overlap(inst->dst, inst->size_written,
inst->src[0], inst->size_read(0)) ||
regions_overlap(inst->dst, inst->size_written,

View file

@ -217,7 +217,6 @@ namespace {
case UNIFORM:
case IMM:
case VGRF:
case MRF:
case ATTR:
return reg.stride * type_sz(reg.type);
case ARF:

View file

@ -69,9 +69,9 @@ fs_visitor::assign_regs_trivial()
}
}
if (this->grf_used >= max_grf) {
if (this->grf_used >= BRW_MAX_GRF) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
this->grf_used, max_grf);
this->grf_used, BRW_MAX_GRF);
} else {
this->alloc.count = this->grf_used;
}

View file

@ -764,7 +764,6 @@ namespace {
reg_offset(r) / REG_SIZE);
return (r.file == VGRF || r.file == FIXED_GRF ? &grf_deps[reg] :
r.file == MRF ? &grf_deps[GFX7_MRF_HACK_START + reg] :
r.file == ARF && reg >= BRW_ARF_ADDRESS &&
reg < BRW_ARF_ACCUMULATOR ? &addr_dep :
r.file == ARF && reg >= BRW_ARF_ACCUMULATOR &&
@ -1044,13 +1043,6 @@ namespace {
if (inst->reads_accumulator_implicitly())
sb.set(brw_acc_reg(8), dependency(TGL_REGDIST_SRC, jp, exec_all));
if (is_send(inst) && inst->base_mrf != -1) {
const dependency rd_dep = dependency(TGL_SBID_SRC, ip, exec_all);
for (unsigned j = 0; j < inst->mlen; j++)
sb.set(brw_uvec_mrf(8, inst->base_mrf + j, 0), rd_dep);
}
/* Track any destination registers of this instruction. */
const dependency wr_dep =
is_unordered(devinfo, inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
@ -1173,12 +1165,6 @@ namespace {
add_dependency(ids, deps[ip], dep);
}
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->mlen; j++)
add_dependency(ids, deps[ip], dependency_for_read(
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
}
if (is_unordered(devinfo, inst) && !inst->eot)
add_dependency(ids, deps[ip],
dependency(TGL_SBID_SET, ip, exec_all));
@ -1204,12 +1190,6 @@ namespace {
if (dep.ordered && !is_single_pipe(dep.jp, p))
add_dependency(ids, deps[ip], dep);
}
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
}
}
update_inst_scoreboard(shader, jps, inst, ip, sb);

View file

@ -1140,7 +1140,6 @@ fs_visitor::init()
this->source_depth_to_render_target = false;
this->runtime_check_aads_emit = false;
this->first_non_payload_grf = 0;
this->max_grf = GFX7_MRF_HACK_START;
this->uniforms = 0;
this->last_scratch = 0;

View file

@ -45,14 +45,14 @@ struct backend_reg : private brw_reg
const brw_reg &as_brw_reg() const
{
assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM);
assert(file == ARF || file == FIXED_GRF || file == IMM);
assert(offset == 0);
return static_cast<const brw_reg &>(*this);
}
brw_reg &as_brw_reg()
{
assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM);
assert(file == ARF || file == FIXED_GRF || file == IMM);
assert(offset == 0);
return static_cast<brw_reg &>(*this);
}
@ -159,7 +159,6 @@ struct backend_instruction {
uint32_t offset; /**< spill/unspill offset or texture offset bitfield */
uint8_t mlen; /**< SEND message length */
uint8_t ex_mlen; /**< SENDS extended message length */
int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
uint8_t target; /**< MRT target. */
uint8_t sfid; /**< SFID for SEND instructions */
uint32_t desc; /**< SEND[S] message descriptor immediate */

View file

@ -80,12 +80,6 @@ byte_offset(fs_reg reg, unsigned delta)
case UNIFORM:
reg.offset += delta;
break;
case MRF: {
const unsigned suboffset = reg.offset + delta;
reg.nr += suboffset / REG_SIZE;
reg.offset = suboffset % REG_SIZE;
break;
}
case ARF:
case FIXED_GRF: {
const unsigned suboffset = reg.subnr + delta;
@ -113,7 +107,6 @@ horiz_offset(const fs_reg &reg, unsigned delta)
*/
return reg;
case VGRF:
case MRF:
case ATTR:
return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
case ARF:
@ -144,7 +137,6 @@ offset(fs_reg reg, unsigned width, unsigned delta)
break;
case ARF:
case FIXED_GRF:
case MRF:
case VGRF:
case ATTR:
case UNIFORM:
@ -212,31 +204,6 @@ reg_padding(const fs_reg &r)
return (MAX2(1, stride) - 1) * type_sz(r.type);
}
/* Do not call this directly. Call regions_overlap() instead. */
static inline bool
regions_overlap_MRF(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
{
if (r.nr & BRW_MRF_COMPR4) {
fs_reg t = r;
t.nr &= ~BRW_MRF_COMPR4;
/* COMPR4 regions are translated by the hardware during decompression
* into two separate half-regions 4 MRFs apart from each other.
*
* Note: swapping s and t in this parameter list eliminates one possible
* level of recursion (since the s in the called versions of
* regions_overlap_MRF can't be COMPR4), and that makes the compiled
* code a lot smaller.
*/
return regions_overlap_MRF(s, ds, t, dr / 2) ||
regions_overlap_MRF(s, ds, byte_offset(t, 4 * REG_SIZE), dr / 2);
} else if (s.nr & BRW_MRF_COMPR4) {
return regions_overlap_MRF(s, ds, r, dr);
}
return !((r.nr * REG_SIZE + r.offset + dr) <= (s.nr * REG_SIZE + s.offset) ||
(s.nr * REG_SIZE + s.offset + ds) <= (r.nr * REG_SIZE + r.offset));
}
/**
* Return whether the register region starting at \p r and spanning \p dr
* bytes could potentially overlap the register region starting at \p s and
@ -251,11 +218,9 @@ regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
if (r.file == VGRF) {
return r.nr == s.nr &&
!(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
} else if (r.file != MRF) {
} else {
return !(reg_offset(r) + dr <= reg_offset(s) ||
reg_offset(s) + ds <= reg_offset(r));
} else {
return regions_overlap_MRF(r, dr, s, ds);
}
}
@ -392,7 +357,6 @@ public:
bool can_do_cmod();
bool can_change_types() const;
bool has_source_and_destination_hazard() const;
unsigned implied_mrf_writes() const;
/**
* Return whether \p arg is a control source of a virtual instruction which

View file

@ -70,10 +70,8 @@ namespace {
enum intel_eu_dependency_id {
/* Register part of the GRF. */
EU_DEPENDENCY_ID_GRF0 = 0,
/* Register part of the MRF. Only used on Gfx4-6. */
EU_DEPENDENCY_ID_MRF0 = EU_DEPENDENCY_ID_GRF0 + XE2_MAX_GRF,
/* Address register part of the ARF. */
EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_MRF0 + 24,
EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_GRF0 + XE2_MAX_GRF,
/* Accumulator register part of the ARF. */
EU_DEPENDENCY_ID_ACCUM0 = EU_DEPENDENCY_ID_ADDR0 + 1,
/* Flag register part of the ARF. */
@ -807,18 +805,12 @@ namespace {
{
if (r.file == VGRF) {
const unsigned i = r.nr + r.offset / REG_SIZE + delta;
assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0);
assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0);
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
} else if (r.file == FIXED_GRF) {
const unsigned i = r.nr + delta;
assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0);
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
} else if (r.file == MRF) {
const unsigned i = GFX7_MRF_HACK_START +
r.nr + r.offset / REG_SIZE + delta;
assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0);
assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0);
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
} else if (r.file == ARF && r.nr >= BRW_ARF_ADDRESS &&
@ -922,13 +914,6 @@ namespace {
st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
}
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->mlen; j++)
stall_on_dependency(
st, reg_dependency_id(
devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
}
if (const unsigned mask = inst->flags_read(devinfo)) {
for (unsigned i = 0; i < sizeof(mask) * CHAR_BIT; i++) {
if (mask & (1 << i))
@ -980,12 +965,6 @@ namespace {
}
}
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->mlen; j++)
mark_read_dependency(st, perf,
reg_dependency_id(devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
}
/* Mark any destination dependencies. */
if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
for (unsigned j = 0; j < regs_written(inst); j++) {

View file

@ -310,9 +310,6 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
assert(inst->target != 0 || src0_alpha.file == BAD_FILE);
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
*/
fs_reg sources[15];
int header_size = 2, payload_header_size;
unsigned length = 0;

View file

@ -61,17 +61,6 @@ struct intel_device_info;
#define BRW_MAX_GRF 128
#define XE2_MAX_GRF 256
/**
* First GRF used for the MRF hack.
*
* On gfx7, MRFs are no longer used, and contiguous GRFs are used instead. We
* haven't converted our compiler to be aware of this, so it asks for MRFs and
* brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
* register allocators have to be careful of this to avoid corrupting the "MRF"s
* with actual GRF allocations.
*/
#define GFX7_MRF_HACK_START 112
/**
* BRW hardware swizzles.
* Only defines XYZW to ensure it can be contained in 2 bits
@ -81,9 +70,6 @@ struct intel_device_info;
#define BRW_SWIZZLE_Z 2
#define BRW_SWIZZLE_W 3
/** Number of message register file registers */
#define BRW_MAX_MRF(gen) (gen == 6 ? 24 : 16)
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
@ -454,10 +440,6 @@ brw_reg(enum brw_reg_file file,
assert(nr < XE2_MAX_GRF);
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_TIMESTAMP);
/* Asserting on the MRF register number requires to know the hardware gen
* (gfx6 has 24 MRF registers), which we don't know here, so we assert
* for that in the generators and in brw_eu_emit.c
*/
reg.type = type;
reg.file = file;
@ -1050,19 +1032,6 @@ brw_mask_stack_depth_reg(unsigned subnr)
BRW_ARF_MASK_STACK_DEPTH, subnr);
}
static inline struct brw_reg
brw_message_reg(unsigned nr)
{
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
}
static inline struct brw_reg
brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
{
return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr),
BRW_REGISTER_TYPE_UD);
}
/* This is almost always called with a numeric constant argument, so
* make things easy to evaluate at compile time:
*/

View file

@ -1174,7 +1174,6 @@ fs_instruction_scheduler::calculate_deps()
* After register allocation, reg_offsets are gone and we track individual
* GRF registers.
*/
schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->ver)];
schedule_node *last_conditional_mod[8] = {};
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
@ -1184,8 +1183,6 @@ fs_instruction_scheduler::calculate_deps()
*/
schedule_node *last_fixed_grf_write = NULL;
memset(last_mrf_write, 0, sizeof(last_mrf_write));
/* top-to-bottom dependencies: RAW and WAW. */
for (schedule_node *n = current.start; n < current.end; n++) {
fs_inst *inst = (fs_inst *)n->inst;
@ -1223,16 +1220,6 @@ fs_instruction_scheduler::calculate_deps()
}
}
if (inst->base_mrf != -1) {
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(last_mrf_write[inst->base_mrf + i], n);
}
}
if (const unsigned mask = inst->flags_read(v->devinfo)) {
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
@ -1261,19 +1248,6 @@ fs_instruction_scheduler::calculate_deps()
inst->dst.offset / REG_SIZE + r] = n;
}
}
} else if (inst->dst.file == MRF) {
int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
if (inst->dst.nr & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
}
} else if (inst->dst.file == FIXED_GRF) {
if (post_reg_alloc) {
for (unsigned r = 0; r < regs_written(inst); r++) {
@ -1291,13 +1265,6 @@ fs_instruction_scheduler::calculate_deps()
add_barrier_deps(n);
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
}
}
if (const unsigned mask = inst->flags_written(v->devinfo)) {
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
@ -1319,7 +1286,6 @@ fs_instruction_scheduler::calculate_deps()
clear_last_grf_write();
/* bottom-to-top dependencies: WAR */
memset(last_mrf_write, 0, sizeof(last_mrf_write));
memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
@ -1353,16 +1319,6 @@ fs_instruction_scheduler::calculate_deps()
}
}
if (inst->base_mrf != -1) {
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
}
}
if (const unsigned mask = inst->flags_read(v->devinfo)) {
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
@ -1389,19 +1345,6 @@ fs_instruction_scheduler::calculate_deps()
inst->dst.offset / REG_SIZE + r] = n;
}
}
} else if (inst->dst.file == MRF) {
int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
if (inst->dst.nr & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
last_mrf_write[reg] = n;
}
} else if (inst->dst.file == FIXED_GRF) {
if (post_reg_alloc) {
for (unsigned r = 0; r < regs_written(inst); r++)
@ -1415,12 +1358,6 @@ fs_instruction_scheduler::calculate_deps()
add_barrier_deps(n);
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
}
if (const unsigned mask = inst->flags_written(v->devinfo)) {
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));

View file

@ -222,29 +222,6 @@ TEST_P(validation_test, invalid_exec_size_encoding)
}
}
/* Checks how the validator reacts when an instruction's register file field
 * is overwritten with the MRF encoding.
 */
TEST_P(validation_test, invalid_file_encoding)
{
/* Register file on Gfx12 is only one bit */
if (devinfo.ver >= 12)
return;
/* Case 1: MOV whose destination file is forced to MRF. */
brw_MOV(p, g0, g0);
brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
/* An MRF destination is expected to validate only when devinfo.ver <= 6. */
if (devinfo.ver > 6) {
EXPECT_FALSE(validate(p));
} else {
EXPECT_TRUE(validate(p));
}
clear_instructions(p);
/* Case 2: math instruction whose src0 file is forced to MRF; this is
 * expected to be rejected on every version that reaches this point.
 */
gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
EXPECT_FALSE(validate(p));
}
TEST_P(validation_test, invalid_type_encoding)
{
enum brw_reg_file files[2] = {