mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-26 14:20:35 +01:00
intel/brw: Remove MRF type
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
parent
5c93a0e125
commit
8f3c52c1da
24 changed files with 17 additions and 531 deletions
|
|
@ -776,10 +776,6 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
|
|||
{
|
||||
int err = 0;
|
||||
|
||||
/* Clear the Compr4 instruction compression bit. */
|
||||
if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
|
||||
_reg_nr &= ~BRW_MRF_COMPR4;
|
||||
|
||||
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
|
||||
switch (_reg_nr & 0xf0) {
|
||||
case BRW_ARF_NULL:
|
||||
|
|
|
|||
|
|
@ -1510,12 +1510,6 @@ void gfx6_math(struct brw_codegen *p,
|
|||
struct brw_reg src0,
|
||||
struct brw_reg src1);
|
||||
|
||||
void brw_oword_block_read(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
struct brw_reg mrf,
|
||||
uint32_t offset,
|
||||
uint32_t bind_table_index);
|
||||
|
||||
unsigned brw_scratch_surface_idx(const struct brw_codegen *p);
|
||||
|
||||
void gfx7_block_read_scratch(struct brw_codegen *p,
|
||||
|
|
@ -1726,12 +1720,6 @@ next_offset(const struct intel_device_info *devinfo, void *store, int offset)
|
|||
/** Maximum SEND message length */
|
||||
#define BRW_MAX_MSG_LENGTH 15
|
||||
|
||||
/** First MRF register used by spills */
|
||||
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
|
||||
|
||||
/** First MRF register used by pull loads */
|
||||
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -840,12 +840,10 @@ enum ENUM_PACKED brw_predicate {
|
|||
enum ENUM_PACKED brw_reg_file {
|
||||
BRW_ARCHITECTURE_REGISTER_FILE = 0,
|
||||
BRW_GENERAL_REGISTER_FILE = 1,
|
||||
BRW_MESSAGE_REGISTER_FILE = 2,
|
||||
BRW_IMMEDIATE_VALUE = 3,
|
||||
|
||||
ARF = BRW_ARCHITECTURE_REGISTER_FILE,
|
||||
FIXED_GRF = BRW_GENERAL_REGISTER_FILE,
|
||||
MRF = BRW_MESSAGE_REGISTER_FILE,
|
||||
IMM = BRW_IMMEDIATE_VALUE,
|
||||
|
||||
/* These are not hardware values */
|
||||
|
|
@ -885,8 +883,6 @@ enum ENUM_PACKED gfx10_align1_3src_exec_type {
|
|||
#define BRW_ARF_TDR 0xB0
|
||||
#define BRW_ARF_TIMESTAMP 0xC0
|
||||
|
||||
#define BRW_MRF_COMPR4 (1 << 7)
|
||||
|
||||
#define BRW_AMASK 0
|
||||
#define BRW_IMASK 1
|
||||
#define BRW_LMASK 2
|
||||
|
|
|
|||
|
|
@ -35,31 +35,12 @@
|
|||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
static void
|
||||
gfx7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
|
||||
{
|
||||
/* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
|
||||
* "The send with EOT should use register space R112-R127 for <src>. This is
|
||||
* to enable loading of a new thread into the same slot while the message
|
||||
* with EOT for current thread is pending dispatch."
|
||||
*
|
||||
* Since we're pretending to have 16 MRFs anyway, we may as well use the
|
||||
* registers required for messages with EOT.
|
||||
*/
|
||||
if (reg->file == BRW_MESSAGE_REGISTER_FILE) {
|
||||
reg->file = BRW_GENERAL_REGISTER_FILE;
|
||||
reg->nr += GFX7_MRF_HACK_START;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
|
||||
if (dest.file == BRW_MESSAGE_REGISTER_FILE)
|
||||
assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
|
||||
else if (dest.file == BRW_GENERAL_REGISTER_FILE)
|
||||
if (dest.file == BRW_GENERAL_REGISTER_FILE)
|
||||
assert(dest.nr < XE2_MAX_GRF);
|
||||
|
||||
/* The hardware has a restriction where a destination of size Byte with
|
||||
|
|
@ -74,8 +55,6 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
|
|||
dest.hstride = BRW_HORIZONTAL_STRIDE_2;
|
||||
}
|
||||
|
||||
gfx7_convert_mrf_to_grf(p, &dest);
|
||||
|
||||
if (devinfo->ver >= 12 &&
|
||||
(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
|
||||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
|
||||
|
|
@ -118,8 +97,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
|
|||
} else {
|
||||
brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
|
||||
brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
|
||||
if (dest.file == BRW_GENERAL_REGISTER_FILE ||
|
||||
dest.file == BRW_MESSAGE_REGISTER_FILE) {
|
||||
if (dest.file == BRW_GENERAL_REGISTER_FILE) {
|
||||
assert(dest.writemask != 0);
|
||||
}
|
||||
/* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
|
||||
|
|
@ -173,19 +151,15 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
|
|||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
|
||||
if (reg.file == BRW_MESSAGE_REGISTER_FILE)
|
||||
assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
|
||||
else if (reg.file == BRW_GENERAL_REGISTER_FILE)
|
||||
if (reg.file == BRW_GENERAL_REGISTER_FILE)
|
||||
assert(reg.nr < XE2_MAX_GRF);
|
||||
|
||||
gfx7_convert_mrf_to_grf(p, &reg);
|
||||
|
||||
if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
|
||||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
|
||||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
|
||||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
|
||||
/* Any source modifiers or regions will be ignored, since this just
|
||||
* identifies the MRF/GRF to start reading the message contents from.
|
||||
* identifies the GRF to start reading the message contents from.
|
||||
* Check for some likely failures.
|
||||
*/
|
||||
assert(!reg.negate);
|
||||
|
|
@ -323,9 +297,6 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
|
|||
assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
|
||||
reg.nr != BRW_ARF_ACCUMULATOR);
|
||||
|
||||
gfx7_convert_mrf_to_grf(p, &reg);
|
||||
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
|
||||
|
||||
brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
|
||||
brw_inst_set_src1_abs(devinfo, inst, reg.abs);
|
||||
brw_inst_set_src1_negate(devinfo, inst, reg.negate);
|
||||
|
|
@ -631,8 +602,6 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
|||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
brw_inst *inst = next_insn(p, opcode);
|
||||
|
||||
gfx7_convert_mrf_to_grf(p, &dest);
|
||||
|
||||
assert(dest.nr < XE2_MAX_GRF);
|
||||
|
||||
if (devinfo->ver >= 10)
|
||||
|
|
@ -765,8 +734,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
|||
}
|
||||
|
||||
} else {
|
||||
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
|
||||
dest.file == BRW_MESSAGE_REGISTER_FILE);
|
||||
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
|
||||
assert(dest.type == BRW_REGISTER_TYPE_F ||
|
||||
dest.type == BRW_REGISTER_TYPE_DF ||
|
||||
dest.type == BRW_REGISTER_TYPE_D ||
|
||||
|
|
@ -1474,8 +1442,7 @@ void gfx6_math(struct brw_codegen *p,
|
|||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
|
||||
|
||||
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
|
||||
dest.file == BRW_MESSAGE_REGISTER_FILE);
|
||||
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
|
||||
|
||||
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
|
||||
|
|
@ -1552,70 +1519,6 @@ gfx7_block_read_scratch(struct brw_codegen *p,
|
|||
true); /* header present */
|
||||
}
|
||||
|
||||
/**
|
||||
* Read float[4] vectors from the data port constant cache.
|
||||
* Location (in buffer) should be a multiple of 16.
|
||||
* Used for fetching shader constants.
|
||||
*/
|
||||
void brw_oword_block_read(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
struct brw_reg mrf,
|
||||
uint32_t offset,
|
||||
uint32_t bind_table_index)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
const unsigned target_cache = GFX6_SFID_DATAPORT_CONSTANT_CACHE;
|
||||
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
|
||||
/* On newer hardware, offset is in units of owords. */
|
||||
offset /= 16;
|
||||
|
||||
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_set_default_flag_reg(p, 0, 0);
|
||||
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p,
|
||||
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
|
||||
mrf.nr,
|
||||
2), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(offset));
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
|
||||
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
|
||||
|
||||
brw_inst_set_sfid(devinfo, insn, target_cache);
|
||||
|
||||
/* cast dest to a uword[8] vector */
|
||||
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
brw_set_dest(p, insn, dest);
|
||||
brw_set_src0(p, insn, mrf);
|
||||
|
||||
brw_set_desc(p, insn,
|
||||
brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
|
||||
brw_dp_read_desc(devinfo, bind_table_index,
|
||||
BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
|
||||
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
|
||||
BRW_DATAPORT_READ_TARGET_DATA_CACHE));
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
brw_inst *
|
||||
gfx9_fb_READ(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
|
|
|
|||
|
|
@ -259,21 +259,6 @@ invalid_values(const struct brw_isa_info *isa, const brw_inst *inst)
|
|||
if (inst_is_send(isa, inst))
|
||||
return error_msg;
|
||||
|
||||
if (num_sources == 3) {
|
||||
/* Nothing to test:
|
||||
* No 3-src instructions on Gfx4-5
|
||||
* No reg file bits on Gfx6-10 (align16)
|
||||
* No invalid encodings on Gfx10-12 (align1)
|
||||
*/
|
||||
} else {
|
||||
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
|
||||
(num_sources > 0 &&
|
||||
brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
|
||||
(num_sources > 1 &&
|
||||
brw_inst_src1_reg_file(devinfo, inst) == MRF),
|
||||
"invalid register file encoding");
|
||||
}
|
||||
|
||||
if (error_msg.str)
|
||||
return error_msg;
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
|||
this->dst = dst;
|
||||
this->sources = sources;
|
||||
this->exec_size = exec_size;
|
||||
this->base_mrf = -1;
|
||||
|
||||
assert(dst.file != IMM && dst.file != UNIFORM);
|
||||
|
||||
|
|
@ -75,7 +74,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
|||
case VGRF:
|
||||
case ARF:
|
||||
case FIXED_GRF:
|
||||
case MRF:
|
||||
case ATTR:
|
||||
this->size_written = dst.component_size(exec_size);
|
||||
break;
|
||||
|
|
@ -538,7 +536,6 @@ fs_reg::is_contiguous() const
|
|||
case FIXED_GRF:
|
||||
return hstride == BRW_HORIZONTAL_STRIDE_1 &&
|
||||
vstride == width + hstride;
|
||||
case MRF:
|
||||
case VGRF:
|
||||
case ATTR:
|
||||
return stride == 1;
|
||||
|
|
@ -948,8 +945,6 @@ fs_inst::size_read(int arg) const
|
|||
case VGRF:
|
||||
case ATTR:
|
||||
return components_read(arg) * src[arg].component_size(exec_size);
|
||||
case MRF:
|
||||
unreachable("MRF registers are not allowed as sources");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1018,59 +1013,6 @@ fs_inst::flags_written(const intel_device_info *devinfo) const
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns how many MRFs an FS opcode will write over.
|
||||
*
|
||||
* Note that this is not the 0 or 1 implied writes in an actual gen
|
||||
* instruction -- the FS opcodes often generate MOVs in addition.
|
||||
*/
|
||||
unsigned
|
||||
fs_inst::implied_mrf_writes() const
|
||||
{
|
||||
if (mlen == 0)
|
||||
return 0;
|
||||
|
||||
if (base_mrf == -1)
|
||||
return 0;
|
||||
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_RCP:
|
||||
case SHADER_OPCODE_RSQ:
|
||||
case SHADER_OPCODE_SQRT:
|
||||
case SHADER_OPCODE_EXP2:
|
||||
case SHADER_OPCODE_LOG2:
|
||||
case SHADER_OPCODE_SIN:
|
||||
case SHADER_OPCODE_COS:
|
||||
return 1 * exec_size / 8;
|
||||
case SHADER_OPCODE_POW:
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
return 2 * exec_size / 8;
|
||||
case SHADER_OPCODE_TEX:
|
||||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_LOD:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
return 1;
|
||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
||||
return mlen;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
fs_inst::has_sampler_residency() const
|
||||
{
|
||||
|
|
@ -2274,9 +2216,6 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f
|
|||
case FIXED_GRF:
|
||||
fprintf(file, "g%d", inst->dst.nr);
|
||||
break;
|
||||
case MRF:
|
||||
fprintf(file, "m%d", inst->dst.nr);
|
||||
break;
|
||||
case BAD_FILE:
|
||||
fprintf(file, "(null)");
|
||||
break;
|
||||
|
|
@ -2333,9 +2272,6 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f
|
|||
case FIXED_GRF:
|
||||
fprintf(file, "g%d", inst->src[i].nr);
|
||||
break;
|
||||
case MRF:
|
||||
fprintf(file, "***m%d***", inst->src[i].nr);
|
||||
break;
|
||||
case ATTR:
|
||||
fprintf(file, "attr%d", inst->src[i].nr);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -322,8 +322,6 @@ public:
|
|||
fs_reg outputs[VARYING_SLOT_MAX];
|
||||
fs_reg dual_src_output;
|
||||
int first_non_payload_grf;
|
||||
/** Either BRW_MAX_GRF or GFX7_MRF_HACK_START */
|
||||
unsigned max_grf;
|
||||
|
||||
bool failed;
|
||||
char *fail_msg;
|
||||
|
|
@ -462,9 +460,6 @@ private:
|
|||
void generate_ddy(const fs_inst *inst,
|
||||
struct brw_reg dst, struct brw_reg src);
|
||||
void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
|
||||
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset);
|
||||
|
||||
void generate_halt(fs_inst *inst);
|
||||
|
||||
|
|
|
|||
|
|
@ -565,16 +565,6 @@ namespace {
|
|||
(is_grf(inst->src[i]) && inst->eot))
|
||||
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
|
||||
}
|
||||
|
||||
/* The location of the Gfx7 MRF hack registers is hard-coded in the
|
||||
* rest of the compiler back-end. Don't attempt to move them around.
|
||||
*/
|
||||
assert(inst->dst.file != MRF);
|
||||
|
||||
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||
const unsigned reg = GFX7_MRF_HACK_START + inst->base_mrf + i;
|
||||
constrained[p.atom_of_reg(reg)] = true;
|
||||
}
|
||||
}
|
||||
|
||||
return constrained;
|
||||
|
|
|
|||
|
|
@ -619,16 +619,6 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type,
|
|||
* The following restrictions apply for align1 mode: Scalar source is
|
||||
* supported. Source and destination horizontal stride must be the
|
||||
* same.
|
||||
*
|
||||
* From the Haswell PRM Volume 2b "Command Reference - Instructions", page
|
||||
* 134 ("Extended Math Function"):
|
||||
*
|
||||
* Scalar source is supported. Source and destination horizontal stride
|
||||
* must be 1.
|
||||
*
|
||||
* and similar language exists for IVB and SNB. Pre-SNB, math instructions
|
||||
* are sends, so the sources are moved to MRF's and there are no
|
||||
* restrictions.
|
||||
*/
|
||||
if (inst->is_math())
|
||||
return stride == inst->dst.stride || stride == 0;
|
||||
|
|
|
|||
|
|
@ -196,7 +196,6 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
|
|||
a->sfid == b->sfid &&
|
||||
a->desc == b->desc &&
|
||||
a->size_written == b->size_written &&
|
||||
a->base_mrf == b->base_mrf &&
|
||||
a->check_tdr == b->check_tdr &&
|
||||
a->send_has_side_effects == b->send_has_side_effects &&
|
||||
a->eot == b->eot &&
|
||||
|
|
|
|||
|
|
@ -44,8 +44,6 @@ brw_file_from_reg(fs_reg *reg)
|
|||
case FIXED_GRF:
|
||||
case VGRF:
|
||||
return BRW_GENERAL_REGISTER_FILE;
|
||||
case MRF:
|
||||
return BRW_MESSAGE_REGISTER_FILE;
|
||||
case IMM:
|
||||
return BRW_IMMEDIATE_VALUE;
|
||||
case BAD_FILE:
|
||||
|
|
@ -63,9 +61,6 @@ brw_reg_from_fs_reg(const struct intel_device_info *devinfo, fs_inst *inst,
|
|||
struct brw_reg brw_reg;
|
||||
|
||||
switch (reg->file) {
|
||||
case MRF:
|
||||
assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
|
||||
FALLTHROUGH;
|
||||
case VGRF:
|
||||
if (reg->stride == 0) {
|
||||
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
|
||||
|
|
@ -865,27 +860,6 @@ fs_generator::generate_scratch_header(fs_inst *inst, struct brw_reg dst)
|
|||
brw_inst_set_no_dd_check(p->devinfo, insn, true);
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset)
|
||||
{
|
||||
assert(type_sz(dst.type) == 4);
|
||||
assert(inst->mlen != 0);
|
||||
|
||||
assert(index.file == BRW_IMMEDIATE_VALUE &&
|
||||
index.type == BRW_REGISTER_TYPE_UD);
|
||||
uint32_t surf_index = index.ud;
|
||||
|
||||
assert(offset.file == BRW_IMMEDIATE_VALUE &&
|
||||
offset.type == BRW_REGISTER_TYPE_UD);
|
||||
uint32_t read_offset = offset.ud;
|
||||
|
||||
brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
|
||||
read_offset, surf_index);
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::enable_debug(const char *shader_name)
|
||||
{
|
||||
|
|
@ -1070,7 +1044,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
|
||||
assert(inst->force_writemask_all || inst->exec_size >= 4);
|
||||
assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
|
||||
assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->ver));
|
||||
assert(inst->mlen <= BRW_MAX_MSG_LENGTH * reg_unit(devinfo));
|
||||
|
||||
switch (inst->opcode) {
|
||||
|
|
@ -1338,14 +1311,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
brw_MOV_reloc_imm(p, dst, dst.type, src[0].ud);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
||||
assert(inst->force_writemask_all);
|
||||
generate_uniform_pull_constant_load(inst, dst,
|
||||
src[PULL_UNIFORM_CONSTANT_SRC_SURFACE],
|
||||
src[PULL_UNIFORM_CONSTANT_SRC_OFFSET]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case FS_OPCODE_FB_READ:
|
||||
generate_fb_read(inst, dst, src[0]);
|
||||
send_count++;
|
||||
|
|
|
|||
|
|
@ -88,14 +88,10 @@ brw_fs_lower_load_payload(fs_visitor &s)
|
|||
if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
|
||||
continue;
|
||||
|
||||
assert(inst->dst.file == MRF || inst->dst.file == VGRF);
|
||||
assert(inst->dst.file == VGRF);
|
||||
assert(inst->saturate == false);
|
||||
fs_reg dst = inst->dst;
|
||||
|
||||
/* Get rid of COMPR4. We'll add it back in if we need it */
|
||||
if (dst.file == MRF)
|
||||
dst.nr = dst.nr & ~BRW_MRF_COMPR4;
|
||||
|
||||
const fs_builder ibld(&s, block, inst);
|
||||
const fs_builder ubld = ibld.exec_all();
|
||||
|
||||
|
|
@ -116,59 +112,6 @@ brw_fs_lower_load_payload(fs_visitor &s)
|
|||
i += n;
|
||||
}
|
||||
|
||||
if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) &&
|
||||
inst->exec_size > 8) {
|
||||
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
|
||||
* a straightforward copy. Instead, the result of the
|
||||
* LOAD_PAYLOAD is treated as interleaved and the first four
|
||||
* non-header sources are unpacked as:
|
||||
*
|
||||
* m + 0: r0
|
||||
* m + 1: g0
|
||||
* m + 2: b0
|
||||
* m + 3: a0
|
||||
* m + 4: r1
|
||||
* m + 5: g1
|
||||
* m + 6: b1
|
||||
* m + 7: a1
|
||||
*
|
||||
* This is used for gen <= 5 fb writes.
|
||||
*/
|
||||
assert(inst->exec_size == 16);
|
||||
assert(inst->header_size + 4 <= inst->sources);
|
||||
for (uint8_t i = inst->header_size; i < inst->header_size + 4; i++) {
|
||||
if (inst->src[i].file != BAD_FILE) {
|
||||
if (s.devinfo->has_compr4) {
|
||||
fs_reg compr4_dst = retype(dst, inst->src[i].type);
|
||||
compr4_dst.nr |= BRW_MRF_COMPR4;
|
||||
ibld.MOV(compr4_dst, inst->src[i]);
|
||||
} else {
|
||||
/* Platform doesn't have COMPR4. We have to fake it */
|
||||
fs_reg mov_dst = retype(dst, inst->src[i].type);
|
||||
ibld.quarter(0).MOV(mov_dst, quarter(inst->src[i], 0));
|
||||
mov_dst.nr += 4;
|
||||
ibld.quarter(1).MOV(mov_dst, quarter(inst->src[i], 1));
|
||||
}
|
||||
}
|
||||
|
||||
dst.nr++;
|
||||
}
|
||||
|
||||
/* The loop above only ever incremented us through the first set
|
||||
* of 4 registers. However, thanks to the magic of COMPR4, we
|
||||
* actually wrote to the first 8 registers, so we need to take
|
||||
* that into account now.
|
||||
*/
|
||||
dst.nr += 4;
|
||||
|
||||
/* The COMPR4 code took care of the first 4 sources. We'll let
|
||||
* the regular path handle any remaining sources. Yes, we are
|
||||
* modifying the instruction but we're about to delete it so
|
||||
* this really doesn't hurt anything.
|
||||
*/
|
||||
inst->header_size += 4;
|
||||
}
|
||||
|
||||
for (uint8_t i = inst->header_size; i < inst->sources; i++) {
|
||||
dst.type = inst->src[i].type;
|
||||
if (inst->src[i].file != BAD_FILE) {
|
||||
|
|
@ -592,7 +535,7 @@ brw_fs_lower_sends_overlapping_payload(fs_visitor &s)
|
|||
}
|
||||
|
||||
/**
|
||||
* Three source instruction must have a GRF/MRF destination register.
|
||||
* Three source instruction must have a GRF destination register.
|
||||
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
|
||||
*/
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
* register.
|
||||
*/
|
||||
fs_reg low = inst->dst;
|
||||
if (orig_dst.is_null() || orig_dst.file == MRF ||
|
||||
if (orig_dst.is_null() ||
|
||||
regions_overlap(inst->dst, inst->size_written,
|
||||
inst->src[0], inst->size_read(0)) ||
|
||||
regions_overlap(inst->dst, inst->size_written,
|
||||
|
|
|
|||
|
|
@ -217,7 +217,6 @@ namespace {
|
|||
case UNIFORM:
|
||||
case IMM:
|
||||
case VGRF:
|
||||
case MRF:
|
||||
case ATTR:
|
||||
return reg.stride * type_sz(reg.type);
|
||||
case ARF:
|
||||
|
|
|
|||
|
|
@ -69,9 +69,9 @@ fs_visitor::assign_regs_trivial()
|
|||
}
|
||||
}
|
||||
|
||||
if (this->grf_used >= max_grf) {
|
||||
if (this->grf_used >= BRW_MAX_GRF) {
|
||||
fail("Ran out of regs on trivial allocator (%d/%d)\n",
|
||||
this->grf_used, max_grf);
|
||||
this->grf_used, BRW_MAX_GRF);
|
||||
} else {
|
||||
this->alloc.count = this->grf_used;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -764,7 +764,6 @@ namespace {
|
|||
reg_offset(r) / REG_SIZE);
|
||||
|
||||
return (r.file == VGRF || r.file == FIXED_GRF ? &grf_deps[reg] :
|
||||
r.file == MRF ? &grf_deps[GFX7_MRF_HACK_START + reg] :
|
||||
r.file == ARF && reg >= BRW_ARF_ADDRESS &&
|
||||
reg < BRW_ARF_ACCUMULATOR ? &addr_dep :
|
||||
r.file == ARF && reg >= BRW_ARF_ACCUMULATOR &&
|
||||
|
|
@ -1044,13 +1043,6 @@ namespace {
|
|||
if (inst->reads_accumulator_implicitly())
|
||||
sb.set(brw_acc_reg(8), dependency(TGL_REGDIST_SRC, jp, exec_all));
|
||||
|
||||
if (is_send(inst) && inst->base_mrf != -1) {
|
||||
const dependency rd_dep = dependency(TGL_SBID_SRC, ip, exec_all);
|
||||
|
||||
for (unsigned j = 0; j < inst->mlen; j++)
|
||||
sb.set(brw_uvec_mrf(8, inst->base_mrf + j, 0), rd_dep);
|
||||
}
|
||||
|
||||
/* Track any destination registers of this instruction. */
|
||||
const dependency wr_dep =
|
||||
is_unordered(devinfo, inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
|
||||
|
|
@ -1173,12 +1165,6 @@ namespace {
|
|||
add_dependency(ids, deps[ip], dep);
|
||||
}
|
||||
|
||||
if (is_send(inst) && inst->base_mrf != -1) {
|
||||
for (unsigned j = 0; j < inst->mlen; j++)
|
||||
add_dependency(ids, deps[ip], dependency_for_read(
|
||||
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
|
||||
}
|
||||
|
||||
if (is_unordered(devinfo, inst) && !inst->eot)
|
||||
add_dependency(ids, deps[ip],
|
||||
dependency(TGL_SBID_SET, ip, exec_all));
|
||||
|
|
@ -1204,12 +1190,6 @@ namespace {
|
|||
if (dep.ordered && !is_single_pipe(dep.jp, p))
|
||||
add_dependency(ids, deps[ip], dep);
|
||||
}
|
||||
|
||||
if (is_send(inst) && inst->base_mrf != -1) {
|
||||
for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
|
||||
add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
|
||||
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
|
||||
}
|
||||
}
|
||||
|
||||
update_inst_scoreboard(shader, jps, inst, ip, sb);
|
||||
|
|
|
|||
|
|
@ -1140,7 +1140,6 @@ fs_visitor::init()
|
|||
this->source_depth_to_render_target = false;
|
||||
this->runtime_check_aads_emit = false;
|
||||
this->first_non_payload_grf = 0;
|
||||
this->max_grf = GFX7_MRF_HACK_START;
|
||||
|
||||
this->uniforms = 0;
|
||||
this->last_scratch = 0;
|
||||
|
|
|
|||
|
|
@ -45,14 +45,14 @@ struct backend_reg : private brw_reg
|
|||
|
||||
const brw_reg &as_brw_reg() const
|
||||
{
|
||||
assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM);
|
||||
assert(file == ARF || file == FIXED_GRF || file == IMM);
|
||||
assert(offset == 0);
|
||||
return static_cast<const brw_reg &>(*this);
|
||||
}
|
||||
|
||||
brw_reg &as_brw_reg()
|
||||
{
|
||||
assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM);
|
||||
assert(file == ARF || file == FIXED_GRF || file == IMM);
|
||||
assert(offset == 0);
|
||||
return static_cast<brw_reg &>(*this);
|
||||
}
|
||||
|
|
@ -159,7 +159,6 @@ struct backend_instruction {
|
|||
uint32_t offset; /**< spill/unspill offset or texture offset bitfield */
|
||||
uint8_t mlen; /**< SEND message length */
|
||||
uint8_t ex_mlen; /**< SENDS extended message length */
|
||||
int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
|
||||
uint8_t target; /**< MRT target. */
|
||||
uint8_t sfid; /**< SFID for SEND instructions */
|
||||
uint32_t desc; /**< SEND[S] message descriptor immediate */
|
||||
|
|
|
|||
|
|
@ -80,12 +80,6 @@ byte_offset(fs_reg reg, unsigned delta)
|
|||
case UNIFORM:
|
||||
reg.offset += delta;
|
||||
break;
|
||||
case MRF: {
|
||||
const unsigned suboffset = reg.offset + delta;
|
||||
reg.nr += suboffset / REG_SIZE;
|
||||
reg.offset = suboffset % REG_SIZE;
|
||||
break;
|
||||
}
|
||||
case ARF:
|
||||
case FIXED_GRF: {
|
||||
const unsigned suboffset = reg.subnr + delta;
|
||||
|
|
@ -113,7 +107,6 @@ horiz_offset(const fs_reg &reg, unsigned delta)
|
|||
*/
|
||||
return reg;
|
||||
case VGRF:
|
||||
case MRF:
|
||||
case ATTR:
|
||||
return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
|
||||
case ARF:
|
||||
|
|
@ -144,7 +137,6 @@ offset(fs_reg reg, unsigned width, unsigned delta)
|
|||
break;
|
||||
case ARF:
|
||||
case FIXED_GRF:
|
||||
case MRF:
|
||||
case VGRF:
|
||||
case ATTR:
|
||||
case UNIFORM:
|
||||
|
|
@ -212,31 +204,6 @@ reg_padding(const fs_reg &r)
|
|||
return (MAX2(1, stride) - 1) * type_sz(r.type);
|
||||
}
|
||||
|
||||
/* Do not call this directly. Call regions_overlap() instead. */
|
||||
static inline bool
|
||||
regions_overlap_MRF(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
||||
{
|
||||
if (r.nr & BRW_MRF_COMPR4) {
|
||||
fs_reg t = r;
|
||||
t.nr &= ~BRW_MRF_COMPR4;
|
||||
/* COMPR4 regions are translated by the hardware during decompression
|
||||
* into two separate half-regions 4 MRFs apart from each other.
|
||||
*
|
||||
* Note: swapping s and t in this parameter list eliminates one possible
|
||||
* level of recursion (since the s in the called versions of
|
||||
* regions_overlap_MRF can't be COMPR4), and that makes the compiled
|
||||
* code a lot smaller.
|
||||
*/
|
||||
return regions_overlap_MRF(s, ds, t, dr / 2) ||
|
||||
regions_overlap_MRF(s, ds, byte_offset(t, 4 * REG_SIZE), dr / 2);
|
||||
} else if (s.nr & BRW_MRF_COMPR4) {
|
||||
return regions_overlap_MRF(s, ds, r, dr);
|
||||
}
|
||||
|
||||
return !((r.nr * REG_SIZE + r.offset + dr) <= (s.nr * REG_SIZE + s.offset) ||
|
||||
(s.nr * REG_SIZE + s.offset + ds) <= (r.nr * REG_SIZE + r.offset));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether the register region starting at \p r and spanning \p dr
|
||||
* bytes could potentially overlap the register region starting at \p s and
|
||||
|
|
@ -251,11 +218,9 @@ regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
|||
if (r.file == VGRF) {
|
||||
return r.nr == s.nr &&
|
||||
!(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
|
||||
} else if (r.file != MRF) {
|
||||
} else {
|
||||
return !(reg_offset(r) + dr <= reg_offset(s) ||
|
||||
reg_offset(s) + ds <= reg_offset(r));
|
||||
} else {
|
||||
return regions_overlap_MRF(r, dr, s, ds);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -392,7 +357,6 @@ public:
|
|||
bool can_do_cmod();
|
||||
bool can_change_types() const;
|
||||
bool has_source_and_destination_hazard() const;
|
||||
unsigned implied_mrf_writes() const;
|
||||
|
||||
/**
|
||||
* Return whether \p arg is a control source of a virtual instruction which
|
||||
|
|
|
|||
|
|
@ -70,10 +70,8 @@ namespace {
|
|||
enum intel_eu_dependency_id {
|
||||
/* Register part of the GRF. */
|
||||
EU_DEPENDENCY_ID_GRF0 = 0,
|
||||
/* Register part of the MRF. Only used on Gfx4-6. */
|
||||
EU_DEPENDENCY_ID_MRF0 = EU_DEPENDENCY_ID_GRF0 + XE2_MAX_GRF,
|
||||
/* Address register part of the ARF. */
|
||||
EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_MRF0 + 24,
|
||||
EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_GRF0 + XE2_MAX_GRF,
|
||||
/* Accumulator register part of the ARF. */
|
||||
EU_DEPENDENCY_ID_ACCUM0 = EU_DEPENDENCY_ID_ADDR0 + 1,
|
||||
/* Flag register part of the ARF. */
|
||||
|
|
@ -807,18 +805,12 @@ namespace {
|
|||
{
|
||||
if (r.file == VGRF) {
|
||||
const unsigned i = r.nr + r.offset / REG_SIZE + delta;
|
||||
assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0);
|
||||
assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
|
||||
|
||||
} else if (r.file == FIXED_GRF) {
|
||||
const unsigned i = r.nr + delta;
|
||||
assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
|
||||
|
||||
} else if (r.file == MRF) {
|
||||
const unsigned i = GFX7_MRF_HACK_START +
|
||||
r.nr + r.offset / REG_SIZE + delta;
|
||||
assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0);
|
||||
assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
|
||||
|
||||
} else if (r.file == ARF && r.nr >= BRW_ARF_ADDRESS &&
|
||||
|
|
@ -922,13 +914,6 @@ namespace {
|
|||
st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
|
||||
}
|
||||
|
||||
if (is_send(inst) && inst->base_mrf != -1) {
|
||||
for (unsigned j = 0; j < inst->mlen; j++)
|
||||
stall_on_dependency(
|
||||
st, reg_dependency_id(
|
||||
devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
|
||||
}
|
||||
|
||||
if (const unsigned mask = inst->flags_read(devinfo)) {
|
||||
for (unsigned i = 0; i < sizeof(mask) * CHAR_BIT; i++) {
|
||||
if (mask & (1 << i))
|
||||
|
|
@ -980,12 +965,6 @@ namespace {
|
|||
}
|
||||
}
|
||||
|
||||
if (is_send(inst) && inst->base_mrf != -1) {
|
||||
for (unsigned j = 0; j < inst->mlen; j++)
|
||||
mark_read_dependency(st, perf,
|
||||
reg_dependency_id(devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
|
||||
}
|
||||
|
||||
/* Mark any destination dependencies. */
|
||||
if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
|
||||
for (unsigned j = 0; j < regs_written(inst); j++) {
|
||||
|
|
|
|||
|
|
@ -310,9 +310,6 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||
|
||||
assert(inst->target != 0 || src0_alpha.file == BAD_FILE);
|
||||
|
||||
/* We can potentially have a message length of up to 15, so we have to set
|
||||
* base_mrf to either 0 or 1 in order to fit in m0..m15.
|
||||
*/
|
||||
fs_reg sources[15];
|
||||
int header_size = 2, payload_header_size;
|
||||
unsigned length = 0;
|
||||
|
|
|
|||
|
|
@ -61,17 +61,6 @@ struct intel_device_info;
|
|||
#define BRW_MAX_GRF 128
|
||||
#define XE2_MAX_GRF 256
|
||||
|
||||
/**
|
||||
* First GRF used for the MRF hack.
|
||||
*
|
||||
* On gfx7, MRFs are no longer used, and contiguous GRFs are used instead. We
|
||||
* haven't converted our compiler to be aware of this, so it asks for MRFs and
|
||||
* brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
|
||||
* register allocators have to be careful of this to avoid corrupting the "MRF"s
|
||||
* with actual GRF allocations.
|
||||
*/
|
||||
#define GFX7_MRF_HACK_START 112
|
||||
|
||||
/**
|
||||
* BRW hardware swizzles.
|
||||
* Only defines XYZW to ensure it can be contained in 2 bits
|
||||
|
|
@ -81,9 +70,6 @@ struct intel_device_info;
|
|||
#define BRW_SWIZZLE_Z 2
|
||||
#define BRW_SWIZZLE_W 3
|
||||
|
||||
/** Number of message register file registers */
|
||||
#define BRW_MAX_MRF(gen) (gen == 6 ? 24 : 16)
|
||||
|
||||
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
|
||||
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
|
||||
|
||||
|
|
@ -454,10 +440,6 @@ brw_reg(enum brw_reg_file file,
|
|||
assert(nr < XE2_MAX_GRF);
|
||||
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
|
||||
assert(nr <= BRW_ARF_TIMESTAMP);
|
||||
/* Asserting on the MRF register number requires to know the hardware gen
|
||||
* (gfx6 has 24 MRF registers), which we don't know here, so we assert
|
||||
* for that in the generators and in brw_eu_emit.c
|
||||
*/
|
||||
|
||||
reg.type = type;
|
||||
reg.file = file;
|
||||
|
|
@ -1050,19 +1032,6 @@ brw_mask_stack_depth_reg(unsigned subnr)
|
|||
BRW_ARF_MASK_STACK_DEPTH, subnr);
|
||||
}
|
||||
|
||||
static inline struct brw_reg
|
||||
brw_message_reg(unsigned nr)
|
||||
{
|
||||
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
|
||||
}
|
||||
|
||||
static inline struct brw_reg
|
||||
brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
|
||||
{
|
||||
return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
}
|
||||
|
||||
/* This is almost always called with a numeric constant argument, so
|
||||
* make things easy to evaluate at compile time:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1174,7 +1174,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
* After register allocation, reg_offsets are gone and we track individual
|
||||
* GRF registers.
|
||||
*/
|
||||
schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->ver)];
|
||||
schedule_node *last_conditional_mod[8] = {};
|
||||
schedule_node *last_accumulator_write = NULL;
|
||||
/* Fixed HW registers are assumed to be separate from the virtual
|
||||
|
|
@ -1184,8 +1183,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
*/
|
||||
schedule_node *last_fixed_grf_write = NULL;
|
||||
|
||||
memset(last_mrf_write, 0, sizeof(last_mrf_write));
|
||||
|
||||
/* top-to-bottom dependencies: RAW and WAW. */
|
||||
for (schedule_node *n = current.start; n < current.end; n++) {
|
||||
fs_inst *inst = (fs_inst *)n->inst;
|
||||
|
|
@ -1223,16 +1220,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->base_mrf != -1) {
|
||||
for (int i = 0; i < inst->mlen; i++) {
|
||||
/* It looks like the MRF regs are released in the send
|
||||
* instruction once it's sent, not when the result comes
|
||||
* back.
|
||||
*/
|
||||
add_dep(last_mrf_write[inst->base_mrf + i], n);
|
||||
}
|
||||
}
|
||||
|
||||
if (const unsigned mask = inst->flags_read(v->devinfo)) {
|
||||
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
|
||||
|
||||
|
|
@ -1261,19 +1248,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
inst->dst.offset / REG_SIZE + r] = n;
|
||||
}
|
||||
}
|
||||
} else if (inst->dst.file == MRF) {
|
||||
int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
|
||||
|
||||
add_dep(last_mrf_write[reg], n);
|
||||
last_mrf_write[reg] = n;
|
||||
if (is_compressed(inst)) {
|
||||
if (inst->dst.nr & BRW_MRF_COMPR4)
|
||||
reg += 4;
|
||||
else
|
||||
reg++;
|
||||
add_dep(last_mrf_write[reg], n);
|
||||
last_mrf_write[reg] = n;
|
||||
}
|
||||
} else if (inst->dst.file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned r = 0; r < regs_written(inst); r++) {
|
||||
|
|
@ -1291,13 +1265,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
add_barrier_deps(n);
|
||||
}
|
||||
|
||||
if (inst->mlen > 0 && inst->base_mrf != -1) {
|
||||
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||
add_dep(last_mrf_write[inst->base_mrf + i], n);
|
||||
last_mrf_write[inst->base_mrf + i] = n;
|
||||
}
|
||||
}
|
||||
|
||||
if (const unsigned mask = inst->flags_written(v->devinfo)) {
|
||||
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
|
||||
|
||||
|
|
@ -1319,7 +1286,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
clear_last_grf_write();
|
||||
|
||||
/* bottom-to-top dependencies: WAR */
|
||||
memset(last_mrf_write, 0, sizeof(last_mrf_write));
|
||||
memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
|
||||
last_accumulator_write = NULL;
|
||||
last_fixed_grf_write = NULL;
|
||||
|
|
@ -1353,16 +1319,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->base_mrf != -1) {
|
||||
for (int i = 0; i < inst->mlen; i++) {
|
||||
/* It looks like the MRF regs are released in the send
|
||||
* instruction once it's sent, not when the result comes
|
||||
* back.
|
||||
*/
|
||||
add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
|
||||
}
|
||||
}
|
||||
|
||||
if (const unsigned mask = inst->flags_read(v->devinfo)) {
|
||||
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
|
||||
|
||||
|
|
@ -1389,19 +1345,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
inst->dst.offset / REG_SIZE + r] = n;
|
||||
}
|
||||
}
|
||||
} else if (inst->dst.file == MRF) {
|
||||
int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
|
||||
|
||||
last_mrf_write[reg] = n;
|
||||
|
||||
if (is_compressed(inst)) {
|
||||
if (inst->dst.nr & BRW_MRF_COMPR4)
|
||||
reg += 4;
|
||||
else
|
||||
reg++;
|
||||
|
||||
last_mrf_write[reg] = n;
|
||||
}
|
||||
} else if (inst->dst.file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned r = 0; r < regs_written(inst); r++)
|
||||
|
|
@ -1415,12 +1358,6 @@ fs_instruction_scheduler::calculate_deps()
|
|||
add_barrier_deps(n);
|
||||
}
|
||||
|
||||
if (inst->mlen > 0 && inst->base_mrf != -1) {
|
||||
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||
last_mrf_write[inst->base_mrf + i] = n;
|
||||
}
|
||||
}
|
||||
|
||||
if (const unsigned mask = inst->flags_written(v->devinfo)) {
|
||||
assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
|
||||
|
||||
|
|
|
|||
|
|
@ -222,29 +222,6 @@ TEST_P(validation_test, invalid_exec_size_encoding)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_P(validation_test, invalid_file_encoding)
|
||||
{
|
||||
/* Register file on Gfx12 is only one bit */
|
||||
if (devinfo.ver >= 12)
|
||||
return;
|
||||
|
||||
brw_MOV(p, g0, g0);
|
||||
brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
|
||||
|
||||
if (devinfo.ver > 6) {
|
||||
EXPECT_FALSE(validate(p));
|
||||
} else {
|
||||
EXPECT_TRUE(validate(p));
|
||||
}
|
||||
|
||||
clear_instructions(p);
|
||||
|
||||
gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
|
||||
brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
|
||||
|
||||
EXPECT_FALSE(validate(p));
|
||||
}
|
||||
|
||||
TEST_P(validation_test, invalid_type_encoding)
|
||||
{
|
||||
enum brw_reg_file files[2] = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue