mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 13:30:12 +01:00
intel/brw: Remove Gfx8- code from backend passes
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
parent
9569ea82a8
commit
7ac5696157
10 changed files with 64 additions and 276 deletions
|
|
@ -2341,10 +2341,10 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f
|
|||
if (inst->conditional_mod) {
|
||||
fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
|
||||
if (!inst->predicate &&
|
||||
(devinfo->ver < 5 || (inst->opcode != BRW_OPCODE_SEL &&
|
||||
(inst->opcode != BRW_OPCODE_SEL &&
|
||||
inst->opcode != BRW_OPCODE_CSEL &&
|
||||
inst->opcode != BRW_OPCODE_IF &&
|
||||
inst->opcode != BRW_OPCODE_WHILE))) {
|
||||
inst->opcode != BRW_OPCODE_WHILE)) {
|
||||
fprintf(file, ".f%d.%d", inst->flag_subreg / 2,
|
||||
inst->flag_subreg % 2);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -549,7 +549,6 @@ namespace {
|
|||
* Register allocation ensures that, so don't move 127 around to avoid
|
||||
* breaking that property.
|
||||
*/
|
||||
if (v->devinfo->ver >= 8)
|
||||
constrained[p.atom_of_reg(127)] = true;
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||
|
|
@ -567,18 +566,9 @@ namespace {
|
|||
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
|
||||
}
|
||||
|
||||
/* Preserve the original allocation of VGRFs used by the barycentric
|
||||
* source of the LINTERP instruction on Gfx6, since pair-aligned
|
||||
* barycentrics allow the PLN instruction to be used.
|
||||
*/
|
||||
if (v->devinfo->has_pln && v->devinfo->ver <= 6 &&
|
||||
inst->opcode == FS_OPCODE_LINTERP)
|
||||
constrained[p.atom_of_reg(reg_of(inst->src[0]))] = true;
|
||||
|
||||
/* The location of the Gfx7 MRF hack registers is hard-coded in the
|
||||
* rest of the compiler back-end. Don't attempt to move them around.
|
||||
*/
|
||||
if (v->devinfo->ver >= 7) {
|
||||
assert(inst->dst.file != MRF);
|
||||
|
||||
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||
|
|
@ -586,7 +576,6 @@ namespace {
|
|||
constrained[p.atom_of_reg(reg)] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return constrained;
|
||||
}
|
||||
|
|
@ -600,10 +589,10 @@ namespace {
|
|||
is_conflict_optimized_out(const intel_device_info *devinfo,
|
||||
const fs_inst *inst)
|
||||
{
|
||||
return devinfo->ver >= 9 &&
|
||||
((is_grf(inst->src[0]) && (reg_of(inst->src[0]) == reg_of(inst->src[1]) ||
|
||||
return
|
||||
(is_grf(inst->src[0]) && (reg_of(inst->src[0]) == reg_of(inst->src[1]) ||
|
||||
reg_of(inst->src[0]) == reg_of(inst->src[2]))) ||
|
||||
reg_of(inst->src[1]) == reg_of(inst->src[2]));
|
||||
reg_of(inst->src[1]) == reg_of(inst->src[2]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -915,10 +904,6 @@ brw_fs_opt_bank_conflicts(fs_visitor &s)
|
|||
if (s.devinfo->ver >= 20)
|
||||
return false;
|
||||
|
||||
/* No ternary instructions -- No bank conflicts. */
|
||||
if (s.devinfo->ver < 6)
|
||||
return false;
|
||||
|
||||
const partitioning p = shader_reg_partitioning(&s);
|
||||
const bool *constrained = shader_reg_constraints(&s, p);
|
||||
const weight_vector_type *conflicts =
|
||||
|
|
|
|||
|
|
@ -451,18 +451,10 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
|
|||
break;
|
||||
}
|
||||
} else if (scan_inst->conditional_mod == inst->conditional_mod) {
|
||||
/* On Gfx4 and Gfx5 sel.cond will dirty the flags, but the
|
||||
* flags value is not based on the result stored in the
|
||||
* destination. On all other platforms sel.cond will not
|
||||
* write the flags, so execution will not get to this point.
|
||||
*/
|
||||
if (scan_inst->opcode == BRW_OPCODE_SEL) {
|
||||
assert(devinfo->ver <= 5);
|
||||
} else {
|
||||
/* sel.cond will not write the flags. */
|
||||
assert(scan_inst->opcode != BRW_OPCODE_SEL);
|
||||
inst->remove(block, true);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
break;
|
||||
} else if (!read_flag && scan_inst->can_do_cmod()) {
|
||||
scan_inst->conditional_mod = inst->conditional_mod;
|
||||
|
|
|
|||
|
|
@ -764,30 +764,6 @@ brw_combine_constants(struct value *candidates, unsigned num_candidates)
|
|||
return combine_constants_greedy(candidates, num_candidates);
|
||||
}
|
||||
|
||||
/* Returns whether an instruction could co-issue if its immediate source were
|
||||
* replaced with a GRF source.
|
||||
*/
|
||||
static bool
|
||||
could_coissue(const struct intel_device_info *devinfo, const fs_inst *inst)
|
||||
{
|
||||
assert(inst->opcode == BRW_OPCODE_MOV ||
|
||||
inst->opcode == BRW_OPCODE_CMP ||
|
||||
inst->opcode == BRW_OPCODE_ADD ||
|
||||
inst->opcode == BRW_OPCODE_MUL);
|
||||
|
||||
if (devinfo->ver != 7)
|
||||
return false;
|
||||
|
||||
/* Only float instructions can coissue. We don't have a great
|
||||
* understanding of whether or not something like float(int(a) + int(b))
|
||||
* would be considered float (based on the destination type) or integer
|
||||
* (based on the source types), so we take the conservative choice of
|
||||
* only promoting when both destination and source are float.
|
||||
*/
|
||||
return inst->dst.type == BRW_REGISTER_TYPE_F &&
|
||||
inst->src[0].type == BRW_REGISTER_TYPE_F;
|
||||
}
|
||||
|
||||
/**
|
||||
* Box for storing fs_inst and some other necessary data
|
||||
*
|
||||
|
|
@ -1346,12 +1322,6 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
|
||||
devinfo, const_ctx);
|
||||
}
|
||||
|
||||
if (inst->src[1].file == IMM && devinfo->ver < 8) {
|
||||
add_candidate_immediate(&table, inst, ip, 1, true, false, block,
|
||||
devinfo, const_ctx);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ADD3:
|
||||
|
|
@ -1418,24 +1388,6 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MOV:
|
||||
if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) {
|
||||
add_candidate_immediate(&table, inst, ip, 0, false, false, block,
|
||||
devinfo, const_ctx);
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
case BRW_OPCODE_ADD:
|
||||
case BRW_OPCODE_MUL:
|
||||
assert(inst->src[0].file != IMM);
|
||||
|
||||
if (could_coissue(devinfo, inst) && inst->src[1].file == IMM) {
|
||||
add_candidate_immediate(&table, inst, ip, 1, false, false, block,
|
||||
devinfo, const_ctx);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -1552,7 +1504,6 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
if (s.cfg->num_blocks != 1)
|
||||
qsort(table.imm, table.len, sizeof(struct imm), compare);
|
||||
|
||||
if (devinfo->ver > 7) {
|
||||
struct register_allocation *regs =
|
||||
(struct register_allocation *) calloc(table.len, sizeof(regs[0]));
|
||||
|
||||
|
|
@ -1567,31 +1518,6 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
}
|
||||
|
||||
free(regs);
|
||||
} else {
|
||||
fs_reg reg(VGRF, s.alloc.allocate(1));
|
||||
reg.stride = 0;
|
||||
|
||||
for (int i = 0; i < table.len; i++) {
|
||||
struct imm *imm = &table.imm[i];
|
||||
|
||||
/* Put the immediate in an offset aligned to its size. Some
|
||||
* instructions seem to have additional alignment requirements, so
|
||||
* account for that too.
|
||||
*/
|
||||
reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));
|
||||
|
||||
/* Ensure we have enough space in the register to copy the immediate */
|
||||
if (reg.offset + imm->size > REG_SIZE) {
|
||||
reg.nr = s.alloc.allocate(1);
|
||||
reg.offset = 0;
|
||||
}
|
||||
|
||||
imm->nr = reg.nr;
|
||||
imm->subreg_offset = reg.offset;
|
||||
|
||||
reg.offset += imm->size;
|
||||
}
|
||||
}
|
||||
|
||||
bool rebuild_cfg = false;
|
||||
|
||||
|
|
@ -1661,7 +1587,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
* replicating the single one we want. To avoid this, we always populate
|
||||
* both HF slots within a DWord with the constant.
|
||||
*/
|
||||
const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
|
||||
const uint32_t width = 1;
|
||||
const fs_builder ibld = fs_builder(&s, width).at(insert_block, n).exec_all();
|
||||
|
||||
fs_reg reg(VGRF, imm->nr);
|
||||
|
|
|
|||
|
|
@ -630,14 +630,8 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type,
|
|||
* are sends, so the sources are moved to MRF's and there are no
|
||||
* restrictions.
|
||||
*/
|
||||
if (inst->is_math()) {
|
||||
if (devinfo->ver == 6 || devinfo->ver == 7) {
|
||||
assert(inst->dst.stride == 1);
|
||||
return stride == 1 || stride == 0;
|
||||
} else if (devinfo->ver >= 8) {
|
||||
if (inst->is_math())
|
||||
return stride == inst->dst.stride || stride == 0;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -725,15 +719,6 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
}
|
||||
}
|
||||
|
||||
/* Avoid propagating odd-numbered FIXED_GRF registers into the first source
|
||||
* of a LINTERP instruction on platforms where the PLN instruction has
|
||||
* register alignment restrictions.
|
||||
*/
|
||||
if (devinfo->has_pln && devinfo->ver <= 6 &&
|
||||
entry->src.file == FIXED_GRF && (entry->src.nr & 1) &&
|
||||
inst->opcode == FS_OPCODE_LINTERP && arg == 0)
|
||||
return false;
|
||||
|
||||
/* we can't generally copy-propagate UD negations because we
|
||||
* can end up accessing the resulting values as signed integers
|
||||
* instead. See also resolve_ud_negate() and comment in
|
||||
|
|
@ -750,8 +735,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
|
||||
/* Reject cases that would violate register regioning restrictions. */
|
||||
if ((entry->src.file == UNIFORM || !entry->src.is_contiguous()) &&
|
||||
((devinfo->ver == 6 && inst->is_math()) ||
|
||||
inst->is_send_from_grf() ||
|
||||
(inst->is_send_from_grf() ||
|
||||
inst->uses_indirect_addressing())) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -867,7 +851,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
type_sz(entry->dst.type) != type_sz(inst->src[arg].type)))
|
||||
return false;
|
||||
|
||||
if (devinfo->ver >= 8 && (entry->src.negate || entry->src.abs) &&
|
||||
if ((entry->src.negate || entry->src.abs) &&
|
||||
is_logic_op(inst->opcode)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -946,7 +930,6 @@ static bool
|
|||
try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
acp_entry *entry, int arg)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
bool progress = false;
|
||||
|
||||
if (type_sz(entry->src.type) > 4)
|
||||
|
|
@ -1002,14 +985,14 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
val.type = inst->src[arg].type;
|
||||
|
||||
if (inst->src[arg].abs) {
|
||||
if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) ||
|
||||
if (is_logic_op(inst->opcode) ||
|
||||
!brw_abs_immediate(val.type, &val.as_brw_reg())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->src[arg].negate) {
|
||||
if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) ||
|
||||
if (is_logic_op(inst->opcode) ||
|
||||
!brw_negate_immediate(val.type, &val.as_brw_reg())) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1024,13 +1007,6 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
break;
|
||||
|
||||
case SHADER_OPCODE_POW:
|
||||
/* Allow constant propagation into src1 (except on Gen 6 which
|
||||
* doesn't support scalar source math), and let constant combining
|
||||
* promote the constant on Gen < 8.
|
||||
*/
|
||||
if (devinfo->ver == 6)
|
||||
break;
|
||||
|
||||
if (arg == 1) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
|
|
@ -1190,15 +1166,6 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
/* Allow constant propagation into either source (except on Gen 6
|
||||
* which doesn't support scalar source math). Constant combining
|
||||
* will promote the src1 constant on Gen < 8, and it will promote the src0
|
||||
* constant on all platforms.
|
||||
*/
|
||||
if (devinfo->ver == 6)
|
||||
break;
|
||||
|
||||
FALLTHROUGH;
|
||||
case BRW_OPCODE_AND:
|
||||
case BRW_OPCODE_ASR:
|
||||
case BRW_OPCODE_BFE:
|
||||
|
|
|
|||
|
|
@ -334,13 +334,12 @@ bool
|
|||
brw_fs_lower_barycentrics(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
const bool has_interleaved_layout = devinfo->has_pln ||
|
||||
(devinfo->ver >= 7 && devinfo->ver < 20);
|
||||
bool progress = false;
|
||||
|
||||
if (s.stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout)
|
||||
if (s.stage != MESA_SHADER_FRAGMENT || devinfo->ver >= 20)
|
||||
return false;
|
||||
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
if (inst->exec_size < 16)
|
||||
continue;
|
||||
|
|
@ -461,9 +460,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
|
|||
{
|
||||
bool progress = false;
|
||||
|
||||
if (s.devinfo->ver < 8)
|
||||
return false;
|
||||
|
||||
bool packed_dispatch =
|
||||
brw_stage_has_packed_dispatch(s.devinfo, s.stage, s.max_polygons,
|
||||
s.stage_prog_data);
|
||||
|
|
|
|||
|
|
@ -150,23 +150,16 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
*/
|
||||
if (inst->src[1].file == IMM &&
|
||||
(inst->src[1].d >= INT16_MIN && inst->src[1].d <= UINT16_MAX)) {
|
||||
/* The MUL instruction isn't commutative. On Gen <= 6, only the low
|
||||
* 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
|
||||
* src1 are used.
|
||||
/* The MUL instruction isn't commutative. On Gen >= 7 only
|
||||
* the low 16-bits of src1 are used.
|
||||
*
|
||||
* If multiplying by an immediate value that fits in 16-bits, do a
|
||||
* single MUL instruction with that value in the proper location.
|
||||
*/
|
||||
const bool ud = (inst->src[1].d >= 0);
|
||||
if (devinfo->ver < 7) {
|
||||
fs_reg imm(VGRF, s.alloc.allocate(s.dispatch_width / 8), inst->dst.type);
|
||||
ibld.MOV(imm, inst->src[1]);
|
||||
ibld.MUL(inst->dst, imm, inst->src[0]);
|
||||
} else {
|
||||
ibld.MUL(inst->dst, inst->src[0],
|
||||
ud ? brw_imm_uw(inst->src[1].ud)
|
||||
: brw_imm_w(inst->src[1].d));
|
||||
}
|
||||
} else {
|
||||
/* Gen < 8 (and some Gfx8+ low-power parts like Cherryview) cannot
|
||||
* do 32-bit integer multiplication in one instruction, but instead
|
||||
|
|
@ -239,7 +232,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
high.offset = inst->dst.offset % REG_SIZE;
|
||||
|
||||
bool do_addition = true;
|
||||
if (devinfo->ver >= 7) {
|
||||
{
|
||||
/* From Wa_1604601757:
|
||||
*
|
||||
* "When multiplying a DW and any lower precision integer, source modifier
|
||||
|
|
@ -294,14 +287,6 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
ibld.MUL(high, inst->src[0],
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
|
||||
}
|
||||
} else {
|
||||
if (inst->src[0].abs)
|
||||
lower_src_modifiers(&s, block, inst, 0);
|
||||
|
||||
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
|
||||
inst->src[1]);
|
||||
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
|
||||
inst->src[1]);
|
||||
}
|
||||
|
||||
if (do_addition) {
|
||||
|
|
@ -399,7 +384,7 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
* mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw
|
||||
* mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d"
|
||||
*/
|
||||
if (devinfo->ver >= 8 && (inst->src[1].negate || inst->src[1].abs))
|
||||
if (inst->src[1].negate || inst->src[1].abs)
|
||||
lower_src_modifiers(&s, block, inst, 1);
|
||||
|
||||
/* Should have been lowered to 8-wide. */
|
||||
|
|
@ -408,9 +393,8 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
|
||||
inst->group % acc_width);
|
||||
fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
|
||||
fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
|
||||
ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
/* Until Gfx8, integer multiplies read 32-bits from one source,
|
||||
* and 16-bits from the other, relying on the MACH instruction
|
||||
* to generate the high bits of the result.
|
||||
|
|
@ -427,29 +411,6 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
|||
if (mul->src[1].file == IMM) {
|
||||
mul->src[1] = brw_imm_uw(mul->src[1].ud);
|
||||
}
|
||||
} else if (devinfo->verx10 == 70 &&
|
||||
inst->group > 0) {
|
||||
/* Among other things the quarter control bits influence which
|
||||
* accumulator register is used by the hardware for instructions
|
||||
* that access the accumulator implicitly (e.g. MACH). A
|
||||
* second-half instruction would normally map to acc1, which
|
||||
* doesn't exist on Gfx7 and up (the hardware does emulate it for
|
||||
* floating-point instructions *only* by taking advantage of the
|
||||
* extra precision of acc0 not normally used for floating point
|
||||
* arithmetic).
|
||||
*
|
||||
* HSW and up are careful enough not to try to access an
|
||||
* accumulator register that doesn't exist, but on earlier Gfx7
|
||||
* hardware we need to make sure that the quarter control bits are
|
||||
* zero to avoid non-deterministic behaviour and emit an extra MOV
|
||||
* to get the result masked correctly according to the current
|
||||
* channel enables.
|
||||
*/
|
||||
mach->group = 0;
|
||||
mach->force_writemask_all = true;
|
||||
mach->dst = ibld.vgrf(inst->dst.type);
|
||||
ibld.MOV(inst->dst, mach->dst);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -463,13 +424,8 @@ brw_fs_lower_integer_multiplication(fs_visitor &s)
|
|||
/* If the instruction is already in a form that does not need lowering,
|
||||
* return early.
|
||||
*/
|
||||
if (s.devinfo->ver >= 7) {
|
||||
if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4)
|
||||
continue;
|
||||
} else {
|
||||
if (type_sz(inst->src[0].type) < 4 && type_sz(inst->src[1].type) <= 4)
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((inst->dst.type == BRW_REGISTER_TYPE_Q ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_UQ) &&
|
||||
|
|
|
|||
|
|
@ -64,13 +64,6 @@ brw_fs_lower_pack(fs_visitor &s)
|
|||
const uint32_t half = _mesa_float_to_half(inst->src[i].f);
|
||||
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, i),
|
||||
brw_imm_uw(half));
|
||||
} else if (i == 1 && s.devinfo->ver < 9) {
|
||||
/* Pre-Skylake requires DWord aligned destinations */
|
||||
fs_reg tmp = ibld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ibld.F32TO16(subscript(tmp, BRW_REGISTER_TYPE_HF, 0),
|
||||
inst->src[i]);
|
||||
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, 1),
|
||||
subscript(tmp, BRW_REGISTER_TYPE_UW, 0));
|
||||
} else {
|
||||
ibld.F32TO16(subscript(dst, BRW_REGISTER_TYPE_HF, i),
|
||||
inst->src[i]);
|
||||
|
|
|
|||
|
|
@ -184,7 +184,6 @@ namespace {
|
|||
* support 64-bit types at all.
|
||||
*/
|
||||
if ((!has_64bit || devinfo->verx10 >= 125 ||
|
||||
devinfo->platform == INTEL_PLATFORM_CHV ||
|
||||
intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
|
||||
return BRW_REGISTER_TYPE_UD;
|
||||
else
|
||||
|
|
@ -192,9 +191,7 @@ namespace {
|
|||
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
if (((devinfo->verx10 == 70 ||
|
||||
devinfo->platform == INTEL_PLATFORM_CHV ||
|
||||
intel_device_info_is_9lp(devinfo) ||
|
||||
if (((intel_device_info_is_9lp(devinfo) ||
|
||||
devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) ||
|
||||
(devinfo->verx10 >= 125 &&
|
||||
brw_reg_type_is_floating_point(inst->src[0].type)))
|
||||
|
|
@ -258,24 +255,6 @@ namespace {
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Empirical testing shows that Broadwell has a bug affecting half-float
|
||||
* MAD instructions when any of its sources has a non-zero offset, such
|
||||
* as:
|
||||
*
|
||||
* mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q };
|
||||
*
|
||||
* We used to generate code like this for SIMD8 executions where we
|
||||
* used to pack components Y and W of a vector at offset 16B of a SIMD
|
||||
* register. The problem doesn't occur if the stride of the source is 0.
|
||||
*/
|
||||
if (devinfo->ver == 8 &&
|
||||
inst->opcode == BRW_OPCODE_MAD &&
|
||||
inst->src[i].type == BRW_REGISTER_TYPE_HF &&
|
||||
reg_offset(inst->src[i]) % REG_SIZE > 0 &&
|
||||
inst->src[i].stride != 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const unsigned dst_byte_offset = reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE);
|
||||
const unsigned src_byte_offset = reg_offset(inst->src[i]) % (reg_unit(devinfo) * REG_SIZE);
|
||||
|
||||
|
|
|
|||
|
|
@ -190,9 +190,6 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
|
|||
bool
|
||||
brw_fs_opt_zero_samples(fs_visitor &s)
|
||||
{
|
||||
/* Implementation supports only SENDs, so applicable to Gfx7+ only. */
|
||||
assert(s.devinfo->ver >= 7);
|
||||
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, send, s.cfg) {
|
||||
|
|
@ -268,9 +265,6 @@ brw_fs_opt_zero_samples(fs_visitor &s)
|
|||
bool
|
||||
brw_fs_opt_split_sends(fs_visitor &s)
|
||||
{
|
||||
if (s.devinfo->ver < 9)
|
||||
return false;
|
||||
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, send, s.cfg) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue