diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 2806ce9b6d8..64febe25521 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2341,10 +2341,10 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f if (inst->conditional_mod) { fprintf(file, "%s", conditional_modifier[inst->conditional_mod]); if (!inst->predicate && - (devinfo->ver < 5 || (inst->opcode != BRW_OPCODE_SEL && - inst->opcode != BRW_OPCODE_CSEL && - inst->opcode != BRW_OPCODE_IF && - inst->opcode != BRW_OPCODE_WHILE))) { + (inst->opcode != BRW_OPCODE_SEL && + inst->opcode != BRW_OPCODE_CSEL && + inst->opcode != BRW_OPCODE_IF && + inst->opcode != BRW_OPCODE_WHILE)) { fprintf(file, ".f%d.%d", inst->flag_subreg / 2, inst->flag_subreg % 2); } diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index f611a8a81b5..a32c878757b 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -549,8 +549,7 @@ namespace { * Register allocation ensures that, so don't move 127 around to avoid * breaking that property. */ - if (v->devinfo->ver >= 8) - constrained[p.atom_of_reg(127)] = true; + constrained[p.atom_of_reg(127)] = true; foreach_block_and_inst(block, fs_inst, inst, v->cfg) { /* Assume that anything referenced via fixed GRFs is baked into the @@ -567,24 +566,14 @@ namespace { constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true; } - /* Preserve the original allocation of VGRFs used by the barycentric - * source of the LINTERP instruction on Gfx6, since pair-aligned - * barycentrics allow the PLN instruction to be used. - */ - if (v->devinfo->has_pln && v->devinfo->ver <= 6 && - inst->opcode == FS_OPCODE_LINTERP) - constrained[p.atom_of_reg(reg_of(inst->src[0]))] = true; - /* The location of the Gfx7 MRF hack registers is hard-coded in the * rest of the compiler back-end. Don't attempt to move them around. */ - if (v->devinfo->ver >= 7) { - assert(inst->dst.file != MRF); + assert(inst->dst.file != MRF); - for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { - const unsigned reg = GFX7_MRF_HACK_START + inst->base_mrf + i; - constrained[p.atom_of_reg(reg)] = true; - } + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { + const unsigned reg = GFX7_MRF_HACK_START + inst->base_mrf + i; + constrained[p.atom_of_reg(reg)] = true; } } @@ -600,10 +589,10 @@ namespace { is_conflict_optimized_out(const intel_device_info *devinfo, const fs_inst *inst) { - return devinfo->ver >= 9 && - ((is_grf(inst->src[0]) && (reg_of(inst->src[0]) == reg_of(inst->src[1]) || - reg_of(inst->src[0]) == reg_of(inst->src[2]))) || - reg_of(inst->src[1]) == reg_of(inst->src[2])); + return + (is_grf(inst->src[0]) && (reg_of(inst->src[0]) == reg_of(inst->src[1]) || + reg_of(inst->src[0]) == reg_of(inst->src[2]))) || + reg_of(inst->src[1]) == reg_of(inst->src[2]); } /** @@ -915,10 +904,6 @@ brw_fs_opt_bank_conflicts(fs_visitor &s) if (s.devinfo->ver >= 20) return false; - /* No ternary instructions -- No bank conflicts. */ - if (s.devinfo->ver < 6) - return false; - const partitioning p = shader_reg_partitioning(&s); const bool *constrained = shader_reg_constraints(&s, p); const weight_vector_type *conflicts = diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index ca8ee21f69a..be73dff54b2 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -451,18 +451,10 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block) break; } } else if (scan_inst->conditional_mod == inst->conditional_mod) { - /* On Gfx4 and Gfx5 sel.cond will dirty the flags, but the - * flags value is not based on the result stored in the - * destination. On all other platforms sel.cond will not - * write the flags, so execution will not get to this point. - */ - if (scan_inst->opcode == BRW_OPCODE_SEL) { - assert(devinfo->ver <= 5); - } else { - inst->remove(block, true); - progress = true; - } - + /* sel.cond will not write the flags. */ + assert(scan_inst->opcode != BRW_OPCODE_SEL); + inst->remove(block, true); + progress = true; break; } else if (!read_flag && scan_inst->can_do_cmod()) { scan_inst->conditional_mod = inst->conditional_mod; diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index d4066ced9d8..3b6676449e9 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -764,30 +764,6 @@ brw_combine_constants(struct value *candidates, unsigned num_candidates) return combine_constants_greedy(candidates, num_candidates); } -/* Returns whether an instruction could co-issue if its immediate source were - * replaced with a GRF source. - */ -static bool -could_coissue(const struct intel_device_info *devinfo, const fs_inst *inst) -{ - assert(inst->opcode == BRW_OPCODE_MOV || - inst->opcode == BRW_OPCODE_CMP || - inst->opcode == BRW_OPCODE_ADD || - inst->opcode == BRW_OPCODE_MUL); - - if (devinfo->ver != 7) - return false; - - /* Only float instructions can coissue. We don't have a great - * understanding of whether or not something like float(int(a) + int(b)) - * would be considered float (based on the destination type) or integer - * (based on the source types), so we take the conservative choice of - * only promoting when both destination and source are float. - */ - return inst->dst.type == BRW_REGISTER_TYPE_F && - inst->src[0].type == BRW_REGISTER_TYPE_F; -} - /** * Box for storing fs_inst and some other necessary data * @@ -1346,12 +1322,6 @@ brw_fs_opt_combine_constants(fs_visitor &s) add_candidate_immediate(&table, inst, ip, 0, true, false, block, devinfo, const_ctx); } - - if (inst->src[1].file == IMM && devinfo->ver < 8) { - add_candidate_immediate(&table, inst, ip, 1, true, false, block, - devinfo, const_ctx); - } - break; case BRW_OPCODE_ADD3: @@ -1418,24 +1388,6 @@ brw_fs_opt_combine_constants(fs_visitor &s) } break; - case BRW_OPCODE_MOV: - if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) { - add_candidate_immediate(&table, inst, ip, 0, false, false, block, - devinfo, const_ctx); - } - break; - - case BRW_OPCODE_CMP: - case BRW_OPCODE_ADD: - case BRW_OPCODE_MUL: - assert(inst->src[0].file != IMM); - - if (could_coissue(devinfo, inst) && inst->src[1].file == IMM) { - add_candidate_immediate(&table, inst, ip, 1, false, false, block, - devinfo, const_ctx); - } - break; - default: break; } @@ -1552,47 +1504,21 @@ brw_fs_opt_combine_constants(fs_visitor &s) if (s.cfg->num_blocks != 1) qsort(table.imm, table.len, sizeof(struct imm), compare); - if (devinfo->ver > 7) { - struct register_allocation *regs = - (struct register_allocation *) calloc(table.len, sizeof(regs[0])); + struct register_allocation *regs = + (struct register_allocation *) calloc(table.len, sizeof(regs[0])); - for (int i = 0; i < table.len; i++) { - regs[i].nr = UINT_MAX; - regs[i].avail = 0xffff; - } - - foreach_block(block, s.cfg) { - parcel_out_registers(table.imm, table.len, block, regs, table.len, - s.alloc, devinfo->ver); - } - - free(regs); - } else { - fs_reg reg(VGRF, s.alloc.allocate(1)); - reg.stride = 0; - - for (int i = 0; i < table.len; i++) { - struct imm *imm = &table.imm[i]; - - /* Put the immediate in an offset aligned to its size. Some - * instructions seem to have additional alignment requirements, so - * account for that too. - */ - reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm)); - - /* Ensure we have enough space in the register to copy the immediate */ - if (reg.offset + imm->size > REG_SIZE) { - reg.nr = s.alloc.allocate(1); - reg.offset = 0; - } - - imm->nr = reg.nr; - imm->subreg_offset = reg.offset; - - reg.offset += imm->size; - } + for (int i = 0; i < table.len; i++) { + regs[i].nr = UINT_MAX; + regs[i].avail = 0xffff; } + foreach_block(block, s.cfg) { + parcel_out_registers(table.imm, table.len, block, regs, table.len, + s.alloc, devinfo->ver); + } + + free(regs); + bool rebuild_cfg = false; /* Insert MOVs to load the constant values into GRFs. */ @@ -1661,7 +1587,7 @@ brw_fs_opt_combine_constants(fs_visitor &s) * replicating the single one we want. To avoid this, we always populate * both HF slots within a DWord with the constant. */ - const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1; + const uint32_t width = 1; const fs_builder ibld = fs_builder(&s, width).at(insert_block, n).exec_all(); fs_reg reg(VGRF, imm->nr); diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index a8a3ca7537f..4cd3e8a6918 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -630,14 +630,8 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type, * are sends, so the sources are moved to MRF's and there are no * restrictions. */ - if (inst->is_math()) { - if (devinfo->ver == 6 || devinfo->ver == 7) { - assert(inst->dst.stride == 1); - return stride == 1 || stride == 0; - } else if (devinfo->ver >= 8) { - return stride == inst->dst.stride || stride == 0; - } - } + if (inst->is_math()) + return stride == inst->dst.stride || stride == 0; return true; } @@ -725,15 +719,6 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, } } - /* Avoid propagating odd-numbered FIXED_GRF registers into the first source - * of a LINTERP instruction on platforms where the PLN instruction has - * register alignment restrictions. - */ - if (devinfo->has_pln && devinfo->ver <= 6 && - entry->src.file == FIXED_GRF && (entry->src.nr & 1) && - inst->opcode == FS_OPCODE_LINTERP && arg == 0) - return false; - /* we can't generally copy-propagate UD negations because we * can end up accessing the resulting values as signed integers * instead. See also resolve_ud_negate() and comment in @@ -750,8 +735,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, /* Reject cases that would violate register regioning restrictions. */ if ((entry->src.file == UNIFORM || !entry->src.is_contiguous()) && - ((devinfo->ver == 6 && inst->is_math()) || - inst->is_send_from_grf() || + (inst->is_send_from_grf() || inst->uses_indirect_addressing())) { return false; } @@ -867,7 +851,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, type_sz(entry->dst.type) != type_sz(inst->src[arg].type))) return false; - if (devinfo->ver >= 8 && (entry->src.negate || entry->src.abs) && + if ((entry->src.negate || entry->src.abs) && is_logic_op(inst->opcode)) { return false; } @@ -946,7 +930,6 @@ static bool try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, acp_entry *entry, int arg) { - const struct intel_device_info *devinfo = compiler->devinfo; bool progress = false; if (type_sz(entry->src.type) > 4) @@ -1002,14 +985,14 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, val.type = inst->src[arg].type; if (inst->src[arg].abs) { - if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) || + if (is_logic_op(inst->opcode) || !brw_abs_immediate(val.type, &val.as_brw_reg())) { return false; } } if (inst->src[arg].negate) { - if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) || + if (is_logic_op(inst->opcode) || !brw_negate_immediate(val.type, &val.as_brw_reg())) { return false; } @@ -1024,13 +1007,6 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, break; case SHADER_OPCODE_POW: - /* Allow constant propagation into src1 (except on Gen 6 which - * doesn't support scalar source math), and let constant combining - * promote the constant on Gen < 8. - */ - if (devinfo->ver == 6) - break; - if (arg == 1) { inst->src[arg] = val; progress = true; @@ -1190,15 +1166,6 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: - /* Allow constant propagation into either source (except on Gen 6 - * which doesn't support scalar source math). Constant combining - * promote the src1 constant on Gen < 8, and it will promote the src0 - * constant on all platforms. - */ - if (devinfo->ver == 6) - break; - - FALLTHROUGH; case BRW_OPCODE_AND: case BRW_OPCODE_ASR: case BRW_OPCODE_BFE: diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index 96992837f44..52481d40ac3 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -334,13 +334,12 @@ bool brw_fs_lower_barycentrics(fs_visitor &s) { const intel_device_info *devinfo = s.devinfo; - const bool has_interleaved_layout = devinfo->has_pln || - (devinfo->ver >= 7 && devinfo->ver < 20); - bool progress = false; - if (s.stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout) + if (s.stage != MESA_SHADER_FRAGMENT || devinfo->ver >= 20) return false; + bool progress = false; + foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { if (inst->exec_size < 16) continue; @@ -461,9 +460,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s) { bool progress = false; - if (s.devinfo->ver < 8) - return false; - bool packed_dispatch = brw_stage_has_packed_dispatch(s.devinfo, s.stage, s.max_polygons, s.stage_prog_data); diff --git a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp index 72939c9aa94..1968a305ca9 100644 --- a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp +++ b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp @@ -150,23 +150,16 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) */ if (inst->src[1].file == IMM && (inst->src[1].d >= INT16_MIN && inst->src[1].d <= UINT16_MAX)) { - /* The MUL instruction isn't commutative. On Gen <= 6, only the low - * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of - * src1 are used. + /* The MUL instruction isn't commutative. On Gen >= 7 only + * the low 16-bits of src1 are used. * * If multiplying by an immediate value that fits in 16-bits, do a * single MUL instruction with that value in the proper location. */ const bool ud = (inst->src[1].d >= 0); - if (devinfo->ver < 7) { - fs_reg imm(VGRF, s.alloc.allocate(s.dispatch_width / 8), inst->dst.type); - ibld.MOV(imm, inst->src[1]); - ibld.MUL(inst->dst, imm, inst->src[0]); - } else { - ibld.MUL(inst->dst, inst->src[0], - ud ? brw_imm_uw(inst->src[1].ud) - : brw_imm_w(inst->src[1].d)); - } + ibld.MUL(inst->dst, inst->src[0], + ud ? brw_imm_uw(inst->src[1].ud) + : brw_imm_w(inst->src[1].d)); } else { /* Gen < 8 (and some Gfx8+ low-power parts like Cherryview) cannot * do 32-bit integer multiplication in one instruction, but instead @@ -239,7 +232,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) high.offset = inst->dst.offset % REG_SIZE; bool do_addition = true; - if (devinfo->ver >= 7) { + { /* From Wa_1604601757: * * "When multiplying a DW and any lower precision integer, source modifier @@ -294,14 +287,6 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) ibld.MUL(high, inst->src[0], subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1)); } - } else { - if (inst->src[0].abs) - lower_src_modifiers(&s, block, inst, 0); - - ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0), - inst->src[1]); - ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1), - inst->src[1]); } if (do_addition) { @@ -399,7 +384,7 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) * mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw * mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d" */ - if (devinfo->ver >= 8 && (inst->src[1].negate || inst->src[1].abs)) + if (inst->src[1].negate || inst->src[1].abs) lower_src_modifiers(&s, block, inst, 1); /* Should have been lowered to 8-wide. */ @@ -408,47 +393,23 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block) const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type), inst->group % acc_width); fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]); - fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]); + ibld.MACH(inst->dst, inst->src[0], inst->src[1]); - if (devinfo->ver >= 8) { - /* Until Gfx8, integer multiplies read 32-bits from one source, - * and 16-bits from the other, and relying on the MACH instruction - * to generate the high bits of the result. - * - * On Gfx8, the multiply instruction does a full 32x32-bit - * multiply, but in order to do a 64-bit multiply we can simulate - * the previous behavior and then use a MACH instruction. - */ - assert(mul->src[1].type == BRW_REGISTER_TYPE_D || - mul->src[1].type == BRW_REGISTER_TYPE_UD); - mul->src[1].type = BRW_REGISTER_TYPE_UW; - mul->src[1].stride *= 2; + /* Until Gfx8, integer multiplies read 32-bits from one source, + * and 16-bits from the other, and relying on the MACH instruction + * to generate the high bits of the result. + * + * On Gfx8, the multiply instruction does a full 32x32-bit + * multiply, but in order to do a 64-bit multiply we can simulate + * the previous behavior and then use a MACH instruction. + */ + assert(mul->src[1].type == BRW_REGISTER_TYPE_D || + mul->src[1].type == BRW_REGISTER_TYPE_UD); + mul->src[1].type = BRW_REGISTER_TYPE_UW; + mul->src[1].stride *= 2; - if (mul->src[1].file == IMM) { - mul->src[1] = brw_imm_uw(mul->src[1].ud); - } - } else if (devinfo->verx10 == 70 && - inst->group > 0) { - /* Among other things the quarter control bits influence which - * accumulator register is used by the hardware for instructions - * that access the accumulator implicitly (e.g. MACH). A - * second-half instruction would normally map to acc1, which - * doesn't exist on Gfx7 and up (the hardware does emulate it for - * floating-point instructions *only* by taking advantage of the - * extra precision of acc0 not normally used for floating point - * arithmetic). - * - * HSW and up are careful enough not to try to access an - * accumulator register that doesn't exist, but on earlier Gfx7 - * hardware we need to make sure that the quarter control bits are - * zero to avoid non-deterministic behaviour and emit an extra MOV - * to get the result masked correctly according to the current - * channel enables. - */ - mach->group = 0; - mach->force_writemask_all = true; - mach->dst = ibld.vgrf(inst->dst.type); - ibld.MOV(inst->dst, mach->dst); + if (mul->src[1].file == IMM) { + mul->src[1] = brw_imm_uw(mul->src[1].ud); } } @@ -463,13 +424,8 @@ brw_fs_lower_integer_multiplication(fs_visitor &s) /* If the instruction is already in a form that does not need lowering, * return early. */ - if (s.devinfo->ver >= 7) { - if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4) - continue; - } else { - if (type_sz(inst->src[0].type) < 4 && type_sz(inst->src[1].type) <= 4) - continue; - } + if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4) + continue; if ((inst->dst.type == BRW_REGISTER_TYPE_Q || inst->dst.type == BRW_REGISTER_TYPE_UQ) && diff --git a/src/intel/compiler/brw_fs_lower_pack.cpp b/src/intel/compiler/brw_fs_lower_pack.cpp index ca4c1eaa4e5..41101f2ee25 100644 --- a/src/intel/compiler/brw_fs_lower_pack.cpp +++ b/src/intel/compiler/brw_fs_lower_pack.cpp @@ -64,13 +64,6 @@ brw_fs_lower_pack(fs_visitor &s) const uint32_t half = _mesa_float_to_half(inst->src[i].f); ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, i), brw_imm_uw(half)); - } else if (i == 1 && s.devinfo->ver < 9) { - /* Pre-Skylake requires DWord aligned destinations */ - fs_reg tmp = ibld.vgrf(BRW_REGISTER_TYPE_UD); - ibld.F32TO16(subscript(tmp, BRW_REGISTER_TYPE_HF, 0), - inst->src[i]); - ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, 1), - subscript(tmp, BRW_REGISTER_TYPE_UW, 0)); } else { ibld.F32TO16(subscript(dst, BRW_REGISTER_TYPE_HF, i), inst->src[i]); diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index c70aef45da5..db6385443f1 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -184,7 +184,6 @@ namespace { * support 64-bit types at all. */ if ((!has_64bit || devinfo->verx10 >= 125 || - devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4) return BRW_REGISTER_TYPE_UD; else @@ -192,9 +191,7 @@ namespace { case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_MOV_INDIRECT: - if (((devinfo->verx10 == 70 || - devinfo->platform == INTEL_PLATFORM_CHV || - intel_device_info_is_9lp(devinfo) || + if (((intel_device_info_is_9lp(devinfo) || devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) || (devinfo->verx10 >= 125 && brw_reg_type_is_floating_point(inst->src[0].type))) @@ -258,24 +255,6 @@ namespace { return false; } - /* Empirical testing shows that Broadwell has a bug affecting half-float - * MAD instructions when any of its sources has a non-zero offset, such - * as: - * - * mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q }; - * - * We used to generate code like this for SIMD8 executions where we - * used to pack components Y and W of a vector at offset 16B of a SIMD - * register. The problem doesn't occur if the stride of the source is 0. - */ - if (devinfo->ver == 8 && - inst->opcode == BRW_OPCODE_MAD && - inst->src[i].type == BRW_REGISTER_TYPE_HF && - reg_offset(inst->src[i]) % REG_SIZE > 0 && - inst->src[i].stride != 0) { - return true; - } - const unsigned dst_byte_offset = reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE); const unsigned src_byte_offset = reg_offset(inst->src[i]) % (reg_unit(devinfo) * REG_SIZE); diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp index 80ac1fa9e70..b32aca6d1d7 100644 --- a/src/intel/compiler/brw_fs_opt.cpp +++ b/src/intel/compiler/brw_fs_opt.cpp @@ -190,9 +190,6 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read) bool brw_fs_opt_zero_samples(fs_visitor &s) { - /* Implementation supports only SENDs, so applicable to Gfx7+ only. */ - assert(s.devinfo->ver >= 7); - bool progress = false; foreach_block_and_inst(block, fs_inst, send, s.cfg) { @@ -268,9 +265,6 @@ brw_fs_opt_zero_samples(fs_visitor &s) bool brw_fs_opt_split_sends(fs_visitor &s) { - if (s.devinfo->ver < 9) - return false; - bool progress = false; foreach_block_and_inst(block, fs_inst, send, s.cfg) {