mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 20:00:10 +01:00
intel/brw: Remove Gfx8- code from backend passes
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
parent
9569ea82a8
commit
7ac5696157
10 changed files with 64 additions and 276 deletions
|
|
@ -2341,10 +2341,10 @@ fs_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *f
|
||||||
if (inst->conditional_mod) {
|
if (inst->conditional_mod) {
|
||||||
fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
|
fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
|
||||||
if (!inst->predicate &&
|
if (!inst->predicate &&
|
||||||
(devinfo->ver < 5 || (inst->opcode != BRW_OPCODE_SEL &&
|
(inst->opcode != BRW_OPCODE_SEL &&
|
||||||
inst->opcode != BRW_OPCODE_CSEL &&
|
inst->opcode != BRW_OPCODE_CSEL &&
|
||||||
inst->opcode != BRW_OPCODE_IF &&
|
inst->opcode != BRW_OPCODE_IF &&
|
||||||
inst->opcode != BRW_OPCODE_WHILE))) {
|
inst->opcode != BRW_OPCODE_WHILE)) {
|
||||||
fprintf(file, ".f%d.%d", inst->flag_subreg / 2,
|
fprintf(file, ".f%d.%d", inst->flag_subreg / 2,
|
||||||
inst->flag_subreg % 2);
|
inst->flag_subreg % 2);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -549,8 +549,7 @@ namespace {
|
||||||
* Register allocation ensures that, so don't move 127 around to avoid
|
* Register allocation ensures that, so don't move 127 around to avoid
|
||||||
* breaking that property.
|
* breaking that property.
|
||||||
*/
|
*/
|
||||||
if (v->devinfo->ver >= 8)
|
constrained[p.atom_of_reg(127)] = true;
|
||||||
constrained[p.atom_of_reg(127)] = true;
|
|
||||||
|
|
||||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||||
/* Assume that anything referenced via fixed GRFs is baked into the
|
/* Assume that anything referenced via fixed GRFs is baked into the
|
||||||
|
|
@ -567,24 +566,14 @@ namespace {
|
||||||
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
|
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Preserve the original allocation of VGRFs used by the barycentric
|
|
||||||
* source of the LINTERP instruction on Gfx6, since pair-aligned
|
|
||||||
* barycentrics allow the PLN instruction to be used.
|
|
||||||
*/
|
|
||||||
if (v->devinfo->has_pln && v->devinfo->ver <= 6 &&
|
|
||||||
inst->opcode == FS_OPCODE_LINTERP)
|
|
||||||
constrained[p.atom_of_reg(reg_of(inst->src[0]))] = true;
|
|
||||||
|
|
||||||
/* The location of the Gfx7 MRF hack registers is hard-coded in the
|
/* The location of the Gfx7 MRF hack registers is hard-coded in the
|
||||||
* rest of the compiler back-end. Don't attempt to move them around.
|
* rest of the compiler back-end. Don't attempt to move them around.
|
||||||
*/
|
*/
|
||||||
if (v->devinfo->ver >= 7) {
|
assert(inst->dst.file != MRF);
|
||||||
assert(inst->dst.file != MRF);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||||
const unsigned reg = GFX7_MRF_HACK_START + inst->base_mrf + i;
|
const unsigned reg = GFX7_MRF_HACK_START + inst->base_mrf + i;
|
||||||
constrained[p.atom_of_reg(reg)] = true;
|
constrained[p.atom_of_reg(reg)] = true;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -600,10 +589,10 @@ namespace {
|
||||||
is_conflict_optimized_out(const intel_device_info *devinfo,
|
is_conflict_optimized_out(const intel_device_info *devinfo,
|
||||||
const fs_inst *inst)
|
const fs_inst *inst)
|
||||||
{
|
{
|
||||||
return devinfo->ver >= 9 &&
|
return
|
||||||
((is_grf(inst->src[0]) && (reg_of(inst->src[0]) == reg_of(inst->src[1]) ||
|
(is_grf(inst->src[0]) && (reg_of(inst->src[0]) == reg_of(inst->src[1]) ||
|
||||||
reg_of(inst->src[0]) == reg_of(inst->src[2]))) ||
|
reg_of(inst->src[0]) == reg_of(inst->src[2]))) ||
|
||||||
reg_of(inst->src[1]) == reg_of(inst->src[2]));
|
reg_of(inst->src[1]) == reg_of(inst->src[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -915,10 +904,6 @@ brw_fs_opt_bank_conflicts(fs_visitor &s)
|
||||||
if (s.devinfo->ver >= 20)
|
if (s.devinfo->ver >= 20)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* No ternary instructions -- No bank conflicts. */
|
|
||||||
if (s.devinfo->ver < 6)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
const partitioning p = shader_reg_partitioning(&s);
|
const partitioning p = shader_reg_partitioning(&s);
|
||||||
const bool *constrained = shader_reg_constraints(&s, p);
|
const bool *constrained = shader_reg_constraints(&s, p);
|
||||||
const weight_vector_type *conflicts =
|
const weight_vector_type *conflicts =
|
||||||
|
|
|
||||||
|
|
@ -451,18 +451,10 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (scan_inst->conditional_mod == inst->conditional_mod) {
|
} else if (scan_inst->conditional_mod == inst->conditional_mod) {
|
||||||
/* On Gfx4 and Gfx5 sel.cond will dirty the flags, but the
|
/* sel.cond will not write the flags. */
|
||||||
* flags value is not based on the result stored in the
|
assert(scan_inst->opcode != BRW_OPCODE_SEL);
|
||||||
* destination. On all other platforms sel.cond will not
|
inst->remove(block, true);
|
||||||
* write the flags, so execution will not get to this point.
|
progress = true;
|
||||||
*/
|
|
||||||
if (scan_inst->opcode == BRW_OPCODE_SEL) {
|
|
||||||
assert(devinfo->ver <= 5);
|
|
||||||
} else {
|
|
||||||
inst->remove(block, true);
|
|
||||||
progress = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
} else if (!read_flag && scan_inst->can_do_cmod()) {
|
} else if (!read_flag && scan_inst->can_do_cmod()) {
|
||||||
scan_inst->conditional_mod = inst->conditional_mod;
|
scan_inst->conditional_mod = inst->conditional_mod;
|
||||||
|
|
|
||||||
|
|
@ -764,30 +764,6 @@ brw_combine_constants(struct value *candidates, unsigned num_candidates)
|
||||||
return combine_constants_greedy(candidates, num_candidates);
|
return combine_constants_greedy(candidates, num_candidates);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns whether an instruction could co-issue if its immediate source were
|
|
||||||
* replaced with a GRF source.
|
|
||||||
*/
|
|
||||||
static bool
|
|
||||||
could_coissue(const struct intel_device_info *devinfo, const fs_inst *inst)
|
|
||||||
{
|
|
||||||
assert(inst->opcode == BRW_OPCODE_MOV ||
|
|
||||||
inst->opcode == BRW_OPCODE_CMP ||
|
|
||||||
inst->opcode == BRW_OPCODE_ADD ||
|
|
||||||
inst->opcode == BRW_OPCODE_MUL);
|
|
||||||
|
|
||||||
if (devinfo->ver != 7)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Only float instructions can coissue. We don't have a great
|
|
||||||
* understanding of whether or not something like float(int(a) + int(b))
|
|
||||||
* would be considered float (based on the destination type) or integer
|
|
||||||
* (based on the source types), so we take the conservative choice of
|
|
||||||
* only promoting when both destination and source are float.
|
|
||||||
*/
|
|
||||||
return inst->dst.type == BRW_REGISTER_TYPE_F &&
|
|
||||||
inst->src[0].type == BRW_REGISTER_TYPE_F;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Box for storing fs_inst and some other necessary data
|
* Box for storing fs_inst and some other necessary data
|
||||||
*
|
*
|
||||||
|
|
@ -1346,12 +1322,6 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
||||||
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
|
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
|
||||||
devinfo, const_ctx);
|
devinfo, const_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->src[1].file == IMM && devinfo->ver < 8) {
|
|
||||||
add_candidate_immediate(&table, inst, ip, 1, true, false, block,
|
|
||||||
devinfo, const_ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BRW_OPCODE_ADD3:
|
case BRW_OPCODE_ADD3:
|
||||||
|
|
@ -1418,24 +1388,6 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BRW_OPCODE_MOV:
|
|
||||||
if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) {
|
|
||||||
add_candidate_immediate(&table, inst, ip, 0, false, false, block,
|
|
||||||
devinfo, const_ctx);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case BRW_OPCODE_CMP:
|
|
||||||
case BRW_OPCODE_ADD:
|
|
||||||
case BRW_OPCODE_MUL:
|
|
||||||
assert(inst->src[0].file != IMM);
|
|
||||||
|
|
||||||
if (could_coissue(devinfo, inst) && inst->src[1].file == IMM) {
|
|
||||||
add_candidate_immediate(&table, inst, ip, 1, false, false, block,
|
|
||||||
devinfo, const_ctx);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -1552,47 +1504,21 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
||||||
if (s.cfg->num_blocks != 1)
|
if (s.cfg->num_blocks != 1)
|
||||||
qsort(table.imm, table.len, sizeof(struct imm), compare);
|
qsort(table.imm, table.len, sizeof(struct imm), compare);
|
||||||
|
|
||||||
if (devinfo->ver > 7) {
|
struct register_allocation *regs =
|
||||||
struct register_allocation *regs =
|
(struct register_allocation *) calloc(table.len, sizeof(regs[0]));
|
||||||
(struct register_allocation *) calloc(table.len, sizeof(regs[0]));
|
|
||||||
|
|
||||||
for (int i = 0; i < table.len; i++) {
|
for (int i = 0; i < table.len; i++) {
|
||||||
regs[i].nr = UINT_MAX;
|
regs[i].nr = UINT_MAX;
|
||||||
regs[i].avail = 0xffff;
|
regs[i].avail = 0xffff;
|
||||||
}
|
|
||||||
|
|
||||||
foreach_block(block, s.cfg) {
|
|
||||||
parcel_out_registers(table.imm, table.len, block, regs, table.len,
|
|
||||||
s.alloc, devinfo->ver);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(regs);
|
|
||||||
} else {
|
|
||||||
fs_reg reg(VGRF, s.alloc.allocate(1));
|
|
||||||
reg.stride = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < table.len; i++) {
|
|
||||||
struct imm *imm = &table.imm[i];
|
|
||||||
|
|
||||||
/* Put the immediate in an offset aligned to its size. Some
|
|
||||||
* instructions seem to have additional alignment requirements, so
|
|
||||||
* account for that too.
|
|
||||||
*/
|
|
||||||
reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));
|
|
||||||
|
|
||||||
/* Ensure we have enough space in the register to copy the immediate */
|
|
||||||
if (reg.offset + imm->size > REG_SIZE) {
|
|
||||||
reg.nr = s.alloc.allocate(1);
|
|
||||||
reg.offset = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
imm->nr = reg.nr;
|
|
||||||
imm->subreg_offset = reg.offset;
|
|
||||||
|
|
||||||
reg.offset += imm->size;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach_block(block, s.cfg) {
|
||||||
|
parcel_out_registers(table.imm, table.len, block, regs, table.len,
|
||||||
|
s.alloc, devinfo->ver);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(regs);
|
||||||
|
|
||||||
bool rebuild_cfg = false;
|
bool rebuild_cfg = false;
|
||||||
|
|
||||||
/* Insert MOVs to load the constant values into GRFs. */
|
/* Insert MOVs to load the constant values into GRFs. */
|
||||||
|
|
@ -1661,7 +1587,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
||||||
* replicating the single one we want. To avoid this, we always populate
|
* replicating the single one we want. To avoid this, we always populate
|
||||||
* both HF slots within a DWord with the constant.
|
* both HF slots within a DWord with the constant.
|
||||||
*/
|
*/
|
||||||
const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
|
const uint32_t width = 1;
|
||||||
const fs_builder ibld = fs_builder(&s, width).at(insert_block, n).exec_all();
|
const fs_builder ibld = fs_builder(&s, width).at(insert_block, n).exec_all();
|
||||||
|
|
||||||
fs_reg reg(VGRF, imm->nr);
|
fs_reg reg(VGRF, imm->nr);
|
||||||
|
|
|
||||||
|
|
@ -630,14 +630,8 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type,
|
||||||
* are sends, so the sources are moved to MRF's and there are no
|
* are sends, so the sources are moved to MRF's and there are no
|
||||||
* restrictions.
|
* restrictions.
|
||||||
*/
|
*/
|
||||||
if (inst->is_math()) {
|
if (inst->is_math())
|
||||||
if (devinfo->ver == 6 || devinfo->ver == 7) {
|
return stride == inst->dst.stride || stride == 0;
|
||||||
assert(inst->dst.stride == 1);
|
|
||||||
return stride == 1 || stride == 0;
|
|
||||||
} else if (devinfo->ver >= 8) {
|
|
||||||
return stride == inst->dst.stride || stride == 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -725,15 +719,6 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Avoid propagating odd-numbered FIXED_GRF registers into the first source
|
|
||||||
* of a LINTERP instruction on platforms where the PLN instruction has
|
|
||||||
* register alignment restrictions.
|
|
||||||
*/
|
|
||||||
if (devinfo->has_pln && devinfo->ver <= 6 &&
|
|
||||||
entry->src.file == FIXED_GRF && (entry->src.nr & 1) &&
|
|
||||||
inst->opcode == FS_OPCODE_LINTERP && arg == 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* we can't generally copy-propagate UD negations because we
|
/* we can't generally copy-propagate UD negations because we
|
||||||
* can end up accessing the resulting values as signed integers
|
* can end up accessing the resulting values as signed integers
|
||||||
* instead. See also resolve_ud_negate() and comment in
|
* instead. See also resolve_ud_negate() and comment in
|
||||||
|
|
@ -750,8 +735,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
|
|
||||||
/* Reject cases that would violate register regioning restrictions. */
|
/* Reject cases that would violate register regioning restrictions. */
|
||||||
if ((entry->src.file == UNIFORM || !entry->src.is_contiguous()) &&
|
if ((entry->src.file == UNIFORM || !entry->src.is_contiguous()) &&
|
||||||
((devinfo->ver == 6 && inst->is_math()) ||
|
(inst->is_send_from_grf() ||
|
||||||
inst->is_send_from_grf() ||
|
|
||||||
inst->uses_indirect_addressing())) {
|
inst->uses_indirect_addressing())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -867,7 +851,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
type_sz(entry->dst.type) != type_sz(inst->src[arg].type)))
|
type_sz(entry->dst.type) != type_sz(inst->src[arg].type)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (devinfo->ver >= 8 && (entry->src.negate || entry->src.abs) &&
|
if ((entry->src.negate || entry->src.abs) &&
|
||||||
is_logic_op(inst->opcode)) {
|
is_logic_op(inst->opcode)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -946,7 +930,6 @@ static bool
|
||||||
try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
acp_entry *entry, int arg)
|
acp_entry *entry, int arg)
|
||||||
{
|
{
|
||||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
if (type_sz(entry->src.type) > 4)
|
if (type_sz(entry->src.type) > 4)
|
||||||
|
|
@ -1002,14 +985,14 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
val.type = inst->src[arg].type;
|
val.type = inst->src[arg].type;
|
||||||
|
|
||||||
if (inst->src[arg].abs) {
|
if (inst->src[arg].abs) {
|
||||||
if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) ||
|
if (is_logic_op(inst->opcode) ||
|
||||||
!brw_abs_immediate(val.type, &val.as_brw_reg())) {
|
!brw_abs_immediate(val.type, &val.as_brw_reg())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->src[arg].negate) {
|
if (inst->src[arg].negate) {
|
||||||
if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) ||
|
if (is_logic_op(inst->opcode) ||
|
||||||
!brw_negate_immediate(val.type, &val.as_brw_reg())) {
|
!brw_negate_immediate(val.type, &val.as_brw_reg())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -1024,13 +1007,6 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_POW:
|
case SHADER_OPCODE_POW:
|
||||||
/* Allow constant propagation into src1 (except on Gen 6 which
|
|
||||||
* doesn't support scalar source math), and let constant combining
|
|
||||||
* promote the constant on Gen < 8.
|
|
||||||
*/
|
|
||||||
if (devinfo->ver == 6)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (arg == 1) {
|
if (arg == 1) {
|
||||||
inst->src[arg] = val;
|
inst->src[arg] = val;
|
||||||
progress = true;
|
progress = true;
|
||||||
|
|
@ -1190,15 +1166,6 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||||
|
|
||||||
case SHADER_OPCODE_INT_QUOTIENT:
|
case SHADER_OPCODE_INT_QUOTIENT:
|
||||||
case SHADER_OPCODE_INT_REMAINDER:
|
case SHADER_OPCODE_INT_REMAINDER:
|
||||||
/* Allow constant propagation into either source (except on Gen 6
|
|
||||||
* which doesn't support scalar source math). Constant combining
|
|
||||||
* promote the src1 constant on Gen < 8, and it will promote the src0
|
|
||||||
* constant on all platforms.
|
|
||||||
*/
|
|
||||||
if (devinfo->ver == 6)
|
|
||||||
break;
|
|
||||||
|
|
||||||
FALLTHROUGH;
|
|
||||||
case BRW_OPCODE_AND:
|
case BRW_OPCODE_AND:
|
||||||
case BRW_OPCODE_ASR:
|
case BRW_OPCODE_ASR:
|
||||||
case BRW_OPCODE_BFE:
|
case BRW_OPCODE_BFE:
|
||||||
|
|
|
||||||
|
|
@ -334,13 +334,12 @@ bool
|
||||||
brw_fs_lower_barycentrics(fs_visitor &s)
|
brw_fs_lower_barycentrics(fs_visitor &s)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = s.devinfo;
|
const intel_device_info *devinfo = s.devinfo;
|
||||||
const bool has_interleaved_layout = devinfo->has_pln ||
|
|
||||||
(devinfo->ver >= 7 && devinfo->ver < 20);
|
|
||||||
bool progress = false;
|
|
||||||
|
|
||||||
if (s.stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout)
|
if (s.stage != MESA_SHADER_FRAGMENT || devinfo->ver >= 20)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||||
if (inst->exec_size < 16)
|
if (inst->exec_size < 16)
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -461,9 +460,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
|
||||||
{
|
{
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
if (s.devinfo->ver < 8)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
bool packed_dispatch =
|
bool packed_dispatch =
|
||||||
brw_stage_has_packed_dispatch(s.devinfo, s.stage, s.max_polygons,
|
brw_stage_has_packed_dispatch(s.devinfo, s.stage, s.max_polygons,
|
||||||
s.stage_prog_data);
|
s.stage_prog_data);
|
||||||
|
|
|
||||||
|
|
@ -150,23 +150,16 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
*/
|
*/
|
||||||
if (inst->src[1].file == IMM &&
|
if (inst->src[1].file == IMM &&
|
||||||
(inst->src[1].d >= INT16_MIN && inst->src[1].d <= UINT16_MAX)) {
|
(inst->src[1].d >= INT16_MIN && inst->src[1].d <= UINT16_MAX)) {
|
||||||
/* The MUL instruction isn't commutative. On Gen <= 6, only the low
|
/* The MUL instruction isn't commutative. On Gen >= 7 only
|
||||||
* 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
|
* the low 16-bits of src1 are used.
|
||||||
* src1 are used.
|
|
||||||
*
|
*
|
||||||
* If multiplying by an immediate value that fits in 16-bits, do a
|
* If multiplying by an immediate value that fits in 16-bits, do a
|
||||||
* single MUL instruction with that value in the proper location.
|
* single MUL instruction with that value in the proper location.
|
||||||
*/
|
*/
|
||||||
const bool ud = (inst->src[1].d >= 0);
|
const bool ud = (inst->src[1].d >= 0);
|
||||||
if (devinfo->ver < 7) {
|
ibld.MUL(inst->dst, inst->src[0],
|
||||||
fs_reg imm(VGRF, s.alloc.allocate(s.dispatch_width / 8), inst->dst.type);
|
ud ? brw_imm_uw(inst->src[1].ud)
|
||||||
ibld.MOV(imm, inst->src[1]);
|
: brw_imm_w(inst->src[1].d));
|
||||||
ibld.MUL(inst->dst, imm, inst->src[0]);
|
|
||||||
} else {
|
|
||||||
ibld.MUL(inst->dst, inst->src[0],
|
|
||||||
ud ? brw_imm_uw(inst->src[1].ud)
|
|
||||||
: brw_imm_w(inst->src[1].d));
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
/* Gen < 8 (and some Gfx8+ low-power parts like Cherryview) cannot
|
/* Gen < 8 (and some Gfx8+ low-power parts like Cherryview) cannot
|
||||||
* do 32-bit integer multiplication in one instruction, but instead
|
* do 32-bit integer multiplication in one instruction, but instead
|
||||||
|
|
@ -239,7 +232,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
high.offset = inst->dst.offset % REG_SIZE;
|
high.offset = inst->dst.offset % REG_SIZE;
|
||||||
|
|
||||||
bool do_addition = true;
|
bool do_addition = true;
|
||||||
if (devinfo->ver >= 7) {
|
{
|
||||||
/* From Wa_1604601757:
|
/* From Wa_1604601757:
|
||||||
*
|
*
|
||||||
* "When multiplying a DW and any lower precision integer, source modifier
|
* "When multiplying a DW and any lower precision integer, source modifier
|
||||||
|
|
@ -294,14 +287,6 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
ibld.MUL(high, inst->src[0],
|
ibld.MUL(high, inst->src[0],
|
||||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
|
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if (inst->src[0].abs)
|
|
||||||
lower_src_modifiers(&s, block, inst, 0);
|
|
||||||
|
|
||||||
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
|
|
||||||
inst->src[1]);
|
|
||||||
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
|
|
||||||
inst->src[1]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (do_addition) {
|
if (do_addition) {
|
||||||
|
|
@ -399,7 +384,7 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
* mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw
|
* mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw
|
||||||
* mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d"
|
* mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d"
|
||||||
*/
|
*/
|
||||||
if (devinfo->ver >= 8 && (inst->src[1].negate || inst->src[1].abs))
|
if (inst->src[1].negate || inst->src[1].abs)
|
||||||
lower_src_modifiers(&s, block, inst, 1);
|
lower_src_modifiers(&s, block, inst, 1);
|
||||||
|
|
||||||
/* Should have been lowered to 8-wide. */
|
/* Should have been lowered to 8-wide. */
|
||||||
|
|
@ -408,47 +393,23 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
|
||||||
const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
|
const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
|
||||||
inst->group % acc_width);
|
inst->group % acc_width);
|
||||||
fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
|
fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
|
||||||
fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
|
ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
|
||||||
|
|
||||||
if (devinfo->ver >= 8) {
|
/* Until Gfx8, integer multiplies read 32-bits from one source,
|
||||||
/* Until Gfx8, integer multiplies read 32-bits from one source,
|
* and 16-bits from the other, and relying on the MACH instruction
|
||||||
* and 16-bits from the other, and relying on the MACH instruction
|
* to generate the high bits of the result.
|
||||||
* to generate the high bits of the result.
|
*
|
||||||
*
|
* On Gfx8, the multiply instruction does a full 32x32-bit
|
||||||
* On Gfx8, the multiply instruction does a full 32x32-bit
|
* multiply, but in order to do a 64-bit multiply we can simulate
|
||||||
* multiply, but in order to do a 64-bit multiply we can simulate
|
* the previous behavior and then use a MACH instruction.
|
||||||
* the previous behavior and then use a MACH instruction.
|
*/
|
||||||
*/
|
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
|
||||||
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
|
mul->src[1].type == BRW_REGISTER_TYPE_UD);
|
||||||
mul->src[1].type == BRW_REGISTER_TYPE_UD);
|
mul->src[1].type = BRW_REGISTER_TYPE_UW;
|
||||||
mul->src[1].type = BRW_REGISTER_TYPE_UW;
|
mul->src[1].stride *= 2;
|
||||||
mul->src[1].stride *= 2;
|
|
||||||
|
|
||||||
if (mul->src[1].file == IMM) {
|
if (mul->src[1].file == IMM) {
|
||||||
mul->src[1] = brw_imm_uw(mul->src[1].ud);
|
mul->src[1] = brw_imm_uw(mul->src[1].ud);
|
||||||
}
|
|
||||||
} else if (devinfo->verx10 == 70 &&
|
|
||||||
inst->group > 0) {
|
|
||||||
/* Among other things the quarter control bits influence which
|
|
||||||
* accumulator register is used by the hardware for instructions
|
|
||||||
* that access the accumulator implicitly (e.g. MACH). A
|
|
||||||
* second-half instruction would normally map to acc1, which
|
|
||||||
* doesn't exist on Gfx7 and up (the hardware does emulate it for
|
|
||||||
* floating-point instructions *only* by taking advantage of the
|
|
||||||
* extra precision of acc0 not normally used for floating point
|
|
||||||
* arithmetic).
|
|
||||||
*
|
|
||||||
* HSW and up are careful enough not to try to access an
|
|
||||||
* accumulator register that doesn't exist, but on earlier Gfx7
|
|
||||||
* hardware we need to make sure that the quarter control bits are
|
|
||||||
* zero to avoid non-deterministic behaviour and emit an extra MOV
|
|
||||||
* to get the result masked correctly according to the current
|
|
||||||
* channel enables.
|
|
||||||
*/
|
|
||||||
mach->group = 0;
|
|
||||||
mach->force_writemask_all = true;
|
|
||||||
mach->dst = ibld.vgrf(inst->dst.type);
|
|
||||||
ibld.MOV(inst->dst, mach->dst);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -463,13 +424,8 @@ brw_fs_lower_integer_multiplication(fs_visitor &s)
|
||||||
/* If the instruction is already in a form that does not need lowering,
|
/* If the instruction is already in a form that does not need lowering,
|
||||||
* return early.
|
* return early.
|
||||||
*/
|
*/
|
||||||
if (s.devinfo->ver >= 7) {
|
if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4)
|
||||||
if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4)
|
continue;
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
if (type_sz(inst->src[0].type) < 4 && type_sz(inst->src[1].type) <= 4)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((inst->dst.type == BRW_REGISTER_TYPE_Q ||
|
if ((inst->dst.type == BRW_REGISTER_TYPE_Q ||
|
||||||
inst->dst.type == BRW_REGISTER_TYPE_UQ) &&
|
inst->dst.type == BRW_REGISTER_TYPE_UQ) &&
|
||||||
|
|
|
||||||
|
|
@ -64,13 +64,6 @@ brw_fs_lower_pack(fs_visitor &s)
|
||||||
const uint32_t half = _mesa_float_to_half(inst->src[i].f);
|
const uint32_t half = _mesa_float_to_half(inst->src[i].f);
|
||||||
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, i),
|
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, i),
|
||||||
brw_imm_uw(half));
|
brw_imm_uw(half));
|
||||||
} else if (i == 1 && s.devinfo->ver < 9) {
|
|
||||||
/* Pre-Skylake requires DWord aligned destinations */
|
|
||||||
fs_reg tmp = ibld.vgrf(BRW_REGISTER_TYPE_UD);
|
|
||||||
ibld.F32TO16(subscript(tmp, BRW_REGISTER_TYPE_HF, 0),
|
|
||||||
inst->src[i]);
|
|
||||||
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, 1),
|
|
||||||
subscript(tmp, BRW_REGISTER_TYPE_UW, 0));
|
|
||||||
} else {
|
} else {
|
||||||
ibld.F32TO16(subscript(dst, BRW_REGISTER_TYPE_HF, i),
|
ibld.F32TO16(subscript(dst, BRW_REGISTER_TYPE_HF, i),
|
||||||
inst->src[i]);
|
inst->src[i]);
|
||||||
|
|
|
||||||
|
|
@ -184,7 +184,6 @@ namespace {
|
||||||
* support 64-bit types at all.
|
* support 64-bit types at all.
|
||||||
*/
|
*/
|
||||||
if ((!has_64bit || devinfo->verx10 >= 125 ||
|
if ((!has_64bit || devinfo->verx10 >= 125 ||
|
||||||
devinfo->platform == INTEL_PLATFORM_CHV ||
|
|
||||||
intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
|
intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
|
||||||
return BRW_REGISTER_TYPE_UD;
|
return BRW_REGISTER_TYPE_UD;
|
||||||
else
|
else
|
||||||
|
|
@ -192,9 +191,7 @@ namespace {
|
||||||
|
|
||||||
case SHADER_OPCODE_BROADCAST:
|
case SHADER_OPCODE_BROADCAST:
|
||||||
case SHADER_OPCODE_MOV_INDIRECT:
|
case SHADER_OPCODE_MOV_INDIRECT:
|
||||||
if (((devinfo->verx10 == 70 ||
|
if (((intel_device_info_is_9lp(devinfo) ||
|
||||||
devinfo->platform == INTEL_PLATFORM_CHV ||
|
|
||||||
intel_device_info_is_9lp(devinfo) ||
|
|
||||||
devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) ||
|
devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) ||
|
||||||
(devinfo->verx10 >= 125 &&
|
(devinfo->verx10 >= 125 &&
|
||||||
brw_reg_type_is_floating_point(inst->src[0].type)))
|
brw_reg_type_is_floating_point(inst->src[0].type)))
|
||||||
|
|
@ -258,24 +255,6 @@ namespace {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Empirical testing shows that Broadwell has a bug affecting half-float
|
|
||||||
* MAD instructions when any of its sources has a non-zero offset, such
|
|
||||||
* as:
|
|
||||||
*
|
|
||||||
* mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q };
|
|
||||||
*
|
|
||||||
* We used to generate code like this for SIMD8 executions where we
|
|
||||||
* used to pack components Y and W of a vector at offset 16B of a SIMD
|
|
||||||
* register. The problem doesn't occur if the stride of the source is 0.
|
|
||||||
*/
|
|
||||||
if (devinfo->ver == 8 &&
|
|
||||||
inst->opcode == BRW_OPCODE_MAD &&
|
|
||||||
inst->src[i].type == BRW_REGISTER_TYPE_HF &&
|
|
||||||
reg_offset(inst->src[i]) % REG_SIZE > 0 &&
|
|
||||||
inst->src[i].stride != 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const unsigned dst_byte_offset = reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE);
|
const unsigned dst_byte_offset = reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE);
|
||||||
const unsigned src_byte_offset = reg_offset(inst->src[i]) % (reg_unit(devinfo) * REG_SIZE);
|
const unsigned src_byte_offset = reg_offset(inst->src[i]) % (reg_unit(devinfo) * REG_SIZE);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -190,9 +190,6 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
|
||||||
bool
|
bool
|
||||||
brw_fs_opt_zero_samples(fs_visitor &s)
|
brw_fs_opt_zero_samples(fs_visitor &s)
|
||||||
{
|
{
|
||||||
/* Implementation supports only SENDs, so applicable to Gfx7+ only. */
|
|
||||||
assert(s.devinfo->ver >= 7);
|
|
||||||
|
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst(block, fs_inst, send, s.cfg) {
|
foreach_block_and_inst(block, fs_inst, send, s.cfg) {
|
||||||
|
|
@ -268,9 +265,6 @@ brw_fs_opt_zero_samples(fs_visitor &s)
|
||||||
bool
|
bool
|
||||||
brw_fs_opt_split_sends(fs_visitor &s)
|
brw_fs_opt_split_sends(fs_visitor &s)
|
||||||
{
|
{
|
||||||
if (s.devinfo->ver < 9)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst(block, fs_inst, send, s.cfg) {
|
foreach_block_and_inst(block, fs_inst, send, s.cfg) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue