mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 00:30:13 +01:00
intel/brw: Split out 64-bit lowering from algebraic optimizations
We don't necessarily want to split up MOVs for 64-bit addresses into 2x 32-bit MOVs right away, as this makes things like copy propagating the whole address around harder. We should do this late, once, while still doing other algebraic optimizations earlier.

fossil-db results for Alchemist show tiny improvements:

Totals:
Instrs: 161310502 -> 161310436 (-0.00%); split: -0.00%, +0.00%
Cycles: 14370605606 -> 14370605159 (-0.00%); split: -0.00%, +0.00%

Totals from 33 (0.01% of 652298) affected shaders:
Instrs: 15053 -> 14987 (-0.44%); split: -0.64%, +0.20%
Cycles: 196947 -> 196500 (-0.23%); split: -0.25%, +0.02%

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28286>
This commit is contained in:
parent
831703157e
commit
ea423aba1b
4 changed files with 101 additions and 72 deletions
|
|
@ -593,6 +593,7 @@ void nir_to_brw(fs_visitor *s);
|
|||
void brw_fs_optimize(fs_visitor &s);
|
||||
|
||||
bool brw_fs_lower_3src_null_dest(fs_visitor &s);
|
||||
bool brw_fs_lower_alu_restrictions(fs_visitor &s);
|
||||
bool brw_fs_lower_barycentrics(fs_visitor &s);
|
||||
bool brw_fs_lower_constant_loads(fs_visitor &s);
|
||||
bool brw_fs_lower_derivatives(fs_visitor &s);
|
||||
|
|
|
|||
|
|
@ -562,3 +562,101 @@ brw_fs_lower_3src_null_dest(fs_visitor &s)
|
|||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform lowering to legalize the IR for various ALU restrictions.
|
||||
*
|
||||
* For example:
|
||||
* - Splitting 64-bit MOV/SEL into 2x32-bit where needed
|
||||
*/
|
||||
bool
|
||||
brw_fs_lower_alu_restrictions(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
inst->dst.type == BRW_REGISTER_TYPE_DF) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SEL:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_DF ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs && !inst->src[0].negate);
|
||||
assert(!inst->src[1].abs && !inst->src[1].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
|
||||
DEPENDENCY_INSTRUCTION_DETAIL);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ using namespace brw;
|
|||
void
|
||||
brw_fs_optimize(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
const nir_shader *nir = s.nir;
|
||||
|
||||
s.debug_optimizer(nir, "start", 0, 0);
|
||||
|
|
@ -123,15 +122,13 @@ brw_fs_optimize(fs_visitor &s)
|
|||
if (OPT(brw_fs_lower_load_payload)) {
|
||||
OPT(brw_fs_opt_split_virtual_grfs);
|
||||
|
||||
/* Lower 64 bit MOVs generated by payload lowering. */
|
||||
if (!devinfo->has_64bit_float || !devinfo->has_64bit_int)
|
||||
OPT(brw_fs_opt_algebraic);
|
||||
|
||||
OPT(brw_fs_opt_register_coalesce);
|
||||
OPT(brw_fs_lower_simd_width);
|
||||
OPT(brw_fs_opt_dead_code_eliminate);
|
||||
}
|
||||
|
||||
OPT(brw_fs_lower_alu_restrictions);
|
||||
|
||||
OPT(brw_fs_opt_combine_constants);
|
||||
if (OPT(brw_fs_lower_integer_multiplication)) {
|
||||
/* If lower_integer_multiplication made progress, it may have produced
|
||||
|
|
|
|||
|
|
@ -73,47 +73,6 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
inst->dst.type == BRW_REGISTER_TYPE_DF) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
|
||||
inst->dst.is_null() &&
|
||||
|
|
@ -299,32 +258,6 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
}
|
||||
break;
|
||||
case BRW_OPCODE_SEL:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_DF ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs && !inst->src[0].negate);
|
||||
assert(!inst->src[1].abs && !inst->src[1].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
if (inst->src[0].equals(inst->src[1])) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue