intel/brw: Split out 64-bit lowering from algebraic optimizations

We don't necessarily want to split up MOVs for 64-bit addresses into
2x 32-bit MOVs right away, as this makes things like copy propagating
the whole address around harder.  We should do this late, once, while
still doing other algebraic optimizations earlier.

fossil-db results for Alchemist show tiny improvements:

   Totals:
   Instrs: 161310502 -> 161310436 (-0.00%); split: -0.00%, +0.00%
   Cycles: 14370605606 -> 14370605159 (-0.00%); split: -0.00%, +0.00%

   Totals from 33 (0.01% of 652298) affected shaders:
   Instrs: 15053 -> 14987 (-0.44%); split: -0.64%, +0.20%
   Cycles: 196947 -> 196500 (-0.23%); split: -0.25%, +0.02%

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28286>
This commit is contained in:
Kenneth Graunke 2024-03-18 22:52:35 -07:00
parent 831703157e
commit ea423aba1b
4 changed files with 101 additions and 72 deletions

View file

@ -593,6 +593,7 @@ void nir_to_brw(fs_visitor *s);
void brw_fs_optimize(fs_visitor &s); void brw_fs_optimize(fs_visitor &s);
bool brw_fs_lower_3src_null_dest(fs_visitor &s); bool brw_fs_lower_3src_null_dest(fs_visitor &s);
bool brw_fs_lower_alu_restrictions(fs_visitor &s);
bool brw_fs_lower_barycentrics(fs_visitor &s); bool brw_fs_lower_barycentrics(fs_visitor &s);
bool brw_fs_lower_constant_loads(fs_visitor &s); bool brw_fs_lower_constant_loads(fs_visitor &s);
bool brw_fs_lower_derivatives(fs_visitor &s); bool brw_fs_lower_derivatives(fs_visitor &s);

View file

@ -562,3 +562,101 @@ brw_fs_lower_3src_null_dest(fs_visitor &s)
return progress; return progress;
} }
/**
 * Legalize the IR with respect to ALU restrictions of the target device.
 *
 * Currently this covers:
 *  - 64-bit MOVs (DF when the device lacks 64-bit float support, Q/UQ when
 *    it lacks 64-bit integer support), which are replaced by two 32-bit
 *    MOVs, one per dword half.
 *  - 64-bit SELs (DF/Q/UQ) on devices that have neither 64-bit float nor
 *    64-bit integer support, which are replaced by two 32-bit SELs.
 *
 * Returns true if at least one instruction was replaced, in which case the
 * data-flow and instruction-detail analyses are invalidated.
 */
bool
brw_fs_lower_alu_restrictions(fs_visitor &s)
{
   const intel_device_info *devinfo = s.devinfo;
   bool progress = false;

   foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
      if (inst->opcode == BRW_OPCODE_MOV) {
         const bool split_df =
            !devinfo->has_64bit_float &&
            inst->dst.type == BRW_REGISTER_TYPE_DF;
         const bool split_int =
            !devinfo->has_64bit_int &&
            (inst->dst.type == BRW_REGISTER_TYPE_UQ ||
             inst->dst.type == BRW_REGISTER_TYPE_Q);

         if (split_df || split_int) {
            /* A DF move is split into F halves, an integer one into UD
             * halves.  The two conditions are mutually exclusive because
             * they test for different destination types.
             */
            const auto half_type = split_df ? BRW_REGISTER_TYPE_F
                                            : BRW_REGISTER_TYPE_UD;

            /* Only plain raw copies can be split into halves. */
            assert(inst->dst.type == inst->src[0].type);
            assert(!inst->saturate);
            assert(!inst->src[0].abs && !inst->src[0].negate);

            const brw::fs_builder bld(&s, block, inst);

            if (!inst->is_partial_write())
               bld.emit_undef_for_dst(inst);

            /* The high half is emitted first, followed by the low half. */
            const unsigned mov_order[] = { 1, 0 };
            for (const unsigned half : mov_order) {
               bld.MOV(subscript(inst->dst, half_type, half),
                       subscript(inst->src[0], half_type, half));
            }

            inst->remove(block);
            progress = true;
         }
      } else if (inst->opcode == BRW_OPCODE_SEL) {
         const bool is_64bit_dst =
            inst->dst.type == BRW_REGISTER_TYPE_DF ||
            inst->dst.type == BRW_REGISTER_TYPE_UQ ||
            inst->dst.type == BRW_REGISTER_TYPE_Q;

         /* SEL is only split when the device supports neither kind of
          * 64-bit type.
          */
         if (!devinfo->has_64bit_float &&
             !devinfo->has_64bit_int &&
             is_64bit_dst) {
            assert(inst->dst.type == inst->src[0].type);
            assert(!inst->saturate);
            assert(!inst->src[0].abs && !inst->src[0].negate);
            assert(!inst->src[1].abs && !inst->src[1].negate);

            const brw::fs_builder bld(&s, block, inst);

            if (!inst->is_partial_write())
               bld.emit_undef_for_dst(inst);

            /* Low half first, then high half; each 32-bit SEL is given the
             * predicate of the original instruction.
             *
             * NOTE(review): only inst->predicate is forwarded here; confirm
             * that no other predication state needs to be carried over.
             */
            for (unsigned half = 0; half < 2; half++) {
               set_predicate(inst->predicate,
                             bld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, half),
                                     subscript(inst->src[0], BRW_REGISTER_TYPE_UD, half),
                                     subscript(inst->src[1], BRW_REGISTER_TYPE_UD, half)));
            }

            inst->remove(block);
            progress = true;
         }
      }
   }

   if (!progress)
      return false;

   s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
                         DEPENDENCY_INSTRUCTION_DETAIL);
   return true;
}

View file

@ -12,7 +12,6 @@ using namespace brw;
void void
brw_fs_optimize(fs_visitor &s) brw_fs_optimize(fs_visitor &s)
{ {
const intel_device_info *devinfo = s.devinfo;
const nir_shader *nir = s.nir; const nir_shader *nir = s.nir;
s.debug_optimizer(nir, "start", 0, 0); s.debug_optimizer(nir, "start", 0, 0);
@ -123,15 +122,13 @@ brw_fs_optimize(fs_visitor &s)
if (OPT(brw_fs_lower_load_payload)) { if (OPT(brw_fs_lower_load_payload)) {
OPT(brw_fs_opt_split_virtual_grfs); OPT(brw_fs_opt_split_virtual_grfs);
/* Lower 64 bit MOVs generated by payload lowering. */
if (!devinfo->has_64bit_float || !devinfo->has_64bit_int)
OPT(brw_fs_opt_algebraic);
OPT(brw_fs_opt_register_coalesce); OPT(brw_fs_opt_register_coalesce);
OPT(brw_fs_lower_simd_width); OPT(brw_fs_lower_simd_width);
OPT(brw_fs_opt_dead_code_eliminate); OPT(brw_fs_opt_dead_code_eliminate);
} }
OPT(brw_fs_lower_alu_restrictions);
OPT(brw_fs_opt_combine_constants); OPT(brw_fs_opt_combine_constants);
if (OPT(brw_fs_lower_integer_multiplication)) { if (OPT(brw_fs_lower_integer_multiplication)) {
/* If lower_integer_multiplication made progress, it may have produced /* If lower_integer_multiplication made progress, it may have produced

View file

@ -73,47 +73,6 @@ brw_fs_opt_algebraic(fs_visitor &s)
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
switch (inst->opcode) { switch (inst->opcode) {
case BRW_OPCODE_MOV: case BRW_OPCODE_MOV:
if (!devinfo->has_64bit_float &&
inst->dst.type == BRW_REGISTER_TYPE_DF) {
assert(inst->dst.type == inst->src[0].type);
assert(!inst->saturate);
assert(!inst->src[0].abs);
assert(!inst->src[0].negate);
const brw::fs_builder ibld(&s, block, inst);
if (!inst->is_partial_write())
ibld.emit_undef_for_dst(inst);
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
inst->remove(block);
progress = true;
}
if (!devinfo->has_64bit_int &&
(inst->dst.type == BRW_REGISTER_TYPE_UQ ||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
assert(inst->dst.type == inst->src[0].type);
assert(!inst->saturate);
assert(!inst->src[0].abs);
assert(!inst->src[0].negate);
const brw::fs_builder ibld(&s, block, inst);
if (!inst->is_partial_write())
ibld.emit_undef_for_dst(inst);
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
inst->remove(block);
progress = true;
}
if ((inst->conditional_mod == BRW_CONDITIONAL_Z || if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
inst->conditional_mod == BRW_CONDITIONAL_NZ) && inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
inst->dst.is_null() && inst->dst.is_null() &&
@ -299,32 +258,6 @@ brw_fs_opt_algebraic(fs_visitor &s)
} }
break; break;
case BRW_OPCODE_SEL: case BRW_OPCODE_SEL:
if (!devinfo->has_64bit_float &&
!devinfo->has_64bit_int &&
(inst->dst.type == BRW_REGISTER_TYPE_DF ||
inst->dst.type == BRW_REGISTER_TYPE_UQ ||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
assert(inst->dst.type == inst->src[0].type);
assert(!inst->saturate);
assert(!inst->src[0].abs && !inst->src[0].negate);
assert(!inst->src[1].abs && !inst->src[1].negate);
const brw::fs_builder ibld(&s, block, inst);
if (!inst->is_partial_write())
ibld.emit_undef_for_dst(inst);
set_predicate(inst->predicate,
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
set_predicate(inst->predicate,
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
inst->remove(block);
progress = true;
}
if (inst->src[0].equals(inst->src[1])) { if (inst->src[0].equals(inst->src[1])) {
inst->opcode = BRW_OPCODE_MOV; inst->opcode = BRW_OPCODE_MOV;
inst->sources = 1; inst->sources = 1;