brw/algebraic: Refactor constant folding out of brw_fs_opt_algebraic

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31729>
This commit is contained in:
Ian Romanick 2024-10-10 14:07:04 -07:00 committed by Marge Bot
parent 90ad5d3f06
commit 2cc1575a31
2 changed files with 176 additions and 134 deletions

View file

@ -654,6 +654,7 @@ bool brw_fs_lower_subgroup_ops(fs_visitor &s);
bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s); bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);
void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s); void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
bool brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
bool brw_fs_opt_algebraic(fs_visitor &s); bool brw_fs_opt_algebraic(fs_visitor &s);
bool brw_fs_opt_bank_conflicts(fs_visitor &s); bool brw_fs_opt_bank_conflicts(fs_visitor &s);
bool brw_fs_opt_cmod_propagation(fs_visitor &s); bool brw_fs_opt_cmod_propagation(fs_visitor &s);

View file

@ -64,6 +64,176 @@ brw_imm_for_type(uint64_t value, enum brw_reg_type type)
} }
} }
/**
 * Try to simplify \p inst in place into a single-source MOV.
 *
 * Only ADD, AND, MUL, OR and SHL are examined; every other opcode is left
 * untouched.  When a rewrite happens the opcode becomes BRW_OPCODE_MOV and
 * the source list is shrunk to one entry.
 *
 * \param devinfo  Device description; used by the MUL case to ask whether
 *                 the instruction implicitly writes the accumulator.
 * \param inst     Instruction to rewrite in place.
 *
 * \return true if \p inst was rewritten, false if it was left unchanged.
 */
bool
brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
{
   bool progress = false;

   switch (inst->opcode) {
   case BRW_OPCODE_ADD:
      if (inst->src[1].file != IMM)
         break;

      /* a + 0 = a (integer types only) */
      if (brw_type_is_int(inst->src[1].type) &&
          inst->src[1].is_zero()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->resize_sources(1);
         progress = true;
         break;
      }

      /* Both operands are immediates.  Only float immediates are expected
       * to reach this point, hence the assertion.
       */
      if (inst->src[0].file == IMM) {
         assert(inst->src[0].type == BRW_TYPE_F);
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0].f += inst->src[1].f;
         inst->resize_sources(1);
         progress = true;
         break;
      }

      break;

   case BRW_OPCODE_AND:
      if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
         const uint64_t src0 = src_as_uint(inst->src[0]);
         const uint64_t src1 = src_as_uint(inst->src[1]);

         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
         inst->resize_sources(1);
         progress = true;
         break;
      }

      break;

   case BRW_OPCODE_MUL:
      /* Nothing to do unless at least one operand is an immediate, and
       * float multiplies are not handled here.
       */
      if ((inst->src[0].file != IMM && inst->src[1].file != IMM) ||
          brw_type_is_float(inst->src[1].type))
         break;

      /* From the BDW PRM, Vol 2a, "mul - Multiply":
       *
       *    "When multiplying integer datatypes, if src0 is DW and src1
       *    is W, irrespective of the destination datatype, the
       *    accumulator maintains full 48-bit precision."
       *    ...
       *    "When multiplying integer data types, if one of the sources
       *    is a DW, the resulting full precision data is stored in
       *    the accumulator."
       *
       * There are also similar notes in earlier PRMs.
       *
       * The MOV instruction can copy the bits of the source, but it
       * does not clear the higher bits of the accumulator. So, because
       * we might use the full accumulator in the MUL/MACH macro, we
       * shouldn't replace such MULs with MOVs.
       */
      if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
           brw_type_size_bytes(inst->src[1].type) == 4) &&
          (inst->dst.is_accumulator() ||
           inst->writes_accumulator_implicitly(devinfo)))
         break;

      /* a * 0 = 0 */
      if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = brw_imm_d(0);
         inst->resize_sources(1);
         progress = true;
         break;
      }

      /* a * 1 = a
       *
       * Leave through the common exit like every other case so the
       * debug-build post-condition check below is not skipped.
       */
      if (inst->src[1].is_one()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->resize_sources(1);
         progress = true;
         break;
      }

      /* a * -1 = -a */
      if (inst->src[0].is_negative_one()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = inst->src[1];
         inst->src[0].negate = !inst->src[0].negate;
         inst->resize_sources(1);
         progress = true;
         break;
      }

      if (inst->src[1].is_negative_one()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0].negate = !inst->src[0].negate;
         inst->resize_sources(1);
         progress = true;
         break;
      }

      break;

   case BRW_OPCODE_OR:
      if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
         const uint64_t src0 = src_as_uint(inst->src[0]);
         const uint64_t src1 = src_as_uint(inst->src[1]);

         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
         inst->resize_sources(1);
         progress = true;
         break;
      }

      break;

   case BRW_OPCODE_SHL:
      if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
         /* It's not currently possible to generate this, and this constant
          * folding does not handle it.
          */
         assert(!inst->saturate);

         brw_reg result;

         /* The shift count is masked to 5 bits for 2- and 4-byte sources
          * and to 6 bits for 8-byte sources.
          */
         switch (brw_type_size_bytes(inst->src[0].type)) {
         case 2:
            result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
            break;
         case 4:
            result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
            break;
         case 8:
            result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
            break;
         default:
            /* Just in case a future platform re-enables B or UB types. */
            unreachable("Invalid source size.");
         }

         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = retype(result, inst->dst.type);
         inst->resize_sources(1);
         progress = true;
      }

      break;

   default:
      break;
   }

#ifndef NDEBUG
   /* The function is only intended to do constant folding, so the result of
    * progress must be a MOV of an immediate value.
    *
    * NOTE(review): the identity rewrites above (a + 0, a * 1, a * -1) keep a
    * source that was never checked to be an immediate, so they may not
    * satisfy the IMM assertion below -- confirm against the callers whether
    * those rewrites belong in this function.
    */
   if (progress) {
      assert(inst->opcode == BRW_OPCODE_MOV);
      assert(inst->src[0].file == IMM);
   }
#endif

   return progress;
}
bool bool
brw_fs_opt_algebraic(fs_visitor &s) brw_fs_opt_algebraic(fs_visitor &s)
{ {
@ -107,119 +277,17 @@ brw_fs_opt_algebraic(fs_visitor &s)
break; break;
case BRW_OPCODE_MUL: case BRW_OPCODE_MUL:
if (inst->src[0].file != IMM && inst->src[1].file != IMM)
continue;
if (brw_type_is_float(inst->src[1].type))
break;
/* From the BDW PRM, Vol 2a, "mul - Multiply":
*
* "When multiplying integer datatypes, if src0 is DW and src1
* is W, irrespective of the destination datatype, the
* accumulator maintains full 48-bit precision."
* ...
* "When multiplying integer data types, if one of the sources
* is a DW, the resulting full precision data is stored in
* the accumulator."
*
* There are also similar notes in earlier PRMs.
*
* The MOV instruction can copy the bits of the source, but it
* does not clear the higher bits of the accumulator. So, because
* we might use the full accumulator in the MUL/MACH macro, we
* shouldn't replace such MULs with MOVs.
*/
if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
brw_type_size_bytes(inst->src[1].type) == 4) &&
(inst->dst.is_accumulator() ||
inst->writes_accumulator_implicitly(devinfo)))
break;
if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = brw_imm_d(0);
inst->resize_sources(1);
progress = true;
break;
}
/* a * 1.0 = a */
if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->resize_sources(1);
progress = true;
break;
}
/* a * -1.0 = -a */
if (inst->src[0].is_negative_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = inst->src[1];
inst->src[0].negate = !inst->src[0].negate;
inst->resize_sources(1);
progress = true;
break;
}
if (inst->src[1].is_negative_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0].negate = !inst->src[0].negate;
inst->resize_sources(1);
progress = true;
break;
}
break;
case BRW_OPCODE_ADD: case BRW_OPCODE_ADD:
if (inst->src[1].file != IMM)
continue;
if (brw_type_is_int(inst->src[1].type) &&
inst->src[1].is_zero()) {
inst->opcode = BRW_OPCODE_MOV;
inst->resize_sources(1);
progress = true;
break;
}
if (inst->src[0].file == IMM) {
assert(inst->src[0].type == BRW_TYPE_F);
inst->opcode = BRW_OPCODE_MOV;
inst->src[0].f += inst->src[1].f;
inst->resize_sources(1);
progress = true;
break;
}
break;
case BRW_OPCODE_AND: case BRW_OPCODE_AND:
if (inst->src[0].file == IMM && inst->src[1].file == IMM) { if (brw_constant_fold_instruction(devinfo, inst))
const uint64_t src0 = src_as_uint(inst->src[0]);
const uint64_t src1 = src_as_uint(inst->src[1]);
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
inst->resize_sources(1);
progress = true; progress = true;
break;
}
break; break;
case BRW_OPCODE_OR: case BRW_OPCODE_OR:
if (inst->src[0].file == IMM && inst->src[1].file == IMM) { if (brw_constant_fold_instruction(devinfo, inst)) {
const uint64_t src0 = src_as_uint(inst->src[0]);
const uint64_t src1 = src_as_uint(inst->src[1]);
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
inst->resize_sources(1);
progress = true; progress = true;
break; } else if (inst->src[0].equals(inst->src[1]) ||
}
if (inst->src[0].equals(inst->src[1]) ||
inst->src[1].is_zero()) { inst->src[1].is_zero()) {
/* On Gfx8+, the OR instruction can have a source modifier that /* On Gfx8+, the OR instruction can have a source modifier that
* performs logical not on the operand. Cases of 'OR r0, ~r1, 0' * performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
@ -388,35 +456,8 @@ brw_fs_opt_algebraic(fs_visitor &s)
} }
break; break;
case BRW_OPCODE_SHL: case BRW_OPCODE_SHL:
if (inst->src[0].file == IMM && inst->src[1].file == IMM) { if (brw_constant_fold_instruction(devinfo, inst))
/* It's not currently possible to generate this, and this constant
* folding does not handle it.
*/
assert(!inst->saturate);
brw_reg result;
switch (brw_type_size_bytes(inst->src[0].type)) {
case 2:
result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
break;
case 4:
result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
break;
case 8:
result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
break;
default:
/* Just in case a future platform re-enables B or UB types. */
unreachable("Invalid source size.");
}
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = retype(result, inst->dst.type);
inst->resize_sources(1);
progress = true; progress = true;
}
break; break;
case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_BROADCAST: