mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 04:50:11 +01:00
brw/algebraic: Refactor constant folding out of brw_fs_opt_algebraic
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31729>
This commit is contained in:
parent
90ad5d3f06
commit
2cc1575a31
2 changed files with 176 additions and 134 deletions
|
|
@ -654,6 +654,7 @@ bool brw_fs_lower_subgroup_ops(fs_visitor &s);
|
|||
bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);
|
||||
void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
|
||||
|
||||
bool brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
|
||||
bool brw_fs_opt_algebraic(fs_visitor &s);
|
||||
bool brw_fs_opt_bank_conflicts(fs_visitor &s);
|
||||
bool brw_fs_opt_cmod_propagation(fs_visitor &s);
|
||||
|
|
|
|||
|
|
@ -64,6 +64,176 @@ brw_imm_for_type(uint64_t value, enum brw_reg_type type)
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_ADD:
|
||||
if (inst->src[1].file != IMM)
|
||||
break;
|
||||
|
||||
if (brw_type_is_int(inst->src[1].type) &&
|
||||
inst->src[1].is_zero()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].file == IMM) {
|
||||
assert(inst->src[0].type == BRW_TYPE_F);
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0].f += inst->src[1].f;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
if ((inst->src[0].file != IMM && inst->src[1].file != IMM) ||
|
||||
brw_type_is_float(inst->src[1].type))
|
||||
break;
|
||||
|
||||
/* From the BDW PRM, Vol 2a, "mul - Multiply":
|
||||
*
|
||||
* "When multiplying integer datatypes, if src0 is DW and src1
|
||||
* is W, irrespective of the destination datatype, the
|
||||
* accumulator maintains full 48-bit precision."
|
||||
* ...
|
||||
* "When multiplying integer data types, if one of the sources
|
||||
* is a DW, the resulting full precision data is stored in
|
||||
* the accumulator."
|
||||
*
|
||||
* There are also similar notes in earlier PRMs.
|
||||
*
|
||||
* The MOV instruction can copy the bits of the source, but it
|
||||
* does not clear the higher bits of the accumulator. So, because
|
||||
* we might use the full accumulator in the MUL/MACH macro, we
|
||||
* shouldn't replace such MULs with MOVs.
|
||||
*/
|
||||
if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
|
||||
brw_type_size_bytes(inst->src[1].type) == 4) &&
|
||||
(inst->dst.is_accumulator() ||
|
||||
inst->writes_accumulator_implicitly(devinfo)))
|
||||
break;
|
||||
|
||||
if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = brw_imm_d(0);
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* a * 1 = a */
|
||||
if (inst->src[1].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->resize_sources(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* a * -1 = -a */
|
||||
if (inst->src[0].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = inst->src[1];
|
||||
inst->src[0].negate = !inst->src[0].negate;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[1].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0].negate = !inst->src[0].negate;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_OR:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SHL:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
/* It's not currently possible to generate this, and this constant
|
||||
* folding does not handle it.
|
||||
*/
|
||||
assert(!inst->saturate);
|
||||
|
||||
brw_reg result;
|
||||
|
||||
switch (brw_type_size_bytes(inst->src[0].type)) {
|
||||
case 2:
|
||||
result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
|
||||
break;
|
||||
case 4:
|
||||
result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
|
||||
break;
|
||||
case 8:
|
||||
result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
|
||||
break;
|
||||
default:
|
||||
/* Just in case a future platform re-enables B or UB types. */
|
||||
unreachable("Invalid source size.");
|
||||
}
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = retype(result, inst->dst.type);
|
||||
inst->resize_sources(1);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
/* The function is only intended to do constant folding, so the result of
|
||||
* progress must be a MOV of an immediate value.
|
||||
*/
|
||||
if (progress) {
|
||||
assert(inst->opcode == BRW_OPCODE_MOV);
|
||||
assert(inst->src[0].file == IMM);
|
||||
}
|
||||
#endif
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_fs_opt_algebraic(fs_visitor &s)
|
||||
{
|
||||
|
|
@ -107,119 +277,17 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
if (inst->src[0].file != IMM && inst->src[1].file != IMM)
|
||||
continue;
|
||||
|
||||
if (brw_type_is_float(inst->src[1].type))
|
||||
break;
|
||||
|
||||
/* From the BDW PRM, Vol 2a, "mul - Multiply":
|
||||
*
|
||||
* "When multiplying integer datatypes, if src0 is DW and src1
|
||||
* is W, irrespective of the destination datatype, the
|
||||
* accumulator maintains full 48-bit precision."
|
||||
* ...
|
||||
* "When multiplying integer data types, if one of the sources
|
||||
* is a DW, the resulting full precision data is stored in
|
||||
* the accumulator."
|
||||
*
|
||||
* There are also similar notes in earlier PRMs.
|
||||
*
|
||||
* The MOV instruction can copy the bits of the source, but it
|
||||
* does not clear the higher bits of the accumulator. So, because
|
||||
* we might use the full accumulator in the MUL/MACH macro, we
|
||||
* shouldn't replace such MULs with MOVs.
|
||||
*/
|
||||
if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
|
||||
brw_type_size_bytes(inst->src[1].type) == 4) &&
|
||||
(inst->dst.is_accumulator() ||
|
||||
inst->writes_accumulator_implicitly(devinfo)))
|
||||
break;
|
||||
|
||||
if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = brw_imm_d(0);
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* a * 1.0 = a */
|
||||
if (inst->src[1].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* a * -1.0 = -a */
|
||||
if (inst->src[0].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = inst->src[1];
|
||||
inst->src[0].negate = !inst->src[0].negate;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[1].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0].negate = !inst->src[0].negate;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
case BRW_OPCODE_ADD:
|
||||
if (inst->src[1].file != IMM)
|
||||
continue;
|
||||
|
||||
if (brw_type_is_int(inst->src[1].type) &&
|
||||
inst->src[1].is_zero()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].file == IMM) {
|
||||
assert(inst->src[0].type == BRW_TYPE_F);
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0].f += inst->src[1].f;
|
||||
inst->resize_sources(1);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
|
||||
inst->resize_sources(1);
|
||||
if (brw_constant_fold_instruction(devinfo, inst))
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_OR:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
|
||||
inst->resize_sources(1);
|
||||
if (brw_constant_fold_instruction(devinfo, inst)) {
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].equals(inst->src[1]) ||
|
||||
} else if (inst->src[0].equals(inst->src[1]) ||
|
||||
inst->src[1].is_zero()) {
|
||||
/* On Gfx8+, the OR instruction can have a source modifier that
|
||||
* performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
|
||||
|
|
@ -388,35 +456,8 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
}
|
||||
break;
|
||||
case BRW_OPCODE_SHL:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
/* It's not currently possible to generate this, and this constant
|
||||
* folding does not handle it.
|
||||
*/
|
||||
assert(!inst->saturate);
|
||||
|
||||
brw_reg result;
|
||||
|
||||
switch (brw_type_size_bytes(inst->src[0].type)) {
|
||||
case 2:
|
||||
result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
|
||||
break;
|
||||
case 4:
|
||||
result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
|
||||
break;
|
||||
case 8:
|
||||
result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
|
||||
break;
|
||||
default:
|
||||
/* Just in case a future platform re-enables B or UB types. */
|
||||
unreachable("Invalid source size.");
|
||||
}
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = retype(result, inst->dst.type);
|
||||
inst->resize_sources(1);
|
||||
|
||||
if (brw_constant_fold_instruction(devinfo, inst))
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue