From bbffe124fc40cc3c631d1a2496ef6a0918abaaab Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Thu, 4 Jan 2024 23:26:50 -0800 Subject: [PATCH] intel/brw: Move fs algebraic to its own file Reviewed-by: Lionel Landwerlin Acked-by: Ian Romanick Part-of: --- src/intel/compiler/brw_fs.cpp | 460 ------------------- src/intel/compiler/brw_fs_opt_algebraic.cpp | 469 ++++++++++++++++++++ src/intel/compiler/meson.build | 1 + 3 files changed, 470 insertions(+), 460 deletions(-) create mode 100644 src/intel/compiler/brw_fs_opt_algebraic.cpp diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index fdc4d6e96bf..d3aec055a77 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2203,466 +2203,6 @@ brw_fs_lower_constant_loads(fs_visitor &s) return progress; } -static uint64_t -src_as_uint(const fs_reg &src) -{ - assert(src.file == IMM); - - switch (src.type) { - case BRW_REGISTER_TYPE_W: - return (uint64_t)(int16_t)(src.ud & 0xffff); - - case BRW_REGISTER_TYPE_UW: - return (uint64_t)(uint16_t)(src.ud & 0xffff); - - case BRW_REGISTER_TYPE_D: - return (uint64_t)src.d; - - case BRW_REGISTER_TYPE_UD: - return (uint64_t)src.ud; - - case BRW_REGISTER_TYPE_Q: - return src.d64; - - case BRW_REGISTER_TYPE_UQ: - return src.u64; - - default: - unreachable("Invalid integer type."); - } -} - -static fs_reg -brw_imm_for_type(uint64_t value, enum brw_reg_type type) -{ - switch (type) { - case BRW_REGISTER_TYPE_W: - return brw_imm_w(value); - - case BRW_REGISTER_TYPE_UW: - return brw_imm_uw(value); - - case BRW_REGISTER_TYPE_D: - return brw_imm_d(value); - - case BRW_REGISTER_TYPE_UD: - return brw_imm_ud(value); - - case BRW_REGISTER_TYPE_Q: - return brw_imm_d(value); - - case BRW_REGISTER_TYPE_UQ: - return brw_imm_uq(value); - - default: - unreachable("Invalid integer type."); - } -} - -bool -brw_fs_opt_algebraic(fs_visitor &s) -{ - const intel_device_info *devinfo = s.devinfo; - bool progress = false; - - foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { - switch (inst->opcode) { - case BRW_OPCODE_MOV: - if (!devinfo->has_64bit_float && - inst->dst.type == BRW_REGISTER_TYPE_DF) { - assert(inst->dst.type == inst->src[0].type); - assert(!inst->saturate); - assert(!inst->src[0].abs); - assert(!inst->src[0].negate); - const brw::fs_builder ibld(&s, block, inst); - - if (!inst->is_partial_write()) - ibld.emit_undef_for_dst(inst); - - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1), - subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1)); - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0), - subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0)); - - inst->remove(block); - progress = true; - } - - if (!devinfo->has_64bit_int && - (inst->dst.type == BRW_REGISTER_TYPE_UQ || - inst->dst.type == BRW_REGISTER_TYPE_Q)) { - assert(inst->dst.type == inst->src[0].type); - assert(!inst->saturate); - assert(!inst->src[0].abs); - assert(!inst->src[0].negate); - const brw::fs_builder ibld(&s, block, inst); - - if (!inst->is_partial_write()) - ibld.emit_undef_for_dst(inst); - - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1)); - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0)); - - inst->remove(block); - progress = true; - } - - if ((inst->conditional_mod == BRW_CONDITIONAL_Z || - inst->conditional_mod == BRW_CONDITIONAL_NZ) && - inst->dst.is_null() && - (inst->src[0].abs || inst->src[0].negate)) { - inst->src[0].abs = false; - inst->src[0].negate = false; - progress = true; - break; - } - - if (inst->src[0].file != IMM) - break; - - if (inst->saturate) { - /* Full mixed-type saturates don't happen. However, we can end up - * with things like: - * - * mov.sat(8) g21<1>DF -1F - * - * Other mixed-size-but-same-base-type cases may also be possible. - */ - if (inst->dst.type != inst->src[0].type && - inst->dst.type != BRW_REGISTER_TYPE_DF && - inst->src[0].type != BRW_REGISTER_TYPE_F) - assert(!"unimplemented: saturate mixed types"); - - if (brw_saturate_immediate(inst->src[0].type, - &inst->src[0].as_brw_reg())) { - inst->saturate = false; - progress = true; - } - } - break; - - case BRW_OPCODE_MUL: - if (inst->src[1].file != IMM) - continue; - - if (brw_reg_type_is_floating_point(inst->src[1].type)) - break; - - /* From the BDW PRM, Vol 2a, "mul - Multiply": - * - * "When multiplying integer datatypes, if src0 is DW and src1 - * is W, irrespective of the destination datatype, the - * accumulator maintains full 48-bit precision." - * ... - * "When multiplying integer data types, if one of the sources - * is a DW, the resulting full precision data is stored in - * the accumulator." - * - * There are also similar notes in earlier PRMs. - * - * The MOV instruction can copy the bits of the source, but it - * does not clear the higher bits of the accumulator. So, because - * we might use the full accumulator in the MUL/MACH macro, we - * shouldn't replace such MULs with MOVs. - */ - if ((brw_reg_type_to_size(inst->src[0].type) == 4 || - brw_reg_type_to_size(inst->src[1].type) == 4) && - (inst->dst.is_accumulator() || - inst->writes_accumulator_implicitly(devinfo))) - break; - - /* a * 1.0 = a */ - if (inst->src[1].is_one()) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[1] = reg_undef; - progress = true; - break; - } - - /* a * -1.0 = -a */ - if (inst->src[1].is_negative_one()) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[0].negate = !inst->src[0].negate; - inst->src[1] = reg_undef; - progress = true; - break; - } - - break; - case BRW_OPCODE_ADD: - if (inst->src[1].file != IMM) - continue; - - if (brw_reg_type_is_integer(inst->src[1].type) && - inst->src[1].is_zero()) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[1] = reg_undef; - progress = true; - break; - } - - if (inst->src[0].file == IMM) { - assert(inst->src[0].type == BRW_REGISTER_TYPE_F); - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[0].f += inst->src[1].f; - inst->src[1] = reg_undef; - progress = true; - break; - } - break; - - case BRW_OPCODE_AND: - if (inst->src[0].file == IMM && inst->src[1].file == IMM) { - const uint64_t src0 = src_as_uint(inst->src[0]); - const uint64_t src1 = src_as_uint(inst->src[1]); - - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type); - inst->src[1] = reg_undef; - progress = true; - break; - } - - break; - - case BRW_OPCODE_OR: - if (inst->src[0].file == IMM && inst->src[1].file == IMM) { - const uint64_t src0 = src_as_uint(inst->src[0]); - const uint64_t src1 = src_as_uint(inst->src[1]); - - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type); - inst->src[1] = reg_undef; - progress = true; - break; - } - - if (inst->src[0].equals(inst->src[1]) || - inst->src[1].is_zero()) { - /* On Gfx8+, the OR instruction can have a source modifier that - * performs logical not on the operand. Cases of 'OR r0, ~r1, 0' - * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV. - */ - if (inst->src[0].negate) { - inst->opcode = BRW_OPCODE_NOT; - inst->sources = 1; - inst->src[0].negate = false; - } else { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - } - inst->src[1] = reg_undef; - progress = true; - break; - } - break; - case BRW_OPCODE_CMP: - if ((inst->conditional_mod == BRW_CONDITIONAL_Z || - inst->conditional_mod == BRW_CONDITIONAL_NZ) && - inst->src[1].is_zero() && - (inst->src[0].abs || inst->src[0].negate)) { - inst->src[0].abs = false; - inst->src[0].negate = false; - progress = true; - break; - } - break; - case BRW_OPCODE_SEL: - if (!devinfo->has_64bit_float && - !devinfo->has_64bit_int && - (inst->dst.type == BRW_REGISTER_TYPE_DF || - inst->dst.type == BRW_REGISTER_TYPE_UQ || - inst->dst.type == BRW_REGISTER_TYPE_Q)) { - assert(inst->dst.type == inst->src[0].type); - assert(!inst->saturate); - assert(!inst->src[0].abs && !inst->src[0].negate); - assert(!inst->src[1].abs && !inst->src[1].negate); - const brw::fs_builder ibld(&s, block, inst); - - if (!inst->is_partial_write()) - ibld.emit_undef_for_dst(inst); - - set_predicate(inst->predicate, - ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0), - subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0))); - set_predicate(inst->predicate, - ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1), - subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1))); - - inst->remove(block); - progress = true; - } - if (inst->src[0].equals(inst->src[1])) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[1] = reg_undef; - inst->predicate = BRW_PREDICATE_NONE; - inst->predicate_inverse = false; - progress = true; - } else if (inst->saturate && inst->src[1].file == IMM) { - switch (inst->conditional_mod) { - case BRW_CONDITIONAL_LE: - case BRW_CONDITIONAL_L: - switch (inst->src[1].type) { - case BRW_REGISTER_TYPE_F: - if (inst->src[1].f >= 1.0f) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[1] = reg_undef; - inst->conditional_mod = BRW_CONDITIONAL_NONE; - progress = true; - } - break; - default: - break; - } - break; - case BRW_CONDITIONAL_GE: - case BRW_CONDITIONAL_G: - switch (inst->src[1].type) { - case BRW_REGISTER_TYPE_F: - if (inst->src[1].f <= 0.0f) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->src[1] = reg_undef; - inst->conditional_mod = BRW_CONDITIONAL_NONE; - progress = true; - } - break; - default: - break; - } - default: - break; - } - } - break; - case BRW_OPCODE_MAD: - if (inst->src[0].type != BRW_REGISTER_TYPE_F || - inst->src[1].type != BRW_REGISTER_TYPE_F || - inst->src[2].type != BRW_REGISTER_TYPE_F) - break; - if (inst->src[1].is_one()) { - inst->opcode = BRW_OPCODE_ADD; - inst->sources = 2; - inst->src[1] = inst->src[2]; - inst->src[2] = reg_undef; - progress = true; - } else if (inst->src[2].is_one()) { - inst->opcode = BRW_OPCODE_ADD; - inst->sources = 2; - inst->src[2] = reg_undef; - progress = true; - } - break; - case BRW_OPCODE_SHL: - if (inst->src[0].file == IMM && inst->src[1].file == IMM) { - /* It's not currently possible to generate this, and this constant - * folding does not handle it. - */ - assert(!inst->saturate); - - fs_reg result; - - switch (type_sz(inst->src[0].type)) { - case 2: - result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f))); - break; - case 4: - result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f)); - break; - case 8: - result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f)); - break; - default: - /* Just in case a future platform re-enables B or UB types. */ - unreachable("Invalid source size."); - } - - inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = retype(result, inst->dst.type); - inst->src[1] = reg_undef; - inst->sources = 1; - - progress = true; - } - break; - - case SHADER_OPCODE_BROADCAST: - if (is_uniform(inst->src[0])) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - inst->force_writemask_all = true; - progress = true; - } else if (inst->src[1].file == IMM) { - inst->opcode = BRW_OPCODE_MOV; - /* It's possible that the selected component will be too large and - * overflow the register. This can happen if someone does a - * readInvocation() from GLSL or SPIR-V and provides an OOB - * invocationIndex. If this happens and we some how manage - * to constant fold it in and get here, then component() may cause - * us to start reading outside of the VGRF which will lead to an - * assert later. Instead, just let it wrap around if it goes over - * exec_size. - */ - const unsigned comp = inst->src[1].ud & (inst->exec_size - 1); - inst->src[0] = component(inst->src[0], comp); - inst->sources = 1; - inst->force_writemask_all = true; - progress = true; - } - break; - - case SHADER_OPCODE_SHUFFLE: - if (is_uniform(inst->src[0])) { - inst->opcode = BRW_OPCODE_MOV; - inst->sources = 1; - progress = true; - } else if (inst->src[1].file == IMM) { - inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = component(inst->src[0], - inst->src[1].ud); - inst->sources = 1; - progress = true; - } - break; - - default: - break; - } - - /* Ensure that the correct source has the immediate value. 2-source - * instructions must have the immediate in src[1]. On Gfx12 and later, - * some 3-source instructions can have the immediate in src[0] or - * src[2]. It's complicated, so don't mess with 3-source instructions - * here. - */ - if (progress && inst->sources == 2 && inst->is_commutative()) { - if (inst->src[0].file == IMM) { - fs_reg tmp = inst->src[1]; - inst->src[1] = inst->src[0]; - inst->src[0] = tmp; - } - } - } - - if (progress) - s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW | - DEPENDENCY_INSTRUCTION_DETAIL); - - return progress; -} - /** * Once we've generated code, try to convert normal FS_OPCODE_FB_WRITE * instructions to FS_OPCODE_REP_FB_WRITE. diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp new file mode 100644 index 00000000000..7d492f7fd70 --- /dev/null +++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp @@ -0,0 +1,469 @@ +/* + * Copyright © 2010 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "brw_fs.h" +#include "brw_fs_builder.h" + +using namespace brw; + +static uint64_t +src_as_uint(const fs_reg &src) +{ + assert(src.file == IMM); + + switch (src.type) { + case BRW_REGISTER_TYPE_W: + return (uint64_t)(int16_t)(src.ud & 0xffff); + + case BRW_REGISTER_TYPE_UW: + return (uint64_t)(uint16_t)(src.ud & 0xffff); + + case BRW_REGISTER_TYPE_D: + return (uint64_t)src.d; + + case BRW_REGISTER_TYPE_UD: + return (uint64_t)src.ud; + + case BRW_REGISTER_TYPE_Q: + return src.d64; + + case BRW_REGISTER_TYPE_UQ: + return src.u64; + + default: + unreachable("Invalid integer type."); + } +} + +static fs_reg +brw_imm_for_type(uint64_t value, enum brw_reg_type type) +{ + switch (type) { + case BRW_REGISTER_TYPE_W: + return brw_imm_w(value); + + case BRW_REGISTER_TYPE_UW: + return brw_imm_uw(value); + + case BRW_REGISTER_TYPE_D: + return brw_imm_d(value); + + case BRW_REGISTER_TYPE_UD: + return brw_imm_ud(value); + + case BRW_REGISTER_TYPE_Q: + return brw_imm_d(value); + + case BRW_REGISTER_TYPE_UQ: + return brw_imm_uq(value); + + default: + unreachable("Invalid integer type."); + } +} + +bool +brw_fs_opt_algebraic(fs_visitor &s) +{ + const intel_device_info *devinfo = s.devinfo; + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { + switch (inst->opcode) { + case BRW_OPCODE_MOV: + if (!devinfo->has_64bit_float && + inst->dst.type == BRW_REGISTER_TYPE_DF) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs); + assert(!inst->src[0].negate); + const brw::fs_builder ibld(&s, block, inst); + + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0)); + + inst->remove(block); + progress = true; + } + + if (!devinfo->has_64bit_int && + (inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs); + assert(!inst->src[0].negate); + const brw::fs_builder ibld(&s, block, inst); + + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0)); + + inst->remove(block); + progress = true; + } + + if ((inst->conditional_mod == BRW_CONDITIONAL_Z || + inst->conditional_mod == BRW_CONDITIONAL_NZ) && + inst->dst.is_null() && + (inst->src[0].abs || inst->src[0].negate)) { + inst->src[0].abs = false; + inst->src[0].negate = false; + progress = true; + break; + } + + if (inst->src[0].file != IMM) + break; + + if (inst->saturate) { + /* Full mixed-type saturates don't happen. However, we can end up + * with things like: + * + * mov.sat(8) g21<1>DF -1F + * + * Other mixed-size-but-same-base-type cases may also be possible. + */ + if (inst->dst.type != inst->src[0].type && + inst->dst.type != BRW_REGISTER_TYPE_DF && + inst->src[0].type != BRW_REGISTER_TYPE_F) + assert(!"unimplemented: saturate mixed types"); + + if (brw_saturate_immediate(inst->src[0].type, + &inst->src[0].as_brw_reg())) { + inst->saturate = false; + progress = true; + } + } + break; + + case BRW_OPCODE_MUL: + if (inst->src[1].file != IMM) + continue; + + if (brw_reg_type_is_floating_point(inst->src[1].type)) + break; + + /* From the BDW PRM, Vol 2a, "mul - Multiply": + * + * "When multiplying integer datatypes, if src0 is DW and src1 + * is W, irrespective of the destination datatype, the + * accumulator maintains full 48-bit precision." + * ... + * "When multiplying integer data types, if one of the sources + * is a DW, the resulting full precision data is stored in + * the accumulator." + * + * There are also similar notes in earlier PRMs. + * + * The MOV instruction can copy the bits of the source, but it + * does not clear the higher bits of the accumulator. So, because + * we might use the full accumulator in the MUL/MACH macro, we + * shouldn't replace such MULs with MOVs. + */ + if ((brw_reg_type_to_size(inst->src[0].type) == 4 || + brw_reg_type_to_size(inst->src[1].type) == 4) && + (inst->dst.is_accumulator() || + inst->writes_accumulator_implicitly(devinfo))) + break; + + /* a * 1.0 = a */ + if (inst->src[1].is_one()) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[1] = reg_undef; + progress = true; + break; + } + + /* a * -1.0 = -a */ + if (inst->src[1].is_negative_one()) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[0].negate = !inst->src[0].negate; + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + case BRW_OPCODE_ADD: + if (inst->src[1].file != IMM) + continue; + + if (brw_reg_type_is_integer(inst->src[1].type) && + inst->src[1].is_zero()) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[1] = reg_undef; + progress = true; + break; + } + + if (inst->src[0].file == IMM) { + assert(inst->src[0].type == BRW_REGISTER_TYPE_F); + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[0].f += inst->src[1].f; + inst->src[1] = reg_undef; + progress = true; + break; + } + break; + + case BRW_OPCODE_AND: + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + const uint64_t src0 = src_as_uint(inst->src[0]); + const uint64_t src1 = src_as_uint(inst->src[1]); + + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type); + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + + case BRW_OPCODE_OR: + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + const uint64_t src0 = src_as_uint(inst->src[0]); + const uint64_t src1 = src_as_uint(inst->src[1]); + + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type); + inst->src[1] = reg_undef; + progress = true; + break; + } + + if (inst->src[0].equals(inst->src[1]) || + inst->src[1].is_zero()) { + /* On Gfx8+, the OR instruction can have a source modifier that + * performs logical not on the operand. Cases of 'OR r0, ~r1, 0' + * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV. + */ + if (inst->src[0].negate) { + inst->opcode = BRW_OPCODE_NOT; + inst->sources = 1; + inst->src[0].negate = false; + } else { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + } + inst->src[1] = reg_undef; + progress = true; + break; + } + break; + case BRW_OPCODE_CMP: + if ((inst->conditional_mod == BRW_CONDITIONAL_Z || + inst->conditional_mod == BRW_CONDITIONAL_NZ) && + inst->src[1].is_zero() && + (inst->src[0].abs || inst->src[0].negate)) { + inst->src[0].abs = false; + inst->src[0].negate = false; + progress = true; + break; + } + break; + case BRW_OPCODE_SEL: + if (!devinfo->has_64bit_float && + !devinfo->has_64bit_int && + (inst->dst.type == BRW_REGISTER_TYPE_DF || + inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs && !inst->src[0].negate); + assert(!inst->src[1].abs && !inst->src[1].negate); + const brw::fs_builder ibld(&s, block, inst); + + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0))); + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1))); + + inst->remove(block); + progress = true; + } + if (inst->src[0].equals(inst->src[1])) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[1] = reg_undef; + inst->predicate = BRW_PREDICATE_NONE; + inst->predicate_inverse = false; + progress = true; + } else if (inst->saturate && inst->src[1].file == IMM) { + switch (inst->conditional_mod) { + case BRW_CONDITIONAL_LE: + case BRW_CONDITIONAL_L: + switch (inst->src[1].type) { + case BRW_REGISTER_TYPE_F: + if (inst->src[1].f >= 1.0f) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[1] = reg_undef; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + progress = true; + } + break; + default: + break; + } + break; + case BRW_CONDITIONAL_GE: + case BRW_CONDITIONAL_G: + switch (inst->src[1].type) { + case BRW_REGISTER_TYPE_F: + if (inst->src[1].f <= 0.0f) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[1] = reg_undef; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + progress = true; + } + break; + default: + break; + } + default: + break; + } + } + break; + case BRW_OPCODE_MAD: + if (inst->src[0].type != BRW_REGISTER_TYPE_F || + inst->src[1].type != BRW_REGISTER_TYPE_F || + inst->src[2].type != BRW_REGISTER_TYPE_F) + break; + if (inst->src[1].is_one()) { + inst->opcode = BRW_OPCODE_ADD; + inst->sources = 2; + inst->src[1] = inst->src[2]; + inst->src[2] = reg_undef; + progress = true; + } else if (inst->src[2].is_one()) { + inst->opcode = BRW_OPCODE_ADD; + inst->sources = 2; + inst->src[2] = reg_undef; + progress = true; + } + break; + case BRW_OPCODE_SHL: + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + /* It's not currently possible to generate this, and this constant + * folding does not handle it. + */ + assert(!inst->saturate); + + fs_reg result; + + switch (type_sz(inst->src[0].type)) { + case 2: + result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f))); + break; + case 4: + result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f)); + break; + case 8: + result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f)); + break; + default: + /* Just in case a future platform re-enables B or UB types. */ + unreachable("Invalid source size."); + } + + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = retype(result, inst->dst.type); + inst->src[1] = reg_undef; + inst->sources = 1; + + progress = true; + } + break; + + case SHADER_OPCODE_BROADCAST: + if (is_uniform(inst->src[0])) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->force_writemask_all = true; + progress = true; + } else if (inst->src[1].file == IMM) { + inst->opcode = BRW_OPCODE_MOV; + /* It's possible that the selected component will be too large and + * overflow the register. This can happen if someone does a + * readInvocation() from GLSL or SPIR-V and provides an OOB + * invocationIndex. If this happens and we some how manage + * to constant fold it in and get here, then component() may cause + * us to start reading outside of the VGRF which will lead to an + * assert later. Instead, just let it wrap around if it goes over + * exec_size. + */ + const unsigned comp = inst->src[1].ud & (inst->exec_size - 1); + inst->src[0] = component(inst->src[0], comp); + inst->sources = 1; + inst->force_writemask_all = true; + progress = true; + } + break; + + case SHADER_OPCODE_SHUFFLE: + if (is_uniform(inst->src[0])) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + progress = true; + } else if (inst->src[1].file == IMM) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = component(inst->src[0], + inst->src[1].ud); + inst->sources = 1; + progress = true; + } + break; + + default: + break; + } + + /* Ensure that the correct source has the immediate value. 2-source + * instructions must have the immediate in src[1]. On Gfx12 and later, + * some 3-source instructions can have the immediate in src[0] or + * src[2]. It's complicated, so don't mess with 3-source instructions + * here. + */ + if (progress && inst->sources == 2 && inst->is_commutative()) { + if (inst->src[0].file == IMM) { + fs_reg tmp = inst->src[1]; + inst->src[1] = inst->src[0]; + inst->src[0] = tmp; + } + } + } + + if (progress) + s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW | + DEPENDENCY_INSTRUCTION_DETAIL); + + return progress; +} diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 45475dbccc1..ea6382465c1 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -80,6 +80,7 @@ libintel_compiler_brw_files = files( 'brw_fs_lower_regioning.cpp', 'brw_fs_nir.cpp', 'brw_fs_opt.cpp', + 'brw_fs_opt_algebraic.cpp', 'brw_fs_opt_virtual_grfs.cpp', 'brw_fs_reg_allocate.cpp', 'brw_fs_register_coalesce.cpp',