mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
intel/brw: Move fs algebraic to its own file
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26887>
This commit is contained in:
parent
df013738db
commit
bbffe124fc
3 changed files with 470 additions and 460 deletions
|
|
@ -2203,466 +2203,6 @@ brw_fs_lower_constant_loads(fs_visitor &s)
|
|||
return progress;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
src_as_uint(const fs_reg &src)
|
||||
{
|
||||
assert(src.file == IMM);
|
||||
|
||||
switch (src.type) {
|
||||
case BRW_REGISTER_TYPE_W:
|
||||
return (uint64_t)(int16_t)(src.ud & 0xffff);
|
||||
|
||||
case BRW_REGISTER_TYPE_UW:
|
||||
return (uint64_t)(uint16_t)(src.ud & 0xffff);
|
||||
|
||||
case BRW_REGISTER_TYPE_D:
|
||||
return (uint64_t)src.d;
|
||||
|
||||
case BRW_REGISTER_TYPE_UD:
|
||||
return (uint64_t)src.ud;
|
||||
|
||||
case BRW_REGISTER_TYPE_Q:
|
||||
return src.d64;
|
||||
|
||||
case BRW_REGISTER_TYPE_UQ:
|
||||
return src.u64;
|
||||
|
||||
default:
|
||||
unreachable("Invalid integer type.");
|
||||
}
|
||||
}
|
||||
|
||||
static fs_reg
|
||||
brw_imm_for_type(uint64_t value, enum brw_reg_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BRW_REGISTER_TYPE_W:
|
||||
return brw_imm_w(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_UW:
|
||||
return brw_imm_uw(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_D:
|
||||
return brw_imm_d(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_UD:
|
||||
return brw_imm_ud(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_Q:
|
||||
return brw_imm_d(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_UQ:
|
||||
return brw_imm_uq(value);
|
||||
|
||||
default:
|
||||
unreachable("Invalid integer type.");
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
brw_fs_opt_algebraic(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
inst->dst.type == BRW_REGISTER_TYPE_DF) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
|
||||
inst->dst.is_null() &&
|
||||
(inst->src[0].abs || inst->src[0].negate)) {
|
||||
inst->src[0].abs = false;
|
||||
inst->src[0].negate = false;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].file != IMM)
|
||||
break;
|
||||
|
||||
if (inst->saturate) {
|
||||
/* Full mixed-type saturates don't happen. However, we can end up
|
||||
* with things like:
|
||||
*
|
||||
* mov.sat(8) g21<1>DF -1F
|
||||
*
|
||||
* Other mixed-size-but-same-base-type cases may also be possible.
|
||||
*/
|
||||
if (inst->dst.type != inst->src[0].type &&
|
||||
inst->dst.type != BRW_REGISTER_TYPE_DF &&
|
||||
inst->src[0].type != BRW_REGISTER_TYPE_F)
|
||||
assert(!"unimplemented: saturate mixed types");
|
||||
|
||||
if (brw_saturate_immediate(inst->src[0].type,
|
||||
&inst->src[0].as_brw_reg())) {
|
||||
inst->saturate = false;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
if (inst->src[1].file != IMM)
|
||||
continue;
|
||||
|
||||
if (brw_reg_type_is_floating_point(inst->src[1].type))
|
||||
break;
|
||||
|
||||
/* From the BDW PRM, Vol 2a, "mul - Multiply":
|
||||
*
|
||||
* "When multiplying integer datatypes, if src0 is DW and src1
|
||||
* is W, irrespective of the destination datatype, the
|
||||
* accumulator maintains full 48-bit precision."
|
||||
* ...
|
||||
* "When multiplying integer data types, if one of the sources
|
||||
* is a DW, the resulting full precision data is stored in
|
||||
* the accumulator."
|
||||
*
|
||||
* There are also similar notes in earlier PRMs.
|
||||
*
|
||||
* The MOV instruction can copy the bits of the source, but it
|
||||
* does not clear the higher bits of the accumulator. So, because
|
||||
* we might use the full accumulator in the MUL/MACH macro, we
|
||||
* shouldn't replace such MULs with MOVs.
|
||||
*/
|
||||
if ((brw_reg_type_to_size(inst->src[0].type) == 4 ||
|
||||
brw_reg_type_to_size(inst->src[1].type) == 4) &&
|
||||
(inst->dst.is_accumulator() ||
|
||||
inst->writes_accumulator_implicitly(devinfo)))
|
||||
break;
|
||||
|
||||
/* a * 1.0 = a */
|
||||
if (inst->src[1].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* a * -1.0 = -a */
|
||||
if (inst->src[1].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0].negate = !inst->src[0].negate;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
case BRW_OPCODE_ADD:
|
||||
if (inst->src[1].file != IMM)
|
||||
continue;
|
||||
|
||||
if (brw_reg_type_is_integer(inst->src[1].type) &&
|
||||
inst->src[1].is_zero()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].file == IMM) {
|
||||
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0].f += inst->src[1].f;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_OR:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].equals(inst->src[1]) ||
|
||||
inst->src[1].is_zero()) {
|
||||
/* On Gfx8+, the OR instruction can have a source modifier that
|
||||
* performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
|
||||
* or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
|
||||
*/
|
||||
if (inst->src[0].negate) {
|
||||
inst->opcode = BRW_OPCODE_NOT;
|
||||
inst->sources = 1;
|
||||
inst->src[0].negate = false;
|
||||
} else {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
}
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_CMP:
|
||||
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
|
||||
inst->src[1].is_zero() &&
|
||||
(inst->src[0].abs || inst->src[0].negate)) {
|
||||
inst->src[0].abs = false;
|
||||
inst->src[0].negate = false;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_SEL:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_DF ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs && !inst->src[0].negate);
|
||||
assert(!inst->src[1].abs && !inst->src[1].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
if (inst->src[0].equals(inst->src[1])) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
inst->predicate = BRW_PREDICATE_NONE;
|
||||
inst->predicate_inverse = false;
|
||||
progress = true;
|
||||
} else if (inst->saturate && inst->src[1].file == IMM) {
|
||||
switch (inst->conditional_mod) {
|
||||
case BRW_CONDITIONAL_LE:
|
||||
case BRW_CONDITIONAL_L:
|
||||
switch (inst->src[1].type) {
|
||||
case BRW_REGISTER_TYPE_F:
|
||||
if (inst->src[1].f >= 1.0f) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case BRW_CONDITIONAL_GE:
|
||||
case BRW_CONDITIONAL_G:
|
||||
switch (inst->src[1].type) {
|
||||
case BRW_REGISTER_TYPE_F:
|
||||
if (inst->src[1].f <= 0.0f) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_MAD:
|
||||
if (inst->src[0].type != BRW_REGISTER_TYPE_F ||
|
||||
inst->src[1].type != BRW_REGISTER_TYPE_F ||
|
||||
inst->src[2].type != BRW_REGISTER_TYPE_F)
|
||||
break;
|
||||
if (inst->src[1].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_ADD;
|
||||
inst->sources = 2;
|
||||
inst->src[1] = inst->src[2];
|
||||
inst->src[2] = reg_undef;
|
||||
progress = true;
|
||||
} else if (inst->src[2].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_ADD;
|
||||
inst->sources = 2;
|
||||
inst->src[2] = reg_undef;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_SHL:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
/* It's not currently possible to generate this, and this constant
|
||||
* folding does not handle it.
|
||||
*/
|
||||
assert(!inst->saturate);
|
||||
|
||||
fs_reg result;
|
||||
|
||||
switch (type_sz(inst->src[0].type)) {
|
||||
case 2:
|
||||
result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
|
||||
break;
|
||||
case 4:
|
||||
result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
|
||||
break;
|
||||
case 8:
|
||||
result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
|
||||
break;
|
||||
default:
|
||||
/* Just in case a future platform re-enables B or UB types. */
|
||||
unreachable("Invalid source size.");
|
||||
}
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = retype(result, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
inst->sources = 1;
|
||||
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
if (is_uniform(inst->src[0])) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->force_writemask_all = true;
|
||||
progress = true;
|
||||
} else if (inst->src[1].file == IMM) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
/* It's possible that the selected component will be too large and
|
||||
* overflow the register. This can happen if someone does a
|
||||
* readInvocation() from GLSL or SPIR-V and provides an OOB
|
||||
* invocationIndex. If this happens and we some how manage
|
||||
* to constant fold it in and get here, then component() may cause
|
||||
* us to start reading outside of the VGRF which will lead to an
|
||||
* assert later. Instead, just let it wrap around if it goes over
|
||||
* exec_size.
|
||||
*/
|
||||
const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
|
||||
inst->src[0] = component(inst->src[0], comp);
|
||||
inst->sources = 1;
|
||||
inst->force_writemask_all = true;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SHUFFLE:
|
||||
if (is_uniform(inst->src[0])) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
progress = true;
|
||||
} else if (inst->src[1].file == IMM) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = component(inst->src[0],
|
||||
inst->src[1].ud);
|
||||
inst->sources = 1;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Ensure that the correct source has the immediate value. 2-source
|
||||
* instructions must have the immediate in src[1]. On Gfx12 and later,
|
||||
* some 3-source instructions can have the immediate in src[0] or
|
||||
* src[2]. It's complicated, so don't mess with 3-source instructions
|
||||
* here.
|
||||
*/
|
||||
if (progress && inst->sources == 2 && inst->is_commutative()) {
|
||||
if (inst->src[0].file == IMM) {
|
||||
fs_reg tmp = inst->src[1];
|
||||
inst->src[1] = inst->src[0];
|
||||
inst->src[0] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
|
||||
DEPENDENCY_INSTRUCTION_DETAIL);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Once we've generated code, try to convert normal FS_OPCODE_FB_WRITE
|
||||
* instructions to FS_OPCODE_REP_FB_WRITE.
|
||||
|
|
|
|||
469
src/intel/compiler/brw_fs_opt_algebraic.cpp
Normal file
469
src/intel/compiler/brw_fs_opt_algebraic.cpp
Normal file
|
|
@ -0,0 +1,469 @@
|
|||
/*
|
||||
* Copyright © 2010 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "brw_fs.h"
|
||||
#include "brw_fs_builder.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
static uint64_t
|
||||
src_as_uint(const fs_reg &src)
|
||||
{
|
||||
assert(src.file == IMM);
|
||||
|
||||
switch (src.type) {
|
||||
case BRW_REGISTER_TYPE_W:
|
||||
return (uint64_t)(int16_t)(src.ud & 0xffff);
|
||||
|
||||
case BRW_REGISTER_TYPE_UW:
|
||||
return (uint64_t)(uint16_t)(src.ud & 0xffff);
|
||||
|
||||
case BRW_REGISTER_TYPE_D:
|
||||
return (uint64_t)src.d;
|
||||
|
||||
case BRW_REGISTER_TYPE_UD:
|
||||
return (uint64_t)src.ud;
|
||||
|
||||
case BRW_REGISTER_TYPE_Q:
|
||||
return src.d64;
|
||||
|
||||
case BRW_REGISTER_TYPE_UQ:
|
||||
return src.u64;
|
||||
|
||||
default:
|
||||
unreachable("Invalid integer type.");
|
||||
}
|
||||
}
|
||||
|
||||
static fs_reg
|
||||
brw_imm_for_type(uint64_t value, enum brw_reg_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BRW_REGISTER_TYPE_W:
|
||||
return brw_imm_w(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_UW:
|
||||
return brw_imm_uw(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_D:
|
||||
return brw_imm_d(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_UD:
|
||||
return brw_imm_ud(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_Q:
|
||||
return brw_imm_d(value);
|
||||
|
||||
case BRW_REGISTER_TYPE_UQ:
|
||||
return brw_imm_uq(value);
|
||||
|
||||
default:
|
||||
unreachable("Invalid integer type.");
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
brw_fs_opt_algebraic(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
inst->dst.type == BRW_REGISTER_TYPE_DF) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs);
|
||||
assert(!inst->src[0].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
|
||||
ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
|
||||
inst->dst.is_null() &&
|
||||
(inst->src[0].abs || inst->src[0].negate)) {
|
||||
inst->src[0].abs = false;
|
||||
inst->src[0].negate = false;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].file != IMM)
|
||||
break;
|
||||
|
||||
if (inst->saturate) {
|
||||
/* Full mixed-type saturates don't happen. However, we can end up
|
||||
* with things like:
|
||||
*
|
||||
* mov.sat(8) g21<1>DF -1F
|
||||
*
|
||||
* Other mixed-size-but-same-base-type cases may also be possible.
|
||||
*/
|
||||
if (inst->dst.type != inst->src[0].type &&
|
||||
inst->dst.type != BRW_REGISTER_TYPE_DF &&
|
||||
inst->src[0].type != BRW_REGISTER_TYPE_F)
|
||||
assert(!"unimplemented: saturate mixed types");
|
||||
|
||||
if (brw_saturate_immediate(inst->src[0].type,
|
||||
&inst->src[0].as_brw_reg())) {
|
||||
inst->saturate = false;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
if (inst->src[1].file != IMM)
|
||||
continue;
|
||||
|
||||
if (brw_reg_type_is_floating_point(inst->src[1].type))
|
||||
break;
|
||||
|
||||
/* From the BDW PRM, Vol 2a, "mul - Multiply":
|
||||
*
|
||||
* "When multiplying integer datatypes, if src0 is DW and src1
|
||||
* is W, irrespective of the destination datatype, the
|
||||
* accumulator maintains full 48-bit precision."
|
||||
* ...
|
||||
* "When multiplying integer data types, if one of the sources
|
||||
* is a DW, the resulting full precision data is stored in
|
||||
* the accumulator."
|
||||
*
|
||||
* There are also similar notes in earlier PRMs.
|
||||
*
|
||||
* The MOV instruction can copy the bits of the source, but it
|
||||
* does not clear the higher bits of the accumulator. So, because
|
||||
* we might use the full accumulator in the MUL/MACH macro, we
|
||||
* shouldn't replace such MULs with MOVs.
|
||||
*/
|
||||
if ((brw_reg_type_to_size(inst->src[0].type) == 4 ||
|
||||
brw_reg_type_to_size(inst->src[1].type) == 4) &&
|
||||
(inst->dst.is_accumulator() ||
|
||||
inst->writes_accumulator_implicitly(devinfo)))
|
||||
break;
|
||||
|
||||
/* a * 1.0 = a */
|
||||
if (inst->src[1].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* a * -1.0 = -a */
|
||||
if (inst->src[1].is_negative_one()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0].negate = !inst->src[0].negate;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
case BRW_OPCODE_ADD:
|
||||
if (inst->src[1].file != IMM)
|
||||
continue;
|
||||
|
||||
if (brw_reg_type_is_integer(inst->src[1].type) &&
|
||||
inst->src[1].is_zero()) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].file == IMM) {
|
||||
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0].f += inst->src[1].f;
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_OR:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].equals(inst->src[1]) ||
|
||||
inst->src[1].is_zero()) {
|
||||
/* On Gfx8+, the OR instruction can have a source modifier that
|
||||
* performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
|
||||
* or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
|
||||
*/
|
||||
if (inst->src[0].negate) {
|
||||
inst->opcode = BRW_OPCODE_NOT;
|
||||
inst->sources = 1;
|
||||
inst->src[0].negate = false;
|
||||
} else {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
}
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_CMP:
|
||||
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
|
||||
inst->src[1].is_zero() &&
|
||||
(inst->src[0].abs || inst->src[0].negate)) {
|
||||
inst->src[0].abs = false;
|
||||
inst->src[0].negate = false;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_SEL:
|
||||
if (!devinfo->has_64bit_float &&
|
||||
!devinfo->has_64bit_int &&
|
||||
(inst->dst.type == BRW_REGISTER_TYPE_DF ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_UQ ||
|
||||
inst->dst.type == BRW_REGISTER_TYPE_Q)) {
|
||||
assert(inst->dst.type == inst->src[0].type);
|
||||
assert(!inst->saturate);
|
||||
assert(!inst->src[0].abs && !inst->src[0].negate);
|
||||
assert(!inst->src[1].abs && !inst->src[1].negate);
|
||||
const brw::fs_builder ibld(&s, block, inst);
|
||||
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
|
||||
set_predicate(inst->predicate,
|
||||
ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
|
||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
if (inst->src[0].equals(inst->src[1])) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
inst->predicate = BRW_PREDICATE_NONE;
|
||||
inst->predicate_inverse = false;
|
||||
progress = true;
|
||||
} else if (inst->saturate && inst->src[1].file == IMM) {
|
||||
switch (inst->conditional_mod) {
|
||||
case BRW_CONDITIONAL_LE:
|
||||
case BRW_CONDITIONAL_L:
|
||||
switch (inst->src[1].type) {
|
||||
case BRW_REGISTER_TYPE_F:
|
||||
if (inst->src[1].f >= 1.0f) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case BRW_CONDITIONAL_GE:
|
||||
case BRW_CONDITIONAL_G:
|
||||
switch (inst->src[1].type) {
|
||||
case BRW_REGISTER_TYPE_F:
|
||||
if (inst->src[1].f <= 0.0f) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[1] = reg_undef;
|
||||
inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_MAD:
|
||||
if (inst->src[0].type != BRW_REGISTER_TYPE_F ||
|
||||
inst->src[1].type != BRW_REGISTER_TYPE_F ||
|
||||
inst->src[2].type != BRW_REGISTER_TYPE_F)
|
||||
break;
|
||||
if (inst->src[1].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_ADD;
|
||||
inst->sources = 2;
|
||||
inst->src[1] = inst->src[2];
|
||||
inst->src[2] = reg_undef;
|
||||
progress = true;
|
||||
} else if (inst->src[2].is_one()) {
|
||||
inst->opcode = BRW_OPCODE_ADD;
|
||||
inst->sources = 2;
|
||||
inst->src[2] = reg_undef;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_SHL:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
/* It's not currently possible to generate this, and this constant
|
||||
* folding does not handle it.
|
||||
*/
|
||||
assert(!inst->saturate);
|
||||
|
||||
fs_reg result;
|
||||
|
||||
switch (type_sz(inst->src[0].type)) {
|
||||
case 2:
|
||||
result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
|
||||
break;
|
||||
case 4:
|
||||
result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
|
||||
break;
|
||||
case 8:
|
||||
result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
|
||||
break;
|
||||
default:
|
||||
/* Just in case a future platform re-enables B or UB types. */
|
||||
unreachable("Invalid source size.");
|
||||
}
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = retype(result, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
inst->sources = 1;
|
||||
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
if (is_uniform(inst->src[0])) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->force_writemask_all = true;
|
||||
progress = true;
|
||||
} else if (inst->src[1].file == IMM) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
/* It's possible that the selected component will be too large and
|
||||
* overflow the register. This can happen if someone does a
|
||||
* readInvocation() from GLSL or SPIR-V and provides an OOB
|
||||
* invocationIndex. If this happens and we some how manage
|
||||
* to constant fold it in and get here, then component() may cause
|
||||
* us to start reading outside of the VGRF which will lead to an
|
||||
* assert later. Instead, just let it wrap around if it goes over
|
||||
* exec_size.
|
||||
*/
|
||||
const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
|
||||
inst->src[0] = component(inst->src[0], comp);
|
||||
inst->sources = 1;
|
||||
inst->force_writemask_all = true;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SHUFFLE:
|
||||
if (is_uniform(inst->src[0])) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
progress = true;
|
||||
} else if (inst->src[1].file == IMM) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = component(inst->src[0],
|
||||
inst->src[1].ud);
|
||||
inst->sources = 1;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Ensure that the correct source has the immediate value. 2-source
|
||||
* instructions must have the immediate in src[1]. On Gfx12 and later,
|
||||
* some 3-source instructions can have the immediate in src[0] or
|
||||
* src[2]. It's complicated, so don't mess with 3-source instructions
|
||||
* here.
|
||||
*/
|
||||
if (progress && inst->sources == 2 && inst->is_commutative()) {
|
||||
if (inst->src[0].file == IMM) {
|
||||
fs_reg tmp = inst->src[1];
|
||||
inst->src[1] = inst->src[0];
|
||||
inst->src[0] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
|
||||
DEPENDENCY_INSTRUCTION_DETAIL);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -80,6 +80,7 @@ libintel_compiler_brw_files = files(
|
|||
'brw_fs_lower_regioning.cpp',
|
||||
'brw_fs_nir.cpp',
|
||||
'brw_fs_opt.cpp',
|
||||
'brw_fs_opt_algebraic.cpp',
|
||||
'brw_fs_opt_virtual_grfs.cpp',
|
||||
'brw_fs_reg_allocate.cpp',
|
||||
'brw_fs_register_coalesce.cpp',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue