mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 21:40:08 +01:00
intel/fs: Constant fold OR and AND
The path taken in fs_visitor::swizzle_nir_scratch_addr for DG2 generates
some AND and OR instructions before the SHL. This commit folds those so
the whole calculation becomes a constant (like on older platforms).
v2: Fix return type of src_as_uint. Noticed by Marcin.
shader-db results:
DG2
total instructions in shared programs: 23190475 -> 23179540 (-0.05%)
instructions in affected programs: 36026 -> 25091 (-30.35%)
helped: 7 / HURT: 0
total cycles in shared programs: 841196807 -> 841142563 (<.01%)
cycles in affected programs: 1660670 -> 1606426 (-3.27%)
helped: 7 / HURT: 0
No shader-db changes on any older Intel platforms.
fossil-db results:
DG2
Totals:
Instrs: 197780372 -> 197773966 (-0.00%)
Cycles: 14066410782 -> 14066399378 (-0.00%); split: -0.00%, +0.00%
Subgroup size: 8438104 -> 8438112 (+0.00%)
Send messages: 8049445 -> 8049446 (+0.00%)
Scratch Memory Size: 14263296 -> 14264320 (+0.01%)
Totals from 9 (0.00% of 668055) affected shaders:
Instrs: 24547 -> 18141 (-26.10%)
Cycles: 1984791 -> 1973387 (-0.57%); split: -0.98%, +0.40%
Subgroup size: 88 -> 96 (+9.09%)
Send messages: 867 -> 868 (+0.12%)
Scratch Memory Size: 69632 -> 70656 (+1.47%)
No fossil-db changes on any older Intel platforms.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
(cherry picked from commit cb0de0a1d3)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25377>
This commit is contained in:
parent
41fe60cf3e
commit
3fd835f6fd
2 changed files with 97 additions and 2 deletions
|
|
@ -2602,6 +2602,62 @@ fs_visitor::lower_constant_loads()
|
|||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
src_as_uint(const fs_reg &src)
|
||||
{
|
||||
assert(src.file == IMM);
|
||||
|
||||
switch (src.type) {
|
||||
case BRW_REGISTER_TYPE_W:
|
||||
return (uint64_t)(int16_t)(src.ud & 0xffff);
|
||||
|
||||
case BRW_REGISTER_TYPE_UW:
|
||||
return (uint64_t)(uint16_t)(src.ud & 0xffff);
|
||||
|
||||
case BRW_REGISTER_TYPE_D:
|
||||
return (uint64_t)src.d;
|
||||
|
||||
case BRW_REGISTER_TYPE_UD:
|
||||
return (uint64_t)src.ud;
|
||||
|
||||
case BRW_REGISTER_TYPE_Q:
|
||||
return src.d64;
|
||||
|
||||
case BRW_REGISTER_TYPE_UQ:
|
||||
return src.u64;
|
||||
|
||||
default:
|
||||
unreachable("Invalid integer type.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build an immediate register of the requested integer type from a 64-bit
 * value.
 *
 * Each case forwards \p value to the brw_imm_* constructor matching \p type;
 * any narrowing happens implicitly at that call according to the helper's
 * parameter type (declared outside this file section).  Types other than
 * W, UW, D, UD, Q and UQ reach unreachable().
 *
 * \param value  Bit pattern to encode, e.g. the result of folding two
 *               immediates with AND or OR.
 * \param type   Integer register type of the immediate to create.
 * \return       An immediate source of type \p type.
 */
static fs_reg
brw_imm_for_type(uint64_t value, enum brw_reg_type type)
{
   switch (type) {
   case BRW_REGISTER_TYPE_W:
      return brw_imm_w(value);

   case BRW_REGISTER_TYPE_UW:
      return brw_imm_uw(value);

   case BRW_REGISTER_TYPE_D:
      return brw_imm_d(value);

   case BRW_REGISTER_TYPE_UD:
      return brw_imm_ud(value);

   case BRW_REGISTER_TYPE_Q:
      /* Must be the 64-bit signed constructor: using brw_imm_d() here would
       * drop the upper 32 bits and yield a D-typed immediate for a Q
       * destination.
       */
      return brw_imm_q(value);

   case BRW_REGISTER_TYPE_UQ:
      return brw_imm_uq(value);

   default:
      unreachable("Invalid integer type.");
   }
}
|
||||
|
||||
bool
|
||||
fs_visitor::opt_algebraic()
|
||||
{
|
||||
|
|
@ -2735,7 +2791,35 @@ fs_visitor::opt_algebraic()
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_OR:
|
||||
if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
|
||||
const uint64_t src0 = src_as_uint(inst->src[0]);
|
||||
const uint64_t src1 = src_as_uint(inst->src[1]);
|
||||
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->sources = 1;
|
||||
inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
|
||||
inst->src[1] = reg_undef;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->src[0].equals(inst->src[1]) ||
|
||||
inst->src[1].is_zero()) {
|
||||
/* On Gfx8+, the OR instruction can have a source modifier that
|
||||
|
|
|
|||
|
|
@ -935,8 +935,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
|||
case BRW_OPCODE_MUL:
|
||||
case SHADER_OPCODE_MULH:
|
||||
case BRW_OPCODE_ADD:
|
||||
case BRW_OPCODE_OR:
|
||||
case BRW_OPCODE_AND:
|
||||
case BRW_OPCODE_XOR:
|
||||
case BRW_OPCODE_ADDC:
|
||||
if (i == 1) {
|
||||
|
|
@ -1072,6 +1070,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
|||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
case BRW_OPCODE_OR:
|
||||
case SHADER_OPCODE_TEX_LOGICAL:
|
||||
case SHADER_OPCODE_TXD_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_LOGICAL:
|
||||
|
|
@ -1120,6 +1120,17 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
|||
}
|
||||
}
|
||||
|
||||
/* If only one of the sources of a 2-source, commutative instruction (e.g.,
|
||||
* AND) is immediate, it must be src1. If both are immediate, opt_algebraic
|
||||
* should fold it away.
|
||||
*/
|
||||
if (progress && inst->sources == 2 && inst->is_commutative() &&
|
||||
inst->src[0].file == IMM && inst->src[1].file != IMM) {
|
||||
const auto src1 = inst->src[1];
|
||||
inst->src[1] = inst->src[0];
|
||||
inst->src[0] = src1;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue