nak,nir: Add 32-bit nir_op_lea_nv and use it

Changes code size by -0.80% on shaderdb.

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32517>
This commit is contained in:
Mel Henning 2024-11-15 15:56:46 -05:00 committed by Marge Bot
parent 54fcc63d3e
commit 0470643047
5 changed files with 51 additions and 0 deletions

View file

@ -1412,6 +1412,11 @@ opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
dst |= ((uint32_t)x) << i * 8;
}""")
# Address arithmetic instruction: shift and add.
# lea_nv evaluates to src0 + (src1 << (src2 % bit_size)); the result and all
# three sources are declared as tuint32.
# The shift amount (src2) must be a constant.
opcode("lea_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False,
"", "src0 + (src1 << (src2 % bit_size))")
# 24b multiply into 32b result (with sign extension).
# Each source is cast to int32_t, shifted left by 8 and then right by 8, so
# only its low 24 bits (sign-extended) take part in the multiplication.
binop("imul24", tint32, _2src_commutative + associative,
"(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")

View file

@ -541,6 +541,24 @@ is_used_by_non_fsat(const nir_alu_instr *instr)
return false;
}
static inline bool
is_used_by_non_ldc_nv(const nir_alu_instr *instr)
{
nir_foreach_use(src, &instr->def) {
const nir_instr *const user_instr = nir_src_parent_instr(src);
if (user_instr->type != nir_instr_type_intrinsic)
return true;
const nir_intrinsic_instr *const user_intrin = nir_instr_as_intrinsic(user_instr);
if (user_intrin->intrinsic != nir_intrinsic_ldc_nv)
return true;
}
return false;
}
static inline bool
is_only_used_as_float_impl(const nir_alu_instr *instr, unsigned depth)
{

View file

@ -612,6 +612,24 @@ pub trait SSABuilder: Builder {
dst
}
/// Emits an `OpLea` that writes a single freshly allocated GPR and returns
/// that SSA value.
///
/// `a` and `b` are forwarded unchanged as the op's `a` and `b` sources.
/// `a_high` is set to zero, `dst_high` is false, no overflow destination is
/// requested, and no intermediate source modifier is applied. `shift` is
/// reduced modulo 32 before it is stored in the op.
///
/// # Panics
///
/// Panics if `self.sm()` is below 70.
fn lea(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
    let result = self.alloc_ssa(RegFile::GPR, 1);
    assert!(self.sm() >= 70);

    // Keep the encoded shift amount within 0..=31.
    let wrapped_shift = shift % 32;

    let op = OpLea {
        dst: result.into(),
        overflow: Dst::None,
        a,
        b,
        a_high: 0.into(),
        dst_high: false,
        shift: wrapped_shift,
        intermediate_mod: SrcMod::None,
    };
    self.push_op(op);

    result
}
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
let dst = if x.is_predicate() {
self.alloc_ssa(RegFile::Pred, 1)

View file

@ -1478,6 +1478,12 @@ impl<'a> ShaderFromNir<'a> {
b.shr(srcs[0], srcs[1], true)
}
}
nir_op_lea_nv => {
// The operand order is swapped relative to NIR: NIR source 1 is handed to
// the builder as `a` and NIR source 0 as `b`.
let src_a = srcs[1];
let src_b = srcs[0];
// The shift is read from component 0 of NIR source 2. unwrap() panics if
// comp_as_uint(0) yields no value -- presumably when the source is not a
// constant; TODO confirm against comp_as_uint.
// The `as u8` cast narrows the value to the u8 that `lea` accepts.
let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8;
b.lea(src_a, src_b, shift)
}
nir_op_isub => match alu.def.bit_size {
32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
64 => b.iadd64(srcs[0], srcs[1].ineg(), 0.into()),

View file

@ -27,6 +27,7 @@ import sys
a = 'a'
b = 'b'
c = 'c'
s = 's'
# common conditions to improve readability
volta = 'nak->sm >= 70 && nak->sm < 75'
@ -38,6 +39,9 @@ algebraic_lowering = [
(('umin', 'a', 'b'), ('bcsel', ('ult', a, b), a, b), volta),
(('umax', 'a', 'b'), ('bcsel', ('ult', a, b), b, a), volta),
(('iadd', 'a@64', ('ineg', 'b@64')), ('isub', a, b)),
# Rewrite a 32-bit iadd of a and (b << s), where s is a constant, into
# lea_nv(a, b, s). The rule is gated on nak->sm >= 70 and on the
# is_used_by_non_ldc_nv condition attached to the iadd.
(('iadd(is_used_by_non_ldc_nv)', 'a@32', ('ishl', 'b@32', '#s@32')),
('lea_nv', a, b, s), 'nak->sm >= 70'),
]
def main():