nak: Lower isign in NIR

The NIR lowering just clamps to [-1, 1], which should turn into two IMnMx
instructions as opposed to the 4 instructions we're emitting now.  We can
maybe do better than the NIR lowering for 64-bit, but that seems unnecessary.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26246>
This commit is contained in:
Faith Ekstrand 2023-12-04 13:22:08 -06:00 committed by Marge Bot
parent 12acb2ef62
commit 9c1eade3eb
2 changed files with 1 addition and 72 deletions

View file

@ -124,6 +124,7 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
op.lower_insert_word = true;
op.lower_cs_local_index_to_id = true;
op.lower_device_index_to_zero = true;
op.lower_isign = true;
op.lower_uadd_sat = true; // TODO
op.lower_usub_sat = true; // TODO
op.lower_iadd_sat = true; // TODO

View file

@ -1016,78 +1016,6 @@ impl<'a> ShaderFromNir<'a> {
dst
}
}
nir_op_isign => {
let gt_pred = b.alloc_ssa(RegFile::Pred, 1);
let lt_pred = b.alloc_ssa(RegFile::Pred, 1);
let gt = b.alloc_ssa(RegFile::GPR, 1);
let lt = b.alloc_ssa(RegFile::GPR, 1);
let dst = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpISetP {
dst: gt_pred.into(),
set_op: PredSetOp::And,
cmp_op: IntCmpOp::Gt,
cmp_type: IntCmpType::I32,
srcs: [srcs[0], 0.into()],
accum: true.into(),
});
let cond = Src::from(gt_pred).bnot();
b.push_op(OpSel {
dst: gt.into(),
cond,
srcs: [0.into(), u32::MAX.into()],
});
b.push_op(OpISetP {
dst: lt_pred.into(),
set_op: PredSetOp::And,
cmp_op: IntCmpOp::Lt,
cmp_type: IntCmpType::I32,
srcs: [srcs[0], 0.into()],
accum: true.into(),
});
let cond = Src::from(lt_pred).bnot();
b.push_op(OpSel {
dst: lt.into(),
cond,
srcs: [0.into(), u32::MAX.into()],
});
let dst_is_signed = alu.info().output_type & 2 != 0;
let dst_type = IntType::from_bits(
alu.def.bit_size().into(),
dst_is_signed,
);
match dst_type {
IntType::I32 => {
let gt_neg = b.ineg(gt.into());
b.push_op(OpIAdd3 {
dst: dst.into(),
srcs: [lt.into(), gt_neg.into(), 0.into()],
});
}
IntType::I64 => {
let high = b.alloc_ssa(RegFile::GPR, 1);
let gt_neg = b.ineg(gt.into());
b.push_op(OpIAdd3 {
dst: high.into(),
srcs: [lt.into(), gt_neg.into(), 0.into()],
});
b.push_op(OpShf {
dst: dst.into(),
low: 0.into(),
high: high.into(),
shift: 31_u32.into(),
right: true,
wrap: true,
data_type: dst_type,
dst_high: true,
});
}
_ => panic!("Invalid IntType {}", dst_type),
}
dst
}
nir_op_ixor => {
b.lop2(LogicOp::new_lut(&|x, y, _| x ^ y), srcs[0], srcs[1])
}