nak: Lower isign in NIR

The NIR lowering just clamps to [-1, 1], which should turn into two IMnMx instructions as opposed to the 4 instructions we're emitting now. We can maybe do better than the NIR lowering for 64-bit, but that seems unnecessary. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26246>
2026-01-08 06:20:19 +01:00 · 2023-12-04 13:22:08 -06:00 · 2023-12-04 13:22:08 -06:00 · 9c1eade3eb
commit 9c1eade3eb
parent 12acb2ef62
2 changed files with 1 addition and 72 deletions
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -124,6 +124,7 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
    op.lower_insert_word = true;
    op.lower_cs_local_index_to_id = true;
    op.lower_device_index_to_zero = true;
+    op.lower_isign = true;
    op.lower_uadd_sat = true; // TODO
    op.lower_usub_sat = true; // TODO
    op.lower_iadd_sat = true; // TODO
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -1016,78 +1016,6 @@ impl<'a> ShaderFromNir<'a> {
                    dst
                }
            }
-            nir_op_isign => {
-                let gt_pred = b.alloc_ssa(RegFile::Pred, 1);
-                let lt_pred = b.alloc_ssa(RegFile::Pred, 1);
-                let gt = b.alloc_ssa(RegFile::GPR, 1);
-                let lt = b.alloc_ssa(RegFile::GPR, 1);
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-                b.push_op(OpISetP {
-                    dst: gt_pred.into(),
-                    set_op: PredSetOp::And,
-                    cmp_op: IntCmpOp::Gt,
-                    cmp_type: IntCmpType::I32,
-                    srcs: [srcs[0], 0.into()],
-                    accum: true.into(),
-                });
-
-                let cond = Src::from(gt_pred).bnot();
-                b.push_op(OpSel {
-                    dst: gt.into(),
-                    cond,
-                    srcs: [0.into(), u32::MAX.into()],
-                });
-                b.push_op(OpISetP {
-                    dst: lt_pred.into(),
-                    set_op: PredSetOp::And,
-                    cmp_op: IntCmpOp::Lt,
-                    cmp_type: IntCmpType::I32,
-                    srcs: [srcs[0], 0.into()],
-                    accum: true.into(),
-                });
-
-                let cond = Src::from(lt_pred).bnot();
-                b.push_op(OpSel {
-                    dst: lt.into(),
-                    cond,
-                    srcs: [0.into(), u32::MAX.into()],
-                });
-
-                let dst_is_signed = alu.info().output_type & 2 != 0;
-                let dst_type = IntType::from_bits(
-                    alu.def.bit_size().into(),
-                    dst_is_signed,
-                );
-                match dst_type {
-                    IntType::I32 => {
-                        let gt_neg = b.ineg(gt.into());
-                        b.push_op(OpIAdd3 {
-                            dst: dst.into(),
-                            srcs: [lt.into(), gt_neg.into(), 0.into()],
-                        });
-                    }
-                    IntType::I64 => {
-                        let high = b.alloc_ssa(RegFile::GPR, 1);
-                        let gt_neg = b.ineg(gt.into());
-                        b.push_op(OpIAdd3 {
-                            dst: high.into(),
-                            srcs: [lt.into(), gt_neg.into(), 0.into()],
-                        });
-                        b.push_op(OpShf {
-                            dst: dst.into(),
-                            low: 0.into(),
-                            high: high.into(),
-                            shift: 31_u32.into(),
-                            right: true,
-                            wrap: true,
-                            data_type: dst_type,
-                            dst_high: true,
-                        });
-                    }
-                    _ => panic!("Invalid IntType {}", dst_type),
-                }
-                dst
-            }
            nir_op_ixor => {
                b.lop2(LogicOp::new_lut(&|x, y, _| x ^ y), srcs[0], srcs[1])
            }