nak: Implement nir_op_extract_*

This should make a lot of bit twiddling more efficient, since NIR can
optimize certain shift patterns into extract ops and we can implement
each of them with a single PRMT instruction.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26348>
This commit is contained in:
Faith Ekstrand 2023-11-22 15:13:00 -06:00 committed by Faith Ekstrand
parent c0d0ce4c6b
commit dc5cc847dd
2 changed files with 36 additions and 2 deletions

View file

@ -120,8 +120,6 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
op.lower_unpack_snorm_2x16 = true;
op.lower_unpack_unorm_4x8 = true;
op.lower_unpack_snorm_4x8 = true;
op.lower_extract_byte = true;
op.lower_extract_word = true;
op.lower_insert_byte = true;
op.lower_insert_word = true;
op.lower_cs_local_index_to_id = true;

View file

@ -371,6 +371,42 @@ impl<'a> ShaderFromNir<'a> {
});
dst
}
// Byte/word extraction. Every variant below is emitted as one `b.prmt()`
// call taking `srcs[0]`, a constant 0 as the second source, and a
// four-entry selector array.
nir_op_extract_u8
| nir_op_extract_i8
| nir_op_extract_u16
| nir_op_extract_i16 => {
// Source 1 carries the element index. `comp_as_uint(..).unwrap()` panics
// if no unsigned value can be read from it.
// NOTE(review): presumably this means the index must be a constant --
// confirm against the NIR opcode definition.
let src1 = alu.get_src(1);
let elem = src1.src.comp_as_uint(src1.swizzle[0]).unwrap();
// Narrow to u8 so it can be used as a selector; panics if it does not fit.
let elem = u8::try_from(elem).unwrap();
match alu.op {
nir_op_extract_u8 => {
// Byte index within the source: only 0..=3 is accepted.
assert!(elem < 4);
let byte = elem;
// NOTE(review): selector 4 presumably picks a byte of the second PRMT
// source (the constant 0), zero-filling the upper bytes -- confirm
// against the PRMT selector encoding.
let zero = 4;
b.prmt(srcs[0], 0.into(), [byte, zero, zero, zero])
}
nir_op_extract_i8 => {
// Byte index within the source: only 0..=3 is accepted.
assert!(elem < 4);
let byte = elem;
// NOTE(review): setting bit 0x8 on a selector presumably replicates the
// sign bit of the selected byte, sign-extending the result -- confirm
// against the PRMT selector encoding.
let sign = byte | 0x8;
b.prmt(srcs[0], 0.into(), [byte, sign, sign, sign])
}
nir_op_extract_u16 => {
// Word index within the source: only 0 or 1 is accepted.
assert!(elem < 2);
// A word is two consecutive bytes starting at byte `elem * 2`.
let byte = elem * 2;
// NOTE(review): selector 4 presumably yields zero bytes (see the second
// source being the constant 0) -- confirm.
let zero = 4;
b.prmt(srcs[0], 0.into(), [byte, byte + 1, zero, zero])
}
nir_op_extract_i16 => {
// Word index within the source: only 0 or 1 is accepted.
assert!(elem < 2);
// A word is two consecutive bytes starting at byte `elem * 2`.
let byte = elem * 2;
// The sign selector is derived from the word's second byte (`byte + 1`).
// NOTE(review): 0x8 presumably selects sign replication -- confirm.
let sign = (byte + 1) | 0x8;
b.prmt(srcs[0], 0.into(), [byte, byte + 1, sign, sign])
}
// The outer arm only matches the four ops handled above, so reaching
// this branch would indicate the two lists have drifted apart.
_ => panic!("Unknown extract op: {}", alu.op),
}
}
nir_op_find_lsb => {
let tmp = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpBrev {