mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
nak: Implement nir_op_extract_*
This should make a lot of bit twiddling more efficient, since NIR can optimize certain shift patterns into extract operations and we can implement each of them with a single PRMT instruction. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26348>
This commit is contained in:
parent
c0d0ce4c6b
commit
dc5cc847dd
2 changed files with 36 additions and 2 deletions
|
|
@@ -120,8 +120,6 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
|
||||||
op.lower_unpack_snorm_2x16 = true;
|
op.lower_unpack_snorm_2x16 = true;
|
||||||
op.lower_unpack_unorm_4x8 = true;
|
op.lower_unpack_unorm_4x8 = true;
|
||||||
op.lower_unpack_snorm_4x8 = true;
|
op.lower_unpack_snorm_4x8 = true;
|
||||||
op.lower_extract_byte = true;
|
|
||||||
op.lower_extract_word = true;
|
|
||||||
op.lower_insert_byte = true;
|
op.lower_insert_byte = true;
|
||||||
op.lower_insert_word = true;
|
op.lower_insert_word = true;
|
||||||
op.lower_cs_local_index_to_id = true;
|
op.lower_cs_local_index_to_id = true;
|
||||||
|
|
|
||||||
|
|
@@ -371,6 +371,42 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
});
|
});
|
||||||
dst
|
dst
|
||||||
}
|
}
|
||||||
|
nir_op_extract_u8
|
||||||
|
| nir_op_extract_i8
|
||||||
|
| nir_op_extract_u16
|
||||||
|
| nir_op_extract_i16 => {
|
||||||
|
let src1 = alu.get_src(1);
|
||||||
|
let elem = src1.src.comp_as_uint(src1.swizzle[0]).unwrap();
|
||||||
|
let elem = u8::try_from(elem).unwrap();
|
||||||
|
|
||||||
|
match alu.op {
|
||||||
|
nir_op_extract_u8 => {
|
||||||
|
assert!(elem < 4);
|
||||||
|
let byte = elem;
|
||||||
|
let zero = 4;
|
||||||
|
b.prmt(srcs[0], 0.into(), [byte, zero, zero, zero])
|
||||||
|
}
|
||||||
|
nir_op_extract_i8 => {
|
||||||
|
assert!(elem < 4);
|
||||||
|
let byte = elem;
|
||||||
|
let sign = byte | 0x8;
|
||||||
|
b.prmt(srcs[0], 0.into(), [byte, sign, sign, sign])
|
||||||
|
}
|
||||||
|
nir_op_extract_u16 => {
|
||||||
|
assert!(elem < 2);
|
||||||
|
let byte = elem * 2;
|
||||||
|
let zero = 4;
|
||||||
|
b.prmt(srcs[0], 0.into(), [byte, byte + 1, zero, zero])
|
||||||
|
}
|
||||||
|
nir_op_extract_i16 => {
|
||||||
|
assert!(elem < 2);
|
||||||
|
let byte = elem * 2;
|
||||||
|
let sign = (byte + 1) | 0x8;
|
||||||
|
b.prmt(srcs[0], 0.into(), [byte, byte + 1, sign, sign])
|
||||||
|
}
|
||||||
|
_ => panic!("Unknown extract op: {}", alu.op),
|
||||||
|
}
|
||||||
|
}
|
||||||
nir_op_find_lsb => {
|
nir_op_find_lsb => {
|
||||||
let tmp = b.alloc_ssa(RegFile::GPR, 1);
|
let tmp = b.alloc_ssa(RegFile::GPR, 1);
|
||||||
b.push_op(OpBrev {
|
b.push_op(OpBrev {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue