From dc5cc847ddeab1d41b00a520c19a8c1273dc967e Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 22 Nov 2023 15:13:00 -0600 Subject: [PATCH] nak: Implement nir_op_extract_* This should make a lot of bit twiddling more efficient since NIR can optimize certain shift patterns to extract and we can implement it with a single PRMT instruction. Part-of: --- src/nouveau/compiler/nak.rs | 2 -- src/nouveau/compiler/nak_from_nir.rs | 36 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs index 9cb75648590..fae3d24d987 100644 --- a/src/nouveau/compiler/nak.rs +++ b/src/nouveau/compiler/nak.rs @@ -120,8 +120,6 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options { op.lower_unpack_snorm_2x16 = true; op.lower_unpack_unorm_4x8 = true; op.lower_unpack_snorm_4x8 = true; - op.lower_extract_byte = true; - op.lower_extract_word = true; op.lower_insert_byte = true; op.lower_insert_word = true; op.lower_cs_local_index_to_id = true; diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index 40e87c63dcf..69d82252615 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -371,6 +371,42 @@ impl<'a> ShaderFromNir<'a> { }); dst } + nir_op_extract_u8 + | nir_op_extract_i8 + | nir_op_extract_u16 + | nir_op_extract_i16 => { + let src1 = alu.get_src(1); + let elem = src1.src.comp_as_uint(src1.swizzle[0]).unwrap(); + let elem = u8::try_from(elem).unwrap(); + + match alu.op { + nir_op_extract_u8 => { + assert!(elem < 4); + let byte = elem; + let zero = 4; + b.prmt(srcs[0], 0.into(), [byte, zero, zero, zero]) + } + nir_op_extract_i8 => { + assert!(elem < 4); + let byte = elem; + let sign = byte | 0x8; + b.prmt(srcs[0], 0.into(), [byte, sign, sign, sign]) + } + nir_op_extract_u16 => { + assert!(elem < 2); + let byte = elem * 2; + let zero = 4; + b.prmt(srcs[0], 0.into(), [byte, byte + 1, zero, zero]) + 
} + nir_op_extract_i16 => { + assert!(elem < 2); + let byte = elem * 2; + let sign = (byte + 1) | 0x8; + b.prmt(srcs[0], 0.into(), [byte, byte + 1, sign, sign]) + } + _ => panic!("Unknown extract op: {}", alu.op), + } + } nir_op_find_lsb => { let tmp = b.alloc_ssa(RegFile::GPR, 1); b.push_op(OpBrev {