From 0d77e91ca3e056fe0341920235c24e537dafc1d2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 27 Nov 2024 08:57:20 -0500 Subject: [PATCH] nir/opt_load_store_vectorize: match amul like imul for AGX, we preserve amul all the way until fusing address modes in order to be able to fuse effectively. so the load/store vectorizer wouldn't vectorize before fusing. however, after fusing we get fused intrinsics which are tricky to teach the vectorizer about as their semantics are pretty subtle. so we can't vectorize after, either. the easiest solution is to teach the vectorizer about amul, which can always be replaced by imul for our pattern matches. this fixes certain cases of vectorization in OpenCL kernels on asahi. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Georg Lehmann Part-of: --- src/compiler/nir/nir_opt_load_store_vectorize.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c index 4b476f97ca1..4a8c5099a18 100644 --- a/src/compiler/nir/nir_opt_load_store_vectorize.c +++ b/src/compiler/nir/nir_opt_load_store_vectorize.c @@ -258,13 +258,27 @@ get_write_mask(const nir_intrinsic_instr *intrin) return nir_component_mask(intrin->src[info->value_src].ssa->num_components); } +static nir_op +get_effective_alu_op(nir_scalar scalar) +{ + nir_op op = nir_scalar_alu_op(scalar); + + /* amul can always be replaced by imul and we pattern match on the more + * general opcode, so return imul for amul. + */ + if (op == nir_op_amul) + return nir_op_imul; + else + return op; +} + /* If "def" is from an alu instruction with the opcode "op" and one of it's * sources is a constant, update "def" to be the non-constant source, fill "c" * with the constant and return true. 
*/ static bool parse_alu(nir_scalar *def, nir_op op, uint64_t *c) { - if (!nir_scalar_is_alu(*def) || nir_scalar_alu_op(*def) != op) + if (!nir_scalar_is_alu(*def) || get_effective_alu_op(*def) != op) return false; nir_scalar src0 = nir_scalar_chase_alu_src(*def, 0);