From 7b78e05ba8ecddab48af32f4e7a439e05a7925b2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 24 Aug 2022 12:20:04 -0400 Subject: [PATCH] pan/mdg: Replicate swizzles for scalar sources This works around an issue packing 32-bit scalar swizzles zero-extended to 64-bit, seen with the umul_high implementation. I tried for a while figuring out the root cause (even rewrote a big chunk of disassembler) but am still a bit lost. Nevertheless this is a safe workaround with no performance impact (and avoids relying on NIR undefined behaviour to implement GPU undefined behaviour), so let's do this for now to fix umul_high. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/midgard_compile.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 2874e49ff2c..027d333857b 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -661,10 +661,27 @@ mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigne ins->src[to] = nir_src_index(NULL, &src.src); ins->src_types[to] = nir_op_infos[instr->op].input_types[i] | bits; + /* Figure out which component we should fill unused channels with. This + * doesn't matter too much in the non-broadcast case, but it makes + * sure that scalar sources are packed with replicated swizzles, + * which works around issues seen with the combination of source + * expansion and destination shrinking. + */ + unsigned replicate_c = 0; + if (bcast_count) { + replicate_c = bcast_count - 1; + } else { + for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) { + if (nir_alu_instr_channel_used(instr, i, c)) + replicate_c = c; + } + } + for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) { ins->swizzle[to][c] = src.swizzle[ - (!bcast_count || c < bcast_count) ? 
c : - (bcast_count - 1)]; + ((!bcast_count || c < bcast_count) && + nir_alu_instr_channel_used(instr, i, c)) ? + c : replicate_c]; } }