From d1038197f31041f0ff5712bf92be63d34fb883e2 Mon Sep 17 00:00:00 2001
From: Francisco Jerez
Date: Mon, 20 Dec 2021 01:54:57 -0800
Subject: [PATCH] intel/fs: Take into account region strides during SIMD lowering decision of SHUFFLE.

This fixes a bug in the handcrafted SIMD lowering done by the SHUFFLE
code generation, which wasn't taking into account the source and
destination region strides while deciding whether it needs to split an
instruction.

v2: Use new element_sz() helper instead of left shift. (Lionel)

Fixes: 90c9f29518d ("i965/fs: Add support for nir_intrinsic_shuffle")
Reviewed-by: Caio Oliveira
Part-of:
---
 src/intel/compiler/brw_fs_generator.cpp |  4 ++--
 src/intel/compiler/brw_reg.h            | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 926568a161e..2158bbad17d 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -616,8 +616,8 @@ fs_generator::generate_shuffle(fs_inst *inst,
     * easier just to split it here.
     */
    const unsigned lower_width =
-      (devinfo->ver <= 7 || type_sz(src.type) > 4) ?
-      8 : MIN2(16, inst->exec_size);
+      devinfo->ver <= 7 || element_sz(src) > 4 || element_sz(dst) > 4 ? 8 :
+      MIN2(16, inst->exec_size);
 
    brw_set_default_exec_size(p, cvt(lower_width) - 1);
    for (unsigned group = 0; group < inst->exec_size; group += lower_width) {
diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
index 3bb49727183..ff6662c0830 100644
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -1238,6 +1238,28 @@ region_matches(struct brw_reg reg, enum brw_vertical_stride v,
    region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
                   BRW_HORIZONTAL_STRIDE_0)
 
+/**
+ * Return the size in bytes per data element of register \p reg on the
+ * corresponding register file.
 + */
+static inline unsigned
+element_sz(struct brw_reg reg)
+{  /* NOTE(review): the shifts assume log2-style stride encodings -- confirm */
+   if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) {
+      return type_sz(reg.type);  /* plain type size, no stride shift */
+   /* Width-1 rows with hstride 0: the size below is derived from vstride. */
+   } else if (reg.width == BRW_WIDTH_1 &&
+              reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
+      assert(reg.vstride != BRW_VERTICAL_STRIDE_0);  /* guards vstride - 1 */
+      return type_sz(reg.type) << (reg.vstride - 1);
+   /* Otherwise the size is derived from hstride, under the asserts below. */
+   } else {
+      assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
+      assert(reg.vstride == reg.hstride + reg.width);
+      return type_sz(reg.type) << (reg.hstride - 1);
+   }
+}
+
 /* brw_packed_float.c */
 int brw_float_to_vf(float f);
 float brw_vf_to_float(unsigned char vf);