diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 926568a161e..2158bbad17d 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -616,8 +616,8 @@ fs_generator::generate_shuffle(fs_inst *inst,
     * easier just to split it here.
     */
    const unsigned lower_width =
-      (devinfo->ver <= 7 || type_sz(src.type) > 4) ?
-      8 : MIN2(16, inst->exec_size);
+      devinfo->ver <= 7 || element_sz(src) > 4 || element_sz(dst) > 4 ? 8 :
+      MIN2(16, inst->exec_size);
 
    brw_set_default_exec_size(p, cvt(lower_width) - 1);
    for (unsigned group = 0; group < inst->exec_size; group += lower_width) {
diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
index 3bb49727183..ff6662c0830 100644
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -1238,6 +1238,28 @@ region_matches(struct brw_reg reg, enum brw_vertical_stride v,
    region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
                   BRW_HORIZONTAL_STRIDE_0)
 
+/**
+ * Return the size in bytes per data element of \p reg in its register file:
+ * type_sz() for immediates/scalar regions, else type_sz() << (stride - 1).
+ */
+static inline unsigned
+element_sz(struct brw_reg reg)
+{
+   if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) {
+      return type_sz(reg.type);
+
+   } else if (reg.width == BRW_WIDTH_1 &&
+              reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
+      assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
+      return type_sz(reg.type) << (reg.vstride - 1);
+
+   } else {
+      assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
+      assert(reg.vstride == reg.hstride + reg.width);
+      return type_sz(reg.type) << (reg.hstride - 1);
+   }
+}
+
 /* brw_packed_float.c */
 int brw_float_to_vf(float f);
 float brw_vf_to_float(unsigned char vf);