diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 5bea405469c..aab50a1c5a7 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2462,6 +2462,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, break; case SHADER_OPCODE_CLUSTER_BROADCAST: { + assert((devinfo->platform != INTEL_PLATFORM_CHV && + !intel_device_info_is_9lp(devinfo) && + devinfo->has_64bit_float) || type_sz(src[0].type) <= 4); assert(!src[0].negate && !src[0].abs); assert(src[1].file == BRW_IMMEDIATE_VALUE); assert(src[1].type == BRW_REGISTER_TYPE_UD); @@ -2482,35 +2485,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, struct brw_reg strided = stride(suboffset(src[0], component * s), vstride, width, 0); - if (type_sz(src[0].type) > 4 && - (devinfo->platform == INTEL_PLATFORM_CHV || - intel_device_info_is_9lp(devinfo) || - !devinfo->has_64bit_float)) { - /* IVB has an issue (which we found empirically) where it reads - * two address register components per channel for indirectly - * addressed 64-bit sources. - * - * From the Cherryview PRM Vol 7. "Register Region Restrictions": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, indirect addressing must not be - * used." - * - * To work around both of these, we do two integer MOVs insead of - * one 64-bit MOV. Because no double value should ever cross a - * register boundary, it's safe to use the immediate offset in the - * indirect here to handle adding 4 bytes to the offset and avoid - * the extra ADD to the register file. - */ - assert(src[0].type == dst.type); - brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0), - subscript(strided, BRW_REGISTER_TYPE_D, 0)); - brw_set_default_swsb(p, tgl_swsb_null()); - brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1), - subscript(strided, BRW_REGISTER_TYPE_D, 1)); - } else { - brw_MOV(p, dst, strided); - } + brw_MOV(p, dst, strided); break; } diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index 53110ef009d..0ddd47c4b1c 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -166,6 +166,22 @@ namespace { else return t; + case SHADER_OPCODE_CLUSTER_BROADCAST: + /* From the Cherryview PRM Vol 7. "Register Region Restrictions": + * + * "When source or destination datatype is 64b or operation is + * integer DWord multiply, indirect addressing must not be + * used." + * + * Work around the above and handle platforms that don't + * support 64-bit types at all. + */ + if ((!has_64bit || devinfo->platform == INTEL_PLATFORM_CHV || + intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4) + return BRW_REGISTER_TYPE_UD; + else + return t; + case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_MOV_INDIRECT: if (((devinfo->verx10 == 70 || @@ -262,6 +278,7 @@ namespace { switch (inst->opcode) { case SHADER_OPCODE_SHUFFLE: case SHADER_OPCODE_QUAD_SWIZZLE: + case SHADER_OPCODE_CLUSTER_BROADCAST: case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_MOV_INDIRECT: return 0x1;