intel/fs: Perform 64-bit CLUSTER_BROADCAST lowering in the lower_regioning pass.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14273>
This commit is contained in:
Francisco Jerez 2021-12-20 14:34:13 -08:00 committed by Marge Bot
parent bdf8ac2466
commit 79fb7f9de8
2 changed files with 21 additions and 29 deletions

View file

@ -2462,6 +2462,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
break;
case SHADER_OPCODE_CLUSTER_BROADCAST: {
assert((devinfo->platform != INTEL_PLATFORM_CHV &&
!intel_device_info_is_9lp(devinfo) &&
devinfo->has_64bit_float) || type_sz(src[0].type) <= 4);
assert(!src[0].negate && !src[0].abs);
assert(src[1].file == BRW_IMMEDIATE_VALUE);
assert(src[1].type == BRW_REGISTER_TYPE_UD);
@ -2482,35 +2485,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
struct brw_reg strided = stride(suboffset(src[0], component * s),
vstride, width, 0);
if (type_sz(src[0].type) > 4 &&
(devinfo->platform == INTEL_PLATFORM_CHV ||
intel_device_info_is_9lp(devinfo) ||
!devinfo->has_64bit_float)) {
/* IVB has an issue (which we found empirically) where it reads
* two address register components per channel for indirectly
* addressed 64-bit sources.
*
* From the Cherryview PRM Vol 7. "Register Region Restrictions":
*
* "When source or destination datatype is 64b or operation is
* integer DWord multiply, indirect addressing must not be
* used."
*
* To work around both of these, we do two integer MOVs instead of
* one 64-bit MOV. Because no double value should ever cross a
* register boundary, it's safe to use the immediate offset in the
* indirect here to handle adding 4 bytes to the offset and avoid
* the extra ADD to the register file.
*/
assert(src[0].type == dst.type);
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
subscript(strided, BRW_REGISTER_TYPE_D, 0));
brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
subscript(strided, BRW_REGISTER_TYPE_D, 1));
} else {
brw_MOV(p, dst, strided);
}
brw_MOV(p, dst, strided);
break;
}

View file

@ -166,6 +166,22 @@ namespace {
else
return t;
case SHADER_OPCODE_CLUSTER_BROADCAST:
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
*
* "When source or destination datatype is 64b or operation is
* integer DWord multiply, indirect addressing must not be
* used."
*
* Work around the above and handle platforms that don't
* support 64-bit types at all.
*/
if ((!has_64bit || devinfo->platform == INTEL_PLATFORM_CHV ||
intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
return BRW_REGISTER_TYPE_UD;
else
return t;
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_MOV_INDIRECT:
if (((devinfo->verx10 == 70 ||
@ -262,6 +278,7 @@ namespace {
switch (inst->opcode) {
case SHADER_OPCODE_SHUFFLE:
case SHADER_OPCODE_QUAD_SWIZZLE:
case SHADER_OPCODE_CLUSTER_BROADCAST:
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_MOV_INDIRECT:
return 0x1;