mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
intel/fs: Perform 64-bit CLUSTER_BROADCAST lowering in the lower_regioning pass.
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14273>
This commit is contained in:
parent
bdf8ac2466
commit
79fb7f9de8
2 changed files with 21 additions and 29 deletions
|
|
@@ -2462,6 +2462,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
break;
|
||||
|
||||
case SHADER_OPCODE_CLUSTER_BROADCAST: {
|
||||
assert((devinfo->platform != INTEL_PLATFORM_CHV &&
|
||||
!intel_device_info_is_9lp(devinfo) &&
|
||||
devinfo->has_64bit_float) || type_sz(src[0].type) <= 4);
|
||||
assert(!src[0].negate && !src[0].abs);
|
||||
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
||||
assert(src[1].type == BRW_REGISTER_TYPE_UD);
|
||||
|
|
@@ -2482,35 +2485,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
|
||||
struct brw_reg strided = stride(suboffset(src[0], component * s),
|
||||
vstride, width, 0);
|
||||
if (type_sz(src[0].type) > 4 &&
|
||||
(devinfo->platform == INTEL_PLATFORM_CHV ||
|
||||
intel_device_info_is_9lp(devinfo) ||
|
||||
!devinfo->has_64bit_float)) {
|
||||
/* IVB has an issue (which we found empirically) where it reads
|
||||
* two address register components per channel for indirectly
|
||||
* addressed 64-bit sources.
|
||||
*
|
||||
* From the Cherryview PRM Vol 7. "Register Region Restrictions":
|
||||
*
|
||||
* "When source or destination datatype is 64b or operation is
|
||||
* integer DWord multiply, indirect addressing must not be
|
||||
* used."
|
||||
*
|
||||
* To work around both of these, we do two integer MOVs instead of
|
||||
* one 64-bit MOV. Because no double value should ever cross a
|
||||
* register boundary, it's safe to use the immediate offset in the
|
||||
* indirect here to handle adding 4 bytes to the offset and avoid
|
||||
* the extra ADD to the register file.
|
||||
*/
|
||||
assert(src[0].type == dst.type);
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
||||
subscript(strided, BRW_REGISTER_TYPE_D, 0));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
||||
subscript(strided, BRW_REGISTER_TYPE_D, 1));
|
||||
} else {
|
||||
brw_MOV(p, dst, strided);
|
||||
}
|
||||
brw_MOV(p, dst, strided);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -166,6 +166,22 @@ namespace {
|
|||
else
|
||||
return t;
|
||||
|
||||
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
||||
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
|
||||
*
|
||||
* "When source or destination datatype is 64b or operation is
|
||||
* integer DWord multiply, indirect addressing must not be
|
||||
* used."
|
||||
*
|
||||
* Work around the above and handle platforms that don't
|
||||
* support 64-bit types at all.
|
||||
*/
|
||||
if ((!has_64bit || devinfo->platform == INTEL_PLATFORM_CHV ||
|
||||
intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
|
||||
return BRW_REGISTER_TYPE_UD;
|
||||
else
|
||||
return t;
|
||||
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
if (((devinfo->verx10 == 70 ||
|
||||
|
|
@@ -262,6 +278,7 @@ namespace {
|
|||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_SHUFFLE:
|
||||
case SHADER_OPCODE_QUAD_SWIZZLE:
|
||||
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
return 0x1;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue