intel/brw: ensure find_live_channel doesn't access arch register without sync

Another architecture register that requires some care before reading.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 49ee3ae9e8 ("intel/compiler: Lower FIND_[LAST_]LIVE_CHANNEL in IR on Gfx8+")
Tested-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29319>
This commit is contained in:
Lionel Landwerlin 2022-08-08 16:43:58 +03:00 committed by Marge Bot
parent 1add55863f
commit 2c65d90bc8
5 changed files with 28 additions and 2 deletions

View file

@ -536,6 +536,7 @@ enum opcode {
SHADER_OPCODE_BTD_SPAWN_LOGICAL,
SHADER_OPCODE_BTD_RETIRE_LOGICAL,
SHADER_OPCODE_READ_MASK_REG,
SHADER_OPCODE_READ_SR_REG,
RT_OPCODE_TRACE_RAY_LOGICAL,

View file

@ -2464,6 +2464,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
return "btd_spawn_logical";
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
return "btd_retire_logical";
case SHADER_OPCODE_READ_MASK_REG:
return "read_mask_reg";
case SHADER_OPCODE_READ_SR_REG:
return "read_sr_reg";
}

View file

@ -1329,6 +1329,26 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_float_controls_mode(p, src[0].d, src[1].d);
break;
case SHADER_OPCODE_READ_MASK_REG:
if (devinfo->ver >= 12) {
/* There is a SWSB restriction that requires that any time sr0 is
* accessed both the instruction doing the access and the next one
* have SWSB set to RegDist(1).
*/
if (brw_get_default_swsb(p).mode != TGL_SBID_NULL)
brw_SYNC(p, TGL_SYNC_NOP);
assert(src[0].file == BRW_IMMEDIATE_VALUE);
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud),
BRW_TYPE_UD));
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_AND(p, dst, dst, brw_imm_ud(0xffffffff));
} else {
brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud),
BRW_TYPE_UD));
}
break;
case SHADER_OPCODE_READ_SR_REG:
if (devinfo->ver >= 12) {
/* There is a SWSB restriction that requires that any time sr0 is

View file

@ -381,7 +381,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
* instruction has execution masking disabled, so it's kind of
* useless there.
*/
fs_reg exec_mask(retype(brw_mask_reg(0), BRW_TYPE_UD));
const fs_builder ibld(&s, block, inst);
if (!inst->is_partial_write())
@ -389,6 +388,10 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
const fs_builder ubld = fs_builder(&s, block, inst).exec_all().group(1, 0);
fs_reg exec_mask = ubld.vgrf(BRW_TYPE_UD);
ubld.UNDEF(exec_mask);
ubld.emit(SHADER_OPCODE_READ_MASK_REG, exec_mask, brw_imm_ud(0));
/* ce0 doesn't consider the thread dispatch mask (DMask or VMask),
* so combine the execution and dispatch masks to obtain the true mask.
*
@ -696,4 +699,3 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
DEPENDENCY_VARIABLES);
}

View file

@ -312,6 +312,7 @@ namespace {
case FS_OPCODE_DDY_COARSE:
case FS_OPCODE_PIXEL_X:
case FS_OPCODE_PIXEL_Y:
case SHADER_OPCODE_READ_MASK_REG:
case SHADER_OPCODE_READ_SR_REG:
if (devinfo->ver >= 11) {
return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,