From 2c65d90bc8500bb8ad0b9204798905e4d79fb283 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 8 Aug 2022 16:43:58 +0300 Subject: [PATCH] intel/brw: ensure find_live_channel doesn't access arch register without sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Another architecture register that requires some care before reading. Signed-off-by: Lionel Landwerlin Fixes: 49ee3ae9e8b ("intel/compiler: Lower FIND_[LAST_]LIVE_CHANNEL in IR on Gfx8+") Tested-by: Tapani Pälli Part-of: --- src/intel/compiler/brw_eu_defines.h | 1 + src/intel/compiler/brw_fs.cpp | 2 ++ src/intel/compiler/brw_fs_generator.cpp | 20 ++++++++++++++++++++ src/intel/compiler/brw_fs_lower.cpp | 6 ++++-- src/intel/compiler/brw_ir_performance.cpp | 1 + 5 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index eb1691027bb..2076d829a38 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -536,6 +536,7 @@ enum opcode { SHADER_OPCODE_BTD_SPAWN_LOGICAL, SHADER_OPCODE_BTD_RETIRE_LOGICAL, + SHADER_OPCODE_READ_MASK_REG, SHADER_OPCODE_READ_SR_REG, RT_OPCODE_TRACE_RAY_LOGICAL, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 3b6bde8808a..12514838f94 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2464,6 +2464,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) return "btd_spawn_logical"; case SHADER_OPCODE_BTD_RETIRE_LOGICAL: return "btd_retire_logical"; + case SHADER_OPCODE_READ_MASK_REG: + return "read_mask_reg"; case SHADER_OPCODE_READ_SR_REG: return "read_sr_reg"; } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 1cf70fd5707..9e477baea77 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1329,6 +1329,26 @@ fs_generator::generate_code(const 
cfg_t *cfg, int dispatch_width, brw_float_controls_mode(p, src[0].d, src[1].d); break; + case SHADER_OPCODE_READ_MASK_REG: + if (devinfo->ver >= 12) { + /* There is a SWSB restriction that requires that any time sr0 is + * accessed both the instruction doing the access and the next one + * have SWSB set to RegDist(1). + */ + if (brw_get_default_swsb(p).mode != TGL_SBID_NULL) + brw_SYNC(p, TGL_SYNC_NOP); + assert(src[0].file == BRW_IMMEDIATE_VALUE); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud), + BRW_TYPE_UD)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + brw_AND(p, dst, dst, brw_imm_ud(0xffffffff)); + } else { + brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud), + BRW_TYPE_UD)); + } + break; + case SHADER_OPCODE_READ_SR_REG: if (devinfo->ver >= 12) { /* There is a SWSB restriction that requires that any time sr0 is diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index b382fc50e4b..e1dffaa6c2f 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -381,7 +381,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s) * instruction has execution masking disabled, so it's kind of * useless there. */ - fs_reg exec_mask(retype(brw_mask_reg(0), BRW_TYPE_UD)); const fs_builder ibld(&s, block, inst); if (!inst->is_partial_write()) @@ -389,6 +388,10 @@ brw_fs_lower_find_live_channel(fs_visitor &s) const fs_builder ubld = fs_builder(&s, block, inst).exec_all().group(1, 0); + fs_reg exec_mask = ubld.vgrf(BRW_TYPE_UD); + ubld.UNDEF(exec_mask); + ubld.emit(SHADER_OPCODE_READ_MASK_REG, exec_mask, brw_imm_ud(0)); + /* ce0 doesn't consider the thread dispatch mask (DMask or VMask), * so combine the execution and dispatch masks to obtain the true mask. 
* @@ -696,4 +699,3 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s) s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW | DEPENDENCY_VARIABLES); } - diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index b920bf16c83..1a496b4d201 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -312,6 +312,7 @@ namespace { case FS_OPCODE_DDY_COARSE: case FS_OPCODE_PIXEL_X: case FS_OPCODE_PIXEL_Y: + case SHADER_OPCODE_READ_MASK_REG: case SHADER_OPCODE_READ_SR_REG: if (devinfo->ver >= 11) { return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,