From 72614641895cf9a66dfea328ecbc05cda0aa06cd Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 8 Aug 2022 16:43:58 +0300 Subject: [PATCH] intel/brw: ensure find_live_channel don't access arch register without sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Another architecture register that requires some care before reading. Signed-off-by: Lionel Landwerlin Fixes: 49ee3ae9e8b ("intel/compiler: Lower FIND_[LAST_]LIVE_CHANNEL in IR on Gfx8+") Tested-by: Tapani Pälli Part-of: (cherry picked from commit 2c65d90bc8500bb8ad0b9204798905e4d79fb283) --- .pick_status.json | 2 +- src/intel/compiler/brw_eu_defines.h | 1 + src/intel/compiler/brw_fs.cpp | 2 ++ src/intel/compiler/brw_fs_generator.cpp | 20 ++++++++++++++++++++ src/intel/compiler/brw_fs_lower.cpp | 6 ++++-- src/intel/compiler/brw_ir_performance.cpp | 1 + 6 files changed, 29 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 8115b27741a..9741160fc5c 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2924,7 +2924,7 @@ "description": "intel/brw: ensure find_live_channel don't access arch register without sync", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "49ee3ae9e8be4fd2a4a9f658c06e0bf01e08d13c", "notes": null diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index eb1691027bb..2076d829a38 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -536,6 +536,7 @@ enum opcode { SHADER_OPCODE_BTD_SPAWN_LOGICAL, SHADER_OPCODE_BTD_RETIRE_LOGICAL, + SHADER_OPCODE_READ_MASK_REG, SHADER_OPCODE_READ_SR_REG, RT_OPCODE_TRACE_RAY_LOGICAL, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a5d6c8dc499..38d746672db 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2472,6 +2472,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) return "btd_spawn_logical"; case SHADER_OPCODE_BTD_RETIRE_LOGICAL: return "btd_retire_logical"; + case SHADER_OPCODE_READ_MASK_REG: + return "read_mask_reg"; case SHADER_OPCODE_READ_SR_REG: return "read_sr_reg"; } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index cd32c40d405..04cabc1fcad 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1329,6 +1329,26 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_float_controls_mode(p, src[0].d, src[1].d); break; + case SHADER_OPCODE_READ_MASK_REG: + if (devinfo->ver >= 12) { + /* There is a SWSB restriction that requires that any time sr0 is + * accessed both the instruction doing the access and the next one + * have SWSB set to RegDist(1). + */ + if (brw_get_default_swsb(p).mode != TGL_SBID_NULL) + brw_SYNC(p, TGL_SYNC_NOP); + assert(src[0].file == BRW_IMMEDIATE_VALUE); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud), + BRW_REGISTER_TYPE_UD)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + brw_AND(p, dst, dst, brw_imm_ud(0xffffffff)); + } else { + brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud), + BRW_REGISTER_TYPE_UD)); + } + break; + case SHADER_OPCODE_READ_SR_REG: if (devinfo->ver >= 12) { /* There is a SWSB restriction that requires that any time sr0 is diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index 51cb76165a1..4201246b71b 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -387,7 +387,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s) * instruction has execution masking disabled, so it's kind of * useless there. */ - fs_reg exec_mask(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); const fs_builder ibld(&s, block, inst); if (!inst->is_partial_write()) @@ -395,6 +394,10 @@ brw_fs_lower_find_live_channel(fs_visitor &s) const fs_builder ubld = fs_builder(&s, block, inst).exec_all().group(1, 0); + fs_reg exec_mask = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.UNDEF(exec_mask); + ubld.emit(SHADER_OPCODE_READ_MASK_REG, exec_mask, brw_imm_ud(0)); + /* ce0 doesn't consider the thread dispatch mask (DMask or VMask), * so combine the execution and dispatch masks to obtain the true mask. * @@ -703,4 +706,3 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s) s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW | DEPENDENCY_VARIABLES); } - diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 2ad48767b97..bc8f111833b 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -312,6 +312,7 @@ namespace { case FS_OPCODE_DDY_COARSE: case FS_OPCODE_PIXEL_X: case FS_OPCODE_PIXEL_Y: + case SHADER_OPCODE_READ_MASK_REG: case SHADER_OPCODE_READ_SR_REG: if (devinfo->ver >= 11) { return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,