diff --git a/.pick_status.json b/.pick_status.json index 8115b27741a..9741160fc5c 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2924,7 +2924,7 @@ "description": "intel/brw: ensure find_live_channel don't access arch register without sync", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "49ee3ae9e8be4fd2a4a9f658c06e0bf01e08d13c", "notes": null diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index eb1691027bb..2076d829a38 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -536,6 +536,7 @@ enum opcode { SHADER_OPCODE_BTD_SPAWN_LOGICAL, SHADER_OPCODE_BTD_RETIRE_LOGICAL, + SHADER_OPCODE_READ_MASK_REG, SHADER_OPCODE_READ_SR_REG, RT_OPCODE_TRACE_RAY_LOGICAL, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a5d6c8dc499..38d746672db 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2472,6 +2472,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) return "btd_spawn_logical"; case SHADER_OPCODE_BTD_RETIRE_LOGICAL: return "btd_retire_logical"; + case SHADER_OPCODE_READ_MASK_REG: + return "read_mask_reg"; case SHADER_OPCODE_READ_SR_REG: return "read_sr_reg"; } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index cd32c40d405..04cabc1fcad 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1329,6 +1329,26 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_float_controls_mode(p, src[0].d, src[1].d); break; + case SHADER_OPCODE_READ_MASK_REG: + if (devinfo->ver >= 12) { + /* There is a SWSB restriction that requires that any time sr0 is + * accessed both the instruction doing the access and the next one + * have SWSB set to RegDist(1). + */ + if (brw_get_default_swsb(p).mode != TGL_SBID_NULL) + brw_SYNC(p, TGL_SYNC_NOP); + assert(src[0].file == BRW_IMMEDIATE_VALUE); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud), + BRW_REGISTER_TYPE_UD)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + brw_AND(p, dst, dst, brw_imm_ud(0xffffffff)); + } else { + brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud), + BRW_REGISTER_TYPE_UD)); + } + break; + case SHADER_OPCODE_READ_SR_REG: if (devinfo->ver >= 12) { /* There is a SWSB restriction that requires that any time sr0 is diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index 51cb76165a1..4201246b71b 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -387,7 +387,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s) * instruction has execution masking disabled, so it's kind of * useless there. */ - fs_reg exec_mask(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); const fs_builder ibld(&s, block, inst); if (!inst->is_partial_write()) @@ -395,6 +394,10 @@ brw_fs_lower_find_live_channel(fs_visitor &s) const fs_builder ubld = fs_builder(&s, block, inst).exec_all().group(1, 0); + fs_reg exec_mask = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.UNDEF(exec_mask); + ubld.emit(SHADER_OPCODE_READ_MASK_REG, exec_mask, brw_imm_ud(0)); + /* ce0 doesn't consider the thread dispatch mask (DMask or VMask), * so combine the execution and dispatch masks to obtain the true mask. * @@ -703,4 +706,3 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s) s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW | DEPENDENCY_VARIABLES); } - diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 2ad48767b97..bc8f111833b 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -312,6 +312,7 @@ namespace { case FS_OPCODE_DDY_COARSE: case FS_OPCODE_PIXEL_X: case FS_OPCODE_PIXEL_Y: + case SHADER_OPCODE_READ_MASK_REG: case SHADER_OPCODE_READ_SR_REG: if (devinfo->ver >= 11) { return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,