diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index eb1691027bb..2076d829a38 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -536,6 +536,7 @@ enum opcode {
    SHADER_OPCODE_BTD_SPAWN_LOGICAL,
    SHADER_OPCODE_BTD_RETIRE_LOGICAL,
 
+   SHADER_OPCODE_READ_MASK_REG,
    SHADER_OPCODE_READ_SR_REG,
 
    RT_OPCODE_TRACE_RAY_LOGICAL,
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 3b6bde8808a..12514838f94 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2464,6 +2464,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
       return "btd_spawn_logical";
    case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
       return "btd_retire_logical";
+   case SHADER_OPCODE_READ_MASK_REG:
+      return "read_mask_reg";
    case SHADER_OPCODE_READ_SR_REG:
       return "read_sr_reg";
    }
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 1cf70fd5707..9e477baea77 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1329,6 +1329,29 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
          brw_float_controls_mode(p, src[0].d, src[1].d);
          break;
 
+      case SHADER_OPCODE_READ_MASK_REG:
+         /* src[0] selects the mask register; both paths below read src[0].ud. */
+         assert(src[0].file == BRW_IMMEDIATE_VALUE);
+         if (devinfo->ver >= 12) {
+            /* There is a SWSB restriction that requires that any time sr0 is
+             * accessed both the instruction doing the access and the next one
+             * have SWSB set to RegDist(1).  NOTE(review): this sequence mirrors
+             * SHADER_OPCODE_READ_SR_REG; confirm the restriction also covers
+             * the mask register read here.
+             */
+            if (brw_get_default_swsb(p).mode != TGL_SBID_NULL)
+               brw_SYNC(p, TGL_SYNC_NOP);
+            brw_set_default_swsb(p, tgl_swsb_regdist(1));
+            brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud),
+                                   BRW_TYPE_UD));
+            /* AND with all ones leaves dst unchanged; it is the "next one". */
+            brw_set_default_swsb(p, tgl_swsb_regdist(1));
+            brw_AND(p, dst, dst, brw_imm_ud(0xffffffff));
+         } else {
+            brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud),
+                                   BRW_TYPE_UD));
+         }
+         break;
+
       case SHADER_OPCODE_READ_SR_REG:
          if (devinfo->ver >= 12) {
             /* There is a SWSB restriction that requires that any time sr0 is
diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp
index b382fc50e4b..e1dffaa6c2f 100644
--- a/src/intel/compiler/brw_fs_lower.cpp
+++ b/src/intel/compiler/brw_fs_lower.cpp
@@ -381,7 +381,6 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
        * instruction has execution masking disabled, so it's kind of
        * useless there.
        */
-      fs_reg exec_mask(retype(brw_mask_reg(0), BRW_TYPE_UD));
 
       const fs_builder ibld(&s, block, inst);
       if (!inst->is_partial_write())
@@ -389,6 +388,10 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
 
       const fs_builder ubld = fs_builder(&s, block, inst).exec_all().group(1, 0);
 
+      fs_reg exec_mask = ubld.vgrf(BRW_TYPE_UD);
+      ubld.UNDEF(exec_mask);
+      ubld.emit(SHADER_OPCODE_READ_MASK_REG, exec_mask, brw_imm_ud(0));
+
       /* ce0 doesn't consider the thread dispatch mask (DMask or VMask),
        * so combine the execution and dispatch masks to obtain the true mask.
        *
@@ -696,4 +699,3 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
    s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
                          DEPENDENCY_VARIABLES);
 }
-
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index b920bf16c83..1a496b4d201 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -312,6 +312,7 @@ namespace {
       case FS_OPCODE_DDY_COARSE:
       case FS_OPCODE_PIXEL_X:
       case FS_OPCODE_PIXEL_Y:
+      case SHADER_OPCODE_READ_MASK_REG:
       case SHADER_OPCODE_READ_SR_REG:
          if (devinfo->ver >= 11) {
             return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,