From 0e3942259f872039080c25d38006effc45747e0e Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 24 May 2021 22:53:27 -0700 Subject: [PATCH] intel/fs: Fix synchronization of accumulator-clearing W/A move on TGL+. Right now the accumulator-clearing move emitted by the generator for Wa_14010017096 inherits the SWSB field from the previous instruction. This can lead to redundant synchronization, or possibly more serious issues if the previous instruction had a TGL_SBID_SET SWSB synchronization mode. Take the SWSB synchronization information from the IR. Fixes: a27542c5ddec8 ("intel/compiler: Clear accumulator register before EOT") Reviewed-by: Jason Ekstrand Part-of: (cherry picked from commit c19cfa9dc22b805581ac5ed3ad835fed3c8506c2) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs_generator.cpp | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 2348f4225f5..faa6972c817 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1327,7 +1327,7 @@ "description": "intel/fs: Fix synchronization of accumulator-clearing W/A move on TGL+.", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "a27542c5ddec8dd6a64a9c236cf6bea1db1b9e48" }, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index aef20e440af..e14b2b0d9a7 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1917,6 +1917,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, struct brw_reg src[4], dst; unsigned int last_insn_offset = p->next_insn_offset; bool multiple_instructions_emitted = false; + tgl_swsb swsb = inst->sched; /* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the * "Register Region Restrictions" section: for BDW, SKL: @@ -1951,8 +1952,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_set_default_exec_size(p, BRW_EXECUTE_16); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_MOV(p, brw_acc_reg(8), brw_imm_f(0.0f)); last_insn_offset = p->next_insn_offset; + swsb = tgl_swsb_dst_dep(swsb, 1); } if (!is_accum_used && !inst->eot) { @@ -2010,7 +2013,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); - brw_set_default_swsb(p, inst->sched); + brw_set_default_swsb(p, swsb); unsigned exec_size = inst->exec_size; if (devinfo->ver == 7 && !devinfo->is_haswell && @@ -2426,8 +2429,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, } case FS_OPCODE_SCHEDULING_FENCE: - if (inst->sources == 0 && inst->sched.regdist == 0 && - inst->sched.mode == TGL_SBID_NULL) { + if (inst->sources == 0 && swsb.regdist == 0 && + swsb.mode == TGL_SBID_NULL) { if (unlikely(debug_flag)) disasm_info->use_tail = true; break;