intel/fs: Fix synchronization of accumulator-clearing W/A move on TGL+.

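Right now the accumulator-clearing move emitted by the generator for
Wa_14010017096 inherits the SWSB field from the previous instruction.
This can lead to redundant synchronization, or possibly to more serious
issues if the previous instruction had a TGL_SBID_SET SWSB
synchronization mode.  Instead, take the SWSB synchronization
information from the IR: the workaround move is emitted with
tgl_swsb_src_dep() of the instruction's annotation, and the instruction
that follows it uses tgl_swsb_dst_dep() of that annotation with a
register distance of 1.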

Fixes: a27542c5dd ("intel/compiler: Clear accumulator register before EOT")
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>
(cherry picked from commit c19cfa9dc2)
This commit is contained in:
Francisco Jerez 2021-05-24 22:53:27 -07:00 committed by Eric Engestrom
parent 2b5ac1147b
commit 0e3942259f
2 changed files with 7 additions and 4 deletions

View file

@@ -1327,7 +1327,7 @@
"description": "intel/fs: Fix synchronization of accumulator-clearing W/A move on TGL+.",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "a27542c5ddec8dd6a64a9c236cf6bea1db1b9e48"
},

View file

@@ -1917,6 +1917,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
struct brw_reg src[4], dst;
unsigned int last_insn_offset = p->next_insn_offset;
bool multiple_instructions_emitted = false;
tgl_swsb swsb = inst->sched;
/* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
* "Register Region Restrictions" section: for BDW, SKL:
@@ -1951,8 +1952,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_set_default_exec_size(p, BRW_EXECUTE_16);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
brw_MOV(p, brw_acc_reg(8), brw_imm_f(0.0f));
last_insn_offset = p->next_insn_offset;
swsb = tgl_swsb_dst_dep(swsb, 1);
}
if (!is_accum_used && !inst->eot) {
@@ -2010,7 +2013,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_set_default_saturate(p, inst->saturate);
brw_set_default_mask_control(p, inst->force_writemask_all);
brw_set_default_acc_write_control(p, inst->writes_accumulator);
brw_set_default_swsb(p, inst->sched);
brw_set_default_swsb(p, swsb);
unsigned exec_size = inst->exec_size;
if (devinfo->ver == 7 && !devinfo->is_haswell &&
@@ -2426,8 +2429,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
}
case FS_OPCODE_SCHEDULING_FENCE:
if (inst->sources == 0 && inst->sched.regdist == 0 &&
inst->sched.mode == TGL_SBID_NULL) {
if (inst->sources == 0 && swsb.regdist == 0 &&
swsb.mode == TGL_SBID_NULL) {
if (unlikely(debug_flag))
disasm_info->use_tail = true;
break;