diff --git a/src/intel/compiler/brw/brw_shader.cpp b/src/intel/compiler/brw/brw_shader.cpp index 5579e022b08..e0f91b81dba 100644 --- a/src/intel/compiler/brw/brw_shader.cpp +++ b/src/intel/compiler/brw/brw_shader.cpp @@ -956,6 +956,8 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling) if (s.failed) return; + brw_workaround_emit_dummy_mov_mulmac(s); + OPT(brw_lower_scoreboard); } diff --git a/src/intel/compiler/brw/brw_shader.h b/src/intel/compiler/brw/brw_shader.h index f241b7e56dd..b6ac4f328df 100644 --- a/src/intel/compiler/brw/brw_shader.h +++ b/src/intel/compiler/brw/brw_shader.h @@ -381,6 +381,7 @@ bool brw_workaround_emit_dummy_mov_instruction(brw_shader &s); bool brw_workaround_memory_fence_before_eot(brw_shader &s); bool brw_workaround_nomask_control_flow(brw_shader &s); bool brw_workaround_source_arf_before_eot(brw_shader &s); +bool brw_workaround_emit_dummy_mov_mulmac(brw_shader &s); /* Helpers. */ unsigned brw_get_lowered_simd_width(const brw_shader *shader, diff --git a/src/intel/compiler/brw/brw_workaround.cpp b/src/intel/compiler/brw/brw_workaround.cpp index 7dbd4cbfe16..b82fd28c980 100644 --- a/src/intel/compiler/brw/brw_workaround.cpp +++ b/src/intel/compiler/brw/brw_workaround.cpp @@ -36,6 +36,60 @@ brw_workaround_emit_dummy_mov_instruction(brw_shader &s) return true; } +/* Wa_18035690555 + * + * If we have mul <-> mac or macl <-> mach and src1 is the same in current + * and previous inst, we need to insert a dummy mov in between. We can skip + * issue 2 mentioned in wa as macl is not used by our compiler. 
Other conditions listed in the issue for the mul <-> mac case: