intel/compiler: implement dummy mov for Wa_18035690555

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37804>
This commit is contained in:
Tapani Pälli 2025-11-27 14:49:57 +02:00 committed by Marge Bot
parent 4bb68d7474
commit 3ab9145393
3 changed files with 57 additions and 0 deletions

View file

@ -956,6 +956,8 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling)
if (s.failed)
return;
brw_workaround_emit_dummy_mov_mulmac(s);
OPT(brw_lower_scoreboard);
}

View file

@ -381,6 +381,7 @@ bool brw_workaround_emit_dummy_mov_instruction(brw_shader &s);
bool brw_workaround_memory_fence_before_eot(brw_shader &s);
bool brw_workaround_nomask_control_flow(brw_shader &s);
bool brw_workaround_source_arf_before_eot(brw_shader &s);
/* Wa_18035690555: emits a dummy MOV for consecutive mul/mac (or mach)
 * instructions that share src1. Returns true if any MOV was emitted.
 */
bool brw_workaround_emit_dummy_mov_mulmac(brw_shader &s);
/* Helpers. */
unsigned brw_get_lowered_simd_width(const brw_shader *shader,

View file

@ -36,6 +36,60 @@ brw_workaround_emit_dummy_mov_instruction(brw_shader &s)
return true;
}
/* Wa_18035690555
 *
 * If we have mul <-> mac or macl <-> mach and src1 is the same in the
 * current and the previous instruction, we need to insert a dummy mov in
 * between. We can skip issue 2 mentioned in the workaround as macl is not
 * used by our compiler.
 *
 * Other conditions listed in the issue for the mul <-> mac case:
 *    "prev instruction src1 has regioning/scalar" (not flat)
 *    "current instruction src1 is flat and shares the same src1 as prev"
 *
 * Returns true if at least one dummy mov was inserted, false otherwise
 * (including when the workaround does not apply to the device).
 */
bool
brw_workaround_emit_dummy_mov_mulmac(brw_shader &s)
{
   if (!intel_needs_workaround(s.devinfo, 18035690555))
      return false;

   /* Local predicates; arguments are parenthesized so that any pointer
    * expression can be passed safely.
    */
#define IS_MUL_CLASS(x) \
   ((x)->opcode == BRW_OPCODE_MUL || (x)->opcode == BRW_OPCODE_MAC)
#define IS_MACL_CLASS(x) \
   ((x)->opcode == BRW_OPCODE_MACH)
   /* "Flat" here means source i starts at the same sub-register as the
    * destination and is contiguous.
    */
#define IS_FLAT(x, i) ((x)->dst.subnr == (x)->src[i].subnr && \
                       (x)->src[i].is_contiguous())

   /* Previously visited instruction. It is not reset at basic block
    * boundaries, so the last instruction of one block is compared against
    * the first instruction of the next one visited.
    */
   brw_inst *prev_inst = NULL;
   bool progress = false;

   foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
      if (prev_inst &&
          inferred_exec_pipe(s.devinfo, inst) ==
          inferred_exec_pipe(s.devinfo, prev_inst) &&
          ((IS_MUL_CLASS(inst) && IS_MUL_CLASS(prev_inst)) ||
           (IS_MACL_CLASS(inst) && IS_MACL_CLASS(prev_inst))) &&
          (phys_nr(s.devinfo, inst->src[1]) ==
           phys_nr(s.devinfo, prev_inst->src[1])) &&
          (IS_FLAT(inst, 1) && !IS_FLAT(prev_inst, 1))) {
         /* Insert dummy mov between prev and current inst.
          *
          * A builder constructed from an instruction emits *before* that
          * instruction, so it has to be built from the current instruction;
          * building it from prev_inst would place the mov ahead of the pair
          * and leave the two instructions back-to-back.
          */
         const brw_builder ubld = brw_builder(inst).exec_all().group(8, 0);
         ubld.MOV(ubld.null_reg_ud(), brw_imm_ud(0u));
         progress = true;
      }

      prev_inst = inst;
   }

   if (progress) {
      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
                            BRW_DEPENDENCY_VARIABLES);
   }

#undef IS_FLAT
#undef IS_MUL_CLASS
#undef IS_MACL_CLASS

   return progress;
}
static bool
needs_dummy_fence(const intel_device_info *devinfo, const brw_inst *inst)
{