brw: fix Wa_22013689345 emission
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Two problems:
  - the null destination was not detected correctly
  - the workaround was applied too late, using SHADER_OPCODE_MEMORY_FENCE
    after lowering had already happened

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34319>
This commit is contained in:
Lionel Landwerlin 2025-03-31 09:44:42 +03:00 committed by Marge Bot
parent 22fa3e88dd
commit 06ad9a25e5
10 changed files with 18 additions and 12 deletions

View file

@ -53,7 +53,6 @@ run_bs(brw_shader &s, bool allow_spilling)
s.assign_curb_setup();
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);

View file

@ -79,7 +79,6 @@ run_cs(brw_shader &s, bool allow_spilling)
s.assign_curb_setup();
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);

View file

@ -1463,7 +1463,6 @@ run_fs(brw_shader &s, bool allow_spilling, bool do_rep_send)
brw_assign_urb_setup(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);

View file

@ -123,7 +123,6 @@ run_gs(brw_shader &s)
brw_assign_gs_urb_setup(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);

View file

@ -337,7 +337,6 @@ run_task_mesh(brw_shader &s, bool allow_spilling)
s.assign_curb_setup();
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);

View file

@ -171,7 +171,6 @@ run_tcs(brw_shader &s)
brw_assign_tcs_urb_setup(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);

View file

@ -49,7 +49,6 @@ run_tes(brw_shader &s)
brw_assign_tes_urb_setup(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);

View file

@ -217,7 +217,6 @@ run_vs(brw_shader &s)
brw_assign_vs_urb_setup(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);

View file

@ -196,6 +196,9 @@ brw_optimize(brw_shader &s)
OPT(brw_lower_uniform_pull_constant_loads);
/* Do this before brw_lower_send_descriptors. */
OPT(brw_workaround_memory_fence_before_eot);
if (OPT(brw_lower_send_descriptors)) {
/* No need for standard copy_propagation since
* brw_opt_address_reg_load will only optimize defs.

View file

@ -65,7 +65,7 @@ needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst)
}
/* Any UGM Atomic message WITHOUT return value */
if (lsc_opcode_is_atomic(opcode) && inst->dst.file == BAD_FILE)
if (lsc_opcode_is_atomic(opcode) && inst->dst.is_null())
return true;
return false;
@ -88,6 +88,11 @@ brw_workaround_memory_fence_before_eot(brw_shader &s)
if (!intel_needs_workaround(s.devinfo, 22013689345))
return false;
/* Needs to happen after brw_lower_logical_sends & before
* brw_lower_send_descriptors.
*/
assert(s.phase == BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING);
foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
if (!inst->eot) {
if (needs_dummy_fence(s.devinfo, inst))
@ -101,9 +106,15 @@ brw_workaround_memory_fence_before_eot(brw_shader &s)
const brw_builder ubld = brw_builder(inst).uniform();
brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
brw_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
dst, brw_vec8_grf(0, 0),
/* commit enable */ brw_imm_ud(1));
brw_inst *dummy_fence = ubld.emit(SHADER_OPCODE_SEND, dst);
dummy_fence->resize_sources(4);
dummy_fence->src[0] = brw_imm_ud(0);
dummy_fence->src[1] = brw_imm_ud(0);
dummy_fence->src[2] = brw_vec8_grf(0, 0);
dummy_fence->src[3] = brw_reg();
dummy_fence->mlen = reg_unit(s.devinfo);
dummy_fence->ex_mlen = 0;
dummy_fence->sfid = BRW_SFID_UGM;
dummy_fence->desc = lsc_fence_msg_desc(s.devinfo, LSC_FENCE_TILE,
LSC_FLUSH_TYPE_NONE_6, false);