diff --git a/src/intel/compiler/brw_compile_bs.cpp b/src/intel/compiler/brw_compile_bs.cpp index 3ff16e08a09..f90141b4e01 100644 --- a/src/intel/compiler/brw_compile_bs.cpp +++ b/src/intel/compiler/brw_compile_bs.cpp @@ -53,7 +53,6 @@ run_bs(brw_shader &s, bool allow_spilling) s.assign_curb_setup(); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, allow_spilling); diff --git a/src/intel/compiler/brw_compile_cs.cpp b/src/intel/compiler/brw_compile_cs.cpp index 73bbb83b001..ab1356d61a4 100644 --- a/src/intel/compiler/brw_compile_cs.cpp +++ b/src/intel/compiler/brw_compile_cs.cpp @@ -79,7 +79,6 @@ run_cs(brw_shader &s, bool allow_spilling) s.assign_curb_setup(); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, allow_spilling); diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index 1743184b4e9..45d10517d56 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -1463,7 +1463,6 @@ run_fs(brw_shader &s, bool allow_spilling, bool do_rep_send) brw_assign_urb_setup(s); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, allow_spilling); diff --git a/src/intel/compiler/brw_compile_gs.cpp b/src/intel/compiler/brw_compile_gs.cpp index 7ac66d683bd..3161a5845fa 100644 --- a/src/intel/compiler/brw_compile_gs.cpp +++ b/src/intel/compiler/brw_compile_gs.cpp @@ -123,7 +123,6 @@ run_gs(brw_shader &s) brw_assign_gs_urb_setup(s); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, true /* allow_spilling */); diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp index fae56100f2a..0c6c568558f 100644 --- a/src/intel/compiler/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw_compile_mesh.cpp @@ -337,7 +337,6 @@ run_task_mesh(brw_shader &s, bool allow_spilling) s.assign_curb_setup(); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, allow_spilling); diff --git a/src/intel/compiler/brw_compile_tcs.cpp b/src/intel/compiler/brw_compile_tcs.cpp index 417f2e015ca..705bdc4f508 100644 --- a/src/intel/compiler/brw_compile_tcs.cpp +++ b/src/intel/compiler/brw_compile_tcs.cpp @@ -171,7 +171,6 @@ run_tcs(brw_shader &s) brw_assign_tcs_urb_setup(s); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, true /* allow_spilling */); diff --git a/src/intel/compiler/brw_compile_tes.cpp b/src/intel/compiler/brw_compile_tes.cpp index 646dffcc22d..484f7bcf5c2 100644 --- a/src/intel/compiler/brw_compile_tes.cpp +++ b/src/intel/compiler/brw_compile_tes.cpp @@ -49,7 +49,6 @@ run_tes(brw_shader &s) brw_assign_tes_urb_setup(s); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, true /* allow_spilling */); diff --git a/src/intel/compiler/brw_compile_vs.cpp b/src/intel/compiler/brw_compile_vs.cpp index 24d4d997728..f9d180e15ea 100644 --- a/src/intel/compiler/brw_compile_vs.cpp +++ b/src/intel/compiler/brw_compile_vs.cpp @@ -217,7 +217,6 @@ run_vs(brw_shader &s) brw_assign_vs_urb_setup(s); brw_lower_3src_null_dest(s); - brw_workaround_memory_fence_before_eot(s); brw_workaround_emit_dummy_mov_instruction(s); brw_allocate_registers(s, true /* allow_spilling */); diff --git a/src/intel/compiler/brw_opt.cpp b/src/intel/compiler/brw_opt.cpp index 90d125116b5..7c00367d8ae 100644 --- a/src/intel/compiler/brw_opt.cpp +++ b/src/intel/compiler/brw_opt.cpp @@ -196,6 +196,9 @@ brw_optimize(brw_shader &s) OPT(brw_lower_uniform_pull_constant_loads); + /* Do this before brw_lower_send_descriptors. */ + OPT(brw_workaround_memory_fence_before_eot); + if (OPT(brw_lower_send_descriptors)) { /* No need for standard copy_propagation since * brw_opt_address_reg_load will only optimize defs. diff --git a/src/intel/compiler/brw_workaround.cpp b/src/intel/compiler/brw_workaround.cpp index 647dd3f19f7..afbd89cf80d 100644 --- a/src/intel/compiler/brw_workaround.cpp +++ b/src/intel/compiler/brw_workaround.cpp @@ -65,7 +65,7 @@ needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst) } /* Any UGM Atomic message WITHOUT return value */ - if (lsc_opcode_is_atomic(opcode) && inst->dst.file == BAD_FILE) + if (lsc_opcode_is_atomic(opcode) && inst->dst.is_null()) return true; return false; @@ -88,6 +88,11 @@ brw_workaround_memory_fence_before_eot(brw_shader &s) if (!intel_needs_workaround(s.devinfo, 22013689345)) return false; + /* Needs to happen after brw_lower_logical_sends & before + * brw_lower_send_descriptors. + */ + assert(s.phase == BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING); + foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) { if (!inst->eot) { if (needs_dummy_fence(s.devinfo, inst)) @@ -101,9 +106,15 @@ brw_workaround_memory_fence_before_eot(brw_shader &s) const brw_builder ubld = brw_builder(inst).uniform(); brw_reg dst = ubld.vgrf(BRW_TYPE_UD); - brw_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE, - dst, brw_vec8_grf(0, 0), - /* commit enable */ brw_imm_ud(1)); + brw_inst *dummy_fence = ubld.emit(SHADER_OPCODE_SEND, dst); + + dummy_fence->resize_sources(4); + dummy_fence->src[0] = brw_imm_ud(0); + dummy_fence->src[1] = brw_imm_ud(0); + dummy_fence->src[2] = brw_vec8_grf(0, 0); + dummy_fence->src[3] = brw_reg(); + dummy_fence->mlen = reg_unit(s.devinfo); + dummy_fence->ex_mlen = 0; dummy_fence->sfid = BRW_SFID_UGM; dummy_fence->desc = lsc_fence_msg_desc(s.devinfo, LSC_FENCE_TILE, LSC_FLUSH_TYPE_NONE_6, false);