diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 1a0140cfdfc..b6a0109e2b3 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1511,6 +1511,34 @@ fs_visitor::resolve_source_modifiers(const fs_reg &src) return temp; } +/** + * Walk backwards from the end of the program looking for a URB write that + * isn't in control flow, and mark it with EOT. + * + * Return true if successful or false if a separate EOT write is needed. + */ +bool +fs_visitor::mark_last_urb_write_with_eot() +{ + foreach_in_list_reverse(fs_inst, prev, &this->instructions) { + if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) { + prev->eot = true; + + /* Delete now dead instructions. */ + foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) { + if (dead == prev) + break; + dead->remove(); + } + return true; + } else if (prev->is_control_flow() || prev->has_side_effects()) { + break; + } + } + + return false; +} + void fs_visitor::emit_gs_thread_end() { @@ -1526,21 +1554,12 @@ fs_visitor::emit_gs_thread_end() fs_inst *inst; if (gs_prog_data->static_vertex_count != -1) { - foreach_in_list_reverse(fs_inst, prev, &this->instructions) { - if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) { - prev->eot = true; + /* Try and tag the last URB write with EOT instead of emitting a whole + * separate write just to finish the thread. + */ + if (mark_last_urb_write_with_eot()) + return; - /* Delete now dead instructions. */ - foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) { - if (dead == prev) - break; - dead->remove(); - } - return; - } else if (prev->is_control_flow() || prev->has_side_effects()) { - break; - } - } fs_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles; inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, @@ -6555,6 +6574,31 @@ fs_visitor::set_tcs_invocation_id() } } +void +fs_visitor::emit_tcs_thread_end() +{ + /* Try and tag the last URB write with EOT instead of emitting a whole + * separate write just to finish the thread. There isn't guaranteed to + * be one, so this may not succeed. + */ + if (devinfo->ver != 8 && mark_last_urb_write_with_eot()) + return; + + /* Emit a URB write to end the thread. On Broadwell, we use this to write + * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy + * algorithm to set it optimally). On other platforms, we simply write + * zero to a reserved/MBZ patch header DWord which has no consequence. + */ + fs_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output; + srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16); + srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0); + fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, + reg_undef, srcs, ARRAY_SIZE(srcs)); + inst->mlen = 3; + inst->eot = true; +} + bool fs_visitor::run_tcs() { @@ -6587,15 +6631,7 @@ fs_visitor::run_tcs() bld.emit(BRW_OPCODE_ENDIF); } - /* Emit EOT write; set TR DS Cache bit */ - fs_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16); - srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0); - fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, - reg_undef, srcs, ARRAY_SIZE(srcs)); - inst->mlen = 3; - inst->eot = true; + emit_tcs_thread_end(); if (failed) return false; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index bbe48456221..0bdebed3bd8 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -412,6 +412,8 @@ public: void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, unsigned base_offset, const nir_src &offset_src, unsigned num_components, unsigned first_component); + bool mark_last_urb_write_with_eot(); + void emit_tcs_thread_end(); void emit_urb_fence(); void emit_cs_terminate(); fs_reg emit_work_group_id_setup();