diff --git a/src/intel/compiler/brw_compile_tcs.cpp b/src/intel/compiler/brw_compile_tcs.cpp index 2502778f8b5..cd08e7673ad 100644 --- a/src/intel/compiler/brw_compile_tcs.cpp +++ b/src/intel/compiler/brw_compile_tcs.cpp @@ -42,6 +42,90 @@ get_patch_count_threshold(int input_control_points) return 1; } +static void +brw_set_tcs_invocation_id(fs_visitor &s) +{ /* Build gl_InvocationID with the fs_builder and store it in s.invocation_id. */ + const struct intel_device_info *devinfo = s.devinfo; + struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(s.prog_data); + struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; + const fs_builder bld = fs_builder(&s).at_end(); + + const unsigned instance_id_mask = + (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) : + (devinfo->ver >= 11) ? INTEL_MASK(22, 16) : + INTEL_MASK(23, 17); + const unsigned instance_id_shift = + (devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17; + + /* Get instance number from g0.2 bits (masked only; t is still unshifted): + * * 7:0 on DG2+ (verx10 >= 125) + * * 22:16 on gfx11+ + * * 23:17 otherwise + */ + brw_reg t = + bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)), + brw_imm_ud(instance_id_mask)); + + if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) { + /* gl_InvocationID is just the thread number: the masked field shifted down to bit 0 */ + s.invocation_id = bld.SHR(t, brw_imm_ud(instance_id_shift)); + return; + } + + assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH); + + brw_reg channels_uw = bld.vgrf(BRW_TYPE_UW); + brw_reg channels_ud = bld.vgrf(BRW_TYPE_UD); + bld.MOV(channels_uw, brw_reg(brw_imm_uv(0x76543210))); + bld.MOV(channels_ud, channels_uw); /* widen the UW channel values to UD */ + + if (tcs_prog_data->instances == 1) { + s.invocation_id = channels_ud; + } else { + /* instance_id = 8 * t + <76543210>, where shifting by 3 less than the field offset leaves the instance number scaled by 8. NOTE(review): instance_id_shift is unsigned and is 0 for verx10 >= 125, so this subtraction assumes the single-patch path is never taken there; confirm. */ + s.invocation_id = + bld.ADD(bld.SHR(t, brw_imm_ud(instance_id_shift - 3)), channels_ud); + } +} + +static void +brw_emit_tcs_thread_end(fs_visitor &s) +{ + /* Try to tag the last URB write with EOT instead of emitting a whole + * separate write just to finish the thread. 
There isn't guaranteed to + * be one, so this may not succeed. + */ + if (s.mark_last_urb_write_with_eot()) + return; + + const fs_builder bld = fs_builder(&s).at_end(); + + /* Emit a URB write to end the thread. On Broadwell, we use this to write + * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy + * algorithm to set it optimally). On other platforms, we simply write + * zero to a reserved/MBZ patch header DWord which has no consequence. + */ + brw_reg srcs[URB_LOGICAL_NUM_SRCS]; + srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output; + srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16); + srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1); + fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, + reg_undef, srcs, ARRAY_SIZE(srcs)); + inst->eot = true; /* this write ends the thread */ +} + +static void +brw_assign_tcs_urb_setup(fs_visitor &s) +{ + assert(s.stage == MESA_SHADER_TESS_CTRL); + + /* Rewrite all ATTR file references to HW_REGs. 
*/ + foreach_block_and_inst(block, fs_inst, inst, s.cfg) { + s.convert_attr_sources_to_hw_regs(inst); + } +} + static bool run_tcs(fs_visitor &s) { @@ -56,7 +140,7 @@ run_tcs(fs_visitor &s) s.payload_ = new tcs_thread_payload(s); /* Initialize gl_InvocationID */ - s.set_tcs_invocation_id(); + brw_set_tcs_invocation_id(s); const bool fix_dispatch_mask = vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH && @@ -75,7 +159,7 @@ run_tcs(fs_visitor &s) bld.emit(BRW_OPCODE_ENDIF); } - s.emit_tcs_thread_end(); + brw_emit_tcs_thread_end(s); if (s.failed) return false; @@ -85,7 +169,7 @@ run_tcs(fs_visitor &s) brw_fs_optimize(s); s.assign_curb_setup(); - s.assign_tcs_urb_setup(); + brw_assign_tcs_urb_setup(s); brw_fs_lower_3src_null_dest(s); brw_fs_workaround_memory_fence_before_eot(s); diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 27ac6de460e..6db8c151a7c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1357,17 +1357,6 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst) } } -void -fs_visitor::assign_tcs_urb_setup() -{ - assert(stage == MESA_SHADER_TESS_CTRL); - - /* Rewrite all ATTR file references to HW_REGs. */ - foreach_block_and_inst(block, fs_inst, inst, cfg) { - convert_attr_sources_to_hw_regs(inst); - } -} - void fs_visitor::assign_tes_urb_setup() { @@ -2394,78 +2383,6 @@ fs_visitor::allocate_registers(bool allow_spilling) brw_fs_lower_scoreboard(*this); } -void -fs_visitor::set_tcs_invocation_id() -{ - struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); - struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; - const fs_builder bld = fs_builder(this).at_end(); - - const unsigned instance_id_mask = - (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) : - (devinfo->ver >= 11) ? INTEL_MASK(22, 16) : - INTEL_MASK(23, 17); - const unsigned instance_id_shift = - (devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 
16 : 17; - - /* Get instance number from g0.2 bits: - * * 7:0 on DG2+ - * * 22:16 on gfx11+ - * * 23:17 otherwise - */ - brw_reg t = - bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)), - brw_imm_ud(instance_id_mask)); - - if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) { - /* gl_InvocationID is just the thread number */ - invocation_id = bld.SHR(t, brw_imm_ud(instance_id_shift)); - return; - } - - assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH); - - brw_reg channels_uw = bld.vgrf(BRW_TYPE_UW); - brw_reg channels_ud = bld.vgrf(BRW_TYPE_UD); - bld.MOV(channels_uw, brw_reg(brw_imm_uv(0x76543210))); - bld.MOV(channels_ud, channels_uw); - - if (tcs_prog_data->instances == 1) { - invocation_id = channels_ud; - } else { - /* instance_id = 8 * t + <76543210> */ - invocation_id = - bld.ADD(bld.SHR(t, brw_imm_ud(instance_id_shift - 3)), channels_ud); - } -} - -void -fs_visitor::emit_tcs_thread_end() -{ - /* Try and tag the last URB write with EOT instead of emitting a whole - * separate write just to finish the thread. There isn't guaranteed to - * be one, so this may not succeed. - */ - if (mark_last_urb_write_with_eot()) - return; - - const fs_builder bld = fs_builder(this).at_end(); - - /* Emit a URB write to end the thread. On Broadwell, we use this to write - * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy - * algorithm to set it optimally). On other platforms, we simply write - * zero to a reserved/MBZ patch header DWord which has no consequence. 
- */ - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16); - srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0); - srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1); - fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, - reg_undef, srcs, ARRAY_SIZE(srcs)); - inst->eot = true; -} - /** * Move load_interpolated_input with simple (payload-based) barycentric modes * to the top of the program so we don't emit multiple PLNs for the same input. diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4709e5d1d0b..0b1ecb47793 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -305,7 +305,6 @@ public: uint32_t compute_max_register_pressure(); void assign_curb_setup(); void convert_attr_sources_to_hw_regs(fs_inst *inst); - void assign_tcs_urb_setup(); void assign_tes_urb_setup(); bool assign_regs(bool allow_spilling, bool spill_all); void assign_regs_trivial(); @@ -325,14 +324,11 @@ public: void fail(const char *msg, ...); void limit_dispatch_width(unsigned n, const char *msg); - void set_tcs_invocation_id(); - void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg()); void emit_gs_control_data_bits(const brw_reg &vertex_count); brw_reg gs_urb_channel_mask(const brw_reg &dword_index); brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count); bool mark_last_urb_write_with_eot(); - void emit_tcs_thread_end(); void emit_urb_fence(); void emit_cs_terminate();