diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 57b0e481607..25e3b565256 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10876,10 +10876,137 @@ get_arg_for_end(isel_context* ctx, struct ac_arg arg)
    return Operand(get_arg(ctx, arg), get_arg_reg(ctx->args, arg));
 }
 
+/* Emit the computation of the value handed to the epilog as
+ * tcs_out_current_patch_data_offset and return it as a v1 temp:
+ *
+ *    rel_patch_id * output_patch_stride
+ *       + pervertex_output_patch_size
+ *       + num_patches * patch_control_points * lshs_vertex_stride
+ *
+ * rel_patch_id is the low 8 bits of tcs_rel_ids. patch_control_points, num_patches and
+ * lshs_vertex_stride are unpacked from tcs_offchip_layout (low 6 bits; 6 bits at offset 6;
+ * 8 bits at offset 12).
+ *
+ * NOTE(review): each linked output counts as 4u here but as 16u in get_patch_base();
+ * presumably dword vs. byte units -- confirm which one the epilog expects.
+ */
+static Temp
+get_tcs_out_current_patch_data_offset(isel_context* ctx)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   const unsigned output_vertex_size = ctx->program->info.tcs.num_linked_outputs * 4u;
+   const unsigned pervertex_output_patch_size =
+      ctx->program->info.tcs.tcs_vertices_out * output_vertex_size;
+   const unsigned output_patch_stride =
+      pervertex_output_patch_size + ctx->program->info.tcs.num_linked_patch_outputs * 4u;
+
+   Temp tcs_rel_ids = get_arg(ctx, ctx->args->tcs_rel_ids);
+   Temp rel_patch_id =
+      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), tcs_rel_ids, Operand::c32(0u), Operand::c32(8u));
+   Temp patch_offset = bld.v_mul_imm(bld.def(v1), rel_patch_id, output_patch_stride, false);
+
+   Temp tcs_offchip_layout = get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout);
+
+   Temp patch_control_points = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
+                                        tcs_offchip_layout, Operand::c32(0x3f));
+
+   Temp num_patches = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+                               tcs_offchip_layout, Operand::c32(0x60006));
+
+   Temp lshs_vertex_stride = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+                                      tcs_offchip_layout, Operand::c32(0x8000c));
+
+   Temp input_patch_size =
+      bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), patch_control_points, lshs_vertex_stride);
+
+   Temp output_patch0_offset =
+      bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), num_patches, input_patch_size);
+
+   Temp output_patch_offset =
+      bld.nuw().sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
+                     Operand::c32(pervertex_output_patch_size), output_patch0_offset);
+
+   return bld.nuw().vadd32(bld.def(v1), patch_offset, output_patch_offset);
+}
+
+/* Return an s1 temp holding num_patches * pervertex_output_patch_size, where num_patches is
+ * the 6-bit field at offset 6 of tcs_offchip_layout and each linked output counts as 16u.
+ */
+static Temp
+get_patch_base(isel_context* ctx)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   const unsigned output_vertex_size = ctx->program->info.tcs.num_linked_outputs * 16u;
+   const unsigned pervertex_output_patch_size =
+      ctx->program->info.tcs.tcs_vertices_out * output_vertex_size;
+
+   Temp num_patches =
+      bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+               get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout), Operand::c32(0x60006));
+
+   return bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), num_patches,
+                   Operand::c32(pervertex_output_patch_size));
+}
+
+/* Append a p_jump_to_epilog to the current block. Operand 0 is the epilog PC widened to
+ * 64 bits; operands 1-8 are the values handed to the epilog, each pinned to a fixed
+ * register (SGPR arguments first, then VGPRs starting at VGPR 0).
+ */
 static void
 create_tcs_jump_to_epilog(isel_context* ctx)
 {
-   /* TODO */
+   Builder bld(ctx->program, ctx->block);
+
+   PhysReg vgpr_start(256); /* VGPR 0 */
+   PhysReg sgpr_start(0);   /* SGPR 0 */
+
+   /* SGPRs */
+   Operand ring_offsets = Operand(get_arg(ctx, ctx->args->ring_offsets));
+   ring_offsets.setFixed(sgpr_start);
+
+   Operand tess_offchip_offset = Operand(get_arg(ctx, ctx->args->tess_offchip_offset));
+   tess_offchip_offset.setFixed(sgpr_start.advance(8u));
+
+   Operand tcs_factor_offset = Operand(get_arg(ctx, ctx->args->tcs_factor_offset));
+   tcs_factor_offset.setFixed(sgpr_start.advance(12u));
+
+   Operand tcs_offchip_layout = Operand(get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout));
+   tcs_offchip_layout.setFixed(sgpr_start.advance(16u));
+
+   Operand patch_base = Operand(get_patch_base(ctx));
+   patch_base.setFixed(sgpr_start.advance(20u));
+
+   /* VGPRs */
+   Operand tcs_out_current_patch_data_offset = Operand(get_tcs_out_current_patch_data_offset(ctx));
+   tcs_out_current_patch_data_offset.setFixed(vgpr_start);
+
+   Operand invocation_id =
+      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
+               Operand::c32(8u), Operand::c32(5u));
+   invocation_id.setFixed(vgpr_start.advance(4u));
+
+   Operand rel_patch_id =
+      bld.pseudo(aco_opcode::p_extract, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
+                 Operand::c32(0u), Operand::c32(8u), Operand::c32(0u));
+   rel_patch_id.setFixed(vgpr_start.advance(8u));
+
+   Temp continue_pc =
+      convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.tcs.epilog_pc));
+
+   aco_ptr<Pseudo_instruction> jump{
+      create_instruction<Pseudo_instruction>(aco_opcode::p_jump_to_epilog, Format::PSEUDO, 9, 0)};
+   jump->operands[0] = Operand(continue_pc);
+   jump->operands[1] = ring_offsets;
+   jump->operands[2] = tess_offchip_offset;
+   jump->operands[3] = tcs_factor_offset;
+   jump->operands[4] = tcs_offchip_layout;
+   jump->operands[5] = patch_base;
+   jump->operands[6] = tcs_out_current_patch_data_offset;
+   jump->operands[7] = invocation_id;
+   jump->operands[8] = rel_patch_id;
+   ctx->block->instructions.emplace_back(std::move(jump));
 }
 
 static void
@@ -10901,8 +11028,8 @@ create_tcs_end_for_epilog(isel_context* ctx)
    unsigned vgpr = 256 + ctx->args->num_vgprs_used;
 
    Temp rel_patch_id =
-      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
-               Operand::c32(0u), Operand::c32(8u));
+      bld.pseudo(aco_opcode::p_extract, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
+                 Operand::c32(0u), Operand::c32(8u), Operand::c32(0u));
    regs.emplace_back(Operand(rel_patch_id, PhysReg{vgpr++}));
 
    Temp invocation_id =
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index d9d7c17c8f3..241af7544f2 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -108,13 +108,18 @@ struct aco_shader_info {
       bool has_prolog;
    } vs;
    struct {
+      struct ac_arg tcs_offchip_layout;
+
       /* Vulkan only */
       uint32_t num_lds_blocks;
+      struct ac_arg epilog_pc;
+      uint32_t num_linked_outputs;
+      uint32_t num_linked_patch_outputs;
+      uint32_t tcs_vertices_out;
 
       /* OpenGL only */
       bool pass_tessfactors_by_reg;
       unsigned patch_stride;
-      struct ac_arg tcs_offchip_layout;
       struct ac_arg tes_offchip_addr;
       struct ac_arg vs_state_bits;
    } tcs;
diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h
index ed01e4d6eb2..468e8d3b884 100644
--- a/src/amd/vulkan/radv_aco_shader_info.h
+++ b/src/amd/vulkan/radv_aco_shader_info.h
@@ -53,6 +53,9 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
    ASSIGN_FIELD(vs.tcs_temp_only_input_mask);
    ASSIGN_FIELD(vs.has_prolog);
    ASSIGN_FIELD(tcs.num_lds_blocks);
+   ASSIGN_FIELD(tcs.num_linked_outputs);
+   ASSIGN_FIELD(tcs.num_linked_patch_outputs);
+   ASSIGN_FIELD(tcs.tcs_vertices_out);
    ASSIGN_FIELD(ps.num_interp);
    ASSIGN_FIELD(ps.spi_ps_input);
    ASSIGN_FIELD(cs.subgroup_size);
@@ -62,6 +65,8 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
    aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d;
    aco_info->ps.epilog_pc = radv_args->ps_epilog_pc;
    aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level);
+   aco_info->tcs.epilog_pc = radv_args->tcs_epilog_pc;
+   aco_info->tcs.tcs_offchip_layout = radv_args->tcs_offchip_layout;
 }
 
 #define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
@@ -89,8 +94,16 @@ static inline void
 radv_aco_convert_tcs_epilog_key(struct aco_tcs_epilog_info *aco_info, const struct radv_tcs_epilog_key *radv,
                                 const struct radv_shader_args *radv_args)
 {
+   aco_info->pass_tessfactors_by_reg = false;
+   ASSIGN_FIELD(tcs_out_patch_fits_subgroup);
    ASSIGN_FIELD(primitive_mode);
    ASSIGN_FIELD(tes_reads_tessfactors);
+
+   aco_info->tcs_offchip_layout = radv_args->tcs_offchip_layout;
+   aco_info->invocation_id = radv_args->invocation_id;
+   aco_info->rel_patch_id = radv_args->rel_patch_id;
+   aco_info->tcs_out_current_patch_data_offset = radv_args->tcs_out_current_patch_data_offset;
+   aco_info->patch_base = radv_args->patch_base;
 }
 
 static inline void