aco: implement create_tcs_jump_to_epilog()

This implements jumping from the main TCS to the epilog.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24643>
This commit is contained in:
Samuel Pitoiset 2023-08-17 12:17:17 +02:00 committed by Marge Bot
parent e03c09dfb2
commit a29e2c6fbc
3 changed files with 128 additions and 4 deletions

View file

@ -10876,10 +10876,116 @@ get_arg_for_end(isel_context* ctx, struct ac_arg arg)
return Operand(get_arg(ctx, arg), get_arg_reg(ctx->args, arg));
}
/* Emits the instructions that compute the per-lane offset handed to the TCS
 * epilog as "tcs_out_current_patch_data_offset" and returns it as a v1 Temp:
 *
 *    rel_patch_id * output_patch_stride
 *       + num_patches * patch_control_points * lshs_vertex_stride
 *       + pervertex_output_patch_size
 *
 * NOTE(review): sizes are scaled by 4 per linked output here, whereas
 * get_patch_base() scales by 16. Presumably this offset is counted in a
 * different unit (dwords rather than bytes) - confirm against the epilog
 * consumer before touching either constant.
 */
static Temp
get_tcs_out_current_patch_data_offset(isel_context* ctx)
{
   Builder bld(ctx->program, ctx->block);
   const auto& tcs_info = ctx->program->info.tcs;

   /* Sizes known at compile time from the linked output counts. */
   const unsigned vertex_size = tcs_info.num_linked_outputs * 4u;
   const unsigned pervertex_patch_size = tcs_info.tcs_vertices_out * vertex_size;
   const unsigned patch_stride = pervertex_patch_size + tcs_info.num_linked_patch_outputs * 4u;

   /* Per-lane part: the relative patch id is bits [7:0] of tcs_rel_ids. */
   Temp rel_ids = get_arg(ctx, ctx->args->tcs_rel_ids);
   Temp patch_idx =
      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), rel_ids, Operand::c32(0u), Operand::c32(8u));
   Temp per_patch_offset = bld.v_mul_imm(bld.def(v1), patch_idx, patch_stride, false);

   /* Uniform part: unpack the three fields of tcs_offchip_layout. The s_bfe_u32
    * immediates select a 6-bit field at bit 6 and an 8-bit field at bit 12. */
   Temp layout = get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout);
   Temp control_points = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), layout,
                                  Operand::c32(0x3f));
   Temp patch_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), layout,
                               Operand::c32(0x60006));
   Temp vertex_stride = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), layout,
                                 Operand::c32(0x8000c));

   /* Total size of all input patches = where output patch 0 begins. */
   Temp in_patch_size =
      bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), control_points, vertex_stride);
   Temp out_patch0_start = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), patch_count, in_patch_size);

   /* Skip the per-vertex outputs to land on the per-patch data. */
   Temp uniform_offset =
      bld.nuw().sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
                     Operand::c32(pervertex_patch_size), out_patch0_start);

   return bld.nuw().vadd32(bld.def(v1), per_patch_offset, uniform_offset);
}
/* Emits the instructions that compute the "patch_base" value passed to the
 * TCS epilog and returns it as an s1 Temp:
 *
 *    num_patches * (tcs_vertices_out * num_linked_outputs * 16)
 *
 * i.e. the combined size of the per-vertex outputs of all patches.
 */
static Temp
get_patch_base(isel_context* ctx)
{
   Builder bld(ctx->program, ctx->block);
   const auto& tcs_info = ctx->program->info.tcs;

   /* Compile-time size of one patch worth of per-vertex outputs. */
   const unsigned vertex_size = tcs_info.num_linked_outputs * 16u;
   const unsigned pervertex_patch_size = tcs_info.tcs_vertices_out * vertex_size;

   /* num_patches is the 6-bit field starting at bit 6 of tcs_offchip_layout. */
   Temp layout = get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout);
   Temp patch_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), layout,
                               Operand::c32(0x60006));

   return bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), patch_count,
                   Operand::c32(pervertex_patch_size));
}
/* Appends a p_jump_to_epilog pseudo instruction to the current block.
 *
 * Operand 0 is the 64-bit epilog address; the remaining operands are the
 * values handed over to the epilog, each pinned to a fixed physical register:
 *
 *    SGPR byte offset 0  : ring_offsets
 *    SGPR byte offset 8  : tess_offchip_offset
 *    SGPR byte offset 12 : tcs_factor_offset
 *    SGPR byte offset 16 : tcs_offchip_layout
 *    SGPR byte offset 20 : patch_base
 *    VGPR byte offset 0  : tcs_out_current_patch_data_offset
 *    VGPR byte offset 4  : invocation_id
 *    VGPR byte offset 8  : rel_patch_id
 *
 * NOTE(review): offsets are the values passed to PhysReg::advance(); they step
 * by 4 per 32-bit register, and ring_offsets presumably spans two SGPRs.
 */
static void
create_tcs_jump_to_epilog(isel_context* ctx)
{
   Builder bld(ctx->program, ctx->block);

   PhysReg sgpr_base(0);   /* SGPR 0 */
   PhysReg vgpr_base(256); /* VGPR 0 */

   /* Scalar arguments. */
   Operand ring_offsets_op(get_arg(ctx, ctx->args->ring_offsets));
   ring_offsets_op.setFixed(sgpr_base);

   Operand offchip_offset_op(get_arg(ctx, ctx->args->tess_offchip_offset));
   offchip_offset_op.setFixed(sgpr_base.advance(8u));

   Operand factor_offset_op(get_arg(ctx, ctx->args->tcs_factor_offset));
   factor_offset_op.setFixed(sgpr_base.advance(12u));

   Operand offchip_layout_op(get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout));
   offchip_layout_op.setFixed(sgpr_base.advance(16u));

   Operand patch_base_op(get_patch_base(ctx));
   patch_base_op.setFixed(sgpr_base.advance(20u));

   /* Vector arguments. */
   Operand patch_data_offset_op(get_tcs_out_current_patch_data_offset(ctx));
   patch_data_offset_op.setFixed(vgpr_base);

   /* invocation_id: 5-bit field starting at bit 8 of tcs_rel_ids. */
   Operand invocation_id_op =
      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
               Operand::c32(8u), Operand::c32(5u));
   invocation_id_op.setFixed(vgpr_base.advance(4u));

   /* rel_patch_id: extracted from tcs_rel_ids with p_extract (index 0, 8 bits). */
   Operand rel_patch_id_op =
      bld.pseudo(aco_opcode::p_extract, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
                 Operand::c32(0u), Operand::c32(8u), Operand::c32(0u));
   rel_patch_id_op.setFixed(vgpr_base.advance(8u));

   /* Address of the epilog to continue at. */
   Temp epilog_pc =
      convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.tcs.epilog_pc));

   /* Operand list in the order expected by p_jump_to_epilog. */
   const Operand epilog_operands[] = {
      Operand(epilog_pc), ring_offsets_op,      offchip_offset_op, factor_offset_op, offchip_layout_op,
      patch_base_op,      patch_data_offset_op, invocation_id_op,  rel_patch_id_op,
   };
   constexpr unsigned num_operands = sizeof(epilog_operands) / sizeof(epilog_operands[0]);

   aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
      aco_opcode::p_jump_to_epilog, Format::PSEUDO, num_operands, 0)};
   for (unsigned i = 0; i < num_operands; i++)
      jump->operands[i] = epilog_operands[i];

   ctx->block->instructions.emplace_back(std::move(jump));
}
static void
@ -10901,8 +11007,8 @@ create_tcs_end_for_epilog(isel_context* ctx)
unsigned vgpr = 256 + ctx->args->num_vgprs_used;
Temp rel_patch_id =
bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
Operand::c32(0u), Operand::c32(8u));
bld.pseudo(aco_opcode::p_extract, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
Operand::c32(0u), Operand::c32(8u), Operand::c32(0u));
regs.emplace_back(Operand(rel_patch_id, PhysReg{vgpr++}));
Temp invocation_id =

View file

@ -108,13 +108,18 @@ struct aco_shader_info {
bool has_prolog;
} vs;
struct {
struct ac_arg tcs_offchip_layout;
/* Vulkan only */
uint32_t num_lds_blocks;
struct ac_arg epilog_pc;
uint32_t num_linked_outputs;
uint32_t num_linked_patch_outputs;
uint32_t tcs_vertices_out;
/* OpenGL only */
bool pass_tessfactors_by_reg;
unsigned patch_stride;
struct ac_arg tcs_offchip_layout;
struct ac_arg tes_offchip_addr;
struct ac_arg vs_state_bits;
} tcs;

View file

@ -53,6 +53,9 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
ASSIGN_FIELD(vs.tcs_temp_only_input_mask);
ASSIGN_FIELD(vs.has_prolog);
ASSIGN_FIELD(tcs.num_lds_blocks);
ASSIGN_FIELD(tcs.num_linked_outputs);
ASSIGN_FIELD(tcs.num_linked_patch_outputs);
ASSIGN_FIELD(tcs.tcs_vertices_out);
ASSIGN_FIELD(ps.num_interp);
ASSIGN_FIELD(ps.spi_ps_input);
ASSIGN_FIELD(cs.subgroup_size);
@ -62,6 +65,8 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d;
aco_info->ps.epilog_pc = radv_args->ps_epilog_pc;
aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level);
aco_info->tcs.epilog_pc = radv_args->tcs_epilog_pc;
aco_info->tcs.tcs_offchip_layout = radv_args->tcs_offchip_layout;
}
#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
@ -89,8 +94,16 @@ static inline void
radv_aco_convert_tcs_epilog_key(struct aco_tcs_epilog_info *aco_info, const struct radv_tcs_epilog_key *radv,
const struct radv_shader_args *radv_args)
{
/* Fills the ACO-side TCS epilog description (aco_info) from the RADV epilog
 * key (radv) and the RADV shader argument layout (radv_args). */

/* Always disabled on this path. */
aco_info->pass_tessfactors_by_reg = false;
/* NOTE(review): ASSIGN_FIELD is defined outside this view; presumably it
 * copies the same-named member from radv into aco_info - confirm. */
ASSIGN_FIELD(tcs_out_patch_fits_subgroup);
ASSIGN_FIELD(primitive_mode);
ASSIGN_FIELD(tes_reads_tessfactors);
/* Argument descriptors the epilog reads, taken from the RADV argument layout. */
aco_info->tcs_offchip_layout = radv_args->tcs_offchip_layout;
aco_info->invocation_id = radv_args->invocation_id;
aco_info->rel_patch_id = radv_args->rel_patch_id;
aco_info->tcs_out_current_patch_data_offset = radv_args->tcs_out_current_patch_data_offset;
aco_info->patch_base = radv_args->patch_base;
}
static inline void