aco: add vs prolog instruction selection for radeonsi

Port from llvm si_llvm_build_vs_prolog().

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24712>
This commit is contained in:
Qiang Yu 2023-08-05 16:17:02 +08:00 committed by Marge Bot
parent 3f87413811
commit a2ba50aee6
3 changed files with 136 additions and 7 deletions

View file

@ -10933,6 +10933,28 @@ get_patch_base(isel_context* ctx)
Operand::c32(pervertex_output_patch_size));
}
static void
passthrough_all_args(isel_context* ctx, std::vector<Operand>& regs)
{
   /* Append one operand per shader argument to `regs`, in argument-index
    * order. Each operand is whatever get_arg_for_end() returns for an ac_arg
    * handle that is marked as used and carries the current index.
    */
   struct ac_arg cursor;
   cursor.used = true;
   cursor.arg_index = 0;

   while (cursor.arg_index < ctx->args->arg_count) {
      regs.emplace_back(get_arg_for_end(ctx, cursor));
      cursor.arg_index++;
   }
}
static void
build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs)
{
   /* Create a p_end_with_regs pseudo instruction with one operand slot per
    * entry of `regs` and no definitions, fill the slots in order, and append
    * the instruction to the current block.
    */
   aco_ptr<Pseudo_instruction> end_instr{create_instruction<Pseudo_instruction>(
      aco_opcode::p_end_with_regs, Format::PSEUDO, regs.size(), 0)};

   unsigned slot = 0;
   for (const Operand& reg : regs)
      end_instr->operands[slot++] = reg;

   ctx->block->instructions.emplace_back(std::move(end_instr));
}
static void
create_tcs_jump_to_epilog(isel_context* ctx)
{
@ -11041,13 +11063,7 @@ create_tcs_end_for_epilog(isel_context* ctx)
regs.emplace_back(Operand(tf_lds_offset, PhysReg{vgpr}));
}
aco_ptr<Pseudo_instruction> end{create_instruction<Pseudo_instruction>(
aco_opcode::p_end_with_regs, Format::PSEUDO, regs.size(), 0)};
for (unsigned i = 0; i < regs.size(); i++)
end->operands[i] = regs[i];
ctx->block->instructions.emplace_back(std::move(end));
build_end_with_regs(ctx, regs);
}
Pseudo_instruction*
@ -11671,6 +11687,55 @@ store_tess_factor_to_tess_ring(isel_context* ctx, Temp tess_ring_desc, Temp fact
memory_sync_info(storage_vmem_output), true, false, false);
}
/* Emit a four-instruction unsigned division by precomputed factors:
 *
 *    v_lshrrev_b32  t0, pre_shift, num
 *    v_add (nuw)    t1, t0, increment
 *    v_mul_hi_u32   t2, t1, multiplier
 *    v_lshrrev_b32  result, post_shift, t2
 *
 * Every intermediate value and the result are defined as v1 temporaries.
 * Returns the temporary holding the result of the final shift.
 */
Temp
build_fast_udiv_nuw(isel_context* ctx, Temp num, Temp multiplier, Temp pre_shift, Temp post_shift,
                    Temp increment)
{
   Builder bld(ctx->program, ctx->block);

   Temp shifted = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), pre_shift, num);
   /* The add is built through bld.nuw(), i.e. flagged as no-unsigned-wrap. */
   Temp biased = bld.nuw().vadd32(bld.def(v1), shifted, increment);
   Temp high_part = bld.vop3(aco_opcode::v_mul_hi_u32, bld.def(v1), biased, multiplier);

   return bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), post_shift, high_part);
}
/* Build the fetch index for vertex input `input_index`.
 *
 * The bit for `input_index` in the two vinfo masks selects the formula:
 *   - neither bit set:  base_vertex + vertex_id
 *   - is_one bit set:   instance_id + start_instance
 *   - is_fetched only:  fast_udiv(instance_id, factors) + start_instance,
 *                       where the four factor dwords are loaded from
 *                       `instance_divisor_constbuf` at byte offset
 *                       input_index * 16.
 * When both bits are set the is_one formula is used.
 *
 * `instance_divisor_constbuf` is only read on the is_fetched path.
 * Returns a v1 temporary holding the index.
 */
Temp
get_gl_vs_prolog_vertex_index(isel_context* ctx, const struct aco_gl_vs_prolog_info* vinfo,
                              unsigned input_index, Temp instance_divisor_constbuf)
{
   const unsigned input_bit = 1u << input_index;
   const bool divisor_is_one = vinfo->instance_divisor_is_one & input_bit;
   const bool divisor_is_fetched = vinfo->instance_divisor_is_fetched & input_bit;

   Builder bld(ctx->program, ctx->block);

   /* Per-vertex input: no instancing involved. */
   if (!divisor_is_one && !divisor_is_fetched) {
      Temp base_vertex = get_arg(ctx, ctx->args->base_vertex);
      Temp vertex_id = get_arg(ctx, ctx->args->vertex_id);
      return bld.vadd32(bld.def(v1), base_vertex, vertex_id);
   }

   /* Per-instance input: start from the raw instance id. */
   Temp index = get_arg(ctx, ctx->args->instance_id);

   if (!divisor_is_one) {
      /* Load the four division factors for this input and divide. The vector
       * is split first so the components can be extracted individually; they
       * are passed as multiplier, pre_shift, post_shift, increment.
       */
      Temp udiv_factors = bld.smem(aco_opcode::s_buffer_load_dwordx4, bld.def(s4),
                                   instance_divisor_constbuf, Operand::c32(input_index * 16));
      emit_split_vector(ctx, udiv_factors, 4);

      index = build_fast_udiv_nuw(ctx, index, emit_extract_vector(ctx, udiv_factors, 0, s1),
                                  emit_extract_vector(ctx, udiv_factors, 1, s1),
                                  emit_extract_vector(ctx, udiv_factors, 2, s1),
                                  emit_extract_vector(ctx, udiv_factors, 3, s1));
   }

   Temp start_instance = get_arg(ctx, ctx->args->start_instance);
   return bld.vadd32(bld.def(v1), index, start_instance);
}
} /* end namespace */
void
@ -12542,4 +12607,54 @@ select_tcs_epilog(Program* program, void* pinfo, ac_shader_config* config,
cleanup_cfg(program);
}
/* Instruction selection for the radeonsi (GL) vertex shader prolog.
 *
 * `pinfo` is interpreted as a `const struct aco_gl_vs_prolog_info*`.
 * The generated program ends in a p_end_with_regs instruction whose operands
 * are, in order: every shader argument (passed through), followed by one
 * fetch index per vertex input.
 */
void
select_gl_vs_prolog(Program* program, void* pinfo, ac_shader_config* config,
                    const struct aco_compiler_options* options, const struct aco_shader_info* info,
                    const struct ac_shader_args* args)
{
   const auto* vinfo = static_cast<const struct aco_gl_vs_prolog_info*>(pinfo);

   isel_context ctx =
      setup_isel_context(program, 0, nullptr, config, options, info, args, SWStage::VS);
   ctx.block->fp_mode = program->next_fp_mode;

   add_startpgm(&ctx);
   append_logical_start(ctx.block);

   Builder bld(ctx.program, ctx.block);

   /* s_setprio with immediate 0x3. */
   bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);

   if (vinfo->as_ls && options->has_ls_vgpr_init_bug)
      fix_ls_vgpr_init_bug(&ctx);

   /* Outputs start with all incoming arguments, unchanged. */
   std::vector<Operand> regs;
   passthrough_all_args(&ctx, regs);

   /* The divisor constant buffer descriptor is only loaded when at least one
    * input has its divisor fetched; otherwise the Temp stays default
    * constructed and is not read by get_gl_vs_prolog_vertex_index().
    */
   Temp instance_divisor_constbuf;
   if (vinfo->instance_divisor_is_fetched) {
      Temp list = get_arg(&ctx, vinfo->internal_bindings);
      list = convert_pointer_to_64_bit(&ctx, list);

      instance_divisor_constbuf = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), list,
                                           Operand::c32(vinfo->instance_diviser_buf_offset));
   }

   /* Fetch indices go into consecutive registers right after the VGPRs used
    * by the arguments (presumably 256 is the first VGPR in PhysReg
    * numbering - confirm against PhysReg).
    */
   unsigned next_vgpr = 256 + ctx.args->num_vgprs_used;
   for (unsigned input = 0; input < vinfo->num_inputs; input++, next_vgpr++) {
      Temp index = get_gl_vs_prolog_vertex_index(&ctx, vinfo, input, instance_divisor_constbuf);
      regs.emplace_back(Operand(index, PhysReg{next_vgpr}));
   }

   program->config->float_mode = program->blocks[0].fp_mode.val;

   append_logical_end(ctx.block);

   build_end_with_regs(&ctx, regs);

   cleanup_cfg(program);
}
} // namespace aco

View file

@ -2239,6 +2239,10 @@ void select_tcs_epilog(Program* program, void* pinfo, ac_shader_config* config,
const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct ac_shader_args* args);
/* Instruction selection for the radeonsi (GL) vertex shader prolog.
 * pinfo is interpreted as a const struct aco_gl_vs_prolog_info*.
 */
void select_gl_vs_prolog(Program* program, void* pinfo, ac_shader_config* config,
const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct ac_shader_args* args);
void lower_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);

View file

@ -94,6 +94,16 @@ struct aco_tcs_epilog_info {
struct ac_arg tcs_offchip_layout;
};
/* Parameters for the GL vertex shader prolog built by select_gl_vs_prolog(). */
struct aco_gl_vs_prolog_info {
/* Bit i set: the fetch index of input i is instance_id + start_instance. */
uint16_t instance_divisor_is_one;
/* Bit i set (and the is_one bit clear): instance_id is divided using factors
 * loaded from the divisor constant buffer before start_instance is added.
 */
uint16_t instance_divisor_is_fetched;
/* Offset used when loading the divisor constant buffer descriptor from the
 * internal_bindings list.
 */
unsigned instance_diviser_buf_offset;
/* Number of vertex inputs a fetch index is generated for. */
unsigned num_inputs;
/* Together with the has_ls_vgpr_init_bug compiler option, enables the LS VGPR
 * init bug fix in the prolog.
 */
bool as_ls;
/* Shader argument holding the pointer to the internal bindings list. */
struct ac_arg internal_bindings;
};
struct aco_shader_info {
enum ac_hw_stage hw_stage;
uint8_t wave_size;