radv,aco: use the new TCS WaveID SGPR to compute vs_rel_patch_id on GFX11

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16369>
This commit is contained in:
Samuel Pitoiset 2022-05-06 11:26:21 +02:00 committed by Marge Bot
parent 432cde7f00
commit 42ef89e8db
3 changed files with 34 additions and 6 deletions

View file

@ -8212,8 +8212,21 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
}
case nir_intrinsic_load_local_invocation_index: {
if (ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::HS) {
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
get_arg(ctx, ctx->args->ac.vs_rel_patch_id));
if (ctx->options->chip_class >= GFX11) {
/* On GFX11, RelAutoIndex is WaveID * WaveSize + ThreadID. */
Temp wave_id =
bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
get_arg(ctx, ctx->args->ac.tcs_wave_id), Operand::c32(0u | (5u << 16)));
Temp temp = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), wave_id,
Operand::c32(ctx->program->wave_size));
Temp thread_id = emit_mbcnt(ctx, bld.tmp(v1));
bld.vadd32(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), temp, thread_id);
} else {
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
get_arg(ctx, ctx->args->ac.vs_rel_patch_id));
}
break;
} else if (ctx->stage.hw == HWStage::GS || ctx->stage.hw == HWStage::NGG) {
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));

View file

@ -1585,7 +1585,11 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
*/
if (pdevice->rad_info.chip_class >= GFX10) {
vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 1;
if (info->vs.needs_instance_id) {
vgpr_comp_cnt = 3;
} else if (pdevice->rad_info.chip_class <= GFX10_3) {
vgpr_comp_cnt = 1;
}
config_out->rsrc2 |=
S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks) | S_00B42C_EXCP_EN_GFX6(excp_en);
} else {

View file

@ -301,11 +301,17 @@ declare_vs_input_vgprs(enum chip_class chip_class, const struct radv_shader_info
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
if (!args->is_gs_copy_shader) {
if (info->vs.as_ls) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
if (chip_class >= GFX10) {
if (chip_class >= GFX11) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else if (chip_class >= GFX10) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
@ -639,7 +645,12 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
if (chip_class >= GFX11) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
} else {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown