From 42ef89e8db4f1df31accc26dcda91c6e02ede870 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 6 May 2022 11:26:21 +0200 Subject: [PATCH] radv,aco: use the new TCS WaveID SGPR to compute vs_rel_patch_id on GFX11 Signed-off-by: Samuel Pitoiset Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 17 +++++++++++++++-- src/amd/vulkan/radv_shader.c | 6 +++++- src/amd/vulkan/radv_shader_args.c | 17 ++++++++++++++--- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 10814cbbde5..bffd93b47b0 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8212,8 +8212,21 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_load_local_invocation_index: { if (ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::HS) { - bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - get_arg(ctx, ctx->args->ac.vs_rel_patch_id)); + if (ctx->options->chip_class >= GFX11) { + /* On GFX11, RelAutoIndex is WaveID * WaveSize + ThreadID. */ + Temp wave_id = + bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), + get_arg(ctx, ctx->args->ac.tcs_wave_id), Operand::c32(0u | (5u << 16))); + + Temp temp = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), wave_id, + Operand::c32(ctx->program->wave_size)); + Temp thread_id = emit_mbcnt(ctx, bld.tmp(v1)); + + bld.vadd32(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), temp, thread_id); + } else { + bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), + get_arg(ctx, ctx->args->ac.vs_rel_patch_id)); + } break; } else if (ctx->stage.hw == HWStage::GS || ctx->stage.hw == HWStage::NGG) { bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx)); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index a403ef37d7f..4b538188141 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1585,7 +1585,11 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. */ if (pdevice->rad_info.chip_class >= GFX10) { - vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 1; + if (info->vs.needs_instance_id) { + vgpr_comp_cnt = 3; + } else if (pdevice->rad_info.chip_class <= GFX10_3) { + vgpr_comp_cnt = 1; + } config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks) | S_00B42C_EXCP_EN_GFX6(excp_en); } else { diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index d07898217c3..ce4f14a8777 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -301,11 +301,17 @@ declare_vs_input_vgprs(enum chip_class chip_class, const struct radv_shader_info ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); if (!args->is_gs_copy_shader) { if (info->vs.as_ls) { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id); - if (chip_class >= GFX10) { + + if (chip_class >= GFX11) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + } else if (chip_class >= GFX10) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); } else { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ } @@ -639,7 +645,12 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); + if (chip_class >= GFX11) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id); + } else { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); + } + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown