radv: declare a new user SGPR for dynamic descriptors

To move them out of push constants.

fossils-db (GFX1201):
Totals from 20700 (25.99% of 79646) affected shaders:
Instrs: 14375624 -> 14370051 (-0.04%); split: -0.07%, +0.03%
CodeSize: 76746128 -> 76723772 (-0.03%); split: -0.05%, +0.02%
Latency: 74103586 -> 74113651 (+0.01%); split: -0.01%, +0.02%
InvThroughput: 11908817 -> 11908798 (-0.00%); split: -0.00%, +0.00%
VClause: 249605 -> 249607 (+0.00%); split: -0.00%, +0.00%
SClause: 337914 -> 337772 (-0.04%); split: -0.08%, +0.04%
Copies: 843585 -> 839233 (-0.52%); split: -0.62%, +0.10%
PreSGPRs: 836283 -> 837260 (+0.12%)
SALU: 1790713 -> 1786374 (-0.24%); split: -0.29%, +0.05%

Co-authored-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37768>
This commit is contained in:
Samuel Pitoiset 2025-09-02 11:25:40 +02:00 committed by Marge Bot
parent 6541b911bd
commit bc32286e5b
6 changed files with 27 additions and 12 deletions

View file

@ -182,6 +182,7 @@ struct ac_shader_args {
struct ac_arg push_constants;
struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
uint64_t inline_push_const_mask;
struct ac_arg dynamic_descriptors;
struct ac_arg view_index;
struct ac_arg force_vrs_rates;

View file

@ -32,9 +32,9 @@ select_rt_prolog(Program* program, ac_shader_config* config,
* Ring offsets: s[0-1]
* Indirect descriptor sets: s[2]
* Push constants pointer: s[3]
* SBT descriptors: s[4-5]
* Traversal shader address: s[6]
* Unused (for future work): s[7]
* Dynamic descriptors: s[4]
* Traversal shader address: s[5]
* SBT descriptors: s[6-7]
* Ray launch size address: s[8-9]
* Dynamic callable stack base: s[10]
* Workgroup IDs (xyz): s[11], s[12], s[13]
@ -70,9 +70,9 @@ select_rt_prolog(Program* program, ac_shader_config* config,
* Callee shader PC: s[0-1]
* Indirect descriptor sets: s[2]
* Push constants pointer: s[3]
* SBT descriptors: s[4-5]
* Traversal shader address: s[6]
* Unused (for future work): s[7]
* Dynamic descriptors: s[4]
* Traversal shader address: s[5]
* SBT descriptors: s[6-7]
* Ray launch sizes (xyz): s[8], s[9], s[10]
* Scratch offset (<GFX9 only): s[11]
* Ring offsets (<GFX9 only): s[12-13]
@ -106,8 +106,12 @@ select_rt_prolog(Program* program, ac_shader_config* config,
assert(in_ring_offsets == out_uniform_shader_addr);
assert(get_arg_reg(in_args, in_args->push_constants) ==
get_arg_reg(out_args, out_args->push_constants));
assert(get_arg_reg(in_args, in_args->dynamic_descriptors) ==
get_arg_reg(out_args, out_args->dynamic_descriptors));
assert(get_arg_reg(in_args, in_args->rt.sbt_descriptors) ==
get_arg_reg(out_args, out_args->rt.sbt_descriptors));
assert(get_arg_reg(in_args, in_args->rt.traversal_shader_addr) ==
get_arg_reg(out_args, out_args->rt.traversal_shader_addr));
assert(in_launch_size_addr == out_launch_size_x);
assert(in_stack_base == out_launch_size_z);
assert(in_local_ids[0] == out_launch_ids[0]);

View file

@ -2002,6 +2002,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
nir_def *descriptors = ac_nir_load_arg(&b, &args->ac, args->descriptors[0]);
nir_def *push_constants = ac_nir_load_arg(&b, &args->ac, args->ac.push_constants);
nir_def *dynamic_descriptors = ac_nir_load_arg(&b, &args->ac, args->ac.dynamic_descriptors);
nir_def *sbt_descriptors = ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_descriptors);
nir_def *launch_sizes[3];
@ -2085,6 +2086,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
ac_nir_store_arg(&b, &args->ac, args->descriptors[0], descriptors);
ac_nir_store_arg(&b, &args->ac, args->ac.push_constants, push_constants);
ac_nir_store_arg(&b, &args->ac, args->ac.dynamic_descriptors, dynamic_descriptors);
ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_descriptors, sbt_descriptors);
ac_nir_store_arg(&b, &args->ac, args->ac.rt.traversal_shader_addr, traversal_addr);

View file

@ -3385,6 +3385,7 @@ radv_create_rt_prolog(struct radv_device *device)
struct radv_shader_info info = {0};
info.stage = MESA_SHADER_COMPUTE;
info.loads_push_constants = true;
info.loads_dynamic_offsets = true;
info.force_indirect_descriptors = true;
info.wave_size = pdev->rt_wave_size;
info.workgroup_size = info.wave_size;

View file

@ -100,10 +100,13 @@ declare_global_input_sgprs(const enum amd_gfx_level gfx_level, const struct radv
if (info->merged_shader_compiled_separately ||
(info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts)) {
/* 1 for push constants and dynamic descriptors */
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.push_constants, AC_UD_PUSH_CONSTANTS);
}
if (info->merged_shader_compiled_separately || info->loads_dynamic_offsets) {
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.dynamic_descriptors, AC_UD_DYNAMIC_DESCRIPTORS);
}
for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
add_ud_arg(args, 1, AC_ARG_VALUE, &args->ac.inline_push_consts[i], AC_UD_INLINE_PUSH_CONSTANTS);
}
@ -321,9 +324,9 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.uniform_shader_addr, AC_UD_SCRATCH_RING_OFFSETS);
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->descriptors[0], AC_UD_INDIRECT_DESCRIPTORS);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.push_constants);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.dynamic_descriptors);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors);
for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.rt.launch_sizes[i]);
@ -430,6 +433,7 @@ declare_unmerged_vs_tcs_args(const enum amd_gfx_level gfx_level, const struct ra
ac_add_preserved(&args->ac, &args->descriptors[0]);
ac_add_preserved(&args->ac, &args->ac.push_constants);
ac_add_preserved(&args->ac, &args->ac.dynamic_descriptors);
ac_add_preserved(&args->ac, &args->ac.view_index);
ac_add_preserved(&args->ac, &args->ac.tcs_offchip_layout);
ac_add_preserved(&args->ac, &args->epilog_pc);
@ -495,6 +499,7 @@ declare_unmerged_vs_tes_gs_args(const enum amd_gfx_level gfx_level, const struct
ac_add_preserved(&args->ac, &args->descriptors[0]);
ac_add_preserved(&args->ac, &args->ac.push_constants);
ac_add_preserved(&args->ac, &args->ac.dynamic_descriptors);
ac_add_preserved(&args->ac, &args->streamout_buffers);
if (gfx_level >= GFX12)
ac_add_preserved(&args->ac, &args->streamout_state);
@ -588,9 +593,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
}
if (info->type == RADV_SHADER_TYPE_RT_PROLOG) {
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, NULL, AC_UD_PS_STATE); /* unused */
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
add_ud_arg(args, 1, AC_ARG_VALUE, &args->ac.rt.dynamic_callable_stack_base,
AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
@ -891,6 +895,8 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
uint32_t num_user_sgprs = args->num_user_sgprs;
if (info->loads_push_constants)
num_user_sgprs++;
if (info->loads_dynamic_offsets)
num_user_sgprs++;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

View file

@ -33,7 +33,8 @@ enum radv_ud_index {
AC_UD_TASK_RING_ENTRY = 15,
AC_UD_NEXT_STAGE_PC = 16,
AC_UD_EPILOG_PC = 17,
AC_UD_SHADER_START = 18,
AC_UD_DYNAMIC_DESCRIPTORS = 18,
AC_UD_SHADER_START = 19,
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
AC_UD_VS_PROLOG_INPUTS,