mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 08:50:13 +01:00
radv: declare a new user SGPR for dynamic descriptors
To move them out of push constants. fossils-db (GFX1201): Totals from 20700 (25.99% of 79646) affected shaders: Instrs: 14375624 -> 14370051 (-0.04%); split: -0.07%, +0.03% CodeSize: 76746128 -> 76723772 (-0.03%); split: -0.05%, +0.02% Latency: 74103586 -> 74113651 (+0.01%); split: -0.01%, +0.02% InvThroughput: 11908817 -> 11908798 (-0.00%); split: -0.00%, +0.00% VClause: 249605 -> 249607 (+0.00%); split: -0.00%, +0.00% SClause: 337914 -> 337772 (-0.04%); split: -0.08%, +0.04% Copies: 843585 -> 839233 (-0.52%); split: -0.62%, +0.10% PreSGPRs: 836283 -> 837260 (+0.12%) SALU: 1790713 -> 1786374 (-0.24%); split: -0.29%, +0.05% Co-authored-by: Konstantin Seurer <konstantin.seurer@gmail.com> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37768>
This commit is contained in:
parent
6541b911bd
commit
bc32286e5b
6 changed files with 27 additions and 12 deletions
|
|
@ -182,6 +182,7 @@ struct ac_shader_args {
|
|||
struct ac_arg push_constants;
|
||||
struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
|
||||
uint64_t inline_push_const_mask;
|
||||
struct ac_arg dynamic_descriptors;
|
||||
struct ac_arg view_index;
|
||||
struct ac_arg force_vrs_rates;
|
||||
|
||||
|
|
|
|||
|
|
@ -32,9 +32,9 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
* Ring offsets: s[0-1]
|
||||
* Indirect descriptor sets: s[2]
|
||||
* Push constants pointer: s[3]
|
||||
* SBT descriptors: s[4-5]
|
||||
* Traversal shader address: s[6]
|
||||
* Unused (for future work): s[7]
|
||||
* Dynamic descriptors: s[4]
|
||||
* Traversal shader address: s[5]
|
||||
* SBT descriptors: s[6-7]
|
||||
* Ray launch size address: s[8-9]
|
||||
* Dynamic callable stack base: s[10]
|
||||
* Workgroup IDs (xyz): s[11], s[12], s[13]
|
||||
|
|
@ -70,9 +70,9 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
* Callee shader PC: s[0-1]
|
||||
* Indirect descriptor sets: s[2]
|
||||
* Push constants pointer: s[3]
|
||||
* SBT descriptors: s[4-5]
|
||||
* Traversal shader address: s[6]
|
||||
* Unused (for future work): s[7]
|
||||
* Dynamic descriptors: s[4]
|
||||
* Traversal shader address: s[5]
|
||||
* SBT descriptors: s[6-7]
|
||||
* Ray launch sizes (xyz): s[8], s[9], s[10]
|
||||
* Scratch offset (<GFX9 only): s[11]
|
||||
* Ring offsets (<GFX9 only): s[12-13]
|
||||
|
|
@ -106,8 +106,12 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
assert(in_ring_offsets == out_uniform_shader_addr);
|
||||
assert(get_arg_reg(in_args, in_args->push_constants) ==
|
||||
get_arg_reg(out_args, out_args->push_constants));
|
||||
assert(get_arg_reg(in_args, in_args->dynamic_descriptors) ==
|
||||
get_arg_reg(out_args, out_args->dynamic_descriptors));
|
||||
assert(get_arg_reg(in_args, in_args->rt.sbt_descriptors) ==
|
||||
get_arg_reg(out_args, out_args->rt.sbt_descriptors));
|
||||
assert(get_arg_reg(in_args, in_args->rt.traversal_shader_addr) ==
|
||||
get_arg_reg(out_args, out_args->rt.traversal_shader_addr));
|
||||
assert(in_launch_size_addr == out_launch_size_x);
|
||||
assert(in_stack_base == out_launch_size_z);
|
||||
assert(in_local_ids[0] == out_launch_ids[0]);
|
||||
|
|
|
|||
|
|
@ -2002,6 +2002,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
|
|||
|
||||
nir_def *descriptors = ac_nir_load_arg(&b, &args->ac, args->descriptors[0]);
|
||||
nir_def *push_constants = ac_nir_load_arg(&b, &args->ac, args->ac.push_constants);
|
||||
nir_def *dynamic_descriptors = ac_nir_load_arg(&b, &args->ac, args->ac.dynamic_descriptors);
|
||||
nir_def *sbt_descriptors = ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_descriptors);
|
||||
|
||||
nir_def *launch_sizes[3];
|
||||
|
|
@ -2085,6 +2086,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
|
|||
|
||||
ac_nir_store_arg(&b, &args->ac, args->descriptors[0], descriptors);
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.push_constants, push_constants);
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.dynamic_descriptors, dynamic_descriptors);
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_descriptors, sbt_descriptors);
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.rt.traversal_shader_addr, traversal_addr);
|
||||
|
||||
|
|
|
|||
|
|
@ -3385,6 +3385,7 @@ radv_create_rt_prolog(struct radv_device *device)
|
|||
struct radv_shader_info info = {0};
|
||||
info.stage = MESA_SHADER_COMPUTE;
|
||||
info.loads_push_constants = true;
|
||||
info.loads_dynamic_offsets = true;
|
||||
info.force_indirect_descriptors = true;
|
||||
info.wave_size = pdev->rt_wave_size;
|
||||
info.workgroup_size = info.wave_size;
|
||||
|
|
|
|||
|
|
@ -100,10 +100,13 @@ declare_global_input_sgprs(const enum amd_gfx_level gfx_level, const struct radv
|
|||
|
||||
if (info->merged_shader_compiled_separately ||
|
||||
(info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts)) {
|
||||
/* 1 for push constants and dynamic descriptors */
|
||||
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.push_constants, AC_UD_PUSH_CONSTANTS);
|
||||
}
|
||||
|
||||
if (info->merged_shader_compiled_separately || info->loads_dynamic_offsets) {
|
||||
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.dynamic_descriptors, AC_UD_DYNAMIC_DESCRIPTORS);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
|
||||
add_ud_arg(args, 1, AC_ARG_VALUE, &args->ac.inline_push_consts[i], AC_UD_INLINE_PUSH_CONSTANTS);
|
||||
}
|
||||
|
|
@ -321,9 +324,9 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg
|
|||
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.uniform_shader_addr, AC_UD_SCRATCH_RING_OFFSETS);
|
||||
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->descriptors[0], AC_UD_INDIRECT_DESCRIPTORS);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.push_constants);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.dynamic_descriptors);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, NULL); /* unused */
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.rt.launch_sizes[i]);
|
||||
|
|
@ -430,6 +433,7 @@ declare_unmerged_vs_tcs_args(const enum amd_gfx_level gfx_level, const struct ra
|
|||
|
||||
ac_add_preserved(&args->ac, &args->descriptors[0]);
|
||||
ac_add_preserved(&args->ac, &args->ac.push_constants);
|
||||
ac_add_preserved(&args->ac, &args->ac.dynamic_descriptors);
|
||||
ac_add_preserved(&args->ac, &args->ac.view_index);
|
||||
ac_add_preserved(&args->ac, &args->ac.tcs_offchip_layout);
|
||||
ac_add_preserved(&args->ac, &args->epilog_pc);
|
||||
|
|
@ -495,6 +499,7 @@ declare_unmerged_vs_tes_gs_args(const enum amd_gfx_level gfx_level, const struct
|
|||
|
||||
ac_add_preserved(&args->ac, &args->descriptors[0]);
|
||||
ac_add_preserved(&args->ac, &args->ac.push_constants);
|
||||
ac_add_preserved(&args->ac, &args->ac.dynamic_descriptors);
|
||||
ac_add_preserved(&args->ac, &args->streamout_buffers);
|
||||
if (gfx_level >= GFX12)
|
||||
ac_add_preserved(&args->ac, &args->streamout_state);
|
||||
|
|
@ -588,9 +593,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
|
|||
}
|
||||
|
||||
if (info->type == RADV_SHADER_TYPE_RT_PROLOG) {
|
||||
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
|
||||
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
|
||||
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, NULL, AC_UD_PS_STATE); /* unused */
|
||||
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
|
||||
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
|
||||
add_ud_arg(args, 1, AC_ARG_VALUE, &args->ac.rt.dynamic_callable_stack_base,
|
||||
AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
|
||||
|
|
@ -891,6 +895,8 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
|
|||
uint32_t num_user_sgprs = args->num_user_sgprs;
|
||||
if (info->loads_push_constants)
|
||||
num_user_sgprs++;
|
||||
if (info->loads_dynamic_offsets)
|
||||
num_user_sgprs++;
|
||||
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,8 @@ enum radv_ud_index {
|
|||
AC_UD_TASK_RING_ENTRY = 15,
|
||||
AC_UD_NEXT_STAGE_PC = 16,
|
||||
AC_UD_EPILOG_PC = 17,
|
||||
AC_UD_SHADER_START = 18,
|
||||
AC_UD_DYNAMIC_DESCRIPTORS = 18,
|
||||
AC_UD_SHADER_START = 19,
|
||||
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
|
||||
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
|
||||
AC_UD_VS_PROLOG_INPUTS,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue