ac/nir: split local_invocation_ids to 3 separate VGPR inputs

This lets us set the upper bound of the value range separately for each VGPR.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32782>
This commit is contained in:
Marek Olšák 2024-12-29 20:57:06 -05:00 committed by Marge Bot
parent 65d241c947
commit 0d5b03f2b9
5 changed files with 30 additions and 15 deletions

View file

@@ -233,7 +233,7 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
ac_nir_load_arg(b, s->args, s->args->frag_pos[3]));
break;
case nir_intrinsic_load_local_invocation_id:
if (s->args->args[s->args->local_invocation_ids.arg_index].size == 1) {
if (s->args->local_invocation_ids_packed.used) {
/* Thread IDs are packed in VGPR0, 10 bits per component. */
unsigned num_bits[3];
@@ -263,13 +263,17 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
nir_def *vec[3];
for (unsigned i = 0; i < 3; i++) {
vec[i] = !num_bits[i] ? nir_imm_int(b, 0) :
ac_nir_unpack_arg(b, s->args, s->args->local_invocation_ids, i * 10,
ac_nir_unpack_arg(b, s->args,
s->args->local_invocation_ids_packed, i * 10,
num_bits[i]);
}
replacement = nir_vec(b, vec, 3);
} else {
replacement = ac_nir_load_arg(b, s->args, s->args->local_invocation_ids);
replacement = nir_vec3(b,
ac_nir_load_arg(b, s->args, s->args->local_invocation_id_x),
ac_nir_load_arg(b, s->args, s->args->local_invocation_id_y),
ac_nir_load_arg(b, s->args, s->args->local_invocation_id_z));
}
break;
case nir_intrinsic_load_merged_wave_info_amd:

View file

@@ -161,7 +161,10 @@ struct ac_shader_args {
struct ac_arg pos_fixed_pt;
/* CS */
struct ac_arg local_invocation_ids;
struct ac_arg local_invocation_id_x;
struct ac_arg local_invocation_id_y;
struct ac_arg local_invocation_id_z;
struct ac_arg local_invocation_ids_packed;
struct ac_arg num_work_groups;
/* GFX6-11 only. GFX12+ uses read only SGPRs {TTMP9[0:31], TTMP7[0:15], TTMP7[16:31]}. */
struct ac_arg workgroup_ids[3];

View file

@@ -12411,9 +12411,11 @@ select_rt_prolog(Program* program, ac_shader_config* config,
}
if (options->gfx_level < GFX11)
in_scratch_offset = get_arg_reg(in_args, in_args->scratch_offset);
struct ac_arg arg_id = options->gfx_level >= GFX11 ? in_args->local_invocation_ids_packed
: in_args->local_invocation_id_x;
PhysReg in_local_ids[2] = {
get_arg_reg(in_args, in_args->local_invocation_ids),
get_arg_reg(in_args, in_args->local_invocation_ids).advance(4),
get_arg_reg(in_args, arg_id),
get_arg_reg(in_args, arg_id).advance(4),
};
/* Outputs:

View file

@@ -249,7 +249,7 @@ declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->mesh_fast_launch_2) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
@@ -616,10 +616,13 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
if (gfx_level >= GFX11)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
else
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
if (gfx_level >= GFX11) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_x);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_y);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_z);
}
break;
case MESA_SHADER_VERTEX:
/* NGG is handled by the GS case */

View file

@@ -741,10 +741,13 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args
/* Hardware VGPRs. */
/* Thread IDs are packed in VGPR0, 10 bits per component or stored in 3 separate VGPRs */
if (sel->screen->info.gfx_level >= GFX11 ||
(!sel->screen->info.has_graphics && sel->screen->info.family >= CHIP_MI200))
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
else
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
(!sel->screen->info.has_graphics && sel->screen->info.family >= CHIP_MI200)) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_x);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_y);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_z);
}
break;
default:
assert(0 && "unimplemented shader");