mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
ac/nir: split local_invocation_ids to 3 separate VGPR inputs
so that we can set the upper range per VGPR.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32782>
This commit is contained in:
parent
65d241c947
commit
0d5b03f2b9
5 changed files with 30 additions and 15 deletions
|
|
@ -233,7 +233,7 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
|
|||
ac_nir_load_arg(b, s->args, s->args->frag_pos[3]));
|
||||
break;
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
if (s->args->args[s->args->local_invocation_ids.arg_index].size == 1) {
|
||||
if (s->args->local_invocation_ids_packed.used) {
|
||||
/* Thread IDs are packed in VGPR0, 10 bits per component. */
|
||||
unsigned num_bits[3];
|
||||
|
||||
|
|
@ -263,13 +263,17 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
|
|||
nir_def *vec[3];
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
vec[i] = !num_bits[i] ? nir_imm_int(b, 0) :
|
||||
ac_nir_unpack_arg(b, s->args, s->args->local_invocation_ids, i * 10,
|
||||
ac_nir_unpack_arg(b, s->args,
|
||||
s->args->local_invocation_ids_packed, i * 10,
|
||||
num_bits[i]);
|
||||
}
|
||||
|
||||
replacement = nir_vec(b, vec, 3);
|
||||
} else {
|
||||
replacement = ac_nir_load_arg(b, s->args, s->args->local_invocation_ids);
|
||||
replacement = nir_vec3(b,
|
||||
ac_nir_load_arg(b, s->args, s->args->local_invocation_id_x),
|
||||
ac_nir_load_arg(b, s->args, s->args->local_invocation_id_y),
|
||||
ac_nir_load_arg(b, s->args, s->args->local_invocation_id_z));
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_merged_wave_info_amd:
|
||||
|
|
|
|||
|
|
@ -161,7 +161,10 @@ struct ac_shader_args {
|
|||
struct ac_arg pos_fixed_pt;
|
||||
|
||||
/* CS */
|
||||
struct ac_arg local_invocation_ids;
|
||||
struct ac_arg local_invocation_id_x;
|
||||
struct ac_arg local_invocation_id_y;
|
||||
struct ac_arg local_invocation_id_z;
|
||||
struct ac_arg local_invocation_ids_packed;
|
||||
struct ac_arg num_work_groups;
|
||||
/* GFX6-11 only. GFX12+ uses read only SGPRs {TTMP9[0:31], TTMP7[0:15], TTMP7[16:31]}. */
|
||||
struct ac_arg workgroup_ids[3];
|
||||
|
|
|
|||
|
|
@ -12411,9 +12411,11 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
}
|
||||
if (options->gfx_level < GFX11)
|
||||
in_scratch_offset = get_arg_reg(in_args, in_args->scratch_offset);
|
||||
struct ac_arg arg_id = options->gfx_level >= GFX11 ? in_args->local_invocation_ids_packed
|
||||
: in_args->local_invocation_id_x;
|
||||
PhysReg in_local_ids[2] = {
|
||||
get_arg_reg(in_args, in_args->local_invocation_ids),
|
||||
get_arg_reg(in_args, in_args->local_invocation_ids).advance(4),
|
||||
get_arg_reg(in_args, arg_id),
|
||||
get_arg_reg(in_args, arg_id).advance(4),
|
||||
};
|
||||
|
||||
/* Outputs:
|
||||
|
|
|
|||
|
|
@ -249,7 +249,7 @@ declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args
|
|||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pdev->mesh_fast_launch_2) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
|
||||
|
|
@ -616,10 +616,13 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
|
|||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
}
|
||||
|
||||
if (gfx_level >= GFX11)
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
|
||||
else
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
|
||||
if (gfx_level >= GFX11) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_x);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_y);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_z);
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_VERTEX:
|
||||
/* NGG is handled by the GS case */
|
||||
|
|
|
|||
|
|
@ -741,10 +741,13 @@ static void si_init_shader_args(struct si_shader *shader, struct si_shader_args
|
|||
/* Hardware VGPRs. */
|
||||
/* Thread IDs are packed in VGPR0, 10 bits per component or stored in 3 separate VGPRs */
|
||||
if (sel->screen->info.gfx_level >= GFX11 ||
|
||||
(!sel->screen->info.has_graphics && sel->screen->info.family >= CHIP_MI200))
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
|
||||
else
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
|
||||
(!sel->screen->info.has_graphics && sel->screen->info.family >= CHIP_MI200)) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_x);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_y);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_id_z);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0 && "unimplemented shader");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue