ac: move tcs_offchip_layout into ac_shader_args

The variable is the same in both radv and radeonsi, so it is moved into the
shared ac_shader_args, but the implementation of the load intrinsics is very different.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34780>
This commit is contained in:
Marek Olšák 2025-05-08 01:24:55 -04:00 committed by Marge Bot
parent 5994e08f8b
commit 5734a916d6
8 changed files with 40 additions and 53 deletions

View file

@ -92,6 +92,17 @@ struct ac_shader_args {
struct ac_arg tcs_patch_id;
struct ac_arg tcs_rel_ids;
/* # [0:6] = the number of tessellation patches, max = 127
* # [7:11] = TCS: the number of input patch control points minus one, max = 31
* TES: the number of output patch control points minus one, max = 31
* # [12:16] = the stride of 1 TCS per-vertex output in memory / 256, max = 16
* # [17:22] = the number of LS outputs, up to 32
* # [23:28] = the number of HS per-vertex outputs, up to 32
* # [29:30] = tess_primitive_mode
* # [31] = whether TES reads tess factors
*/
struct ac_arg tcs_offchip_layout;
/* TES */
struct ac_arg tes_u;
struct ac_arg tes_v;

View file

@ -80,7 +80,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
if (s->info->num_tess_patches) {
replacement = nir_imm_int(b, s->info->num_tess_patches);
} else {
replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
replacement = GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
}
break;
case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
@ -88,14 +88,14 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
replacement = nir_imm_bool(b, s->info->tcs.tes_reads_tess_factors);
} else {
replacement =
nir_ine_imm(b, GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_TES_READS_TF), 0);
nir_ine_imm(b, GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_TES_READS_TF), 0);
}
break;
case nir_intrinsic_load_tcs_primitive_mode_amd:
if (s->info->outputs_linked) {
replacement = nir_imm_int(b, s->info->tes._primitive_mode);
} else {
replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE);
replacement = GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE);
}
break;
case nir_intrinsic_load_ring_esgs_amd:
@ -123,14 +123,14 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
if (s->gfx_state->ts.patch_control_points) {
replacement = nir_imm_int(b, s->gfx_state->ts.patch_control_points);
} else {
nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_VERTICES_IN);
nir_def *n = GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_VERTICES_IN);
replacement = nir_iadd_imm_nuw(b, n, 1);
}
} else if (stage == MESA_SHADER_TESS_EVAL) {
if (s->info->tes.tcs_vertices_out) {
replacement = nir_imm_int(b, s->info->tes.tcs_vertices_out);
} else {
nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_VERTICES_IN);
nir_def *n = GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_VERTICES_IN);
replacement = nir_iadd_imm_nuw(b, n, 1);
}
} else
@ -223,7 +223,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
if (s->info->inputs_linked) {
replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs));
} else {
nir_def *num_ls_out = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS);
nir_def *num_ls_out = GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS);
nir_def *extra_dw = nir_bcsel(b, nir_ieq_imm(b, num_ls_out, 0), nir_imm_int(b, 0), nir_imm_int(b, 4));
replacement = nir_iadd_nuw(b, nir_ishl_imm(b, num_ls_out, 4), extra_dw);
}
@ -253,7 +253,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
replacement = nir_imm_int(b, align(s->info->num_tess_patches * tcs_vertices_out * 16, 256));
} else {
replacement = nir_imul_imm(
b, GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_TCS_MEM_ATTRIB_STRIDE), 256);
b, GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_TCS_MEM_ATTRIB_STRIDE), 256);
}
if (intrin->intrinsic == nir_intrinsic_load_hs_out_patch_data_offset_amd) {
@ -265,7 +265,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
num_tcs_mem_outputs = nir_imm_int(b, s->info->tes.num_linked_inputs);
} else {
assert(stage == MESA_SHADER_TESS_EVAL);
num_tcs_mem_outputs = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS);
num_tcs_mem_outputs = GET_SGPR_FIELD_NIR(s->args->ac.tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS);
}
replacement = nir_imul(b, replacement, num_tcs_mem_outputs);

View file

@ -402,7 +402,7 @@ declare_unmerged_vs_tcs_args(const enum amd_gfx_level gfx_level, const struct ra
declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC);
add_ud_arg(args, 1, AC_ARG_INT, &args->next_stage_pc, AC_UD_NEXT_STAGE_PC);
@ -427,7 +427,7 @@ declare_unmerged_vs_tcs_args(const enum amd_gfx_level gfx_level, const struct ra
ac_add_preserved(&args->ac, &args->descriptor_sets[0]);
ac_add_preserved(&args->ac, &args->ac.push_constants);
ac_add_preserved(&args->ac, &args->ac.view_index);
ac_add_preserved(&args->ac, &args->tcs_offchip_layout);
ac_add_preserved(&args->ac, &args->ac.tcs_offchip_layout);
ac_add_preserved(&args->ac, &args->epilog_pc);
/* Preserved VGPRs */
@ -449,7 +449,7 @@ declare_unmerged_vs_tes_gs_args(const enum amd_gfx_level gfx_level, const struct
declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
if (info->is_ngg) {
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_state, AC_UD_NGG_STATE);
@ -495,7 +495,7 @@ declare_unmerged_vs_tes_gs_args(const enum amd_gfx_level gfx_level, const struct
if (gfx_level >= GFX12)
ac_add_preserved(&args->ac, &args->streamout_state);
ac_add_preserved(&args->ac, &args->ac.view_index);
ac_add_preserved(&args->ac, &args->tcs_offchip_layout);
ac_add_preserved(&args->ac, &args->ac.tcs_offchip_layout);
if (info->is_ngg) {
ac_add_preserved(&args->ac, &args->ngg_state);
if (gfx_level >= GFX11)
@ -686,7 +686,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
}
if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
@ -702,7 +702,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
}
if (radv_tcs_needs_state_sgpr(info, gfx_state)) {
add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
@ -724,7 +724,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, AC_UD_VIEW_INDEX);
if (radv_tes_needs_state_sgpr(info))
add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
if (info->tes.as_es) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
@ -776,7 +776,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
}
if (previous_stage == MESA_SHADER_TESS_EVAL && radv_tes_needs_state_sgpr(info))
add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT);
/* Legacy GS force vrs is handled by GS copy shader. */
if (info->force_vrs_per_vertex && info->is_ngg) {

View file

@ -106,18 +106,6 @@ struct radv_shader_args {
struct ac_arg stencil;
struct ac_arg sample_mask;
/* TCS */
/* # [0:6] = the number of tessellation patches, max = 127
* # [7:11] = TCS: the number of input patch control points minus one, max = 31
* TES: the number of output patch control points minus one, max = 31
* # [12:16] = the stride of 1 TCS per-vertex output in memory / 256, max = 16
* # [17:22] = the number of LS outputs, up to 32
* # [23:28] = the number of HS per-vertex outputs, up to 32
* # [29:30] = tess_primitive_mode
* # [31] = whether TES reads tess factors
*/
struct ac_arg tcs_offchip_layout;
/* GS */
struct ac_arg vgt_esgs_ring_itemsize;

View file

@ -269,7 +269,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
}
case nir_intrinsic_load_patch_vertices_in:
replacement =
nir_iadd_imm(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5), 1);
nir_iadd_imm(b, ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 7, 5), 1);
break;
case nir_intrinsic_load_sample_mask_in:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage);
@ -281,7 +281,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
if (sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic) {
replacement = nir_imm_int(b, si_shader_lshs_vertex_stride(shader));
} else {
nir_def *num_ls_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 17, 6);
nir_def *num_ls_out = ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 17, 6);
nir_def *extra_dw = nir_bcsel(b, nir_ieq_imm(b, num_ls_out, 0), nir_imm_int(b, 0), nir_imm_int(b, 4));
replacement = nir_iadd_nuw(b, nir_ishl_imm(b, num_ls_out, 4), extra_dw);
}
@ -299,11 +299,11 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
}
break;
case nir_intrinsic_load_tcs_num_patches_amd: {
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
replacement = ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 0, 7);
break;
}
case nir_intrinsic_load_tcs_mem_attrib_stride:
replacement = nir_imul_imm(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 12, 5), 256);
replacement = nir_imul_imm(b, ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 12, 5), 256);
break;
case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
nir_def *num_tcs_mem_outputs;
@ -311,11 +311,11 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
if (stage == MESA_SHADER_TESS_CTRL)
num_tcs_mem_outputs = nir_imm_int(b, sel->info.tess_io_info.highest_remapped_vram_output);
else
num_tcs_mem_outputs = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 23, 6);
num_tcs_mem_outputs = ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 23, 6);
/* Get the stride of a single output. */
nir_def *attr_stride =
nir_imul_imm(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 12, 5), 256);
nir_imul_imm(b, ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 12, 5), 256);
replacement = nir_imul(b, attr_stride, num_tcs_mem_outputs);
break;
}
@ -564,7 +564,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
if (shader->is_monolithic) {
replacement = nir_imm_bool(b, key->ge.opt.tes_reads_tess_factors);
} else {
replacement = nir_ine_imm(b, ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 31, 1), 0);
replacement = nir_ine_imm(b, ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 31, 1), 0);
}
break;
case nir_intrinsic_load_tcs_primitive_mode_amd:
@ -574,7 +574,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
if (b->shader->info.tess._primitive_mode != TESS_PRIMITIVE_UNSPECIFIED)
replacement = nir_imm_int(b, b->shader->info.tess._primitive_mode);
else
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 29, 2);
replacement = ac_nir_unpack_arg(b, &args->ac, args->ac.tcs_offchip_layout, 29, 2);
}
break;
case nir_intrinsic_load_ring_gsvs_amd: {

View file

@ -225,7 +225,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
case MESA_SHADER_TESS_CTRL: /* GFX6-GFX8 */
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, info, true);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
@ -262,7 +262,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr);
/* VGPRs (first TCS, then VS) */
@ -337,7 +337,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
} else if (stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
} else {
@ -393,7 +393,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, info, true);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr);
if (shader->key.ge.as_es) {

View file

@ -46,18 +46,6 @@ struct si_shader_args {
struct ac_arg vs_state_bits;
struct ac_arg vs_blit_inputs;
/* API TCS & TES */
/* Layout of TCS outputs in the offchip buffer
* [0:6] (7 bits) = the number of patches per threadgroup, max = 127
* [7:11] (5 bits) = patch_vertices_in - 1, different for TCS and TES, max = 31
* [12:16] (5 bits) = the stride of 1 TCS per-vertex output in memory / 256, max = 16
* [17:22] (6 bits) = the number of LS outputs in LDS, max = 63
* [23:28] (6 bits) = the number of HS per-vertex outputs in memory, max = 63
* [29:30] (2 bits) = TES output primitive type (TCS only)
* [31] (1 bit) = whether TES reads tess factor outputs from TCS (TCS only)
*/
struct ac_arg tcs_offchip_layout;
/* API TCS & TES */
struct ac_arg tes_offchip_addr;
/* PS */

View file

@ -62,7 +62,7 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
ret = si_insert_input_ret(ctx, ret, ctx->args->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR);
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;