radeonsi: Add number of VS outputs to TCS output layout.

Use tcs_offchip_layout instead of VS state to determine the
number of LS outputs.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28425>
This commit is contained in:
Timur Kristóf 2024-03-30 00:12:32 +01:00
parent 8883b88dd4
commit b82614e06b
3 changed files with 17 additions and 9 deletions

View file

@@ -349,14 +349,18 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage);
break;
case nir_intrinsic_load_lshs_vertex_stride_amd:
if (stage == MESA_SHADER_VERTEX)
if (stage == MESA_SHADER_VERTEX) {
replacement = nir_imm_int(b, sel->info.lshs_vertex_stride);
else if (stage == MESA_SHADER_TESS_CTRL)
replacement = sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic ?
nir_imm_int(b, key->ge.part.tcs.ls->info.lshs_vertex_stride) :
nir_ishl_imm(b, GET_FIELD_NIR(VS_STATE_LS_OUT_VERTEX_SIZE), 2);
else
} else if (stage == MESA_SHADER_TESS_CTRL) {
if (sel->screen->info.gfx_level >= GFX9 && shader->is_monolithic) {
replacement = nir_imm_int(b, key->ge.part.tcs.ls->info.lshs_vertex_stride);
} else {
nir_def *num_ls_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 17, 6);
replacement = nir_iadd_imm_nuw(b, nir_ishl_imm(b, num_ls_out, 4), 4);
}
} else {
unreachable("no nir_load_lshs_vertex_stride_amd");
}
break;
case nir_intrinsic_load_esgs_vertex_stride_amd:
assert(sel->screen->info.gfx_level >= GFX9);

View file

@ -57,8 +57,10 @@ struct si_shader_args {
* [6:10] = the number of output vertices per patch - 1, max = 31
* # 5 bits
* [11:15] = the number of input vertices per patch - 1, max = 31 (TCS only)
* # 7 bits
* [16:22] = reserved for future use
* # 1 bit
* [16] = reserved for future use
* # 6 bits
* [17:22] = the number of LS outputs, max = 63
* # 6 bits
* [23:28] = the number of HS per-vertex outputs, max = 63
* # 2 bits

View file

@ -4486,6 +4486,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
unsigned input_vertex_size = ls->info.lshs_vertex_stride;
unsigned num_vs_outputs = (input_vertex_size - 4) / 16;
unsigned output_vertex_size = num_tcs_outputs * 16;
unsigned input_patch_size;
@@ -4595,6 +4596,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
assert(num_tcs_input_cp <= 32);
assert(num_tcs_output_cp <= 32);
assert(num_patches <= 64);
assert(num_vs_outputs <= 63);
assert(num_tcs_outputs <= 63);
assert(((pervertex_output_patch_size * num_patches) & ~0xffff) == 0);
@@ -4608,7 +4610,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
sctx->tcs_offchip_layout &= 0xe0000000;
sctx->tcs_offchip_layout |=
(num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((num_tcs_input_cp - 1) << 11) |
(num_tcs_outputs << 23);
(num_vs_outputs << 17) | (num_tcs_outputs << 23);
/* Compute the LDS size. */
unsigned lds_size = lds_per_patch * num_patches;