radeonsi: move TCS.gl_PatchVerticesIn into the tcs_offchip_layout SGPR

This will allow us to remove one TCS user data SGPR.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23517>
This commit is contained in:
Marek Olšák 2023-06-07 23:39:57 -04:00 committed by Marge Bot
parent b4e2073f04
commit 1b40ab2150
3 changed files with 7 additions and 6 deletions

View file

@ -367,12 +367,12 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
}
case nir_intrinsic_load_patch_vertices_in:
if (stage == MESA_SHADER_TESS_CTRL)
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_out_lds_layout, 13, 6);
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 11, 5);
else if (stage == MESA_SHADER_TESS_EVAL) {
nir_ssa_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5);
replacement = nir_iadd_imm(b, tmp, 1);
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5);
} else
unreachable("no nir_load_patch_vertices_in");
replacement = nir_iadd_imm(b, replacement, 1);
break;
case nir_intrinsic_load_sample_mask_in:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.sample_coverage);

View file

@ -42,6 +42,8 @@ struct si_shader_args {
* [0:5] = the number of patches per threadgroup - 1, max = 63
* # 5 bits
* [6:10] = the number of output vertices per patch - 1, max = 31
* # 5 bits
* [11:15] = the number of input vertices per patch - 1, max = 31 (TCS only)
* # 16 bits
* [16:31] = the offset of per patch attributes in the buffer in bytes.
* 64 outputs are implied by SI_UNIQUE_SLOT_* values.
@ -59,7 +61,6 @@ struct si_shader_args {
*/
struct ac_arg tcs_out_lds_offsets;
/* Layout of TCS outputs / TES inputs:
* [13:18] = gl_PatchVerticesIn, max = 32
* [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
*/
struct ac_arg tcs_out_lds_layout;

View file

@ -797,10 +797,10 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
assert((ring_va & u_bit_consecutive(0, 19)) == 0);
sctx->tes_offchip_ring_va_sgpr = ring_va;
sctx->tcs_out_layout = (num_tcs_input_cp << 13) | ring_va;
sctx->tcs_out_layout = ring_va;
sctx->tcs_out_offsets = ((perpatch_output_offset / 4) << 16);
sctx->tcs_offchip_layout =
(num_patches - 1) | ((num_tcs_output_cp - 1) << 6) |
(num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((num_tcs_input_cp - 1) << 11) |
((pervertex_output_patch_size * num_patches) << 16);
/* Compute the LDS size. */