mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
radeonsi: Use one more bit for number of patches in TCS offchip layout.
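There was one unused (reserved) bit left in the TCS offchip layout, so we may as well use it for the number of patches. The patch-count field grows from 6 to 7 bits, which shifts the output- and input-vertices-per-patch fields up by one bit. In the future, this may allow increasing the maximum number of patches per workgroup.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28425>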
This commit is contained in:
parent
04dea4aef2
commit
b34e99d021
3 changed files with 11 additions and 19 deletions
|
|
@ -338,9 +338,9 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
}
|
||||
case nir_intrinsic_load_patch_vertices_in:
|
||||
if (stage == MESA_SHADER_TESS_CTRL)
|
||||
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 11, 5);
|
||||
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 12, 5);
|
||||
else if (stage == MESA_SHADER_TESS_EVAL) {
|
||||
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5);
|
||||
replacement = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
|
||||
} else
|
||||
unreachable("no nir_load_patch_vertices_in");
|
||||
replacement = nir_iadd_imm(b, replacement, 1);
|
||||
|
|
@ -372,7 +372,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
}
|
||||
break;
|
||||
case nir_intrinsic_load_tcs_num_patches_amd: {
|
||||
nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6);
|
||||
nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
|
||||
replacement = nir_iadd_imm(b, tmp, 1);
|
||||
break;
|
||||
}
|
||||
|
|
@ -387,12 +387,12 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
} else {
|
||||
nir_def *num_hs_out = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 23, 6);
|
||||
nir_def *out_vtx_size = nir_ishl_imm(b, num_hs_out, 4);
|
||||
nir_def *o = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 6, 5);
|
||||
nir_def *o = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 7, 5);
|
||||
nir_def *out_vtx_per_patch = nir_iadd_imm_nuw(b, o, 1);
|
||||
per_vtx_out_patch_size = nir_imul(b, out_vtx_per_patch, out_vtx_size);
|
||||
}
|
||||
|
||||
nir_def *p = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6);
|
||||
nir_def *p = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 7);
|
||||
nir_def *num_patches = nir_iadd_imm_nuw(b, p, 1);
|
||||
replacement = nir_imul(b, per_vtx_out_patch_size, num_patches);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -51,14 +51,12 @@ struct si_shader_args {
|
|||
|
||||
/* API TCS & TES */
|
||||
/* Layout of TCS outputs in the offchip buffer
|
||||
* # 6 bits
|
||||
* [0:5] = the number of patches per threadgroup - 1, max = 63
|
||||
* # 7 bits
|
||||
* [0:6] = the number of patches per threadgroup - 1, max = 127
|
||||
* # 5 bits
|
||||
* [6:10] = the number of output vertices per patch - 1, max = 31
|
||||
* [7:11] = the number of output vertices per patch - 1, max = 31
|
||||
* # 5 bits
|
||||
* [11:15] = the number of input vertices per patch - 1, max = 31 (TCS only)
|
||||
* # 1 bit
|
||||
* [16] = reserved for future use
|
||||
* [12:16] = the number of input vertices per patch - 1, max = 31 (TCS only)
|
||||
* # 6 bits
|
||||
* [17:22] = the number of LS outputs, max = 63
|
||||
* # 6 bits
|
||||
|
|
|
|||
|
|
@ -4587,18 +4587,12 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
|
|||
si_mark_atom_dirty(sctx, &sctx->atoms.s.vgt_pipeline_state);
|
||||
}
|
||||
|
||||
unsigned output_patch0_offset = input_patch_size * num_patches;
|
||||
unsigned perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
|
||||
|
||||
/* Compute userdata SGPRs. */
|
||||
assert(((input_vertex_size / 4) & ~0xff) == 0);
|
||||
assert(((perpatch_output_offset / 4) & ~0xffff) == 0);
|
||||
assert(num_tcs_input_cp <= 32);
|
||||
assert(num_tcs_output_cp <= 32);
|
||||
assert(num_patches <= 64);
|
||||
assert(num_patches <= 128);
|
||||
assert(num_vs_outputs <= 63);
|
||||
assert(num_tcs_outputs <= 63);
|
||||
assert(((pervertex_output_patch_size * num_patches) & ~0xffff) == 0);
|
||||
|
||||
uint64_t ring_va =
|
||||
sctx->ws->cs_is_secure(&sctx->gfx_cs) ?
|
||||
|
|
@ -4609,7 +4603,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
|
|||
sctx->tes_offchip_ring_va_sgpr = ring_va;
|
||||
sctx->tcs_offchip_layout &= 0xe0000000;
|
||||
sctx->tcs_offchip_layout |=
|
||||
(num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((num_tcs_input_cp - 1) << 11) |
|
||||
(num_patches - 1) | ((num_tcs_output_cp - 1) << 7) | ((num_tcs_input_cp - 1) << 12) |
|
||||
(num_vs_outputs << 17) | (num_tcs_outputs << 23);
|
||||
|
||||
/* Compute the LDS size. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue