mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
radv: switch to the new TCS LDS/offchip size computation
to use the same logic as radeonsi. This could be improved, see TODOs. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31673>
This commit is contained in:
parent
823e9e846e
commit
8c2f9f0665
7 changed files with 44 additions and 87 deletions
|
|
@ -1209,20 +1209,6 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
|
|||
return num_patches;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ac_compute_tess_lds_size(const struct radeon_info *info, uint32_t lds_per_patch, uint32_t num_patches)
|
||||
{
|
||||
unsigned lds_size = lds_per_patch * num_patches;
|
||||
|
||||
/* The first vec4 is reserved for the tf0/1 shader message group vote. */
|
||||
if (info->gfx_level >= GFX11)
|
||||
lds_size += AC_HS_MSG_VOTE_LDS_BYTES;
|
||||
|
||||
assert(lds_size <= (info->gfx_level >= GFX9 ? 65536 : 32768));
|
||||
|
||||
return align(lds_size, info->lds_encode_granularity) / info->lds_encode_granularity;
|
||||
}
|
||||
|
||||
uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
|
||||
const struct radeon_info *info)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -306,9 +306,6 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
|
|||
uint32_t lds_per_patch, uint32_t wave_size,
|
||||
bool tess_uses_primid);
|
||||
|
||||
uint32_t ac_compute_tess_lds_size(const struct radeon_info *info,
|
||||
uint32_t lds_per_patch, uint32_t num_patches);
|
||||
|
||||
uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
|
||||
const struct radeon_info *info);
|
||||
|
||||
|
|
|
|||
|
|
@ -3495,17 +3495,21 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
|
|||
* is dynamic.
|
||||
*/
|
||||
if (cmd_buffer->state.uses_dynamic_patch_control_points) {
|
||||
/* Compute the number of patches. */
|
||||
cmd_buffer->state.tess_num_patches = radv_get_tcs_num_patches(
|
||||
pdev, d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out, vs->info.vs.num_linked_outputs,
|
||||
tcs->info.tcs.num_lds_per_vertex_outputs, tcs->info.tcs.num_lds_per_patch_outputs,
|
||||
tcs->info.tcs.num_linked_outputs, tcs->info.tcs.num_linked_patch_outputs);
|
||||
struct shader_info tcs_info;
|
||||
|
||||
/* Compute the LDS size. */
|
||||
cmd_buffer->state.tess_lds_size =
|
||||
radv_get_tess_lds_size(pdev, d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out,
|
||||
vs->info.vs.num_linked_outputs, cmd_buffer->state.tess_num_patches,
|
||||
tcs->info.tcs.num_lds_per_vertex_outputs, tcs->info.tcs.num_lds_per_patch_outputs);
|
||||
/* No other shader_info fields are needed. */
|
||||
tcs_info.tess.tcs_vertices_out = tcs->info.tcs.tcs_vertices_out;
|
||||
/* These are only used to determine the LDS layout for TCS outputs. */
|
||||
tcs_info.outputs_read = tcs->info.tcs.tcs_outputs_read;
|
||||
tcs_info.outputs_written = tcs->info.tcs.tcs_outputs_written;
|
||||
tcs_info.patch_outputs_read = tcs->info.tcs.tcs_patch_outputs_read;
|
||||
tcs_info.patch_outputs_written = tcs->info.tcs.tcs_patch_outputs_written;
|
||||
|
||||
radv_get_tess_wg_info(pdev, &tcs_info, d->vk.ts.patch_control_points,
|
||||
/* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
|
||||
vs->info.vs.num_linked_outputs, tcs->info.tcs.num_linked_outputs,
|
||||
tcs->info.tcs.num_linked_patch_outputs, tcs->info.tcs.all_invocations_define_tess_levels,
|
||||
&cmd_buffer->state.tess_num_patches, &cmd_buffer->state.tess_lds_size);
|
||||
}
|
||||
|
||||
ls_hs_config = S_028B58_NUM_PATCHES(cmd_buffer->state.tess_num_patches) |
|
||||
|
|
|
|||
|
|
@ -3567,43 +3567,17 @@ radv_get_user_sgpr(const struct radv_shader *shader, int idx)
|
|||
return offset ? ((offset - SI_SH_REG_OFFSET) >> 2) : 0;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
radv_get_tess_patch_size(uint32_t tcs_num_input_vertices, uint32_t tcs_num_output_vertices, uint32_t tcs_num_inputs,
|
||||
uint32_t tcs_num_lds_outputs, uint32_t tcs_num_lds_patch_outputs)
|
||||
void
|
||||
radv_get_tess_wg_info(const struct radv_physical_device *pdev, const struct shader_info *tcs_info,
|
||||
unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs, unsigned tcs_num_vram_outputs,
|
||||
unsigned tcs_num_vram_patch_outputs, bool all_invocations_define_tess_levels,
|
||||
unsigned *num_patches_per_wg, unsigned *hw_lds_size)
|
||||
{
|
||||
const uint32_t input_vertex_size = get_tcs_input_vertex_stride(tcs_num_inputs);
|
||||
const uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
|
||||
const uint32_t lds_output_vertex_size = tcs_num_lds_outputs * 16;
|
||||
const uint32_t lds_pervertex_output_patch_size = tcs_num_output_vertices * lds_output_vertex_size;
|
||||
const uint32_t lds_output_patch_size = lds_pervertex_output_patch_size + tcs_num_lds_patch_outputs * 16;
|
||||
const uint32_t lds_input_vertex_size = get_tcs_input_vertex_stride(tcs_num_lds_inputs);
|
||||
|
||||
return input_patch_size + lds_output_patch_size;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
radv_get_tcs_num_patches(const struct radv_physical_device *pdev, unsigned tcs_num_input_vertices,
|
||||
unsigned tcs_num_output_vertices, unsigned tcs_num_inputs, unsigned tcs_num_lds_outputs,
|
||||
unsigned tcs_num_lds_patch_outputs, unsigned tcs_num_vram_outputs,
|
||||
unsigned tcs_num_vram_patch_outputs)
|
||||
{
|
||||
const uint32_t lds_per_patch = radv_get_tess_patch_size(
|
||||
tcs_num_input_vertices, tcs_num_output_vertices, tcs_num_inputs, tcs_num_lds_outputs, tcs_num_lds_patch_outputs);
|
||||
const uint32_t vram_per_patch = radv_get_tess_patch_size(tcs_num_input_vertices, tcs_num_output_vertices, 0,
|
||||
tcs_num_vram_outputs, tcs_num_vram_patch_outputs);
|
||||
|
||||
return ac_compute_num_tess_patches(&pdev->info, tcs_num_input_vertices, tcs_num_output_vertices, vram_per_patch,
|
||||
lds_per_patch, pdev->ge_wave_size, false);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
radv_get_tess_lds_size(const struct radv_physical_device *pdev, uint32_t tcs_num_input_vertices,
|
||||
uint32_t tcs_num_output_vertices, uint32_t tcs_num_inputs, uint32_t tcs_num_patches,
|
||||
uint32_t tcs_num_lds_outputs, uint32_t tcs_num_lds_patch_outputs)
|
||||
{
|
||||
const uint32_t lds_per_patch = radv_get_tess_patch_size(
|
||||
tcs_num_input_vertices, tcs_num_output_vertices, tcs_num_inputs, tcs_num_lds_outputs, tcs_num_lds_patch_outputs);
|
||||
|
||||
return ac_compute_tess_lds_size(&pdev->info, lds_per_patch, tcs_num_patches);
|
||||
ac_nir_compute_tess_wg_info(&pdev->info, tcs_info, pdev->ge_wave_size, false, all_invocations_define_tess_levels,
|
||||
tcs_num_input_vertices, lds_input_vertex_size, tcs_num_vram_outputs,
|
||||
tcs_num_vram_patch_outputs, num_patches_per_wg, hw_lds_size);
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
|
|||
|
|
@ -664,14 +664,10 @@ get_tcs_input_vertex_stride(unsigned tcs_num_inputs)
|
|||
return stride;
|
||||
}
|
||||
|
||||
uint32_t radv_get_tcs_num_patches(const struct radv_physical_device *pdev, unsigned tcs_num_input_vertices,
|
||||
unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
|
||||
unsigned tcs_num_lds_outputs, unsigned tcs_num_lds_patch_outputs,
|
||||
unsigned tcs_num_vram_outputs, unsigned tcs_num_vram_patch_outputs);
|
||||
|
||||
uint32_t radv_get_tess_lds_size(const struct radv_physical_device *pdev, uint32_t tcs_num_input_vertices,
|
||||
uint32_t tcs_num_output_vertices, uint32_t tcs_num_inputs, uint32_t tcs_num_patches,
|
||||
uint32_t tcs_num_lds_outputs, uint32_t tcs_num_lds_patch_outputs);
|
||||
void radv_get_tess_wg_info(const struct radv_physical_device *pdev, const struct shader_info *tcs_info,
|
||||
unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs, unsigned tcs_num_vram_outputs,
|
||||
unsigned tcs_num_vram_patch_outputs, bool all_invocations_define_tess_levels,
|
||||
unsigned *num_patches_per_wg, unsigned *hw_lds_size);
|
||||
|
||||
void radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
||||
const struct radv_graphics_state_key *gfx_state);
|
||||
|
|
|
|||
|
|
@ -633,17 +633,18 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
|
|||
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_tcs_info tcs_info;
|
||||
|
||||
const uint64_t tess_lvl_mask = VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER;
|
||||
const uint64_t per_vtx_out_mask = nir->info.outputs_read & nir->info.outputs_written & ~tess_lvl_mask;
|
||||
const uint64_t tess_lvl_out_mask = nir->info.outputs_written & tess_lvl_mask;
|
||||
const uint32_t per_patch_out_mask = nir->info.patch_outputs_read & nir->info.patch_outputs_written;
|
||||
nir_gather_tcs_info(nir, &tcs_info, nir->info.tess._primitive_mode, nir->info.tess.spacing);
|
||||
|
||||
info->tcs.num_lds_per_vertex_outputs = util_bitcount64(per_vtx_out_mask);
|
||||
info->tcs.num_lds_per_patch_outputs = util_bitcount64(tess_lvl_out_mask) + util_bitcount(per_patch_out_mask);
|
||||
info->tcs.tcs_outputs_read = nir->info.outputs_read;
|
||||
info->tcs.tcs_outputs_written = nir->info.outputs_written;
|
||||
/* Must mirror shader_info::patch_outputs_read (not patch_inputs_read): this value is copied back
 * into shader_info::patch_outputs_read when the TCS info is rebuilt for dynamic patch control
 * points, and has to match what the static path sees when it passes &nir->info directly.
 */
info->tcs.tcs_patch_outputs_read = nir->info.patch_outputs_read;
|
||||
info->tcs.tcs_patch_outputs_written = nir->info.patch_outputs_written;
|
||||
info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
|
||||
info->tcs.tes_inputs_read = ~0ULL;
|
||||
info->tcs.tes_patch_inputs_read = ~0ULL;
|
||||
info->tcs.all_invocations_define_tess_levels = tcs_info.all_invocations_define_tess_levels;
|
||||
|
||||
if (!info->inputs_linked)
|
||||
info->tcs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
|
||||
|
|
@ -655,16 +656,12 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
|
|||
}
|
||||
|
||||
if (gfx_state->ts.patch_control_points) {
|
||||
/* Number of tessellation patches per workgroup processed by the current pipeline. */
|
||||
info->num_tess_patches = radv_get_tcs_num_patches(
|
||||
pdev, gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
|
||||
info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs, info->tcs.num_linked_outputs,
|
||||
info->tcs.num_linked_patch_outputs);
|
||||
|
||||
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
|
||||
info->tcs.num_lds_blocks = radv_get_tess_lds_size(
|
||||
pdev, gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
|
||||
info->num_tess_patches, info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs);
|
||||
radv_get_tess_wg_info(pdev, &nir->info, gfx_state->ts.patch_control_points,
|
||||
/* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
|
||||
info->tcs.num_linked_inputs, info->tcs.num_linked_outputs,
|
||||
info->tcs.num_linked_patch_outputs, tcs_info.all_invocations_define_tess_levels,
|
||||
&info->num_tess_patches, &info->tcs.num_lds_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -235,14 +235,17 @@ struct radv_shader_info {
|
|||
struct {
|
||||
uint64_t tes_inputs_read;
|
||||
uint64_t tes_patch_inputs_read;
|
||||
uint64_t tcs_outputs_read;
|
||||
uint64_t tcs_outputs_written;
|
||||
uint32_t tcs_patch_outputs_read;
|
||||
uint32_t tcs_patch_outputs_written;
|
||||
unsigned tcs_vertices_out;
|
||||
uint32_t num_lds_blocks;
|
||||
uint8_t num_linked_inputs; /* Number of reserved per-vertex input slots in LDS. */
|
||||
uint8_t num_linked_outputs; /* Number of reserved per-vertex output slots in VRAM. */
|
||||
uint8_t num_linked_patch_outputs; /* Number of reserved per-patch output slots in VRAM. */
|
||||
uint8_t num_lds_per_vertex_outputs; /* Number of reserved per-vertex output slots in LDS. */
|
||||
uint8_t num_lds_per_patch_outputs; /* Number of reserved per-patch output slots in LDS. */
|
||||
bool tes_reads_tess_factors : 1;
|
||||
bool all_invocations_define_tess_levels : 1;
|
||||
} tcs;
|
||||
struct {
|
||||
enum mesa_prim output_prim;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue