mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 01:20:17 +01:00
radv: Remove PSIZ output when it isn't needed.
PSIZ output is only needed when:
1. There is a next stage and it reads it.
2. Primitive topology is point list, in the last vertex pipeline stage.

Zink always adds this output in its vertex (and other) shaders, because it helps Zink avoid recompiling shader variants. However, this has a performance impact for RADV because it needs a scalar memory load. That becomes noticeable at high primitive rates.

The Fossil stats are unremarkable because our DB doesn't include any shaders from Zink or D9VK, but there are a few affected shaders. Note that there may be an increase in LDS use in some GS. This is because with PSIZ removed the ES per-vertex LDS size is smaller, so we can squeeze more GS threads in the same workgroup.

Fossil DB stats on Sienna Cichlid:

Totals from 14 (0.01% of 128647) affected shaders:
CodeSize: 119884 -> 119732 (-0.13%)
LDS: 235008 -> 228864 (-2.61%); split: -2.83%, +0.22%
Instrs: 23076 -> 23048 (-0.12%)
Latency: 71667 -> 71625 (-0.06%)
InvThroughput: 19155 -> 18870 (-1.49%)
Copies: 1586 -> 1572 (-0.88%)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10725>
This commit is contained in:
parent
a2c30c1488
commit
92e1981a80
1 changed files with 37 additions and 2 deletions
|
|
@ -2305,7 +2305,9 @@ get_vs_output_info(const struct radv_pipeline *pipeline)
|
|||
}
|
||||
|
||||
static void
|
||||
radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders,
|
||||
radv_link_shaders(struct radv_pipeline *pipeline,
|
||||
const struct radv_pipeline_key *pipeline_key,
|
||||
nir_shader **shaders,
|
||||
bool optimize_conservatively)
|
||||
{
|
||||
nir_shader *ordered_shaders[MESA_SHADER_STAGES];
|
||||
|
|
@ -2389,6 +2391,39 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders,
|
|||
}
|
||||
}
|
||||
|
||||
if (!optimize_conservatively) {
|
||||
/* Remove PSIZ from shaders when it's not needed.
|
||||
* An unneeded PSIZ output is typically produced by translation layers like Zink or D9VK.
|
||||
*/
|
||||
for (unsigned i = 0; i < shader_count; ++i) {
|
||||
shader_info *info = &ordered_shaders[i]->info;
|
||||
if (!(info->outputs_written & VARYING_BIT_PSIZ))
|
||||
continue;
|
||||
|
||||
bool next_stage_needs_psiz =
|
||||
i != 0 && /* ordered_shaders is backwards, so next stage is: i - 1 */
|
||||
ordered_shaders[i - 1]->info.inputs_read & VARYING_BIT_PSIZ;
|
||||
bool topology_uses_psiz =
|
||||
info->stage == pipeline->graphics.last_vgt_api_stage &&
|
||||
((info->stage == MESA_SHADER_VERTEX && pipeline_key->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ||
|
||||
(info->stage == MESA_SHADER_TESS_EVAL && info->tess.point_mode) ||
|
||||
(info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == GL_POINTS));
|
||||
|
||||
if (!next_stage_needs_psiz && !topology_uses_psiz) {
|
||||
/* Change PSIZ to a global variable which allows it to be DCE'd. */
|
||||
nir_variable *psiz_var =
|
||||
nir_find_variable_with_location(ordered_shaders[i], nir_var_shader_out, VARYING_SLOT_PSIZ);
|
||||
psiz_var->data.location = 0;
|
||||
psiz_var->data.mode = nir_var_shader_temp;
|
||||
|
||||
info->outputs_written &= ~VARYING_BIT_PSIZ;
|
||||
nir_fixup_deref_modes(ordered_shaders[i]);
|
||||
nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_temp, NULL);
|
||||
nir_opt_dce(ordered_shaders[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) {
|
||||
if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) {
|
||||
nir_opt_constant_folding(ordered_shaders[i - 1]);
|
||||
|
|
@ -3395,7 +3430,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
|
|||
|
||||
bool optimize_conservatively = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
|
||||
|
||||
radv_link_shaders(pipeline, nir, optimize_conservatively);
|
||||
radv_link_shaders(pipeline, pipeline_key, nir, optimize_conservatively);
|
||||
radv_set_driver_locations(pipeline, nir, infos);
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue