From fce6ca0f3a674f7531598c1588ed100ec80c89c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Fri, 28 Jan 2022 14:29:05 +0100 Subject: [PATCH] radv: remove exports without color attachment or writemask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This lets us make use of NIR's more advanced DCE. This includes the removal of CF constructs, PS inputs and VS outputs. Totals from 1959 (1.45% of 134913) affected shaders: (GFX10.3) VGPRs: 73464 -> 71944 (-2.07%); split: -3.79%, +1.72% SpillSGPRs: 6 -> 0 (-inf%) CodeSize: 4860324 -> 4675248 (-3.81%); split: -4.92%, +1.11% LDS: 2619904 -> 2781696 (+6.18%); split: -0.37%, +6.55% MaxWaves: 50614 -> 50852 (+0.47%); split: +1.63%, -1.16% Instrs: 924233 -> 887836 (-3.94%); split: -5.01%, +1.07% Latency: 5635532 -> 5418083 (-3.86%); split: -4.53%, +0.67% InvThroughput: 1107764 -> 1077542 (-2.73%); split: -3.44%, +0.71% VClause: 17361 -> 16163 (-6.90%); split: -8.38%, +1.47% SClause: 31886 -> 29323 (-8.04%); split: -8.52%, +0.48% Copies: 53529 -> 52127 (-2.62%); split: -5.30%, +2.68% Branches: 22993 -> 22802 (-0.83%); split: -3.44%, +2.61% PreSGPRs: 53123 -> 51395 (-3.25%); split: -3.60%, +0.35% PreVGPRs: 59699 -> 57424 (-3.81%); split: -5.13%, +1.32% Reviewed-by: Timur Kristóf Part-of: --- src/amd/vulkan/radv_pipeline.c | 45 ++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index b372079c37b..3e76366c2e4 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2586,15 +2586,46 @@ radv_link_shaders(struct radv_pipeline *pipeline, } } - bool uses_xfb = pipeline->graphics.last_vgt_api_stage != -1 && - radv_nir_stage_uses_xfb(shaders[pipeline->graphics.last_vgt_api_stage]); - if (!uses_xfb && !optimize_conservatively) { - /* Remove PSIZ from shaders when it's not needed. 
- * This is typically produced by translation layers like Zink or D9VK. - */ + if (!optimize_conservatively) { + bool uses_xfb = pipeline->graphics.last_vgt_api_stage != -1 && + radv_nir_stage_uses_xfb(shaders[pipeline->graphics.last_vgt_api_stage]); + for (unsigned i = 0; i < shader_count; ++i) { shader_info *info = &ordered_shaders[i]->info; - if (!(info->outputs_written & VARYING_BIT_PSIZ)) + + /* Remove exports without color attachment or writemask. */ + if (info->stage == MESA_SHADER_FRAGMENT) { + bool fixup_derefs = false; + nir_foreach_variable_with_modes(var, ordered_shaders[i], nir_var_shader_out) { + int idx = var->data.location; + idx -= FRAG_RESULT_DATA0; + if (idx < 0) + continue; + + unsigned col_format = (pipeline_key->ps.col_format >> (4 * idx)) & 0xf; + switch (col_format) { + case V_028714_SPI_SHADER_ZERO: + info->outputs_written &= ~BITFIELD64_BIT(var->data.location); + var->data.location = 0; + var->data.mode = nir_var_shader_temp; + fixup_derefs = true; + break; + default: + break; + } + } + if (fixup_derefs) { + nir_fixup_deref_modes(ordered_shaders[i]); + nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_temp, NULL); + nir_opt_dce(ordered_shaders[i]); + } + continue; + } + + /* Remove PSIZ from shaders when it's not needed. + * This is typically produced by translation layers like Zink or D9VK. + */ + if (uses_xfb || !(info->outputs_written & VARYING_BIT_PSIZ)) continue; bool next_stage_needs_psiz =