mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
tu: use nir_opt_varyings_bulk for linking
Replace the use of nir_link_opt_varyings/nir_compact_varyings for linking with the new nir_opt_varyings linker using the nir_opt_varyings_bulk helper. This moves all the NIR lowering up to nir_lower_io (tu_lower_nir) to the pre-linking stage since nir_opt_varyings expects lowered IO. Totals from 38233 (21.69% of 176258) affected shaders: MaxWaves: 522636 -> 522578 (-0.01%); split: +0.14%, -0.15% Instrs: 15111014 -> 15062812 (-0.32%); split: -0.71%, +0.39% CodeSize: 31555448 -> 31530676 (-0.08%); split: -0.70%, +0.62% NOPs: 2605163 -> 2582030 (-0.89%); split: -2.38%, +1.49% MOVs: 519056 -> 511167 (-1.52%); split: -4.88%, +3.36% COVs: 244091 -> 243317 (-0.32%); split: -0.55%, +0.23% Full: 463796 -> 463307 (-0.11%); split: -0.47%, +0.36% (ss): 390558 -> 386374 (-1.07%); split: -3.07%, +2.00% (sy): 180298 -> 179347 (-0.53%); split: -1.55%, +1.02% (ss)-stall: 1485337 -> 1473362 (-0.81%); split: -3.92%, +3.11% (sy)-stall: 5441818 -> 5375690 (-1.22%); split: -2.99%, +1.78% Preamble Instrs: 3707325 -> 3724339 (+0.46%); split: -0.38%, +0.84% Early Preamble: 29397 -> 29392 (-0.02%); split: +0.10%, -0.12% Cat0: 2883908 -> 2860585 (-0.81%); split: -2.16%, +1.35% Cat1: 765447 -> 757066 (-1.09%); split: -3.46%, +2.36% Cat2: 5664380 -> 5663562 (-0.01%); split: -0.51%, +0.49% Cat3: 4393358 -> 4386474 (-0.16%); split: -0.27%, +0.12% Cat4: 443624 -> 443546 (-0.02%); split: -0.03%, +0.01% Cat5: 427389 -> 427239 (-0.04%); split: -0.27%, +0.24% Cat6: 173632 -> 164362 (-5.34%); split: -5.36%, +0.02% Cat7: 359276 -> 359978 (+0.20%); split: -1.33%, +1.53% Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40651>
This commit is contained in:
parent
99713d0c53
commit
893d3caf7b
3 changed files with 46 additions and 97 deletions
|
|
@ -4971,7 +4971,9 @@ tu_compute_pipeline_create(VkDevice device,
|
|||
nir_initial_disasm = executable_info ?
|
||||
nir_shader_as_str(nir, pipeline->base.executables_mem_ctx) : NULL;
|
||||
|
||||
result = tu_shader_create(dev, &shader, nir, &key, &ir3_key,
|
||||
struct tu_shader_info info = {};
|
||||
tu_lower_nir(dev, nir, &key, &info);
|
||||
result = tu_shader_create(dev, &shader, nir, &key, &info, &ir3_key,
|
||||
pipeline_blake3, sizeof(pipeline_blake3), layout,
|
||||
executable_info);
|
||||
if (!shader) {
|
||||
|
|
|
|||
|
|
@ -3083,6 +3083,7 @@ tu_shader_create(struct tu_device *dev,
|
|||
struct tu_shader **shader_out,
|
||||
nir_shader *nir,
|
||||
const struct tu_shader_key *key,
|
||||
const struct tu_shader_info *info,
|
||||
const struct ir3_shader_key *ir3_key,
|
||||
const void *key_data,
|
||||
size_t key_size,
|
||||
|
|
@ -3094,10 +3095,7 @@ tu_shader_create(struct tu_device *dev,
|
|||
if (!shader)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
struct tu_shader_info info = {};
|
||||
tu_lower_nir(dev, nir, key, &info);
|
||||
|
||||
shader->per_layer_viewport = info.per_layer_viewport;
|
||||
shader->per_layer_viewport = info->per_layer_viewport;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
key->fdm_per_layer) {
|
||||
|
|
@ -3232,108 +3230,42 @@ tu_shader_create(struct tu_device *dev,
|
|||
}
|
||||
|
||||
static void
|
||||
lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
|
||||
link_opts(nir_shader *shader, void *data)
|
||||
{
|
||||
bool progress = false;
|
||||
NIR_PASS(progress, nir, nir_lower_io_vars_to_scalar, mask);
|
||||
struct ir3_compiler *compiler = static_cast<struct ir3_compiler *>(data);
|
||||
|
||||
if (progress) {
|
||||
/* Optimize the new vector code and then remove dead vars. */
|
||||
NIR_PASS(_, nir, nir_opt_copy_prop);
|
||||
|
||||
if (mask & nir_var_shader_out) {
|
||||
/* Optimize swizzled movs of load_const for nir_link_opt_varyings's
|
||||
* constant propagation.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
|
||||
/* For nir_link_opt_varyings's duplicate input opt. */
|
||||
NIR_PASS(_, nir, nir_opt_cse);
|
||||
}
|
||||
|
||||
/* Run copy-propagation to help remove dead output variables (some
|
||||
* shaders have useless copies to/from an output), so compaction later
|
||||
* will be more effective.
|
||||
*
|
||||
* This will have been done earlier but it might not have worked because
|
||||
* the outputs were vector.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
|
||||
|
||||
/* This must be called before nir_link_opt_varyings() and after
|
||||
* nir_opt_copy_prop_vars(), otherwise repeated (scalarized) stores in the
|
||||
* last block will propagate the wrong values into the consumer.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_dead_write_vars);
|
||||
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
|
||||
const nir_remove_dead_variables_options var_opts = {
|
||||
.can_remove_var =
|
||||
(mask & nir_var_shader_out) ? nir_vk_is_not_xfb_output : NULL,
|
||||
};
|
||||
NIR_PASS(_, nir, nir_remove_dead_variables, mask, &var_opts);
|
||||
}
|
||||
struct ir3_optimize_options optimize_options = {};
|
||||
ir3_optimize_loop(compiler, &optimize_options, shader);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
|
||||
tu_link_shaders(struct tu_device *dev,
|
||||
nir_shader **shaders,
|
||||
unsigned shaders_count)
|
||||
{
|
||||
nir_shader *consumer = NULL;
|
||||
for (mesa_shader_stage stage = (mesa_shader_stage) (shaders_count - 1);
|
||||
stage >= MESA_SHADER_VERTEX; stage = (mesa_shader_stage) (stage - 1)) {
|
||||
if (!shaders[stage])
|
||||
continue;
|
||||
nir_shader *link_shaders[MESA_SHADER_STAGES] = {};
|
||||
assert(shaders_count <= ARRAY_SIZE(link_shaders));
|
||||
|
||||
nir_shader *producer = shaders[stage];
|
||||
if (!consumer) {
|
||||
consumer = producer;
|
||||
continue;
|
||||
unsigned link_shaders_count = 0;
|
||||
|
||||
for (unsigned i = 0; i < shaders_count; i++) {
|
||||
if (shaders[i]) {
|
||||
link_shaders[link_shaders_count++] = shaders[i];
|
||||
}
|
||||
|
||||
lower_io_to_scalar_early(producer, nir_var_shader_out);
|
||||
lower_io_to_scalar_early(consumer, nir_var_shader_in);
|
||||
|
||||
if (nir_link_opt_varyings(producer, consumer)) {
|
||||
NIR_PASS(_, consumer, nir_opt_constant_folding);
|
||||
NIR_PASS(_, consumer, nir_opt_algebraic);
|
||||
NIR_PASS(_, consumer, nir_opt_dce);
|
||||
}
|
||||
|
||||
const nir_remove_dead_variables_options out_var_opts = {
|
||||
.can_remove_var = nir_vk_is_not_xfb_output,
|
||||
};
|
||||
NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
|
||||
&out_var_opts);
|
||||
|
||||
NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
|
||||
NULL);
|
||||
|
||||
bool progress = nir_remove_unused_varyings(producer, consumer);
|
||||
|
||||
nir_compact_varyings(producer, consumer, true);
|
||||
if (progress) {
|
||||
if (nir_lower_global_vars_to_local(producer)) {
|
||||
/* Remove dead writes, which can remove input loads */
|
||||
NIR_PASS(_, producer, nir_remove_dead_variables,
|
||||
nir_var_shader_temp, NULL);
|
||||
NIR_PASS(_, producer, nir_opt_dce);
|
||||
}
|
||||
nir_lower_global_vars_to_local(consumer);
|
||||
}
|
||||
|
||||
NIR_PASS(_, producer, nir_opt_vectorize_io_vars, nir_var_shader_out);
|
||||
NIR_PASS(_, consumer, nir_opt_vectorize_io_vars, nir_var_shader_in);
|
||||
consumer = producer;
|
||||
}
|
||||
|
||||
/* Gather info after linking so that we can fill out the ir3 shader key.
|
||||
nir_opt_varyings_bulk(link_shaders, link_shaders_count, true, UINT32_MAX,
|
||||
UINT32_MAX, link_opts, dev->compiler);
|
||||
|
||||
/* We have to make sure nir_recompute_io_bases is called at least once so
|
||||
* that num_inputs/num_outputs is correctly set for all shaders.
|
||||
* nir_opt_varyings_bulk will do this for us when linking multiple shaders
|
||||
* but not when there is only a single shader. Call it manually in that
|
||||
* case.
|
||||
*/
|
||||
for (mesa_shader_stage stage = MESA_SHADER_VERTEX;
|
||||
stage <= MESA_SHADER_FRAGMENT; stage = (mesa_shader_stage) (stage + 1)) {
|
||||
if (shaders[stage])
|
||||
nir_shader_gather_info(shaders[stage],
|
||||
nir_shader_get_entrypoint(shaders[stage]));
|
||||
if (link_shaders_count == 1) {
|
||||
NIR_PASS(_, link_shaders[0], nir_recompute_io_bases,
|
||||
nir_var_shader_in | nir_var_shader_out);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3370,6 +3302,7 @@ tu_compile_shaders(struct tu_device *device,
|
|||
VkPipelineCreationFeedback *stage_feedbacks)
|
||||
{
|
||||
struct ir3_shader_key ir3_key = {};
|
||||
struct tu_shader_info info[MESA_SHADER_STAGES] = {};
|
||||
VkResult result = VK_SUCCESS;
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
|
|
@ -3407,7 +3340,19 @@ tu_compile_shaders(struct tu_device *device,
|
|||
}
|
||||
}
|
||||
|
||||
tu_link_shaders(nir, MESA_SHADER_STAGES);
|
||||
for (mesa_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
|
||||
stage = (mesa_shader_stage) (stage + 1)) {
|
||||
if (!nir[stage])
|
||||
continue;
|
||||
|
||||
int64_t stage_start = os_time_get_nano();
|
||||
|
||||
tu_lower_nir(device, nir[stage], &keys[stage], &info[stage]);
|
||||
|
||||
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
|
||||
}
|
||||
|
||||
tu_link_shaders(device, nir, MESA_SHADER_STAGES);
|
||||
|
||||
if (nir_out) {
|
||||
for (mesa_shader_stage stage = MESA_SHADER_VERTEX;
|
||||
|
|
@ -3482,6 +3427,7 @@ tu_compile_shaders(struct tu_device *device,
|
|||
|
||||
result = tu_shader_create(device,
|
||||
&shaders[stage], nir[stage], &keys[stage],
|
||||
&info[stage],
|
||||
&ir3_key, shader_blake3, sizeof(shader_blake3),
|
||||
layout, !!nir_initial_disasm);
|
||||
if (result != VK_SUCCESS) {
|
||||
|
|
|
|||
|
|
@ -209,6 +209,7 @@ tu_shader_create(struct tu_device *dev,
|
|||
struct tu_shader **shader_out,
|
||||
nir_shader *nir,
|
||||
const struct tu_shader_key *key,
|
||||
const struct tu_shader_info *shader_info,
|
||||
const struct ir3_shader_key *ir3_key,
|
||||
const void *key_data,
|
||||
size_t key_size,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue