diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 3ec82c9abc4..df4f18085bb 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -4971,7 +4971,9 @@ tu_compute_pipeline_create(VkDevice device, nir_initial_disasm = executable_info ? nir_shader_as_str(nir, pipeline->base.executables_mem_ctx) : NULL; - result = tu_shader_create(dev, &shader, nir, &key, &ir3_key, + struct tu_shader_info info = {}; + tu_lower_nir(dev, nir, &key, &info); + result = tu_shader_create(dev, &shader, nir, &key, &info, &ir3_key, pipeline_blake3, sizeof(pipeline_blake3), layout, executable_info); if (!shader) { diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 12656768aa2..5d80753dee4 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -3083,6 +3083,7 @@ tu_shader_create(struct tu_device *dev, struct tu_shader **shader_out, nir_shader *nir, const struct tu_shader_key *key, + const struct tu_shader_info *info, const struct ir3_shader_key *ir3_key, const void *key_data, size_t key_size, @@ -3094,10 +3095,7 @@ tu_shader_create(struct tu_device *dev, if (!shader) return VK_ERROR_OUT_OF_HOST_MEMORY; - struct tu_shader_info info = {}; - tu_lower_nir(dev, nir, key, &info); - - shader->per_layer_viewport = info.per_layer_viewport; + shader->per_layer_viewport = info->per_layer_viewport; if (nir->info.stage == MESA_SHADER_FRAGMENT && key->fdm_per_layer) { @@ -3232,108 +3230,42 @@ tu_shader_create(struct tu_device *dev, } static void -lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask) +link_opts(nir_shader *shader, void *data) { - bool progress = false; - NIR_PASS(progress, nir, nir_lower_io_vars_to_scalar, mask); + struct ir3_compiler *compiler = static_cast(data); - if (progress) { - /* Optimize the new vector code and then remove dead vars. */ - NIR_PASS(_, nir, nir_opt_copy_prop); - - if (mask & nir_var_shader_out) { - /* Optimize swizzled movs of load_const for nir_link_opt_varyings's - * constant propagation. - */ - NIR_PASS(_, nir, nir_opt_constant_folding); - - /* For nir_link_opt_varyings's duplicate input opt. */ - NIR_PASS(_, nir, nir_opt_cse); - } - - /* Run copy-propagation to help remove dead output variables (some - * shaders have useless copies to/from an output), so compaction later - * will be more effective. - * - * This will have been done earlier but it might not have worked because - * the outputs were vector. - */ - NIR_PASS(_, nir, nir_opt_copy_prop_vars); - - /* This must be called before nir_link_opt_varyings() and after - * nir_opt_copy_prop_vars(), otherwise repeated (scalarized) stores in the - * last block will propagate the wrong values into the consumer. - */ - NIR_PASS(_, nir, nir_opt_dead_write_vars); - - NIR_PASS(_, nir, nir_opt_dce); - - const nir_remove_dead_variables_options var_opts = { - .can_remove_var = - (mask & nir_var_shader_out) ? nir_vk_is_not_xfb_output : NULL, - }; - NIR_PASS(_, nir, nir_remove_dead_variables, mask, &var_opts); - } + struct ir3_optimize_options optimize_options = {}; + ir3_optimize_loop(compiler, &optimize_options, shader); } static void -tu_link_shaders(nir_shader **shaders, unsigned shaders_count) +tu_link_shaders(struct tu_device *dev, + nir_shader **shaders, + unsigned shaders_count) { - nir_shader *consumer = NULL; - for (mesa_shader_stage stage = (mesa_shader_stage) (shaders_count - 1); - stage >= MESA_SHADER_VERTEX; stage = (mesa_shader_stage) (stage - 1)) { - if (!shaders[stage]) - continue; + nir_shader *link_shaders[MESA_SHADER_STAGES] = {}; + assert(shaders_count <= ARRAY_SIZE(link_shaders)); - nir_shader *producer = shaders[stage]; - if (!consumer) { - consumer = producer; - continue; + unsigned link_shaders_count = 0; + + for (unsigned i = 0; i < shaders_count; i++) { + if (shaders[i]) { + link_shaders[link_shaders_count++] = shaders[i]; } - - lower_io_to_scalar_early(producer, nir_var_shader_out); - lower_io_to_scalar_early(consumer, nir_var_shader_in); - - if (nir_link_opt_varyings(producer, consumer)) { - NIR_PASS(_, consumer, nir_opt_constant_folding); - NIR_PASS(_, consumer, nir_opt_algebraic); - NIR_PASS(_, consumer, nir_opt_dce); - } - - const nir_remove_dead_variables_options out_var_opts = { - .can_remove_var = nir_vk_is_not_xfb_output, - }; - NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, - &out_var_opts); - - NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, - NULL); - - bool progress = nir_remove_unused_varyings(producer, consumer); - - nir_compact_varyings(producer, consumer, true); - if (progress) { - if (nir_lower_global_vars_to_local(producer)) { - /* Remove dead writes, which can remove input loads */ - NIR_PASS(_, producer, nir_remove_dead_variables, - nir_var_shader_temp, NULL); - NIR_PASS(_, producer, nir_opt_dce); - } - nir_lower_global_vars_to_local(consumer); - } - - NIR_PASS(_, producer, nir_opt_vectorize_io_vars, nir_var_shader_out); - NIR_PASS(_, consumer, nir_opt_vectorize_io_vars, nir_var_shader_in); - consumer = producer; } - /* Gather info after linking so that we can fill out the ir3 shader key. + nir_opt_varyings_bulk(link_shaders, link_shaders_count, true, UINT32_MAX, + UINT32_MAX, link_opts, dev->compiler); + + /* We have to make sure nir_recompute_io_bases is called at least once so + * that num_inputs/num_outputs is correctly set for all shaders. + * nir_opt_varyings_bulk will do this for us when linking multiple shaders + * but not when there is only a single shader. Call it manually in that + * case. */ - for (mesa_shader_stage stage = MESA_SHADER_VERTEX; - stage <= MESA_SHADER_FRAGMENT; stage = (mesa_shader_stage) (stage + 1)) { - if (shaders[stage]) - nir_shader_gather_info(shaders[stage], - nir_shader_get_entrypoint(shaders[stage])); + if (link_shaders_count == 1) { + NIR_PASS(_, link_shaders[0], nir_recompute_io_bases, + nir_var_shader_in | nir_var_shader_out); } } @@ -3370,6 +3302,7 @@ tu_compile_shaders(struct tu_device *device, VkPipelineCreationFeedback *stage_feedbacks) { struct ir3_shader_key ir3_key = {}; + struct tu_shader_info info[MESA_SHADER_STAGES] = {}; VkResult result = VK_SUCCESS; void *mem_ctx = ralloc_context(NULL); @@ -3407,7 +3340,19 @@ tu_compile_shaders(struct tu_device *device, } } - tu_link_shaders(nir, MESA_SHADER_STAGES); + for (mesa_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES; + stage = (mesa_shader_stage) (stage + 1)) { + if (!nir[stage]) + continue; + + int64_t stage_start = os_time_get_nano(); + + tu_lower_nir(device, nir[stage], &keys[stage], &info[stage]); + + stage_feedbacks[stage].duration += os_time_get_nano() - stage_start; + } + + tu_link_shaders(device, nir, MESA_SHADER_STAGES); if (nir_out) { for (mesa_shader_stage stage = MESA_SHADER_VERTEX; @@ -3482,6 +3427,7 @@ tu_compile_shaders(struct tu_device *device, result = tu_shader_create(device, &shaders[stage], nir[stage], &keys[stage], + &info[stage], &ir3_key, shader_blake3, sizeof(shader_blake3), layout, !!nir_initial_disasm); if (result != VK_SUCCESS) { diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h index ccead647011..3ae62052002 100644 --- a/src/freedreno/vulkan/tu_shader.h +++ b/src/freedreno/vulkan/tu_shader.h @@ -209,6 +209,7 @@ tu_shader_create(struct tu_device *dev, struct tu_shader **shader_out, nir_shader *nir, const struct tu_shader_key *key, + const struct tu_shader_info *shader_info, const struct ir3_shader_key *ir3_key, const void *key_data, size_t key_size,