diff --git a/src/imagination/pco/pco_nir.c b/src/imagination/pco/pco_nir.c index f7b2685c7e0..d4bf942020e 100644 --- a/src/imagination/pco/pco_nir.c +++ b/src/imagination/pco/pco_nir.c @@ -24,6 +24,7 @@ static const struct spirv_to_nir_options spirv_options = { .ubo_addr_format = nir_address_format_vec2_index_32bit_offset, .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset, .push_const_addr_format = nir_address_format_32bit_offset, + .shared_addr_format = nir_address_format_32bit_offset, .min_ubo_alignment = PVR_UNIFORM_BUFFER_OFFSET_ALIGNMENT, .min_ssbo_alignment = PVR_STORAGE_BUFFER_OFFSET_ALIGNMENT, @@ -49,8 +50,13 @@ static const nir_shader_compiler_options nir_options = { .lower_ifind_msb = true, .lower_ldexp = true, .lower_layer_fs_input_to_sysval = true, + .lower_uadd_carry = true, + .lower_uadd_sat = true, + .lower_usub_borrow = true, .compact_arrays = true, .scalarize_ddx = true, + + .max_unroll_iterations = 16, }; /** @@ -74,99 +80,6 @@ const nir_shader_compiler_options *pco_nir_options(void) return &nir_options; } -/** - * \brief Runs pre-processing passes on a NIR shader. - * - * \param[in] ctx PCO compiler context. - * \param[in,out] nir NIR shader. 
- */ -void pco_preprocess_nir(pco_ctx *ctx, nir_shader *nir) -{ - if (nir->info.stage == MESA_SHADER_COMPUTE) - NIR_PASS(_, nir, pco_nir_compute_instance_check); - - if (nir->info.internal) - NIR_PASS(_, nir, nir_lower_returns); - - NIR_PASS(_, nir, nir_lower_global_vars_to_local); - NIR_PASS(_, nir, nir_lower_vars_to_ssa); - NIR_PASS(_, nir, nir_split_var_copies); - NIR_PASS(_, nir, nir_lower_var_copies); - NIR_PASS(_, nir, nir_split_per_member_structs); - NIR_PASS(_, - nir, - nir_split_struct_vars, - nir_var_function_temp | nir_var_shader_temp); - NIR_PASS(_, - nir, - nir_split_array_vars, - nir_var_function_temp | nir_var_shader_temp); - - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { - .frag_coord = true, - .point_coord = true, - }; - NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); - } - - NIR_PASS(_, nir, nir_lower_system_values); - - if (nir->info.stage == MESA_SHADER_COMPUTE) { - NIR_PASS(_, - nir, - nir_lower_compute_system_values, - &(nir_lower_compute_system_values_options){ - .lower_cs_local_id_to_index = true, - }); - } - - NIR_PASS(_, - nir, - nir_lower_io_vars_to_temporaries, - nir_shader_get_entrypoint(nir), - true, - true); - - NIR_PASS(_, nir, nir_split_var_copies); - NIR_PASS(_, nir, nir_lower_var_copies); - NIR_PASS(_, nir, nir_lower_global_vars_to_local); - NIR_PASS(_, nir, nir_lower_vars_to_ssa); - NIR_PASS(_, nir, nir_remove_dead_derefs); - - NIR_PASS(_, - nir, - nir_lower_indirect_derefs, - nir_var_shader_in | nir_var_shader_out, - UINT32_MAX); - - NIR_PASS(_, nir, nir_opt_idiv_const, 32); - NIR_PASS(_, - nir, - nir_lower_idiv, - &(nir_lower_idiv_options){ - .allow_fp16 = false, - }); - - NIR_PASS(_, nir, nir_lower_frexp); - NIR_PASS(_, nir, nir_lower_flrp, 32, true); - - NIR_PASS(_, - nir, - nir_remove_dead_variables, - nir_var_function_temp | nir_var_shader_temp, - NULL); - NIR_PASS(_, nir, nir_copy_prop); - NIR_PASS(_, nir, nir_opt_dce); - 
NIR_PASS(_, nir, nir_opt_dead_cf);
-   NIR_PASS(_, nir, nir_opt_cse);
-
-   if (pco_should_print_nir(nir)) {
-      puts("after pco_preprocess_nir:");
-      nir_print_shader(nir, stdout);
-   }
-}
-
 /**
  * \brief Returns the GLSL type size.
  *
@@ -235,133 +148,41 @@ static bool frag_in_scalar_filter(const nir_instr *instr, const void *data)
    return false;
 }
 
-/**
- * \brief Lowers a NIR shader.
- *
- * \param[in] ctx PCO compiler context.
- * \param[in,out] nir NIR shader.
- * \param[in,out] data Shader data.
- */
-void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
+/**
+ * \brief Reports the size and alignment of a shared memory variable type.
+ *
+ * Booleans are sized as 4 bytes per component; any other type uses its bit
+ * size divided by 8. The alignment is that of a single component.
+ *
+ * \param[in] type Vector or scalar GLSL type (asserted).
+ * \param[out] size Component size multiplied by the vector element count.
+ * \param[out] align Component size.
+ */
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 {
-   NIR_PASS(_,
-            nir,
-            nir_lower_explicit_io,
-            nir_var_mem_ubo | nir_var_mem_ssbo,
-            nir_address_format_vec2_index_32bit_offset);
+   assert(glsl_type_is_vector_or_scalar(type));
 
-   NIR_PASS(_,
-            nir,
-            nir_lower_explicit_io,
-            nir_var_mem_push_const,
-            spirv_options.push_const_addr_format);
+   const bool is_bool = glsl_type_is_boolean(type);
+   const uint32_t bytes_per_comp = is_bool ? 4 : glsl_get_bit_size(type) / 8;
+   const unsigned num_comps = glsl_get_vector_elements(type);
+
+   *size = bytes_per_comp * num_comps;
+   *align = bytes_per_comp;
+}
 
-   NIR_PASS(_, nir, pco_nir_lower_vk, &data->common);
-
-   NIR_PASS(_,
-            nir,
-            nir_lower_io,
-            nir_var_shader_in | nir_var_shader_out,
-            glsl_type_size,
-            nir_lower_io_lower_64bit_to_32);
-
-   NIR_PASS(_, nir, nir_opt_dce);
-   NIR_PASS(_, nir, nir_opt_constant_folding);
-   NIR_PASS(_,
-            nir,
-            nir_io_add_const_offset_to_base,
-            nir_var_shader_in | nir_var_shader_out);
-
-   NIR_PASS(_, nir, nir_lower_tex, &(nir_lower_tex_options){});
-   NIR_PASS(_, nir, pco_nir_lower_tex);
-
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      NIR_PASS(_, nir, pco_nir_pfo, &data->fs);
-   } else if (nir->info.stage == MESA_SHADER_VERTEX) {
-      NIR_PASS(_,
-               nir,
-               nir_lower_point_size,
-               PVR_POINT_SIZE_RANGE_MIN,
-               PVR_POINT_SIZE_RANGE_MAX);
-
-      if (!nir->info.internal)
-         NIR_PASS(_, nir, pco_nir_point_size);
-
-      NIR_PASS(_, nir, pco_nir_pvi, &data->vs);
-   }
-
-   /* TODO: this should happen in the linking stage to cull unused I/O.
*/ - NIR_PASS(_, - nir, - nir_lower_io_to_scalar, - nir_var_shader_in | nir_var_shader_out, - NULL, - NULL); - - NIR_PASS(_, nir, nir_lower_vars_to_ssa); - NIR_PASS(_, nir, nir_opt_copy_prop_vars); - NIR_PASS(_, nir, nir_opt_dead_write_vars); - NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all); - - bool progress; - NIR_PASS(_, nir, nir_lower_alu); - NIR_PASS(_, nir, nir_lower_pack); - NIR_PASS(_, nir, nir_opt_algebraic); - NIR_PASS(_, nir, pco_nir_lower_algebraic); - NIR_PASS(_, nir, nir_opt_constant_folding); - NIR_PASS(_, nir, nir_opt_algebraic); - NIR_PASS(_, nir, pco_nir_lower_algebraic); - NIR_PASS(_, nir, nir_opt_constant_folding); - - NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL); - - do { - progress = false; - - NIR_PASS(progress, nir, nir_opt_algebraic_late); - NIR_PASS(progress, nir, pco_nir_lower_algebraic_late); - NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_lower_load_const_to_scalar); - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_cse); - } while (progress); - - nir_variable_mode vec_modes = nir_var_shader_in; - /* Fragment shader needs scalar writes after pfo. */ - if (nir->info.stage != MESA_SHADER_FRAGMENT) - vec_modes |= nir_var_shader_out; - - NIR_PASS(_, nir, nir_opt_vectorize_io, vec_modes, false); - - /* Special case for frag coords: - * - x,y come from (non-consecutive) special regs - always scalar. - * - z,w are iterated and driver will make sure they're consecutive. - * - TODO: keep scalar for now, but add pass to vectorize. 
-    */
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      NIR_PASS(_,
-               nir,
-               nir_lower_io_to_scalar,
-               nir_var_shader_in,
-               frag_in_scalar_filter,
-               nir);
-   }
-
-   do {
-      progress = false;
-
-      NIR_PASS(progress, nir, nir_copy_prop);
-      NIR_PASS(progress, nir, nir_opt_dce);
-      NIR_PASS(progress, nir, nir_opt_cse);
-      NIR_PASS(progress, nir, nir_opt_constant_folding);
-      NIR_PASS(progress, nir, nir_opt_undef);
-   } while (progress);
-
-   if (pco_should_print_nir(nir)) {
-      puts("after pco_lower_nir:");
-      nir_print_shader(nir, stdout);
-   }
+/**
+ * \brief Checks whether two varyings agree on location, location_frac and type.
+ *
+ * \param[in] out_var The first varying being compared.
+ * \param[in] in_var The second varying being compared.
+ * \return True only if all three fields compare equal.
+ */
+static bool varyings_match(nir_variable *out_var, nir_variable *in_var)
+{
+   return in_var->data.location == out_var->data.location &&
+          in_var->data.location_frac == out_var->data.location_frac &&
+          in_var->type == out_var->type;
 }
 
 /**
@@ -436,6 +245,7 @@ static void gather_cs_data(nir_shader *nir, pco_data *data)
  * \param[in] op The NIR intrinsic op.
  * \return True if the intrinsic op is atomic, else false.
  */
+/* TODO: what about emulated atomic ops?
*/
 static inline bool intr_op_is_atomic(nir_intrinsic_op op)
 {
    switch (op) {
@@ -450,6 +260,42 @@ static inline bool intr_op_is_atomic(nir_intrinsic_op op)
    return false;
 }
 
+/**
+ * \brief Tracks the push constant range read by an intrinsic.
+ *
+ * Only load_push_constant is handled. With a constant offset, the tracked
+ * value grows to at least offset + number of loaded components. A
+ * non-constant offset sets it to ~0U, and it then stays at ~0U.
+ *
+ * NOTE(review): despite the name, no store intrinsics are inspected here.
+ *
+ * \param[in] intr The intrinsic instruction.
+ * \param[in,out] common Common shader data.
+ */
+static void gather_common_store_data(nir_intrinsic_instr *intr,
+                                     pco_common_data *common)
+{
+   nir_src *offset_src;
+   unsigned num_components;
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_push_constant:
+      offset_src = &intr->src[0];
+      num_components = intr->def.num_components;
+      break;
+
+   default:
+      return;
+   }
+
+   if (nir_src_is_const(*offset_src) && common->push_consts.used != ~0U) {
+      unsigned offset = nir_src_as_uint(*offset_src);
+      common->push_consts.used =
+         MAX2(common->push_consts.used, offset + num_components);
+   } else {
+      common->push_consts.used = ~0U;
+   }
+}
+
 /**
  * \brief Gather common data pass.
  *
@@ -464,6 +310,8 @@ static bool gather_common_data_pass(UNUSED struct nir_builder *b,
 {
    pco_data *data = cb_data;
    data->common.uses.atomics |= intr_op_is_atomic(intr->intrinsic);
+   gather_common_store_data(intr, &data->common);
+
    return false;
 }
 
@@ -508,33 +356,193 @@ static void gather_data(nir_shader *nir, pco_data *data)
    UNREACHABLE("");
 }
 
+/**
+ * \brief Callback deciding whether memory accesses may be vectorized.
+ *
+ * Only bit_size, hole_size and num_components are consulted; the remaining
+ * parameters are ignored.
+ *
+ * \return False for accesses wider than 32 bits, for a positive hole size or
+ *         for an invalid NIR component count; true otherwise.
+ */
+static bool should_vectorize_mem_cb(unsigned align_mul,
+                                    unsigned align_offset,
+                                    unsigned bit_size,
+                                    unsigned num_components,
+                                    int64_t hole_size,
+                                    nir_intrinsic_instr *low,
+                                    nir_intrinsic_instr *high,
+                                    void *data)
+{
+   if (bit_size > 32 || hole_size > 0)
+      return false;
+
+   if (!nir_num_components_valid(num_components))
+      return false;
+
+   return true;
+}
+
+/**
+ * \brief Runs the shared optimization loop until no pass reports progress.
+ *
+ * A warning is printed and the loop is abandoned if it is still making
+ * progress after more than 1000 iterations.
+ *
+ * \param[in] ctx PCO compiler context (unused here).
+ * \param[in,out] nir NIR shader.
+ */
+static void pco_nir_opt(pco_ctx *ctx, nir_shader *nir)
+{
+   bool progress;
+
+   unsigned count = 0;
+   do {
+      progress = false;
+
+      /* Bump the counter every iteration; otherwise this guard never fires. */
+      if (count++ > 1000) {
+         printf("WARNING! Infinite opt loop!\n");
+         break;
+      }
+
+      NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
+      NIR_PASS(progress, nir, nir_opt_deref);
+
+      bool progress_opt_memcpy = false;
+      NIR_PASS(progress_opt_memcpy, nir, nir_opt_memcpy);
+      progress |= progress_opt_memcpy;
+
+      if (progress_opt_memcpy)
+         NIR_PASS(progress, nir, nir_split_var_copies);
+
+      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+
+      if (!nir->info.var_copies_lowered)
+         NIR_PASS(progress, nir, nir_opt_find_array_copies);
+      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+      NIR_PASS(progress, nir, nir_opt_dead_write_vars);
+
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_remove_phis);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      NIR_PASS(progress, nir, nir_opt_dead_cf);
+      NIR_PASS(progress, nir, nir_opt_cse);
+      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
+      nir_opt_peephole_select_options peep_opts = {
+         .limit = 64,
+         .expensive_alu_ok = true,
+      };
+      NIR_PASS(progress, nir, nir_opt_peephole_select, &peep_opts);
+      NIR_PASS(progress, nir, nir_opt_phi_precision);
+      NIR_PASS(progress, nir, nir_lower_alu);
+      NIR_PASS(progress, nir, nir_lower_pack);
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, pco_nir_lower_algebraic);
+
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+      nir_load_store_vectorize_options vectorize_opts = {
+         .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
+         .callback = should_vectorize_mem_cb,
+      };
+      NIR_PASS(progress, nir, nir_opt_load_store_vectorize, &vectorize_opts);
+
+      NIR_PASS(progress, nir, nir_opt_shrink_stores, true);
+
+      NIR_PASS(progress, nir, nir_opt_loop);
+      NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
+      NIR_PASS(progress, nir, nir_opt_undef);
+      NIR_PASS(progress, nir, nir_lower_undef_to_zero);
+      NIR_PASS(progress, nir, nir_opt_loop_unroll);
+   } while (progress);
+}
+
 /**
- * \brief Runs post-processing passes on a NIR shader.
+ * \brief Runs pre-processing passes on a NIR shader.
  *
  * \param[in] ctx PCO compiler context.
  * \param[in,out] nir NIR shader.
- * \param[in,out] data Shader data.
  */
-void pco_postprocess_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
+void pco_preprocess_nir(pco_ctx *ctx, nir_shader *nir)
 {
-   NIR_PASS(_, nir, nir_opt_sink, ~0U);
-   NIR_PASS(_, nir, nir_opt_move, ~0U);
+   if (nir->info.stage == MESA_SHADER_COMPUTE)
+      NIR_PASS(_, nir, pco_nir_compute_instance_check);
 
-   NIR_PASS(_, nir, nir_move_vec_src_uses_to_dest, false);
+   if (nir->info.internal)
+      NIR_PASS(_, nir, nir_lower_returns);
 
-   /* Re-index everything. */
-   nir_foreach_function_with_impl (_, impl, nir) {
-      nir_index_blocks(impl);
-      nir_index_instrs(impl);
-      nir_index_ssa_defs(impl);
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
+         .frag_coord = true,
+         .point_coord = true,
+      };
+      NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
    }
 
-   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+   NIR_PASS(_, nir, nir_lower_system_values);
 
-   gather_data(nir, data);
+   if (nir->info.stage == MESA_SHADER_COMPUTE) {
+      NIR_PASS(_,
+               nir,
+               nir_lower_compute_system_values,
+               &(nir_lower_compute_system_values_options){
+                  .lower_cs_local_id_to_index = true,
+               });
+   }
+
+   NIR_PASS(_,
+            nir,
+            nir_lower_io_vars_to_temporaries,
+            nir_shader_get_entrypoint(nir),
+            true,
+            true);
+
+   NIR_PASS(_, nir, nir_lower_global_vars_to_local);
+   NIR_PASS(_, nir, nir_split_var_copies);
+   NIR_PASS(_, nir, nir_lower_var_copies);
+   NIR_PASS(_, nir, nir_split_per_member_structs);
+   NIR_PASS(_,
+            nir,
+            nir_split_struct_vars,
+            nir_var_function_temp | nir_var_shader_temp);
+   NIR_PASS(_,
+            nir,
+            nir_split_array_vars,
+            nir_var_function_temp | nir_var_shader_temp);
+
+   NIR_PASS(_,
+            nir,
+            nir_lower_indirect_derefs,
+            nir_var_shader_in | nir_var_shader_out,
+            UINT32_MAX);
+
+   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+
+   NIR_PASS(_, nir, nir_opt_idiv_const, 32);
+   NIR_PASS(_,
+            nir,
+            nir_lower_idiv,
+            &(nir_lower_idiv_options){
+               .allow_fp16 = false,
+            });
+
+   NIR_PASS(_, nir, nir_scale_fdiv);
+   NIR_PASS(_, nir, nir_lower_frexp);
+   NIR_PASS(_, nir, nir_lower_flrp, 32, true);
+
+   NIR_PASS(_, nir, nir_remove_dead_derefs);
+   NIR_PASS(_, nir, nir_opt_dce);
+   NIR_PASS(_,
+            nir,
+            nir_remove_dead_variables,
+            nir_var_function_temp | nir_var_shader_temp,
+            NULL);
+
+   pco_nir_opt(ctx, nir); /* Finish with the shared optimization loop. */
 
    if (pco_should_print_nir(nir)) {
-      puts("after pco_postprocess_nir:");
+      puts("after pco_preprocess_nir:");
       nir_print_shader(nir, stdout);
    }
 }
@@ -548,8 +525,49 @@ void pco_postprocess_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
  */
 void pco_link_nir(pco_ctx *ctx, nir_shader *producer, nir_shader *consumer)
 {
-   /* TODO */
-   puts("finishme: pco_link_nir");
+   /* TODO: clip/cull */
+
+   nir_lower_io_array_vars_to_elements(producer, consumer);
+   nir_validate_shader(producer, "after nir_lower_io_array_vars_to_elements");
+   nir_validate_shader(consumer, "after nir_lower_io_array_vars_to_elements");
+
+   NIR_PASS(_, producer, nir_lower_io_vars_to_scalar, nir_var_shader_out);
+   NIR_PASS(_, consumer, nir_lower_io_vars_to_scalar, nir_var_shader_in);
+
+   pco_nir_opt(ctx, producer);
+   pco_nir_opt(ctx, consumer);
+
+   if (nir_link_opt_varyings(producer, consumer))
+      pco_nir_opt(ctx, consumer); /* Only the consumer is re-optimized. */
+
+   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
+   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
+
+   bool progress = nir_remove_unused_varyings(producer, consumer);
+   nir_compact_varyings(producer, consumer, true);
+
+   if (progress) { /* nir_remove_unused_varyings() returned true. */
+      NIR_PASS(_, producer, nir_lower_global_vars_to_local);
+      NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
+
+      NIR_PASS(_,
+               producer,
+               nir_lower_indirect_derefs,
+               nir_var_shader_in | nir_var_shader_out,
+               UINT32_MAX);
+      NIR_PASS(_,
+               consumer,
+               nir_lower_indirect_derefs,
+               nir_var_shader_in | nir_var_shader_out,
+               UINT32_MAX);
+
+      pco_nir_opt(ctx, producer);
+      pco_nir_opt(ctx, consumer);
+   }
+
+   NIR_PASS(_, producer, nir_opt_vectorize_io_vars, nir_var_shader_out);
+   NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out);
+   NIR_PASS(_, consumer, nir_opt_vectorize_io_vars, nir_var_shader_in);
 
    if (pco_should_print_nir(producer)) {
       puts("producer after pco_link_nir:");
@@ -562,20 +580,6 @@ void pco_link_nir(pco_ctx *ctx, nir_shader *producer, nir_shader *consumer)
    }
 }
 
-/**
- * \brief Checks whether two varying variables are the same.
- *
- * \param[in] out_var The first varying being compared.
- * \param[in] in_var The second varying being compared.
- * \return True if the varyings match.
- */
-static bool varyings_match(nir_variable *out_var, nir_variable *in_var)
-{
-   return in_var->data.location == out_var->data.location &&
-          in_var->data.location_frac == out_var->data.location_frac &&
-          in_var->type == out_var->type;
-}
-
 /**
  * \brief Performs reverse linking optimizations on consecutive NIR shader
  * stages.
@@ -586,9 +590,6 @@ static bool varyings_match(nir_variable *out_var, nir_variable *in_var)
  */
 void pco_rev_link_nir(pco_ctx *ctx, nir_shader *producer, nir_shader *consumer)
 {
-   /* TODO */
-   puts("finishme: pco_rev_link_nir");
-
    /* Propagate back/adjust the interpolation qualifiers. */
    nir_foreach_shader_in_variable (in_var, consumer) {
       if (in_var->data.location == VARYING_SLOT_POS ||
@@ -617,3 +618,195 @@ void pco_rev_link_nir(pco_ctx *ctx, nir_shader *producer, nir_shader *consumer)
       nir_print_shader(consumer, stdout);
    }
 }
+/**
+ * \brief Lowers a NIR shader.
+ *
+ * \param[in] ctx PCO compiler context.
+ * \param[in,out] nir NIR shader.
+ * \param[in,out] data Shader data.
+ */
+void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
+{
+   NIR_PASS(_,
+            nir,
+            nir_opt_access,
+            &(nir_opt_access_options){ .is_vulkan = true });
+
+   NIR_PASS(_, nir, nir_lower_memory_model);
+
+   NIR_PASS(_, nir, nir_opt_licm);
+
+   NIR_PASS(_, nir, nir_lower_memcpy);
+
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS(_, nir, nir_opt_vectorize_io_vars, nir_var_shader_out);
+
+   NIR_PASS(_,
+            nir,
+            nir_lower_explicit_io,
+            nir_var_mem_ubo | nir_var_mem_ssbo,
+            nir_address_format_vec2_index_32bit_offset);
+
+   nir_move_options move_options = /* nir_move_* flags only, not opcodes. */
+      nir_move_load_ubo | nir_move_load_ssbo | nir_move_load_input |
+      nir_move_load_frag_coord | nir_move_load_uniform;
+   NIR_PASS(_, nir, nir_opt_sink, move_options);
+   NIR_PASS(_, nir, nir_opt_move, move_options);
+
+   if (!nir->info.shared_memory_explicit_layout) {
+      NIR_PASS(_,
+               nir,
+               nir_lower_vars_to_explicit_types,
+               nir_var_mem_shared,
+               shared_var_info);
+   }
+
+   NIR_PASS(_,
+            nir,
+            nir_lower_explicit_io,
+            nir_var_mem_push_const | nir_var_mem_shared,
+            nir_address_format_32bit_offset);
+   NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_mem_shared, NULL, NULL);
+
+   NIR_PASS(_, nir, pco_nir_lower_vk, &data->common);
+
+   NIR_PASS(_,
+            nir,
+            nir_lower_io,
+            nir_var_shader_in | nir_var_shader_out,
+            glsl_type_size,
+            nir_lower_io_lower_64bit_to_32);
+
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
+      NIR_PASS(_, nir, nir_copy_prop);
+      NIR_PASS(_, nir, nir_opt_dce);
+      NIR_PASS(_, nir, nir_opt_cse);
+      NIR_PASS(_, nir, nir_opt_vectorize_io, nir_var_shader_out, false);
+   }
+
+   NIR_PASS(_, nir, nir_opt_dce);
+   NIR_PASS(_, nir, nir_opt_constant_folding);
+   NIR_PASS(_,
+            nir,
+            nir_io_add_const_offset_to_base,
+            nir_var_shader_in | nir_var_shader_out);
+
+   NIR_PASS(_, nir, nir_lower_tex, &(nir_lower_tex_options){});
+   NIR_PASS(_, nir, pco_nir_lower_tex);
+
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS(_, nir, pco_nir_pfo, &data->fs);
+   } else if (nir->info.stage == MESA_SHADER_VERTEX) {
+      NIR_PASS(_,
+               nir,
+               nir_lower_point_size,
+               PVR_POINT_SIZE_RANGE_MIN,
+               PVR_POINT_SIZE_RANGE_MAX);
+
+      if (!nir->info.internal)
+         NIR_PASS(_, nir, pco_nir_point_size);
+
+      NIR_PASS(_, nir, pco_nir_pvi, &data->vs);
+   }
+
+   NIR_PASS(_,
+            nir,
+            nir_lower_io_to_scalar,
+            nir_var_shader_in | nir_var_shader_out,
+            NULL,
+            NULL);
+
+   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+   NIR_PASS(_, nir, nir_opt_dead_write_vars);
+   NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
+
+   pco_nir_opt(ctx, nir);
+
+   bool progress; /* Set by the late passes so the loop can iterate. */
+   do {
+      progress = false;
+
+      NIR_PASS(progress, nir, nir_opt_algebraic_late);
+      NIR_PASS(progress, nir, pco_nir_lower_algebraic_late);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+      NIR_PASS(progress, nir, nir_lower_load_const_to_scalar);
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      NIR_PASS(progress, nir, nir_opt_cse);
+   } while (progress);
+
+   nir_variable_mode vec_modes = nir_var_shader_in;
+   /* Fragment shader needs scalar writes after pfo. */
+   if (nir->info.stage != MESA_SHADER_FRAGMENT)
+      vec_modes |= nir_var_shader_out;
+
+   NIR_PASS(_, nir, nir_opt_vectorize_io, vec_modes, false);
+
+   /* Special case for frag coords:
+    * - x,y come from (non-consecutive) special regs - always scalar.
+    * - z,w are iterated and driver will make sure they're consecutive.
+    * - TODO: keep scalar for now, but add pass to vectorize.
+    */
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS(_,
+               nir,
+               nir_lower_io_to_scalar,
+               nir_var_shader_in,
+               frag_in_scalar_filter,
+               nir);
+   }
+
+   pco_nir_opt(ctx, nir);
+
+   if (pco_should_print_nir(nir)) {
+      puts("after pco_lower_nir:");
+      nir_print_shader(nir, stdout);
+   }
+}
+
+/**
+ * \brief Runs post-processing passes on a NIR shader.
+ *
+ * \param[in] ctx PCO compiler context.
+ * \param[in,out] nir NIR shader.
+ * \param[in,out] data Shader data.
+ */
+void pco_postprocess_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
+{
+   nir_move_options move_options = nir_move_const_undef | nir_move_copies |
+                                   nir_move_comparisons | nir_move_alu;
+   NIR_PASS(_, nir, nir_opt_sink, move_options);
+   NIR_PASS(_, nir, nir_opt_move, move_options);
+
+   NIR_PASS(_, nir, nir_lower_all_phis_to_scalar);
+
+   NIR_PASS(_, nir, nir_convert_from_ssa, true, false);
+   NIR_PASS(_, nir, nir_copy_prop);
+   NIR_PASS(_, nir, nir_move_vec_src_uses_to_dest, false);
+   NIR_PASS(_, nir, nir_opt_dce);
+
+   bool progress = false; /* Set if compare rematerialization did anything. */
+   NIR_PASS(progress, nir, nir_opt_rematerialize_compares);
+   if (progress)
+      NIR_PASS(_, nir, nir_opt_dce);
+
+   NIR_PASS(_, nir, nir_trivialize_registers);
+
+   /* Re-index blocks, instructions and SSA defs of every function impl. */
+   nir_foreach_function_with_impl (_, impl, nir) {
+      nir_index_blocks(impl);
+      nir_index_instrs(impl);
+      nir_index_ssa_defs(impl);
+   }
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   gather_data(nir, data);
+
+   if (pco_should_print_nir(nir)) {
+      puts("after pco_postprocess_nir:");
+      nir_print_shader(nir, stdout);
+   }
+}