diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 905bf3fe729..d3bef10dbf3 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -5249,10 +5249,6 @@ bi_compile_variant_nir(nir_shader *nir, NIR_PASS(progress, nir, nir_opt_dead_cf); NIR_PASS(progress, nir, nir_opt_cse); } - - /* opt_cse can vectorize load_const, we need to lower this to scalar */ - NIR_PASS(progress, nir, nir_lower_load_const_to_scalar); - NIR_PASS(progress, nir, nir_opt_dce); } /* If nothing is pushed, all UBOs need to be uploaded */ diff --git a/src/panfrost/compiler/bifrost/bifrost_nir.c b/src/panfrost/compiler/bifrost/bifrost_nir.c index 5a02d1c0272..f6c5caf5e25 100644 --- a/src/panfrost/compiler/bifrost/bifrost_nir.c +++ b/src/panfrost/compiler/bifrost/bifrost_nir.c @@ -349,9 +349,6 @@ bi_optimize_nir(nir_shader *nir, uint64_t gpu_id, NIR_PASS(_, nir, nir_opt_cse); } - NIR_PASS(_, nir, nir_lower_load_const_to_scalar); - NIR_PASS(_, nir, nir_opt_dce); - /* Backend scheduler is purely local, so do some global optimizations * to reduce register pressure. */ nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo | @@ -1124,6 +1121,11 @@ bifrost_compile_shader_nir(nir_shader *nir, bi_optimize_nir(nir, inputs->gpu_id, inputs->robust_modes); + /* Lower constants to scalar but then immediately fold so we get minimum- + * width vectors instead of scalars + */ + NIR_PASS(_, nir, nir_lower_load_const_to_scalar); + NIR_PASS(_, nir, nir_opt_constant_folding); uint64_t gpu_id = inputs->gpu_id; NIR_PASS(_, nir, nir_lower_phis_to_scalar, bi_vectorize_filter, &gpu_id); NIR_PASS(_, nir, nir_opt_copy_prop);