diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c
index 905bf3fe729..d3bef10dbf3 100644
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@@ -5249,10 +5249,6 @@ bi_compile_variant_nir(nir_shader *nir,
          NIR_PASS(progress, nir, nir_opt_dead_cf);
          NIR_PASS(progress, nir, nir_opt_cse);
       }
-
-      /* opt_cse can vectorize load_const, we need to lower this to scalar */
-      NIR_PASS(progress, nir, nir_lower_load_const_to_scalar);
-      NIR_PASS(progress, nir, nir_opt_dce);
    }
 
    /* If nothing is pushed, all UBOs need to be uploaded */
diff --git a/src/panfrost/compiler/bifrost/bifrost_nir.c b/src/panfrost/compiler/bifrost/bifrost_nir.c
index 5a02d1c0272..f6c5caf5e25 100644
--- a/src/panfrost/compiler/bifrost/bifrost_nir.c
+++ b/src/panfrost/compiler/bifrost/bifrost_nir.c
@@ -349,9 +349,6 @@ bi_optimize_nir(nir_shader *nir, uint64_t gpu_id,
       NIR_PASS(_, nir, nir_opt_cse);
    }
 
-   NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
-   NIR_PASS(_, nir, nir_opt_dce);
-
    /* Backend scheduler is purely local, so do some global optimizations
     * to reduce register pressure. */
    nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo |
@@ -1124,6 +1121,11 @@ bifrost_compile_shader_nir(nir_shader *nir,
 
    bi_optimize_nir(nir, inputs->gpu_id, inputs->robust_modes);
 
+   /* Lower load_const to scalar, then immediately run constant folding so we
+    * get minimum-width vectors instead of scalars.
+    */
+   NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
+   NIR_PASS(_, nir, nir_opt_constant_folding);
    uint64_t gpu_id = inputs->gpu_id;
    NIR_PASS(_, nir, nir_lower_phis_to_scalar, bi_vectorize_filter, &gpu_id);
    NIR_PASS(_, nir, nir_opt_copy_prop);