diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 4f35748fddc..ce2738bd373 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -5664,32 +5664,12 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, nir_lower_ssbo, &ssbo_opts); NIR_PASS(_, nir, pan_lower_sample_pos); - NIR_PASS(_, nir, nir_lower_64bit_phis); NIR_PASS(_, nir, pan_lower_helper_invocation); - NIR_PASS(_, nir, nir_lower_int64); - NIR_PASS(_, nir, nir_lower_bit_size, bi_lower_bit_size, &gpu_id); - - NIR_PASS(_, nir, nir_opt_idiv_const, 8); - NIR_PASS(_, nir, nir_lower_idiv, - &(nir_lower_idiv_options){.allow_fp16 = true}); - - NIR_PASS(_, nir, nir_lower_tex, - &(nir_lower_tex_options){ - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_broadcom_swizzle = true, - .lower_txd_cube_map = true, - .lower_invalid_implicit_lod = true, - .lower_index_to_offset = true, - }); - - NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); - - /* on bifrost, lower MSAA load/stores to 3D load/stores */ - if (pan_arch(gpu_id) < 9) - NIR_PASS(_, nir, pan_nir_lower_image_ms); /* + * Lower subgroup ops before lowering int64: nir_lower_int64 doesn't know + * how to lower imul reductions and scans. + * * TODO: we can implement certain operations (notably reductions, scans, * certain shuffles, etc) more efficiently than nir_lower_subgroups. 
Moreover * we can implement reductions and scans on f16vec2 values without splitting @@ -5727,6 +5707,30 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, nir_shader_intrinsics_pass, bi_lower_subgroups, nir_metadata_control_flow, &gpu_id); + NIR_PASS(_, nir, nir_lower_64bit_phis); + NIR_PASS(_, nir, nir_lower_int64); + NIR_PASS(_, nir, nir_lower_bit_size, bi_lower_bit_size, &gpu_id); + + NIR_PASS(_, nir, nir_opt_idiv_const, 8); + NIR_PASS(_, nir, nir_lower_idiv, + &(nir_lower_idiv_options){.allow_fp16 = true}); + + NIR_PASS(_, nir, nir_lower_tex, + &(nir_lower_tex_options){ + .lower_txs_lod = true, + .lower_txp = ~0, + .lower_tg4_broadcom_swizzle = true, + .lower_txd_cube_map = true, + .lower_invalid_implicit_lod = true, + .lower_index_to_offset = true, + }); + + NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); + + /* on bifrost, lower MSAA load/stores to 3D load/stores */ + if (pan_arch(gpu_id) < 9) + NIR_PASS(_, nir, pan_nir_lower_image_ms); + NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16, nir_metadata_control_flow, NULL);