pan/bi: lower subgroups before lowering int64

nir_lower_int64 doesn't know how to lower 64-bit imul reductions and
scans. Lowering subgroup operations first leaves us with just 64-bit
ballot and read_invocation, which are easily lowered.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33365>
This commit is contained in:
Caterina Shablia 2025-03-04 22:31:11 +00:00 committed by Marge Bot
parent 0f520e3d5a
commit c4941376a9

View file

@ -5664,32 +5664,12 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, nir_lower_ssbo, &ssbo_opts);
NIR_PASS(_, nir, pan_lower_sample_pos);
NIR_PASS(_, nir, nir_lower_64bit_phis);
NIR_PASS(_, nir, pan_lower_helper_invocation);
NIR_PASS(_, nir, nir_lower_int64);
NIR_PASS(_, nir, nir_lower_bit_size, bi_lower_bit_size, &gpu_id);
NIR_PASS(_, nir, nir_opt_idiv_const, 8);
NIR_PASS(_, nir, nir_lower_idiv,
&(nir_lower_idiv_options){.allow_fp16 = true});
NIR_PASS(_, nir, nir_lower_tex,
&(nir_lower_tex_options){
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_broadcom_swizzle = true,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
.lower_index_to_offset = true,
});
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* on bifrost, lower MSAA load/stores to 3D load/stores */
if (pan_arch(gpu_id) < 9)
NIR_PASS(_, nir, pan_nir_lower_image_ms);
/*
* Lower subgroup ops before lowering int64: nir_lower_int64 doesn't know
* how to lower 64-bit imul reductions and scans.
*
* TODO: we can implement certain operations (notably reductions, scans,
* certain shuffles, etc) more efficiently than nir_lower_subgroups. Moreover
* we can implement reductions and scans on f16vec2 values without splitting
@ -5727,6 +5707,30 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, nir_shader_intrinsics_pass, bi_lower_subgroups,
nir_metadata_control_flow, &gpu_id);
NIR_PASS(_, nir, nir_lower_64bit_phis);
NIR_PASS(_, nir, nir_lower_int64);
NIR_PASS(_, nir, nir_lower_bit_size, bi_lower_bit_size, &gpu_id);
NIR_PASS(_, nir, nir_opt_idiv_const, 8);
NIR_PASS(_, nir, nir_lower_idiv,
&(nir_lower_idiv_options){.allow_fp16 = true});
NIR_PASS(_, nir, nir_lower_tex,
&(nir_lower_tex_options){
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_broadcom_swizzle = true,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
.lower_index_to_offset = true,
});
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* on bifrost, lower MSAA load/stores to 3D load/stores */
if (pan_arch(gpu_id) < 9)
NIR_PASS(_, nir, pan_nir_lower_image_ms);
NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16,
nir_metadata_control_flow, NULL);