From 6ab7a03aef58a8f5a0b77648392a51e6668cbdda Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Wed, 13 Aug 2025 14:14:48 +0000 Subject: [PATCH] panfrost: Split texture lowering passes We now have lower_texture_early and lower_texture. lower_texture_early handles nir_lower_tex and (in the future) could handle anything that is backend specific that needs to happen before nir_lower_io. lower_texture handles actual lowering of backend specific things that must happen after nir_lower_tex and nir_lower_io. This allows us to finally not run nir_lower_tex twice in panvk. Signed-off-by: Mary Guillemard Reviewed-by: Olivia Lee Part-of: --- src/gallium/drivers/panfrost/pan_fb_preload.c | 2 + src/gallium/drivers/panfrost/pan_shader.c | 4 ++ src/panfrost/clc/pan_compile.c | 2 + src/panfrost/compiler/bifrost_compile.c | 37 ++++++++----------- src/panfrost/compiler/bifrost_compile.h | 1 + src/panfrost/lib/pan_shader.h | 28 ++++++++++++++ src/panfrost/midgard/midgard_compile.c | 27 +++++--------- src/panfrost/midgard/midgard_compile.h | 1 + src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c | 2 + src/panfrost/vulkan/panvk_vX_shader.c | 21 ++++------- 10 files changed, 72 insertions(+), 53 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c index 09c9a666960..dea2b320103 100644 --- a/src/gallium/drivers/panfrost/pan_fb_preload.c +++ b/src/gallium/drivers/panfrost/pan_fb_preload.c @@ -562,6 +562,8 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache, BITSET_SET(b.shader->info.textures_used, i); pan_shader_preprocess(b.shader, inputs.gpu_id); + pan_shader_lower_texture_early(b.shader, inputs.gpu_id); + pan_shader_lower_texture(b.shader, inputs.gpu_id); pan_shader_postprocess(b.shader, inputs.gpu_id); if (PAN_ARCH == 4) { diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 6cb8ae62f4b..ea0d734d0e7 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ 
b/src/gallium/drivers/panfrost/pan_shader.c @@ -138,6 +138,8 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, */ if (mesa_shader_stage_is_compute(s->info.stage)) { pan_shader_preprocess(s, panfrost_device_gpu_id(dev)); + pan_shader_lower_texture_early(s, panfrost_device_gpu_id(dev)); + pan_shader_lower_texture(s, panfrost_device_gpu_id(dev)); pan_shader_postprocess(s, panfrost_device_gpu_id(dev)); } @@ -505,6 +507,7 @@ panfrost_create_shader_state(struct pipe_context *pctx, /* Then run the suite of lowering and optimization, including I/O lowering */ struct panfrost_device *dev = pan_device(pctx->screen); pan_shader_preprocess(nir, panfrost_device_gpu_id(dev)); + pan_shader_lower_texture_early(nir, panfrost_device_gpu_id(dev)); NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics); @@ -520,6 +523,7 @@ panfrost_create_shader_state(struct pipe_context *pctx, */ NIR_PASS(_, nir, nir_opt_constant_folding); + pan_shader_lower_texture(nir, panfrost_device_gpu_id(dev)); pan_shader_postprocess(nir, panfrost_device_gpu_id(dev)); if (nir->info.stage == MESA_SHADER_FRAGMENT) diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c index a9b1010b860..b80546aaefe 100644 --- a/src/panfrost/clc/pan_compile.c +++ b/src/panfrost/clc/pan_compile.c @@ -410,6 +410,8 @@ main(int argc, const char **argv) } while (progress); pan_shader_preprocess(s, inputs.gpu_id); + pan_shader_lower_texture_early(s, inputs.gpu_id); + pan_shader_lower_texture(s, inputs.gpu_id); pan_shader_postprocess(s, inputs.gpu_id); NIR_PASS(_, s, nir_opt_deref); diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 7d4e6a562ea..fd386d8460d 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -5982,12 +5982,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) }; NIR_PASS(_, nir, 
nir_lower_ssbo, &ssbo_opts); - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - NIR_PASS(_, nir, nir_lower_is_helper_invocation); - NIR_PASS(_, nir, pan_lower_helper_invocation); - NIR_PASS(_, nir, pan_lower_sample_pos); - } - /* * Lower subgroups ops before lowering int64: nir_lower_int64 doesn't know * how to lower imul reductions and scans. @@ -6038,22 +6032,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, nir_lower_idiv, &(nir_lower_idiv_options){.allow_fp16 = true}); - NIR_PASS(_, nir, nir_lower_tex, - &(nir_lower_tex_options){ - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_broadcom_swizzle = true, - .lower_txd_cube_map = true, - .lower_invalid_implicit_lod = true, - .lower_index_to_offset = true, - }); - - NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); - - /* on bifrost, lower MSAA load/stores to 3D load/stores */ - if (pan_arch(gpu_id) < 9) - NIR_PASS(_, nir, pan_nir_lower_image_ms); - NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16, nir_metadata_control_flow, NULL); @@ -6067,6 +6045,21 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, pan_nir_lower_frag_coord_zw); } +void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id) +{ + NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); + + /* on Bifrost, lower MSAA load/stores to 3D load/stores */ + if (pan_arch(gpu_id) < 9) + NIR_PASS(_, nir, pan_nir_lower_image_ms); + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS(_, nir, nir_lower_is_helper_invocation); + NIR_PASS(_, nir, pan_lower_helper_invocation); + NIR_PASS(_, nir, pan_lower_sample_pos); + } +} + static bi_context * bi_compile_variant_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, diff --git a/src/panfrost/compiler/bifrost_compile.h b/src/panfrost/compiler/bifrost_compile.h index 5ed6c586be0..7b87d30669f 100644 --- a/src/panfrost/compiler/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost_compile.h @@ -83,6 +83,7 
@@ bifrost_precompiled_kernel_prepare_push_uniforms( void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id); void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id); void bifrost_compile_shader_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, diff --git a/src/panfrost/lib/pan_shader.h b/src/panfrost/lib/pan_shader.h index 03b0c876661..1c55e4f1d1a 100644 --- a/src/panfrost/lib/pan_shader.h +++ b/src/panfrost/lib/pan_shader.h @@ -36,8 +36,10 @@ void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id); void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id); void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id); void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id); static unsigned pan_get_fixed_varying_mask(unsigned varyings_used) @@ -64,6 +66,32 @@ pan_shader_postprocess(nir_shader *nir, unsigned gpu_id) midgard_postprocess_nir(nir, gpu_id); } +static inline void +pan_shader_lower_texture_early(nir_shader *nir, unsigned gpu_id) +{ + nir_lower_tex_options lower_tex_options = { + .lower_txs_lod = true, + .lower_txp = ~0, + .lower_tg4_offsets = true, + .lower_tg4_broadcom_swizzle = true, + .lower_txd = pan_arch(gpu_id) < 6, + .lower_txd_cube_map = true, + .lower_invalid_implicit_lod = true, + .lower_index_to_offset = pan_arch(gpu_id) >= 6, + }; + + NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options); +} + +static inline void +pan_shader_lower_texture(nir_shader *nir, unsigned gpu_id) +{ + if (pan_arch(gpu_id) >= 6) + bifrost_lower_texture_nir(nir, gpu_id); + else + midgard_lower_texture_nir(nir, gpu_id); +} + static inline void pan_shader_disassemble(FILE *fp, const void *code, size_t size, unsigned gpu_id, bool verbose) diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c 
index de4aac1f3fa..aa0369be91c 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -393,10 +393,8 @@ midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) } void -midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id) +midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) { - unsigned quirks = midgard_get_quirks(gpu_id); - if (nir->info.stage == MESA_SHADER_VERTEX) { /* nir_lower[_explicit]_io is lazy and emits mul+add chains even * for offsets it could figure out are constant. Do some @@ -434,21 +432,20 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, nir_lower_idiv, &idiv_options); - nir_lower_tex_options lower_tex_options = { - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_broadcom_swizzle = true, - .lower_txd = true, - .lower_invalid_implicit_lod = true, - }; + NIR_PASS(_, nir, midgard_nir_lower_algebraic_early); + NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); + NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */); + NIR_PASS(_, nir, nir_lower_var_copies); +} - NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options); +void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id) +{ NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); /* TEX_GRAD fails to apply sampler descriptor settings on some * implementations, requiring a lowering. 
*/ - if (quirks & MIDGARD_BROKEN_LOD) + if (midgard_get_quirks(gpu_id) & MIDGARD_BROKEN_LOD) NIR_PASS(_, nir, midgard_nir_lod_errata); /* lower MSAA image operations to 3D load before coordinate lowering */ @@ -463,13 +460,9 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, pan_lower_helper_invocation); NIR_PASS(_, nir, pan_lower_sample_pos); } - - NIR_PASS(_, nir, midgard_nir_lower_algebraic_early); - NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); - NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */); - NIR_PASS(_, nir, nir_lower_var_copies); } + static void optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) { diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index d2f0e0f49ee..825665d6c2e 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -31,6 +31,7 @@ void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id); void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id); void midgard_compile_shader_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, diff --git a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c index de9be178a54..2ddbc6dd361 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c @@ -153,6 +153,8 @@ get_preload_shader(struct panvk_device *dev, }; pan_shader_preprocess(nir, inputs.gpu_id); + pan_shader_lower_texture_early(nir, inputs.gpu_id); + pan_shader_lower_texture(nir, inputs.gpu_id); pan_shader_postprocess(nir, inputs.gpu_id); VkResult result = panvk_per_arch(create_internal_shader)( diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 6fdf6def48a..d3a478bf5da 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ 
b/src/panfrost/vulkan/panvk_vX_shader.c @@ -414,37 +414,29 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev, NIR_PASS(_, nir, nir_opt_barrier_modes); NIR_PASS(_, nir, nir_opt_acquire_release_barriers, SCOPE_DEVICE); - /* Do texture lowering here. Yes, it's a duplication of the texture - * lowering in bifrost_compile. However, we need to lower texture stuff + /* Do texture lowering here. We need to lower texture stuff * now, before we call panvk_per_arch(nir_lower_descriptors)() because some * of the texture lowering generates nir_texop_txs which we handle as part * of descriptor lowering. * - * TODO: We really should be doing this in common code, not dpulicated in - * panvk. In order to do that, we need to rework the panfrost compile + * TODO: We really should be doing this in common code, not duplicated in + * panvk. In order to do that, we need to rework the panfrost compile * flow to look more like the Intel flow: * * 1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs * to be done really early. * - * 2. pan_preprocess_nir: Does common lowering and runs the optimization + * 2. pan_shader_preprocess: Does common lowering and runs the optimization * loop. Nothing here should be API-specific. * * 3. Do additional lowering in panvk * - * 4. pan_postprocess_nir: Does final lowering and runs the optimization + * 4. pan_shader_postprocess: Does final lowering and runs the optimization * loop again. This can happen as part of the final compile. * * This would give us a better place to do panvk-specific lowering. 
*/ - nir_lower_tex_options lower_tex_options = { - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_offsets = true, - .lower_txd_cube_map = true, - .lower_invalid_implicit_lod = true, - }; - NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options); + pan_shader_lower_texture_early(nir, pdev->kmod.props.gpu_id); NIR_PASS(_, nir, nir_lower_system_values); nir_lower_compute_system_values_options options = { @@ -939,6 +931,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir, */ NIR_PASS(_, nir, nir_opt_constant_folding); + pan_shader_lower_texture(nir, compile_input->gpu_id); pan_shader_postprocess(nir, compile_input->gpu_id); if (stage == MESA_SHADER_VERTEX)