panfrost: Split texture lowering passes

We now have lower_texture_early and lower_texture.

lower_texture_early handles nir_lower_tex and (in the future) could handle
anything backend-specific that needs to happen before nir_lower_io.

lower_texture handles the actual lowering of backend-specific things that
must happen after nir_lower_tex and nir_lower_io.

This finally allows us to stop running nir_lower_tex twice in panvk.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Olivia Lee <olivia.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36776>
Mary Guillemard authored 2025-08-13 14:14:48 +00:00, committed by Marge Bot
parent 310eabacc0
commit 6ab7a03aef
10 changed files with 72 additions and 53 deletions
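
For reference, a minimal sketch of the call order this split enables, modeled on the panfrost_create_shader_state hunk below; glsl_type_size is the driver's GLSL type-size callback and is assumed to be in scope, and the surrounding driver state and error handling are omitted:

/* Sketch only: mirrors the ordering used in panfrost_create_shader_state. */
static void
example_compile_flow(nir_shader *nir, unsigned gpu_id)
{
   /* Common lowering and the shared optimization loop. */
   pan_shader_preprocess(nir, gpu_id);

   /* nir_lower_tex plus anything backend-specific that must run before
    * nir_lower_io (new in this commit). */
   pan_shader_lower_texture_early(nir, gpu_id);

   /* I/O lowering (in panvk, descriptor lowering happens around here). */
   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
            glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);

   /* Backend-specific texture lowering that must run after nir_lower_tex
    * and nir_lower_io (new in this commit). */
   pan_shader_lower_texture(nir, gpu_id);

   /* Final lowering and another optimization pass. */
   pan_shader_postprocess(nir, gpu_id);
}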

@@ -562,6 +562,8 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache,
BITSET_SET(b.shader->info.textures_used, i);
pan_shader_preprocess(b.shader, inputs.gpu_id);
pan_shader_lower_texture_early(b.shader, inputs.gpu_id);
pan_shader_lower_texture(b.shader, inputs.gpu_id);
pan_shader_postprocess(b.shader, inputs.gpu_id);
if (PAN_ARCH == 4) {

@@ -138,6 +138,8 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
*/
if (mesa_shader_stage_is_compute(s->info.stage)) {
pan_shader_preprocess(s, panfrost_device_gpu_id(dev));
pan_shader_lower_texture_early(s, panfrost_device_gpu_id(dev));
pan_shader_lower_texture(s, panfrost_device_gpu_id(dev));
pan_shader_postprocess(s, panfrost_device_gpu_id(dev));
}
@@ -505,6 +507,7 @@ panfrost_create_shader_state(struct pipe_context *pctx,
/* Then run the suite of lowering and optimization, including I/O lowering */
struct panfrost_device *dev = pan_device(pctx->screen);
pan_shader_preprocess(nir, panfrost_device_gpu_id(dev));
pan_shader_lower_texture_early(nir, panfrost_device_gpu_id(dev));
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
@@ -520,6 +523,7 @@ panfrost_create_shader_state(struct pipe_context *pctx,
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
pan_shader_lower_texture(nir, panfrost_device_gpu_id(dev));
pan_shader_postprocess(nir, panfrost_device_gpu_id(dev));
if (nir->info.stage == MESA_SHADER_FRAGMENT)

@@ -410,6 +410,8 @@ main(int argc, const char **argv)
} while (progress);
pan_shader_preprocess(s, inputs.gpu_id);
pan_shader_lower_texture_early(s, inputs.gpu_id);
pan_shader_lower_texture(s, inputs.gpu_id);
pan_shader_postprocess(s, inputs.gpu_id);
NIR_PASS(_, s, nir_opt_deref);

@@ -5982,12 +5982,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
};
NIR_PASS(_, nir, nir_lower_ssbo, &ssbo_opts);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, nir_lower_is_helper_invocation);
NIR_PASS(_, nir, pan_lower_helper_invocation);
NIR_PASS(_, nir, pan_lower_sample_pos);
}
/*
* Lower subgroups ops before lowering int64: nir_lower_int64 doesn't know
* how to lower imul reductions and scans.
@@ -6038,22 +6032,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, nir_lower_idiv,
&(nir_lower_idiv_options){.allow_fp16 = true});
NIR_PASS(_, nir, nir_lower_tex,
&(nir_lower_tex_options){
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_broadcom_swizzle = true,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
.lower_index_to_offset = true,
});
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* on bifrost, lower MSAA load/stores to 3D load/stores */
if (pan_arch(gpu_id) < 9)
NIR_PASS(_, nir, pan_nir_lower_image_ms);
NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16,
nir_metadata_control_flow, NULL);
@@ -6067,6 +6045,21 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, pan_nir_lower_frag_coord_zw);
}
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
{
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* on Bifrost, lower MSAA load/stores to 3D load/stores */
if (pan_arch(gpu_id) < 9)
NIR_PASS(_, nir, pan_nir_lower_image_ms);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, nir_lower_is_helper_invocation);
NIR_PASS(_, nir, pan_lower_helper_invocation);
NIR_PASS(_, nir, pan_lower_sample_pos);
}
}
static bi_context *
bi_compile_variant_nir(nir_shader *nir,
const struct pan_compile_inputs *inputs,

@@ -83,6 +83,7 @@ bifrost_precompiled_kernel_prepare_push_uniforms(
void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_compile_shader_nir(nir_shader *nir,
const struct pan_compile_inputs *inputs,

@@ -36,8 +36,10 @@
void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
static unsigned
pan_get_fixed_varying_mask(unsigned varyings_used)
@@ -64,6 +66,32 @@ pan_shader_postprocess(nir_shader *nir, unsigned gpu_id)
midgard_postprocess_nir(nir, gpu_id);
}
static inline void
pan_shader_lower_texture_early(nir_shader *nir, unsigned gpu_id)
{
nir_lower_tex_options lower_tex_options = {
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_offsets = true,
.lower_tg4_broadcom_swizzle = true,
.lower_txd = pan_arch(gpu_id) < 6,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
.lower_index_to_offset = pan_arch(gpu_id) >= 6,
};
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
}
static inline void
pan_shader_lower_texture(nir_shader *nir, unsigned gpu_id)
{
if (pan_arch(gpu_id) >= 6)
bifrost_lower_texture_nir(nir, gpu_id);
else
midgard_lower_texture_nir(nir, gpu_id);
}
static inline void
pan_shader_disassemble(FILE *fp, const void *code, size_t size, unsigned gpu_id,
bool verbose)

@@ -393,10 +393,8 @@ midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
}
void
midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
{
unsigned quirks = midgard_get_quirks(gpu_id);
if (nir->info.stage == MESA_SHADER_VERTEX) {
/* nir_lower[_explicit]_io is lazy and emits mul+add chains even
* for offsets it could figure out are constant. Do some
@@ -434,21 +432,20 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, nir_lower_idiv, &idiv_options);
nir_lower_tex_options lower_tex_options = {
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_broadcom_swizzle = true,
.lower_txd = true,
.lower_invalid_implicit_lod = true,
};
NIR_PASS(_, nir, midgard_nir_lower_algebraic_early);
NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
NIR_PASS(_, nir, nir_lower_var_copies);
}
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
{
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
/* TEX_GRAD fails to apply sampler descriptor settings on some
* implementations, requiring a lowering.
*/
if (quirks & MIDGARD_BROKEN_LOD)
if (midgard_get_quirks(gpu_id) & MIDGARD_BROKEN_LOD)
NIR_PASS(_, nir, midgard_nir_lod_errata);
/* lower MSAA image operations to 3D load before coordinate lowering */
@@ -463,13 +460,9 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, pan_lower_helper_invocation);
NIR_PASS(_, nir, pan_lower_sample_pos);
}
NIR_PASS(_, nir, midgard_nir_lower_algebraic_early);
NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
NIR_PASS(_, nir, nir_lower_var_copies);
}
static void
optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
{

@@ -31,6 +31,7 @@
void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
void midgard_compile_shader_nir(nir_shader *nir,
const struct pan_compile_inputs *inputs,

@@ -153,6 +153,8 @@ get_preload_shader(struct panvk_device *dev,
};
pan_shader_preprocess(nir, inputs.gpu_id);
pan_shader_lower_texture_early(nir, inputs.gpu_id);
pan_shader_lower_texture(nir, inputs.gpu_id);
pan_shader_postprocess(nir, inputs.gpu_id);
VkResult result = panvk_per_arch(create_internal_shader)(

@@ -414,37 +414,29 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev,
NIR_PASS(_, nir, nir_opt_barrier_modes);
NIR_PASS(_, nir, nir_opt_acquire_release_barriers, SCOPE_DEVICE);
/* Do texture lowering here. Yes, it's a duplication of the texture
* lowering in bifrost_compile. However, we need to lower texture stuff
/* Do texture lowering here. We need to lower texture stuff
* now, before we call panvk_per_arch(nir_lower_descriptors)() because some
* of the texture lowering generates nir_texop_txs which we handle as part
* of descriptor lowering.
*
* TODO: We really should be doing this in common code, not dpulicated in
* panvk. In order to do that, we need to rework the panfrost compile
* TODO: We really should be doing this in common code, not duplicated in
* panvk. In order to do that, we need to rework the panfrost compile
* flow to look more like the Intel flow:
*
* 1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs
* to be done really early.
*
* 2. pan_preprocess_nir: Does common lowering and runs the optimization
* 2. pan_shader_preprocess: Does common lowering and runs the optimization
* loop. Nothing here should be API-specific.
*
* 3. Do additional lowering in panvk
*
* 4. pan_postprocess_nir: Does final lowering and runs the optimization
* 4. pan_shader_postprocess: Does final lowering and runs the optimization
* loop again. This can happen as part of the final compile.
*
* This would give us a better place to do panvk-specific lowering.
*/
nir_lower_tex_options lower_tex_options = {
.lower_txs_lod = true,
.lower_txp = ~0,
.lower_tg4_offsets = true,
.lower_txd_cube_map = true,
.lower_invalid_implicit_lod = true,
};
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
pan_shader_lower_texture_early(nir, pdev->kmod.props.gpu_id);
NIR_PASS(_, nir, nir_lower_system_values);
nir_lower_compute_system_values_options options = {
@@ -939,6 +931,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
pan_shader_lower_texture(nir, compile_input->gpu_id);
pan_shader_postprocess(nir, compile_input->gpu_id);
if (stage == MESA_SHADER_VERTEX)