mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
panfrost: Split texture lowering passes
We now have lower_texture_early and lower_texture. lower_texture_early handles nir_lower_tex and (in the future) could handle anything backend-specific that needs to happen before nir_lower_io. lower_texture handles the actual lowering of backend-specific things that must happen after nir_lower_tex and nir_lower_io. This allows us to finally not run nir_lower_tex twice in panvk. Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Olivia Lee <olivia.lee@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36776>
This commit is contained in:
parent
310eabacc0
commit
6ab7a03aef
10 changed files with 72 additions and 53 deletions
|
|
@ -562,6 +562,8 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache,
|
|||
BITSET_SET(b.shader->info.textures_used, i);
|
||||
|
||||
pan_shader_preprocess(b.shader, inputs.gpu_id);
|
||||
pan_shader_lower_texture_early(b.shader, inputs.gpu_id);
|
||||
pan_shader_lower_texture(b.shader, inputs.gpu_id);
|
||||
pan_shader_postprocess(b.shader, inputs.gpu_id);
|
||||
|
||||
if (PAN_ARCH == 4) {
|
||||
|
|
|
|||
|
|
@ -138,6 +138,8 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
|||
*/
|
||||
if (mesa_shader_stage_is_compute(s->info.stage)) {
|
||||
pan_shader_preprocess(s, panfrost_device_gpu_id(dev));
|
||||
pan_shader_lower_texture_early(s, panfrost_device_gpu_id(dev));
|
||||
pan_shader_lower_texture(s, panfrost_device_gpu_id(dev));
|
||||
pan_shader_postprocess(s, panfrost_device_gpu_id(dev));
|
||||
}
|
||||
|
||||
|
|
@ -505,6 +507,7 @@ panfrost_create_shader_state(struct pipe_context *pctx,
|
|||
/* Then run the suite of lowering and optimization, including I/O lowering */
|
||||
struct panfrost_device *dev = pan_device(pctx->screen);
|
||||
pan_shader_preprocess(nir, panfrost_device_gpu_id(dev));
|
||||
pan_shader_lower_texture_early(nir, panfrost_device_gpu_id(dev));
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
|
||||
glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
|
||||
|
|
@ -520,6 +523,7 @@ panfrost_create_shader_state(struct pipe_context *pctx,
|
|||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
|
||||
pan_shader_lower_texture(nir, panfrost_device_gpu_id(dev));
|
||||
pan_shader_postprocess(nir, panfrost_device_gpu_id(dev));
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
|
|
|
|||
|
|
@ -410,6 +410,8 @@ main(int argc, const char **argv)
|
|||
} while (progress);
|
||||
|
||||
pan_shader_preprocess(s, inputs.gpu_id);
|
||||
pan_shader_lower_texture_early(s, inputs.gpu_id);
|
||||
pan_shader_lower_texture(s, inputs.gpu_id);
|
||||
pan_shader_postprocess(s, inputs.gpu_id);
|
||||
|
||||
NIR_PASS(_, s, nir_opt_deref);
|
||||
|
|
|
|||
|
|
@ -5982,12 +5982,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
|
|||
};
|
||||
NIR_PASS(_, nir, nir_lower_ssbo, &ssbo_opts);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(_, nir, nir_lower_is_helper_invocation);
|
||||
NIR_PASS(_, nir, pan_lower_helper_invocation);
|
||||
NIR_PASS(_, nir, pan_lower_sample_pos);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lower subgroups ops before lowering int64: nir_lower_int64 doesn't know
|
||||
* how to lower imul reductions and scans.
|
||||
|
|
@ -6038,22 +6032,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
|
|||
NIR_PASS(_, nir, nir_lower_idiv,
|
||||
&(nir_lower_idiv_options){.allow_fp16 = true});
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_tex,
|
||||
&(nir_lower_tex_options){
|
||||
.lower_txs_lod = true,
|
||||
.lower_txp = ~0,
|
||||
.lower_tg4_broadcom_swizzle = true,
|
||||
.lower_txd_cube_map = true,
|
||||
.lower_invalid_implicit_lod = true,
|
||||
.lower_index_to_offset = true,
|
||||
});
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
|
||||
|
||||
/* on bifrost, lower MSAA load/stores to 3D load/stores */
|
||||
if (pan_arch(gpu_id) < 9)
|
||||
NIR_PASS(_, nir, pan_nir_lower_image_ms);
|
||||
|
||||
NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
|
|
@ -6067,6 +6045,21 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
|
|||
NIR_PASS(_, nir, pan_nir_lower_frag_coord_zw);
|
||||
}
|
||||
|
||||
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
|
||||
{
|
||||
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
|
||||
|
||||
/* on Bifrost, lower MSAA load/stores to 3D load/stores */
|
||||
if (pan_arch(gpu_id) < 9)
|
||||
NIR_PASS(_, nir, pan_nir_lower_image_ms);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(_, nir, nir_lower_is_helper_invocation);
|
||||
NIR_PASS(_, nir, pan_lower_helper_invocation);
|
||||
NIR_PASS(_, nir, pan_lower_sample_pos);
|
||||
}
|
||||
}
|
||||
|
||||
static bi_context *
|
||||
bi_compile_variant_nir(nir_shader *nir,
|
||||
const struct pan_compile_inputs *inputs,
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ bifrost_precompiled_kernel_prepare_push_uniforms(
|
|||
|
||||
void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
|
||||
|
||||
void bifrost_compile_shader_nir(nir_shader *nir,
|
||||
const struct pan_compile_inputs *inputs,
|
||||
|
|
|
|||
|
|
@ -36,8 +36,10 @@
|
|||
|
||||
void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
|
||||
|
||||
static unsigned
|
||||
pan_get_fixed_varying_mask(unsigned varyings_used)
|
||||
|
|
@ -64,6 +66,32 @@ pan_shader_postprocess(nir_shader *nir, unsigned gpu_id)
|
|||
midgard_postprocess_nir(nir, gpu_id);
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_shader_lower_texture_early(nir_shader *nir, unsigned gpu_id)
|
||||
{
|
||||
nir_lower_tex_options lower_tex_options = {
|
||||
.lower_txs_lod = true,
|
||||
.lower_txp = ~0,
|
||||
.lower_tg4_offsets = true,
|
||||
.lower_tg4_broadcom_swizzle = true,
|
||||
.lower_txd = pan_arch(gpu_id) < 6,
|
||||
.lower_txd_cube_map = true,
|
||||
.lower_invalid_implicit_lod = true,
|
||||
.lower_index_to_offset = pan_arch(gpu_id) >= 6,
|
||||
};
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_shader_lower_texture(nir_shader *nir, unsigned gpu_id)
|
||||
{
|
||||
if (pan_arch(gpu_id) >= 6)
|
||||
bifrost_lower_texture_nir(nir, gpu_id);
|
||||
else
|
||||
midgard_lower_texture_nir(nir, gpu_id);
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_shader_disassemble(FILE *fp, const void *code, size_t size, unsigned gpu_id,
|
||||
bool verbose)
|
||||
|
|
|
|||
|
|
@ -393,10 +393,8 @@ midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
|
|||
}
|
||||
|
||||
void
|
||||
midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
|
||||
midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
|
||||
{
|
||||
unsigned quirks = midgard_get_quirks(gpu_id);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
/* nir_lower[_explicit]_io is lazy and emits mul+add chains even
|
||||
* for offsets it could figure out are constant. Do some
|
||||
|
|
@ -434,21 +432,20 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
|
|||
|
||||
NIR_PASS(_, nir, nir_lower_idiv, &idiv_options);
|
||||
|
||||
nir_lower_tex_options lower_tex_options = {
|
||||
.lower_txs_lod = true,
|
||||
.lower_txp = ~0,
|
||||
.lower_tg4_broadcom_swizzle = true,
|
||||
.lower_txd = true,
|
||||
.lower_invalid_implicit_lod = true,
|
||||
};
|
||||
NIR_PASS(_, nir, midgard_nir_lower_algebraic_early);
|
||||
NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
|
||||
NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
|
||||
NIR_PASS(_, nir, nir_lower_var_copies);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
|
||||
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
|
||||
{
|
||||
NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);
|
||||
|
||||
/* TEX_GRAD fails to apply sampler descriptor settings on some
|
||||
* implementations, requiring a lowering.
|
||||
*/
|
||||
if (quirks & MIDGARD_BROKEN_LOD)
|
||||
if (midgard_get_quirks(gpu_id) & MIDGARD_BROKEN_LOD)
|
||||
NIR_PASS(_, nir, midgard_nir_lod_errata);
|
||||
|
||||
/* lower MSAA image operations to 3D load before coordinate lowering */
|
||||
|
|
@ -463,13 +460,9 @@ midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id)
|
|||
NIR_PASS(_, nir, pan_lower_helper_invocation);
|
||||
NIR_PASS(_, nir, pan_lower_sample_pos);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, midgard_nir_lower_algebraic_early);
|
||||
NIR_PASS(_, nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL);
|
||||
NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
|
||||
NIR_PASS(_, nir, nir_lower_var_copies);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id);
|
||||
void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
|
||||
|
||||
void midgard_compile_shader_nir(nir_shader *nir,
|
||||
const struct pan_compile_inputs *inputs,
|
||||
|
|
|
|||
|
|
@ -153,6 +153,8 @@ get_preload_shader(struct panvk_device *dev,
|
|||
};
|
||||
|
||||
pan_shader_preprocess(nir, inputs.gpu_id);
|
||||
pan_shader_lower_texture_early(nir, inputs.gpu_id);
|
||||
pan_shader_lower_texture(nir, inputs.gpu_id);
|
||||
pan_shader_postprocess(nir, inputs.gpu_id);
|
||||
|
||||
VkResult result = panvk_per_arch(create_internal_shader)(
|
||||
|
|
|
|||
|
|
@ -414,37 +414,29 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev,
|
|||
NIR_PASS(_, nir, nir_opt_barrier_modes);
|
||||
NIR_PASS(_, nir, nir_opt_acquire_release_barriers, SCOPE_DEVICE);
|
||||
|
||||
/* Do texture lowering here. Yes, it's a duplication of the texture
|
||||
* lowering in bifrost_compile. However, we need to lower texture stuff
|
||||
/* Do texture lowering here. We need to lower texture stuff
|
||||
* now, before we call panvk_per_arch(nir_lower_descriptors)() because some
|
||||
* of the texture lowering generates nir_texop_txs which we handle as part
|
||||
* of descriptor lowering.
|
||||
*
|
||||
* TODO: We really should be doing this in common code, not dpulicated in
|
||||
* panvk. In order to do that, we need to rework the panfrost compile
|
||||
* TODO: We really should be doing this in common code, not duplicated in
|
||||
* panvk. In order to do that, we need to rework the panfrost compile
|
||||
* flow to look more like the Intel flow:
|
||||
*
|
||||
* 1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs
|
||||
* to be done really early.
|
||||
*
|
||||
* 2. pan_preprocess_nir: Does common lowering and runs the optimization
|
||||
* 2. pan_shader_preprocess: Does common lowering and runs the optimization
|
||||
* loop. Nothing here should be API-specific.
|
||||
*
|
||||
* 3. Do additional lowering in panvk
|
||||
*
|
||||
* 4. pan_postprocess_nir: Does final lowering and runs the optimization
|
||||
* 4. pan_shader_postprocess: Does final lowering and runs the optimization
|
||||
* loop again. This can happen as part of the final compile.
|
||||
*
|
||||
* This would give us a better place to do panvk-specific lowering.
|
||||
*/
|
||||
nir_lower_tex_options lower_tex_options = {
|
||||
.lower_txs_lod = true,
|
||||
.lower_txp = ~0,
|
||||
.lower_tg4_offsets = true,
|
||||
.lower_txd_cube_map = true,
|
||||
.lower_invalid_implicit_lod = true,
|
||||
};
|
||||
NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
|
||||
pan_shader_lower_texture_early(nir, pdev->kmod.props.gpu_id);
|
||||
NIR_PASS(_, nir, nir_lower_system_values);
|
||||
|
||||
nir_lower_compute_system_values_options options = {
|
||||
|
|
@ -939,6 +931,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
|
||||
pan_shader_lower_texture(nir, compile_input->gpu_id);
|
||||
pan_shader_postprocess(nir, compile_input->gpu_id);
|
||||
|
||||
if (stage == MESA_SHADER_VERTEX)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue