mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 05:38:16 +02:00
ir3: lower 64b registers before creating preamble
ir3_nir_lower_preamble cannot handle 64b @load/store_preamble so we have to make sure ir3_nir_opt_preamble will never produce them. Up to now, nir_lower_locals_to_regs was run after preamble lowering so 64b locals could still be around when lowering the preamble. This patch moves this pass, as well as ir3_nir_lower_64b_regs, to before the preamble lowering. Fixed Piglit tests: - spec@arb_gpu_shader_fp64@execution@fs-indirect-temp-double-dst - spec@arb_gpu_shader_fp64@execution@built-in-functions@fs-frexp-dvec4-variable-index This patch has no impact on shader-db. Note: a few cleanup passes used to be run after nir_lower_locals_to_regs (nir_opt_algebraic, nir_opt_constant_folding) and after ir3_nir_lower_64b_regs (nir_lower_alu_to_scalar, nir_copy_prop). As far as I can tell, these are not necessary anymore when running the register lowering earlier so this patch removes them. Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26737>
This commit is contained in:
parent
6cad2fc230
commit
2d273c520c
3 changed files with 10 additions and 30 deletions
|
|
@ -84,42 +84,14 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
ctx->s = nir_shader_clone(ctx, shader->nir);
|
||||
ir3_nir_lower_variant(so, ctx->s);
|
||||
|
||||
/* this needs to be the last pass run, so do this here instead of
|
||||
* in ir3_optimize_nir():
|
||||
*/
|
||||
bool progress = false;
|
||||
bool needs_late_alg = false;
|
||||
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
|
||||
|
||||
if (progress) {
|
||||
bool regs_progress = false;
|
||||
|
||||
/* Split 64b registers into two 32b ones. */
|
||||
NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs);
|
||||
|
||||
if (regs_progress) {
|
||||
/* After splitting registers, we might still have some 64b vecs. Run
|
||||
* some passes to get rid of them.
|
||||
*/
|
||||
NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL);
|
||||
NIR_PASS_V(ctx->s, nir_copy_prop);
|
||||
}
|
||||
}
|
||||
|
||||
/* we could need cleanup after lower_locals_to_regs */
|
||||
while (progress) {
|
||||
progress = false;
|
||||
NIR_PASS(progress, ctx->s, nir_opt_algebraic);
|
||||
NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
|
||||
needs_late_alg = true;
|
||||
}
|
||||
|
||||
/* We want to lower nir_op_imul as late as possible, to catch also
|
||||
* those generated by earlier passes (e.g,
|
||||
* nir_lower_locals_to_regs). However, we want a final swing of a
|
||||
* few passes to have a chance at optimizing the result.
|
||||
*/
|
||||
progress = false;
|
||||
NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
|
||||
while (progress) {
|
||||
progress = false;
|
||||
|
|
|
|||
|
|
@ -722,6 +722,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
|
|||
/* Lower scratch writemasks */
|
||||
progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s);
|
||||
|
||||
if (OPT(s, nir_lower_locals_to_regs, 1)) {
|
||||
progress = true;
|
||||
|
||||
/* Split 64b registers into two 32b ones. */
|
||||
OPT_V(s, ir3_nir_lower_64b_regs);
|
||||
}
|
||||
|
||||
progress |= OPT(s, ir3_nir_lower_wide_load_store);
|
||||
progress |= OPT(s, ir3_nir_lower_64b_global);
|
||||
progress |= OPT(s, ir3_nir_lower_64b_intrinsics);
|
||||
|
|
|
|||
|
|
@ -440,8 +440,9 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
|
|||
* ...
|
||||
*/
|
||||
|
||||
b->cursor = nir_before_impl(main);
|
||||
|
||||
/* @decl_regs need to stay in the first block. */
|
||||
b->cursor = nir_after_reg_decls(main);
|
||||
|
||||
nir_if *outer_if = nir_push_if(b, nir_preamble_start_ir3(b, 1));
|
||||
{
|
||||
nir_if *inner_if = nir_push_if(b, nir_elect(b, 1));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue