From 3c295e513c47dc82ccdedbc234a335952df64c9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= <pavel.ondracka@gmail.com>
Date: Wed, 3 Dec 2025 12:51:38 +0100
Subject: [PATCH] r300: always convert to NIR and move ntr later

Always convert TGSI shaders to NIR up-front in r300_create_{fs,vs}_state
so the rest of the compile pipeline only ever has to deal with NIR. The
TGSI->RC translation in r300_translate_{fragment,vertex}_shader now
always goes through nir_to_rc.

This requires shifting r300_blitter_draw_rectangle's sprite_coord_enable
bit from 0 to 9. The blitter's GENERIC[0] FS input now lands at
fs_inputs->generic[9] after the +9 shift in ntr_fixup_varying_slots, so
the rasterizer's sprite_coord_enable check needs the matching bit.

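The SW TCL draw path (r300_draw_init_vertex_shader) still needs TGSI
tokens, so for now the vertex shader is explicitly converted back with
nir_to_tgsi when the hardware has no TCL. Removing TGSI from the draw
path will come later.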

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41577>
---
 src/gallium/drivers/r300/compiler/nir_to_rc.c |  2 +-
 src/gallium/drivers/r300/compiler/nir_to_rc.h |  4 ++
 src/gallium/drivers/r300/r300_fs.c            |  7 ++-
 src/gallium/drivers/r300/r300_render.c        | 11 +++-
 src/gallium/drivers/r300/r300_state.c         | 58 ++++++++++++++-----
 src/gallium/drivers/r300/r300_vs.c            |  8 +++
 6 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c
index 20b9a2ff2c9..4cc38adca9e 100644
--- a/src/gallium/drivers/r300/compiler/nir_to_rc.c
+++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c
@@ -1760,7 +1760,7 @@ nir_to_rc_lower_txp(nir_shader *s)
  * like vs using generics and fs using texcoords. This function tries to fix it.
  * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/12749 for more details.
  */
-static void
+void
 ntr_fixup_varying_slots(nir_shader *s, nir_variable_mode mode)
 {
    if (s->info.name && !strcmp(s->info.name, "st/drawtex VS"))
diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.h b/src/gallium/drivers/r300/compiler/nir_to_rc.h
index d79e5eb3634..da5fa64f6dd 100644
--- a/src/gallium/drivers/r300/compiler/nir_to_rc.h
+++ b/src/gallium/drivers/r300/compiler/nir_to_rc.h
@@ -7,6 +7,7 @@
 #define NIR_TO_RC_H
 
 #include <stdbool.h>
+#include "compiler/nir/nir.h"
 #include "pipe/p_defines.h"
 
 struct nir_shader;
@@ -16,4 +17,7 @@ struct r300_fragment_program_external_state;
 const void *nir_to_rc(struct nir_shader *s, struct pipe_screen *screen,
                       struct r300_fragment_program_external_state state);
 
+void
+ntr_fixup_varying_slots(struct nir_shader *s, nir_variable_mode mode);
+
 #endif /* NIR_TO_RC_H */
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 434a788cbb9..a2c3fb8ff36 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -24,6 +24,7 @@
 #include "compiler/radeon_compiler.h"
 #include "compiler/nir_to_rc.h"
 #include "nir.h"
+#include "nir/tgsi_to_nir.h"
 
 /* Convert info about FS input semantics to r300_shader_semantics. */
 void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
@@ -198,7 +199,7 @@ static void r300_dummy_fragment_shader(
     struct r300_context* r300,
     struct r300_fragment_shader_code* shader)
 {
-    struct pipe_shader_state state;
+    struct pipe_shader_state state = {};
     struct ureg_program *ureg;
     struct ureg_dst out;
     struct ureg_src imm;
@@ -214,7 +215,11 @@ static void r300_dummy_fragment_shader(
     state.tokens = ureg_finalize(ureg);
 
     shader->dummy = true;
+    state.type = PIPE_SHADER_IR_NIR;
+    /* TODO: build the NIR shader directly instead of round-tripping through TGSI. */
+    state.ir.nir = tgsi_to_nir(state.tokens, &r300->screen->screen, false);
     r300_translate_fragment_shader(r300, shader, state);
+    ralloc_free(state.ir.nir);
 
     ureg_destroy(ureg);
 }
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index d4f0e2b6a4d..79089e23ba5 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -1248,7 +1248,16 @@ void r300_blitter_draw_rectangle(struct blitter_context *blitter,
     r300->context.bind_vs_state(&r300->context, get_vs(blitter));
 
     if (type == UTIL_BLITTER_ATTRIB_TEXCOORD_XY) {
-        r300->sprite_coord_enable = 1;
+        /* The blitter's passthrough VS outputs GENERIC[0], which u_blitter
+         * encodes here as sprite_coord_enable bit 0. After
+         * ntr_fixup_varying_slots in nir_to_rc, the corresponding FS input
+         * lands at index 9 in fs_inputs->generic[] (VAR0 -> VAR9 from the
+         * +9 shift that leaves room for TEX0..TEX7 and PNTC). Match that
+         * by setting bit 9 instead of bit 0; the rest of the rasterizer
+         * setup (r300_state_derived.c) walks generic[i] and tests
+         * sprite_coord_enable & (1 << i) so the indices need to agree.
+         */
+        r300->sprite_coord_enable = 1 << 9;
         r300->is_point = true;
     }
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 5c82443ca68..329272594a7 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -15,6 +15,9 @@
 #include "util/u_transfer.h"
 #include "util/u_blend.h"
 
+#include "nir/nir_to_tgsi.h"
+#include "nir/tgsi_to_nir.h"
+
 #include "tgsi/tgsi_parse.h"
 
 #include "util/detect.h"
@@ -1232,8 +1235,9 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
         }
     } else {
        assert(fs->state.type == PIPE_SHADER_IR_TGSI);
-       /* we need to keep a local copy of the tokens */
-       fs->state.tokens = tgsi_dup_tokens(fs->state.tokens);
+       /* Convert to NIR. */
+       fs->state.ir.nir = tgsi_to_nir(fs->state.tokens, pipe->screen, false);
+       fs->state.type = PIPE_SHADER_IR_NIR;
     }
 
     /* Precompile the fragment shader at creation time to avoid jank at runtime.
@@ -1244,7 +1248,7 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
 
     if (fs->state.type == PIPE_SHADER_IR_NIR) {
         /* Pick something for the shadow samplers so that we have somewhat reliable shader stats later. */
-        nir_foreach_function_impl(impl, shader->ir.nir) {
+        nir_foreach_function_impl(impl, fs->state.ir.nir) {
             nir_foreach_block_safe(block, impl) {
                 nir_foreach_instr_safe(instr, block) {
                     if (instr->type != nir_instr_type_tex)
@@ -2165,27 +2169,47 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
     /* Copy state directly into shader. */
     vs->state = *shader;
 
-    if (vs->state.type == PIPE_SHADER_IR_NIR) {
-        r300_optimize_nir(shader->ir.nir, r300->screen);
+    /* Always convert TGSI input to NIR up front */
+    if (vs->state.type == PIPE_SHADER_IR_TGSI) {
+       vs->state.ir.nir = tgsi_to_nir(vs->state.tokens, pipe->screen, false);
+       vs->state.type = PIPE_SHADER_IR_NIR;
+    }
 
+    /* Run the same NIR optimization/lowering for both HW and SW TCL.
+     * The LLVM-based draw emulation likely doesn't need the full
+     * hardware-targeted set of passes (e.g. the choice between
+     * vectorizing and scalarizing, or much of the algebraic
+     * lowering); we could trim this for the draw path later to just
+     * what nir_to_rc actually requires - notably scalarizing vector
+     * comparisons (otherwise nir_lower_bool_to_float asserts) and
+     * keeping VS outputs aligned with FS inputs via the +9 varying-slot
+     * shift.
+     */
+    r300_optimize_nir(vs->state.ir.nir, r300->screen);
+
+    if (r300->screen->caps.has_tcl) {
         /* R300/R400 can not do any kind of control flow, so abort early here. */
-        if (!r300->screen->caps.is_r500 && r300->screen->caps.has_tcl) {
-            char *msg = r300_check_control_flow(shader->ir.nir);
+        if (!r300->screen->caps.is_r500) {
+            char *msg = r300_check_control_flow(vs->state.ir.nir);
             if (msg && shader->report_compile_error) {
                 fprintf(stderr, "r300 VP: Compiler error: %s\n", msg);
                 ((struct pipe_shader_state *)shader)->error_message = strdup(msg);
-                ralloc_free(shader->ir.nir);
+                ralloc_free(vs->state.ir.nir);
                 FREE(vs);
                 return NULL;
             }
         }
-
-       struct r300_fragment_program_external_state state = {};
-       vs->state.tokens = nir_to_rc(shader->ir.nir, pipe->screen, state);
     } else {
-       assert(vs->state.type == PIPE_SHADER_IR_TGSI);
-       /* we need to keep a local copy of the tokens */
-       vs->state.tokens = tgsi_dup_tokens(vs->state.tokens);
+       /* r300_draw_init_vertex_shader needs TGSI tokens.
+        * Apply the +9 varying shift to keep VS outputs aligned with the
+        * FS inputs (which always go through nir_to_rc and pick up the
+        * same shift), then go through the stock gallium nir_to_tgsi.
+        */
+       nir_shader *clone = nir_shader_clone(NULL, vs->state.ir.nir);
+       ralloc_free(vs->state.ir.nir);
+       ntr_fixup_varying_slots(clone, nir_var_shader_out);
+       vs->state.tokens = nir_to_tgsi(clone, pipe->screen);
+       vs->state.type = PIPE_SHADER_IR_TGSI;
     }
 
     if (!vs->first)
@@ -2277,7 +2301,11 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
                 (struct draw_vertex_shader*)vs->draw_vs);
     }
 
-    FREE((void*)vs->state.tokens);
+    if (vs->state.type == PIPE_SHADER_IR_NIR) {
+        ralloc_free(vs->state.ir.nir);
+    } else {
+        FREE((void*)vs->state.tokens);
+    }
     FREE(shader);
 }
 
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index d2a08891f92..d097b6d5388 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -13,7 +13,9 @@
 
 #include "tgsi/tgsi_dump.h"
 
+#include "compiler/nir_to_rc.h"
 #include "compiler/radeon_compiler.h"
+#include "nir/nir.h"
 
 /* Convert info about VS output semantics into r300_shader_semantics. */
 static void r300_shader_read_vs_outputs(
@@ -167,11 +169,16 @@ void r300_translate_vertex_shader(struct r300_context *r300,
     unsigned i;
     struct r300_vertex_shader_code *vs = shader->shader;
 
+    nir_shader *clone = nir_shader_clone(NULL, shader->state.ir.nir);
+    struct r300_fragment_program_external_state external_state = {};
+    shader->state.tokens = nir_to_rc(clone, (struct pipe_screen *)r300->screen, external_state);
+
     r300_init_vs_outputs(r300, shader);
 
     /* Nothing to do if the shader does not write gl_Position. */
     if (vs->outputs.pos == ATTR_UNUSED) {
         vs->dummy = true;
+        FREE((void*)shader->state.tokens);
         return;
     }
 
@@ -204,6 +211,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
     ttr.info = &vs->info;
 
     r300_tgsi_to_rc(&ttr, shader->state.tokens);
+    FREE((void*)shader->state.tokens);
 
     if (ttr.error) {
         vs->error = strdup("Cannot translate shader from TGSI");