From cc33cafcacb1ad2eb9bb04283a57ee30385260fc Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Tue, 16 Jul 2024 15:53:34 -0500
Subject: [PATCH] nak/nir: Use an indirect load for sample locations

A single ldc is probably more efficient than a 64-bit load and the pile
of math we were generating before. The only reason for the old method
was that it let us avoid indirect cbuf loads because we didn't support
them for a while. Now that we can support all cbuf loads, we can just
do an indirect 1B load and call it good.

Part-of:
---
 src/nouveau/compiler/nak_nir_lower_fs_inputs.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/nouveau/compiler/nak_nir_lower_fs_inputs.c b/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
index 51ee91b08ba..8d6e79756a4 100644
--- a/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
+++ b/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
@@ -107,13 +107,14 @@ static nir_def *
 load_sample_pos_at(nir_builder *b, nir_def *sample_id,
                    const struct nak_fs_key *fs_key)
 {
-   nir_def *loc = nir_ldc_nv(b, 1, 64,
+   nir_def *loc = nir_ldc_nv(b, 1, 8,
                              nir_imm_int(b, fs_key->sample_locations_cb),
-                             nir_imm_int(b, fs_key->sample_locations_offset),
+                             nir_iadd_imm(b, sample_id,
+                                          fs_key->sample_locations_offset),
                              .align_mul = 8,
                              .align_offset = 0);
-   /* Yay little endian */
-   loc = nir_ushr(b, loc, nir_imul_imm(b, sample_id, 8));
+   /* The rest of these calculations are in 32-bit */
+   loc = nir_u2u32(b, loc);
    nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
    nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
    nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
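
For context, a minimal host-side C sketch of the addressing change (not part
of the patch): the old path loads all eight packed sample bytes as one 64-bit
value and shifts by sample_id * 8, while the new path simply reads the byte at
sample_locations_offset + sample_id. The helper names and example byte values
below are hypothetical, and the 1/16-pixel fixed-point scale used for printing
is an assumption; only the low-nibble-x / high-nibble-y split comes from the
diff above.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Old approach: one 64-bit load of the whole 8-byte sample location table,
 * then shift by sample_id * 8 to pick out the byte.  This relies on the data
 * being little endian (and this host sketch assumes a little-endian CPU too).
 */
static uint8_t
sample_byte_via_u64(const uint8_t cbuf[8], unsigned sample_id)
{
   uint64_t packed;
   memcpy(&packed, cbuf, sizeof(packed));
   return (uint8_t)(packed >> (sample_id * 8));
}

/* New approach: a single indirect 1-byte load at offset + sample_id. */
static uint8_t
sample_byte_via_u8(const uint8_t cbuf[8], unsigned sample_id)
{
   return cbuf[sample_id];
}

int
main(void)
{
   /* Hypothetical 8-sample location table, one packed byte per sample. */
   const uint8_t cbuf[8] = { 0x88, 0x5b, 0xeb, 0x15,
                             0x2e, 0xd1, 0x74, 0xa8 };

   for (unsigned s = 0; s < 8; s++) {
      uint8_t loc = sample_byte_via_u8(cbuf, s);

      /* Both addressing schemes pick out the same byte. */
      assert(loc == sample_byte_via_u64(cbuf, s));

      /* Same nibble math as the lowering: x in bits [3:0], y in bits [7:4]. */
      unsigned loc_x_u4 = loc & 0xf;
      unsigned loc_y_u4 = (loc >> 4) & 0xf;

      /* Assumed 1/16-pixel fixed-point scale, for illustration only. */
      printf("sample %u: x=%.4f y=%.4f\n",
             s, loc_x_u4 / 16.0f, loc_y_u4 / 16.0f);
   }
   return 0;
}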