nak/nir: Use an indirect load for sample locations

A single ldc is probably more efficient than a 64-bit load and the pile of math we were generating before. The only reason for the old method was that it let us avoid indirect cbuf loads because we didn't support them for a while. Now that we can support all cbuf loads, we can just do an indirect 1-byte load and call it good.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30218>
2026-05-07 17:58:26 +02:00 · 2024-07-16 15:53:34 -05:00 · 2024-07-16 15:53:34 -05:00 · cc33cafcac
commit cc33cafcac
parent f673e2bf68
1 changed files with 5 additions and 4 deletions
--- a/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
+++ b/src/nouveau/compiler/nak_nir_lower_fs_inputs.c
@ -107,13 +107,14 @@ static nir_def *
 load_sample_pos_at(nir_builder *b, nir_def *sample_id,
                   const struct nak_fs_key *fs_key)
 {
-   nir_def *loc = nir_ldc_nv(b, 1, 64,
+   nir_def *loc = nir_ldc_nv(b, 1, 8,
                             nir_imm_int(b, fs_key->sample_locations_cb),
-                             nir_imm_int(b, fs_key->sample_locations_offset),
+                             nir_iadd_imm(b, sample_id,
+                                          fs_key->sample_locations_offset),
                             .align_mul = 8, .align_offset = 0);

-   /* Yay little endian */
-   loc = nir_ushr(b, loc, nir_imul_imm(b, sample_id, 8));
+   /* The rest of these calculations are in 32-bit */
+   loc = nir_u2u32(b, loc);
   nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
   nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
   nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);