From c10b4b1e47bbfba930f01dcf8dfe5270f7da4219 Mon Sep 17 00:00:00 2001
From: Karol Herbst
Date: Sat, 11 Apr 2026 15:23:20 +0200
Subject: [PATCH] nak: scalarize tex, tld and tld4 on SM70+

This helps RA a bit by reducing the size of the vectors passed to tex
instructions and therefore eliminating a few movs.

Totals from 145533 (12.51% of 1163204) affected shaders:
CodeSize: 1868329120 -> 1855817520 (-0.67%); split: -0.70%, +0.03%
Number of GPRs: 7007196 -> 7007028 (-0.00%); split: -0.01%, +0.01%
Static cycle count: 1157484762 -> 1153189018 (-0.37%); split: -0.46%, +0.09%
Spills to reg: 30581 -> 30580 (-0.00%)
Fills from reg: 33263 -> 33262 (-0.00%)
Max warps/SM: 5911104 -> 5911100 (-0.00%); split: +0.00%, -0.00%

Reviewed-by: Mel Henning
Part-of:
---
 src/nouveau/compiler/nak_nir_lower_tex.c | 66 ++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/src/nouveau/compiler/nak_nir_lower_tex.c b/src/nouveau/compiler/nak_nir_lower_tex.c
index 298a6ea328b..bdbc5c77b66 100644
--- a/src/nouveau/compiler/nak_nir_lower_tex.c
+++ b/src/nouveau/compiler/nak_nir_lower_tex.c
@@ -124,6 +124,68 @@ remap_sampler_dim(enum glsl_sampler_dim dim)
    }
 }
 
+static bool
+texop_supports_scalar(nir_tex_instr *tex, const struct nak_compiler *nak)
+{
+   if (nak->sm < 70)
+      return false;
+
+   switch (tex->op) {
+   case nir_texop_tex:
+   case nir_texop_tg4:
+   case nir_texop_txb:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txl:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static bool
+try_scalarize_tex(nir_tex_instr *tex, const struct nak_compiler *nak,
+                  nir_def **src0, unsigned *src0_comps,
+                  nir_def **src1, unsigned *src1_comps)
+{
+   if (!texop_supports_scalar(tex, nak))
+      return false;
+
+   /* For SM70+ we simply rebalance the sources if there are 4 or fewer.
+    * TODO: it might be worth rebalancing 3 + 1 or 1 + 3 combinations,
+    * but so far this seems to be a net negative.
+    */
+   if (nak->sm >= 70 && *src1_comps == 0) {
+      assert(*src0_comps <= 4);
+      switch (*src0_comps) {
+      case 4:
+         src1[0] = src0[2];
+         src1[1] = src0[3];
+         *src0_comps = 2;
+         *src1_comps = 2;
+         return true;
+      case 3:
+         src1[0] = src0[2];
+         *src0_comps = 2;
+         *src1_comps = 1;
+         return true;
+      case 2:
+         src1[0] = src0[1];
+         *src0_comps = 1;
+         *src1_comps = 1;
+         return true;
+      case 1:
+         /* Nothing to do */
+         return true;
+      default:
+         UNREACHABLE("unexpected src0 comps");
+         return false;
+      }
+   }
+
+   return false;
+}
+
 static bool
 lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
 {
@@ -257,6 +319,7 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
       a[a##_comps++] = val;                     \
    } while(0)
 
+   bool scalar = false;
    if (nak->sm >= 50) {
       nir_def *src0[4] = { NULL, };
       nir_def *src1[4] = { NULL, };
@@ -312,6 +375,8 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
          PUSH(src1, z_cmpr);
       }
 
+      scalar = try_scalarize_tex(tex, nak, src0, &src0_comps, src1, &src1_comps);
+
       nir_tex_instr_add_src(tex, nir_tex_src_backend1,
                             nir_vec(b, src0, src0_comps));
 
@@ -387,6 +452,7 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
       .has_z_cmpr = tex->is_shadow,
       .is_sparse = tex->is_sparse,
       .nodep = tex->skip_helpers,
+      .scalar = scalar,
    };
    STATIC_ASSERT(sizeof(flags) == sizeof(tex->backend_flags));
    memcpy(&tex->backend_flags, &flags, sizeof(flags));