nak: scalarize tex, tld and tld4 on SM70+

This helps RA a bit by reducing the size of the vectors passed to tex
instructions and therefore eliminate a few movs.

Totals from 145533 (12.51% of 1163204) affected shaders:
CodeSize: 1868329120 -> 1855817520 (-0.67%); split: -0.70%, +0.03%
Number of GPRs: 7007196 -> 7007028 (-0.00%); split: -0.01%, +0.01%
Static cycle count: 1157484762 -> 1153189018 (-0.37%); split: -0.46%, +0.09%
Spills to reg: 30581 -> 30580 (-0.00%)
Fills from reg: 33263 -> 33262 (-0.00%)
Max warps/SM: 5911104 -> 5911100 (-0.00%); split: +0.00%, -0.00%

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40900>
This commit is contained in:
Karol Herbst 2026-04-11 15:23:20 +02:00 committed by Marge Bot
parent b6fb51caf5
commit c10b4b1e47

View file

@ -124,6 +124,68 @@ remap_sampler_dim(enum glsl_sampler_dim dim)
}
}
static bool
texop_supports_scalar(nir_tex_instr *tex, const struct nak_compiler *nak)
{
if (nak->sm < 70)
return false;
switch (tex->op) {
case nir_texop_tex:
case nir_texop_tg4:
case nir_texop_txb:
case nir_texop_txf:
case nir_texop_txf_ms:
case nir_texop_txl:
return true;
default:
return false;
}
}
static bool
try_scalarize_tex(nir_tex_instr *tex, const struct nak_compiler *nak,
nir_def **src0, unsigned *src0_comps,
nir_def **src1, unsigned *src1_comps)
{
if (!texop_supports_scalar(tex, nak))
return false;
/* for SM70+ we simply rebalance the sources if there are 4 or less
* TODO: it might be worth to rebalance 3 + 1 or 1 + 3 combinations,
* but so far this seems to be a net negative.
*/
if (nak->sm >= 70 && *src1_comps == 0) {
assert(*src0_comps <= 4);
switch (*src0_comps) {
case 4:
src1[0] = src0[2];
src1[1] = src0[3];
*src0_comps = 2;
*src1_comps = 2;
return true;
case 3:
src1[0] = src0[2];
*src0_comps = 2;
*src1_comps = 1;
return true;
case 2:
src1[0] = src0[1];
*src0_comps = 1;
*src1_comps = 1;
return true;
case 1:
/* Nothing to do */
return true;
default:
UNREACHABLE("unexpected src0 comps");
return false;
}
}
return false;
}
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
{
@ -257,6 +319,7 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
a[a##_comps++] = val; \
} while(0)
bool scalar = false;
if (nak->sm >= 50) {
nir_def *src0[4] = { NULL, };
nir_def *src1[4] = { NULL, };
@ -312,6 +375,8 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
PUSH(src1, z_cmpr);
}
scalar = try_scalarize_tex(tex, nak, src0, &src0_comps, src1, &src1_comps);
nir_tex_instr_add_src(tex, nir_tex_src_backend1,
nir_vec(b, src0, src0_comps));
@ -387,6 +452,7 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
.has_z_cmpr = tex->is_shadow,
.is_sparse = tex->is_sparse,
.nodep = tex->skip_helpers,
.scalar = scalar,
};
STATIC_ASSERT(sizeof(flags) == sizeof(tex->backend_flags));
memcpy(&tex->backend_flags, &flags, sizeof(flags));