i915/corm: add late scalarization as variant dimension

Some shaders produce better code when fully scalarized after
optimization: vec3(a, b, 1.0) feeding a dot product creates a
cross-register vec construction, but scalarizing the fmul exposes
1.0*1.0 to constant folding, eliminating the vec entirely.

Other shaders are worse fully scalar because corm's vec construction
handles same_reg vecs at zero cost. Add late_scalar as a variant
dimension so the multi-variant framework picks whichever is better
per shader.

shader-db (I915_FS=nir): 254/403 compiled, 4063 alu
shader-db (I915_FS=both): nir won 254 (26 identical, 1 tied, 221 better, 6 only),
  36 TGSI, 113 neither

Assisted-by: Claude
This commit is contained in:
Adam Jackson 2026-05-07 12:40:35 -04:00
parent bfbba3f3b4
commit 6e38f519e0
2 changed files with 15 additions and 6 deletions

View file

@ -185,6 +185,7 @@ extern void i915_translate_fragment_program(struct i915_context *i915,
/*
 * Per-variant compile options handed to
 * i915_translate_fragment_program_nir().  i915_create_fs_state() keeps a
 * table of option combinations, translates the shader once per entry and
 * tracks the best successful result.
 */
struct corm_compile_opts {
/* NOTE(review): semantics are not visible from this header; presumably
 * consumed inside the corm backend -- confirm against the translator. */
bool deferred_const;
/* NOTE(review): semantics are not visible from this header; presumably
 * toggles a SEQ/SNE-related optimization in the backend -- confirm. */
bool seq_sne_opt;
/* When set, i915_create_fs_state() runs nir_lower_alu_to_scalar followed
 * by copy-prop, algebraic and DCE passes on the variant's NIR clone (and
 * re-indexes its SSA defs) before translating it. */
bool late_scalar;
};
extern void i915_translate_fragment_program_nir(struct i915_context *i915,

View file

@ -741,6 +741,10 @@ i915_create_fs_state(struct pipe_context *pipe,
{ .deferred_const = false, .seq_sne_opt = true },
{ .deferred_const = true, .seq_sne_opt = false },
{ .deferred_const = true, .seq_sne_opt = true },
{ .deferred_const = false, .seq_sne_opt = false, .late_scalar = true },
{ .deferred_const = false, .seq_sne_opt = true, .late_scalar = true },
{ .deferred_const = true, .seq_sne_opt = false, .late_scalar = true },
{ .deferred_const = true, .seq_sne_opt = true, .late_scalar = true },
};
struct i915_fragment_shader nir_results[ARRAY_SIZE(corm_variants)];
@ -764,14 +768,19 @@ i915_create_fs_state(struct pipe_context *pipe,
nir_index_ssa_defs(nir_shader_get_entrypoint(nir_s));
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
nir_shader *variant_nir = (v == ARRAY_SIZE(corm_variants) - 1)
? nir_s : nir_shader_clone(NULL, nir_s);
nir_shader *variant_nir = nir_shader_clone(NULL, nir_s);
if (corm_variants[v].late_scalar) {
NIR_PASS(_, variant_nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS(_, variant_nir, nir_opt_copy_prop);
NIR_PASS(_, variant_nir, nir_opt_algebraic);
NIR_PASS(_, variant_nir, nir_opt_dce);
nir_index_ssa_defs(nir_shader_get_entrypoint(variant_nir));
}
memset(&nir_results[v], 0, sizeof(nir_results[v]));
i915_populate_fs_metadata(&nir_results[v], variant_nir);
i915_translate_fragment_program_nir(i915, &nir_results[v],
variant_nir, &corm_variants[v]);
if (v < ARRAY_SIZE(corm_variants) - 1)
ralloc_free(variant_nir);
ralloc_free(variant_nir);
bool ok = !nir_results[v].error || !nir_results[v].error[0];
if (ok && (best_nir < 0 ||
@ -779,8 +788,7 @@ i915_create_fs_state(struct pipe_context *pipe,
best_nir = v;
}
if (try_tgsi)
ralloc_free(nir_s);
ralloc_free(nir_s);
}
if (try_tgsi) {