mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
i915/corm: add late scalarization as variant dimension
Some shaders produce better code when fully scalarized after optimization: vec3(a, b, 1.0) feeding a dot product creates a cross-register vec construction, but scalarizing the fmul exposes 1.0*1.0 to constant folding, eliminating the vec entirely. Other shaders are worse when fully scalarized because corm's vec construction handles same_reg vecs at zero cost.

Add late_scalar as a variant dimension so the multi-variant framework picks whichever is better per shader.

shader-db (I915_FS=nir): 254/403 compiled, 4063 alu
shader-db (I915_FS=both): nir won 254 (26 identical, 1 tied, 221 better, 6 only), 36 TGSI, 113 neither

Assisted-by: Claude
This commit is contained in:
parent
bfbba3f3b4
commit
6e38f519e0
2 changed files with 15 additions and 6 deletions
|
|
@ -185,6 +185,7 @@ extern void i915_translate_fragment_program(struct i915_context *i915,
|
|||
/*
 * Per-variant option switches for the corm NIR fragment-program path.
 *
 * A pointer to one of these is passed as the last argument of
 * i915_translate_fragment_program_nir(). i915_create_fs_state() keeps a
 * table of option combinations (corm_variants[]) and translates the shader
 * once per table entry.
 */
struct corm_compile_opts {
|
||||
/* NOTE(review): semantics are not visible in this hunk; presumably
 * consumed inside the corm translator -- confirm there. */
bool deferred_const;
|
||||
/* NOTE(review): semantics are not visible in this hunk; presumably
 * consumed inside the corm translator -- confirm there. */
bool seq_sne_opt;
|
||||
/* When set, i915_create_fs_state() runs nir_lower_alu_to_scalar followed
 * by nir_opt_copy_prop, nir_opt_algebraic and nir_opt_dce on this
 * variant's NIR clone, then re-indexes SSA defs, before translating it. */
bool late_scalar;
|
||||
};
|
||||
|
||||
extern void i915_translate_fragment_program_nir(struct i915_context *i915,
|
||||
|
|
|
|||
|
|
@ -741,6 +741,10 @@ i915_create_fs_state(struct pipe_context *pipe,
|
|||
{ .deferred_const = false, .seq_sne_opt = true },
|
||||
{ .deferred_const = true, .seq_sne_opt = false },
|
||||
{ .deferred_const = true, .seq_sne_opt = true },
|
||||
{ .deferred_const = false, .seq_sne_opt = false, .late_scalar = true },
|
||||
{ .deferred_const = false, .seq_sne_opt = true, .late_scalar = true },
|
||||
{ .deferred_const = true, .seq_sne_opt = false, .late_scalar = true },
|
||||
{ .deferred_const = true, .seq_sne_opt = true, .late_scalar = true },
|
||||
};
|
||||
|
||||
struct i915_fragment_shader nir_results[ARRAY_SIZE(corm_variants)];
|
||||
|
|
@ -764,14 +768,19 @@ i915_create_fs_state(struct pipe_context *pipe,
|
|||
nir_index_ssa_defs(nir_shader_get_entrypoint(nir_s));
|
||||
|
||||
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
|
||||
nir_shader *variant_nir = (v == ARRAY_SIZE(corm_variants) - 1)
|
||||
? nir_s : nir_shader_clone(NULL, nir_s);
|
||||
nir_shader *variant_nir = nir_shader_clone(NULL, nir_s);
|
||||
if (corm_variants[v].late_scalar) {
|
||||
NIR_PASS(_, variant_nir, nir_lower_alu_to_scalar, NULL, NULL);
|
||||
NIR_PASS(_, variant_nir, nir_opt_copy_prop);
|
||||
NIR_PASS(_, variant_nir, nir_opt_algebraic);
|
||||
NIR_PASS(_, variant_nir, nir_opt_dce);
|
||||
nir_index_ssa_defs(nir_shader_get_entrypoint(variant_nir));
|
||||
}
|
||||
memset(&nir_results[v], 0, sizeof(nir_results[v]));
|
||||
i915_populate_fs_metadata(&nir_results[v], variant_nir);
|
||||
i915_translate_fragment_program_nir(i915, &nir_results[v],
|
||||
variant_nir, &corm_variants[v]);
|
||||
if (v < ARRAY_SIZE(corm_variants) - 1)
|
||||
ralloc_free(variant_nir);
|
||||
ralloc_free(variant_nir);
|
||||
|
||||
bool ok = !nir_results[v].error || !nir_results[v].error[0];
|
||||
if (ok && (best_nir < 0 ||
|
||||
|
|
@ -779,8 +788,7 @@ i915_create_fs_state(struct pipe_context *pipe,
|
|||
best_nir = v;
|
||||
}
|
||||
|
||||
if (try_tgsi)
|
||||
ralloc_free(nir_s);
|
||||
ralloc_free(nir_s);
|
||||
}
|
||||
|
||||
if (try_tgsi) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue