lima: Move fdot lowering from NIR to lima

This change moves the fdot lowering out of the generic NIR code and into
lima, since lima is the only consumer of this particular lowering. This
avoids potential conflicts with the similar fdot lowering already present
in nir_lower_alu_width.

Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34757>
This commit is contained in:
Christian Gmeiner 2025-04-30 11:38:20 +02:00 committed by Marge Bot
parent 0f747d0990
commit f17d350001
6 changed files with 12 additions and 11 deletions

View file

@ -510,12 +510,6 @@ optimizations.extend([
(('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)), (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
(('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')), (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
# Lower fdot to fsum when it is available
(('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
(('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
(('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
(('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
# If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
# If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1 # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
# If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0 # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0

View file

@ -292,9 +292,6 @@ typedef struct nir_shader_compiler_options {
/* lower fdph to fdot4 */ /* lower fdph to fdot4 */
bool lower_fdph; bool lower_fdph;
/** lower fdot to fmul and fsum/fadd. */
bool lower_fdot;
/* Does the native fdot instruction replicate its result for four /* Does the native fdot instruction replicate its result for four
* components? If so, then opt_algebraic_late will turn all fdotN * components? If so, then opt_algebraic_late will turn all fdotN
* instructions into fdotN_replicated instructions. * instructions into fdotN_replicated instructions.

View file

@ -66,6 +66,7 @@ struct ra_regs *ppir_regalloc_init(void *mem_ctx);
void lima_nir_lower_uniform_to_scalar(nir_shader *shader); void lima_nir_lower_uniform_to_scalar(nir_shader *shader);
bool lima_nir_scale_trig(nir_shader *shader); bool lima_nir_scale_trig(nir_shader *shader);
bool lima_nir_lower_ftrunc(nir_shader *shader); bool lima_nir_lower_ftrunc(nir_shader *shader);
bool lima_nir_lower_fdot(nir_shader *shader);
bool lima_nir_split_load_input(nir_shader *shader); bool lima_nir_split_load_input(nir_shader *shader);
bool lima_nir_split_loads(nir_shader *shader); bool lima_nir_split_loads(nir_shader *shader);

View file

@ -37,6 +37,14 @@ lower_ftrunc = [
(('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a'))))) (('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a')))))
] ]
# Algebraic rules that lower the fdot2/fdot3/fdot4 dot products for lima.
# Each entry is a (search, replace) pair. None of the entries carries a
# condition expression, so the rules are not gated on any compiler option.
lower_fdot = [
# Lower fdot to fsum: fdotN(a, b) becomes fsumN(fmul(a, b)).
(('fdot2', 'a', 'b'), ('fsum2', ('fmul', 'a', 'b'))),
(('fdot3', 'a', 'b'), ('fsum3', ('fmul', 'a', 'b'))),
(('fdot4', 'a', 'b'), ('fsum4', ('fmul', 'a', 'b'))),
# fsum2 is rewritten as an fadd of the x and y components of its operand.
# No rule in this list rewrites fsum3 or fsum4.
(('fsum2', 'a'), ('fadd', 'a.x', 'a.y')),
]
# PP fuse clamp_positive. Shared with Midgard/Bifrost # PP fuse clamp_positive. Shared with Midgard/Bifrost
ppir_algebraic_late = [ ppir_algebraic_late = [
(('fmax', 'a', 0.0), ('fclamp_pos', 'a')), (('fmax', 'a', 0.0), ('fclamp_pos', 'a')),
@ -59,6 +67,8 @@ def run():
scale_trig).render()) scale_trig).render())
print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc", print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc",
lower_ftrunc).render()) lower_ftrunc).render())
print(nir_algebraic.AlgebraicPass("lima_nir_lower_fdot",
lower_fdot).render())
print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late", print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late",
ppir_algebraic_late).render()) ppir_algebraic_late).render())

View file

@ -78,7 +78,6 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_flrp32 = true, .lower_flrp32 = true,
.lower_flrp64 = true, .lower_flrp64 = true,
.lower_fsign = true, .lower_fsign = true,
.lower_fdot = true,
.lower_fdph = true, .lower_fdph = true,
.lower_insert_byte = true, .lower_insert_byte = true,
.lower_insert_word = true, .lower_insert_word = true,
@ -260,6 +259,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
}; };
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options); NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, lima_nir_lower_fdot);
NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_opt_undef);
NIR_PASS(progress, s, nir_opt_loop_unroll); NIR_PASS(progress, s, nir_opt_loop_unroll);

View file

@ -3590,7 +3590,6 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
op.lower_isign = (chipset >= NVISA_GV100_CHIPSET); op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET); op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
op.lower_fdph = false; op.lower_fdph = false;
op.lower_fdot = false;
op.fdot_replicates = false; // TODO op.fdot_replicates = false; // TODO
op.lower_ffloor = false; // TODO op.lower_ffloor = false; // TODO
op.lower_ffract = true; op.lower_ffract = true;