diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index d88de408239..9d1c0ce30eb 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -510,12 +510,6 @@ optimizations.extend([ (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)), (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')), - # Lower fdot to fsum when it is available - (('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'), - (('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'), - (('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'), - (('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'), - # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1 # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0 diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h index e976b9e547b..db9f4e11ee5 100644 --- a/src/compiler/nir/nir_shader_compiler_options.h +++ b/src/compiler/nir/nir_shader_compiler_options.h @@ -292,9 +292,6 @@ typedef struct nir_shader_compiler_options { /* lower fdph to fdot4 */ bool lower_fdph; - /** lower fdot to fmul and fsum/fadd. */ - bool lower_fdot; - /* Does the native fdot instruction replicate its result for four * components? If so, then opt_algebraic_late will turn all fdotN * instructions into fdotN_replicated instructions. diff --git a/src/gallium/drivers/lima/ir/lima_ir.h b/src/gallium/drivers/lima/ir/lima_ir.h index d94e3f70637..fbc3f2cd8c9 100644 --- a/src/gallium/drivers/lima/ir/lima_ir.h +++ b/src/gallium/drivers/lima/ir/lima_ir.h @@ -66,6 +66,7 @@ struct ra_regs *ppir_regalloc_init(void *mem_ctx); void lima_nir_lower_uniform_to_scalar(nir_shader *shader); bool lima_nir_scale_trig(nir_shader *shader); bool lima_nir_lower_ftrunc(nir_shader *shader); +bool lima_nir_lower_fdot(nir_shader *shader); bool lima_nir_split_load_input(nir_shader *shader); bool lima_nir_split_loads(nir_shader *shader); diff --git a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py index 3074c839717..fb0e53b9074 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py +++ b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py @@ -37,6 +37,14 @@ lower_ftrunc = [ (('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a'))))) ] +lower_fdot = [ + # Lower fdot to fsum + (('fdot2', 'a', 'b'), ('fsum2', ('fmul', 'a', 'b'))), + (('fdot3', 'a', 'b'), ('fsum3', ('fmul', 'a', 'b'))), + (('fdot4', 'a', 'b'), ('fsum4', ('fmul', 'a', 'b'))), + (('fsum2', 'a'), ('fadd', 'a.x', 'a.y')), +] + # PP fuse clamp_positive. Shared with Midgard/Bifrost ppir_algebraic_late = [ (('fmax', 'a', 0.0), ('fclamp_pos', 'a')), @@ -59,6 +67,8 @@ def run(): scale_trig).render()) print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc", lower_ftrunc).render()) + print(nir_algebraic.AlgebraicPass("lima_nir_lower_fdot", + lower_fdot).render()) print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late", ppir_algebraic_late).render()) diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 1b5093030aa..f9fb395a02e 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -78,7 +78,6 @@ static const nir_shader_compiler_options fs_nir_options = { .lower_flrp32 = true, .lower_flrp64 = true, .lower_fsign = true, - .lower_fdot = true, .lower_fdph = true, .lower_insert_byte = true, .lower_insert_word = true, @@ -260,6 +259,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s, }; NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options); NIR_PASS(progress, s, nir_opt_algebraic); + NIR_PASS(progress, s, lima_nir_lower_fdot); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_opt_loop_unroll); diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp index eb4af297f24..ccb32c29e7d 100644 --- a/src/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3590,7 +3590,6 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type) op.lower_isign = (chipset >= NVISA_GV100_CHIPSET); op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET); op.lower_fdph = false; - op.lower_fdot = false; op.fdot_replicates = false; // TODO op.lower_ffloor = false; // TODO op.lower_ffract = true;