mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 17:50:12 +01:00
lima: Move fdot lowering from NIR to lima
This change relocates the fdot lowering from generic NIR into the lima driver, since lima is the only consumer of this particular lowering. This avoids potential conflicts with the similar fdot lowering already present in nir_lower_alu_width.
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34757>
This commit is contained in:
parent
0f747d0990
commit
f17d350001
6 changed files with 12 additions and 11 deletions
|
|
@ -510,12 +510,6 @@ optimizations.extend([
|
|||
(('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
|
||||
(('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
|
||||
|
||||
# Lower fdot to fsum when it is available
|
||||
(('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
|
||||
(('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
|
||||
(('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
|
||||
(('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
|
||||
|
||||
# If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
|
||||
# If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
|
||||
# If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
|
||||
|
|
|
|||
|
|
@ -292,9 +292,6 @@ typedef struct nir_shader_compiler_options {
|
|||
/* lower fdph to fdot4 */
|
||||
bool lower_fdph;
|
||||
|
||||
/** lower fdot to fmul and fsum/fadd. */
|
||||
bool lower_fdot;
|
||||
|
||||
/* Does the native fdot instruction replicate its result for four
|
||||
* components? If so, then opt_algebraic_late will turn all fdotN
|
||||
* instructions into fdotN_replicated instructions.
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ struct ra_regs *ppir_regalloc_init(void *mem_ctx);
|
|||
void lima_nir_lower_uniform_to_scalar(nir_shader *shader);
|
||||
bool lima_nir_scale_trig(nir_shader *shader);
|
||||
bool lima_nir_lower_ftrunc(nir_shader *shader);
|
||||
bool lima_nir_lower_fdot(nir_shader *shader);
|
||||
bool lima_nir_split_load_input(nir_shader *shader);
|
||||
bool lima_nir_split_loads(nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,14 @@ lower_ftrunc = [
|
|||
(('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a')))))
|
||||
]
|
||||
|
||||
lower_fdot = [
|
||||
# Lower fdotN to fsumN of an fmul; fsum2 is further lowered to an fadd of its two components
|
||||
(('fdot2', 'a', 'b'), ('fsum2', ('fmul', 'a', 'b'))),
|
||||
(('fdot3', 'a', 'b'), ('fsum3', ('fmul', 'a', 'b'))),
|
||||
(('fdot4', 'a', 'b'), ('fsum4', ('fmul', 'a', 'b'))),
|
||||
(('fsum2', 'a'), ('fadd', 'a.x', 'a.y')),
|
||||
]
|
||||
|
||||
# PP fuse clamp_positive. Shared with Midgard/Bifrost
|
||||
ppir_algebraic_late = [
|
||||
(('fmax', 'a', 0.0), ('fclamp_pos', 'a')),
|
||||
|
|
@ -59,6 +67,8 @@ def run():
|
|||
scale_trig).render())
|
||||
print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc",
|
||||
lower_ftrunc).render())
|
||||
print(nir_algebraic.AlgebraicPass("lima_nir_lower_fdot",
|
||||
lower_fdot).render())
|
||||
print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late",
|
||||
ppir_algebraic_late).render())
|
||||
|
||||
|
|
|
|||
|
|
@ -78,7 +78,6 @@ static const nir_shader_compiler_options fs_nir_options = {
|
|||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_fsign = true,
|
||||
.lower_fdot = true,
|
||||
.lower_fdph = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
|
|
@ -260,6 +259,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
|
|||
};
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, &peephole_select_options);
|
||||
NIR_PASS(progress, s, nir_opt_algebraic);
|
||||
NIR_PASS(progress, s, lima_nir_lower_fdot);
|
||||
NIR_PASS(progress, s, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, s, nir_opt_undef);
|
||||
NIR_PASS(progress, s, nir_opt_loop_unroll);
|
||||
|
|
|
|||
|
|
@ -3590,7 +3590,6 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
|
|||
op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
|
||||
op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
|
||||
op.lower_fdph = false;
|
||||
op.lower_fdot = false;
|
||||
op.fdot_replicates = false; // TODO
|
||||
op.lower_ffloor = false; // TODO
|
||||
op.lower_ffract = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue