r300: explicitly check if sin/cos input is already in correct range

before applying the input range normalization. This allows moving the pass
to finalize nir later without worrying that we would apply the fixup twice,
and also saves a few instructions in wined3d shaders, where d3d9 already
guarantees the correct input range.

RV530 shader-db (and similarly for R4xx) improves a few Anno1404 shaders:
total instructions in shared programs: 129040 -> 129022 (-0.01%)
instructions in affected programs: 310 -> 292 (-5.81%)
helped: 5
HURT: 0

no change on RV370

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28070>
This commit is contained in:
Pavel Ondračka 2024-03-07 21:01:41 +01:00 committed by Marge Bot
parent 67fd490fe5
commit ba1bc94233
2 changed files with 80 additions and 4 deletions

View file

@ -23,6 +23,8 @@
#ifndef R300_NIR_H
#define R300_NIR_H
#include <math.h>
#include "pipe/p_screen.h"
#include "compiler/nir/nir.h"
@ -78,6 +80,80 @@ is_only_used_by_load_ubo_vec4(const nir_alu_instr *instr)
return true;
}
/**
 * Check whether *instr is an ALU instruction with opcode op that has a
 * constant operand approximately equal to value.
 *
 * Only the first two sources of the instruction are examined. A constant
 * source is accepted when every component of the result reads the same
 * swizzle lane and that lane is within 1e-5 of value.
 *
 * On success *instr is advanced to the instruction producing the other
 * operand and true is returned. On failure *instr is left unchanged and
 * false is returned.
 */
static inline bool
check_instr_and_src_value(nir_op op, nir_instr **instr, double value)
{
   nir_instr *candidate = *instr;
   if (candidate->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(candidate);
   if (alu->op != op)
      return false;

   for (unsigned idx = 0; idx < 2; idx++) {
      nir_alu_src operand = alu->src[idx];
      if (!nir_src_is_const(operand.src))
         continue;

      /* A constant read through a non-uniform swizzle rejects the whole
       * instruction; the remaining source is deliberately not examined.
       */
      for (unsigned lane = 0; lane < alu->def.num_components - 1; lane++) {
         if (operand.swizzle[lane] != operand.swizzle[lane + 1])
            return false;
      }

      double constant = nir_src_comp_as_float(operand.src, operand.swizzle[0]);
      if (fabs(constant - value) < 1e-5) {
         /* Continue the walk through the non-constant side. */
         *instr = alu->src[1 - idx].src.ssa->parent_instr;
         return true;
      }
   }

   /* Neither of the two sources carried the expected constant. */
   return false;
}
/**
 * Search-condition helper for the vertex shader sin/cos input rewrite.
 *
 * Returns false when source number src of instr is already produced by
 * fadd(fmul(ffract(a), 2 * pi), -pi), which is the form emitted by the
 * normalization itself and by some wined3d shaders. Returns true in every
 * other case, meaning the range fixup still has to be applied.
 *
 * The ht, num_components and swizzle parameters are not used.
 */
static inline bool
needs_vs_trig_input_fixup(UNUSED struct hash_table *ht, const nir_alu_instr *instr, unsigned src,
                          unsigned num_components, const uint8_t *swizzle)
{
   nir_instr *cursor = instr->src[src].src.ssa->parent_instr;

   /* Walk the chain from the outside in; each successful step moves cursor
    * to the producer of the non-constant operand:
    *   1. fadd(x, -pi)
    *   2. fmul(x, 2 * pi)
    *   3. ffract(x)
    */
   bool already_normalized =
      check_instr_and_src_value(nir_op_fadd, &cursor, -3.141592) &&
      check_instr_and_src_value(nir_op_fmul, &cursor, 6.283185) &&
      cursor->type == nir_instr_type_alu &&
      nir_instr_as_alu(cursor)->op == nir_op_ffract;

   return !already_normalized;
}
/**
 * Search-condition helper for the fragment shader sin/cos input rewrite.
 *
 * Returns false when source number src of instr is already produced by
 * ffract(fmul(a, 1 / (2 * pi))). Returns true in every other case, meaning
 * the range fixup still has to be applied.
 *
 * The ht, num_components and swizzle parameters are not used.
 */
static inline bool
needs_fs_trig_input_fixup(UNUSED struct hash_table *ht, const nir_alu_instr *instr, unsigned src,
                          unsigned num_components, const uint8_t *swizzle)
{
   /* The direct producer of the operand has to be an ffract. */
   nir_instr *producer = instr->src[src].src.ssa->parent_instr;
   if (producer->type != nir_instr_type_alu)
      return true;

   nir_alu_instr *fract = nir_instr_as_alu(producer);
   if (fract->op != nir_op_ffract)
      return true;

   /* The ffract operand in turn has to be fmul(a, 1 / (2 * pi)). */
   nir_instr *scaled = fract->src[0].src.ssa->parent_instr;
   return !check_instr_and_src_value(nir_op_fmul, &scaled, 0.1591549);
}
bool r300_is_only_used_as_float(const nir_alu_instr *instr);
char *r300_finalize_nir(struct pipe_screen *pscreen, void *nir);

View file

@ -37,8 +37,8 @@ e = 'e'
# y = frac(x / 2PI + 0.5) * 2PI - PI
#
transform_trig_input_vs_r500 = [
# NOTE(review): the two unconditional 'a' entries below look like the
# pre-change lines of this diff shown next to their replacements; as written
# they would match every fsin/fcos before the conditional entries are tried.
# Confirm against the actual file.
(('fsin', 'a'), ('fsin', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)) , 0.5)), 2 * pi), -pi))),
(('fcos', 'a'), ('fcos', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)) , 0.5)), 2 * pi), -pi))),
# 'a(needs_vs_trig_input_fixup)' attaches the C helper of that name as a
# condition on the operand, so the rewrite is skipped when the helper reports
# that the input already has the fadd(fmul(ffract(..), 2 * pi), -pi) form.
(('fsin', 'a(needs_vs_trig_input_fixup)'), ('fsin', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)) , 0.5)), 2 * pi), -pi))),
(('fcos', 'a(needs_vs_trig_input_fixup)'), ('fcos', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)) , 0.5)), 2 * pi), -pi))),
]
# Transform input to range [0, 1):
@ -46,8 +46,8 @@ transform_trig_input_vs_r500 = [
# y = frac(x / 2PI)
#
transform_trig_input_fs_r500 = [
# NOTE(review): the two unconditional 'a' entries below look like the
# pre-change lines of this diff shown next to their replacements; as written
# they would match every fsin/fcos before the conditional entries are tried.
# Confirm against the actual file.
(('fsin', 'a'), ('fsin', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
(('fcos', 'a'), ('fcos', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
# 'a(needs_fs_trig_input_fixup)' attaches the C helper of that name as a
# condition on the operand, so the rewrite is skipped when the helper reports
# that the input already has the ffract(fmul(.., 1 / (2 * pi))) form.
(('fsin', 'a(needs_fs_trig_input_fixup)'), ('fsin', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
(('fcos', 'a(needs_fs_trig_input_fixup)'), ('fcos', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
]
# This is a pattern produced by wined3d for A0 register load.