mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-21 10:50:23 +01:00
nir: Add opcodes for saturated vector math.
This corresponds to instructions used on vc4 for its blending inside of
shaders. I've seen these opcodes on other architectures before, but I
think it's the first time these are needed in Mesa.
v2: Rename to 'u' instead of 'i', since they're all 'u'norm (from review
by jekstrand)
This commit is contained in:
parent
1066a372d8
commit
5b2fb138bc
2 changed files with 51 additions and 0 deletions
|
|
@ -468,6 +468,51 @@ binop("fmax", tfloat, "", "fmaxf(src0, src1)")
|
|||
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
|
||||
binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
|
||||
|
||||
# Saturated add of four packed unsigned 8-bit channels: every byte lane of
# src0 and src1 is summed independently and the sum is clamped to 0xff.
binop("usadd_4x8", tint, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
   /* Shift as unsigned: a lane value >= 0x80 moved into bits 24..31 does
    * not fit in a signed int, and overflowing a signed left shift is
    * undefined behavior in C.
    */
   dst |= (uint32_t)MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
}
""")
|
||||
|
||||
# Saturated subtract of four packed unsigned 8-bit channels: every byte lane
# computes src0 - src1, and lanes where src1 >= src0 produce 0.
binop("ussub_4x8", tint, "", """
dst = 0;
for (int i = 0; i < 32; i += 8) {
   int src0_chan = (src0 >> i) & 0xff;
   int src1_chan = (src1 >> i) & 0xff;
   /* Lanes that would go negative are simply left at 0 (saturation).  The
    * difference is shifted as unsigned because a value >= 0x80 moved into
    * bits 24..31 would overflow a signed int, which is undefined in C.
    */
   if (src0_chan > src1_chan)
      dst |= (uint32_t)(src0_chan - src1_chan) << i;
}
""")
|
||||
|
||||
# Per-channel minimum of four packed unsigned 8-bit channels.
binop("umin_4x8", tint, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
   /* Shift as unsigned: a lane value >= 0x80 moved into bits 24..31 would
    * overflow a signed int, which is undefined behavior in C.
    */
   dst |= (uint32_t)MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
}
""")
|
||||
|
||||
# Per-channel maximum of four packed unsigned 8-bit channels.
binop("umax_4x8", tint, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
   /* Shift as unsigned: a lane value >= 0x80 moved into bits 24..31 would
    * overflow a signed int, which is undefined behavior in C.
    */
   dst |= (uint32_t)MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
}
""")
|
||||
|
||||
# unorm multiply of four packed unsigned 8-bit channels: every byte lane
# computes (a * b) / 255 with truncating integer division.
#
# The operation is commutative but NOT associative, because the truncation
# happens after each multiply.  For the lane values 20, 20 and 200:
#    ((20 * 20) / 255 * 200) / 255 == 0
#    (20 * ((20 * 200) / 255)) / 255 == 1
binop("umul_unorm_4x8", tint, commutative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
   int src0_chan = (src0 >> i) & 0xff;
   int src1_chan = (src1 >> i) & 0xff;
   /* Shift as unsigned: a lane result >= 0x80 moved into bits 24..31 would
    * overflow a signed int, which is undefined behavior in C.
    */
   dst |= (uint32_t)((src0_chan * src1_chan) / 255) << i;
}
""")
|
||||
|
||||
binop("fpow", tfloat, "", "powf(src0, src1)")
|
||||
|
||||
binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
|
||||
|
|
|
|||
|
|
@ -56,12 +56,16 @@ optimizations = [
|
|||
(('iabs', ('ineg', a)), ('iabs', a)),
|
||||
(('fadd', a, 0.0), a),
|
||||
(('iadd', a, 0), a),
|
||||
(('usadd_4x8', a, 0), a),
|
||||
(('usadd_4x8', a, ~0), ~0),
|
||||
(('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
|
||||
(('fadd', ('fneg', a), a), 0.0),
|
||||
(('iadd', ('ineg', a), a), 0),
|
||||
(('fmul', a, 0.0), 0.0),
|
||||
(('imul', a, 0), 0),
|
||||
(('umul_unorm_4x8', a, 0), 0),
|
||||
(('umul_unorm_4x8', a, ~0), a),
|
||||
(('fmul', a, 1.0), a),
|
||||
(('imul', a, 1), a),
|
||||
(('fmul', a, -1.0), ('fneg', a)),
|
||||
|
|
@ -201,6 +205,8 @@ optimizations = [
|
|||
# Subtracts
|
||||
(('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
|
||||
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
|
||||
(('ussub_4x8', a, 0), a),
|
||||
(('ussub_4x8', a, ~0), 0),
|
||||
(('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
|
||||
(('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
|
||||
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue