nir: Add nir_op_sad_u8x4 which corresponds to AMD's v_sad_u8.

NIR currently doesn't have any intrinsics for a horizontal packed add,
so this one is modeled after AMD's v_sad_u8.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11072>
This commit is contained in:
Timur Kristóf 2021-05-28 21:53:06 +02:00 committed by Marge Bot
parent 228169c87c
commit c92dab8e2b
2 changed files with 21 additions and 0 deletions

View file

@ -1097,6 +1097,24 @@ if (bits == 0) {
}
""")
triop_horiz("sad_u8x4", 1, 1, 1, 1, """
uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0;
uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8;
uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16;
uint8_t s0_b3 = (src0.x & 0xff000000) >> 24;
uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0;
uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8;
uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16;
uint8_t s1_b3 = (src1.x & 0xff000000) >> 24;
dst.x = src2.x +
(s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
(s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
(s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
(s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
""")
# Combines the first component of each input to make a 3-component vector.
triop_horiz("vec3", 3, 1, 1, 1, """

View file

@ -1579,6 +1579,9 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
case nir_op_u2u32:
res = MIN2(src0, max);
break;
case nir_op_sad_u8x4:
res = src2 + 4 * 255;
break;
default:
res = max;
break;