diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index eda8d11cb3d..b39c7b57498 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1097,6 +1097,27 @@ if (bits == 0) {
 }
 """)
 
+# Sum of absolute differences with accumulate: src0 and src1 are each split
+# into four unsigned bytes, and the sum of the absolute per-byte differences
+# is added to src2.
+triop_horiz("sad_u8x4", 1, 1, 1, 1, """
+uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0;
+uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8;
+uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16;
+uint8_t s0_b3 = (src0.x & 0xff000000) >> 24;
+
+uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0;
+uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8;
+uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16;
+uint8_t s1_b3 = (src1.x & 0xff000000) >> 24;
+
+dst.x = src2.x +
+        (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
+        (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
+        (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
+        (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
+""")
+
 # Combines the first component of each input to make a 3-component vector.
 
 triop_horiz("vec3", 3, 1, 1, 1, """
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 501084f14f5..e18d0446e3e 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1579,6 +1579,13 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
       case nir_op_u2u32:
          res = MIN2(src0, max);
          break;
+      case nir_op_sad_u8x4:
+         /* The four byte differences add at most 4 * 255 to the accumulator.
+          * Do the add in 64 bits and clamp to max so that a large bound on
+          * src2 cannot wrap around into a bogus small upper bound.
+          */
+         res = MIN2((uint64_t)src2 + 4 * 255, max);
+         break;
       default:
          res = max;
          break;