mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
nir: add msad_4x8
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26907>
This commit is contained in:
parent
c511b8968a
commit
0477421f7d
4 changed files with 35 additions and 6 deletions
|
|
@ -3932,6 +3932,9 @@ typedef struct nir_shader_compiler_options {
|
|||
/** Backend supports uclz. */
|
||||
bool has_uclz;
|
||||
|
||||
/** Backend supports msad_4x8. */
|
||||
bool has_msad;
|
||||
|
||||
/**
|
||||
* Is this the Intel vec4 backend?
|
||||
*
|
||||
|
|
|
|||
|
|
@ -375,6 +375,18 @@ static uint32_t pack_2x16_to_unorm_10_2(uint32_t src0)
|
|||
return vfmul_v3d(vfsat_v3d(src0), 0x000303ff);
|
||||
}
|
||||
|
||||
/**
 * Masked sum of absolute differences with accumulation.
 *
 * src0 and src1 are each treated as four packed unsigned bytes.  For every
 * byte lane whose src0 (reference) byte is non-zero, the absolute difference
 * between the src0 and src1 bytes is added to the running total.  Lanes with
 * a zero reference byte contribute nothing.
 *
 * \param src0  packed reference bytes; a zero byte masks its lane out
 * \param src1  packed bytes compared against the reference
 * \param src2  32-bit value the sum starts from
 * \return src2 plus the per-lane absolute differences (uint32_t arithmetic)
 */
static uint32_t
msad(uint32_t src0, uint32_t src1, uint32_t src2)
{
   uint32_t accum = src2;

   /* Walk the four byte lanes from least to most significant. */
   for (unsigned shift = 0; shift < 32; shift += 8) {
      const uint8_t ref_byte = src0 >> shift;
      const uint8_t cmp_byte = src1 >> shift;

      /* A zero reference byte disables this lane. */
      if (ref_byte == 0)
         continue;

      accum += MAX2(ref_byte, cmp_byte) - MIN2(ref_byte, cmp_byte);
   }

   return accum;
}
|
||||
|
||||
/* Some typed vector structures to make things like src0.y work */
|
||||
typedef int8_t int1_t;
|
||||
typedef uint8_t uint1_t;
|
||||
|
|
|
|||
|
|
@ -1126,11 +1126,6 @@ if (bits == 0) {
|
|||
}
|
||||
""")
|
||||
|
||||
# Sum of absolute differences with accumulation.
|
||||
# (Equivalent to AMD's v_sad_u8 instruction.)
|
||||
# The first two sources contain packed 8-bit unsigned integers, the instruction
|
||||
# will calculate the absolute difference of these, and then add them together.
|
||||
# There is also a third source which is a 32-bit unsigned integer and added to the result.
|
||||
triop_horiz("sad_u8x4", 1, 1, 1, 1, """
|
||||
uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0;
|
||||
uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8;
|
||||
|
|
@ -1147,6 +1142,24 @@ dst.x = src2.x +
|
|||
(s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
|
||||
(s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
|
||||
(s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
|
||||
""", description = """
|
||||
Sum of absolute differences with accumulation. Equivalent to AMD's v_sad_u8 instruction.
|
||||
|
||||
The first two sources contain packed 8-bit unsigned integers, the instruction will
|
||||
calculate the absolute difference of these, and then add them together. There is also a
|
||||
third source which is a 32-bit unsigned integer and added to the result.
|
||||
""")
|
||||
|
||||
# msad_4x8: opcode declared with type tuint32 whose result expression defers
# to the msad(src0, src1, src2) helper. In that helper src0 supplies the
# reference bytes (zero bytes are skipped) and src2 is the starting sum.
triop("msad_4x8", tuint32, "", """
|
||||
dst = msad(src0, src1, src2);
|
||||
""", description = """
|
||||
Masked sum of absolute differences with accumulation. Equivalent to AMD's v_msad_u8
|
||||
instruction and DXIL's MSAD.
|
||||
|
||||
The first two sources contain packed 8-bit unsigned integers, the instruction
|
||||
will calculate the absolute difference of integers when src0's is non-zero, and
|
||||
then add them together. There is also a third source which is a 32-bit unsigned
|
||||
integer and added to the result.
|
||||
""")
|
||||
|
||||
# Combines the first component of each input to make a 3-component vector.
|
||||
|
|
|
|||
|
|
@ -1865,7 +1865,8 @@ get_alu_uub(struct analysis_state *state, struct uub_query q, uint32_t *result,
|
|||
*result = 1;
|
||||
break;
|
||||
case nir_op_sad_u8x4:
|
||||
*result = src[2] + 4 * 255;
|
||||
case nir_op_msad_4x8:
|
||||
*result = MIN2((uint64_t)src[2] + 4 * 255, UINT32_MAX);
|
||||
break;
|
||||
case nir_op_extract_u8:
|
||||
*result = MIN2(src[0], UINT8_MAX);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue