mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 23:10:11 +01:00
nir: remove sad_u8x4
All uses of this can be replaced with msad_4x8.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26907>
This commit is contained in:
parent
5fd747a502
commit
ae54cbeb3f
6 changed files with 4 additions and 40 deletions
|
|
@@ -284,7 +284,7 @@ summarize_repack(nir_builder *b, nir_def *packed_counts, unsigned num_lds_dwords
|
|||
*
|
||||
* If the v_dot instruction can't be used, we left-shift the packed bytes.
|
||||
* This will shift out the unneeded bytes and shift in zeroes instead,
|
||||
* then we sum them using v_sad_u8.
|
||||
* then we sum them using v_msad_u8.
|
||||
*/
|
||||
|
||||
nir_def *lane_id = nir_load_subgroup_invocation(b);
|
||||
|
|
@@ -302,7 +302,7 @@ summarize_repack(nir_builder *b, nir_def *packed_counts, unsigned num_lds_dwords
|
|||
return nir_udot_4x8_uadd(b, packed, dot_op, nir_imm_int(b, 0));
|
||||
} else {
|
||||
nir_def *sad_op = nir_ishl(b, nir_ishl(b, packed, shift), shift);
|
||||
return nir_sad_u8x4(b, sad_op, nir_imm_int(b, 0), nir_imm_int(b, 0));
|
||||
return nir_msad_4x8(b, sad_op, nir_imm_int(b, 0), nir_imm_int(b, 0));
|
||||
}
|
||||
} else if (num_lds_dwords == 2) {
|
||||
nir_def *dot_op = !use_dot ? NULL : nir_ushr(b, nir_ushr(b, nir_imm_int64(b, 0x0101010101010101), shift), shift);
|
||||
|
|
@@ -317,8 +317,8 @@ summarize_repack(nir_builder *b, nir_def *packed_counts, unsigned num_lds_dwords
|
|||
return nir_udot_4x8_uadd(b, packed_dw1, nir_unpack_64_2x32_split_y(b, dot_op), sum);
|
||||
} else {
|
||||
nir_def *sad_op = nir_ishl(b, nir_ishl(b, nir_pack_64_2x32_split(b, packed_dw0, packed_dw1), shift), shift);
|
||||
nir_def *sum = nir_sad_u8x4(b, nir_unpack_64_2x32_split_x(b, sad_op), nir_imm_int(b, 0), nir_imm_int(b, 0));
|
||||
return nir_sad_u8x4(b, nir_unpack_64_2x32_split_y(b, sad_op), nir_imm_int(b, 0), sum);
|
||||
nir_def *sum = nir_msad_4x8(b, nir_unpack_64_2x32_split_x(b, sad_op), nir_imm_int(b, 0), nir_imm_int(b, 0));
|
||||
return nir_msad_4x8(b, nir_unpack_64_2x32_split_y(b, sad_op), nir_imm_int(b, 0), sum);
|
||||
}
|
||||
} else {
|
||||
unreachable("Unimplemented NGG wave count");
|
||||
|
|
|
|||
|
|
@@ -3420,11 +3420,6 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_sad_u8x4: {
|
||||
assert(dst.regClass() == v1);
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_sad_u8, dst, false, 3u, false);
|
||||
break;
|
||||
}
|
||||
case nir_op_msad_4x8: {
|
||||
assert(dst.regClass() == v1);
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_msad_u8, dst, false, 3u, true);
|
||||
|
|
|
|||
|
|
@@ -392,7 +392,6 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_op_frexp_sig:
|
||||
case nir_op_frexp_exp:
|
||||
case nir_op_cube_amd:
|
||||
case nir_op_sad_u8x4:
|
||||
case nir_op_msad_4x8:
|
||||
case nir_op_udot_4x8_uadd:
|
||||
case nir_op_sdot_4x8_iadd:
|
||||
|
|
|
|||
|
|
@@ -1253,11 +1253,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_op_sad_u8x4:
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sad.u8", ctx->ac.i32,
|
||||
(LLVMValueRef[]){src[0], src[1], src[2]}, 3, 0);
|
||||
break;
|
||||
|
||||
case nir_op_msad_4x8:
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.msad.u8", ctx->ac.i32,
|
||||
(LLVMValueRef[]){src[1], src[0], src[2]}, 3, 0);
|
||||
|
|
|
|||
|
|
@@ -1126,30 +1126,6 @@ if (bits == 0) {
|
|||
}
|
||||
""")
|
||||
|
||||
triop_horiz("sad_u8x4", 1, 1, 1, 1, """
|
||||
uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0;
|
||||
uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8;
|
||||
uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16;
|
||||
uint8_t s0_b3 = (src0.x & 0xff000000) >> 24;
|
||||
|
||||
uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0;
|
||||
uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8;
|
||||
uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16;
|
||||
uint8_t s1_b3 = (src1.x & 0xff000000) >> 24;
|
||||
|
||||
dst.x = src2.x +
|
||||
(s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
|
||||
(s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
|
||||
(s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
|
||||
(s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
|
||||
""", description = """
|
||||
Sum of absolute differences with accumulation. Equivalent to AMD's v_sad_u8 instruction.
|
||||
|
||||
The first two sources contain packed 8-bit unsigned integers, the instruction will
|
||||
calculate the absolute difference of these, and then add them together. There is also a
|
||||
third source which is a 32-bit unsigned integer and added to the result.
|
||||
""")
|
||||
|
||||
triop("msad_4x8", tuint32, "", """
|
||||
dst = msad(src0, src1, src2);
|
||||
""", description = """
|
||||
|
|
|
|||
|
|
@@ -1864,7 +1864,6 @@ get_alu_uub(struct analysis_state *state, struct uub_query q, uint32_t *result,
|
|||
case nir_op_b2i32:
|
||||
*result = 1;
|
||||
break;
|
||||
case nir_op_sad_u8x4:
|
||||
case nir_op_msad_4x8:
|
||||
*result = MIN2((uint64_t)src[2] + 4 * 255, UINT32_MAX);
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue