mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 13:00:21 +01:00
panvk: add support for {s,u}dot_4x8_{sat}
Generate the IDPADD instruction to support integer dot products. Support is added for both signed and unsigned dot products, as well as their saturating variants. Support is only for v9+. Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34812>
This commit is contained in:
parent
dc1c701489
commit
ffdc08dfb6
4 changed files with 22 additions and 7 deletions
|
|
@ -3515,6 +3515,20 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
bi_isub_u32_to(b, dst, bi_imm_u32(src_sz - 1), clz, false);
|
||||
break;
|
||||
}
|
||||
case nir_op_udot_4x8_uadd_sat:
|
||||
case nir_op_udot_4x8_uadd: {
|
||||
assert(b->shader->arch >= 9);
|
||||
bi_idpadd_v4u8_to(b, dst, s0, s1, s2,
|
||||
instr->op == nir_op_udot_4x8_uadd_sat);
|
||||
break;
|
||||
}
|
||||
case nir_op_sdot_4x8_iadd_sat:
|
||||
case nir_op_sdot_4x8_iadd: {
|
||||
assert(b->shader->arch >= 9);
|
||||
bi_idpadd_v4s8_to(b, dst, s0, s1, s2,
|
||||
instr->op == nir_op_sdot_4x8_iadd_sat);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
|
||||
|
|
|
|||
|
|
@ -148,6 +148,10 @@ void bifrost_compile_shader_nir(nir_shader *nir,
|
|||
.support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES), \
|
||||
.lower_hadd = arch >= 11, \
|
||||
.discard_is_demote = true, \
|
||||
.has_udot_4x8 = arch >= 9, \
|
||||
.has_udot_4x8_sat = arch >= 9, \
|
||||
.has_sdot_4x8 = arch >= 9, \
|
||||
.has_sdot_4x8_sat = arch >= 9, \
|
||||
};
|
||||
|
||||
DEFINE_OPTIONS(6);
|
||||
|
|
|
|||
|
|
@ -2235,21 +2235,20 @@
|
|||
<src absneg="true">Z coordinate as 32-bit floating point</src>
|
||||
</ins>
|
||||
|
||||
<group name="IDP" title="8-bit dot product" dests="1" opcode="0xC2" unused="true" unit="FMA">
|
||||
<group name="IDPADD" title="8-bit dot product and accumulate" dests="1" opcode="0xC2" unit="FMA">
|
||||
<desc>
|
||||
8-bit integer dot product between 4 channel vectors, intended for machine
|
||||
learning. Available in both unsigned and signed variants, controlling
|
||||
sign-extension/zero-extension behaviour to the final 32-bit destination.
|
||||
Saturation is available. Corresponds to the `cl_arm_integer_dot_product_*`
|
||||
family of OpenCL extensions. Not for actual use, just for completeness.
|
||||
Instead, use your platform's neural accelerator.
|
||||
family of OpenCL extensions.
|
||||
|
||||
For $A, B \in \{ 0, \ldots, 255 \}^4$ and $\text{Accumulator} \in
|
||||
\mathbb{Z}$, calculates $(A \cdot B) + \text{Accumulator}$ and optionally
|
||||
saturates.
|
||||
</desc>
|
||||
<ins name="IDP.v4s8" opcode2="0"/>
|
||||
<ins name="IDP.v4u8" opcode2="1"/>
|
||||
<ins name="IDPADD.v4s8" opcode2="0"/>
|
||||
<ins name="IDPADD.v4u8" opcode2="1"/>
|
||||
<src>A</src>
|
||||
<src>B</src>
|
||||
<src>Accumulator</src>
|
||||
|
|
|
|||
|
|
@ -88,8 +88,6 @@ SKIP = set([
|
|||
"NOT_OLD.i64",
|
||||
|
||||
# TODO
|
||||
"IDP.v4s8",
|
||||
"IDP.v4u8",
|
||||
"FATAN_ASSIST.f32",
|
||||
"SEG_ADD.u64",
|
||||
"TEX_DUAL",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue