v3dv: Expose hardware-accelerated integer dot products on V3D 7.1+

Expose the 4x8-bit packed dot products of
VK_KHR_shader_integer_dot_product using the native hardware
instructions v8dot and setnnmode.

QPU instruction count for sdot_4x8_iadd compute shader:

  Before (scalar decomposition):  18 ALU cycles
  After (setnnmode + v8dot):       3 ALU cycles  (6x)

We advertise the integerDotProduct4x8BitPacked*Accelerated properties on V3D 7.1+.

Assisted-by: Claude Opus 4.6
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41255>
This commit is contained in:
Jose Maria Casanova Crespo 2026-02-07 18:28:19 +01:00 committed by Marge Bot
parent 8f06961bf5
commit 3a8d5aeaa1
2 changed files with 11 additions and 4 deletions

View file

@ -1192,12 +1192,14 @@ get_device_properties(const struct v3dv_physical_device *device,
.storageTexelBufferOffsetSingleTexelAlignment = false,
.uniformTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN,
.uniformTexelBufferOffsetSingleTexelAlignment = false,
/* No native acceleration for integer dot product. We use NIR lowering. */
/* V3D 7.1+ has native v8dot instructions for 4x8-bit packed dot
* products (unsigned, signed, mixed signedness).
*/
.integerDotProduct8BitUnsignedAccelerated = false,
.integerDotProduct8BitMixedSignednessAccelerated = false,
.integerDotProduct4x8BitPackedUnsignedAccelerated = false,
.integerDotProduct4x8BitPackedSignedAccelerated = false,
.integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
.integerDotProduct4x8BitPackedUnsignedAccelerated = device->devinfo.ver >= 71,
.integerDotProduct4x8BitPackedSignedAccelerated = device->devinfo.ver >= 71,
.integerDotProduct4x8BitPackedMixedSignednessAccelerated = device->devinfo.ver >= 71,
.integerDotProduct16BitUnsignedAccelerated = false,
.integerDotProduct16BitSignedAccelerated = false,
.integerDotProduct16BitMixedSignednessAccelerated = false,

View file

@ -224,6 +224,11 @@ v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo)
if (!initialized) {
options.lower_fsat = devinfo->ver < 71;
if (devinfo->ver >= 71) {
options.has_udot_4x8 = true;
options.has_sdot_4x8 = true;
options.has_sudot_4x8 = true;
}
initialized = true;
}