intel/brw: Fix handling of cmat_signed_mask

For integer types, the signedness is determined by flags on the muladd
instruction. The types of the sources play no role. Previously we were
using the signedness of the type and ignoring the mask.

Adjust the types passed to the dpas_intel intrinsic to match.

Fixes various
dEQP-VK.compute.*.cooperative_matrix.khr_*.matrixmuladd_cross.* tests on
different Intel platforms. Some platforms had failing tests, and some
platforms failed EU validation before the tests could fail.

Fixes: 6b14da33ad ("intel/fs: nir: Add nir_intrinsic_dpas_intel")
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28822>
This commit is contained in:
Ian Romanick 2024-04-17 16:08:19 -07:00
parent ce05a7c3a2
commit 2ce558d928

View file

@ -646,14 +646,44 @@ lower_cmat_instr(nir_builder *b, nir_instr *instr, void *_state)
const unsigned packing_factor = get_packing_factor(dst_desc, dst_slice->type);
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
const nir_cmat_signed cmat_signed_mask =
nir_intrinsic_cmat_signed_mask(intrin);
assert(((cmat_signed_mask & NIR_CMAT_A_SIGNED) == 0) ==
((cmat_signed_mask & NIR_CMAT_B_SIGNED) == 0));
assert(((cmat_signed_mask & NIR_CMAT_A_SIGNED) == 0) ==
((cmat_signed_mask & NIR_CMAT_C_SIGNED) == 0));
assert(((cmat_signed_mask & NIR_CMAT_A_SIGNED) == 0) ==
((cmat_signed_mask & NIR_CMAT_RESULT_SIGNED) == 0));
nir_alu_type src_type =
nir_get_nir_type_for_glsl_base_type(src_desc.element_type);
nir_alu_type dest_type =
nir_get_nir_type_for_glsl_base_type(dst_desc.element_type);
/* For integer types, the signedness is determined by flags on the
* muladd instruction. The types of the sources play no role. Adjust the
* types passed to the dpas_intel intrinsic to match.
*/
if (nir_alu_type_get_base_type(src_type) == nir_type_uint ||
nir_alu_type_get_base_type(src_type) == nir_type_int) {
if ((cmat_signed_mask & NIR_CMAT_A_SIGNED) == 0) {
src_type = nir_alu_type_get_type_size(src_type) | nir_type_uint;
dest_type = nir_alu_type_get_type_size(dest_type) | nir_type_uint;
} else {
src_type = nir_alu_type_get_type_size(src_type) | nir_type_int;
dest_type = nir_alu_type_get_type_size(dest_type) | nir_type_int;
}
}
nir_def *result =
nir_dpas_intel(b,
packing_factor * glsl_base_type_get_bit_size(dst_desc.element_type),
nir_load_deref(b, accum_slice),
nir_load_deref(b, A_slice),
nir_load_deref(b, B_slice),
.dest_type = nir_get_nir_type_for_glsl_base_type(dst_desc.element_type),
.src_type = nir_get_nir_type_for_glsl_base_type(src_desc.element_type),
.dest_type = dest_type,
.src_type = src_type,
.saturate = nir_intrinsic_saturate(intrin),
.cmat_signed_mask = nir_intrinsic_cmat_signed_mask(intrin),
.systolic_depth = 8,