diff --git a/src/intel/compiler/brw_nir_lower_cooperative_matrix.c b/src/intel/compiler/brw_nir_lower_cooperative_matrix.c index 25ff68abf25..efef878ffba 100644 --- a/src/intel/compiler/brw_nir_lower_cooperative_matrix.c +++ b/src/intel/compiler/brw_nir_lower_cooperative_matrix.c @@ -375,21 +375,60 @@ lower_cmat_load_store(nir_builder *b, nir_intrinsic_instr *intrin, nir_component_mask(num_components)); } +/* Unpack, apply operation, then pack again. */ +static nir_def * +emit_packed_alu1(nir_builder *b, + const slice_info *src_info, + const slice_info *dst_info, + nir_op op, + nir_def *src) +{ + nir_def *results[NIR_MAX_VEC_COMPONENTS]; + + const unsigned dst_bits = glsl_base_type_bit_size(dst_info->desc.element_type); + const unsigned src_bits = glsl_base_type_bit_size(src_info->desc.element_type); + + /* With the combinations of formats exposed on all platforms, matrices with + * the same dimensions will always have the same data size. The only real + * type conversion possible is int32 <-> float32. As a result + * dst_packing_factor == src_packing_factor. + */ + assert(dst_info->packing_factor == src_info->packing_factor); + + const unsigned num_components = glsl_get_vector_elements(dst_info->type); + + /* Stores at most dst_packing_factor partial results. */ + nir_def *v[4]; + assert(dst_info->packing_factor <= 4); + + for (unsigned i = 0; i < num_components; i++) { + nir_def *chan = nir_channel(b, src, i); + + for (unsigned j = 0; j < dst_info->packing_factor; j++) { + nir_def *src = + nir_channel(b, nir_unpack_bits(b, chan, src_bits), j); + + v[j] = nir_build_alu1(b, op, src); + } + + results[i] = + nir_pack_bits(b, nir_vec(b, v, dst_info->packing_factor), + dst_info->packing_factor * dst_bits); + } + + return nir_vec(b, results, num_components); +} + static void lower_cmat_unary_op(nir_builder *b, nir_intrinsic_instr *intrin, struct lower_cmat_state *state) { nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]); nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]); - nir_def *results[NIR_MAX_VEC_COMPONENTS]; - const unsigned num_components = glsl_get_vector_elements(dst_slice->type); const slice_info *dst_info = get_slice_info(state, dst_slice); const slice_info *src_info = get_slice_info(state, src_slice); - const unsigned dst_bits = glsl_base_type_bit_size(dst_info->desc.element_type); - const unsigned src_bits = glsl_base_type_bit_size(src_info->desc.element_type); - /* The type of the returned slice may be different from the type of the * input slice if this is a convert operation. */ @@ -411,34 +450,10 @@ lower_cmat_unary_op(nir_builder *b, nir_intrinsic_instr *intrin, nir_rounding_mode_undef); } - /* With the combinations of formats exposed on all platforms, matrices with - * the same dimensions will always have the same data size. The only real - * type conversion possible is int32 <-> float32. As a result - * dst_packing_factor == src_packing_factor. - */ - assert(dst_info->packing_factor == src_info->packing_factor); + nir_def *result = emit_packed_alu1(b, src_info, dst_info, op, + nir_load_deref(b, src_slice)); - /* Stores at most dst_packing_factor partial results. */ - nir_def *v[4]; - assert(dst_info->packing_factor <= 4); - - for (unsigned i = 0; i < num_components; i++) { - nir_def *chan = nir_channel(b, nir_load_deref(b, src_slice), i); - - for (unsigned j = 0; j < dst_info->packing_factor; j++) { - nir_def *src = - nir_channel(b, nir_unpack_bits(b, chan, src_bits), j); - - v[j] = nir_build_alu1(b, op, src); - } - - results[i] = - nir_pack_bits(b, nir_vec(b, v, dst_info->packing_factor), - dst_info->packing_factor * dst_bits); - } - - nir_store_deref(b, dst_slice, nir_vec(b, results, num_components), - nir_component_mask(num_components)); + nir_store_deref(b, dst_slice, result, nir_component_mask(result->num_components)); } static void