From d8b2ba91e8ffc049dec2e01181efd215ed7fd7ae Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 28 May 2026 11:04:40 -0400 Subject: [PATCH] jay/register_allocate: split out jay_stride.c These queries need to be used for partitioning too. And also this degunks the core RA logic in jay_register_allocate. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/jay/jay_ir.h | 3 + .../compiler/jay/jay_register_allocate.c | 122 ----------------- src/intel/compiler/jay/jay_stride.c | 128 ++++++++++++++++++ src/intel/compiler/jay/meson.build | 1 + 4 files changed, 132 insertions(+), 122 deletions(-) create mode 100644 src/intel/compiler/jay/jay_stride.c diff --git a/src/intel/compiler/jay/jay_ir.h b/src/intel/compiler/jay/jay_ir.h index 8cd084ead09..32f140964d2 100644 --- a/src/intel/compiler/jay/jay_ir.h +++ b/src/intel/compiler/jay/jay_ir.h @@ -712,6 +712,9 @@ jay_stride_to_bits(enum jay_stride s) return 16 << s; } +enum jay_stride jay_dst_stride_minmax(jay_inst *I, bool do_max); +enum jay_stride jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max); + #define jay_foreach_ra_file(file) \ for (enum jay_file file = 0; file < JAY_NUM_RA_FILES; ++file) diff --git a/src/intel/compiler/jay/jay_register_allocate.c b/src/intel/compiler/jay/jay_register_allocate.c index 6856839f556..c75f46c6bfd 100644 --- a/src/intel/compiler/jay/jay_register_allocate.c +++ b/src/intel/compiler/jay/jay_register_allocate.c @@ -42,128 +42,6 @@ jay_foreach_src(I, s) \ if (I->src[s].file < JAY_NUM_RA_FILES && !jay_is_null(I->src[s])) -static enum jay_stride -jay_min_stride_for_type(enum jay_type T) -{ - unsigned bits = jay_type_size_bits(T); - - /* We need at least enough contiguous bits per-lane to store a scalar */ - if (bits == 64) - return JAY_STRIDE_8; - else if (bits == 32) - return JAY_STRIDE_4; - else - return JAY_STRIDE_2; -} - -static enum jay_stride -jay_max_stride_for_type(enum jay_type T) -{ - /* Horizontal stride can be at most 4 */ - return 
(jay_type_size_bits(T) >= 16) ? JAY_STRIDE_8 : JAY_STRIDE_4; -} - -static bool -jay_restrict_mixed_strides(jay_inst *I, unsigned s) -{ - /* From the hardware spec section "Register Region Restrictions": - * - * "In case of all floating point data types used in destination:" and - * - * "In case where source or destination datatype is 64b or operation is - * integer DWord multiply:" and - * - * "Src2 Restrictions" - * - * Register Regioning patterns where register data bit location - * of the LSB of the channels are changed between source and - * destination are not supported on Src0 and Src1 except for - * broadcast of a scalar. - * - * Therefore, ban mixed-strides in these cases. - * - * Similarly, SENDs cannot do any regioning so restrict that too. - */ - return jay_type_is_any_float(I->type) || - jay_type_size_bits(I->type) == 64 || - jay_is_send_like(I) || - I->op == JAY_OPCODE_MUL_32X16 || - I->op == JAY_OPCODE_MUL_32 || - s == 2; -} - -static enum jay_stride -jay_dst_stride_minmax(jay_inst *I, bool do_max) -{ - enum jay_stride min = jay_min_stride_for_type(I->type); - enum jay_stride max = jay_max_stride_for_type(I->type); - - /* Destination stride must be equal to the ratio of the sizes of the - * execution data type to the destination type - */ - if (I->op == JAY_OPCODE_CVT) { - min = MAX2(min, jay_min_stride_for_type(jay_src_type(I, 0))); - } - - if (I->op == JAY_OPCODE_EXPAND_QUAD) { - return JAY_STRIDE_4; - } - - /* V/UV types are restricted */ - if (I->op == JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4) { - return JAY_STRIDE_2; - } - - /* The src2 restriction quoted above effectively implies we should not stride - * destinations of 3-source instructions either. - */ - if (jay_num_isa_srcs(I) >= 3) { - return min; - } - - return (do_max && !jay_restrict_mixed_strides(I, 0)) ? 
max : min; -} - -static enum jay_stride -jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max) -{ - enum jay_stride min = jay_min_stride_for_type(jay_src_type(I, s)); - enum jay_stride max = jay_max_stride_for_type(jay_src_type(I, s)); - - /* SENDs cannot do any regioning so force exactly the types of the sources - * regardless of the type of the destination. - * - * Shuffles could theoretically support regioning but it would be nontrivial - * and probably pointless most of the time. - */ - if (jay_is_send_like(I) || jay_is_shuffle_like(I)) { - return min; - } - - /* While "add.u16 r0<2>, r1<4>" is legal, "add.u16 r0, r1<4>" is not. - * Conservatively assume the destination is packed and restrict the source - * stride accordingly. This satisfies the special restrictions. - */ - if (jay_type_size_bits(I->type) <= 16) { - max = JAY_STRIDE_4; - } - - /* "add.u16 r0.8, g1<2>" is not legal. We don't generate this normally yet - * (preferring to burn the upper bits) but it is used internally. - */ - if (I->op == JAY_OPCODE_LANE_ID_EXPAND) { - max = JAY_STRIDE_2; - } - - if (jay_restrict_mixed_strides(I, s) && - jay_type_size_bits(jay_src_type(I, s)) < jay_type_size_bits(I->type)) { - - return jay_dst_stride_minmax(I, do_max); - } - - return (do_max && !jay_restrict_mixed_strides(I, s)) ? 
max : min; -} - struct affinity { /** * If there is a vector affinity defined for this SSA def, it is relative to diff --git a/src/intel/compiler/jay/jay_stride.c b/src/intel/compiler/jay/jay_stride.c new file mode 100644 index 00000000000..87d565143ec --- /dev/null +++ b/src/intel/compiler/jay/jay_stride.c @@ -0,0 +1,128 @@ +/* + * Copyright 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ +#include "jay_ir.h" + +static enum jay_stride +min_stride_for_type(enum jay_type T) +{ + unsigned bits = jay_type_size_bits(T); + + /* We need at least enough contiguous bits per-lane to store a scalar; any size other than 64 or 32 bits gets JAY_STRIDE_2 */ + if (bits == 64) + return JAY_STRIDE_8; + else if (bits == 32) + return JAY_STRIDE_4; + else + return JAY_STRIDE_2; +} + +static enum jay_stride +max_stride_for_type(enum jay_type T) +{ + /* Horizontal stride can be at most 4; types narrower than 16 bits are capped at JAY_STRIDE_4 and all others at JAY_STRIDE_8 */ + return (jay_type_size_bits(T) >= 16) ? JAY_STRIDE_8 : JAY_STRIDE_4; +} + +static bool +restrict_mixed_strides(jay_inst *I, unsigned s) +{ + /* From the hardware spec section "Register Region Restrictions": + * + * "In case of all floating point data types used in destination:" and + * + * "In case where source or destination datatype is 64b or operation is + * integer DWord multiply:" and + * + * "Src2 Restrictions" + * + * Register Regioning patterns where register data bit location + * of the LSB of the channels are changed between source and + * destination are not supported on Src0 and Src1 except for + * broadcast of a scalar. + * + * Therefore, ban mixed-strides in these cases. Only the s == 2 test depends on the source index; every other condition is per-instruction. + * + * Similarly, SENDs cannot do any regioning so restrict that too. 
+ */ + return jay_type_is_any_float(I->type) || + jay_type_size_bits(I->type) == 64 || + jay_is_send_like(I) || + I->op == JAY_OPCODE_MUL_32X16 || + I->op == JAY_OPCODE_MUL_32 || + s == 2; +} + +enum jay_stride +jay_dst_stride_minmax(jay_inst *I, bool do_max) +{ + enum jay_stride min = min_stride_for_type(I->type); + enum jay_stride max = max_stride_for_type(I->type); + + /* Destination stride must be equal to the ratio of the sizes of the + * execution data type to the destination type, so a CVT needs at least the minimum stride of source 0 + */ + if (I->op == JAY_OPCODE_CVT) { + min = MAX2(min, min_stride_for_type(jay_src_type(I, 0))); + } + + if (I->op == JAY_OPCODE_EXPAND_QUAD) { + return JAY_STRIDE_4; + } + + /* V/UV types are restricted */ + if (I->op == JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4) { + return JAY_STRIDE_2; + } + + /* The src2 restriction quoted above effectively implies we should not stride + * destinations of 3-source instructions either. + */ + if (jay_num_isa_srcs(I) >= 3) { + return min; + } + + return (do_max && !restrict_mixed_strides(I, 0)) ? max : min; +} + +enum jay_stride +jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max) +{ + enum jay_stride min = min_stride_for_type(jay_src_type(I, s)); + enum jay_stride max = max_stride_for_type(jay_src_type(I, s)); + + /* SENDs cannot do any regioning so force exactly the types of the sources + * regardless of the type of the destination. + * + * Shuffles could theoretically support regioning but it would be nontrivial + * and probably pointless most of the time. + */ + if (jay_is_send_like(I) || jay_is_shuffle_like(I)) { + return min; + } + + /* While "add.u16 r0<2>, r1<4>" is legal, "add.u16 r0, r1<4>" is not. + * Conservatively assume the destination is packed and restrict the source + * stride accordingly. This satisfies the special restrictions. + */ + if (jay_type_size_bits(I->type) <= 16) { + max = JAY_STRIDE_4; + } + + /* "add.u16 r0.8, g1<2>" is not legal. 
We don't generate this normally yet + * (preferring to burn the upper bits) but it is used internally. + */ + if (I->op == JAY_OPCODE_LANE_ID_EXPAND) { + max = JAY_STRIDE_2; + } + + if (restrict_mixed_strides(I, s) && + jay_type_size_bits(jay_src_type(I, s)) < jay_type_size_bits(I->type)) { + + return jay_dst_stride_minmax(I, do_max); /* mixed strides banned and the source is narrower than I->type: answer with the destination's stride instead */ + } + + return (do_max && !restrict_mixed_strides(I, s)) ? max : min; +} + diff --git a/src/intel/compiler/jay/meson.build b/src/intel/compiler/jay/meson.build index d753b8ca29c..949127141f6 100644 --- a/src/intel/compiler/jay/meson.build +++ b/src/intel/compiler/jay/meson.build @@ -70,6 +70,7 @@ libintel_compiler_jay_files = files( 'jay_register_allocate.c', 'jay_simd_width.c', 'jay_schedule.c', + 'jay_stride.c', 'jay_spill.c', 'jay_to_binary.c', 'jay_validate.c',