jay/register_allocate: split out jay_stride.c

These stride queries need to be used for partitioning too. Splitting them out
also declutters the core RA logic in jay_register_allocate.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41872>
This commit is contained in:
Alyssa Rosenzweig 2026-05-28 11:04:40 -04:00 committed by Marge Bot
parent e900ac1750
commit d8b2ba91e8
4 changed files with 132 additions and 122 deletions

View file

@ -712,6 +712,9 @@ jay_stride_to_bits(enum jay_stride s)
return 16 << s;
}
/* Stride bound for the destination of I: the minimum when do_max is false,
 * otherwise the maximum (the definition may still fall back to the minimum or
 * to a fixed stride for particular instructions).
 */
enum jay_stride jay_dst_stride_minmax(jay_inst *I, bool do_max);
/* Stride bound for source number s of I: the minimum when do_max is false,
 * otherwise the maximum (subject to the same kind of per-instruction
 * overrides as the destination query).
 */
enum jay_stride jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max);
/* Loop header that declares `file` and steps it through every value from 0 up
 * to (but not including) JAY_NUM_RA_FILES. Follow it with the loop body.
 */
#define jay_foreach_ra_file(file) \
for (enum jay_file file = 0; file < JAY_NUM_RA_FILES; ++file)

View file

@ -42,128 +42,6 @@
jay_foreach_src(I, s) \
if (I->src[s].file < JAY_NUM_RA_FILES && !jay_is_null(I->src[s]))
/* Minimum stride usable for a value of type T. */
static enum jay_stride
jay_min_stride_for_type(enum jay_type T)
{
   /* Every lane must have room for one whole scalar, so the stride follows
    * the scalar width. Widths other than 64 and 32 bits use JAY_STRIDE_2.
    */
   switch (jay_type_size_bits(T)) {
   case 64:
      return JAY_STRIDE_8;
   case 32:
      return JAY_STRIDE_4;
   default:
      return JAY_STRIDE_2;
   }
}
/* Maximum stride usable for a value of type T. */
static enum jay_stride
jay_max_stride_for_type(enum jay_type T)
{
   /* Horizontal stride can be at most 4, so types narrower than 16 bits are
    * capped at JAY_STRIDE_4; everything else may go up to JAY_STRIDE_8.
    */
   if (jay_type_size_bits(T) < 16) {
      return JAY_STRIDE_4;
   }

   return JAY_STRIDE_8;
}
/* Whether source s of I must not use a stride that differs from the
 * destination's.
 *
 * The hardware spec section "Register Region Restrictions" lists, under
 *
 *    "In case of all floating point data types used in destination:" and
 *
 *    "In case where source or destination datatype is 64b or operation is
 *    integer DWord multiply:" and
 *
 *    "Src2 Restrictions"
 *
 * the rule:
 *
 *    Register Regioning patterns where register data bit location
 *    of the LSB of the channels are changed between source and
 *    destination are not supported on Src0 and Src1 except for
 *    broadcast of a scalar.
 *
 * Mixed strides are therefore banned in each of those cases. SENDs cannot do
 * any regioning either, so they are restricted as well.
 */
static bool
jay_restrict_mixed_strides(jay_inst *I, unsigned s)
{
   /* Floating-point or 64-bit instruction type */
   if (jay_type_is_any_float(I->type) || jay_type_size_bits(I->type) == 64) {
      return true;
   }

   /* No regioning at all on SENDs */
   if (jay_is_send_like(I)) {
      return true;
   }

   /* Integer DWord multiplies */
   switch (I->op) {
   case JAY_OPCODE_MUL_32X16:
   case JAY_OPCODE_MUL_32:
      return true;
   default:
      break;
   }

   /* Src2 restrictions */
   return s == 2;
}
/* Stride bound for the destination of I: the minimum when do_max is false,
 * the maximum otherwise, except where the instruction pins the stride.
 */
static enum jay_stride
jay_dst_stride_minmax(jay_inst *I, bool do_max)
{
   enum jay_stride lo = jay_min_stride_for_type(I->type);
   const enum jay_stride hi = jay_max_stride_for_type(I->type);

   switch (I->op) {
   case JAY_OPCODE_CVT:
      /* Destination stride must be equal to the ratio of the sizes of the
       * execution data type to the destination type, so the lower bound also
       * has to accommodate the source type.
       */
      lo = MAX2(lo, jay_min_stride_for_type(jay_src_type(I, 0)));
      break;

   case JAY_OPCODE_EXPAND_QUAD:
      return JAY_STRIDE_4;

   case JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4:
      /* V/UV types are restricted */
      return JAY_STRIDE_2;

   default:
      break;
   }

   /* The src2 restriction effectively implies that destinations of 3-source
    * instructions should not be strided either.
    */
   if (jay_num_isa_srcs(I) >= 3) {
      return lo;
   }

   if (!do_max) {
      return lo;
   }

   return jay_restrict_mixed_strides(I, 0) ? lo : hi;
}
/* Stride bound for source s of I: the minimum when do_max is false, the
 * maximum otherwise, except where the instruction constrains the stride.
 */
static enum jay_stride
jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max)
{
   const enum jay_type src_type = jay_src_type(I, s);
   const enum jay_stride lo = jay_min_stride_for_type(src_type);
   const enum jay_stride hi = jay_max_stride_for_type(src_type);

   /* SENDs cannot do any regioning, so their sources use exactly the stride
    * of the source type regardless of the destination type.
    *
    * Shuffles could theoretically support regioning but it would be
    * nontrivial and probably pointless most of the time.
    */
   if (jay_is_send_like(I) || jay_is_shuffle_like(I)) {
      return lo;
   }

   const unsigned dst_bits = jay_type_size_bits(I->type);
   const bool restricted = jay_restrict_mixed_strides(I, s);

   /* A restricted source narrower than the instruction type follows the
    * destination's stride.
    */
   if (restricted && jay_type_size_bits(src_type) < dst_bits) {
      return jay_dst_stride_minmax(I, do_max);
   }

   if (!do_max || restricted) {
      return lo;
   }

   /* "add.u16 r0.8, g1<2>" is not legal. We don't generate this normally yet
    * (preferring to burn the upper bits) but it is used internally.
    */
   if (I->op == JAY_OPCODE_LANE_ID_EXPAND) {
      return JAY_STRIDE_2;
   }

   /* While "add.u16 r0<2>, r1<4>" is legal, "add.u16 r0, r1<4>" is not.
    * Conservatively assume the destination is packed and restrict the source
    * stride accordingly. This satisfies the special restrictions.
    */
   if (dst_bits <= 16) {
      return JAY_STRIDE_4;
   }

   return hi;
}
struct affinity {
/**
* If there is a vector affinity defined for this SSA def, it is relative to

View file

@ -0,0 +1,128 @@
/*
* Copyright 2026 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "jay_ir.h"
/* Minimum stride usable for a value of type T. */
static enum jay_stride
min_stride_for_type(enum jay_type T)
{
   /* Every lane must have room for one whole scalar, so the stride follows
    * the scalar width. Widths other than 64 and 32 bits use JAY_STRIDE_2.
    */
   switch (jay_type_size_bits(T)) {
   case 64:
      return JAY_STRIDE_8;
   case 32:
      return JAY_STRIDE_4;
   default:
      return JAY_STRIDE_2;
   }
}
/* Maximum stride usable for a value of type T. */
static enum jay_stride
max_stride_for_type(enum jay_type T)
{
   /* Horizontal stride can be at most 4, so types narrower than 16 bits are
    * capped at JAY_STRIDE_4; everything else may go up to JAY_STRIDE_8.
    */
   if (jay_type_size_bits(T) < 16) {
      return JAY_STRIDE_4;
   }

   return JAY_STRIDE_8;
}
/* Whether source s of I must not use a stride that differs from the
 * destination's.
 *
 * The hardware spec section "Register Region Restrictions" lists, under
 *
 *    "In case of all floating point data types used in destination:" and
 *
 *    "In case where source or destination datatype is 64b or operation is
 *    integer DWord multiply:" and
 *
 *    "Src2 Restrictions"
 *
 * the rule:
 *
 *    Register Regioning patterns where register data bit location
 *    of the LSB of the channels are changed between source and
 *    destination are not supported on Src0 and Src1 except for
 *    broadcast of a scalar.
 *
 * Mixed strides are therefore banned in each of those cases. SENDs cannot do
 * any regioning either, so they are restricted as well.
 */
static bool
restrict_mixed_strides(jay_inst *I, unsigned s)
{
   /* Floating-point or 64-bit instruction type */
   if (jay_type_is_any_float(I->type) || jay_type_size_bits(I->type) == 64) {
      return true;
   }

   /* No regioning at all on SENDs */
   if (jay_is_send_like(I)) {
      return true;
   }

   /* Integer DWord multiplies */
   switch (I->op) {
   case JAY_OPCODE_MUL_32X16:
   case JAY_OPCODE_MUL_32:
      return true;
   default:
      break;
   }

   /* Src2 restrictions */
   return s == 2;
}
/* Stride bound for the destination of I: the minimum when do_max is false,
 * the maximum otherwise, except where the instruction pins the stride.
 */
enum jay_stride
jay_dst_stride_minmax(jay_inst *I, bool do_max)
{
   enum jay_stride lo = min_stride_for_type(I->type);
   const enum jay_stride hi = max_stride_for_type(I->type);

   switch (I->op) {
   case JAY_OPCODE_CVT:
      /* Destination stride must be equal to the ratio of the sizes of the
       * execution data type to the destination type, so the lower bound also
       * has to accommodate the source type.
       */
      lo = MAX2(lo, min_stride_for_type(jay_src_type(I, 0)));
      break;

   case JAY_OPCODE_EXPAND_QUAD:
      return JAY_STRIDE_4;

   case JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4:
      /* V/UV types are restricted */
      return JAY_STRIDE_2;

   default:
      break;
   }

   /* The src2 restriction (see restrict_mixed_strides) effectively implies
    * that destinations of 3-source instructions should not be strided either.
    */
   if (jay_num_isa_srcs(I) >= 3) {
      return lo;
   }

   if (!do_max) {
      return lo;
   }

   return restrict_mixed_strides(I, 0) ? lo : hi;
}
/* Stride bound for source s of I: the minimum when do_max is false, the
 * maximum otherwise, except where the instruction constrains the stride.
 */
enum jay_stride
jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max)
{
   const enum jay_type src_type = jay_src_type(I, s);
   const enum jay_stride lo = min_stride_for_type(src_type);
   const enum jay_stride hi = max_stride_for_type(src_type);

   /* SENDs cannot do any regioning, so their sources use exactly the stride
    * of the source type regardless of the destination type.
    *
    * Shuffles could theoretically support regioning but it would be
    * nontrivial and probably pointless most of the time.
    */
   if (jay_is_send_like(I) || jay_is_shuffle_like(I)) {
      return lo;
   }

   const unsigned dst_bits = jay_type_size_bits(I->type);
   const bool restricted = restrict_mixed_strides(I, s);

   /* A restricted source narrower than the instruction type follows the
    * destination's stride.
    */
   if (restricted && jay_type_size_bits(src_type) < dst_bits) {
      return jay_dst_stride_minmax(I, do_max);
   }

   if (!do_max || restricted) {
      return lo;
   }

   /* "add.u16 r0.8, g1<2>" is not legal. We don't generate this normally yet
    * (preferring to burn the upper bits) but it is used internally.
    */
   if (I->op == JAY_OPCODE_LANE_ID_EXPAND) {
      return JAY_STRIDE_2;
   }

   /* While "add.u16 r0<2>, r1<4>" is legal, "add.u16 r0, r1<4>" is not.
    * Conservatively assume the destination is packed and restrict the source
    * stride accordingly. This satisfies the special restrictions.
    */
   if (dst_bits <= 16) {
      return JAY_STRIDE_4;
   }

   return hi;
}

View file

@ -70,6 +70,7 @@ libintel_compiler_jay_files = files(
'jay_register_allocate.c',
'jay_simd_width.c',
'jay_schedule.c',
'jay_stride.c',
'jay_spill.c',
'jay_to_binary.c',
'jay_validate.c',