mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-09 23:08:18 +02:00
jay/register_allocate: split out jay_stride.c
These queries need to be used for partitioning too. And also this degunks the core RA logic in jay_register_allocate. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41872>
This commit is contained in:
parent
e900ac1750
commit
d8b2ba91e8
4 changed files with 132 additions and 122 deletions
|
|
@ -712,6 +712,9 @@ jay_stride_to_bits(enum jay_stride s)
|
|||
return 16 << s;
|
||||
}
|
||||
|
||||
enum jay_stride jay_dst_stride_minmax(jay_inst *I, bool do_max);
|
||||
enum jay_stride jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max);
|
||||
|
||||
/* Loop header that declares `file` as an enum jay_file and steps it from 0 up
 * to, but not including, JAY_NUM_RA_FILES. The statement following the macro
 * becomes the loop body. NOTE(review): presumably these are the register
 * files handled by the register allocator -- confirm against enum jay_file.
 */
#define jay_foreach_ra_file(file) \
|
||||
for (enum jay_file file = 0; file < JAY_NUM_RA_FILES; ++file)
|
||||
|
||||
|
|
|
|||
|
|
@ -42,128 +42,6 @@
|
|||
jay_foreach_src(I, s) \
|
||||
if (I->src[s].file < JAY_NUM_RA_FILES && !jay_is_null(I->src[s]))
|
||||
|
||||
static enum jay_stride
|
||||
jay_min_stride_for_type(enum jay_type T)
|
||||
{
|
||||
unsigned bits = jay_type_size_bits(T);
|
||||
|
||||
/* We need at least enough contiguous bits per-lane to store a scalar */
|
||||
if (bits == 64)
|
||||
return JAY_STRIDE_8;
|
||||
else if (bits == 32)
|
||||
return JAY_STRIDE_4;
|
||||
else
|
||||
return JAY_STRIDE_2;
|
||||
}
|
||||
|
||||
static enum jay_stride
|
||||
jay_max_stride_for_type(enum jay_type T)
|
||||
{
|
||||
/* Horizontal stride can be at most 4 */
|
||||
return (jay_type_size_bits(T) >= 16) ? JAY_STRIDE_8 : JAY_STRIDE_4;
|
||||
}
|
||||
|
||||
static bool
|
||||
jay_restrict_mixed_strides(jay_inst *I, unsigned s)
|
||||
{
|
||||
/* From the hardware spec section "Register Region Restrictions":
|
||||
*
|
||||
* "In case of all floating point data types used in destination:" and
|
||||
*
|
||||
* "In case where source or destination datatype is 64b or operation is
|
||||
* integer DWord multiply:" and
|
||||
*
|
||||
* "Src2 Restrictions"
|
||||
*
|
||||
* Register Regioning patterns where register data bit location
|
||||
* of the LSB of the channels are changed between source and
|
||||
* destination are not supported on Src0 and Src1 except for
|
||||
* broadcast of a scalar.
|
||||
*
|
||||
* Therefore, ban mixed-strides in these cases.
|
||||
*
|
||||
* Similarly, SENDs cannot do any regioning so restrict that too.
|
||||
*/
|
||||
return jay_type_is_any_float(I->type) ||
|
||||
jay_type_size_bits(I->type) == 64 ||
|
||||
jay_is_send_like(I) ||
|
||||
I->op == JAY_OPCODE_MUL_32X16 ||
|
||||
I->op == JAY_OPCODE_MUL_32 ||
|
||||
s == 2;
|
||||
}
|
||||
|
||||
static enum jay_stride
|
||||
jay_dst_stride_minmax(jay_inst *I, bool do_max)
|
||||
{
|
||||
enum jay_stride min = jay_min_stride_for_type(I->type);
|
||||
enum jay_stride max = jay_max_stride_for_type(I->type);
|
||||
|
||||
/* Destination stride must be equal to the ratio of the sizes of the
|
||||
* execution data type to the destination type
|
||||
*/
|
||||
if (I->op == JAY_OPCODE_CVT) {
|
||||
min = MAX2(min, jay_min_stride_for_type(jay_src_type(I, 0)));
|
||||
}
|
||||
|
||||
if (I->op == JAY_OPCODE_EXPAND_QUAD) {
|
||||
return JAY_STRIDE_4;
|
||||
}
|
||||
|
||||
/* V/UV types are restricted */
|
||||
if (I->op == JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4) {
|
||||
return JAY_STRIDE_2;
|
||||
}
|
||||
|
||||
/* The src2 restriction quoted above effectively implies we should not stride
|
||||
* destinations of 3-source instructions either.
|
||||
*/
|
||||
if (jay_num_isa_srcs(I) >= 3) {
|
||||
return min;
|
||||
}
|
||||
|
||||
return (do_max && !jay_restrict_mixed_strides(I, 0)) ? max : min;
|
||||
}
|
||||
|
||||
static enum jay_stride
|
||||
jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max)
|
||||
{
|
||||
enum jay_stride min = jay_min_stride_for_type(jay_src_type(I, s));
|
||||
enum jay_stride max = jay_max_stride_for_type(jay_src_type(I, s));
|
||||
|
||||
/* SENDs cannot do any regioning so force exactly the types of the sources
|
||||
* regardless of the type of the destination.
|
||||
*
|
||||
* Shuffles could theoretically support regioning but it would be nontrivial
|
||||
* and probably pointless most of the time.
|
||||
*/
|
||||
if (jay_is_send_like(I) || jay_is_shuffle_like(I)) {
|
||||
return min;
|
||||
}
|
||||
|
||||
/* While "add.u16 r0<2>, r1<4>" is legal, "add.u16 r0, r1<4>" is not.
|
||||
* Conservatively assume the destination is packed and restrict the source
|
||||
* stride accordingly. This satisfies the special restrictions.
|
||||
*/
|
||||
if (jay_type_size_bits(I->type) <= 16) {
|
||||
max = JAY_STRIDE_4;
|
||||
}
|
||||
|
||||
/* "add.u16 r0.8, g1<2>" is not legal. We don't generate this normally yet
|
||||
* (preferring to burn the upper bits) but it is used internally.
|
||||
*/
|
||||
if (I->op == JAY_OPCODE_LANE_ID_EXPAND) {
|
||||
max = JAY_STRIDE_2;
|
||||
}
|
||||
|
||||
if (jay_restrict_mixed_strides(I, s) &&
|
||||
jay_type_size_bits(jay_src_type(I, s)) < jay_type_size_bits(I->type)) {
|
||||
|
||||
return jay_dst_stride_minmax(I, do_max);
|
||||
}
|
||||
|
||||
return (do_max && !jay_restrict_mixed_strides(I, s)) ? max : min;
|
||||
}
|
||||
|
||||
struct affinity {
|
||||
/**
|
||||
* If there is a vector affinity defined for this SSA def, it is relative to
|
||||
|
|
|
|||
128
src/intel/compiler/jay/jay_stride.c
Normal file
128
src/intel/compiler/jay/jay_stride.c
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "jay_ir.h"
|
||||
|
||||
static enum jay_stride
|
||||
min_stride_for_type(enum jay_type T)
|
||||
{
|
||||
unsigned bits = jay_type_size_bits(T);
|
||||
|
||||
/* We need at least enough contiguous bits per-lane to store a scalar */
|
||||
if (bits == 64)
|
||||
return JAY_STRIDE_8;
|
||||
else if (bits == 32)
|
||||
return JAY_STRIDE_4;
|
||||
else
|
||||
return JAY_STRIDE_2;
|
||||
}
|
||||
|
||||
static enum jay_stride
|
||||
max_stride_for_type(enum jay_type T)
|
||||
{
|
||||
/* Horizontal stride can be at most 4 */
|
||||
return (jay_type_size_bits(T) >= 16) ? JAY_STRIDE_8 : JAY_STRIDE_4;
|
||||
}
|
||||
|
||||
static bool
|
||||
restrict_mixed_strides(jay_inst *I, unsigned s)
|
||||
{
|
||||
/* From the hardware spec section "Register Region Restrictions":
|
||||
*
|
||||
* "In case of all floating point data types used in destination:" and
|
||||
*
|
||||
* "In case where source or destination datatype is 64b or operation is
|
||||
* integer DWord multiply:" and
|
||||
*
|
||||
* "Src2 Restrictions"
|
||||
*
|
||||
* Register Regioning patterns where register data bit location
|
||||
* of the LSB of the channels are changed between source and
|
||||
* destination are not supported on Src0 and Src1 except for
|
||||
* broadcast of a scalar.
|
||||
*
|
||||
* Therefore, ban mixed-strides in these cases.
|
||||
*
|
||||
* Similarly, SENDs cannot do any regioning so restrict that too.
|
||||
*/
|
||||
return jay_type_is_any_float(I->type) ||
|
||||
jay_type_size_bits(I->type) == 64 ||
|
||||
jay_is_send_like(I) ||
|
||||
I->op == JAY_OPCODE_MUL_32X16 ||
|
||||
I->op == JAY_OPCODE_MUL_32 ||
|
||||
s == 2;
|
||||
}
|
||||
|
||||
enum jay_stride
|
||||
jay_dst_stride_minmax(jay_inst *I, bool do_max)
|
||||
{
|
||||
enum jay_stride min = min_stride_for_type(I->type);
|
||||
enum jay_stride max = max_stride_for_type(I->type);
|
||||
|
||||
/* Destination stride must be equal to the ratio of the sizes of the
|
||||
* execution data type to the destination type
|
||||
*/
|
||||
if (I->op == JAY_OPCODE_CVT) {
|
||||
min = MAX2(min, min_stride_for_type(jay_src_type(I, 0)));
|
||||
}
|
||||
|
||||
if (I->op == JAY_OPCODE_EXPAND_QUAD) {
|
||||
return JAY_STRIDE_4;
|
||||
}
|
||||
|
||||
/* V/UV types are restricted */
|
||||
if (I->op == JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4) {
|
||||
return JAY_STRIDE_2;
|
||||
}
|
||||
|
||||
/* The src2 restriction quoted above effectively implies we should not stride
|
||||
* destinations of 3-source instructions either.
|
||||
*/
|
||||
if (jay_num_isa_srcs(I) >= 3) {
|
||||
return min;
|
||||
}
|
||||
|
||||
return (do_max && !restrict_mixed_strides(I, 0)) ? max : min;
|
||||
}
|
||||
|
||||
enum jay_stride
|
||||
jay_src_stride_minmax(jay_inst *I, unsigned s, bool do_max)
|
||||
{
|
||||
enum jay_stride min = min_stride_for_type(jay_src_type(I, s));
|
||||
enum jay_stride max = max_stride_for_type(jay_src_type(I, s));
|
||||
|
||||
/* SENDs cannot do any regioning so force exactly the types of the sources
|
||||
* regardless of the type of the destination.
|
||||
*
|
||||
* Shuffles could theoretically support regioning but it would be nontrivial
|
||||
* and probably pointless most of the time.
|
||||
*/
|
||||
if (jay_is_send_like(I) || jay_is_shuffle_like(I)) {
|
||||
return min;
|
||||
}
|
||||
|
||||
/* While "add.u16 r0<2>, r1<4>" is legal, "add.u16 r0, r1<4>" is not.
|
||||
* Conservatively assume the destination is packed and restrict the source
|
||||
* stride accordingly. This satisfies the special restrictions.
|
||||
*/
|
||||
if (jay_type_size_bits(I->type) <= 16) {
|
||||
max = JAY_STRIDE_4;
|
||||
}
|
||||
|
||||
/* "add.u16 r0.8, g1<2>" is not legal. We don't generate this normally yet
|
||||
* (preferring to burn the upper bits) but it is used internally.
|
||||
*/
|
||||
if (I->op == JAY_OPCODE_LANE_ID_EXPAND) {
|
||||
max = JAY_STRIDE_2;
|
||||
}
|
||||
|
||||
if (restrict_mixed_strides(I, s) &&
|
||||
jay_type_size_bits(jay_src_type(I, s)) < jay_type_size_bits(I->type)) {
|
||||
|
||||
return jay_dst_stride_minmax(I, do_max);
|
||||
}
|
||||
|
||||
return (do_max && !restrict_mixed_strides(I, s)) ? max : min;
|
||||
}
|
||||
|
||||
|
|
@ -70,6 +70,7 @@ libintel_compiler_jay_files = files(
|
|||
'jay_register_allocate.c',
|
||||
'jay_simd_width.c',
|
||||
'jay_schedule.c',
|
||||
'jay_stride.c',
|
||||
'jay_spill.c',
|
||||
'jay_to_binary.c',
|
||||
'jay_validate.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue