mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
intel: add Jay
Jay is a new SSA-based compiler for Intel GPUs. This is an early work-in-progress. It isn't ready to ship, but we'd like to move development in tree rather than rebasing the world every week. Please don't bother testing yet - we know the status and we're working on it! Jay's design is similar to other modern NIR backends, particularly ACO, NAK and AGX. It is fully SSA, deconstructing phis after RA. We use a Colombet register allocator similar to NAK, allowing us to handle Intel's complex register regioning restrictions in a straightforward way. Spilling logical registers is straightforward with Braun-Hack. Thanks to the SSA-based design, the entire backend is essentially linear time, regardless of register pressure, addressing brw's excessive compile time, especially when spilling. In this current early draft, we support a limited subset of all three APIs on Xe2. A lot works and a lot doesn't. The core compiler is there (spilling, scoreboarding, SIMD32, etc should more or less work), but there are details to fill in for both performance and correctness. We essentially pass conformance on OpenGL ES 3.0 and OpenCL 3.0, and we're busy iterating on Vulkan. Likewise, additional hardware support will come down the line. There's nothing fundamentally Xe2-specific here. I just have a Lunarlake laptop on my desk, Ken has a Battlemage card, and we had to pick _something_ as the first target. Co-authored-by: Kenneth Graunke <kenneth@whitecape.org> Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40835>
This commit is contained in:
parent
7468261d3d
commit
e42e319313
37 changed files with 13892 additions and 0 deletions
|
|
@ -300,6 +300,52 @@ ForEachMacros:
|
|||
- foreach_bo
|
||||
- foreach_bo_safe
|
||||
|
||||
# intel
|
||||
- jay_foreach_ssa_file
|
||||
- jay_foreach_function
|
||||
- jay_foreach_block
|
||||
- jay_foreach_block_safe
|
||||
- jay_foreach_block_rev
|
||||
- jay_foreach_block_from
|
||||
- jay_foreach_block_from_rev
|
||||
- jay_foreach_dst
|
||||
- jay_foreach_dst_index
|
||||
- jay_foreach_inst_in_block
|
||||
- jay_foreach_inst_in_block_rev
|
||||
- jay_foreach_inst_in_block_safe
|
||||
- jay_foreach_inst_in_block_safe_rev
|
||||
- jay_foreach_inst_in_block_from
|
||||
- jay_foreach_inst_in_block_from_rev
|
||||
- jay_foreach_inst_in_shader
|
||||
- jay_foreach_inst_in_shader_rev
|
||||
- jay_foreach_inst_in_shader_safe
|
||||
- jay_foreach_inst_in_shader_safe_rev
|
||||
- jay_foreach_inst_in_func
|
||||
- jay_foreach_inst_in_func_rev
|
||||
- jay_foreach_inst_in_func_safe
|
||||
- jay_foreach_inst_in_func_safe_rev
|
||||
- jay_foreach_successor
|
||||
- jay_foreach_predecessor
|
||||
- jay_foreach_comp
|
||||
- jay_foreach_comp_rev
|
||||
- jay_foreach_src
|
||||
- jay_foreach_src_rev
|
||||
- jay_foreach_ssa_src
|
||||
- jay_foreach_ssa_src_rev
|
||||
- jay_foreach_ssa_src_comp
|
||||
- jay_foreach_index
|
||||
- jay_foreach_index_rev
|
||||
- jay_foreach_src_index
|
||||
- jay_foreach_src_index_rev
|
||||
- jay_repair_foreach_phi
|
||||
- jay_foreach_phi_src_in_block
|
||||
- jay_foreach_phi_dst_in_block
|
||||
- jay_foreach_preload
|
||||
- jay_foreach_killed
|
||||
- jay_foreach_ra_src
|
||||
- jay_foreach_ra_file
|
||||
- jay_foreach_pipe
|
||||
|
||||
# Disable clang formatting by default. Drivers that use clang-format
|
||||
# inherit from this .clang-format file and re-enable formatting:
|
||||
#
|
||||
|
|
|
|||
31
src/intel/compiler/jay/.clang-format
Normal file
31
src/intel/compiler/jay/.clang-format
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
BasedOnStyle: InheritParentConfig
|
||||
DisableFormat: false
|
||||
|
||||
AlignConsecutiveBitFields: Consecutive
|
||||
BitFieldColonSpacing: None
|
||||
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveMacros:
|
||||
Enabled: true
|
||||
AcrossComments: true
|
||||
AlignArrayOfStructures: Left
|
||||
|
||||
ColumnLimit: 80
|
||||
|
||||
BreakStringLiterals: false
|
||||
SpaceBeforeParens: ControlStatementsExceptControlMacros
|
||||
SpaceAfterCStyleCast: true
|
||||
BinPackParameters: OnePerLine
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 100
|
||||
ReferenceAlignment: Middle
|
||||
|
||||
BreakBeforeBinaryOperators: None
|
||||
PenaltyBreakAssignment: 0
|
||||
|
||||
SpacesInContainerLiterals: true
|
||||
Cpp11BracedListStyle: false
|
||||
|
||||
AlignOperands: Align
|
||||
BreakBinaryOperations: RespectPrecedence
|
||||
BreakBeforeTernaryOperators: false
|
||||
3
src/intel/compiler/jay/README.md
Normal file
3
src/intel/compiler/jay/README.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
Xe2 compiler experiments.
|
||||
|
||||
**Work-in-progress, not ready for users/benchmarks.**
|
||||
25
src/intel/compiler/jay/jay.h
Normal file
25
src/intel/compiler/jay/jay.h
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compiler/brw/brw_compiler.h"
|
||||
#include "util/shader_stats.h"
|
||||
#include "nir.h"
|
||||
|
||||
struct intel_device_info;
|
||||
struct nir_shader_compiler_options;
|
||||
|
||||
struct jay_shader_bin {
|
||||
const uint32_t *kernel;
|
||||
uint32_t size;
|
||||
struct genisa_stats stats;
|
||||
};
|
||||
|
||||
struct jay_shader_bin *jay_compile(const struct intel_device_info *devinfo,
|
||||
void *mem_ctx,
|
||||
nir_shader *nir,
|
||||
union brw_any_prog_data *prog_data,
|
||||
union brw_any_prog_key *key);
|
||||
365
src/intel/compiler/jay/jay_assign_flags.c
Normal file
365
src/intel/compiler/jay/jay_assign_flags.c
Normal file
|
|
@ -0,0 +1,365 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "jay_builder.h"
|
||||
#include "jay_builder_opcodes.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/*
|
||||
* Instruction selection works on SSA FLAG and UFLAG variables. This pass
|
||||
* implements a flag register allocator, assigning each FLAG/UFLAG either to a
|
||||
* hardware flag register and/or spilling to a GPR/UGPR.
|
||||
*
|
||||
* As a simplification, hardware flags are block-local. At block boundaries,
|
||||
* 32-bit 0/~0 (U)GPRs are our canonical representation for (U)FLAGs.
|
||||
*
|
||||
* Producers: CMP produce both 0/~0 GPRs and flags, while conditional modifiers
|
||||
* produce only flags. Boolean arithmetic is lowered to GPRs.
|
||||
*
|
||||
* Consumers: SEL/CSEL consumes both GPRs and flags, while predication consumes
|
||||
* only flags. Boolean arithmetic again requires GPRs.
|
||||
*
|
||||
* Our strategy is to turn flags into GPR representations globally while keeping
|
||||
* copies in flags where it makes sense locally.
|
||||
*/
|
||||
|
||||
static inline jay_def
|
||||
canonicalize_flag(jay_def x)
|
||||
{
|
||||
assert(jay_is_flag(x));
|
||||
x.file = x.file == UFLAG ? UGPR : GPR;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Per-SSA-variable state for the flag allocator. Indexed by the canonical
 * (GPR/UGPR) SSA index of each boolean variable. Kept to a single byte since
 * one entry is allocated per SSA value.
 */
struct var_info {
   /* Hardware flag register currently holding this variable (if any) */
   unsigned flag :3;
   /* Whether the flag copy lives in a UFLAG (rather than FLAG) */
   bool uniform :1;
   /* Set when a predicated instruction reads this variable in the same
    * block in which it is defined (enables flag broadcasting).
    */
   bool read_by_predication:1;
   /* The canonical 0/~0 GPR form is not needed and may be rewritten away */
   bool free_canonical :1;
   unsigned pad :2;
} PACKED;
static_assert(sizeof(struct var_info) == 1);

/* Block-local flag register allocator state. Hardware flags are only valid
 * within a block; at block boundaries the canonical GPR form is used.
 */
struct flag_ra {
   jay_builder *b;
   /* Per-SSA-variable info, shared across the whole function */
   struct var_info *vars;
   /* For each hardware flag: the canonical SSA index it currently holds */
   uint32_t flag_to_global[JAY_MAX_FLAGS];
   /* For each hardware flag: the SSA index of the local flag temporary */
   uint32_t flag_to_local[JAY_MAX_FLAGS];
   /* Next flag register to hand out (simple round-robin policy) */
   unsigned roundrobin;
   /* Bitmask of flags used for ballots, which must be zeroed at block entry */
   unsigned ballots:JAY_MAX_FLAGS;
};
|
||||
|
||||
/* Allocate a hardware flag register for `flag` and record the mapping in the
 * allocator state. Ballots get the dedicated flag 0; everything else is
 * assigned round-robin over flags 1..num_flags-2 (the last flag is reserved
 * elsewhere as a scratch/null flag). Returns the new flag-file temporary.
 */
static jay_def
assign_flag(struct flag_ra *ra,
            jay_def flag,
            enum jay_file file,
            bool free_canonical,
            bool ballot)
{
   jay_def canonical = canonicalize_flag(flag);
   jay_def tmp = jay_alloc_def(ra->b, file, 1);

   /* Dedicate a flag for ballot since uniform access would clobber the zeroing.
    * TODO: We could optimize this with more tracking.
    */
   unsigned num_flags = jay_num_regs(ra->b->shader, FLAG);
   tmp.reg = ballot ? 0 : (1 + (ra->roundrobin++) % (num_flags - 2));

   ra->vars[jay_index(canonical)] = (struct var_info) {
      .uniform = tmp.file == UFLAG,
      .flag = tmp.reg,
      .free_canonical = free_canonical,
   };

   /* Record both directions: which variable this flag holds, and which local
    * temporary represents it, so later reads can be rewritten to the flag.
    */
   ra->flag_to_global[tmp.reg] = jay_index(canonical);
   ra->flag_to_local[tmp.reg] = jay_index(tmp);

   if (ballot) {
      ra->ballots |= BITFIELD_BIT(tmp.reg);
   }

   return tmp;
}
|
||||
|
||||
/* Try to rewrite a 32-bit SEL whose src[`zero`] is the immediate 0 into pure
 * bitwise arithmetic on the canonical 0/~0 form, avoiding the need for a
 * hardware flag entirely. Returns true on success, false if the pattern does
 * not apply (non-zero source, source modifiers, or non-32-bit type).
 */
static bool
rewrite_sel_with_zero(jay_inst *I, unsigned zero)
{
   jay_def flag = I->src[2];
   unsigned other = 1 - zero;

   if (!jay_defs_equivalent(I->src[zero], jay_imm(0)) ||
       I->src[other].abs ||
       I->src[other].negate ||
       jay_type_size_bits(I->type) != 32) {
      return false;
   }

   if (jay_defs_equivalent(I->src[other], jay_imm(0xffffffff)) && zero == 1) {
      /* (c ? 0xffffffff : 0) -> canonical(c) */
      I->op = JAY_OPCODE_MOV;
      I->src[0] = canonicalize_flag(flag);
      jay_shrink_sources(I, 1);
   } else {
      /* ([!]c ? a : 0) --> (a & [~]canonical(c)) and
       * ([!]c ? 0 : a) --> (a & ~[~]canonical(c))
       */
      I->op = JAY_OPCODE_AND;
      I->src[0] = I->src[other];
      I->src[1] = canonicalize_flag(flag);
      /* When the zero is in slot 0, the condition selects the *other* value,
       * so the mask must be inverted (on top of any existing negate).
       */
      I->src[1].negate ^= (zero == 0);
      jay_shrink_sources(I, 2);
   }

   return true;
}
|
||||
|
||||
/* Lower a 32-bit SEL on a canonical boolean into a CSEL that compares the
 * canonical 0/~0 value against zero, so no hardware flag is consumed.
 * Returns false for non-32-bit types, where this lowering does not apply.
 */
static bool
rewrite_sel_to_csel(jay_inst *I)
{
   if (jay_type_size_bits(I->type) != 32) {
      return false;
   }

   /* SEL.f32 lowers to CSEL.f32 to preserve source modifiers & float controls.
    * That works since we reinterpret 0/~0 as 0.0/NaN.
    */
   jay_def flag = I->src[2];
   I->op = JAY_OPCODE_CSEL;
   /* A negated condition flips the comparison instead of negating the source */
   I->conditional_mod = flag.negate ? JAY_CONDITIONAL_EQ : JAY_CONDITIONAL_NE;
   I->src[2] = canonicalize_flag(flag);
   I->src[2].negate = false;
   return true;
}
|
||||
|
||||
/* Attempt to rewrite source `s` of `I` so that no hardware flag is needed,
 * using the canonical GPR form instead. Returns true if the instruction was
 * rewritten. PHI sources are always canonicalized; SELs are only rewritten
 * when optimization is enabled and the flag copy isn't already free to use.
 */
static bool
rewrite_without_flag(struct flag_ra *ra, jay_inst *I, unsigned s, bool in_flag)
{
   if (I->op == JAY_OPCODE_PHI_SRC) {
      /* Cross-block values always use the canonical GPR representation */
      I->src[s] = canonicalize_flag(I->src[s]);
      return true;
   }

   if (jay_debug & JAY_DBG_NOOPT) {
      return false;
   }

   if (I->op == JAY_OPCODE_SEL &&
       (!in_flag || ra->vars[jay_index(I->src[s])].free_canonical) &&
       !I->predication) {

      /* Try the cheap bitwise forms first, then fall back to CSEL (which is
       * only profitable when the value isn't already sitting in a flag).
       */
      return rewrite_sel_with_zero(I, 0) ||
             rewrite_sel_with_zero(I, 1) ||
             (!in_flag && rewrite_sel_to_csel(I));
   }

   return false;
}
|
||||
|
||||
/* Run flag register allocation over a single block. Walks each instruction,
 * lowering 1-bit booleans to 32-bit canonical 0/~0 arithmetic, materializing
 * hardware flags for sources that truly need them (predication, SEL), and
 * recovering the canonical GPR form after any flag-writing instruction.
 * `var_to_flag` carries per-variable state across blocks; hardware flag
 * assignments themselves are block-local.
 */
static void
assign_block(jay_function *func, jay_block *block, struct var_info *var_to_flag)
{
   jay_builder b = { .shader = func->shader, .func = func };
   struct flag_ra ra_ = { .b = &b, .vars = var_to_flag }, *ra = &ra_;

   jay_foreach_inst_in_block_safe(block, I) {
      if (I->op == JAY_OPCODE_CAST_CANONICAL_TO_FLAG) {
         /* Assume the source is already 0/~0 canonical and use it. */
         I->op = JAY_OPCODE_MOV;
         I->type = JAY_TYPE_U32;
         I->dst = canonicalize_flag(I->dst);
         continue;
      } else if (I->type == JAY_TYPE_U1) {
         /* Boolean logic turns into bitwise logic on the canonical form */
         if (!jay_is_null(I->dst)) {
            I->dst = canonicalize_flag(I->dst);
         }

         jay_foreach_src(I, s) {
            /* src[2] of a SEL is the condition and is handled below */
            if (!(s == 2 && I->op == JAY_OPCODE_SEL) &&
                jay_src_type(I, s) == JAY_TYPE_U1) {
               if (jay_is_imm(I->src[s])) {
                  /* Convert 1-bit boolean to 0/~0 */
                  assert(jay_is_imm(I->src[s]) && jay_as_uint(I->src[s]) <= 1);
                  I->src[s] = jay_imm(jay_as_uint(I->src[s]) ? ~0 : 0);
               } else {
                  I->src[s] = canonicalize_flag(I->src[s]);
               }
            }
         }

         I->type = JAY_TYPE_U32;
      }

      /* Handle flag sources */
      jay_foreach_src(I, s) {
         if (!jay_is_flag(I->src[s])) {
            continue;
         }

         unsigned index = jay_index(I->src[s]);
         bool ballot = jay_src_type(I, s) != JAY_TYPE_U1;
         enum jay_file file = I->dst.file == UGPR && !ballot ? UFLAG : FLAG;
         /* The value is already in a flag only if that flag still holds this
          * variable and the uniformity of the copy matches what we need.
          */
         bool in_flag = ra->flag_to_global[var_to_flag[index].flag] == index &&
                        ((file == UFLAG) == var_to_flag[index].uniform);

         /* If we don't actually need the flag, we're done. */
         if (rewrite_without_flag(ra, I, s, in_flag)) {
            continue;
         }

         /* Otherwise, ensure we have the value in a flag. */
         if (!in_flag) {
            jay_def tmp = assign_flag(ra, I->src[s], file, false, ballot);

            /* XXX: We need a more systematic approach to modifiers :/ */
            b.cursor = jay_before_inst(I);
            jay_def d = I->src[s];
            d.negate = false;
            /* canonical != 0 reloads the flag from the canonical GPR form */
            jay_CMP(&b, JAY_TYPE_U32, JAY_CONDITIONAL_NE, tmp,
                    canonicalize_flag(d), 0);
         }

         /* ...and rewrite to use the flag */
         unsigned reg = var_to_flag[index].flag;
         jay_def flag = jay_scalar(file, ra->flag_to_local[reg]);
         flag.reg = reg;
         jay_replace_src(&I->src[s], flag);
      }

      /* Handle flag writes */
      b.cursor = jay_after_inst(I);

      /* If the flag is written directly (for an inverse ballot), recover the
       * canonical representation with a SEL.
       */
      if (!jay_is_null(I->dst) && jay_is_flag(I->dst)) {
         jay_def canonical = canonicalize_flag(I->dst);
         I->dst = assign_flag(ra, I->dst, I->dst.file, false, false);
         jay_SEL(&b, JAY_TYPE_U32, canonical, ~0, 0, I->dst);
      }

      if (!jay_is_null(I->cond_flag)) {
         /* A uniform CMP result read by predication in this block must be
          * broadcast into a regular FLAG so the predicate sees all channels.
          */
         I->broadcast_flag =
            var_to_flag[jay_index(I->cond_flag)].read_by_predication &&
            I->cond_flag.file == UFLAG &&
            I->op == JAY_OPCODE_CMP;

         jay_def canonical = canonicalize_flag(I->cond_flag);
         I->cond_flag =
            assign_flag(ra, I->cond_flag,
                        I->broadcast_flag ? FLAG : I->cond_flag.file,
                        I->op == JAY_OPCODE_CMP, false);

         if (I->op == JAY_OPCODE_CMP) {
            assert(jay_is_null(I->dst));

            if (I->broadcast_flag) {
               /* We need to recover the UGPR from the replicated FLAG. Thanks
                * to our write-masking and broadcasting, the flag is already
                * 0/~0. We simply need to sign-extend.
                */
               jay_i2i32(&b, canonical, b.shader->dispatch_width, I->cond_flag);
            } else if (jay_type_size_bits(I->type) != 32) {
               I->dst = jay_alloc_def(&b, canonical.file,
                                      jay_type_vector_length(I->type));
               jay_i2i32(&b, canonical, jay_type_size_bits(I->type), I->dst);
            } else {
               /* 32-bit CMP returns the canonical form */
               I->dst = canonical;
            }
         } else {
            assert(jay_type_size_bits(I->type) == 32 && "limited cmod prop");

            if (jay_is_null(I->dst)) {
               I->dst = jay_alloc_def(&b, canonical.file,
                                      jay_type_vector_length(I->type));
            }

            /* Recover the canonical representation with a CMP. Hopefully,
             * either the CMP or the cmod will be eliminated by a later DCE.
             */
            jay_CMP(&b, I->type, I->conditional_mod, canonical, I->dst, 0)
               ->cond_flag.reg =
               jay_num_regs(b.shader, FLAG) - 1; // TODO: no null flag
         }
      }
   }

   /* Ballots require zeroing flags */
   b.cursor = jay_before_block(block);
   u_foreach_bit(i, ra->ballots) {
      jay_ZERO_FLAG(&b, i);
   }
}
|
||||
|
||||
/* Local copy propagation over the function: for each SSA source defined by an
 * unpredicated MOV, substitute the MOV's source directly. Used to clean up
 * the extra moves that flag RA introduces. PHI sources and SENDs are skipped
 * (their operands have placement constraints), as are collected sources.
 */
static void
copyprop(jay_function *f)
{
   /* Map from SSA index to defining instruction, filled as we walk forward */
   jay_inst **defs = calloc(f->ssa_alloc, sizeof(defs[0]));

   jay_foreach_inst_in_func_safe(f, block, I) {
      jay_foreach_dst_index(I, _, d) {
         defs[d] = I;
      }

      if (I->op == JAY_OPCODE_PHI_SRC || I->op == JAY_OPCODE_SEND)
         continue;

      jay_foreach_ssa_src(I, s) {
         jay_def src = I->src[s];
         if (src.collect)
            continue;

         jay_inst *def = defs[jay_base_index(src)];
         /* Only propagate when the files match (or an immediate feeds a CMP,
          * which can take it directly), and the MOV writes the whole value.
          */
         if (jay_defs_equivalent(def->dst, src) &&
             !def->predication &&
             def->op == JAY_OPCODE_MOV &&
             (I->src[s].file == def->src[0].file ||
              (I->op == JAY_OPCODE_CMP && jay_is_imm(def->src[0])))) {

            jay_replace_src(&I->src[s], def->src[0]);
         }
      }
   }

   free(defs);
}
|
||||
|
||||
void
|
||||
jay_assign_flags(jay_shader *s)
|
||||
{
|
||||
jay_foreach_function(s, f) {
|
||||
struct var_info *map = calloc(f->ssa_alloc, sizeof(map[0]));
|
||||
uint32_t *def_to_block = calloc(f->ssa_alloc, sizeof(def_to_block));
|
||||
|
||||
jay_foreach_inst_in_func(f, block, I) {
|
||||
if (!jay_is_null(I->cond_flag)) {
|
||||
def_to_block[jay_index(I->cond_flag)] = block->index + 1;
|
||||
}
|
||||
|
||||
if (I->predication) {
|
||||
jay_def predicate = *jay_inst_get_predicate(I);
|
||||
if (def_to_block[jay_index(predicate)] == block->index + 1) {
|
||||
map[jay_index(predicate)].read_by_predication = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
jay_foreach_block(f, b) {
|
||||
assign_block(f, b, map);
|
||||
}
|
||||
|
||||
free(map);
|
||||
free(def_to_block);
|
||||
|
||||
/* Flag RA leaves moves. Clean up after ourselves. */
|
||||
copyprop(f);
|
||||
}
|
||||
}
|
||||
/* TODO: revisit
|
||||
* dEQP-GLES3.functional.shaders.arrays.compare.equal_highp_vec4_highp_vec4_vertex
|
||||
*/
|
||||
643
src/intel/compiler/jay/jay_builder.h
Normal file
643
src/intel/compiler/jay/jay_builder.h
Normal file
|
|
@ -0,0 +1,643 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compiler/brw/brw_eu.h"
|
||||
#include "compiler/brw/brw_eu_defines.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
|
||||
/* Like in NIR, for use with the builder: a cursor names an insertion point,
 * either relative to an instruction or at the end of a block.
 */
enum jay_cursor_option {
   jay_cursor_after_block,
   jay_cursor_before_inst,
   jay_cursor_after_inst
};

typedef struct PACKED {
   /* Which member is valid depends on `option`: `block` for
    * jay_cursor_after_block, `inst` otherwise.
    */
   union {
      jay_block *block;
      jay_inst *inst;
   };

   enum jay_cursor_option option;
} jay_cursor;
|
||||
|
||||
static inline bool
|
||||
jay_cursors_equal(jay_cursor a, jay_cursor b)
|
||||
{
|
||||
return !memcmp(&a, &b, sizeof(a));
|
||||
}
|
||||
|
||||
static inline jay_cursor
|
||||
jay_after_block(jay_block *block)
|
||||
{
|
||||
return (jay_cursor) { .block = block, .option = jay_cursor_after_block };
|
||||
}
|
||||
|
||||
static inline jay_cursor
|
||||
jay_before_inst(jay_inst *I)
|
||||
{
|
||||
return (jay_cursor) { .inst = I, .option = jay_cursor_before_inst };
|
||||
}
|
||||
|
||||
static inline jay_cursor
|
||||
jay_after_inst(jay_inst *I)
|
||||
{
|
||||
return (jay_cursor) { .inst = I, .option = jay_cursor_after_inst };
|
||||
}
|
||||
|
||||
/* Cursor at the logical start of a block: after any phi destinations,
 * preloads, and ELSE markers, which must stay at the top.
 */
static inline jay_cursor
jay_before_block(jay_block *block)
{
   jay_foreach_inst_in_block(block, I) {
      if (I->op != JAY_OPCODE_PHI_DST &&
          I->op != JAY_OPCODE_PRELOAD &&
          I->op != JAY_OPCODE_ELSE)
         return jay_before_inst(I);
   }

   /* Whole block is phis, so insert at the end */
   return jay_after_block(block);
}
|
||||
|
||||
/* Cursor at the logical end of a block: before any trailing phi sources and
 * control-flow instructions, which must stay at the bottom.
 */
static inline jay_cursor
jay_after_block_logical(jay_block *block)
{
   jay_foreach_inst_in_block_rev(block, I) {
      if (I->op != JAY_OPCODE_PHI_SRC && !jay_op_is_control_flow(I->op))
         return jay_after_inst(I);
   }

   /* Whole block is phi sources / control flow, so insert at the start */
   return jay_before_block(block);
}
|
||||
|
||||
static inline jay_cursor
|
||||
jay_before_jump(jay_block *block)
|
||||
{
|
||||
jay_inst *jump = jay_block_ending_jump(block);
|
||||
return jump ? jay_before_inst(jump) : jay_after_block(block);
|
||||
}
|
||||
|
||||
/* Get a cursor at the start of a function, after any preloads */
static inline jay_cursor
jay_before_function(jay_function *f)
{
   jay_block *block = jay_first_block(f);

   jay_foreach_inst_in_block(block, I) {
      if (I->op != JAY_OPCODE_PRELOAD)
         return jay_before_inst(I);
   }

   /* The whole block is preloads, so insert at the end */
   return jay_after_block(block);
}
|
||||
|
||||
/*
 * Map a control flow edge to a block. If the block has one successor, the
 * predecessor is unique. Else, the successor is unique; the successor must not
 * have other predecessors since there are no critical edges.
 */
static inline jay_block *
jay_edge_to_block(jay_block *pred, jay_block *succ)
{
   assert(jay_num_successors(pred) == 1 || jay_num_predecessors(succ) == 1);
   return jay_num_successors(pred) == 1 ? pred : succ;
}
|
||||
|
||||
/*
|
||||
* Get a cursor to insert along a control flow edge: either at the start of
|
||||
* the successor or the end of the predecessor. This relies on the control
|
||||
* flow graph having no critical edges.
|
||||
*/
|
||||
static inline jay_cursor
|
||||
jay_along_edge(jay_block *pred, jay_block *succ)
|
||||
{
|
||||
jay_block *to = jay_edge_to_block(pred, succ);
|
||||
|
||||
if (to == pred)
|
||||
return jay_after_block_logical(pred);
|
||||
else
|
||||
return jay_before_block(succ);
|
||||
}
|
||||
|
||||
/* Instruction builder: a shader/function pair plus a cursor naming where the
 * next instruction is inserted. The cursor advances as instructions are added.
 */
typedef struct {
   jay_shader *shader;
   jay_function *func;
   jay_cursor cursor;
} jay_builder;

/* Construct a builder for function f positioned at `cursor`. */
static inline jay_builder
jay_init_builder(jay_function *f, jay_cursor cursor)
{
   return (jay_builder) { .shader = f->shader, .func = f, .cursor = cursor };
}
|
||||
|
||||
static inline void
|
||||
jay_builder_insert(jay_builder *b, jay_inst *I)
|
||||
{
|
||||
jay_cursor *cursor = &b->cursor;
|
||||
|
||||
if (cursor->option == jay_cursor_after_inst) {
|
||||
list_add(&I->link, &cursor->inst->link);
|
||||
} else if (cursor->option == jay_cursor_after_block) {
|
||||
list_addtail(&I->link, &cursor->block->instructions);
|
||||
} else {
|
||||
assert(cursor->option == jay_cursor_before_inst);
|
||||
list_addtail(&I->link, &cursor->inst->link);
|
||||
}
|
||||
|
||||
cursor->option = jay_cursor_after_inst;
|
||||
cursor->inst = I;
|
||||
}
|
||||
|
||||
static inline jay_def
|
||||
jay_alloc_def(jay_builder *b, enum jay_file file, unsigned size)
|
||||
{
|
||||
unsigned idx = b->func->ssa_alloc;
|
||||
b->func->ssa_alloc += size;
|
||||
return jay_contiguous_def(file, idx, size);
|
||||
}
|
||||
|
||||
/*
 * Collect SSA indices into a source. If the indices are not contiguous, this
 * uses a heap-allocated collect. Otherwise, a contiguous def is used.
 */
static inline jay_def
jay_collect(jay_builder *b,
            enum jay_file file,
            const uint32_t *indices,
            unsigned nr)
{
   if (nr == 0)
      return jay_null();

   for (unsigned i = 1; i < nr; ++i) {
      if (indices[i] != (indices[0] + i)) {
         static_assert(sizeof(uintptr_t) <= sizeof(uint64_t) &&
                       "sorry, no Morello support");
         /* Linear-allocated copy: lives as long as the shader itself */
         void *dup =
            linear_memdup(b->shader->lin_ctx, indices, sizeof(uint32_t) * nr);
         uint64_t payload = (uintptr_t) dup;

         /* We require pointers to fit within (32+JAY_REG_BITS) bits. Luckily
          * this will always be the case on common architectures.
          */
         assert(payload < (1ull << (32 + JAY_REG_BITS)));

         /* Smuggle the pointer through the def: low 32 bits in _payload,
          * high bits in reg. Consumers reassemble it via the collect bit.
          */
         return (jay_def) {
            ._payload = (uint32_t) payload,
            .reg = (uint32_t) (payload >> 32),
            .file = file,
            .num_values_m1 = nr - 1,
            .collect = true,
         };
      }
   }

   return jay_contiguous_def(file, indices[0], nr);
}
|
||||
|
||||
/*
 * Set the n'th channel of a def to index. This requires a copy-on-write.
 *
 * This implementation could likely be optimized.
 */
static inline void
jay_insert_channel(jay_builder *b, jay_def *d, unsigned c, jay_def scalar)
{
   uint32_t indices[JAY_MAX_DEF_LENGTH];
   uint32_t count = jay_num_values(*d);

   assert(scalar.file == d->file && !scalar.negate && !scalar.abs);
   assert(c < count && count <= ARRAY_SIZE(indices));

   /* First, decompress the def. */
   jay_foreach_comp(*d, i) {
      indices[i] = jay_channel(*d, i);
   }

   /* Next, update the indices in place */
   indices[c] = jay_index(scalar);

   /* Now collect it back. */
   jay_replace_src(d, jay_collect(b, d->file, indices, count));
}
|
||||
|
||||
/*
 * Concatenate a list of vectors, collecting all the indices in order.
 * All inputs must be modifier-free SSA defs in the same file.
 */
static inline jay_def
jay_collect_vectors(jay_builder *b, jay_def *vecs, uint32_t nr)
{
   uint32_t indices[JAY_MAX_DEF_LENGTH];
   uint32_t nr_indices = 0;

   for (unsigned i = 0; i < nr; ++i) {
      assert(vecs[i].file == vecs[0].file && jay_is_ssa(vecs[i]));
      assert(!vecs[i].negate && !vecs[i].abs);

      jay_foreach_comp(vecs[i], c) {
         assert(nr_indices < ARRAY_SIZE(indices));
         indices[nr_indices++] = jay_channel(vecs[i], c);
      }
   }

   return jay_collect(b, vecs[0].file, indices, nr_indices);
}
|
||||
|
||||
static inline jay_def
|
||||
jay_collect_two(jay_builder *b, jay_def u, jay_def v)
|
||||
{
|
||||
jay_def vecs[] = { u, v };
|
||||
return jay_collect_vectors(b, vecs, 2);
|
||||
}
|
||||
|
||||
static inline jay_inst *
|
||||
jay_alloc_inst(jay_builder *b,
|
||||
enum jay_opcode op,
|
||||
uint8_t num_srcs,
|
||||
unsigned extra_bytes)
|
||||
{
|
||||
const size_t size =
|
||||
offsetof(jay_inst, src) + num_srcs * sizeof(jay_def) + extra_bytes;
|
||||
|
||||
jay_inst *I = (jay_inst *) linear_zalloc_child(b->shader->lin_ctx, size);
|
||||
I->op = op;
|
||||
I->num_srcs = num_srcs;
|
||||
I->dst = jay_null();
|
||||
I->cond_flag = jay_null();
|
||||
|
||||
return I;
|
||||
}
|
||||
|
||||
/* Shrink an instruction to `new_num_srcs` sources in place. The trailing
 * opcode-specific info stored after the source array is moved down so it
 * stays adjacent to the (now shorter) source list; memmove is required since
 * the regions may overlap.
 */
static inline void
jay_shrink_sources(jay_inst *I, uint8_t new_num_srcs)
{
   assert(new_num_srcs < I->num_srcs);
   unsigned info_size = jay_inst_info_size(I);

   memmove(&I->src[new_num_srcs], &I->src[I->num_srcs], info_size);
   I->num_srcs = new_num_srcs;
}
|
||||
|
||||
/* Clone an instruction into a new allocation with room for `new_num_srcs`
 * sources (which must not shrink). The clone is not inserted into any list.
 */
static inline jay_inst *
jay_clone_inst(jay_builder *b, jay_inst *I, uint8_t new_num_srcs)
{
   assert(new_num_srcs >= I->num_srcs);
   unsigned info_size = jay_inst_info_size(I);

   jay_inst *clone = jay_alloc_inst(b, I->op, new_num_srcs, info_size);

   /* Copy the fixed header, skipping the list link at the front — the clone
    * must not alias the original's list membership.
    */
   memcpy((uint8_t *) clone + sizeof(struct list_head),
          (uint8_t *) I + sizeof(struct list_head),
          sizeof(jay_inst) - sizeof(struct list_head));

   clone->num_srcs = new_num_srcs;

   /* Copy the existing sources, then the trailing opcode-specific info to
    * its new position just past the enlarged source array.
    */
   memcpy(clone->src, I->src, I->num_srcs * sizeof(jay_def));
   memcpy(&clone->src[new_num_srcs], &I->src[I->num_srcs], info_size);
   return clone;
}
|
||||
|
||||
/* Replace I with a clone that has room for `new_num_srcs` sources, inserted
 * in I's place. Returns the clone; I is removed and must not be used after.
 */
static inline jay_inst *
jay_grow_sources(jay_builder *b, jay_inst *I, uint8_t new_num_srcs)
{
   jay_inst *clone = jay_clone_inst(b, I, new_num_srcs);

   /* If the caller's cursor pointed at I, retarget it at the clone so the
    * builder doesn't dangle on the removed instruction.
    */
   if ((b->cursor.option == jay_cursor_before_inst ||
        b->cursor.option == jay_cursor_after_inst) &&
       b->cursor.inst == I) {

      b->cursor.inst = clone;
   }

   jay_builder b_ = jay_init_builder(b->func, jay_before_inst(I));
   jay_builder_insert(&b_, clone);
   jay_remove_instruction(I);
   return clone;
}
|
||||
|
||||
/* Predicate I on `predicate`, supplying `default_value` for disabled
 * channels. The predicate and default are appended as extra sources, so I is
 * reallocated; the returned pointer replaces I.
 */
static inline jay_inst *
jay_add_predicate_else(jay_builder *b,
                       jay_inst *I,
                       jay_def predicate,
                       jay_def default_value)
{
   assert(!I->predication && "pre-condition");
   assert(jay_is_flag(predicate) && jay_is_ssa(default_value));

   unsigned pred_index = I->num_srcs;
   I = jay_grow_sources(b, I, pred_index + 2);
   I->src[pred_index] = predicate;
   I->src[pred_index + 1] = default_value;
   I->predication = JAY_PREDICATED_DEFAULT;
   return I;
}
|
||||
|
||||
static inline jay_inst *
|
||||
jay_add_predicate(jay_builder *b, jay_inst *I, jay_def predicate)
|
||||
{
|
||||
assert(!I->predication && "pre-condition");
|
||||
assert(jay_is_flag(predicate));
|
||||
|
||||
unsigned pred_index = I->num_srcs;
|
||||
I = jay_grow_sources(b, I, pred_index + 1);
|
||||
I->src[pred_index] = predicate;
|
||||
I->predication = JAY_PREDICATED;
|
||||
return I;
|
||||
}
|
||||
|
||||
static inline jay_inst *
|
||||
jay_set_cond_flag(jay_builder *b, jay_inst *I, jay_def cond_flag)
|
||||
{
|
||||
assert(jay_is_flag(cond_flag) && jay_is_null(I->cond_flag));
|
||||
|
||||
I->cond_flag = cond_flag;
|
||||
return I;
|
||||
}
|
||||
|
||||
static inline jay_inst *
|
||||
jay_set_conditional_mod(jay_builder *b,
|
||||
jay_inst *I,
|
||||
jay_def cond_flag,
|
||||
enum jay_conditional_mod cmod)
|
||||
{
|
||||
I->conditional_mod = cmod;
|
||||
return jay_set_cond_flag(b, I, cond_flag);
|
||||
}
|
||||
|
||||
/* Identity helper so _Generic below can route jay_def arguments through
 * unchanged.
 */
static inline jay_def
jay_identity_def(jay_def x)
{
   return x;
}

/* JAY_BUILD_SRC coerces a builder argument into a jay_def: jay_def values
 * pass through, integers become immediates. C++ uses overloading; C uses
 * C11 _Generic.
 */
#ifdef __cplusplus
static inline jay_def
JAY_BUILD_SRC(jay_def x)
{
   return x;
}
static inline jay_def
JAY_BUILD_SRC(uint32_t x)
{
   return jay_imm(x);
}
#else
#define JAY_BUILD_SRC(X) \
   _Generic((X), \
      jay_def: jay_identity_def, \
      uint32_t: jay_imm, \
      int32_t: jay_imm, \
      uint8_t: jay_imm)(X)
#endif
|
||||
|
||||
/* Include generated builder helpers */
|
||||
#include "jay_builder_opcodes.h"
|
||||
|
||||
/* Build and insert a CMP of src0 against src1 with conditional modifier
 * `cmod`. `dst` may be a flag or a GPR/UGPR: a non-flag dst receives the
 * 0/~0 result while a fresh flag is still allocated for the implicit
 * hardware flag write.
 */
static inline jay_inst *
_jay_CMP(jay_builder *b,
         enum jay_type src_type,
         enum jay_conditional_mod cmod,
         jay_def dst,
         jay_def src0,
         jay_def src1)
{
   jay_inst *I = jay_alloc_inst(b, JAY_OPCODE_CMP, 2, 0);
   I->type = src_type;
   I->src[0] = src0;
   I->src[1] = src1;

   /* Even if we want to write a 32-bit 0/~0 result, we still need to
    * register-allocate a flag, since the hardware will implicitly clobber one
    * regardless.
    */
   if (!jay_is_flag(dst)) {
      I->dst = dst;
      dst = jay_alloc_def(b, dst.file == UGPR ? UFLAG : FLAG, 1);
   }

   jay_set_conditional_mod(b, I, dst, cmod);
   jay_builder_insert(b, I);
   return I;
}

/* Convenience wrapper: coerces integer sources into immediates. */
#define jay_CMP(b, st, cmod, dst, src0, src1) \
   _jay_CMP(b, st, cmod, dst, JAY_BUILD_SRC(src0), JAY_BUILD_SRC(src1))
|
||||
|
||||
/* Parameters for building a SEND message (see _jay_SEND). */
struct jayb_send_params {
   /* Shared function (target unit) for the message */
   enum brw_sfid sfid;
   uint64_t msg_desc;
   jay_def dst;
   /* Optional message header; jay_null() if none */
   jay_def header;
   /* Payload sources, `nr_srcs` of them */
   jay_def *srcs;
   /* Indirect descriptor / extended descriptor registers */
   jay_def desc, ex_desc;
   enum jay_type type;
   /* Per-payload source types; 0 means "inherit" (src_type[0] falls back to
    * `type`, src_type[1] to src_type[0]) — see _jay_SEND.
    */
   enum jay_type src_type[2];
   unsigned nr_srcs;
   uint32_t ex_desc_imm;
   /* End-of-thread message */
   bool eot;
   bool check_tdr;
   /* Uniform (scalar) message — disables 16-bit payload packing */
   bool uniform;
   bool bindless;
};
|
||||
|
||||
/*
 * Build and insert a SEND instruction from a parameter block, assembling the
 * four operands: src[0] = message descriptor, src[1] = extended descriptor,
 * src[2]/src[3] = the (possibly split) payload.
 */
static inline jay_inst *
_jay_SEND(jay_builder *b, const struct jayb_send_params p)
{
   const struct intel_device_info *devinfo = b->shader->devinfo;
   jay_inst *I = jay_alloc_inst(b, JAY_OPCODE_SEND, 4, sizeof(jay_send_info));
   jay_send_info *info = jay_get_send_info(I);
   bool has_header = !jay_is_null(p.header);

   I->dst = p.dst;
   I->type = p.type;

   assert(I->type);
   /* Unset source types default to the destination type (and type_1 to
    * type_0).
    */
   info->type_0 = p.src_type[0] ? p.src_type[0] : I->type;
   info->type_1 = p.src_type[1] ? p.src_type[1] : info->type_0;

   if (has_header) {
      assert(p.nr_srcs == 1 || info->type_0 == info->type_1);

      /* If there is a message header, split the send into <header> and
       * <payload> since the header is UGPR but the payload is GPR.
       */
      I->src[2] = p.header;
      I->src[3] = jay_collect_vectors(b, &p.srcs[0], p.nr_srcs);
      info->type_1 = info->type_0;
      info->type_0 = JAY_TYPE_U32 /* header type */;
   } else if (jay_type_size_bits(info->type_0) == 16 &&
              !p.uniform &&
              b->shader->dispatch_width == 32) {
      /* Pack 16-bit vectors to match the hardware with the data model.
       *
       * XXX: This is a hack. Move to NIR for better
       * codegen in tests like
       * dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d_array.
       */
      assert(info->type_0 == info->type_1);
      jay_def srcs[8];
      unsigned n = 0, i;
      /* Pair up adjacent 16-bit sources into single 32-bit registers */
      for (i = 0; i + 2 <= p.nr_srcs; i += 2) {
         assert(p.srcs[i].file == p.srcs[i + 1].file);
         assert(jay_num_values(p.srcs[i]) == jay_num_values(p.srcs[i + 1]));

         for (unsigned c = 1; c < jay_num_values(p.srcs[i]); ++c) {
            assert(jay_channel(p.srcs[i], c) == 0);
            assert(jay_channel(p.srcs[i + 1], c) == 0);
         }

         /* Merge the pair: hi in the top halfword, lo in the bottom */
         jay_def lo = jay_extract(p.srcs[i], 0),
                 hi = jay_extract(p.srcs[i + 1], 0);
         jay_def bfi = jay_BFI2_u32(b, 0xffff0000, hi, lo);

         if (p.srcs[i].file == UGPR) {
            uint32_t defs[16] = { jay_index(bfi) };
            srcs[n++] = jay_collect(b, UGPR, defs, jay_ugpr_per_grf(b->shader));
         } else {
            srcs[n++] = bfi;
         }
      }
      /* Odd trailing source passes through unpacked */
      if (i < p.nr_srcs) {
         srcs[n++] = p.srcs[i++];
      }
      assert(i == p.nr_srcs);

      I->src[2] = jay_collect_vectors(b, srcs, n);
      I->src[3] = jay_null();
   } else if (p.nr_srcs <= 2) {
      /* Easy case: keep everything scalar */
      I->src[2] = p.nr_srcs > 0 ? p.srcs[0] : jay_null();
      I->src[3] = p.nr_srcs > 1 ? p.srcs[1] : jay_null();
   } else {
      /* Otherwise, we need to pick a point to split at.
       *
       * Heuristic: don't split render target writes because RA gets confused
       * with the EOT requirements. Split everything else in half.
       *
       * TODO: Come up with a better heuristic.
       */
      assert(info->type_0 == info->type_1);
      unsigned split = !p.check_tdr ? DIV_ROUND_UP(p.nr_srcs, 2) : p.nr_srcs;
      I->src[2] = jay_collect_vectors(b, &p.srcs[0], split);
      I->src[3] = jay_collect_vectors(b, &p.srcs[split], p.nr_srcs - split);
   }

   /* Compute physical GRF lengths: lens[0] for the response (dst), lens[1]
    * and lens[2] for the two payload operands. For message headers we pack a
    * UGPR vector as a single GRF.
    */
   unsigned lens[3];
   for (unsigned i = 0; i < 3; ++i) {
      jay_def x = i == 0 ? I->dst : I->src[1 + i];
      lens[i] = jay_num_values(x);

      /* XXX: For the non-transpose uniform case, do we need to pad out
       * with undefs for correctness so we don't fall off the side of the
       * regfile? for sends like:
       *
       *   (1&W) mov.u32 u10.0, u0.8 | A@1
       *   (1&W) mov.u32 u10.1, u0.9 | A@1
       *   (1&W) send.u32 u12, g10, _, 0x04403580, 0x00000000
       *         ugm MsgDesc: ( load, a64, d32, V4, L1STATE_L3MOCS dst_len =
       *         4, src0_len = 2, src1_len = 0 flat ) base_offset 0 | A@1 $0
       *
       * We don't care what's in g11, but it has to *exist*. But that is
       * probably implicitly correct as long as the reg file ends with GRFs.
       * Which it has to <Xe3 because of EOT. So no code change needed but I
       * need to document this.
       */
      if (x.file == UGPR) {
         lens[i] = DIV_ROUND_UP(lens[i], jay_ugpr_per_grf(b->shader));
      } else {
         lens[i] *= jay_grf_per_gpr(b->shader);
      }

      lens[i] *= reg_unit(devinfo);
   }

   info->sfid = p.sfid;
   info->eot = p.eot;
   info->check_tdr = p.check_tdr;
   info->uniform = p.uniform;
   info->bindless = p.bindless;
   info->ex_desc_imm = p.ex_desc_imm;
   info->ex_mlen = lens[2];
   I->src[0] = jay_imm(((uint32_t) p.msg_desc) |
                       brw_message_desc(devinfo, lens[1], lens[0], has_header));

   /* Indirect descriptor: OR the register bits into the immediate ones */
   if (!jay_is_null(p.desc)) {
      jay_def a = jay_alloc_def(b, J_ADDRESS, 1);
      jay_OR(b, JAY_TYPE_U32, a, p.desc, I->src[0]);
      I->src[0] = a;
   }

   if (jay_is_null(p.ex_desc)) {
      I->src[1] =
         jay_imm(brw_message_ex_desc(devinfo, lens[2]) | (p.msg_desc >> 32));
   } else if (p.ex_desc.file == J_ADDRESS) {
      /* Already an address register, use it directly */
      I->src[1] = p.ex_desc;
   } else {
      I->src[1] = jay_alloc_def(b, J_ADDRESS, 1);
      if (info->bindless) {
         jay_MOV(b, I->src[1], p.ex_desc);
      } else {
         jay_OR(b, JAY_TYPE_U32, I->src[1], p.ex_desc,
                brw_message_ex_desc(devinfo, info->ex_mlen));
      }
   }

   assert(!info->uniform || jay_is_null(I->dst) || I->dst.file == UGPR);
   jay_builder_insert(b, I);
   return I;
}

/* Convenience wrapper: jay_SEND(b, .sfid = ..., .msg_desc = ..., ...) */
#define jay_SEND(b, ...) _jay_SEND(b, (struct jayb_send_params) { __VA_ARGS__ })
|
||||
|
||||
static inline void
|
||||
jay_copy_strided(jay_builder *b, jay_def dst, jay_def src, bool src_strided)
|
||||
{
|
||||
unsigned src_stride = src_strided ? jay_ugpr_per_grf(b->shader) : 1;
|
||||
uint32_t n = MIN2(jay_num_values(dst), jay_num_values(src) / src_stride);
|
||||
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
jay_MOV(b, jay_extract(dst, i), jay_extract(src, i * src_stride));
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy src to dst element-by-element with a contiguous (unstrided) source. */
static inline void
jay_copy(jay_builder *b, jay_def dst, jay_def src)
{
   jay_copy_strided(b, dst, src, false);
}
|
||||
|
||||
static inline jay_def
|
||||
jay_as_gpr(jay_builder *b, jay_def src)
|
||||
{
|
||||
if (src.file == GPR || jay_is_null(src))
|
||||
return src;
|
||||
|
||||
jay_def def = jay_alloc_def(b, GPR, jay_num_values(src));
|
||||
jay_copy(b, def, src);
|
||||
return def;
|
||||
}
|
||||
|
||||
static inline void
|
||||
jay_i2i32(jay_builder *b, jay_def dst, unsigned src_bits, jay_def src)
|
||||
{
|
||||
if (src_bits < 32) {
|
||||
jay_CVT(b, JAY_TYPE_S32, dst, src, jay_type(JAY_TYPE_S, src_bits),
|
||||
JAY_ROUND, 0);
|
||||
} else if (src_bits == 32) {
|
||||
jay_MOV(b, dst, src);
|
||||
} else {
|
||||
assert(src.reg == 0 && ".reg not preserved in this path but that's OK");
|
||||
jay_MOV(b, dst, jay_extract(src, 0));
|
||||
}
|
||||
}
|
||||
153
src/intel/compiler/jay/jay_builder_opcodes.h.py
Normal file
153
src/intel/compiler/jay/jay_builder_opcodes.h.py
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
# Copyright 2026 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from mako import exceptions
|
||||
from mako.template import Template
|
||||
|
||||
from jay_opcodes import OPCODES
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from jay_opcodes import Opcode
|
||||
|
||||
|
||||
def infer_type(op: 'Opcode') -> bool:
    """Return True when the opcode's type can be inferred by the builder.

    Inference is possible for opcodes with a destination whose types are all
    untyped bit-sized integers (u1/u32/u64), plus mov.
    """
    if not op.has_dest:
        return False
    return op.name == 'mov' or set(op.types) <= {"u1", "u32", "u64"}


def signature(op: 'Opcode', with_dest: bool = True, with_types: bool = False,
              mode: str = 'prototype', type_: str = 't', src: str = '{}') -> str:
    """Build the C argument list for an opcode's builder.

    mode='prototype' yields "type name" pairs, mode='call' just the names.
    `src` is a format template applied to each source name, and `type_` is
    the name used for the explicit type parameter (only emitted for
    multi-typed, non-inferable opcodes when with_types is set). extra_struct
    fields are appended, skipping padding entries.
    """
    params = [('jay_builder *', 'b')]

    if with_types and len(op.types) > 1 and not infer_type(op):
        params.append(('enum jay_type', type_))

    if with_dest and op.has_dest:
        params.append(('jay_def', 'dst'))

    for i in range(op.num_srcs):
        params.append(('jay_def', src.format(f'src{i}')))

    params.extend(field for field in op.extra_struct
                  if not field[1].startswith('pad'))

    if mode == 'prototype':
        return ', '.join(f'{ty} {name}' for ty, name in params)
    return ', '.join(name for _ty, name in params)
|
||||
|
||||
|
||||
TEMPLATE = """
|
||||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "jay_private.h"
|
||||
|
||||
#ifndef NDEBUG
|
||||
#define type_assert(op, ...) if (!(__VA_ARGS__)) { fprintf(stderr, "%s does not allow type: ", #op); jay_print_type(stderr, t); fprintf(stderr, "\\n"); } assert(__VA_ARGS__)
|
||||
#else
|
||||
#define type_assert(...)
|
||||
#endif
|
||||
|
||||
% for op in opcodes.values():
|
||||
<%
|
||||
OPCODE = op.name.upper()
|
||||
num_srcs = op.num_srcs
|
||||
has_dest = op.has_dest
|
||||
multi_type = len(op.types) > 1
|
||||
info_size = f'sizeof(jay_{op.name}_info)' if op.extra_struct else '0'
|
||||
operands = ["dst"] + [f"src{i}" for i in range(num_srcs)]
|
||||
if num_srcs > 0:
|
||||
uniform = " && " .join([f"jay_is_uniform(src{i})" for i in range(num_srcs)])
|
||||
reg_file = f"({uniform}) ? UGPR : GPR"
|
||||
else:
|
||||
reg_file = "GPR"
|
||||
if not op.types:
|
||||
continue
|
||||
# Ignore the lane index when determining the type of a shuffle
|
||||
infer_operands = operands[0:-1] if op.name == "shuffle" else operands
|
||||
%>
|
||||
static inline jay_inst *
|
||||
_jay_${OPCODE}(${signature(op, with_types = True)})
|
||||
{
|
||||
% if infer_type(op):
|
||||
enum jay_type t = jay_num_values(dst) == 2 ? JAY_TYPE_U64 :
|
||||
${" && ".join([f"(jay_is_flag({x}) || jay_is_imm({x}))" for x in infer_operands])}
|
||||
? JAY_TYPE_U1 : JAY_TYPE_U32;
|
||||
% elif multi_type:
|
||||
type_assert(${OPCODE}, 0
|
||||
% for type in op.types:
|
||||
|| t == JAY_TYPE_${type.upper()}
|
||||
% endfor
|
||||
);
|
||||
|
||||
% else:
|
||||
enum jay_type t = JAY_TYPE_${op.types[0].upper()};
|
||||
|
||||
% endif
|
||||
jay_inst *inst = jay_alloc_inst(b, JAY_OPCODE_${OPCODE}, ${num_srcs}, ${info_size});
|
||||
% for _, prop in op.extra_struct:
|
||||
% if not prop.startswith('pad'):
|
||||
jay_set_${op.name}_${prop}(inst, ${prop});
|
||||
% endif
|
||||
% endfor
|
||||
|
||||
inst->type = t;
|
||||
% if op.has_dest:
|
||||
inst->dst = dst;
|
||||
% endif
|
||||
% for i in range(num_srcs):
|
||||
inst->src[${i}] = src${i};
|
||||
% endfor
|
||||
|
||||
jay_builder_insert(b, inst);
|
||||
return inst;
|
||||
}
|
||||
|
||||
#define jay_${OPCODE}(${signature(op, with_types = True, mode = 'call')}) _jay_${OPCODE}(${signature(op, with_types = True, src = 'JAY_BUILD_SRC({})', mode='call')})
|
||||
|
||||
% for type in op.types:
|
||||
static inline ${'jay_def' if op.has_dest else 'void'}
|
||||
_jay_${OPCODE}_${type}(${signature(op, with_dest = False)})
|
||||
{
|
||||
% if op.has_dest:
|
||||
jay_def dst = jay_alloc_def(b, ${reg_file}, ${2 if '64' in type else 1});
|
||||
%endif
|
||||
jay_${OPCODE}(${signature(op, with_types = True, type_ = 'JAY_TYPE_'+type.upper(), mode = 'call')});
|
||||
% if op.has_dest:
|
||||
return dst;
|
||||
% endif
|
||||
}
|
||||
#define jay_${OPCODE}_${type}(${signature(op, with_dest = False, mode =
|
||||
'call')}) _jay_${OPCODE}_${type}(${signature(op, src='JAY_BUILD_SRC({})', mode = 'call', with_dest = False)})
|
||||
% endfor
|
||||
|
||||
% endfor
|
||||
|
||||
#undef type_assert
|
||||
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Render jay_builder_opcodes.h to the path given on the command line.

    Returns 0 on success, 1 (with a mako error trace on stdout) on failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('output', action='store')
    args = parser.parse_args()

    # CMP and SEND have hand-written builders; everything else is generated.
    generated = {name: op for name, op in OPCODES.items()
                 if name not in {'cmp', 'send'}}

    try:
        with open(args.output, 'w', encoding='utf-8') as fh:
            fh.write(Template(TEMPLATE).render(opcodes=generated,
                                               signature=signature,
                                               infer_type=infer_type))
    except Exception:
        print(exceptions.text_error_template().render())
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||
153
src/intel/compiler/jay/jay_extra_info.h.py
Normal file
153
src/intel/compiler/jay/jay_extra_info.h.py
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
# Copyright 2026 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from mako import exceptions
|
||||
from mako.template import Template
|
||||
|
||||
from jay_opcodes import OPCODES, ENUMS
|
||||
|
||||
TEMPLATE = """/* Do not include directly */
|
||||
PRAGMA_DIAGNOSTIC_PUSH
|
||||
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
|
||||
|
||||
% for enum, (prefix, values) in enums.items():
|
||||
% if enum.startswith('jay'):
|
||||
enum PACKED ${enum} {
|
||||
% for v in values:
|
||||
${prefix}_${v.upper()},
|
||||
% endfor
|
||||
};
|
||||
% endif
|
||||
% endfor
|
||||
|
||||
% for name, op in opcodes:
|
||||
typedef struct jay_${name}_info {
|
||||
% for T, prop in op.extra_struct:
|
||||
${T} ${prop};
|
||||
% endfor
|
||||
} jay_${name}_info;
|
||||
|
||||
% for prefix, _suffix in [('const ', '_const'), ('', '')]:
|
||||
static inline ${prefix} struct jay_${name}_info *
|
||||
jay_get_${name}_info${_suffix}(${prefix}jay_inst *I)
|
||||
{
|
||||
assert(I->op == JAY_OPCODE_${name.upper()});
|
||||
return (${prefix}struct jay_${name}_info *) &I->src[I->num_srcs];
|
||||
}
|
||||
|
||||
% endfor
|
||||
% for T, prop in op.extra_struct:
|
||||
% if not prop.startswith('pad'):
|
||||
static inline ${T}
|
||||
jay_${name}_${prop}(const jay_inst *I)
|
||||
{
|
||||
return jay_get_${name}_info_const(I)->${prop};
|
||||
}
|
||||
|
||||
static inline void
|
||||
jay_set_${name}_${prop}(jay_inst *I, ${T} value)
|
||||
{
|
||||
jay_get_${name}_info(I)->${prop} = value;
|
||||
}
|
||||
|
||||
% endif
|
||||
% endfor
|
||||
% endfor
|
||||
|
||||
static inline unsigned
|
||||
jay_inst_info_size(jay_inst *I)
|
||||
{
|
||||
switch (I->op) {
|
||||
% for name, op in opcodes:
|
||||
case JAY_OPCODE_${name.upper()}: return sizeof(struct jay_${name}_info);
|
||||
% endfor
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __cplusplus
|
||||
static inline const char *
|
||||
jay_print_inst_info(FILE *fp, const jay_inst *I, const char *sep)
|
||||
{
|
||||
switch (I->op) {
|
||||
% for name, op in opcodes:
|
||||
case JAY_OPCODE_${name.upper()}: {
|
||||
% for T, prop in op.extra_struct:
|
||||
% if not (prop.startswith('pad') or name == 'bfn' or T == 'enum jay_type'):
|
||||
<%
|
||||
value = f"jay_{name}_{prop}(I)"
|
||||
spec = '0x%"PRIx64"' if T == 'uint64_t' else "%u"
|
||||
%>
|
||||
% if T.startswith('enum') and T[5:] in enums:
|
||||
<%
|
||||
bare = T[5:]
|
||||
prefix, values = enums[bare]
|
||||
%>
|
||||
const char *${bare}_str[] = {
|
||||
% for v in values:
|
||||
[${prefix}_${v.upper()}] = "${v}",
|
||||
% endfor
|
||||
};
|
||||
assert(${value} < ARRAY_SIZE(${bare}_str));
|
||||
<%
|
||||
spec = "%s"
|
||||
value = f'{T[5:]}_str[{value}]'
|
||||
%>
|
||||
% endif
|
||||
% if T == 'enum jay_rounding_mode':
|
||||
if (strcmp(${value}, "round")) {
|
||||
fprintf(fp, "%s%s", sep, ${value});
|
||||
sep = ", ";
|
||||
}
|
||||
% elif T == 'bool':
|
||||
if (${value}) {
|
||||
fprintf(fp, "%s${prop}", sep);
|
||||
sep = ", ";
|
||||
}
|
||||
% elif T.startswith('enum') or len(op.extra_struct) == 1:
|
||||
fprintf(fp, "%s${spec}", sep, ${value});
|
||||
sep = ", ";
|
||||
% else:
|
||||
if (${value}) {
|
||||
fprintf(fp, "%s${prop}=${spec}", sep, ${value});
|
||||
sep = ", ";
|
||||
}
|
||||
% endif
|
||||
% endif
|
||||
% endfor
|
||||
break;
|
||||
}
|
||||
% endfor
|
||||
default: break;
|
||||
}
|
||||
|
||||
return sep;
|
||||
}
|
||||
#endif
|
||||
|
||||
PRAGMA_DIAGNOSTIC_POP
|
||||
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Render the extra-info header to the path given on the command line.

    Only opcodes that actually carry an extra_struct are emitted. Returns 0
    on success, 1 (with a mako error trace on stdout) on failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('output', action='store')
    args = parser.parse_args()

    ops_with_info = [(name, op) for name, op in OPCODES.items()
                     if op.extra_struct]

    try:
        with open(args.output, 'w', encoding='utf-8') as fh:
            fh.write(Template(TEMPLATE).render(opcodes=ops_with_info,
                                               enums=ENUMS))
    except Exception:
        print(exceptions.text_error_template().render())
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||
3838
src/intel/compiler/jay/jay_from_nir.c
Normal file
3838
src/intel/compiler/jay/jay_from_nir.c
Normal file
File diff suppressed because it is too large
Load diff
1408
src/intel/compiler/jay/jay_ir.h
Normal file
1408
src/intel/compiler/jay/jay_ir.h
Normal file
File diff suppressed because it is too large
Load diff
203
src/intel/compiler/jay/jay_liveness.c
Normal file
203
src/intel/compiler/jay/jay_liveness.c
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/bitset.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/sparse_bitset.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_worklist.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/* Per-instruction transfer function, applied bottom-up:
 * LiveIn = GEN + (LiveOut - KILL)
 *
 * On entry, `live_in` holds the live set after the instruction; on return it
 * holds the live set before it. Also records each source's last-use bit on
 * the instruction and marks never-used definitions in `dead_defs`.
 */
static void
update_liveness_for_inst(BITSET_WORD *dead_defs,
                         struct u_sparse_bitset *live_in,
                         jay_inst *I)
{
   /* No destination is live-in before the instruction, but any destination not
    * live-in after is immediately dead.
    */
   jay_foreach_dst_index(I, _, def) {
      if (u_sparse_bitset_test(live_in, def)) {
         u_sparse_bitset_clear(live_in, def);
      } else {
         BITSET_SET(dead_defs, def);
      }
   }

   if (I->op == JAY_OPCODE_PHI_SRC) {
      /* Phi sources do not require last-use bits. */
      jay_foreach_src_index(I, src_idx, comp, index) {
         u_sparse_bitset_set(live_in, index);
      }
   } else {
      BITSET_ZERO(I->last_use);
      unsigned last_use_i = 0;

      jay_foreach_src_index(I, s, comp, index) {
         /* If the source is not live after this instruction, but becomes
          * live at this instruction, this is the last use.
          */
         if (!u_sparse_bitset_test(live_in, index)) {
            assert(last_use_i < JAY_NUM_LAST_USE_BITS);
            BITSET_SET(I->last_use, last_use_i);
         }

         u_sparse_bitset_set(live_in, index);
         ++last_use_i;
      }
   }
}
|
||||
|
||||
/**
 * Calculate liveness information for SSA values.
 *
 * This populates the jay_block::live_in/live_out bitsets and last_use flags.
 *
 * Implemented as a standard backwards dataflow fixpoint over a block
 * worklist: each popped block recomputes its live_in from its live_out and
 * merges it into its predecessors' live_out, requeueing any predecessor
 * whose set grew.
 */
void
jay_compute_liveness(jay_function *f)
{
   u_worklist worklist;
   u_worklist_init(&worklist, f->num_blocks, NULL);

   /* Reset dead-def tracking; update_liveness_for_inst repopulates it */
   ralloc_free(f->dead_defs);
   f->dead_defs = BITSET_RZALLOC(f, f->ssa_alloc);

   /* Start with empty live sets and every block on the worklist */
   jay_foreach_block(f, block) {
      u_sparse_bitset_free(&block->live_in);
      u_sparse_bitset_free(&block->live_out);

      u_sparse_bitset_init(&block->live_in, f->ssa_alloc, block);
      u_sparse_bitset_init(&block->live_out, f->ssa_alloc, block);

      jay_worklist_push_head(&worklist, block);
   }

   while (!u_worklist_is_empty(&worklist)) {
      /* Pop in reverse order since liveness is a backwards pass */
      jay_block *block = jay_worklist_pop_head(&worklist);

      /* Update its liveness information:
       * 1. Assume everything liveout from this block was live_in
       * 2. Clear live_in for anything defined in this block
       */
      u_sparse_bitset_dup(&block->live_in, &block->live_out);

      jay_foreach_inst_in_block_rev(block, inst) {
         update_liveness_for_inst(f->dead_defs, &block->live_in, inst);
      }

      /* Propagate block->live_in[] to the live_out[] of predecessors. Since
       * phis are split, they are handled naturally without special cases.
       */
      jay_foreach_predecessor(block, p) {
         if (u_sparse_bitset_merge(&(*p)->live_out, &block->live_in)) {
            jay_worklist_push_tail(&worklist, *p);
         }
      }
   }

#ifndef NDEBUG
   /* Nothing may be live into the entry block or out of the exit block */
   jay_block *first_block = jay_first_block(f);
   jay_block *last_block = list_last_entry(&f->blocks, jay_block, link);

   assert(u_sparse_bitset_count(&first_block->live_in) == 0 && "invariant");
   assert(u_sparse_bitset_count(&last_block->live_out) == 0 && "invariant");
#endif

   u_worklist_fini(&worklist);
}
|
||||
|
||||
/*
 * Calculate the register demand for each SSA file using the previously
 * calculated liveness analysis. SSA makes this exact in linear-time.
 *
 * Fills func->demand[] with, per file, the maximum number of values
 * simultaneously live at any program point (with destinations padded to
 * power-of-two sizes while the instruction executes).
 */
void
jay_calculate_register_demands(jay_function *func)
{
   /* NOTE(review): calloc results are used unchecked; presumably OOM aborts
    * elsewhere — confirm against the driver's allocation policy.
    */
   enum jay_file *files = calloc(func->ssa_alloc, sizeof(enum jay_file));
   BITSET_WORD *killed = BITSET_CALLOC(func->ssa_alloc);
   unsigned *max_demand = func->demand;
   memset(max_demand, 0, sizeof(func->demand));

   /* First pass: record which register file each SSA index lives in */
   jay_foreach_inst_in_func(func, block, I) {
      jay_foreach_dst_index(I, def, index) {
         files[index] = def.file;
      }
   }

   jay_foreach_block(func, block) {
      unsigned demands[JAY_NUM_SSA_FILES] = {};

      /* Everything live-in. */
      U_SPARSE_BITSET_FOREACH_SET(&block->live_in, i) {
         ++demands[files[i]];
      }

      jay_foreach_ssa_file(f) {
         max_demand[f] = MAX2(demands[f], max_demand[f]);
      }

      jay_foreach_inst_in_block(block, I) {
         /* We must have enough register file space for the register payload */
         if (I->op == JAY_OPCODE_PRELOAD) {
            uint32_t max = jay_preload_reg(I) + jay_num_values(I->dst);
            max_demand[I->dst.file] = MAX2(max_demand[I->dst.file], max);
         }

         /* Collect source values to kill */
         jay_foreach_killed(I, s, c) {
            BITSET_SET(killed, jay_channel(I->src[s], c));
         }

         /* Make destinations live (padded to a power-of-two while the
          * instruction executes)
          */
         jay_foreach_dst(I, d) {
            demands[d.file] += util_next_power_of_two(jay_num_values(d));
         }

         /* Update maximum demands */
         jay_foreach_ssa_file(f) {
            max_demand[f] = MAX2(demands[f], max_demand[f]);
         }

         /* Dead destinations are those written by the instruction but killed
          * immediately after the instruction finishes.
          */
         jay_foreach_dst_index(I, d, index) {
            if (BITSET_TEST(func->dead_defs, index)) {
               assert(demands[d.file] > 0);
               --demands[d.file];
            }
         }

         /* Drop the power-of-two padding now that the instruction is done */
         jay_foreach_dst(I, d) {
            unsigned n = jay_num_values(d);
            demands[d.file] -= util_next_power_of_two(n) - n;
         }

         /* Late-kill sources */
         jay_foreach_killed(I, s, c) {
            uint32_t index = jay_channel(I->src[s], c);

            if (BITSET_TEST(killed, index)) {
               BITSET_CLEAR(killed, index);

               assert(demands[I->src[s].file] > 0);
               --demands[I->src[s].file];
            }
         }

         if (jay_debug & JAY_DBG_PRINTDEMAND) {
            printf("(LA) [G:%u\tU:%u] ", demands[GPR], demands[UGPR]);
            jay_print_inst(stdout, I);
         }
      }
   }

   free(files);
   free(killed);
}
|
||||
153
src/intel/compiler/jay/jay_lower_post_ra.c
Normal file
153
src/intel/compiler/jay/jay_lower_post_ra.c
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_builder_opcodes.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/*
 * If default != dest, we need to lower. Predicated moves generalize as SEL,
 * with default in src0 to allow for immediates.
 *
 * For anything else, we have to insert a copy: each component of the default
 * is moved into the destination under the negated predicate, so the
 * destination holds `default_` wherever the instruction does not write.
 */
static void
lower_non_tied_default(jay_builder *b, jay_inst *I, jay_def default_)
{
   jay_def not_pred = jay_negate(*jay_inst_get_predicate(I));
   assert(default_.file != FLAG && "we don't support this");

   if (I->op == JAY_OPCODE_MOV) {
      /* Predicated MOV becomes a SEL and the original MOV goes away */
      jay_SEL(b, I->type, I->dst, default_, I->src[0], not_pred);
      jay_remove_instruction(I);
   } else {
      /* Copy the default into the destination on the inverse predicate */
      jay_foreach_comp(I->dst, c) {
         jay_def dst = jay_extract_post_ra(I->dst, c);
         jay_def src = jay_extract_post_ra(default_, c);

         jay_add_predicate(b, jay_MOV(b, dst, src), not_pred);
      }
   }
}
|
||||
|
||||
static inline jay_def
|
||||
hi(jay_def x)
|
||||
{
|
||||
x.hi = true;
|
||||
return x;
|
||||
}
|
||||
|
||||
/*
 * Lower one instruction after register allocation. Returns true when the
 * caller should delete the instruction (it was RA-only, trivial, or fully
 * replaced by emitted code), false to keep it (possibly with its type
 * rewritten in place).
 */
static bool
lower(jay_builder *b, jay_inst *I)
{
   switch (I->op) {
   case JAY_OPCODE_PRELOAD:
   case JAY_OPCODE_PHI_DST:
   case JAY_OPCODE_INDETERMINATE:
      /* Delete instructions that only exist for RA. Uninitialized register
       * contents is a perfectly cromulent indeterminate value.
       */
      return true;

   case JAY_OPCODE_MOV: {
      /* Delete trivial moves */
      if (jay_regs_equal(I->dst, I->src[0]) && !I->predication)
         return true;

      if (I->dst.file == GPR && I->src[0].file == GPR) {
         jay_def dst = I->dst, src = I->src[0], tmp4 = jay_bare_reg(GPR, 0);
         enum jay_stride dst_stride = jay_def_stride(b->shader, dst);
         enum jay_stride src_stride = jay_def_stride(b->shader, src);
         assert(jay_def_stride(b->shader, tmp4) == JAY_STRIDE_4 && "ABI");

         if (dst_stride == JAY_STRIDE_8 && src_stride == JAY_STRIDE_2) {
            /* Widen via the fixed 4-byte-stride temporary, then recover its
             * previous contents with the XOR swap trick (no scratch needed).
             */
            jay_MOV(b, dst, tmp4);
            jay_MOV(b, tmp4, src)->type = JAY_TYPE_U16;
            jay_MOV(b, hi(tmp4), hi(src))->type = JAY_TYPE_U16;

            jay_XOR(b, JAY_TYPE_U32, dst, dst, tmp4);
            jay_XOR(b, JAY_TYPE_U32, tmp4, dst, tmp4);
            jay_XOR(b, JAY_TYPE_U32, dst, dst, tmp4);
            return true;
         } else if (dst_stride == JAY_STRIDE_2 && src_stride == JAY_STRIDE_8) {
            /* Narrowing: same idea, with 16-bit halves swapped separately */
            jay_MOV(b, dst, tmp4)->type = JAY_TYPE_U16;
            jay_MOV(b, hi(dst), hi(tmp4))->type = JAY_TYPE_U16;
            jay_MOV(b, tmp4, src);

            for (unsigned i = 0; i < 3; ++i) {
               jay_XOR(b, JAY_TYPE_U16, i == 1 ? tmp4 : dst, dst, tmp4);
               jay_XOR(b, JAY_TYPE_U16, i == 1 ? hi(tmp4) : hi(dst), hi(dst),
                       hi(tmp4));
            }

            return true;
         }

         /* Lower 4B<-->2B copies. To pack the register file, RA
          * sometimes inserts 32-bit copies involving 16-bit strided sources like
          * "mov.u32 r4 <32-bit>, r50 <16-bit>". This cannot be implemented in a
          * single hardware instruction, so we split into two 16-bit copies.
          */
         enum jay_stride min_stride = MIN2(dst_stride, src_stride);
         unsigned stride_sz = jay_stride_to_bits(min_stride);
         unsigned type_sz = jay_type_size_bits(I->type);

         if (stride_sz < type_sz) {
            assert(stride_sz == 16 && type_sz == 32 && "no other case hit");
            I->type = JAY_TYPE_U16;
            jay_MOV(b, hi(dst), hi(src))->type = JAY_TYPE_U16;
         }
      }

      return false;
   }

   case JAY_OPCODE_SWAP: {
      jay_def x = I->src[0], y = I->src[1];
      /* TODO: Need stride-aware lowering here too like MOV. Same ideas. */
      if (jay_def_stride(b->shader, x) != jay_def_stride(b->shader, y))
         UNREACHABLE("todo");

      /* Classic XOR swap: exchanges x and y without a temporary */
      jay_XOR(b, JAY_TYPE_U32, x, y, x);
      jay_XOR(b, JAY_TYPE_U32, y, x, y);
      jay_XOR(b, JAY_TYPE_U32, x, y, x);
      return true;
   }

   case JAY_OPCODE_ZERO_FLAG: {
      jay_MOV(b, jay_bare_reg(FLAG, jay_zero_flag_reg(I)), 0)->type =
         JAY_TYPE_U32;
      return true;
   }

   default:
      return false;
   }
}
|
||||
|
||||
/*
 * Post-RA lowering pass: resolves non-tied default sources into predicated
 * selects/moves, then lowers or deletes RA-only pseudo-instructions via
 * lower().
 */
void
jay_lower_post_ra(jay_shader *s)
{
   jay_foreach_inst_in_shader_safe(s, func, I) {
      jay_builder b = jay_init_builder(func, jay_before_inst(I));

      if (jay_inst_has_default(I)) {
         if (!jay_regs_equal(I->dst, *jay_inst_get_default(I))) {
            lower_non_tied_default(&b, I, *jay_inst_get_default(I));
         }

         /* Now just drop the default source */
         jay_shrink_sources(I, I->num_srcs - 1);
         I->predication = JAY_PREDICATED;
      }

      if (lower(&b, I)) {
         jay_remove_instruction(I);
      }
   }
}
|
||||
200
src/intel/compiler/jay/jay_lower_pre_ra.c
Normal file
200
src/intel/compiler/jay/jay_lower_pre_ra.c
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/bitscan.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/lut.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/u_math.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_builder_opcodes.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/*
|
||||
* Register allocation operates only on power-of-two vectors. Pad out
|
||||
* non-power-of-two vectors with null values to simplify RA.
|
||||
*/
|
||||
static jay_def
|
||||
lower_npot_vector(jay_builder *b, jay_def x)
|
||||
{
|
||||
unsigned n = jay_num_values(x);
|
||||
|
||||
if (!util_is_power_of_two_or_zero(n)) {
|
||||
uint32_t indices[JAY_MAX_DEF_LENGTH] = { 0 };
|
||||
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
indices[i] = jay_channel(x, i);
|
||||
}
|
||||
|
||||
x = jay_collect(b, x.file, indices, util_next_power_of_two(n));
|
||||
}
|
||||
|
||||
assert(util_is_power_of_two_or_zero(jay_num_values(x)) && "post-cond");
|
||||
return x;
|
||||
}
|
||||
|
||||
/**
 * Vectors need to be allocated to contiguous registers. Furthermore, we
 * require power-of-two sizes in certain cases, that's handled here too.
 *
 * This means that a value cannot appear in multiple channels of an
 * instruction, as register allocation would need to assign the same value to
 * locations <X+i> and <X+j>. Scalars don't have this restriction, except for
 * SENDs because the hardware bans repeated sources.
 *
 * If a value appears in multiple positions, we emit copies so that each
 * can be register allocated in the correct position.
 */
static void
lower_contiguous_sources(jay_builder *b, jay_inst *I)
{
   b->cursor = jay_before_inst(I);
   /* `seen` spans ALL sources of the instruction (it is not reset per
    * source), so a value duplicated across two different sources is copied
    * as well.
    */
   uint32_t seen[JAY_MAX_DEF_LENGTH], nr_seen = 0;

   jay_foreach_src(I, s) {
      if (jay_num_values(I->src[s]) > 1 || I->op == JAY_OPCODE_SEND) {
         jay_foreach_index(I->src[s], c, index) {
            /* Search for the index (linear scan; bounded by def length) */
            unsigned i;
            for (i = 0; i < nr_seen && seen[i] != index; ++i) {
            }

            if (i == nr_seen) {
               /* Record a new index */
               assert(nr_seen < ARRAY_SIZE(seen));
               seen[nr_seen++] = index;
            } else {
               /* Insert a copy to access a duplicated index */
               jay_def copy = jay_alloc_def(b, I->src[s].file, 1);
               jay_MOV(b, copy, jay_extract(I->src[s], c));
               jay_insert_channel(b, &I->src[s], c, copy);
            }
         }

         /* Pad non-power-of-two vectors while we're here */
         jay_replace_src(&I->src[s], lower_npot_vector(b, I->src[s]));
      }
   }
}
|
||||
|
||||
/*
 * Lower the immediate source I->src[s] to a UGPR, pooling constants
 * per-function in `constants` (keyed on the 32-bit value plus a 64-bit-ness
 * bit). Returns the def holding the constant.
 *
 * NOTE(review): on a cache miss this leaves b->cursor at the function start;
 * callers presumably reset it — confirm.
 */
static jay_def
lower_imm_to_ugpr(jay_builder *b,
                  jay_inst *I,
                  unsigned s,
                  struct hash_table_u64 *constants)
{
   /* Although only 32-bit constants are supported, 64-bit constants are
    * separate in the key since they must be zero-extended. We could optimize
    * this but it doesn't really matter.
    */
   uint32_t imm = jay_as_uint(I->src[s]);
   bool is_64bit = jay_type_size_bits(jay_src_type(I, s)) == 64;
   uint64_t key = imm | (is_64bit ? BITFIELD64_BIT(32) : 0);

   jay_inst *mov = _mesa_hash_table_u64_search(constants, key);
   if (mov)
      return mov->dst;

   /* Try to use source modifiers to reuse a constant if we can: a pooled
    * float of the opposite sign can be referenced through a negate modifier.
    */
   if (jay_src_type(I, s) == JAY_TYPE_F32 && jay_has_src_mods(I, s)) {
      mov = _mesa_hash_table_u64_search(constants, fui(-uif(imm)));
      if (mov)
         return jay_negate(mov->dst);
   }

   /* If this is a new constant, insert a move and cache it. Currently, we pool
    * constants per-function. Inserting everything at the start guarantees that
    * these moves dominate all their uses, although it hurts register pressure.
    * The spiller should rematerialize constants where necessary to ensure we
    * don't lose the wave, but we could still probably optimize this.
    */
   jay_def x = jay_alloc_def(b, UGPR, is_64bit ? 2 : 1);
   b->cursor = jay_before_function(b->func);
   _mesa_hash_table_u64_insert(constants, key, jay_MOV(b, x, imm));
   return x;
}
|
||||
|
||||
/* Try to swap src[0] and src[1] of I without changing semantics, fixing up
 * opcode-specific state (predicate polarity for SEL, conditional mod for CMP,
 * the LUT for BFN). Returns false (leaving I untouched) if the operation is
 * not commutable.
 */
static bool
try_swap_src01(jay_inst *I)
{
   if (I->op == JAY_OPCODE_SEL) {
      /* sel(a, b, p) = sel(b, a, !p) */
      I->src[2].negate ^= true;
   } else if (I->op == JAY_OPCODE_CMP) {
      I->conditional_mod = jay_conditional_mod_swap_sources(I->conditional_mod);
   } else if (I->op == JAY_OPCODE_BFN) {
      jay_set_bfn_ctrl(I, util_lut3_swap_sources(jay_bfn_ctrl(I), 0, 1));
   } else if (!jay_opcode_infos[I->op]._2src_commutative) {
      /* Nothing to do for commutative, but otherwise we give up */
      return false;
   }

   SWAP(I->src[0], I->src[1]);
   return true;
}
|
||||
|
||||
/*
|
||||
* Instructions can only encode immediates in certain positions. Lower
|
||||
* immediates to moves where necessary.
|
||||
*/
|
||||
/*
 * Instructions can only encode immediates in certain positions. Lower
 * immediates to moves where necessary.
 */
static void
lower_immediates(jay_builder *b, jay_inst *I, struct hash_table_u64 *constants)
{
   /* Canonicalize compare-with-zero to increase freedom: a CMP with a null
    * destination whose only effect is the flag write is just a MOV + cmod.
    */
   if (I->op == JAY_OPCODE_CMP &&
       jay_is_zero(I->src[1]) &&
       jay_is_null(I->dst) &&
       I->type == JAY_TYPE_U32) {

      assert(!jay_is_null(I->cond_flag) && !I->predication);
      I->op = JAY_OPCODE_MOV;
      jay_shrink_sources(I, 1);
   }

   /* One source supports immediates but the other does not, so swap.
    * Only bother when the immediate is not already pooled in a UGPR.
    */
   unsigned other = I->op == JAY_OPCODE_BFN ? 1 : 0;
   if (jay_is_imm(I->src[other]) &&
       !_mesa_hash_table_u64_search(constants, jay_as_uint(I->src[other]))) {

      try_swap_src01(I);
   }

   /* Immediates allowed only in certain cases, lower the rest */
   jay_foreach_src(I, s) {
      if (jay_is_imm(I->src[s])) {
         uint32_t imm = jay_as_uint(I->src[s]);

         /* Hardware generally allows an immediate only as the last source
          * (SENDs are special-cased).
          */
         bool last = s == (jay_num_isa_srcs(I) - 1);
         bool allowed = s < 2 && (last || I->op == JAY_OPCODE_SEND);

         /* NOTE(review): bound is exclusive, so imm == 0xffff is lowered;
          * confirm whether BFN's 16-bit immediate can represent UINT16_MAX.
          */
         allowed |= (I->op == JAY_OPCODE_BFN && s == 0 && imm < UINT16_MAX);

         if (!allowed) {
            I->src[s] = lower_imm_to_ugpr(b, I, s, constants);
         }
      }
   }
}
|
||||
|
||||
/* Pre-RA lowering entry point: enforce contiguous vector sources and legal
 * immediate positions for every instruction in the shader.
 */
void
jay_lower_pre_ra(jay_shader *s)
{
   struct hash_table_u64 *constants = _mesa_hash_table_u64_create(NULL);

   jay_foreach_function(s, f) {
      /* Pool constants per function. */
      _mesa_hash_table_u64_clear(constants);

      jay_foreach_inst_in_func(f, block, I) {
         jay_builder b = { .shader = s, .func = f };

         /* lower_immediates must be last since it consumes I */
         lower_contiguous_sources(&b, I);
         lower_immediates(&b, I, constants);
      }
   }

   _mesa_hash_table_u64_destroy(constants);
}
|
||||
376
src/intel/compiler/jay/jay_lower_scoreboard.c
Normal file
376
src/intel/compiler/jay/jay_lower_scoreboard.c
Normal file
|
|
@ -0,0 +1,376 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include "compiler/brw/brw_eu_defines.h"
|
||||
#include "util/bitset.h"
|
||||
#include "util/macros.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/* TODO: Shrink */
#define MAX_KEYS (2 * JAY_NUM_UGPR)
#define NUM_TOKENS (16)

/** SEND scoreboarding */
struct gpr_range {
   /* First register in the flat GPR+UGPR key space, and register count */
   unsigned base, width;
};

/* Map a def to its range in a flat key space where UGPRs are placed after all
 * GPRs. Non-register files map to an empty range. The instruction parameter
 * is currently unused — presumably kept for symmetry/future use; confirm.
 */
static inline struct gpr_range
def_to_gpr(jay_function *func, jay_inst *I, jay_def x)
{
   if (x.file == GPR || x.file == UGPR) {
      unsigned base = x.file == UGPR ? func->shader->num_regs[GPR] : 0;
      return (struct gpr_range) { base + x.reg, jay_num_values(x) };
   } else {
      return (struct gpr_range) { 0, 0 };
   }
}
|
||||
|
||||
/* Insert a sync.nop before I that waits on the given SBID's destination
 * dependency, and mark the token free in the busy mask.
 */
static inline void
sync_sbid(jay_function *func, jay_inst *I, uint32_t *busy, unsigned sbid)
{
   jay_builder b = jay_init_builder(func, jay_before_inst(I));
   jay_SYNC(&b, TGL_SYNC_NOP)->dep = tgl_swsb_sbid(TGL_SBID_DST, sbid);
   *busy &= ~BITFIELD_BIT(sbid);
}
|
||||
|
||||
/* Per-block SBID assignment and synchronization for SENDs. Tracks, for each
 * in-flight token, which registers it reads and writes, and inserts sync.nop
 * waits before any instruction that would violate RAW/WAR/WAW against an
 * outstanding message. Tokens are assigned round-robin.
 */
static void
lower_send_local(jay_function *func, jay_block *block)
{
   /* Per-token register footprints. Entries are only read for tokens whose
    * busy bit is set, and are zeroed when a token is (re)allocated below, so
    * the uninitialized stack contents are never observed.
    */
   struct {
      BITSET_DECLARE(reading, MAX_KEYS);
      BITSET_DECLARE(writing, MAX_KEYS);
   } tokens[NUM_TOKENS];

   uint32_t busy = 0;
   unsigned roundrobin = 0;

   jay_foreach_inst_in_block_safe(block, I) {
      /* Read-after-write */
      jay_foreach_src(I, s) {
         struct gpr_range src = def_to_gpr(func, I, I->src[s]);

         u_foreach_bit(sbid, busy) {
            if (BITSET_TEST_COUNT(tokens[sbid].writing, src.base, src.width)) {
               sync_sbid(func, I, &busy, sbid);
            }
         }
      }

      /* Write-after-write & write-after-read */
      jay_foreach_dst(I, d) {
         struct gpr_range dst = def_to_gpr(func, I, I->dst);

         u_foreach_bit(sbid, busy) {
            if (BITSET_TEST_COUNT(tokens[sbid].reading, dst.base, dst.width) ||
                BITSET_TEST_COUNT(tokens[sbid].writing, dst.base, dst.width)) {
               sync_sbid(func, I, &busy, sbid);
            }
         }
      }

      /* Allocate a token for each non-EOT SEND and record its footprint */
      if (I->op == JAY_OPCODE_SEND && !jay_send_eot(I)) {
         unsigned sbid = (roundrobin++) % NUM_TOKENS;
         jay_set_send_sbid(I, sbid);

         if (!(busy & BITSET_BIT(sbid))) {
            busy |= BITSET_BIT(sbid);
            BITSET_ZERO(tokens[sbid].writing);
            BITSET_ZERO(tokens[sbid].reading);
         }

         struct gpr_range dst = def_to_gpr(func, I, I->dst);
         BITSET_SET_COUNT(tokens[sbid].writing, dst.base, dst.width);

         jay_foreach_src(I, s) {
            struct gpr_range src = def_to_gpr(func, I, I->src[s]);
            BITSET_SET_COUNT(tokens[sbid].reading, src.base, src.width);
         }
      }
   }

   /* Sync on block boundaries. */
   if (block != jay_last_block(func)) {
      jay_builder b = jay_init_builder(func, jay_before_jump(block));

      u_foreach_bit(sbid, busy) {
         jay_SYNC(&b, TGL_SYNC_NOP)->dep = tgl_swsb_sbid(TGL_SBID_DST, sbid);
      }
   }
}
|
||||
|
||||
/**
 * Regdist scoreboarding
 *
 * Register access is tracked per pipe, with 0 (NONE) having data on the writer
 * packed into a u32 with the following macros.
 */
#define make_writer(pipe, ip) (((uint32_t) ip << 3) | (uint32_t) (pipe))
#define writer_ip(writer) (writer >> 3)
#define writer_pipe(writer) (enum tgl_pipe)(writer & BITFIELD_MASK(3))

#define TGL_NUM_PIPES (TGL_PIPE_ALL)

/* Per-register tracking: one u32 per pipe, index 0 holds the packed writer */
typedef uint32_t u32_per_pipe[TGL_NUM_PIPES];

struct swsb_state {
   /* Current instruction count per in-order pipe */
   unsigned ip[TGL_NUM_PIPES];

   /* Shape (SIMD split + macro length) of the last instruction per pipe,
    * used to relax dependencies between identically-split instructions.
    */
   unsigned last_shape[TGL_NUM_PIPES];

   /* finished_ip[X][Y] = ip means from the perspective of pipe X, ip on pipe Y
    * has already been waited on.
    */
   unsigned finished_ip[TGL_NUM_PIPES][TGL_NUM_PIPES];
   u32_per_pipe *access;
};
|
||||
|
||||
/* Classify the in-order pipe an instruction executes on. SENDs and control
 * flow are out-of-order (NONE).
 */
static enum tgl_pipe
inst_exec_pipe(const struct intel_device_info *devinfo, jay_inst *I)
{
   if (I->op == JAY_OPCODE_SEND || jay_op_is_control_flow(I->op) /* XXX*/) {
      return TGL_PIPE_NONE;
   } else if (I->op == JAY_OPCODE_MATH) {
      return TGL_PIPE_MATH;
   } else if (I->type == JAY_TYPE_F64) {
      return TGL_PIPE_LONG;
   } else if (jay_type_is_any_float(I->type)) {
      return TGL_PIPE_FLOAT;
   } else {
      return TGL_PIPE_INT;
   }
}
|
||||
|
||||
/**
 * Return the RegDist pipeline the hardware will synchronize with if no
 * pipeline information is provided in the SWSB annotation of an
 * instruction (e.g. when TGL_PIPE_NONE is specified in tgl_swsb).
 */
static enum tgl_pipe
inferred_sync_pipe(const struct intel_device_info *devinfo, const jay_inst *I)
{
   bool has_int_src = false, has_long_src = false;

   if (devinfo->verx10 >= 125) {
      jay_foreach_src(I, s) {
         has_int_src |= !jay_type_is_any_float(jay_src_type(I, s));
         has_long_src |= jay_src_type(I, s) == JAY_TYPE_F64;
      }

      /* Avoid emitting (RegDist, SWSB) annotations for long instructions on
       * platforms where they are unordered as they may not be allowed.
       */
      if (devinfo->has_64bit_float_via_math_pipe && has_long_src)
         return TGL_PIPE_NONE;
   }

   return I->op == JAY_OPCODE_SEND ? TGL_PIPE_NONE :
          has_long_src ? TGL_PIPE_LONG :
          has_int_src ? TGL_PIPE_INT :
          TGL_PIPE_FLOAT;
}
|
||||
|
||||
/* Accumulate into dep[] the IP of the last writer of each register in the
 * range, per pipe. Slot 0 of the access table holds the packed writer record.
 */
static void
depend_on_writer(struct swsb_state *state, struct gpr_range r, unsigned *dep)
{
   for (unsigned i = 0; i < r.width; ++i) {
      uint32_t w = state->access[r.base + i][0];
      dep[writer_pipe(w)] = MAX2(dep[writer_pipe(w)], writer_ip(w));
   }
}

/* Iterate the in-order pipes, skipping index 0 (TGL_PIPE_NONE) */
#define jay_foreach_pipe(pipe) \
   for (unsigned pipe = 1; pipe < TGL_NUM_PIPES; ++pipe)
|
||||
|
||||
/* Per-block RegDist scoreboarding: compute, for each instruction, the SWSB
 * annotation (regdist + pipe, plus any SBID mode set by the SEND pass) needed
 * to satisfy RAW/WAR/WAW hazards against the in-order pipes, tracking per-
 * register last access in `access`.
 */
static void
lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access)
{
   struct swsb_state state = { .access = access };
   jay_inst *last_sync = NULL;
   bool need_deswizzle_wait = false;

   jay_foreach_inst_in_block_safe(block, I) {
      enum tgl_pipe exec_pipe = inst_exec_pipe(func->shader->devinfo, I);

      /* Required IP per pipe this instruction must wait for */
      unsigned dep[TGL_NUM_PIPES] = { 0 };

      if (I->op == JAY_OPCODE_SYNC) {
         /* Remember it so it can be folded into the next instruction */
         last_sync = I;
         continue;
      } else if (I->op == JAY_OPCODE_DESWIZZLE_16) {
         need_deswizzle_wait = true;
         state.ip[TGL_PIPE_INT]++;
         continue;
      }

      /* Force a wait on the deswizzles at the start of the program. XXX: Is
       * there a cleaner way to deal with this?
       */
      if (need_deswizzle_wait) {
         dep[TGL_PIPE_INT] = state.ip[TGL_PIPE_INT];
         need_deswizzle_wait = false;
      }

      /* Write-after-{write, read} */
      jay_foreach_dst(I, def) {
         struct gpr_range r = def_to_gpr(func, I, def);
         depend_on_writer(&state, r, dep);

         for (unsigned i = 0; i < r.width; ++i) {
            jay_foreach_pipe(p) {
               dep[p] = MAX2(dep[p], state.access[r.base + i][p]);
            }
         }
      }

      /* Read-after-write */
      jay_foreach_src(I, s) {
         depend_on_writer(&state, def_to_gpr(func, I, I->src[s]), dep);
      }

      unsigned nr_waits = 0;
      unsigned last_pipe = TGL_PIPE_NONE;

      /* If dependency P implies dependency Q, drop dependency Q to avoid
       * unnecessary annotations.
       */
      jay_foreach_pipe(p) {
         if (dep[p]) {
            jay_foreach_pipe(q) {
               if (dep[q] && state.finished_ip[p][q] >= dep[q]) {
                  dep[q] = 0;
               }
            }
         }
      }

      /* 7 is the maximum encodable RegDist */
      unsigned min_delta = 7;
      jay_foreach_pipe(p) {
         if (dep[p] && (exec_pipe == TGL_PIPE_NONE /* TODO: Sends */ ||
                        dep[p] > state.finished_ip[exec_pipe][p])) {
            unsigned delta = state.ip[p] - dep[p] + 1;
            min_delta = MIN2(min_delta, delta);
            state.finished_ip[exec_pipe][p] = dep[p];
            nr_waits++;
            last_pipe = p;
         }
      }

      /* If we're SIMD split the same way as our dependency, we can relax the
       * dependency to have each half wait in parallel. We could do even better
       * with more tracking but this should be good enough for now.
       */
      unsigned simd_split = jay_simd_split(func->shader, I);
      unsigned shape = ((simd_split << 2) | jay_macro_length(I)) + 1;
      bool same_shape = state.last_shape[last_pipe] == shape;

      if (simd_split && same_shape && nr_waits == 1 && min_delta == 1) {
         min_delta += ((1 << simd_split) - 1) * jay_macro_length(I);
         I->replicate_dep = true;
         I->decrement_dep = last_pipe != exec_pipe;
      }

      bool has_sbid = I->op == JAY_OPCODE_SEND && !jay_send_eot(I);
      I->dep = (struct tgl_swsb) {
         .sbid = has_sbid ? jay_send_sbid(I) : 0,
         .mode = has_sbid ? TGL_SBID_SET : TGL_SBID_NULL,
         .regdist = nr_waits ? min_delta : 0,
         .pipe = nr_waits == 1 && (!has_sbid ||
                                   last_pipe == TGL_PIPE_FLOAT ||
                                   last_pipe == TGL_PIPE_INT) ?
                 last_pipe :
                 TGL_PIPE_ALL,
      };

      /* Fold the immediate preceding SYNC.nop into this instruction, allowing
       * us to wait on both ALU and a SEND in the same annotation.
       */
      if (last_sync &&
          jay_sync_op(last_sync) == TGL_SYNC_NOP &&
          I->dep.mode == TGL_SBID_NULL &&
          (I->dep.regdist == 0 ||
           inferred_sync_pipe(func->shader->devinfo, I) == I->dep.pipe)) {

         assert(last_sync->dep.regdist == 0);
         assert(last_sync->dep.pipe == TGL_PIPE_NONE);

         I->dep.mode = last_sync->dep.mode;
         I->dep.sbid = last_sync->dep.sbid;

         jay_remove_instruction(last_sync);
      }

      if (exec_pipe != TGL_PIPE_NONE) {
         /* Advance the IP by the number of physical instructions emitted */
         state.ip[exec_pipe] +=
            jay_macro_length(I) << jay_simd_split(func->shader, I);

         /* Record this instruction as the last writer of its destination */
         struct gpr_range r = def_to_gpr(func, I, I->dst);
         uint32_t now = make_writer(exec_pipe, state.ip[exec_pipe]);

         for (unsigned i = 0; i < r.width; ++i) {
            state.access[r.base + i][0] = now;
         }

         /* Record the last read of each source on this pipe */
         jay_foreach_src(I, s) {
            struct gpr_range r = def_to_gpr(func, I, I->src[s]);
            for (unsigned i = 0; i < r.width; ++i) {
               state.access[r.base + i][exec_pipe] = state.ip[exec_pipe];
            }
         }

         state.last_shape[exec_pipe] = shape;
      }

      last_sync = NULL;
   }

   /* Sync on block boundaries. */
   jay_inst *first = jay_first_inst(block);
   if (block != jay_first_block(func) && first && first->op != JAY_OPCODE_SEND) {
      first->dep = tgl_swsb_regdist(1);
   }
}
|
||||
|
||||
/*
 * Trivial scoreboard lowering pass for debugging use. Stalls after every
 * instruction and assigns SBID zero to all messages.
 */
static void
lower_trivial(jay_function *func)
{
   jay_foreach_inst_in_func_safe(func, block, I) {
      if (I->op == JAY_OPCODE_SEND && !jay_send_eot(I)) {
         /* Set token 0 and immediately drain it after the SEND */
         I->dep = tgl_swsb_dst_dep(tgl_swsb_sbid(TGL_SBID_SET, 0), 1);

         jay_builder b = jay_init_builder(func, jay_after_inst(I));
         jay_SYNC(&b, TGL_SYNC_NOP)->dep = tgl_swsb_sbid(TGL_SBID_DST, 0);
      } else {
         I->dep = tgl_swsb_regdist(1);
      }
   }
}
|
||||
|
||||
/* Scoreboard lowering entry point: assign SBIDs and RegDist annotations for
 * the whole shader, block by block. With JAY_DBG_SYNC set, use the trivial
 * (fully-stalling) lowering instead for debugging.
 */
void
jay_lower_scoreboard(jay_shader *s)
{
   uint32_t nr_keys = s->num_regs[GPR] + s->num_regs[UGPR];
   assert(nr_keys <= MAX_KEYS && "SENDs use uninitialized stack allocation");

   /* Per-register access table shared across blocks, reset per block below.
    * NOTE(review): malloc result is not checked before use.
    */
   u32_per_pipe *access = malloc(sizeof(*access) * nr_keys);

   jay_foreach_function(s, func) {
      if (jay_debug & JAY_DBG_SYNC) {
         lower_trivial(func);
      } else {
         jay_foreach_block(func, block) {
            memset(access, 0, sizeof(*access) * nr_keys);
            lower_send_local(func, block);
            lower_regdist_local(func, block, access);
         }
      }
   }

   free(access);
}
|
||||
156
src/intel/compiler/jay/jay_lower_spill.c
Normal file
156
src/intel/compiler/jay/jay_lower_spill.c
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/brw/brw_eu_defines.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_builder_opcodes.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/* We reserve an address register for spilling by ABI */
#define ADDRESS_REG jay_bare_reg(J_ADDRESS, 2)

/* Emit the LSC scratch access for one spill (store) or fill (load).
 *
 * mem is the logical spill slot (MEM or UMEM file), gpr the register being
 * spilled/filled, sp the per-lane stack pointer. The stack pointer is
 * adjusted incrementally: sp_delta_B tracks the current offset applied to sp
 * within this block so consecutive accesses only pay for the delta.
 * umem_base is the byte offset where uniform slots start in scratch.
 */
static void
insert_spill_fill(jay_builder *b,
                  jay_def mem,
                  jay_def gpr,
                  jay_def sp,
                  bool load,
                  unsigned *sp_delta_B,
                  unsigned umem_base)
{
   assert(jay_is_mem(mem) && !jay_is_mem(gpr));

   bool uniform = mem.file == UMEM;
   unsigned offs_B = mem.reg * 4;

   /* Per-lane slots are strided by dispatch width; uniform slots are packed
    * after them starting at umem_base.
    */
   unsigned mem_reg_B =
      uniform ? (umem_base + offs_B) : (offs_B * b->shader->dispatch_width);

   /* The stack pointer needs to be offset to the desired offset */
   signed sp_adjust_B = mem_reg_B - (*sp_delta_B);
   if (sp_adjust_B) {
      jay_ADD(b, JAY_TYPE_U32, sp, sp, sp_adjust_B);
      *sp_delta_B = mem_reg_B;
   }

   const struct intel_device_info *devinfo = b->shader->devinfo;
   unsigned cache = load ? LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS) :
                           LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS);
   uint32_t desc = lsc_msg_desc(devinfo, load ? LSC_OP_LOAD : LSC_OP_STORE,
                                LSC_ADDR_SURFTYPE_SS, LSC_ADDR_SIZE_A32,
                                LSC_DATA_SIZE_D32, 1, uniform, cache);

   /* A uniform access only needs a single address */
   if (uniform) {
      sp.num_values_m1 = 0;
   }

   jay_def srcs[] = { sp, gpr };

   jay_SEND(b, .sfid = BRW_SFID_UGM, .msg_desc = desc, .srcs = srcs,
            .nr_srcs = load ? 1 : 2, .dst = load ? gpr : jay_null(),
            .type = JAY_TYPE_U32, .uniform = uniform, .ex_desc = ADDRESS_REG);
}
|
||||
|
||||
/* Lower abstract spill/fill MOVs (MOV to/from the MEM/UMEM files, emitted by
 * RA) into real LSC scratch messages, and account the scratch space used.
 */
void
jay_lower_spill(jay_function *func)
{
   jay_builder b = jay_init_builder(func, jay_before_function(func));

   /* We reserve the top UGPRs for spilling by ABI */
   unsigned ugpr_reservation = func->shader->num_regs[UGPR];
   assert(util_is_aligned(ugpr_reservation + 1, func->shader->dispatch_width));

   jay_def surf = jay_bare_reg(UGPR, ugpr_reservation);
   jay_def sp = jay_bare_reg(UGPR, ugpr_reservation + 1);
   sp.num_values_m1 = func->shader->dispatch_width - 1;

   /* Calculate how much stack space we need: the highest per-lane (MEM) and
    * uniform (UMEM) slot indices referenced by any spill/fill MOV.
    */
   unsigned nr_mem = 0, nr_umem = 0;
   jay_foreach_inst_in_func(func, block, I) {
      if (I->op == JAY_OPCODE_MOV && jay_is_send_like(I)) {
         jay_def mem = jay_is_mem(I->dst) ? I->dst : I->src[0];
         unsigned *nr = mem.file == UMEM ? &nr_umem : &nr_mem;

         *nr = MAX2(*nr, mem.reg + 1);
      }
   }

   assert((nr_umem > 0) || (nr_mem > 0));
   unsigned umem_base = (func->shader->dispatch_width * nr_mem * 4);

   /* We burn the address & stack pointer registers for all spills/fills in a
    * shader. Preinitialize at the top using a scratch register.
    *
    * TODO: Need ABI for multi-function.
    */
   assert(func->is_entrypoint);
   jay_AND(&b, JAY_TYPE_U32, surf, jay_bare_reg(UGPR, 5), ~BITFIELD_MASK(10));
   jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4);

   /* We use a 32-bit strided stack: SP = scratch + (lane ID * 4) */
   jay_def tmp2 = jay_bare_reg(GPR, func->shader->partition.base2);
   jay_LANE_ID_8(&b, tmp2);
   for (unsigned i = 8; i < b.shader->dispatch_width; i *= 2) {
      jay_LANE_ID_EXPAND(&b, tmp2, tmp2, i);
   }

   jay_SHL(&b, JAY_TYPE_U16, tmp2, tmp2, util_logbase2(4));
   jay_CVT(&b, JAY_TYPE_U32, sp, tmp2, JAY_TYPE_U16, JAY_ROUND, 0);
   if (b.shader->scratch_size) {
      /* New spill space starts after scratch already claimed elsewhere */
      jay_ADD(&b, JAY_TYPE_U32, sp, sp, b.shader->scratch_size);
   }

   jay_foreach_block(func, block) {
      /* We offset the stack pointer locally within a block to form offsets. By
       * contract keep it in its canonical (unoffset) form at block boundaries.
       */
      unsigned sp_delta_B = 0;
      bool address_valid = true;

      jay_foreach_inst_in_block_safe(block, I) {
         b.cursor = jay_before_inst(I);

         if (I->op == JAY_OPCODE_MOV && jay_is_send_like(I)) {
            if (!address_valid) {
               jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4);
               address_valid = true;
            }

            if (jay_is_mem(I->dst)) {
               insert_spill_fill(&b, I->dst, I->src[0], sp, false, &sp_delta_B,
                                 umem_base);
               func->shader->spills++;
            } else {
               insert_spill_fill(&b, I->src[0], I->dst, sp, true, &sp_delta_B,
                                 umem_base);
               func->shader->fills++;
            }

            jay_remove_instruction(I);
         } else if (I->op == JAY_OPCODE_SHUFFLE) {
            /* Shuffles implicitly clobber the address register so we'll need to
             * rematerialize the surface state (but be lazy).
             */
            address_valid = false;
         }
      }

      /* Canonicalize our internal registers at block boundaries */
      if (jay_num_successors(block) > 0) {
         if (!address_valid) {
            jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4);
         }

         if (sp_delta_B > 0) {
            jay_ADD(&b, JAY_TYPE_U32, sp, sp, -sp_delta_B);
         }
      }
   }

   /* Note this is bogus with recursion, but recursion is not supported on any
    * current graphics/compute API.
    */
   func->shader->scratch_size += umem_base + (nr_umem * 4);
}
|
||||
95
src/intel/compiler/jay/jay_nir_algebraic.py
Normal file
95
src/intel/compiler/jay/jay_nir_algebraic.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# Copyright 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from math import pi
|
||||
|
||||
# Pattern variables for nir_algebraic search expressions.
a = 'a'
b = 'b'
c = 'c'

# Late lowerings run before backend translation. Despite the name, this list
# also contains fceil lowering, De Morgan rewrites and extract cleanups.
lower_fsign = [
    (('fsign', a), ('bcsel', ('!flt', 0, a), +1.0,
                    ('bcsel', ('!flt', a, 0), -1.0, 0.0))),
    (('fceil', a), ('fneg', ('ffloor', ('fneg', a)))),

    # inot is free on and/or/xor sources but not dests. Apply De Morgan's.
    (('inot', ('iand(is_used_once)', ('inot', a), b)), ('ior', a, ('inot', b))),
    (('inot', ('ior(is_used_once)', ('inot', a), b)), ('iand', a, ('inot', b))),
    (('inot', ('ixor(is_used_once)', ('inot', a), b)), ('ixor', a, b)),
    (('inot', ('iand(is_used_once)', a, b)), ('ior', ('inot', a), ('inot', b))),
    (('inot', ('ior(is_used_once)', a, b)), ('iand', ('inot', a), ('inot', b))),
    (('inot', ('ixor(is_used_once)', a, b)), ('ixor', ('inot', a), b)),

    # Remove the zeroing. Down-conversion is free but extracts are not.
    (('u2f32', ('extract_u8', a, 0)), ('u2f32', ('u2u8', a))),
    (('u2f32', ('extract_u16', a, 0)), ('u2f32', ('u2u16', a))),
    (('i2f32', ('extract_i8', a, 0)), ('i2f32', ('i2i8', a))),
    (('i2f32', ('extract_i16', a, 0)), ('i2f32', ('i2i16', a))),

    (('pack_half_2x16_split', a, b),
     ('pack_32_2x16_split', ('f2f16', a), ('f2f16', b))),

    # Allows us to use more modifiers
    (('bcsel', a, ('iadd(is_used_once)', b, c), b),
     ('iadd', ('bcsel', a, c, 0), b)),
]


# Lower 1-bit booleans feeding conversions/negations into explicit selects.
lower_bool = [
    # Try to use conditional modifiers more
    (('ieq', ('iand(is_used_once)', a, b), b),
     ('ieq', ('iand', ('inot', a), b), 0)),
    (('ine', ('iand(is_used_once)', a, b), b),
     ('ine', ('iand', ('inot', a), b), 0)),
]

# b2f / b2i / b2b of every supported size become bcsel with the type's "one"
# (1.0 for float, 1 for int, -1 for bool, which is all-ones canonical form).
for T, sizes, one in [('f', [16, 32], 1.0),
                      ('i', [8, 16, 32], 1),
                      ('b', [8, 16, 32], -1)]:
    for sz in sizes:
        if T in ['f', 'i']:
            lower_bool.extend([
                ((f'{T}neg', (f'b2{T}{sz}', ('inot', 'a@1'))),
                 ('bcsel', a, 0, -one)),
                ((f'{T}neg', (f'b2{T}{sz}', 'a@1')), ('bcsel', a, -one, 0)),
            ])

        lower_bool.extend([
            ((f'b2{T}{sz}', ('inot', 'a@1')), ('bcsel', a, 0, one)),
            ((f'b2{T}{sz}', 'a@1'), ('bcsel', a, one, 0)),
        ])

lower_bool.extend([
    ((f'b2i64', 'a@1'), ('pack_64_2x32_split', ('bcsel', a, 1, 0), 0)),
])

# Rewrite 0/1 selects in terms of the all-ones select plus integer arithmetic.
opt_sel_zero = [
    (('bcsel@32', a, 0, 1), ('iadd', ('bcsel', a, 0xffffffff, 0), 1)),
    (('bcsel@32', a, 1, 0), ('ineg', ('bcsel', a, 0xffffffff, 0))),
]
|
||||
|
||||
|
||||
def main() -> None:
    """Render the Jay nir_algebraic passes into the output C source file.

    Expects -p/--import-path pointing at the directory containing Mesa's
    nir_algebraic module, and a positional output path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)
    parser.add_argument('output')
    args = parser.parse_args()

    # nir_algebraic lives elsewhere in the Mesa tree; make it importable.
    sys.path.insert(0, args.import_path)
    import nir_algebraic  # pylint: disable=import-error

    with open(args.output, 'w', encoding='utf-8') as f:
        # Terminate the #include directive with a newline so the first
        # rendered pass cannot land on the same preprocessor line.
        f.write('#include "jay_private.h"\n')

        f.write(nir_algebraic.AlgebraicPass(
            "jay_nir_lower_fsign", lower_fsign).render())
        f.write(nir_algebraic.AlgebraicPass(
            "jay_nir_lower_bool", lower_bool).render())
        f.write(nir_algebraic.AlgebraicPass(
            "jay_nir_opt_sel_zero", opt_sel_zero).render())
|
||||
233
src/intel/compiler/jay/jay_opcodes.py
Normal file
233
src/intel/compiler/jay/jay_opcodes.py
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
# Copyright 2026 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from dataclasses import dataclass
|
||||
import enum
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
|
||||
@dataclass
class Opcode:
    """Static description of one Jay IR opcode, rendered into C tables."""
    # Opcode name as it appears in the JAY_OPCODE_* enum (lowercased here)
    name: str
    # Whether the instruction produces a destination def
    has_dest: bool
    num_srcs: int
    # Legal instruction types, e.g. ['u32', 'f16']; ['untyped'] if typeless
    types: list[str]
    # Bitmask of sources accepting negate/abs modifiers (bit num_srcs is the
    # predicate)
    negate: int
    # Saturate modifier supported
    sat: bool
    # Conditional modifier supported
    cmod: bool
    # Has side effects not expressed in the SSA IR
    side_effects: bool
    # op(a, b, ...) == op(b, a, ...)
    _2src_commutative: bool
    # Extra C struct fields as (type, name) pairs packed into the instruction
    extra_struct: list[tuple[str, str]]
|
||||
|
||||
@enum.unique
class Props(enum.IntEnum):
    """Bit flags passed to op() describing opcode capabilities."""
    NEGATE0 = 1 << 0          # source 0 accepts negate/abs
    NEGATE1 = 1 << 1
    NEGATE2 = 1 << 2
    NEGATE3 = 1 << 3
    SAT = 1 << 4              # saturate modifier supported
    CMOD = 1 << 5             # conditional modifier supported
    SIDE_EFFECTS = 1 << 6
    COMMUTATIVE = 1 << 7      # first two sources commute
    NO_DEST_ = 1 << 8         # internal; use NO_DEST below
    NEGATE = NEGATE0 | NEGATE1 | NEGATE2 | NEGATE3
    # Destination-less ops are implicitly side-effecting (otherwise DCE'd)
    NO_DEST = SIDE_EFFECTS | NO_DEST_
|
||||
|
||||
# Accumulates all Opcode records, keyed by name; exported as OPCODES below.
_opcodes: dict[str, Opcode] = {}


def op(name: str, num_srcs: int, types: str | None = None,
       props: int = 0, extra_struct: str | list[str] | None = None) -> None:
    """Register one opcode.

    types is a space-separated list of legal instruction types (None for
    untyped ops). props is an OR of Props flags. extra_struct is a list of C
    declarations like 'uint8_t ctrl' stored as (type, name) pairs.
    """
    types_ = types.split(' ') if types else ['untyped']

    # We can always negate the predicate.
    negate_mask = (props & Props.NEGATE) | (1 << num_srcs)

    if extra_struct is not None:
        # Split each 'type name' declaration on the last space so multi-word
        # types like 'enum jay_type src_type' keep their full type text.
        extra_struct_ = [(' '.join(x.split(' ')[0:-1]), x.split(' ')[-1])
                         for x in extra_struct]
    else:
        extra_struct_ = []

    _opcodes[name] = Opcode(name, not bool(props & Props.NO_DEST_),
                            num_srcs, types_, negate_mask,
                            bool(props & Props.SAT), bool(props & Props.CMOD),
                            bool(props & Props.SIDE_EFFECTS),
                            bool(props & Props.COMMUTATIVE),
                            extra_struct_)
|
||||
|
||||
# ALU opcodes -----------------------------------------------------------------
op('and', 2, 'u1 u16 u32', Props.NEGATE | Props.CMOD | Props.COMMUTATIVE)
op('or', 2, 'u1 u16 u32', Props.NEGATE | Props.CMOD | Props.COMMUTATIVE)
op('xor', 2, 'u1 u16 u32', Props.NEGATE | Props.CMOD | Props.COMMUTATIVE)

op('add', 2, 'u32 s32 u64 s64 f32 f64 f16 bf16 u16 s16',
   Props.SAT | Props.CMOD | Props.COMMUTATIVE | Props.NEGATE)
op('add3', 3, 'u32 s32 u64 s64 u16 s16', Props.SAT |
   Props.CMOD | Props.COMMUTATIVE | Props.NEGATE)
op('asr', 2, 's32 s64 s16', Props.CMOD | Props.NEGATE0)
op('avg', 2, 's16 s32 u16 u32', Props.NEGATE | Props.CMOD)
op('bfe', 3, 'u32 s32', Props.NEGATE0)
op('bfi1', 2, 'u32')
op('bfi2', 3, 'u32')
op('bfn', 3, 'u32', Props.CMOD, ['uint8_t ctrl'])
op('bfrev', 1, 'u32', Props.NEGATE)
op('cbit', 1, 'u32', Props.NEGATE | Props.CMOD)
op('cmp', 2, 'u32', Props.NEGATE | Props.CMOD)


# With an 8/16-bit type, `index` specifies the element index of the source
# within the 32-bit word. For example, if src_type == U16 and index == 1, this
# converts the upper 16-bits of the input.
op('cvt', 1, 'u8 s8 u16 s16 u32 s32 u64 s64 f32 f64 f16 bf16', Props.NEGATE | Props.SAT, [
    'enum jay_type src_type',
    'enum jay_rounding_mode rounding_mode',
    'uint8_t index',
    'uint8_t pad'
])

op('fbh', 1, 'u32 s32')
op('fbl', 1, 'u32')
op('lzd', 1, 'u32')
op('frc', 1, 'f32 f64', Props.NEGATE | Props.CMOD)
op('mad', 3, 'u32 s32 u16 s16 f32 f64 f16 bf16',
   Props.NEGATE | Props.SAT | Props.CMOD | Props.COMMUTATIVE)
op('max', 2, 'u32 s32 u64 s64 u16 s16 f32 f64 f16 bf16',
   Props.NEGATE | Props.SAT | Props.COMMUTATIVE)
op('min', 2, 'u32 s32 u64 s64 u16 s16 f32 f64 f16 bf16',
   Props.NEGATE | Props.SAT | Props.COMMUTATIVE)
op('mov', 1, 'u1 u16 u32 u64', Props.NEGATE0 | Props.CMOD)
op('modifier', 1, 'f32 f64 f16 s16 s32 s64 u16 u32 u64 s8',
   Props.NEGATE | Props.SAT | Props.CMOD)
op('mul', 2, 'u16 s16 f32 f64 f16 bf16',
   Props.NEGATE | Props.SAT | Props.CMOD | Props.COMMUTATIVE)
op('mul_high', 2, 'u32 s32', Props.COMMUTATIVE)
op('mul_32x16', 2, 'u32 s32')
op('mul_32', 2, 'u32 s32', Props.COMMUTATIVE, ['bool high'])
op('sel', 3, 'u32 f32 u1 u16', Props.NEGATE)
op('csel', 3, 'u32 s32 f32', Props.NEGATE)
op('dp4a_uu', 3, 'u32', Props.SAT)
op('dp4a_ss', 3, 's32', Props.SAT)
op('dp4a_su', 3, 's32', Props.SAT)
op('rndd', 1, 'f16 f32 f64', Props.NEGATE | Props.SAT)
op('rndz', 1, 'f16 f32 f64', Props.NEGATE | Props.SAT)
op('rnde', 1, 'f16 f32 f64', Props.NEGATE | Props.SAT)
op('math', 1, 'f16 f32', Props.NEGATE | Props.SAT, ['enum jay_math op'])

# Rotates and shifts share a signature
for n in ['rol', 'ror', 'shl', 'shr']:
    op(n, 2, 'u32 u64 u16 s16 s32 s64', Props.CMOD | Props.NEGATE0)

op('quad_swizzle', 1, 'u1 u32', 0, ['enum jay_quad_swizzle swizzle'])
op('sync', 0, None, Props.NO_DEST, ['enum tgl_sync_function op'])

# Control flow and other destination-less instructions
for n in ['brd', 'illegal', 'goto', 'join', 'if', 'else',
          'endif', 'while', 'break', 'cont', 'call', 'calla', 'jmpi', 'ret',
          'loop_once']:
    op(n, 0, None, Props.NO_DEST)

op('send', 4, None, Props.SIDE_EFFECTS, [
    'enum brw_sfid sfid',
    'uint8_t sbid',
    'bool eot',
    'bool check_tdr',
    'bool uniform',
    'bool bindless',
    'enum jay_type type_0',
    'enum jay_type type_1',
    'uint8_t ex_mlen',
    'uint32_t ex_desc_imm',
])

op('reloc', 0, 'u32 u64', 0, ['unsigned param', 'unsigned base'])
op('preload', 0, 'u32', 0, ['unsigned reg'])
op('deswizzle_16', 0, 'u32', Props.NO_DEST, ['unsigned dst', 'unsigned src'])

# Calculating the lane ID requires multiple power-of-two steps each involving
# complex architectural features not modelled in the IR.
op('lane_id_8', 0, 'u16')
op('lane_id_expand', 1, 'u16', 0, ['unsigned width'])

# Sample ID calculation
op('extract_byte_per_8lanes', 2, 'u32')
op('shr_odd_subspans_by_4', 1, 'u16')
op('and_u32_u16', 2, 'u32')

# Pixel coord calculations. expand_quad replicates out the per-2x2 values from
# its source g0.[10...13] and - in the case of SIMD32 - g1.[10...13] into a
# per-lane value. Then offset_packed_pixel_coords adds the appropriate packed
# 2x16-bit offset within each quad, giving 2x16-bit per-lane coordinates.
op('expand_quad', 2, 'u32')
op('offset_packed_pixel_coords', 1, 'u32')
op('extract_layer', 2, 'u32')

# Generated by RA and lowered after. Valid only for GPR/UGPR.
op('swap', 2, 'u32', Props.NO_DEST)

# Phi function representations
#
# Unlike in NIR, we represent Phi functions as a pair of opcodes, purely
# for convenience since it makes many things easier to work with.
#
# Phis logically exist along control flow edges between blocks. PHI_DST
# lives where 𝜙 would traditionally be written, at the point where the new
# value is defined. A PHI_DST will have a corresponding PHI_SRC in each of
# its predecessor block, representing value coming in along that edge. This
# ensures that source modifiers, scalar to vector promotion, or other source
# evaluation happens in the predecessor block.
#
# The PHI_SRC refers to the SSA index of the PHI_DST. For example, 'if (..) r3 =
# r1 else r3 = r2 endif' might look
#
#   (following block)   | (then block)   | (else block)
#   START B3 <B1 <B2    | ...            | ...
#   r3 = 𝜙              | 𝜙3 = r1        | 𝜙3 = r2
#   ...                 | END B1         | END B2
#
# Here, PHI_DST defines a new SSA value r3. The PHI_SRC in blocks B1 and B2 each
# indicate that the r3 phi's value is r1 when coming from B1, and r2 when coming
# from B2. This would traditionally be written r3 = 𝜙(r1, r2).
#
# Phis operate on whole 32-bit lane values. Phis are not allowed to mix files.
op('phi_src', 1, 'u1 u32', Props.NO_DEST, ['uint32_t index'])
op('phi_dst', 0, 'u1 u32')

# Output from a unit test to prevent dead code elimination.
op('unit_test', 1, 'u32', Props.NO_DEST)

# Produces a stable indeterminate value. Freeze(Poison) in LLVM parlance.
op('indeterminate', 0, 'u1 u32')

op('not', 1, 'u1 u32', Props.CMOD)
op('cast_canonical_to_flag', 1, 'u1')

op('mov_imm64', 0, 'u64', 0, ['uint64_t imm'])
op('zero_flag', 0, 'u1', Props.NO_DEST, ['unsigned reg'])

# Cross-lane shuffle. src0=data, src1=offset in bytes. Clobbers an address reg.
op('shuffle', 2, 'u1 u32')

# Shuffle with a constant lane index.
op('broadcast_imm', 1, 'u1 u32', 0, ['unsigned lane'])

# Public name for the opcode table consumed by the generators.
OPCODES = _opcodes

# Enums referenced by extra_struct fields: C tag -> (prefix, value names)
ENUMS: 'Mapping[str, tuple[str, list[str]]]' = {
    'jay_quad_swizzle': ('JAY_QUAD_SWIZZLE', ['xxxx', 'yyyy', 'zzzz', 'wwww',
                                              'xyxy', 'zwzw', 'xxzz', 'yyww']),
    'jay_rounding_mode': ('JAY', ['round', 'rne', 'ru', 'rd', 'rtz']),
    'jay_math': ('JAY_MATH', ['_', 'inv', 'log', 'exp', 'sqrt', 'rsq', 'sin', 'cos']),
    'brw_sfid': ('BRW_SFID', ['null', 'sampler', 'message_gateway',
                              'render_cache', 'urb', 'bindless_thread_dispatch',
                              'ray_trace_accelerator', 'hdc0',
                              'pixel_interpolator', 'tgm', 'slm', 'ugm']),
    'tgl_sync_function': ('TGL_SYNC', ['nop', 'allrd', 'allwr', 'fence', 'bar', 'host']),
}

# Clean up namespace
del op
del _opcodes
||||
99
src/intel/compiler/jay/jay_opcodes_gen.py
Normal file
99
src/intel/compiler/jay/jay_opcodes_gen.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
# Copyright 2026 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from mako import exceptions
|
||||
from mako.template import Template
|
||||
|
||||
from jay_opcodes import OPCODES
|
||||
|
||||
HEADER_TEMPLATE = """/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "util/macros.h"
|
||||
|
||||
enum PACKED jay_opcode {
|
||||
% for opcode in opcodes:
|
||||
JAY_OPCODE_${opcode.upper()},
|
||||
% endfor
|
||||
JAY_NUM_OPCODES
|
||||
};
|
||||
static_assert(sizeof(enum jay_opcode) == 1);
|
||||
|
||||
struct jay_opcode_info {
|
||||
const char *name;
|
||||
unsigned num_srcs;
|
||||
|
||||
/** Bitfield of sources which support negation/abs */
|
||||
uint8_t src_mods;
|
||||
|
||||
/** Which modifiers are broadly supported by the opcode. Note there may be
|
||||
* further restrictions (e.g. based on types) not encoded here.
|
||||
*/
|
||||
bool sat;
|
||||
bool cmod;
|
||||
|
||||
/** Whether the operation has side effects not expressed in the SSA IR */
|
||||
bool side_effects;
|
||||
|
||||
/** op(a, b, c, ...) = op(b, a, c, ...) */
|
||||
bool _2src_commutative;
|
||||
};
|
||||
|
||||
extern const struct jay_opcode_info jay_opcode_infos[JAY_NUM_OPCODES];
|
||||
"""
|
||||
|
||||
CODE_TEMPLATE = """/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "jay_opcodes.h"
|
||||
|
||||
const struct jay_opcode_info jay_opcode_infos[JAY_NUM_OPCODES] = {
|
||||
% for opcode, op in opcodes.items():
|
||||
[JAY_OPCODE_${opcode.upper()}] = {
|
||||
.name = "${opcode}",
|
||||
.num_srcs = ${op.num_srcs},
|
||||
.src_mods = ${bin(op.negate)},
|
||||
% for mod in ["sat", "cmod", "side_effects", "_2src_commutative"]:
|
||||
% if getattr(op, mod):
|
||||
.${mod} = true,
|
||||
% endif
|
||||
% endfor
|
||||
},
|
||||
% endfor
|
||||
};
|
||||
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Render the Jay opcode header and/or C table from OPCODES.

    Command line:
        --code PATH    write the generated .c table to PATH
        --header PATH  write the generated .h enum/declarations to PATH

    Returns 0 on success, 1 if mako template rendering fails (the annotated
    mako traceback is printed to stderr so it is not mistaken for output).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--code', action='store', default=None)
    parser.add_argument('--header', action='store', default=None)
    args = parser.parse_args()

    if not (args.header or args.code):
        parser.error('At least one of --code or --header is required')

    try:
        if args.code is not None:
            with open(args.code, 'w', encoding='utf-8') as f:
                f.write(Template(CODE_TEMPLATE).render(opcodes=OPCODES))
        if args.header is not None:
            with open(args.header, 'w', encoding='utf-8') as f:
                f.write(Template(HEADER_TEMPLATE).render(opcodes=OPCODES))
    except Exception:
        # Fix: emit the error trace on stderr, not stdout, so build systems
        # that capture stdout don't swallow or misfile the diagnostic.
        print(exceptions.text_error_template().render(), file=sys.stderr)
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||
137
src/intel/compiler/jay/jay_opt_control_flow.c
Normal file
137
src/intel/compiler/jay/jay_opt_control_flow.c
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/list.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/*
|
||||
* Detect the block "else; endif" and remove the no-op else, effectively
|
||||
* removing empty else blocks. Logically, that causes critical edges, so this
|
||||
* pass must run late (post-RA).
|
||||
*/
|
||||
static void
|
||||
opt_empty_else(jay_block *blk)
|
||||
{
|
||||
unsigned i = 0;
|
||||
enum jay_opcode ops[] = { JAY_OPCODE_ELSE, JAY_OPCODE_ENDIF };
|
||||
|
||||
jay_foreach_inst_in_block(blk, I) {
|
||||
if (i >= ARRAY_SIZE(ops) || ops[i++] != I->op)
|
||||
return;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(ops)) {
|
||||
jay_remove_instruction(jay_first_inst(blk));
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Replace short if-statements with predication. Assumes opt_empty_else already
 * ran. TODO: Generalize.
 */
static void
opt_predicate(jay_function *f, jay_block *block)
{
   /* Only a block ending in an IF is a candidate */
   jay_inst *if_ = jay_last_inst(block);
   if (!if_ || if_->op != JAY_OPCODE_IF)
      return;

   /* If's fallthrough to the then */
   jay_block *then_block = jay_next_block(block);
   assert(block->successors[0] == then_block && "successors for if");

   /* We're searching for a single block then, so the next block is else.
    * The "> 3" then-size cap is a heuristic threshold — TODO confirm tuning.
    */
   jay_block *else_block = jay_next_block(then_block);
   if (block->successors[1] != else_block ||
       list_length(&then_block->instructions) > 3 ||
       !list_is_singular(&else_block->instructions))
      return;

   /* We can only access one flag per instruction, so do not predicate anything
    * accessing flags. This also ensures the if-condition flag is kept live.
    *
    * MIN/MAX turn into SEL which cannot be predicated despite not using flags.
    *
    * Predicating NoMask instructions doesn't work if we are electing a nonzero
    * lane but the NoMask forces lane 0. This should be optimized later.
    */
   jay_foreach_inst_in_block(then_block, I) {
      if (jay_uses_flag(I) ||
          I->op == JAY_OPCODE_MIN ||
          I->op == JAY_OPCODE_MAX ||
          I->op == JAY_OPCODE_CSEL ||
          jay_is_no_mask(I))
         return;
   }

   /* The singular else-block instruction must be the ENDIF (opt_empty_else
    * already deleted the ELSE).
    */
   jay_inst *endif = jay_last_inst(else_block);
   if (endif->op != JAY_OPCODE_ENDIF)
      return;

   /* Rewrite with predication: copy the IF's predicate onto every
    * then-block instruction.
    */
   jay_builder b = jay_init_builder(f, jay_after_block(block));
   assert(if_->predication == JAY_PREDICATED && "if's are always predicated");

   jay_foreach_inst_in_block_safe(then_block, I) {
      jay_add_predicate(&b, I, *jay_inst_get_predicate(if_));
   }

   /* Remove the jumps */
   jay_remove_instruction(if_);
   jay_remove_instruction(endif);
}
|
||||
|
||||
/*
 * Optimize "(f0) break; while" to "(!f0) while". As break/while appear in
 * different blocks, we optimize the entire function at a time.
 */
static void
opt_predicate_while(jay_function *func)
{
   /* Most recently seen predicated BREAK that is still a fusion candidate */
   jay_inst *prev_break = NULL;

   jay_foreach_block(func, block) {
      if (list_is_empty(&block->instructions)) {
         /* Ignore empty blocks */
      } else if (jay_last_inst(block)->op == JAY_OPCODE_BREAK) {
         prev_break = jay_last_inst(block);
      } else if (jay_first_inst(block)->op == JAY_OPCODE_WHILE &&
                 prev_break &&
                 prev_break->predication) {
         /* Invert the break's predicate: "(f0) break" becomes "(!f0) while" */
         assert(!jay_first_inst(block)->predication);
         jay_inst_get_predicate(prev_break)->negate ^= true;

         /* Unlink both jumps, then re-insert the (predicated) break at the
          * head of the while's block and retarget it as the WHILE.
          */
         jay_remove_instruction(jay_first_inst(block));
         jay_remove_instruction(prev_break);

         jay_builder b = jay_init_builder(func, jay_before_block(block));
         jay_builder_insert(&b, prev_break);

         prev_break->op = JAY_OPCODE_WHILE;
         prev_break = NULL;
      } else {
         /* Any other non-empty block invalidates the candidate */
         prev_break = NULL;
      }
   }
}
|
||||
|
||||
void
|
||||
jay_opt_control_flow(jay_shader *s)
|
||||
{
|
||||
jay_foreach_function(s, f) {
|
||||
/* Iterating blocks in reverse lets both opts converge in 1 pass */
|
||||
jay_foreach_block_rev(f, block) {
|
||||
opt_empty_else(block);
|
||||
opt_predicate(f, block);
|
||||
}
|
||||
|
||||
/* Do last: opt_predicate_while depends on both previous optimizations */
|
||||
opt_predicate_while(f);
|
||||
}
|
||||
}
|
||||
58
src/intel/compiler/jay/jay_opt_dead_code.c
Normal file
58
src/intel/compiler/jay/jay_opt_dead_code.c
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/bitset.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/* Backwards dead-code elimination over a function: walk instructions in
 * reverse, nulling out unused destinations/flags and deleting instructions
 * with no remaining effect, then drop phi sources whose phi index is dead.
 */
static void
pass(jay_function *f)
{
   /* One liveness bit per SSA index */
   BITSET_WORD *live_set = BITSET_CALLOC(f->ssa_alloc);

   jay_foreach_inst_in_func_safe_rev(f, block, I) {
      /* TODO: Allow for atomics? */
      if (!BITSET_TEST_COUNT(live_set, jay_base_index(I->dst),
                             jay_num_values(I->dst)) &&
          I->op != JAY_OPCODE_SEND) {
         I->dst = jay_null();
      }

      /* Drop a dead flag write, except on a CMP whose register dest is still
       * live (the cmod is what produces that dest's value).
       */
      if (!jay_is_null(I->cond_flag) &&
          !BITSET_TEST(live_set, jay_index(I->cond_flag)) &&
          (I->op != JAY_OPCODE_CMP || jay_is_null(I->dst))) {

         I->cond_flag = jay_null();
         I->conditional_mod = 0;
      }

      bool no_dest = jay_is_null(I->dst) && jay_is_null(I->cond_flag);
      bool side_effects = jay_opcode_infos[I->op].side_effects;

      if (no_dest && !side_effects) {
         jay_remove_instruction(I);
      } else {
         /* Instruction survives: its sources are live */
         jay_foreach_src_index(I, s, _, index) {
            BITSET_SET(live_set, index);
         }
      }
   }

   /* Eliminate phis. This step may leave dead code but it's good enough in
    * practice since NIR already eliminated dead phis.
    */
   jay_foreach_block(f, block) {
      jay_foreach_phi_src_in_block(block, I) {
         if (!BITSET_TEST(live_set, jay_phi_src_index(I))) {
            jay_remove_instruction(I);
         }
      }
   }

   free(live_set);
}
|
||||
|
||||
JAY_DEFINE_FUNCTION_PASS(jay_opt_dead_code, pass)
|
||||
282
src/intel/compiler/jay/jay_opt_propagate.c
Normal file
282
src/intel/compiler/jay/jay_opt_propagate.c
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/lut.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
static enum jay_type
|
||||
canonicalize_for_bit_compare(enum jay_type type)
|
||||
{
|
||||
enum jay_type base = jay_base_type(type);
|
||||
return (base == JAY_TYPE_S) ? jay_type_rebase(type, JAY_TYPE_U) : type;
|
||||
}
|
||||
|
||||
/* Try to fold a `cmp x, 0` into the instruction defining x by attaching the
 * conditional modifier to the def directly. Returns true if folded; the
 * caller is responsible for cleaning up the original CMP's flag write.
 */
static bool
propagate_cmod(jay_function *func, jay_inst *I, jay_inst **defs)
{
   enum jay_type cmp_type = I->type;
   enum jay_conditional_mod cmod = I->conditional_mod;
   jay_inst *def = NULL;

   /* TODO: Generalize cmod propagation */
   if (jay_type_size_bits(cmp_type) != 32)
      return false;

   /* Pattern match `cmp ssa, 0` or `cmp 0, ssa`. */
   jay_foreach_ssa_src(I, s) {
      if (jay_is_zero(I->src[1 - s])) {
         def = defs[jay_base_index(I->src[s])];

         /* Canonicalize the cmod to have the zero second */
         cmod = s == 1 ? jay_conditional_mod_swap_sources(cmod) : cmod;
         break;
      }
   }

   /* Check if we can fold into the def */
   if (!def || !jay_is_null(def->cond_flag) || !jay_opcode_infos[def->op].cmod)
      return false;

   /* "Neither Saturate nor conditional modifier allowed with DW integer
    * multiply."
    *
    * Could be refined.
    */
   if (def->op == JAY_OPCODE_MUL && !jay_type_is_any_float(def->type))
      return false;

   enum jay_type instr_type = def->type;

   /* eq/ne only look at bits, so signedness mismatches are fine */
   if (cmod == JAY_CONDITIONAL_NE || cmod == JAY_CONDITIONAL_EQ) {
      cmp_type = canonicalize_for_bit_compare(cmp_type);
      instr_type = canonicalize_for_bit_compare(instr_type);
   }

   if (instr_type != cmp_type)
      return false;

   jay_builder b = jay_init_builder(func, jay_before_inst(I));
   jay_set_conditional_mod(&b, def, I->cond_flag, cmod);
   return true;
}
|
||||
|
||||
static jay_def
|
||||
jay_compose_src(jay_def to, jay_def from)
|
||||
{
|
||||
if (to.abs) {
|
||||
from.negate = false;
|
||||
from.abs = true;
|
||||
}
|
||||
|
||||
from.negate ^= to.negate;
|
||||
return from;
|
||||
}
|
||||
|
||||
static bool
|
||||
uses_modifiers(const jay_inst *I)
|
||||
{
|
||||
jay_foreach_src(I, s) {
|
||||
if (I->src[s].abs || I->src[s].negate)
|
||||
return true;
|
||||
}
|
||||
|
||||
return I->saturate;
|
||||
}
|
||||
|
||||
/* Fold a MODIFIER def (`mod`) into source `s` of its user `I`, when the
 * source supports modifiers and the types can be reconciled.
 */
static void
propagate_modifier(jay_inst *I, unsigned s, jay_inst *mod)
{
   /* Check if we can propagate abs/neg here in general */
   if (!jay_has_src_mods(I, s) || mod->saturate)
      return;

   /* Try to make the types compatible. */
   if (jay_src_type(I, s) != mod->type) {
      /* SEL is type-agnostic as long as no modifiers are in play, so we can
       * simply retype it to match.
       */
      if (I->op == JAY_OPCODE_SEL && !uses_modifiers(I)) {
         I->type = mod->type;
      } else {
         return;
      }
   }

   /* NOTE(review): jay_replace_src presumably rewrites the SSA reference
    * while keeping I's own abs/neg bits, which jay_compose_src then merges
    * with the modifier's — confirm against jay_replace_src's definition.
    */
   jay_replace_src(&I->src[s], mod->src[0]);
   I->src[s] = jay_compose_src(I->src[s], mod->src[0]);
}
|
||||
|
||||
/* Fold a NOT def (`mod`) into source `s` of its user `I`, where the negate
 * bit acts as bitwise inversion (predicate sources and logic ops), or by
 * inverting the corresponding input of a BFN's lookup table.
 */
static void
propagate_not(jay_inst *I, unsigned s, jay_inst *mod)
{
   /* Handle inot specially for predicates, and logic operations per bspec text:
    *
    *    When used with logic instructions (and, not, or, xor), [the
    *    negate] field indicates whether the source bits are
    *    inverted... regardless of the source type.
    *
    * (s == num_srcs - predication picks out the predicate source when the
    * instruction is predicated.)
    */
   if ((s == I->num_srcs - I->predication) ||
       I->op == JAY_OPCODE_AND ||
       I->op == JAY_OPCODE_OR ||
       I->op == JAY_OPCODE_XOR) {
      jay_replace_src(&I->src[s], mod->src[0]);
      I->src[s].negate ^= true;
   } else if (I->op == JAY_OPCODE_BFN) {
      /* BFN: absorb the inversion into the 3-input LUT control instead */
      jay_replace_src(&I->src[s], mod->src[0]);
      jay_set_bfn_ctrl(I, util_lut3_invert_source(jay_bfn_ctrl(I), s));
   }
}
|
||||
|
||||
/* Forward copy/modifier/cmod propagation: walk instructions in order,
 * tracking the defining instruction of each SSA index, and fold MOVs,
 * MODIFIERs, NOTs and zero-compares into their users.
 */
static void
propagate_forwards(jay_function *f)
{
   /* defs[i] = instruction defining SSA index i (filled as we walk) */
   jay_inst **defs = calloc(f->ssa_alloc, sizeof(defs[0]));

   jay_foreach_inst_in_func_safe(f, block, I) {
      jay_builder b = jay_init_builder(f, jay_before_inst(I));

      jay_foreach_dst_index(I, _, d) {
         defs[d] = I;
      }

      /* Copy propagate individual components into vectors */
      jay_foreach_src_index(I, s, c, idx) {
         jay_inst *def = defs[idx];
         assert(def != NULL && "SSA");

         if (def->op == JAY_OPCODE_MOV &&
             !def->predication &&
             jay_num_values(def->dst) == 1 &&
             jay_num_values(def->src[0]) == 1 &&
             I->src[s].file == def->src[0].file) {

            jay_insert_channel(&b, &I->src[s], c, def->src[0]);
         }
      }

      /* Don't propagate into phis yet - TODO: File awareness */
      if (I->op == JAY_OPCODE_PHI_SRC || I->op == JAY_OPCODE_SEND)
         continue;

      jay_foreach_ssa_src(I, s) {
         /* Copy propagate whole vectors */
         jay_def src = I->src[s];
         if (src.collect)
            continue;

         jay_inst *def = defs[jay_base_index(src)];
         assert(def != NULL && "SSA");

         if (!jay_defs_equivalent(def->dst, src) || def->predication)
            continue;

         if (def->op == JAY_OPCODE_MOV) {
            /* Default values must have the same file as their dest, do not
             * propagate invalid there. Also don't propagate inverse-ballots.
             * Also only source 0 can read ARF (i.e. ballotted flags).
             */
            if ((I->src[s].file == def->src[0].file) ||
                ((!jay_inst_has_default(I) ||
                  &I->src[s] != jay_inst_get_default(I)) &&
                 !(I->src[s].file == UFLAG && !jay_is_imm(def->src[0])) &&
                 !(I->src[s].file == FLAG) &&
                 (s == 0 || !jay_is_flag(def->src[0])) &&
                 !(jay_is_imm(def->src[0]) && I->src[s].negate))) {

               jay_replace_src(&I->src[s], def->src[0]);
            }
         } else if (def->op == JAY_OPCODE_MODIFIER && !jay_uses_flag(def)) {
            propagate_modifier(I, s, def);
         } else if (def->op == JAY_OPCODE_NOT && !jay_uses_flag(def)) {
            propagate_not(I, s, def);
         }
      }

      if (I->op == JAY_OPCODE_CMP && propagate_cmod(f, I, defs)) {
         /* Even if we propagate the predicate write, there might be uses of the
          * register value (TODO: Maybe check for this and skip propagating in
          * that case?). So we cannot remove the compare, just strip the cond
          * flag. Furthermore, the CMP always clobbers some predicate, so give
          * it an immediately-dead one instead.
          */
         I->cond_flag = jay_alloc_def(&b, I->cond_flag.file, 1);
         continue;
      }
   }

   free(defs);
}
|
||||
|
||||
/* Try to fold a following saturating float MODIFIER (`fsat`) back into its
 * producer `I`, taking over fsat's destination and conditional state.
 * Returns true on success; the caller removes `fsat`.
 */
static bool
propagate_fsat(jay_inst *I, jay_inst *fsat)
{
   /* Require a plain, unmodified float MODIFIER of matching type, whose cmod
    * (if any) the producer's opcode can carry, and a producer without its own
    * cmod.
    */
   if (fsat->op != JAY_OPCODE_MODIFIER ||
       fsat->predication ||
       fsat->src[0].negate ||
       fsat->src[0].abs ||
       (fsat->conditional_mod && !jay_opcode_infos[I->op].cmod) ||
       I->conditional_mod ||
       I->type != fsat->type ||
       !jay_type_is_any_float(fsat->type))
      return false;

   /* saturate(saturate(x)) = saturate(x) */
   I->saturate |= fsat->saturate;
   I->dst = fsat->dst;
   I->cond_flag = fsat->cond_flag;
   I->conditional_mod = fsat->conditional_mod;
   return true;
}
|
||||
|
||||
/* Backward propagation: walk in reverse recording the (single) use of each
 * SSA index, then fold trailing saturates and trailing MOVs into their
 * single-use producers.
 */
static void
propagate_backwards(jay_function *f)
{
   /* uses[i] = the one recorded user of SSA index i; `multiple` marks
    * indices with more than one user (which we must not fold into).
    */
   jay_inst **uses = calloc(f->ssa_alloc, sizeof(uses[0]));
   BITSET_WORD *multiple = BITSET_CALLOC(f->ssa_alloc);

   jay_foreach_inst_in_func_rev(f, block, I) {
      /* Record uses */
      jay_foreach_src_index(I, s, c, ssa_index) {
         if (uses[ssa_index])
            BITSET_SET(multiple, ssa_index);
         else
            uses[ssa_index] = I;
      }

      /* TODO: f64 sat propagation */
      if (jay_num_values(I->dst) != 1)
         continue;

      assert(jay_is_ssa(I->dst));

      jay_inst *use = uses[jay_base_index(I->dst)];
      if (!use || BITSET_TEST(multiple, jay_base_index(I->dst)))
         continue;

      /* Producer supports saturate and the single use is a float saturate:
       * merge it in.
       */
      if (jay_opcode_infos[I->op].sat &&
          jay_type_is_any_float(I->type) &&
          propagate_fsat(I, use)) {

         jay_remove_instruction(use);
         continue;
      }

      /* Fold UGPR->{GPR, FLAG} copies coming out of NIR */
      if (I->type == use->type &&
          I->op != JAY_OPCODE_PHI_DST &&
          use->op == JAY_OPCODE_MOV) {

         I->dst = use->dst;
         jay_remove_instruction(use);
         continue;
      }
   }

   free(multiple);
   free(uses);
}
|
||||
|
||||
JAY_DEFINE_FUNCTION_PASS(jay_opt_propagate_forwards, propagate_forwards)
|
||||
JAY_DEFINE_FUNCTION_PASS(jay_opt_propagate_backwards, propagate_backwards)
|
||||
309
src/intel/compiler/jay/jay_print.c
Normal file
309
src/intel/compiler/jay/jay_print.c
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/brw/brw_eu_defines.h"
|
||||
#include "util/lut.h"
|
||||
#include "util/macros.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/* Bounds-checked lookup into one of the string tables below. Uses a GNU
 * statement expression; evaluates to arr[x].
 */
#define ENUM_TO_STR(x, arr) \
   ({ \
      assert(x < ARRAY_SIZE(arr)); \
      arr[x]; \
   })
|
||||
|
||||
/* Suffix printed for each conditional modifier; index 0 (no cmod) is NULL. */
static const char *jay_conditional_mod_str[] = {
   [JAY_CONDITIONAL_EQ] = ".eq", [JAY_CONDITIONAL_NE] = ".ne",
   [JAY_CONDITIONAL_GT] = ".gt", [JAY_CONDITIONAL_LT] = ".lt",
   [JAY_CONDITIONAL_GE] = ".ge", [JAY_CONDITIONAL_LE] = ".le",
   [JAY_CONDITIONAL_OV] = ".ov", [JAY_CONDITIONAL_NAN] = ".nan",
};

/* Printable names for architectural registers (indexed by ARF index). */
static const char *jay_arf_str[] = {
   [JAY_ARF_NULL] = "_",
   [JAY_ARF_MASK] = "mask",
   [JAY_ARF_CONTROL] = "ctrl",
   [JAY_ARF_TIMESTAMP] = "timestamp",
};

/* Short prefix printed for each register file (e.g. "r3" for GPR 3). */
static const char *jay_file_str[JAY_FILE_LAST + 1] = {
   [GPR] = "r", [UGPR] = "u", [FLAG] = "f", [UFLAG] = "uf",
   [J_ADDRESS] = "a", [ACCUM] = "acc", [UACCUM] = "uacc", [J_ARF] = "arf",
   [MEM] = "m", [UMEM] = "um", [TEST_FILE] = "t",
};

/* Base-type letter used by jay_print_type ("f32", "u16", ...). */
static const char *jay_base_types[] = {
   [JAY_TYPE_U] = "u", [JAY_TYPE_S] = "s", [JAY_TYPE_F] = "f", [JAY_TYPE_BF] = "bf"
};
|
||||
|
||||
void
|
||||
jay_print_type(FILE *fp, enum jay_type t)
|
||||
{
|
||||
fprintf(fp, ".%s%u", ENUM_TO_STR(jay_base_type(t), jay_base_types),
|
||||
jay_type_size_bits(t));
|
||||
}
|
||||
|
||||
/* Print one def/source of an instruction. `src` selects what to print:
 * -2 = the conditional flag, -1 = the destination, >= 0 = that source.
 * Prints SSA indices (marking last uses with '*'), collected vectors as
 * "(a, b, ...)", and post-RA physical registers in parentheses.
 */
static void
jay_print_def(FILE *fp, const jay_inst *I, int src)
{
   jay_def def = src == -2 ? I->cond_flag : src == -1 ? I->dst : I->src[src];
   unsigned len = jay_num_values(def);
   const char *file = ENUM_TO_STR(def.file, jay_file_str);
   /* Last-use markers only apply to SSA sources */
   bool has_lu = jay_is_ssa(def) && !jay_is_null(def) && src >= 0;
   unsigned lu_bit = has_lu ? jay_source_last_use_bit(I->src, src) : 0;

   bool has_index = jay_channel(def, 0) != JAY_SENTINEL;
   bool has_reg = !def.collect && def.reg && def.file != J_ARF;

   if (jay_is_null(def)) {
      has_reg = false;
      fprintf(fp, "_");
   } else if (def.file == J_ARF) {
      fputs(ENUM_TO_STR(jay_base_index(def), jay_arf_str), fp);
   } else if (def.collect) {
      /* Non-contiguous vector: print each channel explicitly */
      assert(has_index && "else would be contiguous");
      fprintf(fp, "(");
      for (unsigned i = 0; i < len; ++i) {
         if (i)
            fprintf(fp, ", ");

         if (jay_channel(def, i)) {
            if (has_lu && BITSET_TEST(I->last_use, lu_bit))
               fprintf(fp, "*");

            fprintf(fp, "%s%u", file, jay_channel(def, i));
            ++lu_bit;
         } else {
            fprintf(fp, "_");
         }
      }
      fprintf(fp, ")");
   } else if (has_index) {
      /* Contiguous range: "r4" or "r4:r7" */
      fprintf(fp, "%s%s%u",
              has_lu && BITSET_TEST(I->last_use, lu_bit) ? "*" : "", file,
              jay_channel(def, 0));
      if (len > 1) {
         fprintf(fp, ":%s%u", file, jay_channel(def, len - 1));
      }
   }

   /* Allocated physical register, parenthesized when it accompanies an
    * SSA index (post-RA annotation).
    */
   if (has_reg) {
      if (has_index)
         fprintf(fp, "(");

      fprintf(fp, "%s%u%s", file, def.reg, def.hi ? "h" : "");
      if (len > 1) {
         fprintf(fp, ":%s%u", file, def.reg + len - 1);
      }

      if (has_index)
         fprintf(fp, ")");
   }
}
|
||||
|
||||
/* Print source `s` of instruction `I`, including negate/abs modifiers.
 * Immediates are printed in hex, with a decoded float in parentheses when
 * the bit pattern plausibly encodes one.
 */
static void
jay_print_src(FILE *fp, jay_inst *I, unsigned s)
{
   jay_def src = I->src[s];
   fprintf(fp, "%s%s", src.negate ? "-" : "", src.abs ? "(abs)" : "");

   if (jay_is_imm(src)) {
      fprintf(fp, "0x%X", jay_as_uint(src));
      if (util_is_probably_float(jay_as_uint(src))) {
         float f = uif(jay_as_uint(src));
         /* Scientific notation for large magnitudes, plain otherwise */
         fprintf(fp, fabs(f) >= 1000000.0 ? " (%e)" : " (%f)", f);
      }
   } else {
      jay_print_def(fp, I, s);
   }
}
|
||||
|
||||
/* XXX: copypaste of brw_print_swsb */
/* Print a software-scoreboard annotation: an in-order pipe distance
 * ("F@2", "A@1", ...) and/or an out-of-order SBID token ("$3", "$3.dst",
 * "$3.src").
 */
static void
jay_print_swsb(FILE *f, const struct tgl_swsb swsb)
{
   if (swsb.regdist) {
      fprintf(f, "%s@%d",
              (swsb.pipe == TGL_PIPE_FLOAT ? "F" :
               swsb.pipe == TGL_PIPE_INT ? "I" :
               swsb.pipe == TGL_PIPE_LONG ? "L" :
               swsb.pipe == TGL_PIPE_ALL ? "A" :
               swsb.pipe == TGL_PIPE_MATH ? "M" :
               swsb.pipe == TGL_PIPE_SCALAR ? "S" :
               ""),
              swsb.regdist);
   }

   if (swsb.mode) {
      if (swsb.regdist)
         fprintf(f, " ");

      fprintf(f, "$%d%s", swsb.sbid,
              (swsb.mode & TGL_SBID_SET ? "" :
               swsb.mode & TGL_SBID_DST ? ".dst" :
               ".src"));
   }
}
|
||||
|
||||
/* Print a full instruction on one line:
 *   [dst[, flag] = ] [(pred[/default])] op[.type][.mods] srcs... {swsb}
 */
void
jay_print_inst(FILE *fp, jay_inst *I)
{
   const char *sep = "";

   if (!jay_is_null(I->dst)) {
      jay_print_def(fp, I, -1);
      sep = ", ";
   }

   if (!jay_is_null(I->cond_flag)) {
      fprintf(fp, "%s", sep);
      jay_print_def(fp, I, -2);
   }

   if (!jay_is_null(I->dst) || !jay_is_null(I->cond_flag)) {
      fprintf(fp, " = ");
   }

   /* Predicate (and optional default value) printed as "(pred/default)" */
   if (I->predication) {
      fprintf(fp, "(");
      jay_print_src(fp, I, jay_inst_get_predicate(I) - I->src);

      if (jay_inst_has_default(I)) {
         fprintf(fp, "/");
         jay_print_src(fp, I, jay_inst_get_default(I) - I->src);
      }

      fprintf(fp, ")");
   }

   /* MATH prints its specific function name via the info hook instead of
    * the generic opcode name.
    */
   if (I->op == JAY_OPCODE_MATH) {
      jay_print_inst_info(fp, I, "");
   } else {
      fprintf(fp, "%s", jay_opcode_infos[I->op].name);
   }

   if (I->type != JAY_TYPE_UNTYPED) {
      jay_print_type(fp, I->type);
   }

   if (I->op == JAY_OPCODE_BFN) {
      fprintf(fp, ".(%s)", util_lut3_to_str[jay_bfn_ctrl(I)]);
   }

   /* cmod index 0 maps to NULL in the table, hence the fallback to "" */
   const char *cmod = ENUM_TO_STR(I->conditional_mod, jay_conditional_mod_str);
   fprintf(fp, "%s%s ", I->saturate ? ".sat" : "", cmod ? cmod : "");
   sep = "";

   /* Regular sources (the trailing predication sources were printed above) */
   for (unsigned i = 0; i < I->num_srcs - I->predication; i++) {
      fprintf(fp, "%s", sep);
      jay_print_src(fp, I, i);

      /* Only print a source type when it differs from the instruction type */
      enum jay_type T = jay_src_type(I, i);
      if (T != I->type && !(T == JAY_TYPE_U1 && jay_is_flag(I->src[i]))) {
         jay_print_type(fp, T);
      }

      sep = ", ";
   }

   if (I->op != JAY_OPCODE_MATH) {
      sep = jay_print_inst_info(fp, I, sep);
   }

   /* Software scoreboard dependency info */
   if (I->dep.regdist || I->dep.mode) {
      fprintf(fp, "%s%s%s", strlen(sep) ? " {" : "{",
              I->replicate_dep ? "*" : "", I->decrement_dep ? "+" : "");
      jay_print_swsb(fp, I->dep);
      fprintf(fp, "}");
   }

   fprintf(fp, "\n");
}
|
||||
|
||||
static inline void
|
||||
indent(FILE *fp, jay_block *block, bool interior)
|
||||
{
|
||||
for (unsigned i = 0; i < block->indent + interior; i++)
|
||||
fprintf(fp, " ");
|
||||
}
|
||||
|
||||
static void
|
||||
comma_separate(FILE *fp, jay_block *block, bool *first)
|
||||
{
|
||||
if (*first) {
|
||||
indent(fp, block, true);
|
||||
*first = false;
|
||||
} else {
|
||||
fprintf(fp, ", ");
|
||||
}
|
||||
}
|
||||
|
||||
/* Print one basic block: header with predecessors, grouped phi destinations,
 * the instruction body, grouped phi sources, and the successor list.
 */
void
jay_print_block(FILE *fp, jay_block *block)
{
   indent(fp, block, false);
   fprintf(fp, "B%d%s%s", block->index, block->uniform ? " [uniform]" : "",
           block->loop_header ? " [loop header]" : "");
   bool first = true;
   jay_foreach_predecessor(block, p) {
      fprintf(fp, "%s B%d", first ? " <-" : "", (*p)->index);
      first = false;
   }
   fprintf(fp, " {\n");

   /* We group phi destinations/sources for legibility */
   first = true;
   jay_foreach_phi_dst_in_block(block, phi) {
      comma_separate(fp, block, &first);
      jay_print_def(fp, phi, -1);
   }
   fprintf(fp, "%s", first ? "" : " = 𝜙\n");

   /* Body: everything except the phis already grouped above/below */
   jay_foreach_inst_in_block(block, inst) {
      if (inst->op != JAY_OPCODE_PHI_DST && inst->op != JAY_OPCODE_PHI_SRC) {
         indent(fp, block, true);
         jay_print_inst(fp, inst);
      }
   }

   first = true;
   jay_foreach_phi_src_in_block(block, phi) {
      comma_separate(fp, block, &first);
      fprintf(fp, "𝜙%u = ", jay_phi_src_index(phi));
      jay_print_def(fp, phi, 0);
   }
   fprintf(fp, "%s", first ? "" : "\n");

   indent(fp, block, false);
   fprintf(fp, "}");
   first = true;
   jay_foreach_successor(block, succ) {
      if (succ) {
         fprintf(fp, "%s B%d", first ? " ->" : "", succ->index);
         first = false;
      }
   }
   fprintf(fp, "\n\n");
}
|
||||
|
||||
void
|
||||
jay_print_func(FILE *fp, jay_function *f)
|
||||
{
|
||||
fprintf(fp, "Jay function: \n\n");
|
||||
jay_foreach_block(f, block) {
|
||||
jay_print_block(fp, block);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
jay_print(FILE *fp, jay_shader *s)
|
||||
{
|
||||
jay_foreach_function(s, f) {
|
||||
jay_print_func(fp, f);
|
||||
}
|
||||
}
|
||||
72
src/intel/compiler/jay/jay_private.h
Normal file
72
src/intel/compiler/jay/jay_private.h
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
/*
 * Copyright 2026 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "jay_ir.h"
#include "nir.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Debug flags for the jay_debug bitmask */
#define JAY_DBG_NOOPT BITFIELD_BIT(0)
#define JAY_DBG_PRINTDEMAND BITFIELD_BIT(1)
#define JAY_DBG_SPILL BITFIELD_BIT(2)
#define JAY_DBG_SYNC BITFIELD_BIT(3)
extern int jay_debug;

/* NIR lowering run before translation to the jay IR */
bool jay_nir_lower_bool(nir_shader *nir);
bool jay_nir_opt_sel_zero(nir_shader *nir);
bool jay_nir_lower_fsign(nir_shader *nir);

/* Liveness / register-demand analysis */
void jay_compute_liveness(jay_function *f);
void jay_calculate_register_demands(jay_function *f);

/* Spilling and register allocation */
void jay_spill(jay_function *func, enum jay_file file, unsigned limit);
void jay_partition_grf(jay_shader *shader);
void jay_register_allocate(jay_shader *s);
void jay_assign_flags(jay_shader *s);
void jay_repair_ssa(jay_function *func);

/* Printing / debug dumps */
const char *jay_file_to_string(enum jay_file file);
void jay_print_type(FILE *f, enum jay_type t);
void jay_print_inst(FILE *f, jay_inst *I);
void jay_print_block(FILE *f, jay_block *block);
void jay_print_func(FILE *fp, jay_function *func);
void jay_print(FILE *f, jay_shader *s);

/* IR validation; compiled out in release builds */
#ifndef NDEBUG
void jay_validate(jay_shader *s, const char *when);
void jay_validate_ra(jay_function *func);
#else
static inline void
jay_validate(jay_shader *s, const char *when)
{
}

static inline void
jay_validate_ra(jay_function *func)
{
}
#endif

/* Optimization passes */
void jay_opt_propagate_forwards(jay_shader *s);
void jay_opt_propagate_backwards(jay_shader *s);
void jay_opt_dead_code(jay_shader *s);
void jay_opt_control_flow(jay_shader *s);

/* Lowering passes */
void jay_lower_pre_ra(jay_shader *s);
void jay_lower_post_ra(jay_shader *s);
void jay_lower_spill(jay_function *func);
void jay_lower_simd_width(jay_shader *s);
void jay_lower_scoreboard(jay_shader *s);

/* Final binary emission */
struct jay_shader_bin *
jay_to_binary(jay_shader *s, void *const_data, size_t const_data_size);

#ifdef __cplusplus
} /* extern C */
#endif
|
||||
1659
src/intel/compiler/jay/jay_register_allocate.c
Normal file
1659
src/intel/compiler/jay/jay_register_allocate.c
Normal file
File diff suppressed because it is too large
Load diff
247
src/intel/compiler/jay/jay_repair_ssa.c
Normal file
247
src/intel/compiler/jay/jay_repair_ssa.c
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2023 Alyssa Rosenzweig
|
||||
* Copyright 2023 Valve Corporation
|
||||
* Copyright 2022 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/*
|
||||
* Implementation of "Simple and Efficient
|
||||
* Construction of Static Single Assignment Form", also by Braun et al.
|
||||
* https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf
|
||||
*/
|
||||
|
||||
#include "util/bitset.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_builder_opcodes.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/* A phi recorded before its block was sealed (not all predecessors known):
 * remembers which pre-SSA def it stands for and the new index assigned to it.
 */
struct incomplete_phi {
   jay_def old;
   unsigned new;
};

/* A pending phi: the block it lives in, one source index per predecessor,
 * the pre-SSA def it reconstructs, and its destination index.
 */
struct phi {
   jay_block *block;
   unsigned *src;
   jay_def old;
   unsigned dst;
};

/* Per-run state for SSA repair (Braun et al. construction). */
struct ctx {
   /* Array of index->index maps with the remapped definition at block end */
   struct hash_table_u64 **defs;
   /* Maps a trivial phi's dst to its replacement (chased by remap_idx) */
   struct hash_table_u64 *remap;
   struct util_dynarray phis, indices, *incomplete_phis;
   /* Bit per block: all predecessors known, safe to resolve its phis */
   BITSET_WORD *sealed;
   void *linctx;
   unsigned alloc, idx_i;
};

/* Iterate surviving (non-removed) phis; removed ones have block == NULL. */
#define jay_repair_foreach_phi(ctx, phi) \
   util_dynarray_foreach(&(ctx)->phis, struct phi, phi) \
      if (phi->block != NULL)
|
||||
|
||||
static unsigned lookup(struct ctx *ctx, jay_block *block, jay_def def);
|
||||
|
||||
static unsigned
|
||||
remap_idx(struct ctx *ctx, unsigned idx)
|
||||
{
|
||||
/* TODO: Switch to union-find */
|
||||
void *remapped;
|
||||
while ((remapped = _mesa_hash_table_u64_search(ctx->remap, idx))) {
|
||||
idx = (uintptr_t) remapped;
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static bool
|
||||
try_remove_trivial_phi(struct ctx *ctx, struct phi *phi)
|
||||
{
|
||||
unsigned same = 0;
|
||||
for (unsigned i = 0; i < jay_num_predecessors(phi->block); ++i) {
|
||||
unsigned src = remap_idx(ctx, phi->src[i]);
|
||||
if (same && src != same && src != phi->dst) {
|
||||
/* Nontrivial */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (src != phi->dst) {
|
||||
same = src;
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_hash_table_u64_insert(ctx->remap, phi->dst, (void *) (uintptr_t) same);
|
||||
phi->block = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
add_phi(struct ctx *ctx, jay_block *block, jay_def src, unsigned dst)
|
||||
{
|
||||
unsigned i = 0, n = jay_num_predecessors(block);
|
||||
unsigned *srcs = linear_alloc_array(ctx->linctx, unsigned, n);
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
assert(i < n);
|
||||
srcs[i++] = lookup(ctx, *pred, src);
|
||||
}
|
||||
|
||||
struct phi tmpl = { .block = block, .old = src, .dst = dst, .src = srcs };
|
||||
if (!try_remove_trivial_phi(ctx, &tmpl)) {
|
||||
util_dynarray_append(&ctx->phis, tmpl);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
lookup(struct ctx *ctx, jay_block *block, jay_def def)
|
||||
{
|
||||
/* Lookup within a block */
|
||||
struct hash_table_u64 *ht = ctx->defs[block->index];
|
||||
void *local = _mesa_hash_table_u64_search(ht, jay_index(def));
|
||||
if (local) {
|
||||
return (uintptr_t) local;
|
||||
}
|
||||
|
||||
/* For a single predecessor, we can recurse without adding a phi. */
|
||||
bool insert_phi = jay_num_predecessors(block) > 1;
|
||||
unsigned val = insert_phi ? ctx->alloc++ :
|
||||
lookup(ctx, jay_first_predecessor(block), def);
|
||||
|
||||
_mesa_hash_table_u64_insert(ctx->defs[block->index], jay_index(def),
|
||||
(void *) (uintptr_t) val);
|
||||
|
||||
if (block->loop_header && !BITSET_TEST(ctx->sealed, block->index)) {
|
||||
struct incomplete_phi tmpl = { .old = def, .new = val };
|
||||
util_dynarray_append(&ctx->incomplete_phis[block->index], tmpl);
|
||||
} else if (insert_phi) {
|
||||
add_phi(ctx, block, def, val);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static void
|
||||
remap(struct ctx *ctx, jay_builder *b, jay_def *inout)
|
||||
{
|
||||
jay_def def = *inout;
|
||||
unsigned reg = def.reg;
|
||||
jay_foreach_index(def, c, index) {
|
||||
unsigned el = ctx->idx_i++;
|
||||
assert(el < util_dynarray_num_elements(&ctx->indices, unsigned));
|
||||
unsigned idx = *util_dynarray_element(&ctx->indices, unsigned, el);
|
||||
idx = remap_idx(ctx, idx);
|
||||
jay_insert_channel(b, inout, c, jay_scalar(def.file, idx));
|
||||
}
|
||||
|
||||
/* We run after flag RA, so preserve flag registers */
|
||||
if (jay_is_flag(def)) {
|
||||
inout->reg = reg;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
jay_repair_ssa(jay_function *func)
|
||||
{
|
||||
jay_builder b = jay_init_builder(func, jay_before_function(func));
|
||||
void *memctx = ralloc_context(NULL);
|
||||
void *linctx = linear_context(memctx);
|
||||
BITSET_WORD *sealed = BITSET_LINEAR_ZALLOC(linctx, func->num_blocks);
|
||||
struct ctx ctx = { .sealed = sealed, .alloc = 1, .linctx = linctx };
|
||||
unsigned *phi_remap = linear_zalloc_array(linctx, unsigned, func->ssa_alloc);
|
||||
|
||||
ctx.remap = _mesa_hash_table_u64_create(memctx);
|
||||
ctx.defs =
|
||||
linear_alloc_array(linctx, struct hash_table_u64 *, func->num_blocks);
|
||||
ctx.incomplete_phis =
|
||||
linear_alloc_array(linctx, struct util_dynarray, func->num_blocks);
|
||||
|
||||
jay_foreach_block(func, block) {
|
||||
ctx.defs[block->index] = _mesa_hash_table_u64_create(memctx);
|
||||
util_dynarray_init(&ctx.incomplete_phis[block->index], memctx);
|
||||
}
|
||||
|
||||
util_dynarray_init(&ctx.phis, memctx);
|
||||
util_dynarray_init(&ctx.indices, memctx);
|
||||
|
||||
jay_foreach_block(func, block) {
|
||||
jay_foreach_inst_in_block(block, I) {
|
||||
jay_foreach_src_index(I, s, c, index) {
|
||||
unsigned val = lookup(&ctx, block, jay_extract(I->src[s], c));
|
||||
util_dynarray_append(&ctx.indices, val);
|
||||
}
|
||||
|
||||
jay_foreach_dst_index(I, d, index) {
|
||||
unsigned val = ctx.alloc++;
|
||||
util_dynarray_append(&ctx.indices, val);
|
||||
if (I->op == JAY_OPCODE_PHI_DST) {
|
||||
phi_remap[index] = val;
|
||||
}
|
||||
|
||||
_mesa_hash_table_u64_insert(ctx.defs[block->index], index,
|
||||
(void *) (uintptr_t) val);
|
||||
}
|
||||
}
|
||||
|
||||
/* Seal loop headers after processing the back edge */
|
||||
jay_foreach_successor(block, succ) {
|
||||
if (succ->loop_header && succ->index <= block->index) {
|
||||
util_dynarray_foreach(&ctx.incomplete_phis[succ->index],
|
||||
struct incomplete_phi, el) {
|
||||
add_phi(&ctx, succ, el->old, el->new);
|
||||
}
|
||||
|
||||
assert(!BITSET_TEST(sealed, succ->index) && "unique backedge");
|
||||
BITSET_SET(sealed, succ->index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Optimize trivial phis resulting from backedges. Use-lists would avoid the
|
||||
* fixed point algorithm but this should be good enough for now.
|
||||
*/
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
jay_repair_foreach_phi(&ctx, phi) {
|
||||
progress |= try_remove_trivial_phi(&ctx, phi);
|
||||
}
|
||||
} while (progress);
|
||||
|
||||
/* Now apply everything */
|
||||
jay_foreach_block(func, block) {
|
||||
jay_foreach_phi_src_in_block(block, I) {
|
||||
jay_set_phi_src_index(I, phi_remap[jay_phi_src_index(I)]);
|
||||
}
|
||||
|
||||
jay_foreach_inst_in_block(block, I) {
|
||||
jay_foreach_ssa_src(I, s) {
|
||||
remap(&ctx, &b, &I->src[s]);
|
||||
}
|
||||
|
||||
remap(&ctx, &b, &I->dst);
|
||||
remap(&ctx, &b, &I->cond_flag);
|
||||
}
|
||||
}
|
||||
|
||||
jay_repair_foreach_phi(&ctx, phi) {
|
||||
b.cursor = jay_before_block(phi->block);
|
||||
jay_PHI_DST(&b, jay_scalar(phi->old.file, phi->dst));
|
||||
|
||||
unsigned i = 0;
|
||||
jay_foreach_predecessor(phi->block, pred) {
|
||||
b.cursor = jay_before_jump(*pred);
|
||||
unsigned idx = remap_idx(&ctx, phi->src[i++]);
|
||||
jay_PHI_SRC_u32(&b, jay_scalar(phi->old.file, idx), phi->dst);
|
||||
}
|
||||
}
|
||||
|
||||
func->ssa_alloc = ctx.alloc;
|
||||
ralloc_free(memctx);
|
||||
}
|
||||
63
src/intel/compiler/jay/jay_simd_width.c
Normal file
63
src/intel/compiler/jay/jay_simd_width.c
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
|
||||
static unsigned
|
||||
max_simd_width(jay_shader *shader, const jay_inst *I)
|
||||
{
|
||||
/* Only certain "complex" quad swizzles require splitting down to SIMD4 */
|
||||
if (I->op == JAY_OPCODE_QUAD_SWIZZLE &&
|
||||
(jay_quad_swizzle_swizzle(I) == JAY_QUAD_SWIZZLE_XYXY ||
|
||||
jay_quad_swizzle_swizzle(I) == JAY_QUAD_SWIZZLE_ZWZW)) {
|
||||
return 4;
|
||||
}
|
||||
|
||||
/* These special instructions need to be split for various reasons. */
|
||||
if (I->op == JAY_OPCODE_EXPAND_QUAD ||
|
||||
I->op == JAY_OPCODE_EXTRACT_LAYER ||
|
||||
I->op == JAY_OPCODE_EXTRACT_BYTE_PER_8LANES ||
|
||||
I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS ||
|
||||
I->op == JAY_OPCODE_MUL_32 ||
|
||||
I->op == JAY_OPCODE_SHUFFLE) {
|
||||
return 16;
|
||||
}
|
||||
|
||||
if (I->op != JAY_OPCODE_SEND) {
|
||||
/* If any source/destination is 64-bit strided, we must split to avoid
|
||||
* crossing more than 2 GRFs. Note that SENDs don't have this restriction,
|
||||
* we don't have to split A64 load/store.
|
||||
*/
|
||||
if (I->dst.file == GPR &&
|
||||
jay_def_stride(shader, I->dst) == JAY_STRIDE_8) {
|
||||
return 16;
|
||||
}
|
||||
|
||||
jay_foreach_src(I, s) {
|
||||
if (I->src[s].file == GPR &&
|
||||
jay_def_stride(shader, I->src[s]) == JAY_STRIDE_8) {
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* TODO: Do we ever split SENDs? ..Can we even split SENDs given we don't
|
||||
* have stride control? How is this supposed to work?
|
||||
*
|
||||
* XXX
|
||||
*/
|
||||
}
|
||||
|
||||
return 32;
|
||||
}
|
||||
|
||||
unsigned
|
||||
jay_simd_split(jay_shader *s, const jay_inst *I)
|
||||
{
|
||||
unsigned actual = jay_simd_width_logical(s, I);
|
||||
unsigned max = max_simd_width(s, I);
|
||||
|
||||
return (actual > max) ? (util_logbase2(actual) - util_logbase2(max)) : 0;
|
||||
}
|
||||
849
src/intel/compiler/jay/jay_spill.c
Normal file
849
src/intel/compiler/jay/jay_spill.c
Normal file
|
|
@ -0,0 +1,849 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2023-2024 Alyssa Rosenzweig
|
||||
* Copyright 2023-2024 Valve Corporation
|
||||
* Copyright 2022 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/bitset.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/sparse_bitset.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_qsort.h"
|
||||
#include "util/u_worklist.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/*
|
||||
* An implementation of "Register Spilling and Live-Range Splitting for SSA-Form
|
||||
* Programs" by Braun and Hack.
|
||||
*
|
||||
* Next-use distances are logically in ℤ ∪ {∞}, modelled as saturating uint32
|
||||
* and referred to as dist_t. Within a block, next-use data is dense. At block
|
||||
* boundaries, next-use maps are stored as key-value pairs, where only variables
|
||||
* with later uses (finite distance) are stored. That sparse representation
|
||||
* ensures linear-time even for shaders with many blocks.
|
||||
*/
|
||||
#define DIST_INFINITY (UINT32_MAX)
|
||||
typedef uint32_t dist_t;
|
||||
|
||||
struct next_use {
|
||||
uint32_t index;
|
||||
dist_t dist;
|
||||
};
|
||||
|
||||
static void
|
||||
add_next_use(struct util_dynarray *nu, unsigned node, dist_t dist)
|
||||
{
|
||||
struct next_use use = { .index = node, .dist = dist };
|
||||
util_dynarray_append(nu, use);
|
||||
}
|
||||
|
||||
#define foreach_next_use(nu, it) util_dynarray_foreach(nu, struct next_use, it)
|
||||
|
||||
static dist_t
|
||||
add_dist(dist_t A, dist_t B)
|
||||
{
|
||||
return (A + B < A) ? DIST_INFINITY : (A + B);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the minimum of two next-use sets. Values absent from one of the
|
||||
* underlying sets are infinity so do not contribute to the minimum, instead
|
||||
* acting like a set union.
|
||||
*/
|
||||
static bool
|
||||
minimum_next_uses(struct util_dynarray *nu,
|
||||
const struct util_dynarray *from,
|
||||
dist_t *tmp_dist,
|
||||
struct u_sparse_bitset *tmp_set)
|
||||
{
|
||||
/* Convert "from" to be dense */
|
||||
u_sparse_bitset_clear_all(tmp_set);
|
||||
|
||||
foreach_next_use(from, it) {
|
||||
u_sparse_bitset_set(tmp_set, it->index);
|
||||
tmp_dist[it->index] = it->dist;
|
||||
}
|
||||
|
||||
bool progress = false;
|
||||
|
||||
/* Take the minimum of common elements */
|
||||
foreach_next_use(nu, it) {
|
||||
if (u_sparse_bitset_test(tmp_set, it->index)) {
|
||||
if (tmp_dist[it->index] < it->dist) {
|
||||
it->dist = tmp_dist[it->index];
|
||||
progress = true;
|
||||
}
|
||||
|
||||
u_sparse_bitset_clear(tmp_set, it->index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Add elements that are only in "from" */
|
||||
U_SPARSE_BITSET_FOREACH_SET(tmp_set, index) {
|
||||
add_next_use(nu, index, tmp_dist[index]);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
inst_cycles(const jay_inst *I)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct spill_block {
|
||||
/* W/S sets at the start/end of the block, see spill_ctx::{W,S} */
|
||||
struct u_sparse_bitset W_in, W_out, S_in, S_out;
|
||||
|
||||
/* Next-use maps at the start/end of the block */
|
||||
struct util_dynarray next_use_in, next_use_out;
|
||||
|
||||
/* Estimated cycle count of the block */
|
||||
uint32_t cycles;
|
||||
};
|
||||
|
||||
struct spill_ctx {
|
||||
jay_function *func;
|
||||
|
||||
/* Register file being spilled */
|
||||
enum jay_file file;
|
||||
|
||||
/* Set of values whose file equals `file` */
|
||||
BITSET_WORD *in_file;
|
||||
|
||||
/* Set of values currently available in the register file */
|
||||
struct u_sparse_bitset W;
|
||||
|
||||
/* For W-entry calculation, phis with a spilled source. For
|
||||
* coupling calculation, phis defined along the given edge.
|
||||
*/
|
||||
struct u_sparse_bitset phi_set;
|
||||
|
||||
/* |W| = Current register pressure */
|
||||
unsigned nW;
|
||||
|
||||
/* For each variable in N, local IPs of next-use. Else, infinite. */
|
||||
struct u_sparse_bitset N;
|
||||
dist_t *next_uses;
|
||||
|
||||
/* Current local IP relative to the start of the block */
|
||||
uint32_t ip;
|
||||
|
||||
/* Set of live values that have been spilled. Contrary to the paper, this
|
||||
* is not a subset of W: the definition in the paper is bogus.
|
||||
*/
|
||||
struct u_sparse_bitset S;
|
||||
|
||||
/* If a value is rematerializable or a phi, its definition. Else, NULL */
|
||||
jay_inst **defs;
|
||||
|
||||
/* Maximum register pressure allowed */
|
||||
unsigned k;
|
||||
|
||||
/* Number of variables */
|
||||
unsigned n;
|
||||
|
||||
/* Information on blocks indexed in source order */
|
||||
struct spill_block *blocks;
|
||||
|
||||
/* Preallocated array of candidates for calculating W entry */
|
||||
struct next_use *candidates;
|
||||
struct util_dynarray next_ip;
|
||||
};
|
||||
|
||||
static inline jay_def
|
||||
jay_def_as_mem(struct spill_ctx *ctx, jay_def idx)
|
||||
{
|
||||
assert(idx.file == GPR || idx.file == UGPR);
|
||||
idx.file = idx.file == UGPR ? UMEM : MEM;
|
||||
idx._payload = jay_base_index(idx) + ctx->n;
|
||||
return idx;
|
||||
}
|
||||
|
||||
static bool
|
||||
can_remat(jay_inst *I)
|
||||
{
|
||||
/* TODO */
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
can_remat_node(struct spill_ctx *ctx, unsigned node)
|
||||
{
|
||||
return ctx->defs[node] && ctx->defs[node]->op != JAY_OPCODE_PHI_DST;
|
||||
}
|
||||
|
||||
static jay_inst *
|
||||
remat_to(jay_builder *b, jay_def dst, struct spill_ctx *ctx, unsigned node)
|
||||
{
|
||||
jay_inst *I = ctx->defs[node];
|
||||
assert(can_remat(I));
|
||||
|
||||
UNREACHABLE("invalid remat");
|
||||
}
|
||||
|
||||
static void
|
||||
insert_spill(jay_builder *b, struct spill_ctx *ctx, unsigned node)
|
||||
{
|
||||
if (!can_remat_node(ctx, node)) {
|
||||
jay_def idx = jay_scalar(ctx->file, node);
|
||||
jay_MOV(b, jay_def_as_mem(ctx, idx), idx);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
insert_reload(struct spill_ctx *ctx,
|
||||
jay_block *block,
|
||||
jay_cursor cursor,
|
||||
unsigned node)
|
||||
{
|
||||
jay_builder b = jay_init_builder(ctx->func, cursor);
|
||||
jay_def idx = jay_scalar(ctx->file, node);
|
||||
|
||||
/* Reloading breaks SSA, but jay_repair_ssa will repair */
|
||||
if (can_remat_node(ctx, node)) {
|
||||
remat_to(&b, idx, ctx, node);
|
||||
} else {
|
||||
jay_MOV(&b, idx, jay_def_as_mem(ctx, idx));
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert into the register file */
|
||||
static void
|
||||
insert_W(struct spill_ctx *ctx, unsigned v)
|
||||
{
|
||||
assert(!u_sparse_bitset_test(&ctx->W, v));
|
||||
assert(BITSET_TEST(ctx->in_file, v));
|
||||
|
||||
u_sparse_bitset_set(&ctx->W, v);
|
||||
ctx->nW++;
|
||||
}
|
||||
|
||||
/* Remove from the register file */
|
||||
static void
|
||||
remove_W(struct spill_ctx *ctx, unsigned v)
|
||||
{
|
||||
assert(u_sparse_bitset_test(&ctx->W, v));
|
||||
assert(BITSET_TEST(ctx->in_file, v));
|
||||
|
||||
u_sparse_bitset_clear(&ctx->W, v);
|
||||
ctx->nW--;
|
||||
}
|
||||
|
||||
static int
|
||||
nu_score(struct spill_ctx *ctx, struct next_use nu)
|
||||
{
|
||||
/* We assume that rematerializing - even before every instuction - is
|
||||
* cheaper than spilling. As long as one of the nodes is rematerializable
|
||||
* (with distance > 0), we choose it over spilling. Within a class of nodes
|
||||
* (rematerializable or not), compare by next-use-distance.
|
||||
*/
|
||||
bool remat = can_remat_node(ctx, nu.index) && nu.dist > 0;
|
||||
return (remat ? 0 : 100000) + nu.dist;
|
||||
}
|
||||
|
||||
static int
|
||||
cmp_dist(const void *left_, const void *right_, void *ctx)
|
||||
{
|
||||
const struct next_use *left = left_;
|
||||
const struct next_use *right = right_;
|
||||
int l = nu_score(ctx, *left), r = nu_score(ctx, *right);
|
||||
|
||||
return (l > r) - (l < r);
|
||||
}
|
||||
|
||||
/*
|
||||
* Limit the register file W to maximum size m by evicting registers.
|
||||
*/
|
||||
static ATTRIBUTE_NOINLINE void
|
||||
limit(struct spill_ctx *ctx, jay_inst *I, unsigned m)
|
||||
{
|
||||
/* Nothing to do if we're already below the limit */
|
||||
if (ctx->nW <= m) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Gather candidates for eviction. Note that next_uses gives IPs whereas
|
||||
* cmp_dist expects relative distances. This requires us to subtract ctx->ip
|
||||
* to ensure that cmp_dist works properly. Even though logically it shouldn't
|
||||
* affect the sorted order, practically this matters for correctness with
|
||||
* rematerialization. See the dist=0 test in cmp_dist.
|
||||
*/
|
||||
struct next_use vars[JAY_NUM_UGPR];
|
||||
unsigned j = 0;
|
||||
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ctx->W, i) {
|
||||
assert(ctx->next_uses[i] != DIST_INFINITY && "live in W");
|
||||
dist_t dist = ctx->next_uses[i] - ctx->ip;
|
||||
|
||||
assert(j < ARRAY_SIZE(vars));
|
||||
vars[j++] = (struct next_use) { .index = i, .dist = dist };
|
||||
}
|
||||
|
||||
/* Sort by next-use distance */
|
||||
util_qsort_r(vars, j, sizeof(struct next_use), cmp_dist, ctx);
|
||||
|
||||
/* Evict what doesn't fit, inserting a spill for evicted values that we
|
||||
* haven't spilled before with a future use.
|
||||
*/
|
||||
for (unsigned i = m; i < j; ++i) {
|
||||
if (!u_sparse_bitset_test(&ctx->S, vars[i].index)) {
|
||||
jay_builder b = jay_init_builder(ctx->func, jay_before_inst(I));
|
||||
insert_spill(&b, ctx, vars[i].index);
|
||||
u_sparse_bitset_set(&ctx->S, vars[i].index);
|
||||
}
|
||||
|
||||
remove_W(ctx, vars[i].index);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert coupling code on block boundaries. This must ensure:
|
||||
*
|
||||
* - anything live-in we expect to have spilled is spilled
|
||||
* - anything live-in we expect to have filled is filled
|
||||
* - phi sources are spilled if the destination is spilled
|
||||
* - phi sources are filled if the destination is not spilled
|
||||
*
|
||||
* The latter two requirements ensure correct pressure calculations for phis.
|
||||
*/
|
||||
static ATTRIBUTE_NOINLINE void
|
||||
insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
||||
{
|
||||
jay_builder b = jay_init_builder(ctx->func, jay_before_function(ctx->func));
|
||||
struct spill_block *sp = &ctx->blocks[pred->index];
|
||||
struct spill_block *ss = &ctx->blocks[succ->index];
|
||||
|
||||
/* Insert spill/fill at phi sources to match their destination */
|
||||
jay_foreach_phi_src_in_block(pred, phi_src) {
|
||||
jay_inst *phi_dst = ctx->defs[jay_phi_src_index(phi_src)];
|
||||
unsigned src = jay_index(phi_src->src[0]);
|
||||
|
||||
if (phi_src->src[0].file == ctx->file) {
|
||||
if (jay_is_mem(phi_dst->dst)) {
|
||||
if (!u_sparse_bitset_test(&sp->S_out, src)) {
|
||||
/* Spill the phi source. TODO: avoid redundant spills here */
|
||||
b.cursor = jay_after_block_logical(pred);
|
||||
insert_spill(&b, ctx, src);
|
||||
}
|
||||
|
||||
if (can_remat_node(ctx, jay_index(phi_src->src[0]))) {
|
||||
jay_def idx = jay_scalar(ctx->file, src);
|
||||
jay_def tmp = jay_alloc_def(&b, ctx->file, 1);
|
||||
|
||||
b.cursor = jay_before_function(ctx->func);
|
||||
remat_to(&b, tmp, ctx, src);
|
||||
jay_MOV(&b, jay_def_as_mem(ctx, idx), tmp);
|
||||
}
|
||||
|
||||
/* Use the spilled version */
|
||||
phi_src->src[0] = jay_def_as_mem(ctx, phi_src->src[0]);
|
||||
jay_set_phi_src_index(phi_src, jay_index(phi_dst->dst));
|
||||
} else if (!u_sparse_bitset_test(&sp->W_out, src)) {
|
||||
/* Fill the phi source in the predecessor */
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ), src);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Anything assumed to be spilled in succ must be spilled along all edges. */
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ss->S_in, v) {
|
||||
if (!u_sparse_bitset_test(&sp->S_out, v)) {
|
||||
b.cursor = jay_along_edge(pred, succ);
|
||||
insert_spill(&b, ctx, v);
|
||||
}
|
||||
}
|
||||
|
||||
jay_foreach_phi_dst_in_block(succ, phi) {
|
||||
u_sparse_bitset_set(&ctx->phi_set, jay_index(phi->dst));
|
||||
}
|
||||
|
||||
/* Variables in W at the start of succ must be defined along the edge.
|
||||
* If not live at the end of the predecessor (and it's not a phi defined in
|
||||
* the successor), insert a reload.
|
||||
*/
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ss->W_in, v) {
|
||||
if (!u_sparse_bitset_test(&sp->W_out, v) &&
|
||||
!u_sparse_bitset_test(&ctx->phi_set, v)) {
|
||||
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ), v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static dist_t
|
||||
lookup_next_use(struct spill_ctx *ctx, unsigned v)
|
||||
{
|
||||
return u_sparse_bitset_test(&ctx->N, v) ? ctx->next_uses[v] : DIST_INFINITY;
|
||||
}
|
||||
|
||||
/*
|
||||
* Produce an array of next-use IPs relative to the start of the block. This is
|
||||
* an array of dist_t scalars, representing the next-use IP of each SSA dest
|
||||
* (right-to-left) and SSA source (left-to-right) of each instuction in the
|
||||
* block (bottom-to-top). Its size equals the # of SSA sources in the block.
|
||||
*/
|
||||
static ATTRIBUTE_NOINLINE void
|
||||
populate_local_next_use(struct spill_ctx *ctx, jay_block *block)
|
||||
{
|
||||
struct spill_block *sb = &ctx->blocks[block->index];
|
||||
unsigned ip = sb->cycles;
|
||||
|
||||
foreach_next_use(&sb->next_use_out, it) {
|
||||
dist_t d = add_dist(it->dist, ip);
|
||||
|
||||
if (d != DIST_INFINITY) {
|
||||
u_sparse_bitset_set(&ctx->N, it->index);
|
||||
ctx->next_uses[it->index] = d;
|
||||
}
|
||||
}
|
||||
|
||||
jay_foreach_inst_in_block_rev(block, I) {
|
||||
ip -= inst_cycles(I);
|
||||
|
||||
jay_foreach_src_index(I, s, c, v) {
|
||||
if (I->src[s].file == ctx->file) {
|
||||
if (I->op != JAY_OPCODE_PHI_SRC) {
|
||||
util_dynarray_append(&ctx->next_ip, lookup_next_use(ctx, v));
|
||||
}
|
||||
|
||||
ctx->next_uses[v] = ip;
|
||||
u_sparse_bitset_set(&ctx->N, v);
|
||||
}
|
||||
}
|
||||
|
||||
if (I->dst.file == ctx->file) {
|
||||
jay_foreach_index_rev(I->dst, _, v) {
|
||||
util_dynarray_append(&ctx->next_ip, lookup_next_use(ctx, v));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(ip == 0 && "cycle counting is consistent");
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert spills/fills for a single basic block, following Belady's algorithm.
|
||||
* Corresponds to minAlgorithm from the paper.
|
||||
*/
|
||||
static ATTRIBUTE_NOINLINE void
|
||||
min_algorithm(struct spill_ctx *ctx,
|
||||
jay_block *block,
|
||||
struct spill_block *sb,
|
||||
dist_t *next_ips,
|
||||
unsigned next_use_cursor)
|
||||
{
|
||||
jay_foreach_inst_in_block(block, I) {
|
||||
assert(ctx->nW <= ctx->k && "invariant");
|
||||
|
||||
/* Phis are special since they happen along the edge. When we initialized
|
||||
* W and S, we implicitly chose which phis are spilled. So, here we just
|
||||
* need to rewrite the phis to write into memory.
|
||||
*
|
||||
* Phi sources are handled later.
|
||||
*/
|
||||
if (I->op == JAY_OPCODE_PHI_DST) {
|
||||
if (I->dst.file == ctx->file) {
|
||||
if (!u_sparse_bitset_test(&ctx->W, jay_index(I->dst))) {
|
||||
u_sparse_bitset_set(&ctx->S, jay_index(I->dst));
|
||||
I->dst = jay_def_as_mem(ctx, I->dst);
|
||||
}
|
||||
}
|
||||
|
||||
ctx->ip += inst_cycles(I);
|
||||
continue;
|
||||
} else if (I->op == JAY_OPCODE_PHI_SRC) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Any source that is not in W needs to be reloaded. Gather the set R of
|
||||
* such values, and add them to the register file.
|
||||
*/
|
||||
unsigned R[JAY_MAX_SRCS], nR = 0;
|
||||
|
||||
jay_foreach_src_index(I, s, c, v) {
|
||||
if (I->src[s].file == ctx->file && !u_sparse_bitset_test(&ctx->W, v)) {
|
||||
R[nR++] = v;
|
||||
insert_W(ctx, v);
|
||||
|
||||
assert(u_sparse_bitset_test(&ctx->S, v) && "must have spilled");
|
||||
assert(nR <= ARRAY_SIZE(R) && "maximum source count");
|
||||
}
|
||||
}
|
||||
|
||||
/* Limit W to make space for the operands.
|
||||
*
|
||||
* We need to round up to power-of-two destination sizes to match the
|
||||
* rounding in demand calculation.
|
||||
*/
|
||||
bool has_dst = I->dst.file == ctx->file;
|
||||
unsigned dst_size = util_next_power_of_two(jay_num_values(I->dst));
|
||||
limit(ctx, I, ctx->k - (has_dst ? dst_size : 0));
|
||||
|
||||
/* Add destinations to the register file */
|
||||
if (I->dst.file == ctx->file) {
|
||||
jay_foreach_index(I->dst, _, index) {
|
||||
assert(next_use_cursor >= 1);
|
||||
ctx->next_uses[index] = next_ips[--next_use_cursor];
|
||||
|
||||
if (ctx->next_uses[index] != DIST_INFINITY) {
|
||||
insert_W(ctx, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Update next-use distances for this instuction. Unlike the paper, we
|
||||
* require W contain only live values (with finite next-use distance).
|
||||
*
|
||||
* This happens after the above limit() calls to model sources as
|
||||
* late-kill. This is conservative and could be improved, but it matches
|
||||
* how we currently estimate register demand.
|
||||
*/
|
||||
jay_foreach_src_index_rev(I, s, c, node) {
|
||||
if (I->src[s].file == ctx->file) {
|
||||
assert(next_use_cursor >= 1);
|
||||
ctx->next_uses[node] = next_ips[--next_use_cursor];
|
||||
|
||||
if (ctx->next_uses[node] == DIST_INFINITY) {
|
||||
remove_W(ctx, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Add reloads for the sources in front of the instuction. */
|
||||
for (unsigned i = 0; i < nR; ++i) {
|
||||
insert_reload(ctx, block, jay_before_inst(I), R[i]);
|
||||
}
|
||||
|
||||
ctx->ip += inst_cycles(I);
|
||||
|
||||
if (jay_debug & JAY_DBG_PRINTDEMAND) {
|
||||
printf("(SP) %u: ", ctx->nW);
|
||||
jay_print_inst(stdout, I);
|
||||
}
|
||||
}
|
||||
|
||||
assert(next_use_cursor == 0 && "exactly sized");
|
||||
|
||||
u_sparse_bitset_dup(&sb->W_out, &ctx->W);
|
||||
u_sparse_bitset_dup(&sb->S_out, &ctx->S);
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Implement section 4.2 of the paper.
|
||||
*
|
||||
* For now, we implement the simpler heuristic in Hack's thesis: sort
|
||||
* the live-in set (+ destinations of phis) by next-use distance.
|
||||
*/
|
||||
static ATTRIBUTE_NOINLINE void
|
||||
compute_w_entry_loop_header(struct spill_ctx *ctx, jay_block *block)
|
||||
{
|
||||
unsigned j = 0;
|
||||
/* TODO: Account for phis too! */
|
||||
foreach_next_use(&ctx->blocks[block->index].next_use_in, it) {
|
||||
assert(j < ctx->n);
|
||||
ctx->candidates[j++] = *it;
|
||||
}
|
||||
|
||||
/* Take the best candidates sorted by next-use distance */
|
||||
unsigned n = MIN2(j, ctx->k - ctx->nW);
|
||||
if (n < j) {
|
||||
util_qsort_r(ctx->candidates, j, sizeof(struct next_use), cmp_dist, ctx);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
insert_W(ctx, ctx->candidates[i].index);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute W_entry for a block. Section 4.2 in the paper.
|
||||
*/
|
||||
static ATTRIBUTE_NOINLINE void
|
||||
compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
||||
{
|
||||
unsigned j = 0;
|
||||
|
||||
/* Variables that are in all predecessors are assumed in W_entry. Phis and
|
||||
* variables in some predecessors are scored by next-use.
|
||||
*/
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ctx->N, i) {
|
||||
bool all = true, any = false;
|
||||
|
||||
jay_foreach_predecessor(block, P) {
|
||||
bool in = u_sparse_bitset_test(&ctx->blocks[(*P)->index].W_out, i);
|
||||
all &= in;
|
||||
any |= in;
|
||||
}
|
||||
|
||||
if (all) {
|
||||
insert_W(ctx, i);
|
||||
} else if (any) {
|
||||
ctx->candidates[j++] =
|
||||
(struct next_use) { .index = i, .dist = ctx->next_uses[i] };
|
||||
}
|
||||
}
|
||||
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
jay_foreach_phi_src_in_block(*pred, I) {
|
||||
if (!u_sparse_bitset_test(&ctx->blocks[(*pred)->index].W_out,
|
||||
jay_index(I->src[0]))) {
|
||||
|
||||
u_sparse_bitset_set(&ctx->phi_set, jay_phi_src_index(I));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Heuristic: if any phi source is spilled, spill the phi. While suboptimal,
|
||||
* this reduces pointless spills/fills with massive phi webs.
|
||||
*/
|
||||
jay_foreach_phi_dst_in_block(block, I) {
|
||||
if (!u_sparse_bitset_test(&ctx->phi_set, jay_index(I->dst))) {
|
||||
ctx->candidates[j++] = (struct next_use) {
|
||||
.index = jay_index(I->dst),
|
||||
.dist = ctx->next_uses[jay_index(I->dst)],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/* Take the best candidates sorted by next-use distance */
|
||||
unsigned n = MIN2(j, ctx->k - ctx->nW);
|
||||
if (n < j) {
|
||||
util_qsort_r(ctx->candidates, j, sizeof(struct next_use), cmp_dist, ctx);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
insert_W(ctx, ctx->candidates[i].index);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We initialize S with the union of S at the exit of (forward edge)
|
||||
* predecessors and the complement of W, intersected with the live-in set. The
|
||||
* former propagates S forward. The latter ensures we spill along the edge when
|
||||
* a live value is not selected for the entry W.
|
||||
*/
|
||||
static ATTRIBUTE_NOINLINE void
|
||||
compute_s_entry(struct spill_ctx *ctx, jay_block *block)
|
||||
{
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ctx->blocks[(*pred)->index].S_out, v) {
|
||||
if (u_sparse_bitset_test(&block->live_in, v)) {
|
||||
u_sparse_bitset_set(&ctx->S, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_SPARSE_BITSET_FOREACH_SET(&block->live_in, v) {
|
||||
if (BITSET_TEST(ctx->in_file, v) && !u_sparse_bitset_test(&ctx->W, v)) {
|
||||
u_sparse_bitset_set(&ctx->S, v);
|
||||
}
|
||||
}
|
||||
|
||||
u_sparse_bitset_dup(&ctx->blocks[block->index].S_in, &ctx->S);
|
||||
}
|
||||
|
||||
/*
 * Compute global next-use distances via a backward dataflow fixed point over
 * the CFG. For each block we record, per SSA index in the spilled file, the
 * distance (in cycles) from block entry to its next use. Per-block entry sets
 * (next_use_in) are derived from local uses plus the block's exit set
 * (next_use_out) shifted by the block's total cycle count; exit sets are the
 * element-wise minimum over successors' entry sets. ctx->W is reused here as
 * scratch ("seen locally") storage.
 */
static ATTRIBUTE_NOINLINE void
global_next_use_distances(struct spill_ctx *ctx, void *memctx)
{
   u_worklist worklist;
   u_worklist_init(&worklist, ctx->func->num_blocks, NULL);

   /* Initialize per-block state: dynarrays for the entry/exit next-use sets
    * and the block's total instruction cycle count, then seed the worklist
    * with every block.
    */
   jay_foreach_block(ctx->func, block) {
      struct spill_block *sb = &ctx->blocks[block->index];

      util_dynarray_init(&sb->next_use_in, memctx);
      util_dynarray_init(&sb->next_use_out, memctx);

      jay_foreach_inst_in_block(block, I) {
         sb->cycles += inst_cycles(I);
      }

      jay_worklist_push_head(&worklist, block);
   }

   /* Iterate the work list in reverse order since liveness is backwards */
   while (!u_worklist_is_empty(&worklist)) {
      jay_block *block = jay_worklist_pop_head(&worklist);
      struct spill_block *sb = &ctx->blocks[block->index];

      /* Clear locally accessed set (W) */
      u_sparse_bitset_clear_all(&ctx->W);
      util_dynarray_clear(&sb->next_use_in);

      uint32_t cycle = 0;

      /* Calculate dists */
      jay_foreach_inst_in_block(block, I) {
         /* Record first use before def */
         jay_foreach_src_index(I, s, c, index) {
            if (I->src[s].file == ctx->file &&
                !u_sparse_bitset_test(&ctx->W, index)) {

               add_next_use(&sb->next_use_in, index, cycle);
               u_sparse_bitset_set(&ctx->W, index);
            }
         }

         /* Record defs */
         jay_foreach_index(I->dst, _, index) {
            u_sparse_bitset_set(&ctx->W, index);
         }

         cycle += inst_cycles(I);
      }

      /* Apply transfer function to get our entry state. */
      foreach_next_use(&sb->next_use_out, it) {
         if (!u_sparse_bitset_test(&ctx->W, it->index)) {
            add_next_use(&sb->next_use_in, it->index,
                         add_dist(it->dist, sb->cycles));
         }
      }

      /* Propagate successor live-in to pred live-out, joining with min */
      jay_foreach_predecessor(block, pred) {
         if (minimum_next_uses(&ctx->blocks[(*pred)->index].next_use_out,
                               &sb->next_use_in, ctx->next_uses,
                               &ctx->phi_set)) {
            /* The predecessor's exit set changed: reprocess it. */
            jay_worklist_push_tail(&worklist, *pred);
         }
      }
   }

   u_worklist_fini(&worklist);

#ifndef NDEBUG
   /* In debug builds, validate the following invariant:
    *
    * Next-use distance is finite iff live and in file.
    */
   jay_foreach_block(ctx->func, blk) {
      struct spill_block *sb = &ctx->blocks[blk->index];

      /* i = 0 checks the entry sets against live-in, i = 1 the exit sets
       * against live-out.
       */
      for (unsigned i = 0; i < 2; i++) {
         struct util_dynarray *nu = i ? &sb->next_use_out : &sb->next_use_in;
         struct u_sparse_bitset *live = i ? &blk->live_out : &blk->live_in;

         u_sparse_bitset_clear_all(&ctx->W);

         foreach_next_use(nu, it) {
            assert(u_sparse_bitset_test(live, it->index) &&
                   BITSET_TEST(ctx->in_file, it->index));

            u_sparse_bitset_set(&ctx->W, it->index);
         }

         /* NOTE(review): the macro's iterator "i" shadows the outer loop
          * counter "i" here — presumably the macro declares its own variable
          * so this is benign, but verify; renaming one would be clearer.
          */
         U_SPARSE_BITSET_FOREACH_SET(live, i) {
            if (BITSET_TEST(ctx->in_file, i)) {
               assert(u_sparse_bitset_test(&ctx->W, i));
            }
         }
      }
   }
#endif
}
|
||||
|
||||
/*
 * Spill the register file "file" so that at most "k" registers are live at
 * any point, lowering excess values to memory variables. This implements a
 * Braun-Hack style SSA spiller: compute global next-use distances, run the
 * MIN algorithm per block to choose the in-register working set W and the
 * spilled set S, then insert coupling (spill/reload) code on edges where the
 * sets disagree. Destroys and then repairs SSA.
 *
 * @param func  function to spill (mutated in place)
 * @param file  register file to apply pressure limits to
 * @param k     maximum number of simultaneously live registers in that file
 */
void
jay_spill(jay_function *func, enum jay_file file, unsigned k)
{
   /* All scratch allocations hang off one context, freed in a single shot. */
   void *memctx = ralloc_context(NULL);
   void *linctx = linear_context(memctx);
   struct spill_ctx ctx = { .func = func, .file = file, .k = k };

   ctx.n = func->ssa_alloc;
   ctx.in_file = BITSET_LINEAR_ZALLOC(linctx, ctx.n);
   ctx.defs = linear_zalloc_array(linctx, jay_inst *, ctx.n);
   ctx.next_uses = linear_alloc_array(linctx, dist_t, ctx.n);
   ctx.candidates = linear_alloc_array(linctx, struct next_use, ctx.n);
   ctx.blocks =
      linear_zalloc_array(linctx, struct spill_block, func->num_blocks);

   /* Record defining instructions for rematerializable values and phi
    * destinations, and mark which SSA indices live in the spilled file.
    */
   jay_foreach_inst_in_func(func, block, I) {
      if (can_remat(I) || I->op == JAY_OPCODE_PHI_DST) {
         ctx.defs[jay_index(I->dst)] = I;
      }

      if (I->dst.file == file) {
         BITSET_SET_COUNT(ctx.in_file, jay_base_index(I->dst),
                          jay_num_values(I->dst));
      }
   }

   u_sparse_bitset_init(&ctx.W, ctx.n, memctx);
   u_sparse_bitset_init(&ctx.S, ctx.n, memctx);
   u_sparse_bitset_init(&ctx.N, ctx.n, memctx);
   u_sparse_bitset_init(&ctx.phi_set, ctx.n, memctx);
   util_dynarray_init(&ctx.next_ip, memctx);

   global_next_use_distances(&ctx, memctx);

   /* Reserve a memory variable for every regular variable */
   func->ssa_alloc *= 2;

   /* Run the per-block MIN algorithm. Per-block state in ctx is reset at the
    * top of each iteration.
    */
   jay_foreach_block(func, block) {
      ctx.nW = 0;
      ctx.ip = 0;

      u_sparse_bitset_clear_all(&ctx.W);
      u_sparse_bitset_clear_all(&ctx.S);
      u_sparse_bitset_clear_all(&ctx.N);
      util_dynarray_clear(&ctx.next_ip);

      populate_local_next_use(&ctx, block);

      struct spill_block *sb = &ctx.blocks[block->index];
      dist_t *next_ips = util_dynarray_element(&ctx.next_ip, dist_t, 0);
      unsigned nu_cursor = util_dynarray_num_elements(&ctx.next_ip, dist_t);

      /* Populate next-use with phi destinations, which are not in the
       * next_use_in set but are accounted for when computing W_entry.
       */
      jay_foreach_phi_dst_in_block(block, I) {
         if (I->dst.file == file) {
            assert(nu_cursor >= 1);
            ctx.next_uses[jay_index(I->dst)] = next_ips[--nu_cursor];
            u_sparse_bitset_set(&ctx.N, jay_index(I->dst));
         }
      }

      /* Loop headers get special handling so values live across the loop can
       * stay resident; ordinary blocks derive W from their predecessors.
       */
      if (block->loop_header) {
         compute_w_entry_loop_header(&ctx, block);
      } else if (jay_num_predecessors(block) /* skip start blocks */) {
         compute_w_entry(&ctx, block);
      }

      assert(ctx.nW <= ctx.k && "invariant");
      u_sparse_bitset_dup(&sb->W_in, &ctx.W);

      compute_s_entry(&ctx, block);
      min_algorithm(&ctx, block, sb, next_ips, nu_cursor);
   }

   /* Now that all blocks are processed separately, stitch it together */
   jay_foreach_block(func, block) {
      jay_foreach_predecessor(block, pred) {
         u_sparse_bitset_clear_all(&ctx.phi_set);
         insert_coupling_code(&ctx, *pred, block);
      }
   }

   ralloc_free(memctx);

   /* Spilling breaks SSA, so we need to repair before validating */
   jay_repair_ssa(func);
   jay_validate(func->shader, "Spilling");

   /* Remat can introduce dead code */
   jay_opt_dead_code(func->shader);
}
|
||||
576
src/intel/compiler/jay/jay_to_binary.c
Normal file
576
src/intel/compiler/jay/jay_to_binary.c
Normal file
|
|
@ -0,0 +1,576 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "compiler/brw/brw_disasm_info.h"
|
||||
#include "compiler/brw/brw_eu.h"
|
||||
#include "compiler/brw/brw_eu_defines.h"
|
||||
#include "compiler/brw/brw_eu_inst.h"
|
||||
#include "compiler/brw/brw_reg.h"
|
||||
#include "compiler/brw/brw_reg_type.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/u_math.h"
|
||||
#include "jay.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
static inline enum brw_reg_type
|
||||
to_brw_reg_type(enum jay_type type)
|
||||
{
|
||||
/* clang-format off */
|
||||
switch (type) {
|
||||
case JAY_TYPE_UNTYPED:
|
||||
case JAY_TYPE_U8: return BRW_TYPE_UB;
|
||||
case JAY_TYPE_U16: return BRW_TYPE_UW;
|
||||
case JAY_TYPE_U32: return BRW_TYPE_UD;
|
||||
case JAY_TYPE_U64: return BRW_TYPE_UQ;
|
||||
case JAY_TYPE_S8: return BRW_TYPE_B;
|
||||
case JAY_TYPE_S16: return BRW_TYPE_W;
|
||||
case JAY_TYPE_S32: return BRW_TYPE_D;
|
||||
case JAY_TYPE_S64: return BRW_TYPE_Q;
|
||||
case JAY_TYPE_F16: return BRW_TYPE_HF;
|
||||
case JAY_TYPE_F32: return BRW_TYPE_F;
|
||||
case JAY_TYPE_F64: return BRW_TYPE_DF;
|
||||
case JAY_TYPE_BF16: return BRW_TYPE_BF;
|
||||
default: UNREACHABLE("invalid type");
|
||||
}
|
||||
/* clang-format on */
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
to_def_grf_16(struct jay_partition *p, jay_def d)
|
||||
{
|
||||
unsigned count = jay_num_values(d);
|
||||
if (count == 0 || !(d.file == GPR || d.file == UGPR)) {
|
||||
return d.reg;
|
||||
}
|
||||
|
||||
unsigned base = 0;
|
||||
for (unsigned i = 0; i < JAY_PARTITION_BLOCKS; ++i) {
|
||||
unsigned offset = d.reg - base;
|
||||
|
||||
if (offset < p->blocks[d.file][i].len) {
|
||||
assert(offset + count <= p->blocks[d.file][i].len &&
|
||||
"vectors must not cross partition boundaries");
|
||||
|
||||
return (p->blocks[d.file][i].start + offset) * 2 + d.hi;
|
||||
}
|
||||
|
||||
base += p->blocks[d.file][i].len;
|
||||
}
|
||||
|
||||
UNREACHABLE("virtual register must be in a block");
|
||||
}
|
||||
|
||||
/*
 * Lower a Jay operand (destination when idx < 0, otherwise source idx) to a
 * concrete brw_reg, applying file-specific addressing, regioning and stride
 * fixups. simd_offs selects which SIMD-split chunk of the instruction is
 * being emitted; force_hi forces the hi half selection (used by CVT).
 */
static inline brw_reg
to_brw_reg(jay_function *f,
           const jay_inst *I,
           signed idx,
           unsigned simd_offs,
           bool force_hi)
{
   bool is_dest = idx < 0;
   enum jay_type type = is_dest ? I->type : jay_src_type(I, idx);
   jay_def d = is_dest ? I->dst : I->src[idx];
   d.hi |= force_hi;

   struct brw_reg R;
   unsigned reg = to_def_grf_16(&f->shader->partition, d), offset_B = 0;

   if (jay_is_imm(d)) {
      /* Immediates have size restrictions but can zero extend */
      if (jay_type_size_bits(type) == 64) {
         type = jay_type_resize(type, 32);
      } else if (I->op == JAY_OPCODE_BFN) {
         assert(jay_as_uint(d) < UINT16_MAX);
         type = JAY_TYPE_U16;
      }

      R = brw_imm_ud(jay_as_uint(d));
   } else if (jay_is_null(d)) {
      R = brw_null_reg();
   } else if (d.file == UGPR) {
      /* reg is in half-GRF units; convert to a GRF number plus a dword
       * subregister offset.
       */
      unsigned grf = (reg >> 1) / 8;
      offset_B = ((reg >> 1) % 8) * 4;

      /* NOTE(review): this inner test is always true — we only get here when
       * the outer "else if (d.file == UGPR)" matched, so the accumulator
       * branch below is unreachable. Presumably the outer condition was meant
       * to also accept an accumulator file; confirm and fix or simplify.
       */
      if (d.file == UGPR) {
         R = brw_ud1_grf(grf, 0);
      } else {
         R = brw_ud1_reg(ARF, BRW_ARF_ACCUMULATOR + (grf * 2), 0);
      }

      /* Handle 3-src restrictions and vectorized uniform code. */
      if (is_dest || jay_num_values(d) >= 8) {
         R = vec8(R);
      }

      /* Some operations have special restrictions on the destination stride,
       * but if we write a single UGPR the stride is ignored.. Specify
       * whatever stride is needed to satisfy the rules.
       */
      if (is_dest) {
         /* BSpec 56640 "Special Restrictions" says:
          *
          *    "Conversion between HF and Integer must be DWord-aligned
          *     and strided by a DWord on the destination."
          */
         enum jay_type src0_type = jay_src_type(I, 0);
         if ((I->type == JAY_TYPE_F16 && !jay_type_is_any_float(src0_type)) ||
             (src0_type == JAY_TYPE_F16 && !jay_type_is_any_float(I->type))) {
            assert(jay_num_values(d) == 1 && "must not vectorize HF<->Int");
            R = stride(R, 8, 2, 4);
         }

         /* Packed floats have restrictions on mixed sizes. Use <2>. */
         if (jay_type_size_bits(I->type) == 16 &&
             jay_type_size_bits(jay_src_type(I, 0)) != 16) {
            assert(jay_num_values(d) == 1 && "must not vectorize mixed float");
            R = stride(R, 4, 2, 2);
         }
      }
   } else if (d.file == GPR) {
      enum jay_stride def_stride = jay_def_stride(f->shader, d);
      uint32_t type_bits = jay_type_size_bits(type);
      unsigned stride_bits = jay_stride_to_bits(def_stride);
      unsigned simd_width = jay_simd_width_physical(f->shader, I);

      unsigned grf;
      if (def_stride == JAY_STRIDE_2) {
         /* Bit 0 selects between lo/hi halves of the GPR */
         grf = (reg / 2) * jay_grf_per_gpr(f->shader);
         offset_B = (reg & 1) * 2 * f->shader->dispatch_width;
      } else {
         /* Low bits are an offset in 2-byte words into the GRF */
         unsigned mask = BITFIELD_MASK(stride_bits / 32);
         grf = ((reg & ~mask) / 2) * jay_grf_per_gpr(f->shader);
         offset_B = (reg & mask) * 2;
      }

      /* Advance into the register for the SIMD-split chunk being emitted. */
      R = byte_offset(xe2_vec8_grf(grf, 0),
                      simd_offs * simd_width * stride_bits / 8);

      /* Pick the region matching the ratio of the allocation stride to the
       * element size.
       */
      if (stride_bits == (type_bits * 4)) {
         R = stride(R, 8, 2, 4);
      } else if (stride_bits == (type_bits * 2)) {
         R = stride(R, 4, 2, 2);
      } else {
         assert(stride_bits == type_bits);
      }

      /* Broadcast is equivalent to <8, 8, 1> for SIMD1 instructions. Use that
       * instead due to regioning restrictions.
       */
      if (simd_width == 1) {
         R = vec1(R);
      }
   } else if (jay_is_flag(d)) {
      /* Explicit flags act like UGPRs. As sources they broadcast to all lanes,
       * so we may ignore the SIMD offset. As destinations, they are written by
       * SIMD1 instructions and are never SIMD split.
       */
      assert(simd_offs == 0 || idx >= 0);
      unsigned offs_B = d.reg * (f->shader->dispatch_width / 8);
      R = brw_flag_subreg(offs_B / 2);
   } else if (d.file == J_ADDRESS) {
      R = brw_address_reg(d.reg);
   } else if (d.file == J_ARF) {
      R = brw_ud1_reg(ARF, jay_base_index(d), 0);
   } else {
      UNREACHABLE("unexpected file");
   }

   /* Source modifiers and the final type/subregister adjustment apply to
    * every file uniformly.
    */
   R.negate = d.negate;
   R.abs = d.abs;
   return byte_offset(retype(R, to_brw_reg_type(type)), offset_B);
}
|
||||
|
||||
/* Convenience wrapper: translate source i of the instruction being emitted,
 * in the context of emit()'s locals (f, I, simd_offs).
 */
#define SRC(i) to_brw_reg(f, I, i, simd_offs, false)

/* Switch-case bodies for opcodes that map 1:1 onto a brw ALU instruction with
 * 0-3 sources. OP3_SWAP emits the sources in reversed order (used for MAD and
 * BFE, whose Jay source order differs from the hardware's).
 */
#define OP0(hw)                                                                \
   case JAY_OPCODE_##hw:                                                       \
      brw_##hw(p);                                                             \
      break;

#define OP1(jay, hw)                                                           \
   case JAY_OPCODE_##jay:                                                      \
      brw_alu1(p, BRW_OPCODE_##hw, dst, SRC(0));                               \
      break;

#define OP2(jay, hw)                                                           \
   case JAY_OPCODE_##jay:                                                      \
      brw_alu2(p, BRW_OPCODE_##hw, dst, SRC(0), SRC(1));                       \
      break;

#define OP3(jay, hw)                                                           \
   case JAY_OPCODE_##jay:                                                      \
      brw_alu3(p, BRW_OPCODE_##hw, dst, SRC(0), SRC(1), SRC(2));               \
      break;

#define OP3_SWAP(jay, hw)                                                      \
   case JAY_OPCODE_##jay:                                                      \
      brw_alu3(p, BRW_OPCODE_##hw, dst, SRC(2), SRC(1), SRC(0));               \
      break;
|
||||
|
||||
static struct brw_reg
|
||||
quad_swizzle(struct brw_reg r, const jay_inst *I)
|
||||
{
|
||||
/* clang-format off */
|
||||
switch (jay_quad_swizzle_swizzle(I)) {
|
||||
case JAY_QUAD_SWIZZLE_XXXX: return suboffset(stride(r, 4, 4, 0), 0);
|
||||
case JAY_QUAD_SWIZZLE_YYYY: return suboffset(stride(r, 4, 4, 0), 1);
|
||||
case JAY_QUAD_SWIZZLE_ZZZZ: return suboffset(stride(r, 4, 4, 0), 2);
|
||||
case JAY_QUAD_SWIZZLE_WWWW: return suboffset(stride(r, 4, 4, 0), 3);
|
||||
case JAY_QUAD_SWIZZLE_XXZZ: return suboffset(stride(r, 2, 2, 0), 0);
|
||||
case JAY_QUAD_SWIZZLE_YYWW: return suboffset(stride(r, 2, 2, 0), 1);
|
||||
case JAY_QUAD_SWIZZLE_XYXY: return suboffset(stride(r, 0, 2, 1), 0);
|
||||
case JAY_QUAD_SWIZZLE_ZWZW: return suboffset(stride(r, 0, 2, 1), 2);
|
||||
}
|
||||
/* clang-format on */
|
||||
|
||||
UNREACHABLE("invalid quad swizzle");
|
||||
}
|
||||
|
||||
/* Runs once per SIMD-split, so must not modify the instruction! */
/*
 * Emit the brw instruction(s) for one SIMD-split chunk of a Jay instruction.
 * simd_offs is the zero-based chunk index; state shared across chunks (SWSB
 * deps, flags, predication, cmod) is recomputed per call since I is const.
 */
static void
emit(struct brw_codegen *p,
     jay_function *f,
     const jay_inst *I,
     unsigned simd_offs)
{
   ASSERTED unsigned nr_ins_before = p->nr_insn;
   unsigned exec_size = jay_simd_width_physical(f->shader, I);
   // jay_print_inst(stdout, (jay_inst *) I);

   /* Fix up SWSB dependencies for SIMD split instructions. The latter
    * instructions do not need to redundantly wait on an SBID but might
    * replicate their regdists.
    */
   struct tgl_swsb dep =
      simd_offs && !I->replicate_dep ? tgl_swsb_null() : I->dep;
   dep.mode = simd_offs ? TGL_SBID_NULL : dep.mode;

   if (I->decrement_dep) {
      /* Each emitted chunk advances past jay_macro_length(I) instructions, so
       * the regdist shrinks accordingly for later chunks.
       */
      unsigned delta = simd_offs * jay_macro_length(I);
      assert(dep.regdist > delta);
      dep.regdist -= delta;
   }

   brw_set_default_exec_size(p, util_logbase2(exec_size));
   brw_set_default_mask_control(p, jay_is_no_mask(I));
   brw_set_default_swsb(p, dep);
   brw_set_default_saturate(p, I->saturate);

   /* Quad swizzle can get split down to SIMD4 even on Xe2 where we don't have
    * NibCtrl. Fortunately, it's NoMask so it doesn't matter.
    */
   if (I->op != JAY_OPCODE_QUAD_SWIZZLE) {
      brw_set_default_group(p, simd_offs * exec_size);
   }

   /* Grab the hardware predicate, corresponding either to a logical predicate
    * or SEL's selector.
    */
   const jay_def *pred = I->predication ? jay_inst_get_predicate((void *) I) :
                         I->op == JAY_OPCODE_SEL ? &I->src[2] :
                         NULL;

   brw_set_default_predicate_control(p, pred ? BRW_PREDICATE_NORMAL :
                                               BRW_PREDICATE_NONE);
   brw_set_default_predicate_inverse(p, pred && pred->negate);

   /* Jay/brw enums line up by construction */
   enum brw_conditional_mod cmod =
      (enum brw_conditional_mod) I->conditional_mod;

   /* A conditional-mod flag write shares the flag register with any
    * predicate; they must refer to the same physical flag.
    */
   if (!jay_is_null(I->cond_flag)) {
      assert(!(pred && pred->reg != I->cond_flag.reg) && "must be tied");
      pred = &I->cond_flag;
   }

   if (pred) {
      unsigned reg = pred->reg * jay_phys_flag_per_virt(f->shader);
      brw_set_default_flag_reg(p, reg / 2, reg % 2);
   }

   /* MIN/MAX are lowered to SEL with an implicit comparison. */
   if (I->op == JAY_OPCODE_MIN) {
      cmod = BRW_CONDITIONAL_L;
   } else if (I->op == JAY_OPCODE_MAX) {
      cmod = BRW_CONDITIONAL_GE;
   }

   struct brw_reg dst = to_brw_reg(f, I, -1, simd_offs, false);

   switch (I->op) {
      OP0(ELSE)
      OP0(ENDIF)
      OP0(WHILE)
      OP0(BREAK)
      OP1(MOV, MOV)
      OP1(MODIFIER, MOV)
      OP1(RNDD, RNDD)
      OP1(RNDZ, RNDZ)
      OP1(RNDE, RNDE)
      OP1(FRC, FRC)
      OP1(BFREV, BFREV)
      OP1(CBIT, CBIT)
      OP1(NOT, NOT)
      OP1(FBL, FBL)
      OP1(FBH, FBH)
      OP1(LZD, LZD)
      OP2(ROL, ROL)
      OP2(AVG, AVG)
      OP2(ADD, ADD)
      OP2(MUL, MUL)
      OP2(SEL, SEL)
      OP2(MIN, SEL)
      OP2(MAX, SEL)
      OP2(MUL_32X16, MUL)
      OP2(AND, AND)
      OP2(AND_U32_U16, AND)
      OP2(OR, OR)
      OP2(XOR, XOR)
      OP2(ASR, ASR)
      OP2(SHR, SHR)
      OP2(SHL, SHL)
      OP2(BFI1, BFI1)
      OP3(BFI2, BFI2)
      OP3(ADD3, ADD3)
      OP3(CSEL, CSEL)
      OP3(DP4A_UU, DP4A)
      OP3(DP4A_SS, DP4A)
      OP3(DP4A_SU, DP4A)
      OP3_SWAP(MAD, MAD)
      OP3_SWAP(BFE, BFE)

   case JAY_OPCODE_LOOP_ONCE:
      /* TODO: Is there a better way to do this? */
      brw_BREAK(p);
      brw_WHILE(p);
      break;

   case JAY_OPCODE_IF:
      brw_IF(p, util_logbase2(exec_size));
      break;

   case JAY_OPCODE_MATH:
      gfx6_math(p, dst, jay_math_op(I), SRC(0),
                retype(brw_null_reg(), to_brw_reg_type(I->type)));
      break;

   case JAY_OPCODE_BFN:
      brw_BFN(p, dst, SRC(0), SRC(1), SRC(2), brw_imm_ud(jay_bfn_ctrl(I)));
      break;

   case JAY_OPCODE_DESWIZZLE_16:
      brw_set_default_exec_size(p, BRW_EXECUTE_16);
      brw_MOV(p, retype(xe2_vec8_grf(jay_deswizzle_16_dst(I), 0), BRW_TYPE_UD),
              retype(xe2_vec8_grf(jay_deswizzle_16_src(I), 0), BRW_TYPE_UD));
      break;

   case JAY_OPCODE_CVT: {
      unsigned index = jay_cvt_index(I);
      bool force_hi = false;

      /* We will apply a suboffset for the specific subword being converted. In
       * the case where we have a subword (16-bit) stride, accesses to the upper
       * half will be instead to a discontiguous GRF so we have to fix up. This
       * affects u8->u32 conversions.
       */
      if (I->src[0].file == GPR) {
         unsigned type_size_B = jay_type_size_bits(jay_cvt_src_type(I)) / 8;
         unsigned index_B = index * type_size_B;
         unsigned stride_B =
            jay_stride_to_bits(jay_def_stride(f->shader, I->src[0])) / 8;

         if (index_B >= stride_B) {
            assert(stride_B == 2 && index_B <= 4 && !I->src[0].hi);
            force_hi = true;
            index = (index_B % stride_B) / type_size_B;
         }
      }

      brw_MOV(p, dst,
              suboffset(to_brw_reg(f, I, 0, simd_offs, force_hi), index));
      break;
   }

   case JAY_OPCODE_SYNC:
      brw_SYNC(p, jay_sync_op(I));
      break;

   case JAY_OPCODE_CMP:
      brw_CMP(p, dst, I->conditional_mod, SRC(0), SRC(1));
      break;

   case JAY_OPCODE_MOV_IMM64:
      brw_MOV(p, dst, brw_imm_u64(jay_mov_imm64_imm(I)));
      break;

   case JAY_OPCODE_RELOC:
      brw_MOV_reloc_imm(p, dst, BRW_TYPE_UD, jay_reloc_param(I),
                        jay_reloc_base(I));
      break;

   case JAY_OPCODE_QUAD_SWIZZLE:
      brw_MOV(p, dst, quad_swizzle(SRC(0), I));
      break;

   case JAY_OPCODE_BROADCAST_IMM:
      brw_MOV(p, dst, get_element(SRC(0), jay_broadcast_imm_lane(I)));
      break;

   case JAY_OPCODE_SEND:
      brw_SEND(p, jay_send_sfid(I), dst, SRC(2), SRC(3), SRC(0), SRC(1),
               jay_send_ex_desc_imm(I), jay_send_ex_mlen(I),
               jay_send_bindless(I), jay_send_eot(I), false /* gather */);
      if (jay_send_check_tdr(I)) {
         brw_eu_inst_set_opcode(p->isa, brw_eu_last_inst(p), BRW_OPCODE_SENDC);
      }
      break;

   /* Gfx20+ has separate Render Target Array indices for each pair of subspans
    * in order to support multiple polygons, so we need to use a <1;8,0> region
    * in order to select the word for each channel.
    */
   case JAY_OPCODE_EXTRACT_LAYER:
      brw_AND(p, dst, stride(retype(SRC(simd_offs), BRW_TYPE_UW), 1, 8, 0),
              brw_imm_uw(0x7ff));
      break;

   case JAY_OPCODE_EXPAND_QUAD:
      brw_MOV(p, dst, stride(SRC(simd_offs), 1, 4, 0));
      break;

   case JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS:
      brw_set_default_exec_size(p, BRW_EXECUTE_32);
      brw_set_default_group(p, 0);
      brw_ADD(p, retype(dst, BRW_TYPE_UW), retype(SRC(0), BRW_TYPE_UW),
              brw_imm_uv(0x11100100));
      break;

   case JAY_OPCODE_LANE_ID_8:
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_MOV(p, dst, brw_imm_uv(0x76543210));
      break;

   case JAY_OPCODE_LANE_ID_EXPAND:
      brw_set_default_exec_size(p, util_logbase2(jay_lane_id_expand_width(I)));
      brw_ADD(p, suboffset(dst, jay_lane_id_expand_width(I)), SRC(0),
              brw_imm_uw(jay_lane_id_expand_width(I)));
      break;

   case JAY_OPCODE_EXTRACT_BYTE_PER_8LANES:
      brw_MOV(p, dst, stride(retype(SRC(simd_offs), BRW_TYPE_UB), 1, 8, 0));
      break;

   case JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4:
      brw_SHR(p, dst, SRC(0), brw_imm_uv(0x44440000));
      break;

   case JAY_OPCODE_MUL_32: {
      /* 32x32 multiply macro: MUL into the accumulator, then MACH/MACL for
       * the high/low result. The second instruction has no SWSB of its own.
       */
      brw_MUL(p, retype(brw_acc_reg(1), to_brw_reg_type(I->type)), SRC(0),
              subscript(SRC(1), BRW_TYPE_UW, 0));

      brw_set_default_swsb(p, tgl_swsb_null());
      brw_alu2(p, jay_mul_32_high(I) ? BRW_OPCODE_MACH : BRW_OPCODE_MACL, dst,
               SRC(0), SRC(1));
      break;
   }

   case JAY_OPCODE_SHUFFLE: {
      /* Indirect read: a0 = per-lane byte address of src[0] plus the lane
       * index from src[1], then a VxH-indirect MOV gathers the values.
       */
      struct brw_reg a0 = brw_address_reg(0);
      unsigned grf_16 = to_def_grf_16(&f->shader->partition, I->src[0]);
      unsigned offset_B = grf_16 * 2 * f->shader->dispatch_width;

      brw_ADD(p, a0, subscript(SRC(1), BRW_TYPE_UW, 0), brw_imm_uw(offset_B));
      brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), BRW_TYPE_UD));
      break;
   }

   default:
      jay_print_inst(stderr, (jay_inst *) I);
      UNREACHABLE("Unhandled opcode");
   }

   if (cmod != BRW_CONDITIONAL_NONE) {
      brw_eu_inst_set_cond_modifier(p->devinfo, brw_eu_last_inst(p), cmod);
   }

   assert(p->nr_insn == (nr_ins_before + jay_macro_length(I)) &&
          "Jay instructions must map 1:n to GEN instructions");
}
|
||||
|
||||
struct jay_shader_bin *
|
||||
jay_to_binary(jay_shader *s, void *const_data, size_t const_data_size)
|
||||
{
|
||||
struct jay_shader_bin *bin = rzalloc(s, struct jay_shader_bin);
|
||||
|
||||
struct util_dynarray prog;
|
||||
util_dynarray_init(&prog, bin);
|
||||
|
||||
struct brw_isa_info isa;
|
||||
struct brw_codegen p;
|
||||
|
||||
brw_init_isa_info(&isa, s->devinfo);
|
||||
brw_init_codegen(&isa, &p, bin);
|
||||
int start_offset = p.next_insn_offset;
|
||||
|
||||
/* TODO: Multifunction properly */
|
||||
jay_foreach_function(s, f) {
|
||||
jay_foreach_block(f, block) {
|
||||
if (block->loop_header) {
|
||||
brw_DO(&p, 0);
|
||||
}
|
||||
|
||||
jay_foreach_inst_in_block(block, I) {
|
||||
for (unsigned i = 0; i < (1 << jay_simd_split(s, I)); ++i) {
|
||||
emit(&p, f, I, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int final_halt_offset = -1 /* TODO */;
|
||||
brw_set_uip_jip(&p, start_offset, final_halt_offset);
|
||||
|
||||
struct disasm_info *disasm = disasm_initialize(p.isa, NULL);
|
||||
|
||||
disasm_new_inst_group(disasm, 0);
|
||||
disasm_new_inst_group(disasm, p.next_insn_offset);
|
||||
|
||||
UNUSED bool valid = true;
|
||||
#ifndef NDEBUG
|
||||
valid =
|
||||
brw_validate_instructions(p.isa, p.store, 0, p.next_insn_offset, disasm);
|
||||
#endif
|
||||
|
||||
brw_compact_instructions(&p, start_offset, disasm);
|
||||
|
||||
if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(s->stage)) || !valid) {
|
||||
dump_assembly(p.store, 0, p.next_insn_offset, disasm, NULL, stdout);
|
||||
}
|
||||
|
||||
if (!valid) {
|
||||
UNREACHABLE("invalid assembly");
|
||||
}
|
||||
|
||||
struct brw_stage_prog_data *prog_data = &s->prog_data->base;
|
||||
|
||||
assert(prog_data->const_data_size == 0);
|
||||
if (const_data_size > 0) {
|
||||
prog_data->const_data_size = const_data_size;
|
||||
prog_data->const_data_offset =
|
||||
brw_append_data(&p, const_data, const_data_size, 32);
|
||||
}
|
||||
|
||||
bin->kernel = brw_get_program(&p, &bin->size);
|
||||
s->prog_data->base.relocs =
|
||||
brw_get_shader_relocs(&p, &s->prog_data->base.num_relocs);
|
||||
|
||||
return bin;
|
||||
}
|
||||
328
src/intel/compiler/jay/jay_validate.c
Normal file
328
src/intel/compiler/jay/jay_validate.c
Normal file
|
|
@ -0,0 +1,328 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
||||
/* Ordered phases within a basic block; instructions must appear in
 * non-decreasing phase order (checked by validate_inst).
 */
enum validate_block_state {
   STATE_PHI_DST,   /* phi destinations / preloads at the top of the block */
   STATE_NORMAL,    /* regular instructions */
   STATE_LATE,      /* phi sources and block-terminating control flow */
};
|
||||
|
||||
/* Mutable state threaded through one validation pass over a function. */
struct validate_state {
   bool failed;                 /* set once any CHECK fails */
   bool post_ra;                /* relax SSA/allocation checks after RA */
   const char *when;            /* pass name reported in failure messages */
   jay_inst *I;                 /* instruction currently being validated */
   jay_block *block;            /* block currently being validated */
   jay_function *func;
   BITSET_WORD *defs;           /* SSA indices defined so far (one bit each) */
   enum jay_file *files;        /* file each SSA index was defined in */
   enum validate_block_state block_state;  /* current phase within the block */
};
|
||||
|
||||
static enum validate_block_state
|
||||
block_state_for_inst(jay_inst *I)
|
||||
{
|
||||
if (I->op == JAY_OPCODE_PHI_DST || I->op == JAY_OPCODE_PRELOAD) {
|
||||
return STATE_PHI_DST;
|
||||
} else if (I->op == JAY_OPCODE_PHI_SRC ||
|
||||
(jay_op_is_control_flow(I->op) && I->op != JAY_OPCODE_ELSE)) {
|
||||
return STATE_LATE;
|
||||
} else {
|
||||
return STATE_NORMAL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Report a validation failure to stderr and latch validate->failed. The first
 * failure also prints a pass-level header; if an instruction is current
 * (validate->I), it is printed for context. Printf-style fmt/args describe
 * the specific violation.
 */
static void
chirp(struct validate_state *validate, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (!validate->failed) {
      fprintf(stderr, "jay shader validation failed (after %s):\n",
              validate->when);
      validate->failed = true;
   }
   if (validate->I) {
      fprintf(stderr,
              " invalid instruction in block %d: ", validate->block->index);
      jay_print_inst(stderr, validate->I);
   }
   fprintf(stderr, " ");
   vfprintf(stderr, fmt, args);
   fprintf(stderr, "\n\n");

   va_end(args);
}
|
||||
|
||||
/* Report (but do not abort on) a failed validation condition, with file/line
 * and the stringized expression. Expects a "validate" pointer in scope.
 * NOTE(review): this expands to an unbraced if, so CHECK() directly under an
 * outer if/else would bind a following "else" to it — current call sites
 * brace their bodies, but a do { } while (0) wrapper would be safer; confirm.
 */
#define CHECK(cond)                                                           \
   if (!(cond)) {                                                             \
      chirp(validate, "assertion failed at %s:%u\n %s", __FILE__, __LINE__,   \
            #cond);                                                           \
   }
|
||||
|
||||
/*
 * Check that a 1-bit (U1) typed operand lives in a flag register (or is
 * null). Non-U1 types are unconstrained here. The "name" parameter is
 * currently unused; presumably intended for failure messages — confirm.
 */
static void
validate_flagness(struct validate_state *validate,
                  jay_def def,
                  enum jay_type type,
                  const char *name)
{
   CHECK(type != JAY_TYPE_U1 || jay_is_flag(def) || jay_is_null(def));
}
|
||||
|
||||
/*
 * Compute the expected number of values for source s of instruction I, for
 * checking against jay_num_values() of the actual source. EXPAND_QUAD is a
 * fixed special case. For vectorized uniform (UGPR-dest) instructions, the
 * source width scales with the destination count, except that a GPR source
 * must instead span exactly the dispatch width and count as 1.
 */
static unsigned
get_src_words(struct validate_state *validate, jay_inst *I, unsigned s)
{
   if (I->op == JAY_OPCODE_EXPAND_QUAD) {
      return 4;
   }

   /* "Vectorized" = a uniform destination wider than one element of its type,
    * excluding SEND, with a source that is itself more than one value.
    */
   bool vectorized = I->dst.file == UGPR &&
                     jay_num_values(I->dst) > jay_type_vector_length(I->type) &&
                     I->op != JAY_OPCODE_SEND &&
                     jay_num_values(I->src[s]) > 1;

   unsigned elsize = jay_type_vector_length(jay_src_type(I, s));
   unsigned words = elsize * (vectorized ? jay_num_values(I->dst) : 1);

   if (vectorized && I->src[s].file == GPR) {
      CHECK(words == validate->func->shader->dispatch_width);
      return 1;
   } else {
      return words;
   }
}
|
||||
|
||||
/*
 * Validate the fundamental invariants of static single assignment form.
 */
static void
validate_ssa(struct validate_state *validate, jay_inst *I)
{
   /* Every source must already be defined (defs dominate uses, given blocks
    * are visited in dominance-compatible order) and must agree with the file
    * its definition used.
    */
   jay_foreach_src_index(I, src_index, _, ssa_index) {
      CHECK(BITSET_TEST(validate->defs, ssa_index) && "defs dominate uses");
      CHECK(validate->files[ssa_index] == I->src[src_index].file &&
            "consistent files");
   }

   /* Each SSA index is defined exactly once; record its file for the source
    * consistency check above.
    */
   jay_foreach_dst_index(I, d, ssa_index) {
      CHECK(!BITSET_TEST(validate->defs, ssa_index) && "single definition");
      BITSET_SET(validate->defs, ssa_index);
      validate->files[ssa_index] = d.file;
   }
}
|
||||
|
||||
/*
 * Validate the invariants of jay_def.
 */
/* The "kind" parameter (e.g. "dst", "source") is currently unused; presumably
 * intended for failure messages — confirm.
 */
static void
validate_def(struct validate_state *validate, jay_def def, const char *kind)
{
   /* Null defs carry no register number. */
   CHECK(!jay_is_null(def) || !def.reg);

   if (def.collect) {
      /* Collects gather >= 2 non-contiguous channels from GPR/UGPR; a fully
       * contiguous collect should have been expressed as a plain vector.
       */
      CHECK(jay_num_values(def) >= 2);
      CHECK(def.file == GPR || def.file == UGPR);

      bool contiguous = true;
      jay_foreach_comp(def, c) {
         uint32_t index = jay_channel(def, c);
         contiguous &= index == (jay_channel(def, 0) + c);
         CHECK(index != JAY_SENTINEL);
      }

      CHECK(!contiguous);
   } else if (def.file == J_IMM) {
      /* Immediates carry their value elsewhere; these fields must be clear. */
      CHECK(!def.reg);
      CHECK(!def.num_values_m1);
      CHECK(!def.negate);
      CHECK(!def.abs);
   } else if (def.file == ACCUM || def.file == UACCUM || def.hi) {
      /* Accumulators and hi-half selection only exist after RA. */
      CHECK(validate->post_ra);
   } else {
      CHECK(jay_base_index(def) != JAY_SENTINEL || validate->post_ra);
   }

   /* All referenced SSA channels must be within the function's allocation. */
   if (jay_is_ssa(def) && jay_channel(def, 0) != JAY_SENTINEL) {
      jay_foreach_comp(def, c) {
         CHECK(jay_channel(def, c) < validate->func->ssa_alloc);
      }
   }

   /* Flags are always scalar. */
   CHECK(jay_num_values(def) == 1 || !jay_is_flag(def));
}
|
||||
|
||||
/**
 * Validate an instruction.
 *
 * Checks block-phase ordering, operand well-formedness, SSA invariants
 * (pre-RA only), modifier/predication legality per opcode, source counts and
 * widths, and a few opcode-specific rules.
 */
static void
validate_inst(struct validate_state *validate, jay_inst *I)
{
   validate->I = I;

   /* Block states are monotonic. */
   enum validate_block_state state = block_state_for_inst(I);
   CHECK(state >= validate->block_state);
   validate->block_state = state;

   const struct jay_opcode_info *opinfo = &jay_opcode_infos[I->op];

   validate_def(validate, I->dst, "dst");
   validate_def(validate, I->cond_flag, "cond_flag");

   jay_foreach_src(I, s) {
      validate_def(validate, I->src[s], "source");
   }

   /* SSA invariants only hold before register allocation. */
   if (!validate->post_ra) {
      validate_ssa(validate, I);
   }

   CHECK(I->num_srcs <= JAY_MAX_SRCS);

   validate_flagness(validate, I->dst, I->type, "destination");
   validate_flagness(validate, I->cond_flag, JAY_TYPE_U1, "cond_flag");

   /* A conditional mod needs somewhere to write, except CSEL which consumes
    * it internally.
    */
   CHECK(!I->conditional_mod ||
         !jay_is_null(I->cond_flag) ||
         I->op == JAY_OPCODE_CSEL);

   /* These assumptions are baked into the definition of broadcast_flag and
    * required to ensure correctness with the lane masking.
    */
   CHECK(!I->broadcast_flag ||
         (!jay_is_null(I->cond_flag) &&
          jay_is_null(I->dst) &&
          I->cond_flag.file == FLAG &&
          (I->op == JAY_OPCODE_CMP || I->op == JAY_OPCODE_MOV)));

   /* Standard modifiers only allowed on some instructions */
   CHECK(!I->conditional_mod || opinfo->cmod || I->op == JAY_OPCODE_CSEL);
   CHECK(!I->saturate || opinfo->sat);

   unsigned num_srcs = I->num_srcs;

   /* Predicated instructions append their predicate (and optional default
    * value) as trailing sources; peel those off before the per-source loop.
    */
   if (I->predication) {
      CHECK(num_srcs >= I->predication);

      if (jay_inst_has_default(I)) {
         CHECK(jay_inst_get_default(I)->file == I->dst.file);
      }

      CHECK(jay_is_flag(*jay_inst_get_predicate(I)));
      CHECK(!jay_is_null(*jay_inst_get_predicate(I)));

      num_srcs -= I->predication;
   }

   if (validate->post_ra) {
      CHECK(jay_simd_width_logical(validate->func->shader, I) > 0);
      CHECK(jay_simd_width_physical(validate->func->shader, I) > 0);
   }

   /* Number of sources should match for our opcode. If opinfo->num_srcs
    * is zero, then it may actually take a variable number of sources.
    */
   CHECK(num_srcs == opinfo->num_srcs || opinfo->num_srcs == 0);

   for (unsigned s = 0; s < num_srcs; s++) {
      if (jay_is_ssa(I->src[s]) && !jay_is_null(I->src[s])) {
         /* SEND payload sources (s >= 2) have hardware-defined lengths not
          * derivable from the type, so skip the width check for them.
          */
         unsigned expected = get_src_words(validate, I, s);
         unsigned words = jay_num_values(I->src[s]);
         if (I->op != JAY_OPCODE_SEND || s < 2) {
            CHECK(expected == words);
         }

         validate_flagness(validate, I->src[s], jay_src_type(I, s), "source");
      }

      CHECK(!I->src[s].negate || jay_has_src_mods(I, s));
   }

   /* Opcode-specific operand rules. */
   switch (I->op) {
   case JAY_OPCODE_SEL:
      CHECK(jay_is_flag(I->src[2]) && "SEL src[2] (selector) must be a flag");
      break;
   case JAY_OPCODE_SWAP:
      CHECK(I->src[0].file == I->src[1].file && "SWAP files must match");
      break;
   default:
      break;
   }
}
|
||||
|
||||
/* Validate one function: per-block CFG invariants (successor ordering,
 * jump presence pre-RA, absence of critical edges), then every instruction,
 * then a whole-function check that no phi source is dead. Results accumulate
 * in the validate state; the caller decides whether to abort.
 */
static void
jay_validate_function(struct validate_state *validate)
{
   /* Scratch bitset/array tracking which SSA indices have been defined and
    * in which file. Sized by the function's SSA allocation count.
    * NOTE(review): allocation results are not checked; presumably acceptable
    * for a debug-only validator — confirm.
    */
   validate->defs = BITSET_CALLOC(validate->func->ssa_alloc);
   validate->files =
      calloc(validate->func->ssa_alloc, sizeof(validate->files[0]));

   jay_foreach_block(validate->func, block) {
      validate->block = block;
      validate->I = NULL;

      /* A second successor implies a first one. */
      CHECK(block->successors[0] || !block->successors[1]);

      /* Post-RA we can remove physical jumps though they exist logically */
      if (block->successors[1] && !validate->post_ra) {
         CHECK(jay_block_ending_jump(block) != NULL);
      }

      /* If a block has multiple successors, and one of them has multiple
       * predecessors, then we've detected a critical edge.
       */
      if (jay_num_successors(block) > 1 && !validate->post_ra) {
         jay_foreach_successor(block, succ) {
            if (jay_num_predecessors(succ) > 1) {
               chirp(validate, "Critical edge (B%u -> B%u) is not allowed",
                     block->index, succ->index);
            }
         }
      }

      /* Block state (phi/header/body ordering) restarts in every block. */
      validate->block_state = 0;
      jay_foreach_inst_in_block(block, inst) {
         validate_inst(validate, inst);
      }
   }

   /* Validate that there are no dead phis. RA relies on this. */
   if (!validate->post_ra) {
      jay_foreach_block(validate->func, block) {
         jay_foreach_phi_src_in_block(block, phi) {
            CHECK(BITSET_TEST(validate->defs, jay_phi_src_index(phi)));
         }
      }
   }

   free(validate->defs);
   free(validate->files);
}
|
||||
|
||||
void
|
||||
jay_validate(jay_shader *s, const char *when)
|
||||
{
|
||||
struct validate_state validate = { .when = when, .post_ra = s->post_ra };
|
||||
|
||||
jay_foreach_function(s, f) {
|
||||
validate.func = f;
|
||||
jay_validate_function(&validate);
|
||||
}
|
||||
|
||||
if (validate.failed) {
|
||||
fprintf(stderr, "jay shader that failed validation:\n");
|
||||
jay_print(stderr, s);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
217
src/intel/compiler/jay/jay_validate_ra.c
Normal file
217
src/intel/compiler/jay/jay_validate_ra.c
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
/* Validation doesn't make sense in release builds */
|
||||
#ifndef NDEBUG
|
||||
|
||||
/* Per-block model of the machine register files, used to check that every
 * source read after RA observes the SSA value that was assigned to that
 * physical register.
 */
struct regfile {
   /* For each register in each file, records the SSA index currently stored
    * in that register (or zero if undefined contents).
    */
   uint32_t *r[JAY_NUM_SSA_FILES];

   /* Size of each register file */
   size_t n[JAY_NUM_SSA_FILES];
};
|
||||
|
||||
static uint32_t *
|
||||
reg(struct regfile *rf, enum jay_file file, uint32_t reg)
|
||||
{
|
||||
/* FLAG and UFLAG share their registers. TODO: Rework? */
|
||||
if (file == UFLAG) {
|
||||
file = FLAG;
|
||||
}
|
||||
|
||||
assert(file < JAY_NUM_SSA_FILES);
|
||||
assert(reg < rf->n[file]);
|
||||
return &rf->r[file][reg];
|
||||
}
|
||||
|
||||
static uint32_t *
|
||||
def_reg(struct regfile *rf, jay_def x, uint32_t component)
|
||||
{
|
||||
return reg(rf, x.file, x.reg + component);
|
||||
}
|
||||
|
||||
static void
|
||||
print_regfile(struct regfile *rf, FILE *fp)
|
||||
{
|
||||
fprintf(fp, "regfile: \n");
|
||||
jay_foreach_ssa_file(file) {
|
||||
for (unsigned i = 0; i < rf->n[file]; ++i) {
|
||||
uint32_t v = *reg(rf, file, i);
|
||||
const char *prefixes = "ruf"; /* XXX: share with jay_print */
|
||||
|
||||
if (v) {
|
||||
fprintf(fp, " %c%u = %u\n", prefixes[file], i, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
}
|
||||
|
||||
/* Check that source `s` of instruction I reads the SSA values it expects.
 *
 * For each component of the def, the register file must currently hold the
 * SSA channel index recorded on the def. On mismatch (or an undefined
 * register), prints a diagnostic with the instruction and the full register
 * file, and returns false. GPR components additionally must not straddle a
 * stride boundary in the partition.
 */
static bool
validate_src(struct jay_partition *partition,
             jay_inst *I,
             unsigned s,
             struct regfile *rf,
             jay_def def)
{
   jay_foreach_comp(def, c) {
      /* SSA index the register actually holds right now (0 = undefined). */
      uint32_t actual = *def_reg(rf, def, c);

      if (def.file == GPR) {
         /* All components of a GPR def must share one stride. */
         assert(jay_gpr_to_stride(partition, def.reg) ==
                jay_gpr_to_stride(partition, def.reg + c));
      }

      if (actual == 0 || actual != jay_channel(def, c)) {
         fprintf(stderr, "invalid RA for source %u, channel %u.\n", s, c);

         fprintf(stderr, "expected index %u but", jay_channel(def, c));
         if (actual)
            fprintf(stderr, " got index %u\n", actual);
         else
            fprintf(stderr, " register is undefined\n");

         jay_print_inst(stderr, I);
         print_regfile(rf, stderr);
         return false;
      }
   }

   return true;
}
|
||||
|
||||
/* Validate RA within one block by simulating the register file.
 *
 * The entry state is the intersection of all predecessors' exit states
 * (registers that disagree across predecessors become undefined). Each
 * instruction's sources are then checked against the simulated state, and
 * its destinations update it. Returns false if any source check failed.
 */
static bool
validate_block(jay_function *func, jay_block *block, struct regfile *blocks)
{
   struct regfile *rf = &blocks[block->index];
   bool success = true;

   /* Pathological shaders can end up with loop headers that have only a
    * single predecessor and act like normal blocks. Validate them as such,
    * since RA treats them as such implicitly. Affects:
    *
    *    dEQP-VK.graphicsfuzz.spv-stable-mergesort-dead-code
    */
   bool loop_header = block->loop_header && jay_num_predecessors(block) > 1;

   /* Initialize the register file based on predecessors. */
   /* Initialize with the exit state of any one predecessor */
   jay_block *first_pred = jay_first_predecessor(block);
   if (first_pred) {
      struct regfile *pred_rf = &blocks[first_pred->index];

      jay_foreach_ssa_file(f) {
         memcpy(rf->r[f], pred_rf->r[f], rf->n[f] * sizeof(uint32_t));
      }
   }

   /* TODO: Handle loop header validation better */
   if (!loop_header) {
      /* Intersect with the other predecessor. If a register has different
       * values coming in from each block, it is considered undefined at the
       * start of the block.
       */
      jay_foreach_predecessor(block, pred) {
         struct regfile *pred_rf = &blocks[(*pred)->index];

         jay_foreach_ssa_file(file) {
            for (unsigned r = 0; r < rf->n[file]; ++r) {
               if (*reg(rf, file, r) != *reg(pred_rf, file, r)) {
                  *reg(rf, file, r) = 0;
               }
            }
         }
      }
   }

   jay_foreach_inst_in_block(block, I) {
      /* Validate sources */
      jay_foreach_ssa_src(I, s) {
         /* Skip defs with no recorded SSA channels (sentinel). */
         if (jay_channel(I->src[s], 0) != JAY_SENTINEL) {
            success &=
               validate_src(&func->shader->partition, I, s, rf, I->src[s]);
         }
      }

      /* Record destinations */
      jay_foreach_dst(I, dst) {
         if (jay_channel(dst, 0) != JAY_SENTINEL) {
            jay_foreach_comp(dst, c) {
               *def_reg(rf, dst, c) = jay_channel(dst, c);

               if (dst.file == GPR) {
                  /* All components of a GPR def share one stride. */
                  struct jay_partition *p = &func->shader->partition;
                  assert(jay_gpr_to_stride(p, dst.reg) ==
                         jay_gpr_to_stride(p, dst.reg + c));
               }
            }
         }
      }

      if (I->op == JAY_OPCODE_MOV &&
          jay_channel(I->dst, 0) == JAY_SENTINEL &&
          jay_is_ssa(I->src[0]) &&
          jay_channel(I->src[0], 0) == JAY_SENTINEL) {

         /* Lowered live range splits don't have SSA associated, handle
          * directly at the register level.
          */
         assert(jay_num_values(I->dst) == jay_num_values(I->src[0]));

         jay_foreach_comp(I->dst, c) {
            *def_reg(rf, I->dst, c) = *def_reg(rf, I->src[0], c);
         }
      } else if (I->op == JAY_OPCODE_SWAP) {
         /* SWAP exchanges the register contents component-wise. */
         assert(jay_num_values(I->src[0]) == jay_num_values(I->src[1]));

         jay_foreach_comp(I->src[0], c) {
            SWAP(*def_reg(rf, I->src[0], c), *def_reg(rf, I->src[1], c));
         }
      }
   }

   return success;
}
|
||||
|
||||
/* Validate the register allocation of an entire function.
 *
 * Allocates one simulated register file per block from a linear context,
 * runs validate_block() over every block, and on any failure prints the
 * function and aborts via UNREACHABLE.
 */
void
jay_validate_ra(jay_function *func)
{
   bool succ = true;
   linear_ctx *lin_ctx = linear_context(func->shader);
   struct regfile *blocks =
      linear_zalloc_array(lin_ctx, struct regfile, func->num_blocks);

   /* Size and zero-initialize every block's register file. Zero means
    * "undefined contents" (see struct regfile).
    */
   jay_foreach_block(func, block) {
      struct regfile *b = &blocks[block->index];
      assert(block->index < func->num_blocks);

      jay_foreach_ssa_file(file) {
         b->n[file] = jay_num_regs(func->shader, file);
         b->r[file] = linear_zalloc_array(lin_ctx, uint32_t, b->n[file]);
      }
   }

   /* NOTE(review): validate_block reads predecessors' exit state from
    * `blocks`, so this presumably relies on block iteration order visiting
    * predecessors first (loop headers excepted) — confirm.
    */
   jay_foreach_block(func, block) {
      succ &= validate_block(func, block, blocks);
   }

   if (!succ) {
      jay_print_func(stderr, func);
      UNREACHABLE("invalid RA");
   }

   linear_free_context(lin_ctx);
}
|
||||
|
||||
#endif /* NDEBUG */
|
||||
109
src/intel/compiler/jay/meson.build
Normal file
109
src/intel/compiler/jay/meson.build
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright 2017 Intel Corporation
# SPDX-License-Identifier: MIT

# Generated opcode tables: jay_opcodes.c (definitions) and jay_opcodes.h.
jay_opcodes = custom_target(
  input : ['jay_opcodes_gen.py'],
  output : ['jay_opcodes.c', 'jay_opcodes.h'],
  command : [prog_python, '@INPUT@', '--code', '@OUTPUT0@', '--header', '@OUTPUT1@'],
  depend_files : files('jay_opcodes.py'),
)

idep_jay_opcodes_h = declare_dependency(
  sources : [jay_opcodes[1]],
  include_directories : include_directories('.'),
)

# Generated per-opcode extra-info header.
jay_extra_info_h = custom_target(
  input : ['jay_extra_info.h.py'],
  output : 'jay_extra_info.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@'],
  depend_files : files('jay_opcodes.py'),
)

idep_jay_extra_info_h = declare_dependency(
  sources : [jay_extra_info_h],
  include_directories : include_directories('.'),
)

# Generated builder helpers (jay_FOO(...) wrappers per opcode).
jay_builder_opcodes_h = custom_target(
  input : 'jay_builder_opcodes.h.py',
  output : 'jay_builder_opcodes.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@'],
  depend_files : files('jay_opcodes.py'),
)

idep_jay_builder_opcodes_h = declare_dependency(
  sources : [jay_builder_opcodes_h],
  include_directories : include_directories('.'),
)

# NIR algebraic lowering generated from jay_nir_algebraic.py.
jay_nir_algebraic = custom_target(
  'jay_nir_algebraic.c',
  input : ['jay_nir_algebraic.py'],
  output : 'jay_nir_algebraic.c',
  command : [prog_python, '@INPUT@', '@OUTPUT@', '-p', dir_compiler_nir],
  depend_files : nir_algebraic_depends,
)

libintel_compiler_jay_files = files(
  'jay.h',
  'jay_assign_flags.c',
  'jay_from_nir.c',
  'jay_ir.h',
  'jay_liveness.c',
  'jay_lower_post_ra.c',
  'jay_lower_pre_ra.c',
  'jay_lower_scoreboard.c',
  'jay_lower_spill.c',
  'jay_opt_dead_code.c',
  'jay_opt_control_flow.c',
  'jay_opt_propagate.c',
  'jay_print.c',
  'jay_private.h',
  'jay_repair_ssa.c',
  'jay_register_allocate.c',
  'jay_simd_width.c',
  'jay_spill.c',
  'jay_to_binary.c',
  'jay_validate.c',
  'jay_validate_ra.c',
)

libintel_compiler_jay = static_library(
  'intel_compiler_jay',
  [libintel_compiler_jay_files, jay_nir_algebraic, jay_opcodes[0]],
  include_directories : [inc_include, inc_src, inc_intel],
  c_args : [no_override_init_args, '-Wno-c23-extensions', '-Wno-array-bounds'],
  gnu_symbol_visibility : 'hidden',
  dependencies : [idep_nir_headers, idep_jay_opcodes_h, idep_jay_builder_opcodes_h, idep_jay_extra_info_h, idep_mesautil, idep_intel_dev],
  build_by_default : false,
)

idep_intel_compiler_jay = declare_dependency(
  link_with : [libintel_compiler_jay],
  dependencies : [
    idep_nir,
    idep_vtn,
  ],
)

if with_tests
  test(
    'jay_tests',
    executable(
      'jay_tests',
      files(
        'test/test-lower-post-ra.cpp',
        'test/test-optimizer.cpp',
        'test/test-repair-ssa.cpp',
      ),
      # NOTE(review): sources are C++ but only c_args is set — presumably
      # cpp_args is intended (or also needed); confirm against other Mesa
      # gtest targets.
      c_args : [c_msvc_compat_args, no_override_init_args],
      gnu_symbol_visibility : 'hidden',
      include_directories : [inc_include, inc_src, inc_intel],
      dependencies: [idep_gtest, idep_nir, idep_jay_opcodes_h, idep_jay_builder_opcodes_h, idep_jay_extra_info_h, idep_mesautil, idep_intel_dev],
      link_with : [libintel_compiler_jay],
    ),
    suite : ['intel'],
    protocol : 'gtest',
  )
endif
|
||||
57
src/intel/compiler/jay/register-file.md
Normal file
57
src/intel/compiler/jay/register-file.md
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Glossary
|
||||
|
||||
**lane**: A single work-item.
|
||||
|
||||
**subgroup**: A collection of 8, 16, or 32 lanes executing in lockstep.
|
||||
Avoid using the term _thread_ as it is ambiguous.
|
||||
|
||||
**uniform**: A value that has the same value in every active lane of a subgroup.
|
||||
Sometimes called _convergent_. Opposite of "non-uniform".
|
||||
|
||||
**non-uniform**: A value that may have different values in different active
|
||||
lanes within a subgroup. Sometimes called _divergent_. Opposite of "uniform".
|
||||
|
||||
**GPR**: General-purpose register, a single non-uniform value viewed from the
|
||||
perspective of a single lane. This is a 'virtual' or 'logical' register within
|
||||
the SIMT programming model. It does not represent a physical machine
|
||||
register. For that, see "GRF".
|
||||
|
||||
**UGPR**: Uniform general purpose register, a single uniform value. This is
|
||||
again a virtual or logical register.
|
||||
|
||||
**GRF**: A physical Intel GPU register. On Xe2+, a GRF is 512-bits. On older
|
||||
platforms, a GRF is 256-bits. Depending on the platform and the SIMD width,
|
||||
different numbers of GRFs are required to store a single GPR, and different numbers
|
||||
of UGPRs fit into a single GRF. In SIMD32 mode on Xe2, 1 GPR requires 2 GRFs,
|
||||
and 16 UGPRs fit into 1 GRF.
|
||||
|
||||
**scalar**: A single value from the perspective of a single lane; a single GPR
|
||||
or UGPR. Note that a scalar may be either uniform or non-uniform. Opposite of
|
||||
"vector".
|
||||
|
||||
**vector**: A collection of multiple values from the perspective of a single
|
||||
lane. All scalars within the vector must uniformly be GPRs or UGPRs.
|
||||
|
||||
# Introduction
|
||||
|
||||
Jay separates the logical register files (GPR and UGPR) from the
|
||||
unified physical register file. We assign registers independently for each
|
||||
logical file, and then post-RA we remap to physical GRFs. This simplifies RA.
|
||||
|
||||
We decide a static GPR/UGPR split up front. Ideally, we'd just use the
|
||||
first N registers for GPRs and the rest for UGPRs, or something like
|
||||
that. Unfortunately, several hardware issues complicate this scheme...
|
||||
|
||||
# End-of-thread SENDs
|
||||
|
||||
End-of-thread SENDs require their source to be in r112-r127. As their source will
|
||||
always be per-thread, we want to make sure these are GPRs.
|
||||
|
||||
# Payloads
|
||||
|
||||
At the start of each thread, the register file is preloaded with a payload.
|
||||
Parts of the payload act like UGPRs, parts act like GPRs, and parts act like...
|
||||
something weird and in between. To minimize copying, we want to assign UGPRs to
|
||||
the UGPR parts of the payload and GPRs to the GPR parts. As for the weird cases,
|
||||
we model them as UGPR vectors and use special opcodes (lowered late to
|
||||
regioning) to unpack to GPRs for normal handling.
|
||||
141
src/intel/compiler/jay/test/jay_test.h
Normal file
141
src/intel/compiler/jay/test/jay_test.h
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <inttypes.h>
|
||||
#include "jay_builder.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_private.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
static inline jay_block *
|
||||
jay_test_block(jay_function *f)
|
||||
{
|
||||
jay_block *blk = jay_new_block(f);
|
||||
list_addtail(&blk->link, &f->blocks);
|
||||
return blk;
|
||||
}
|
||||
|
||||
/* Helper to generate a jay_builder suitable for creating test instructions.
 *
 * Builds a minimal SIMD32 compute shader with a synthetic Xe2 (verx10=200)
 * device info and a single empty block; the returned builder inserts at the
 * end of that block. Everything is ralloc'd off memctx, so freeing memctx
 * tears the whole fixture down.
 */
static inline jay_builder *
jay_test_builder(void *memctx)
{
   jay_shader *s = jay_new_shader(memctx, MESA_SHADER_COMPUTE);
   jay_function *f = jay_new_function(s);
   s->partition.base8 = 8;

   struct intel_device_info *devinfo =
      rzalloc(memctx, struct intel_device_info);
   s->devinfo = devinfo;
   s->dispatch_width = 32;

   unsigned verx10 = 200;
   devinfo->verx10 = verx10;
   devinfo->ver = verx10 / 10;
   assert(devinfo->ver > 0);

   /* We'll use low indices for test values */
   f->ssa_alloc = 10;

   jay_builder *b = rzalloc(memctx, jay_builder);
   *b = jay_init_builder(f, jay_after_block(jay_test_block(f)));
   return b;
}
|
||||
|
||||
/* Helper to compare for logical equality of instructions. Need to compare the
 * pointers, then compare raw data.
 *
 * Implementation: memcmp the plain-old-data portion of jay_inst (skipping the
 * leading list_head link, which differs between lists), then memcmp the
 * trailing variable-length source array and opcode-specific info.
 * NOTE(review): this assumes jay_inst has no padding holes with garbage and
 * no pointer fields beyond the link — confirm against the jay_inst layout.
 */
static inline bool
jay_inst_equal(jay_inst *A, jay_inst *B)
{
   /* Check the plain old data portion of jay_inst. */
   unsigned header = sizeof(struct list_head);
   if (memcmp((uint8_t *) A + header, (uint8_t *) B + header,
              sizeof(jay_inst) - header))
      return false;

   /* All of the sizes are plain data. They match, so do a deep compare. */
   size_t size = (A->num_srcs * sizeof(jay_def)) + jay_inst_info_size(A);
   return !memcmp(A->src, B->src, size);
}
|
||||
|
||||
static inline bool
|
||||
jay_block_equal(jay_block *A, jay_block *B)
|
||||
{
|
||||
if (list_length(&A->instructions) != list_length(&B->instructions))
|
||||
return false;
|
||||
|
||||
list_pair_for_each_entry(jay_inst, I, J, &A->instructions, &B->instructions,
|
||||
link) {
|
||||
if (!jay_inst_equal(I, J)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
jay_function_equal(jay_function *A, jay_function *B)
|
||||
{
|
||||
if (list_length(&A->blocks) != list_length(&B->blocks))
|
||||
return false;
|
||||
|
||||
list_pair_for_each_entry(jay_block, blockA, blockB, &A->blocks, &B->blocks,
|
||||
link) {
|
||||
if (!jay_block_equal(blockA, blockB))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
jay_shader_equal(jay_shader *A, jay_shader *B)
|
||||
{
|
||||
if (list_length(&A->functions) != list_length(&B->functions))
|
||||
return false;
|
||||
|
||||
list_pair_for_each_entry(jay_function, functionA, functionB, &A->functions,
|
||||
&B->functions, link) {
|
||||
if (!jay_function_equal(functionA, functionB))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Compare two shaders for logical equality; on mismatch, record a gtest
 * failure and dump both shaders to stderr.
 *
 * Fixes relative to the original: the body is wrapped in do { } while (0)
 * so the macro expands to a single statement and composes safely with
 * if/else at call sites (the bare `if` form is a dangling-else hazard),
 * and the first diagnostic line gets its missing trailing newline so it no
 * longer runs into " Actual:".
 */
#define ASSERT_SHADER_EQUAL(A, B)                           \
   do {                                                     \
      if (!jay_shader_equal(A, B)) {                        \
         ADD_FAILURE();                                     \
         fprintf(stderr, "Pass produced unexpected results\n"); \
         fprintf(stderr, " Actual:\n");                     \
         jay_print(stderr, A);                              \
         fprintf(stderr, " Expected:\n");                   \
         jay_print(stderr, B);                              \
         fprintf(stderr, "\n");                             \
      }                                                     \
   } while (0)
|
||||
|
||||
/* Core test harness: build shader A from `instr` and shader B from
 * `expected` (each body sees its builder as `b`), optionally validate the
 * setup, run `pass` on A, then assert A and B are logically equal.
 * Requires `mem_ctx` in scope at the call site.
 */
#define INSTRUCTION_CASE_GEN(instr, expected, pass, validate) \
   do { \
      jay_builder *A = jay_test_builder(mem_ctx); \
      jay_builder *B = jay_test_builder(mem_ctx); \
      { \
         jay_builder *b = A; \
         instr; \
      } \
      if (validate) \
         jay_validate(A->shader, "test setup"); \
      { \
         jay_builder *b = B; \
         expected; \
      } \
      JAY_PASS(A->shader, pass); \
      ASSERT_SHADER_EQUAL(A->shader, B->shader); \
   } while (0)

/* Common case: validate the test setup before running the pass. */
#define INSTRUCTION_CASE(instr, expected, pass) \
   INSTRUCTION_CASE_GEN(instr, expected, pass, true)
|
||||
82
src/intel/compiler/jay/test/test-lower-post-ra.cpp
Normal file
82
src/intel/compiler/jay/test/test-lower-post-ra.cpp
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "jay_builder.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_test.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
/* CASE: run jay_lower_post_ra on `instr` and expect `expected`; both
 * shaders are flagged post_ra so post-RA rules apply.
 */
#define CASE(instr, expected) \
   INSTRUCTION_CASE( \
      { \
         A->shader->post_ra = true; \
         instr; \
      }, \
      { \
         B->shader->post_ra = true; \
         expected; \
      }, \
      jay_lower_post_ra)

/* Shorthands for predication helpers used throughout the cases below. */
#define PRE jay_add_predicate_else
#define POST jay_add_predicate
#define CFLAG jay_set_cond_flag

/* Negative case: the pass must leave the instruction unchanged. */
#define NEGCASE(x) CASE(x, x)
|
||||
|
||||
/* Fixture for post-RA lowering tests: pre-allocates a few bare GPR, UGPR
 * and FLAG registers so the cases can reference fixed physical registers.
 */
class LowerPostRA : public testing::Test {
protected:
   LowerPostRA()
   {
      mem_ctx = ralloc_context(NULL);

      x = jay_bare_reg(GPR, 1);
      y = jay_bare_reg(GPR, 2);
      z = jay_bare_reg(GPR, 3);
      u4 = jay_bare_reg(UGPR, 4);
      f0 = jay_bare_reg(FLAG, 0);
      f1 = jay_bare_reg(FLAG, 1);
      f2 = jay_bare_reg(FLAG, 2);
   }

   ~LowerPostRA()
   {
      ralloc_free(mem_ctx);
   }

   jay_inst *I;
   /* ralloc parent for everything the cases allocate. */
   void *mem_ctx;
   jay_def x, y, z, u4, f0, f1, f2, nul = jay_null();
};
|
||||
|
||||
/* Predicate-else on an ADD whose default equals the destination lowers to a
 * plain predicated ADD (the "else" copy is a no-op).
 */
TEST_F(LowerPostRA, Tied)
{
   CASE(PRE(b, jay_ADD(b, JAY_TYPE_U32, z, x, y), f0, z),
        POST(b, jay_ADD(b, JAY_TYPE_U32, z, x, y), f0));

   CASE(PRE(b, jay_ADD(b, JAY_TYPE_U32, z, x, y), jay_negate(f0), z),
        POST(b, jay_ADD(b, JAY_TYPE_U32, z, x, y), jay_negate(f0)));
}

/* When the default differs from the destination, the lowering must insert a
 * MOV predicated on the inverted flag before the predicated ADD.
 */
TEST_F(LowerPostRA, InsertMove)
{
   CASE(PRE(b, jay_ADD(b, JAY_TYPE_U32, z, x, y), f0, x), {
      POST(b, jay_MOV(b, z, x), jay_negate(f0));
      POST(b, jay_ADD(b, JAY_TYPE_U32, z, x, y), f0);
   });
}

/* A predicated MOV with a default collapses into a single SEL. */
TEST_F(LowerPostRA, RewriteToSel)
{
   CASE(PRE(b, jay_MOV(b, z, y), f0, x),
        jay_SEL(b, JAY_TYPE_U32, z, x, y, jay_negate(f0)));
}

/* GPR<->UGPR copies must survive the pass untouched. */
TEST_F(LowerPostRA, CopyUGPR)
{
   NEGCASE(jay_MOV(b, x, u4));
   NEGCASE(jay_MOV(b, u4, x));
}
|
||||
312
src/intel/compiler/jay/test/test-optimizer.cpp
Normal file
312
src/intel/compiler/jay/test/test-optimizer.cpp
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "util/lut.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_private.h"
|
||||
#include "jay_test.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
/* Pass pipeline under test: both propagation directions followed by DCE. */
static void
jay_optimize_and_dce(jay_shader *shader)
{
   JAY_PASS(shader, jay_opt_propagate_forwards);
   JAY_PASS(shader, jay_opt_propagate_backwards);
   JAY_PASS(shader, jay_opt_dead_code);
}
|
||||
|
||||
/* CASE: optimize+DCE `instr` and expect `expected`. Both sides are capped
 * with jay_UNIT_TEST_u32(b, out) so `out` is kept live through DCE.
 */
#define CASE(instr, expected) \
   INSTRUCTION_CASE( \
      { \
         instr; \
         jay_UNIT_TEST_u32(b, out); \
      }, \
      { \
         expected; \
         jay_UNIT_TEST_u32(b, out); \
      }, \
      jay_optimize_and_dce)

/* Negative case: the optimizer must not touch the instruction. */
#define NEGCASE(instr) CASE(instr, instr)
#define UNIT jay_UNIT_TEST_u32

#define NEG(x) jay_negate(x)

/* Emit a typed copy into a fresh GPR and yield that def (GNU statement
 * expression), so tests can feed a MOV-of-modifier into another op.
 */
#define MOV(T, src0) \
   ({ \
      jay_def dst = jay_alloc_def(b, GPR, 1); \
      jay_MODIFIER(b, T, dst, src0); \
      dst; \
   })
|
||||
|
||||
/* Fixture for optimizer tests: `out` is a fixed GPR the CASE macro keeps
 * live, and wx/wy/wz are wildcard operands in TEST_FILE.
 */
class Optimizer : public testing::Test {
protected:
   Optimizer()
   {
      mem_ctx = ralloc_context(NULL);

      out = jay_scalar(GPR, 8);
      /* NOTE(review): wx, wy and wz all use index 1 — presumably TEST_FILE
       * defs are wildcards where the index is irrelevant; confirm this is
       * not a copy-paste of the intended 1/2/3.
       */
      wx = jay_scalar(TEST_FILE, 1);
      wy = jay_scalar(TEST_FILE, 1);
      wz = jay_scalar(TEST_FILE, 1);
   }

   ~Optimizer()
   {
      ralloc_free(mem_ctx);
   }

   void *mem_ctx;

   jay_def out, wx, wy, wz;
};

/* Float types exercised by the modifier-fusion cases below. */
static enum jay_type float_types[] = {
   JAY_TYPE_F16,
   JAY_TYPE_F32,
};
|
||||
|
||||
/* A plain MOV feeding an ADD source is copy-propagated away.
 * NOTE(review): the two CASEs are identical — possibly a copy-paste where
 * the second was meant to vary (e.g. the other operand); confirm intent.
 */
TEST_F(Optimizer, Copyprop)
{
   CASE(jay_ADD(b, JAY_TYPE_U32, out, wx, jay_MOV_u32(b, wy)),
        jay_ADD(b, JAY_TYPE_U32, out, wx, wy));

   CASE(jay_ADD(b, JAY_TYPE_U32, out, wx, jay_MOV_u32(b, wy)),
        jay_ADD(b, JAY_TYPE_U32, out, wx, wy));
}

/* A MOV carrying a float negate folds into the consumer as a source
 * modifier, including double-negation through MAD.
 */
TEST_F(Optimizer, FusedNeg)
{
   for (unsigned i = 0; i < ARRAY_SIZE(float_types); ++i) {
      enum jay_type T = float_types[i];

      CASE(jay_ADD(b, T, out, wx, MOV(T, NEG(wy))),
           jay_ADD(b, T, out, wx, NEG(wy)));

      CASE(jay_MUL(b, T, out, MOV(T, NEG(wy)), NEG(wx)),
           jay_MUL(b, T, out, NEG(wy), NEG(wx)));

      CASE(jay_MAD(b, T, out, MOV(T, NEG(wy)), wz, NEG(MOV(T, NEG(wx)))),
           jay_MAD(b, T, out, NEG(wy), wz, wx));
   }
}

/* Folding a float-negated MOV into a U32 SEL retypes the SEL to F32 so the
 * negate modifier stays meaningful.
 */
TEST_F(Optimizer, SELToFloat)
{
   CASE(
      {
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_ADD(b, JAY_TYPE_S32, x, wx, NEG(wy));
         jay_CMP(b, JAY_TYPE_S32, JAY_CONDITIONAL_LT, flag, 3, x);
         jay_SEL(b, JAY_TYPE_U32, out, wx, MOV(JAY_TYPE_F32, NEG(wy)), flag);
      },
      {
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_ADD(b, JAY_TYPE_S32, x, wx, NEG(wy));
         jay_CMP(b, JAY_TYPE_S32, JAY_CONDITIONAL_LT, flag, 3, x);
         jay_SEL(b, JAY_TYPE_F32, out, wx, NEG(wy), flag);
      });
}

/* A NOT feeding a bitwise op folds into the consumer: BFN rewrites its LUT,
 * AND/OR/XOR absorb it as a (logical) negate source modifier.
 */
TEST_F(Optimizer, FusedNot)
{
   CASE(jay_BFN(b, out, wx, jay_NOT_u32(b, wy), 0, UTIL_LUT3(a & b)),
        jay_BFN(b, out, wx, wy, 0, UTIL_LUT3(a & ~b)));

   CASE(jay_AND(b, JAY_TYPE_U32, out, wx, jay_NOT_u32(b, wy)),
        jay_AND(b, JAY_TYPE_U32, out, wx, jay_negate(wy)));

   CASE(jay_XOR(b, JAY_TYPE_U32, out, jay_NOT_u32(b, wx), wy),
        jay_XOR(b, JAY_TYPE_U32, out, jay_negate(wx), wy));

   CASE(jay_OR(b, JAY_TYPE_U32, out, jay_NOT_u32(b, wx), jay_NOT_u32(b, wy)),
        jay_OR(b, JAY_TYPE_U32, out, jay_negate(wx), jay_negate(wy)));
}
|
||||
|
||||
/* A float negate must NOT fold into an integer-typed consumer. */
TEST_F(Optimizer, NegativeFusedFneg)
{
   for (unsigned i = 0; i < ARRAY_SIZE(float_types); ++i) {
      enum jay_type T = float_types[i];
      NEGCASE(jay_ADD(b, JAY_TYPE_U32, out, wx, MOV(T, NEG(wy))));
      NEGCASE(jay_ADD(b, JAY_TYPE_S32, out, wx, MOV(T, NEG(wy))));
   }
}

/* TODO: test fneg with f64 */

/* A saturating MOV of an op's result folds back as the op's own saturate
 * modifier (together with any fused negates).
 */
TEST_F(Optimizer, FusedSat)
{
   for (unsigned i = 0; i < ARRAY_SIZE(float_types); ++i) {
      enum jay_type T = float_types[i];

      CASE(
         {
            jay_def x = jay_alloc_def(b, GPR, 1);
            jay_ADD(b, T, x, wx, MOV(T, NEG(wy)));
            jay_MODIFIER(b, T, out, x)->saturate = true;
         },
         { jay_ADD(b, T, out, wx, NEG(wy))->saturate = true; });

      CASE(
         {
            jay_def x = jay_alloc_def(b, GPR, 1);
            jay_MUL(b, T, x, wx, MOV(T, NEG(wy)));
            jay_MODIFIER(b, T, out, x)->saturate = true;
         },
         { jay_MUL(b, T, out, wx, NEG(wy))->saturate = true; });

      CASE(
         {
            jay_def x = jay_alloc_def(b, GPR, 1);
            jay_MAX(b, T, x, wx, MOV(T, NEG(wy)))->saturate = true;
            jay_MODIFIER(b, T, out, x)->saturate = true;
         },
         { jay_MAX(b, T, out, wx, NEG(wy))->saturate = true; });
   }
}

/* A UGPR result copied into a flag propagates so the producer writes the
 * flag directly; the now-dead UGPR def survives only as an allocation.
 */
TEST_F(Optimizer, InverseBallotPropagate)
{
   CASE(
      {
         jay_def x = jay_alloc_def(b, UGPR, 1);
         jay_def f = jay_alloc_def(b, FLAG, 1);
         jay_ADD(b, JAY_TYPE_U32, x, wx, wy);
         jay_MOV(b, f, x);
         jay_SEL(b, JAY_TYPE_U32, out, wx, wy, f);
      },
      {
         UNUSED jay_def x = jay_alloc_def(b, UGPR, 1);
         jay_def f = jay_alloc_def(b, FLAG, 1);
         jay_ADD(b, JAY_TYPE_U32, f, wx, wy);
         jay_SEL(b, JAY_TYPE_U32, out, wx, wy, f);
      });
}

/* CMP(0 < x) after an ADD becomes a GT conditional mod on the ADD itself
 * (condition direction flips because operands swap).
 */
TEST_F(Optimizer, GtZero)
{
   CASE(
      {
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_ADD(b, JAY_TYPE_S32, x, wx, NEG(wy));
         jay_CMP(b, JAY_TYPE_S32, JAY_CONDITIONAL_LT, flag, 0, x);
         jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
      },
      {
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_inst *add = jay_ADD(b, JAY_TYPE_S32, x, wx, NEG(wy));
         jay_set_conditional_mod(b, add, flag, JAY_CONDITIONAL_GT);
         jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
      });
}
|
||||
|
||||
/* Only one of two CMPs on the same def can fuse as a conditional mod; the
 * other must remain a standalone CMP.
 */
TEST_F(Optimizer, MultipleCmp)
{
   CASE(
      {
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def flag2 = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_ADD(b, JAY_TYPE_S32, x, wx, NEG(wy));
         jay_CMP(b, JAY_TYPE_S32, JAY_CONDITIONAL_LT, flag, 0, x);
         jay_CMP(b, JAY_TYPE_S32, JAY_CONDITIONAL_GT, flag2, 0, x);
         jay_SEL(b, JAY_TYPE_U32, out, x, jay_SEL_u32(b, x, 123, flag), flag2);
      },
      {
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def flag2 = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_inst *add = jay_ADD(b, JAY_TYPE_S32, x, wx, NEG(wy));
         jay_set_conditional_mod(b, add, flag, JAY_CONDITIONAL_GT);
         jay_CMP(b, JAY_TYPE_S32, JAY_CONDITIONAL_GT, flag2, 0, x);
         jay_SEL(b, JAY_TYPE_U32, out, x, jay_SEL_u32(b, x, jay_SEL_u32(b, x, 123, flag), flag2), flag2);
      });
}

/* EQ/NE against zero ignore signedness, so the CMP can fuse onto a bitwise
 * producer even when the types differ in sign.
 */
TEST_F(Optimizer, TypeNeutralConditionalMods)
{
   enum jay_conditional_mod mods[] = {
      JAY_CONDITIONAL_NE,
      JAY_CONDITIONAL_EQ,
   };

   for (unsigned i = 0; i < 2; ++i) {
      CASE(
         {
            jay_def flag = jay_alloc_def(b, FLAG, 1);
            jay_def x = jay_alloc_def(b, GPR, 1);
            jay_BFN(b, x, wx, wy, wz, UTIL_LUT3(a & b & c));
            jay_CMP(b, JAY_TYPE_S32, mods[i], flag, x, 0);
            jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
         },
         {
            jay_def flag = jay_alloc_def(b, FLAG, 1);
            jay_def x = jay_alloc_def(b, GPR, 1);
            jay_inst *bfn3 = jay_BFN(b, x, wx, wy, wz, UTIL_LUT3(a & b & c));
            jay_set_conditional_mod(b, bfn3, flag, mods[i]);
            jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
         });

      CASE(
         {
            jay_def flag = jay_alloc_def(b, FLAG, 1);
            jay_def x = jay_alloc_def(b, GPR, 1);
            jay_AND(b, JAY_TYPE_U32, x, wx, wy);
            jay_CMP(b, JAY_TYPE_S32, mods[i], flag, x, 0);
            jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
         },
         {
            jay_def flag = jay_alloc_def(b, FLAG, 1);
            jay_def x = jay_alloc_def(b, GPR, 1);
            jay_inst *an = jay_AND(b, JAY_TYPE_U32, x, wx, wy);
            jay_set_conditional_mod(b, an, flag, mods[i]);
            jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
         });
   }
}
|
||||
|
||||
/* Signed ordering comparisons (LE/GT) must NOT fuse onto an unsigned
 * bitwise producer — the sign of the result matters for the comparison.
 *
 * Improvement: the loop bound is now ARRAY_SIZE(mods) instead of a
 * hard-coded 2, matching the ARRAY_SIZE(float_types) idiom used elsewhere
 * in this file and staying correct if mods[] grows.
 */
TEST_F(Optimizer, SignednessMismatchConditionalMods)
{
   enum jay_conditional_mod mods[] = {
      JAY_CONDITIONAL_LE,
      JAY_CONDITIONAL_GT,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(mods); ++i) {
      NEGCASE({
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_BFN(b, x, wx, wy, wz, UTIL_LUT3(a & b & c));
         jay_CMP(b, JAY_TYPE_S32, mods[i], flag, x, 0);
         jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
      });
   }
}
|
||||
|
||||
/* Float conditional mods must NOT be folded into an integer bitwise
 * producer: an F32 comparison (including NaN handling) is meaningless on a
 * BFN result. NEGCASE asserts the optimizer leaves the IR alone.
 */
TEST_F(Optimizer, FloatMismatchConditionalMods)
{
   enum jay_conditional_mod mods[] = {
      JAY_CONDITIONAL_NAN,
      JAY_CONDITIONAL_EQ,
      JAY_CONDITIONAL_NE,
      JAY_CONDITIONAL_LT,
   };

   /* BUG FIX: the loop previously ran `i < 2`, which only exercised NAN and
    * EQ — the NE and LT table entries were dead (apparent copy-paste of the
    * two-entry loop from the neighboring tests). Iterate the whole table.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(mods); ++i) {
      NEGCASE({
         jay_def flag = jay_alloc_def(b, FLAG, 1);
         jay_def x = jay_alloc_def(b, GPR, 1);
         jay_BFN(b, x, wx, wy, wz, UTIL_LUT3(a & b & c));
         jay_CMP(b, JAY_TYPE_F32, mods[i], flag, x, 0);
         jay_SEL(b, JAY_TYPE_U32, out, x, 123, flag);
      });
   }
}
|
||||
213
src/intel/compiler/jay/test/test-repair-ssa.cpp
Normal file
213
src/intel/compiler/jay/test/test-repair-ssa.cpp
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2024 Alyssa Rosenzweig
|
||||
* Copyright 2022 Collabora, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "jay_builder.h"
|
||||
#include "jay_builder_opcodes.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_test.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
/* Wrap jay_repair_ssa so it can be run as a whole-function pass by the
 * INSTRUCTION_CASE_GEN test harness.
 */
JAY_DEFINE_FUNCTION_PASS(pass, jay_repair_ssa)

/* Build a before/after pair from ONE statement list: the first copy runs
 * with `repaired = false` (the broken-SSA input fed to the pass) and the
 * second with `repaired = true` (the hand-written expected output). Each
 * test body branches on `repaired` to describe both forms. ssa_alloc is
 * reset to 1 on both sides so defs get identical indices, making the two
 * programs directly comparable.
 */
#define CASE(instr)                                                           \
   INSTRUCTION_CASE_GEN(                                                      \
      {                                                                       \
         UNUSED bool repaired = false;                                        \
         b->func->ssa_alloc = 1;                                              \
         instr                                                                \
      },                                                                      \
      {                                                                       \
         UNUSED bool repaired = true;                                         \
         b->func->ssa_alloc = 1;                                              \
         instr                                                                \
      },                                                                      \
      pass, false)
|
||||
|
||||
class RepairSSA : public testing::Test {
|
||||
protected:
|
||||
RepairSSA()
|
||||
{
|
||||
mem_ctx = ralloc_context(NULL);
|
||||
}
|
||||
|
||||
~RepairSSA()
|
||||
{
|
||||
ralloc_free(mem_ctx);
|
||||
}
|
||||
|
||||
void *mem_ctx;
|
||||
};
|
||||
|
||||
/*
 * Emit a 2-source phi at the current cursor. Allocates the phi destination
 * in v1's register file, emits JAY_PHI_DST there, then appends a
 * JAY_PHI_SRC at the end of each predecessor block (p1 gets v1, p2 gets
 * v2). If v2 is null, the second source is the phi itself — a self-source,
 * as used for loop back-edges in these tests. The builder's cursor is
 * saved and restored so the caller can keep emitting where it left off.
 * Returns the phi's def.
 */
static jay_def
jay_phi_2(jay_builder *b, jay_block *p1, jay_def v1, jay_block *p2, jay_def v2)
{
   assert(v2.file == v1.file || jay_is_null(v2));
   jay_def idx = jay_alloc_def(b, v1.file, 1);
   jay_PHI_DST(b, idx);
   jay_cursor saved = b->cursor;

   /* Each phi source lives at the end of its predecessor block */
   b->cursor = jay_after_block(p1);
   jay_PHI_SRC_u32(b, v1, jay_index(idx));

   b->cursor = jay_after_block(p2);
   jay_PHI_SRC_u32(b, jay_is_null(v2) ? idx : v2, jay_index(idx));

   b->cursor = saved;
   return idx;
}
|
||||
|
||||
/* Straight-line (single-block) repair: the unrepaired form writes x twice,
 * violating SSA. The pass should rename the second definition to a fresh
 * value and forward it to the use, matching the hand-written repaired form.
 */
TEST_F(RepairSSA, Local)
{
   CASE({
      jay_def x = jay_MOV_u32(b, 0xcafe);
      jay_def y = jay_MOV_u32(b, 0xefac);

      if (repaired) {
         jay_UNIT_TEST(b, jay_ADD_f32(b, y, x));
      } else {
         /* Redefines x — not SSA */
         jay_ADD(b, JAY_TYPE_F32, x, y, x);
         jay_UNIT_TEST(b, x);
      }
   });
}
|
||||
|
||||
/* Diamond CFG:
 *
 *     A
 *    / \
 *   B   C
 *    \ /
 *     D
 *
 * x is written in B and rewritten in C; repair must rename the C definition
 * and insert a phi at the join D. y is already in SSA form (y/y2/y3 with an
 * explicit phi), so the pass must leave it untouched on both sides.
 */
TEST_F(RepairSSA, IfElse)
{
   CASE({
      jay_block *A = jay_first_block(b->func);
      jay_block *B = jay_test_block(b->func);
      jay_block *C = jay_test_block(b->func);
      jay_block *D = jay_test_block(b->func);

      jay_block_add_successor(A, B);
      jay_block_add_successor(A, C);

      jay_block_add_successor(B, D);
      jay_block_add_successor(C, D);

      b->cursor = jay_after_block(A);
      jay_IF(b);

      b->cursor = jay_after_block(B);
      jay_def x = jay_MOV_u32(b, 0xcafe);
      jay_def y = jay_MOV_u32(b, 0xbade);

      b->cursor = jay_after_block(C);
      jay_ELSE(b);
      /* Unrepaired: the else-branch clobbers x. Repaired: a fresh def x2. */
      jay_def x2 = repaired ? jay_alloc_def(b, UGPR, 1) : x;
      jay_MOV(b, x2, 0xefac);
      jay_def y2 = jay_MOV_u32(b, 0xbaee);
      jay_ENDIF(b);

      b->cursor = jay_after_block(D);
      /* y's phi exists in both variants; x's phi only after repair */
      jay_def y3 = jay_phi_2(b, B, y, C, y2);
      if (repaired)
         x = jay_phi_2(b, B, x, C, x2);

      jay_UNIT_TEST(b, jay_ADD_f32(b, x, y3));
   });
}
|
||||
|
||||
/* Loop CFG (A is the loop header, D the back-edge, B breaks to E):
 *
 *     H
 *     |
 *     A---+
 *    / \  |
 *   B   C |
 *   |   | |
 *   |   D-+
 *   |
 *   +-E
 *
 * x is defined before the loop and incremented inside it, so repair must
 * materialize a loop header phi: x_in merges the initial value (from H)
 * with the incremented x_out (from the back-edge D).
 */
TEST_F(RepairSSA, Loop)
{
   CASE({
      jay_block *H = jay_first_block(b->func);
      jay_block *A = jay_test_block(b->func);
      jay_block *B = jay_test_block(b->func);
      jay_block *C = jay_test_block(b->func);
      jay_block *D = jay_test_block(b->func);
      jay_block *E = jay_test_block(b->func);

      jay_block_add_successor(H, A);
      jay_block_add_successor(A, B);
      jay_block_add_successor(A, C);
      jay_block_add_successor(B, E);
      jay_block_add_successor(C, D);
      jay_block_add_successor(D, A);

      A->loop_header = true;

      b->cursor = jay_after_block(H);
      jay_def x = jay_MOV_u32(b, 0xcafe);

      /* Unrepaired: x_in and x_out alias x, which is redefined each
       * iteration (not SSA). Repaired: fresh defs joined by a header phi.
       */
      b->cursor = jay_after_block(A);
      jay_def x_in = repaired ? jay_alloc_def(b, UGPR, 1) : x;
      jay_def x_out = repaired ? jay_alloc_def(b, UGPR, 1) : x;
      if (repaired) {
         jay_PHI_DST(b, x_in);
      }
      jay_IF(b);

      /* Phi source for the loop entry edge H -> A */
      b->cursor = jay_after_block(H);
      if (repaired) {
         jay_PHI_SRC_u32(b, x, jay_index(x_in));
      }

      b->cursor = jay_after_block(B);
      jay_BREAK(b);

      /* Loop body increment, plus the back-edge phi source D -> A */
      b->cursor = jay_after_block(D);
      jay_ADD(b, JAY_TYPE_U32, x_out, x_in, 1);
      if (repaired) {
         jay_PHI_SRC_u32(b, x_out, jay_index(x_in));
      }
      jay_WHILE(b);

      /* Use after the loop reads the header phi's value */
      b->cursor = jay_after_block(E);
      jay_UNIT_TEST(b, x_in);
   });
}
|
||||
|
||||
/* Same diamond CFG as IfElse, but x is defined once in A, which dominates
 * the use in D — no phi is needed. The pass must not insert one: the
 * repaired program is instruction-for-instruction identical to the input.
 */
TEST_F(RepairSSA, TrivialPhisOptimized)
{
   CASE({
      jay_block *A = jay_first_block(b->func);
      jay_block *B = jay_test_block(b->func);
      jay_block *C = jay_test_block(b->func);
      jay_block *D = jay_test_block(b->func);

      jay_block_add_successor(A, B);
      jay_block_add_successor(A, C);

      jay_block_add_successor(B, D);
      jay_block_add_successor(C, D);

      b->cursor = jay_after_block(A);
      jay_def x = jay_MOV_u32(b, 0xcafe);
      jay_IF(b);

      b->cursor = jay_after_block(C);
      jay_ELSE(b);
      jay_ENDIF(b);

      b->cursor = jay_after_block(D);
      /* NOTE(review): the repaired side adds no instructions, only bumps
       * ssa_alloc — presumably the pass still allocates an index for the
       * trivial phi before optimizing it away. TODO confirm against
       * jay_repair_ssa.
       */
      if (repaired) {
         b->func->ssa_alloc++;
      }

      jay_UNIT_TEST(b, jay_ADD_f32(b, x, x));
   });
}
|
||||
|
|
@ -35,6 +35,7 @@ brw_device_sha1_gen_src = custom_target('brw_device_sha1_gen.c',
|
|||
command : [prog_python, '@INPUT0@', '--out', '@OUTPUT@'])
|
||||
|
||||
subdir('brw')
|
||||
subdir('jay')
|
||||
|
||||
if with_intel_elk
|
||||
subdir('elk')
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue