mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-23 23:48:18 +02:00
jay: schedule for pressure
Implement a simple pre-RA bottom-up list scheduler with the goal of decreasing register pressure. On Xe2, this significantly reduces spilling. SSA form allows us to estimate register demand cheaply and accurately, which theoretically [1] gives this algorithm the two Hippocratic properties: 1. Shaders with low register pressure are unaffected. 2. Register pressure can only be decreased, never increased. In other words: first, do no harm. The heuristic itself is very simple: greedily choose instructions that decrease liveness using a backwards list scheduler. This is far from optimal! But thanks to the above properties, even a heuristic that picked random instructions would be a win overall - by construction, we can only ever win. In other words: this scheduler is your older brother powering off the game console any time he's about to lose a game, maintaining a 100% win rate. [1] In reality, neither property is strictly satisfied due to the messy details of mapping our clean logical model onto Intel's many weird physical register files. Nevertheless, the algorithm is well-motivated and the empirical results on Xe2 are excellent. 
SIMD16: Totals: Instrs: 2754194 -> 2753957 (-0.01%); split: -0.23%, +0.22% CodeSize: 41094768 -> 41092768 (-0.00%); split: -0.23%, +0.23% Number of spill instructions: 1724 -> 1129 (-34.51%) Number of fill instructions: 1912 -> 1119 (-41.47%) Totals from 168 (6.35% of 2647) affected shaders: Instrs: 850994 -> 850757 (-0.03%); split: -0.75%, +0.73% CodeSize: 12825680 -> 12823680 (-0.02%); split: -0.74%, +0.73% Number of spill instructions: 1724 -> 1129 (-34.51%) Number of fill instructions: 1912 -> 1119 (-41.47%) SIMD32: Totals: Instrs: 4688858 -> 4557800 (-2.80%); split: -3.53%, +0.74% CodeSize: 70177200 -> 68214816 (-2.80%); split: -3.53%, +0.74% Number of spill instructions: 50316 -> 45795 (-8.99%); split: -9.56%, +0.57% Number of fill instructions: 51526 -> 45075 (-12.52%); split: -13.23%, +0.71% Totals from 819 (30.94% of 2647) affected shaders: Instrs: 3810182 -> 3679124 (-3.44%); split: -4.35%, +0.91% CodeSize: 57044000 -> 55081616 (-3.44%); split: -4.35%, +0.91% Number of spill instructions: 49264 -> 44743 (-9.18%); split: -9.76%, +0.58% Number of fill instructions: 50182 -> 43731 (-12.86%); split: -13.58%, +0.73% Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41688>
This commit is contained in:
parent
81e21a8756
commit
bc22a37d98
8 changed files with 462 additions and 0 deletions
85
src/intel/compiler/jay/jay_dag.c
Normal file
85
src/intel/compiler/jay/jay_dag.c
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2019 Broadcom
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "jay_dag.h"
|
||||
#include <stdint.h>
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
void
|
||||
jay_dag_init(struct jay_dag *dag, void *memctx, uint32_t node_count)
|
||||
{
|
||||
assert(node_count >= 1 && "node 0 is reserved and always present");
|
||||
|
||||
*dag = (struct jay_dag) {
|
||||
.adjacency = rzalloc_array(memctx, uint32_t, node_count),
|
||||
.parent_counts = rzalloc_array(memctx, uint32_t, node_count),
|
||||
.node_count = node_count,
|
||||
.node = 1,
|
||||
};
|
||||
|
||||
util_dynarray_init(&dag->heads, memctx);
|
||||
util_dynarray_init(&dag->edges, memctx);
|
||||
}
|
||||
|
||||
void
|
||||
jay_dag_add_edge(struct jay_dag *dag, uint32_t child)
|
||||
{
|
||||
if (child && child != dag->node) {
|
||||
assert(child < dag->node_count);
|
||||
|
||||
/* We have to prune degenerate or duplicate edges */
|
||||
for (uint32_t i = (dag->node > 0 ? dag->adjacency[dag->node - 1] : 0);
|
||||
i < util_dynarray_num_elements(&dag->edges, uint32_t); ++i) {
|
||||
if (*util_dynarray_element(&dag->edges, uint32_t, i) == child)
|
||||
return;
|
||||
}
|
||||
|
||||
util_dynarray_append(&dag->edges, child);
|
||||
dag->parent_counts[child]++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
jay_dag_next_node(struct jay_dag *dag)
|
||||
{
|
||||
assert(dag->node < dag->node_count);
|
||||
|
||||
dag->adjacency[dag->node++] =
|
||||
util_dynarray_num_elements(&dag->edges, uint32_t);
|
||||
}
|
||||
|
||||
void
|
||||
jay_dag_finalize(struct jay_dag *dag, uint32_t first_node)
|
||||
{
|
||||
for (uint32_t i = dag->node - 1; i >= first_node; --i) {
|
||||
if (dag->parent_counts[i] == 0) {
|
||||
util_dynarray_append(&dag->heads, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes a DAG head from the graph, and moves any new dag heads into the
|
||||
* heads list.
|
||||
*/
|
||||
void
|
||||
jay_dag_prune_head(struct jay_dag *dag, uint32_t head)
|
||||
{
|
||||
assert(!dag->parent_counts[head]);
|
||||
util_dynarray_delete_unordered(&dag->heads, uint32_t, head);
|
||||
uint32_t first = head > 0 ? dag->adjacency[head - 1] : 0;
|
||||
|
||||
for (unsigned i = first; i < dag->adjacency[head]; ++i) {
|
||||
uint32_t *it = util_dynarray_element(&dag->edges, uint32_t, i);
|
||||
|
||||
if ((--dag->parent_counts[*it]) == 0) {
|
||||
util_dynarray_append(&dag->heads, *it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
22
src/intel/compiler/jay/jay_dag.h
Normal file
22
src/intel/compiler/jay/jay_dag.h
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2019 Broadcom
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
struct jay_dag {
|
||||
struct util_dynarray heads, edges;
|
||||
uint32_t *parent_counts;
|
||||
uint32_t *adjacency;
|
||||
uint32_t node, node_count;
|
||||
};
|
||||
|
||||
/* Construction: initialise, then for each node add its edges and advance. */
void jay_dag_init(struct jay_dag *dag, void *memctx, uint32_t node_count);
void jay_dag_add_edge(struct jay_dag *dag, uint32_t child);
void jay_dag_next_node(struct jay_dag *dag);

/* Collect the parentless nodes at or above first_node into the heads list. */
void jay_dag_finalize(struct jay_dag *dag, uint32_t first_node);

/* Consumption: remove a head, promoting children that become parentless. */
void jay_dag_prune_head(struct jay_dag *dag, uint32_t head);
|
||||
|
|
@ -41,6 +41,7 @@ static const struct debug_named_value jay_debug_options[] = {
|
|||
{ "spill", JAY_DBG_SPILL, "Shrink register file to test spilling" },
|
||||
{ "sync", JAY_DBG_SYNC, "Sync after every instruction" },
|
||||
{ "noacc", JAY_DBG_NOACC, "Disable accumulator substitution" },
|
||||
{ "nosched", JAY_DBG_NOSCHED, "Disable scheduling" },
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
|
|
@ -2705,6 +2706,10 @@ jay_compile(const struct intel_device_info *devinfo,
|
|||
jay_print(stdout, s);
|
||||
}
|
||||
|
||||
if (!(jay_debug & JAY_DBG_NOSCHED)) {
|
||||
JAY_PASS(s, jay_schedule_pressure);
|
||||
}
|
||||
|
||||
JAY_PASS(s, jay_assign_flags);
|
||||
if (!(jay_debug & JAY_DBG_NOOPT)) {
|
||||
JAY_PASS(s, jay_opt_dead_code);
|
||||
|
|
|
|||
|
|
@ -1098,6 +1098,10 @@ typedef struct jay_block {
|
|||
|
||||
/** Pretty printing based on original structured control flow */
|
||||
uint8_t indent;
|
||||
|
||||
/* Register demand metadata calculated for scheduling use */
|
||||
unsigned demand_max[JAY_NUM_SSA_FILES];
|
||||
unsigned demand_out[JAY_NUM_SSA_FILES];
|
||||
} jay_block;
|
||||
|
||||
static inline jay_block *
|
||||
|
|
|
|||
|
|
@ -211,6 +211,11 @@ jay_calculate_register_demands(jay_function *func)
|
|||
jay_print_inst(stdout, I);
|
||||
}
|
||||
}
|
||||
|
||||
jay_foreach_ssa_file(f) {
|
||||
block->demand_max[f] = max_demand[f];
|
||||
block->demand_out[f] = demands[f];
|
||||
}
|
||||
}
|
||||
|
||||
free(files);
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ extern "C" {
|
|||
#define JAY_DBG_SPILL BITFIELD_BIT(2)
|
||||
#define JAY_DBG_SYNC BITFIELD_BIT(3)
|
||||
#define JAY_DBG_NOACC BITFIELD_BIT(4)
|
||||
#define JAY_DBG_NOSCHED BITFIELD_BIT(5)
|
||||
extern int jay_debug;
|
||||
|
||||
bool jay_nir_lower_bool(nir_shader *nir);
|
||||
|
|
@ -70,6 +71,8 @@ void jay_opt_propagate_backwards(jay_shader *s);
|
|||
void jay_opt_dead_code(jay_shader *s);
|
||||
void jay_opt_predicate(jay_shader *s);
|
||||
|
||||
void jay_schedule_pressure(jay_shader *s);
|
||||
|
||||
void jay_lower_pre_ra(jay_shader *s);
|
||||
void jay_lower_post_ra(jay_shader *s);
|
||||
void jay_lower_spill(jay_function *func);
|
||||
|
|
|
|||
336
src/intel/compiler/jay/jay_schedule.c
Normal file
336
src/intel/compiler/jay/jay_schedule.c
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* Copyright 2023 Alyssa Rosenzweig
|
||||
* Copyright 2022 Collabora Ltd.
|
||||
* Copyright 2019 Broadcom
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file implements a simple pre-RA bottom-up list scheduler with the goal
|
||||
* of decreasing register pressure. On Xe2, this significantly reduces spilling.
|
||||
*
|
||||
* SSA form allows us to estimate register demand cheaply and accurately, which
|
||||
* theoretically [1] gives this algorithm the two Hippocratic properties:
|
||||
*
|
||||
* 1. Shaders with low register pressure are unaffected.
|
||||
* 2. Register pressure can only be decreased, never increased.
|
||||
*
|
||||
* In other words: first, do no harm.
|
||||
*
|
||||
* The heuristic itself is very simple: greedily choose instructions that
|
||||
* decrease liveness using a backwards list scheduler. This is far from optimal!
|
||||
* But thanks to the above properties, even a heuristic that picked random
|
||||
* instructions would be a win overall - by construction, we can only ever win.
|
||||
*
|
||||
* [1] In reality, neither property is strictly satisfied due to the messy
|
||||
* details of mapping our clean logical model onto Intel's many weird physical
|
||||
* register files. Nevertheless, the algorithm is well-motivated and the
|
||||
* empirical results on Xe2 are excellent.
|
||||
*/
|
||||
|
||||
#include "util/bitset.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/sparse_bitset.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_dag.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
struct sched_ctx {
|
||||
struct jay_dag dag;
|
||||
unsigned dispatch_width;
|
||||
jay_inst **insts;
|
||||
struct u_sparse_bitset live;
|
||||
BITSET_WORD *seen;
|
||||
};
|
||||
|
||||
/* Cut down version of the function in jay_liveness.c */
|
||||
static void
|
||||
liveness_update(struct u_sparse_bitset *live, jay_inst *I)
|
||||
{
|
||||
jay_foreach_dst_index(I, _, def) {
|
||||
u_sparse_bitset_clear(live, def);
|
||||
}
|
||||
|
||||
jay_foreach_src_index(I, _, comp, index) {
|
||||
u_sparse_bitset_set(live, index);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
populate_dag(struct sched_ctx *ctx,
|
||||
jay_function *func,
|
||||
jay_block *block,
|
||||
uint32_t *def)
|
||||
{
|
||||
uint32_t first_node_in_this_block = ctx->dag.node;
|
||||
|
||||
/* TODO: Reorder memory instructions */
|
||||
uint32_t sidefx = 0, address = 0;
|
||||
|
||||
jay_foreach_inst_in_block(block, I) {
|
||||
if (jay_op_starts_block(I->op)) {
|
||||
continue;
|
||||
} else if (jay_op_ends_block(I->op)) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Uses depend on definitions. SSA form forbids WaR and WaW hazards */
|
||||
jay_foreach_src_index(I, s, c, index) {
|
||||
if (def[index] && def[index] >= first_node_in_this_block) {
|
||||
jay_dag_add_edge(&ctx->dag, def[index]);
|
||||
}
|
||||
}
|
||||
|
||||
jay_foreach_dst_index(I, d, index) {
|
||||
def[index] = ctx->dag.node;
|
||||
}
|
||||
|
||||
/* Serialize address register access until we have an address RA */
|
||||
bool use_a0 = I->dst.file == J_ADDRESS || I->op == JAY_OPCODE_SHUFFLE;
|
||||
jay_foreach_src(I, s) {
|
||||
use_a0 |= I->src[s].file == J_ADDRESS;
|
||||
}
|
||||
|
||||
if (use_a0) {
|
||||
jay_dag_add_edge(&ctx->dag, address);
|
||||
address = ctx->dag.node;
|
||||
}
|
||||
|
||||
/* Serialize side effects for now */
|
||||
if ((I->op == JAY_OPCODE_SEND && !jay_send_pure(I)) ||
|
||||
I->op == JAY_OPCODE_SCHEDULE_BARRIER) {
|
||||
|
||||
jay_dag_add_edge(&ctx->dag, sidefx);
|
||||
sidefx = ctx->dag.node;
|
||||
}
|
||||
|
||||
ctx->insts[ctx->dag.node] = I;
|
||||
jay_dag_next_node(&ctx->dag);
|
||||
}
|
||||
|
||||
jay_dag_finalize(&ctx->dag, first_node_in_this_block);
|
||||
}
|
||||
|
||||
/*
|
||||
* Due to multiple register files, register demand is a vector. Our dynamic
|
||||
* register file partitioning justifies modelling demand as a single scalar,
|
||||
* where each file has a weight determined here.
|
||||
*/
|
||||
static unsigned
|
||||
scale(struct sched_ctx *ctx, jay_def x)
|
||||
{
|
||||
return x.file == J_ADDRESS ? 0 : jay_is_uniform(x) ? 1 : ctx->dispatch_width;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the change in register pressure from scheduling a given
|
||||
* instuction. Based on jay_calculate_register_demands, but without the use of
|
||||
* kill-bits since we are reordering instructions.
|
||||
*/
|
||||
static signed
|
||||
calculate_pressure_delta_before(struct sched_ctx *ctx, jay_inst *I)
|
||||
{
|
||||
signed delta = 0;
|
||||
|
||||
/* Make destinations live */
|
||||
jay_foreach_dst(I, dst) {
|
||||
delta += util_next_power_of_two(jay_num_values(dst)) * scale(ctx, dst);
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
static signed
|
||||
calculate_pressure_delta_after(struct sched_ctx *ctx, jay_inst *I)
|
||||
{
|
||||
signed delta = 0;
|
||||
unsigned counter = 0;
|
||||
|
||||
/* Dead destinations are those written by the instruction but killed
|
||||
* immediately after the instruction finishes.
|
||||
*/
|
||||
jay_foreach_dst_index(I, _, index) {
|
||||
delta -= !u_sparse_bitset_test(&ctx->live, index) * scale(ctx, I->dst);
|
||||
}
|
||||
|
||||
jay_foreach_dst(I, d) {
|
||||
unsigned n = jay_num_values(d);
|
||||
delta -= (util_next_power_of_two(n) - n) * scale(ctx, I->dst);
|
||||
}
|
||||
|
||||
/* Late-kill sources. We precomputed the deduplication info and stashed it in
|
||||
* the I->last_use bitfield for convenience.
|
||||
*/
|
||||
jay_foreach_src_index(I, s, c, index) {
|
||||
if (BITSET_TEST(I->last_use, counter)) {
|
||||
delta -=
|
||||
!u_sparse_bitset_test(&ctx->live, index) * scale(ctx, I->src[s]);
|
||||
}
|
||||
|
||||
counter++;
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
/*
|
||||
* Choose the next instuction, bottom-up. For now we use a simple greedy
|
||||
* heuristic: choose the instuction that has the best effect on liveness.
|
||||
*/
|
||||
static uint32_t
|
||||
choose_inst(struct sched_ctx *s)
|
||||
{
|
||||
int32_t min_delta = INT32_MAX;
|
||||
uint32_t best = 0;
|
||||
|
||||
util_dynarray_foreach(&s->dag.heads, uint32_t, head) {
|
||||
jay_inst *I = s->insts[*head];
|
||||
int32_t delta = -(calculate_pressure_delta_after(s, I) +
|
||||
calculate_pressure_delta_before(s, I));
|
||||
|
||||
/* As a tiebreaker (only), sink flag writes to reduce specifically flag
|
||||
* pressure, because spilling flags costs extra instructions and GPR
|
||||
* pressure. This is a mildly positive heuristic.
|
||||
*/
|
||||
delta *= 2;
|
||||
if (jay_is_null(I->cond_flag)) {
|
||||
delta++;
|
||||
}
|
||||
|
||||
if (delta <= min_delta) {
|
||||
best = *head;
|
||||
min_delta = delta;
|
||||
}
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
static void
|
||||
pressure_schedule_block(jay_function *func,
|
||||
jay_block *block,
|
||||
struct util_dynarray *schedule,
|
||||
struct sched_ctx *s,
|
||||
void *memctx)
|
||||
{
|
||||
/* Our pressure calculations are all off by a constant, but that's ok */
|
||||
signed pressure = 0;
|
||||
signed orig_max_pressure = 0;
|
||||
|
||||
u_sparse_bitset_free(&s->live);
|
||||
u_sparse_bitset_dup_with_ctx(&s->live, &block->live_out, memctx);
|
||||
|
||||
jay_foreach_inst_in_block_rev(block, I) {
|
||||
if (jay_op_starts_block(I->op)) {
|
||||
break;
|
||||
} else if (jay_op_ends_block(I->op)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned counter = 0;
|
||||
|
||||
/* Filter duplicates as we go */
|
||||
BITSET_ZERO(I->last_use);
|
||||
|
||||
jay_foreach_src_index(I, _, c, index) {
|
||||
if (!BITSET_TEST(s->seen, index)) {
|
||||
BITSET_SET(I->last_use, counter);
|
||||
}
|
||||
|
||||
BITSET_SET(s->seen, index);
|
||||
counter++;
|
||||
}
|
||||
|
||||
jay_foreach_src_index(I, _, c, index) {
|
||||
BITSET_CLEAR(s->seen, index);
|
||||
}
|
||||
|
||||
pressure -= calculate_pressure_delta_after(s, I);
|
||||
orig_max_pressure = MAX2(pressure, orig_max_pressure);
|
||||
pressure -= calculate_pressure_delta_before(s, I);
|
||||
liveness_update(&s->live, I);
|
||||
}
|
||||
|
||||
u_sparse_bitset_free(&s->live);
|
||||
u_sparse_bitset_dup_with_ctx(&s->live, &block->live_out, memctx);
|
||||
|
||||
signed max_pressure = 0;
|
||||
pressure = 0;
|
||||
|
||||
while (s->dag.heads.size) {
|
||||
uint32_t node = choose_inst(s);
|
||||
pressure -= calculate_pressure_delta_after(s, s->insts[node]);
|
||||
max_pressure = MAX2(pressure, max_pressure);
|
||||
pressure -= calculate_pressure_delta_before(s, s->insts[node]);
|
||||
jay_dag_prune_head(&s->dag, node);
|
||||
|
||||
util_dynarray_append(schedule, node);
|
||||
liveness_update(&s->live, s->insts[node]);
|
||||
}
|
||||
|
||||
/* Apply the schedule only if it reduces pressure */
|
||||
if (max_pressure < orig_max_pressure) {
|
||||
util_dynarray_foreach(schedule, uint32_t, node) {
|
||||
jay_remove_instruction(s->insts[*node]);
|
||||
}
|
||||
|
||||
jay_builder b = jay_init_builder(func, jay_before_block(block));
|
||||
util_dynarray_foreach_reverse(schedule, uint32_t, node) {
|
||||
jay_builder_insert(&b, s->insts[*node]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pass(jay_function *f)
|
||||
{
|
||||
jay_compute_liveness(f);
|
||||
jay_calculate_register_demands(f);
|
||||
|
||||
void *memctx = ralloc_context(NULL);
|
||||
void *linctx = linear_context(memctx);
|
||||
struct util_dynarray schedule = UTIL_DYNARRAY_INIT;
|
||||
|
||||
uint32_t nr_inst = 1;
|
||||
jay_foreach_inst_in_func(f, _, I) {
|
||||
++nr_inst;
|
||||
}
|
||||
|
||||
BITSET_WORD *seen = BITSET_LINEAR_ZALLOC(linctx, f->ssa_alloc);
|
||||
struct sched_ctx sctx = { .seen = seen,
|
||||
.dispatch_width = f->shader->dispatch_width };
|
||||
uint32_t *def = linear_zalloc_array(linctx, uint32_t, f->ssa_alloc);
|
||||
sctx.insts = linear_alloc_array(linctx, jay_inst *, nr_inst);
|
||||
jay_dag_init(&sctx.dag, memctx, nr_inst);
|
||||
|
||||
unsigned ugpr_per_grf = jay_ugpr_per_grf(f->shader);
|
||||
unsigned ugpr_per_gpr = jay_grf_per_gpr(f->shader) * ugpr_per_grf;
|
||||
|
||||
jay_foreach_block(f, block) {
|
||||
/* Treat flags as GPR demand conservatively since they spill to GPRs */
|
||||
unsigned demand_ugpr = block->demand_max[UGPR];
|
||||
unsigned demand_gpr = block->demand_max[GPR] +
|
||||
block->demand_max[FLAG] +
|
||||
block->demand_max[UFLAG];
|
||||
|
||||
/* Schedule for pressure only blocks that might spill, to minimize harm
|
||||
* done to ILP and such. We conservatively use 104 GRFs as the threshold
|
||||
* instead of 128 to leave wiggle room for flag RA and late lowerings.
|
||||
*/
|
||||
if (((demand_gpr * ugpr_per_gpr) + demand_ugpr) >= (104 * ugpr_per_grf)) {
|
||||
util_dynarray_clear(&schedule);
|
||||
|
||||
populate_dag(&sctx, f, block, def);
|
||||
pressure_schedule_block(f, block, &schedule, &sctx, memctx);
|
||||
}
|
||||
}
|
||||
|
||||
util_dynarray_fini(&schedule);
|
||||
ralloc_free(memctx);
|
||||
}
|
||||
|
||||
/* Presumably expands to the shader-level entry point jay_schedule_pressure,
 * running pass() on each function -- confirm against the macro definition.
 */
JAY_DEFINE_FUNCTION_PASS(jay_schedule_pressure, pass)
|
||||
|
|
@ -49,6 +49,7 @@ libintel_compiler_jay_files = files(
|
|||
'jay.h',
|
||||
'jay_assign_accumulators.c',
|
||||
'jay_assign_flags.c',
|
||||
'jay_dag.c',
|
||||
'jay_from_nir.c',
|
||||
'jay_ir.h',
|
||||
'jay_insert_fp_mode.c',
|
||||
|
|
@ -67,6 +68,7 @@ libintel_compiler_jay_files = files(
|
|||
'jay_repair_ssa.c',
|
||||
'jay_register_allocate.c',
|
||||
'jay_simd_width.c',
|
||||
'jay_schedule.c',
|
||||
'jay_spill.c',
|
||||
'jay_to_binary.c',
|
||||
'jay_validate.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue