mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
jay: drop UGPR->UMEM spilling path
This is totally broken now that we have a physical CFG for UGPRs. And of course, UGPRs generally were totally broken without the physical CFG. So I conclude this code basically never worked. Which is good because it was also basically always dead too.

Just delete it and replace it with a clear error message, instead of pretending it works and either randomly splatting validation or just straight-up miscompiling silently or whatever.

We might need an alternative UGPR->GPR spill path some day, but that day is not today.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41215>
This commit is contained in:
parent
ad040f2fbb
commit
fed6b7bea0
6 changed files with 91 additions and 127 deletions
|
|
@ -61,9 +61,6 @@ enum PACKED jay_file {
|
|||
/** Memory registers representing spilled values: 32-bits per SIMT lane. */
|
||||
MEM,
|
||||
|
||||
/** Memory registers representing spilled values: 32-bits uniform values */
|
||||
UMEM,
|
||||
|
||||
/** Non-uniform flags (predicates): 1-bit per SIMT lane */
|
||||
FLAG,
|
||||
|
||||
|
|
@ -94,7 +91,7 @@ enum PACKED jay_file {
|
|||
JAY_NUM_SSA_FILES = J_ADDRESS + 1,
|
||||
|
||||
/* Set of files that the main RA (and not eg flag RA) allocates. */
|
||||
JAY_NUM_RA_FILES = UMEM + 1,
|
||||
JAY_NUM_RA_FILES = MEM + 1,
|
||||
JAY_NUM_GRF_FILES = UGPR + 1,
|
||||
};
|
||||
static_assert(JAY_FILE_LAST <= 0b1111, "must fit in 4 bits (see jay_def)");
|
||||
|
|
@ -458,15 +455,6 @@ jay_is_uniform(jay_def d)
|
|||
return jay_file_is_uniform(d.file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given definition represents a spilled variable.
|
||||
*/
|
||||
static inline bool
|
||||
jay_is_mem(jay_def x)
|
||||
{
|
||||
return x.file == MEM || x.file == UMEM;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
jay_as_uint(jay_def src)
|
||||
{
|
||||
|
|
@ -858,7 +846,7 @@ static inline bool
|
|||
jay_is_send_like(const jay_inst *I)
|
||||
{
|
||||
if (I->op == JAY_OPCODE_MOV)
|
||||
return jay_is_mem(I->dst) || jay_is_mem(I->src[0]);
|
||||
return I->dst.file == MEM || I->src[0].file == MEM;
|
||||
else
|
||||
return I->op == JAY_OPCODE_SEND;
|
||||
}
|
||||
|
|
@ -880,11 +868,8 @@ jay_is_shuffle_like(const jay_inst *I)
|
|||
static inline unsigned
|
||||
jay_src_alignment(jay_shader *shader, const jay_inst *I, unsigned s)
|
||||
{
|
||||
/* SENDs operate on entire GRFs at a time, so align UGPRs to GRFs. This
|
||||
* includes UGPR->UMEM moves which lower to SENDs.
|
||||
*/
|
||||
if ((I->op == JAY_OPCODE_SEND && I->src[s].file == UGPR) ||
|
||||
(I->dst.file == UMEM)) {
|
||||
/* SENDs operate on entire GRFs at a time, so align UGPRs to GRFs. */
|
||||
if (I->op == JAY_OPCODE_SEND && I->src[s].file == UGPR) {
|
||||
return jay_ugpr_per_grf(shader);
|
||||
}
|
||||
|
||||
|
|
@ -918,9 +903,7 @@ jay_dst_alignment(jay_shader *shader, const jay_inst *I)
|
|||
* instruction. (TODO)
|
||||
*/
|
||||
if (I->dst.file == UGPR &&
|
||||
(I->op == JAY_OPCODE_SEND ||
|
||||
(I->op == JAY_OPCODE_MOV && I->src[0].file == UMEM) ||
|
||||
I->op == JAY_OPCODE_MUL_32)) {
|
||||
(I->op == JAY_OPCODE_SEND || I->op == JAY_OPCODE_MUL_32)) {
|
||||
|
||||
return jay_ugpr_per_grf(shader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,15 +19,12 @@ insert_spill_fill(jay_builder *b,
|
|||
jay_def gpr,
|
||||
jay_def sp,
|
||||
bool load,
|
||||
unsigned *sp_delta_B,
|
||||
unsigned umem_base)
|
||||
unsigned *sp_delta_B)
|
||||
{
|
||||
assert(jay_is_mem(mem) && !jay_is_mem(gpr));
|
||||
assert(mem.file == MEM && gpr.file != MEM);
|
||||
|
||||
bool uniform = mem.file == UMEM;
|
||||
unsigned offs_B = mem.reg * 4;
|
||||
unsigned mem_reg_B =
|
||||
uniform ? (umem_base + offs_B) : (offs_B * b->shader->dispatch_width);
|
||||
unsigned mem_reg_B = offs_B * b->shader->dispatch_width;
|
||||
|
||||
/* The stack pointer needs to be offset to the desired offset */
|
||||
signed sp_adjust_B = mem_reg_B - (*sp_delta_B);
|
||||
|
|
@ -41,16 +38,12 @@ insert_spill_fill(jay_builder *b,
|
|||
LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS);
|
||||
uint32_t desc = lsc_msg_desc(devinfo, load ? LSC_OP_LOAD : LSC_OP_STORE,
|
||||
LSC_ADDR_SURFTYPE_SS, LSC_ADDR_SIZE_A32,
|
||||
LSC_DATA_SIZE_D32, 1, uniform, cache);
|
||||
if (uniform) {
|
||||
sp.num_values_m1 = 0;
|
||||
}
|
||||
|
||||
LSC_DATA_SIZE_D32, 1, false, cache);
|
||||
jay_def srcs[] = { sp, gpr };
|
||||
|
||||
jay_SEND(b, .sfid = BRW_SFID_UGM, .msg_desc = desc, .srcs = srcs,
|
||||
.nr_srcs = load ? 1 : 2, .dst = load ? gpr : jay_null(),
|
||||
.type = JAY_TYPE_U32, .uniform = uniform, .ex_desc = ADDRESS_REG);
|
||||
.type = JAY_TYPE_U32, .ex_desc = ADDRESS_REG);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -67,18 +60,15 @@ jay_lower_spill(jay_function *func)
|
|||
sp.num_values_m1 = func->shader->dispatch_width - 1;
|
||||
|
||||
/* Calculate how much stack space we need */
|
||||
unsigned nr_mem = 0, nr_umem = 0;
|
||||
unsigned nr_mem = 0;
|
||||
jay_foreach_inst_in_func(func, block, I) {
|
||||
if (I->op == JAY_OPCODE_MOV && jay_is_send_like(I)) {
|
||||
jay_def mem = jay_is_mem(I->dst) ? I->dst : I->src[0];
|
||||
unsigned *nr = mem.file == UMEM ? &nr_umem : &nr_mem;
|
||||
|
||||
*nr = MAX2(*nr, mem.reg + 1);
|
||||
jay_def mem = I->dst.file == MEM ? I->dst : I->src[0];
|
||||
nr_mem = MAX2(nr_mem, mem.reg + 1);
|
||||
}
|
||||
}
|
||||
|
||||
assert((nr_umem > 0) || (nr_mem > 0));
|
||||
unsigned umem_base = (func->shader->dispatch_width * nr_mem * 4);
|
||||
assert(nr_mem > 0);
|
||||
|
||||
/* We burn the address & stack pointer registers for all spills/fills in a
|
||||
* shader. Preinitialize at the top using a scratch register.
|
||||
|
|
@ -118,13 +108,11 @@ jay_lower_spill(jay_function *func)
|
|||
address_valid = true;
|
||||
}
|
||||
|
||||
if (jay_is_mem(I->dst)) {
|
||||
insert_spill_fill(&b, I->dst, I->src[0], sp, false, &sp_delta_B,
|
||||
umem_base);
|
||||
if (I->dst.file == MEM) {
|
||||
insert_spill_fill(&b, I->dst, I->src[0], sp, false, &sp_delta_B);
|
||||
func->shader->spills++;
|
||||
} else {
|
||||
insert_spill_fill(&b, I->src[0], I->dst, sp, true, &sp_delta_B,
|
||||
umem_base);
|
||||
insert_spill_fill(&b, I->src[0], I->dst, sp, true, &sp_delta_B);
|
||||
func->shader->fills++;
|
||||
}
|
||||
|
||||
|
|
@ -152,5 +140,5 @@ jay_lower_spill(jay_function *func)
|
|||
/* Note this is bogus with recursion, but recursion is not supported on any
|
||||
* current graphics/compute API.
|
||||
*/
|
||||
func->shader->scratch_size += umem_base + (nr_umem * 4);
|
||||
func->shader->scratch_size += func->shader->dispatch_width * nr_mem * 4;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,9 +37,9 @@ static const char *jay_arf_str[] = {
|
|||
};
|
||||
|
||||
static const char *jay_file_str[JAY_FILE_LAST + 1] = {
|
||||
[GPR] = "r", [UGPR] = "u", [FLAG] = "f", [UFLAG] = "uf",
|
||||
[J_ADDRESS] = "a", [ACCUM] = "acc", [UACCUM] = "uacc", [J_ARF] = "arf",
|
||||
[MEM] = "m", [UMEM] = "um", [TEST_FILE] = "t",
|
||||
[GPR] = "r", [UGPR] = "u", [FLAG] = "f", [UFLAG] = "uf",
|
||||
[J_ADDRESS] = "a", [ACCUM] = "acc", [UACCUM] = "uacc", [J_ARF] = "arf",
|
||||
[MEM] = "m", [TEST_FILE] = "t",
|
||||
};
|
||||
|
||||
static const char *jay_base_types[] = {
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ unsigned jay_process_nir(const struct intel_device_info *devinfo,
|
|||
void jay_compute_liveness(jay_function *f);
|
||||
void jay_calculate_register_demands(jay_function *f);
|
||||
|
||||
void jay_spill(jay_function *func, enum jay_file file, unsigned limit);
|
||||
void jay_spill(jay_function *func, unsigned limit);
|
||||
void jay_partition_grf(jay_shader *shader);
|
||||
void jay_register_allocate(jay_shader *s);
|
||||
void jay_assign_flags(jay_shader *s);
|
||||
|
|
|
|||
|
|
@ -430,10 +430,6 @@ mov(jay_builder *b, jay_def dst, jay_def src, struct jay_temp_regs temps)
|
|||
temp = push_temp(b, temps, GPR, false, &backing, jay_null(), jay_null());
|
||||
jay_MOV(b, temp, src);
|
||||
jay_MOV(b, dst, temp);
|
||||
} else if (dst.file == UMEM && src.file == UMEM) {
|
||||
temp = push_temp(b, temps, UGPR, false, &backing, jay_null(), jay_null());
|
||||
jay_MOV(b, def_from_reg(temps.ugpr), src);
|
||||
jay_MOV(b, dst, def_from_reg(temps.ugpr));
|
||||
} else if (dst.file == GPR &&
|
||||
src.file == GPR &&
|
||||
jay_def_stride(b->shader, dst) !=
|
||||
|
|
@ -1547,45 +1543,42 @@ jay_partition_grf(jay_shader *shader)
|
|||
}
|
||||
}
|
||||
|
||||
/* TODO: These are arbitrary. Need to rework somehow, we have options. */
|
||||
/* TODO: Arbitrary. Need to rework somehow, we have options. */
|
||||
shader->num_regs[MEM] = 512;
|
||||
shader->num_regs[UMEM] = 2048;
|
||||
}
|
||||
|
||||
static void
|
||||
spill_file(jay_function *f, enum jay_file file, bool *spilled)
|
||||
static bool
|
||||
spill_gpr(jay_function *f)
|
||||
{
|
||||
unsigned limit = f->shader->num_regs[file];
|
||||
unsigned limit = f->shader->num_regs[GPR];
|
||||
|
||||
/* If testing spilling, set limit tightly. */
|
||||
if ((jay_debug & JAY_DBG_SPILL) &&
|
||||
file == GPR &&
|
||||
f->shader->stage != MESA_SHADER_VERTEX) {
|
||||
limit = 13;
|
||||
}
|
||||
|
||||
if (f->demand[file] > limit) {
|
||||
/* If we spill, we need to reserve UGPRs for spilling */
|
||||
if (!(*spilled)) {
|
||||
unsigned reservation = f->shader->dispatch_width + 1;
|
||||
f->shader->num_regs[UGPR] -= reservation;
|
||||
f->shader->partition.large_ugpr_block.len -= reservation;
|
||||
}
|
||||
|
||||
jay_spill(f, file, limit);
|
||||
jay_validate(f->shader, "spilling");
|
||||
jay_compute_liveness(f);
|
||||
jay_calculate_register_demands(f);
|
||||
|
||||
if (f->demand[file] > limit) {
|
||||
fprintf(stderr, "file %u, limit %u but demand %u\n", file, limit,
|
||||
f->demand[file]);
|
||||
fflush(stdout);
|
||||
UNREACHABLE("spiller bug");
|
||||
}
|
||||
|
||||
*spilled = true;
|
||||
if (f->demand[GPR] <= limit) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Spilling requires reserving UGPRs for spilling */
|
||||
unsigned reservation = f->shader->dispatch_width + 1;
|
||||
f->shader->num_regs[UGPR] -= reservation;
|
||||
f->shader->partition.large_ugpr_block.len -= reservation;
|
||||
|
||||
jay_spill(f, limit);
|
||||
jay_validate(f->shader, "spilling");
|
||||
jay_compute_liveness(f);
|
||||
jay_calculate_register_demands(f);
|
||||
|
||||
if (f->demand[GPR] > limit) {
|
||||
fprintf(stderr, "limit %u but demand %u\n", limit, f->demand[GPR]);
|
||||
fflush(stdout);
|
||||
UNREACHABLE("spiller bug");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1594,12 +1587,17 @@ jay_register_allocate_function(jay_function *f)
|
|||
jay_shader *shader = f->shader;
|
||||
jay_ra_state ra = { .b.shader = shader, .b.func = f };
|
||||
|
||||
/* Spill as needed to fit within the limits. We spill GPR before UGPR since
|
||||
* spilling GPRs requires reserving a UGPR.
|
||||
/* Spill as needed to fit within the limits. */
|
||||
bool spilled = spill_gpr(f);
|
||||
|
||||
/* The spiller/SSA repair does not work on UGPRs because it cannot tolerate
|
||||
* the critical edges on the physical CFG. Fortunately, dynamic GPR/UGPR
|
||||
* partitioning means this should ~never be hit -- we can allocate 1000 UGPRs
|
||||
* if we need them. I believe ACO has the same corner case.
|
||||
*/
|
||||
bool spilled = false;
|
||||
spill_file(f, GPR, &spilled);
|
||||
spill_file(f, UGPR, &spilled);
|
||||
if (f->demand[UGPR] > f->shader->num_regs[UGPR]) {
|
||||
UNREACHABLE("UGPR spilling is unimplemented");
|
||||
}
|
||||
|
||||
typed_memcpy(ra.num_regs, shader->num_regs, JAY_NUM_RA_FILES);
|
||||
|
||||
|
|
|
|||
|
|
@ -113,10 +113,7 @@ struct spill_block {
|
|||
struct spill_ctx {
|
||||
jay_function *func;
|
||||
|
||||
/* Register file being spilled */
|
||||
enum jay_file file;
|
||||
|
||||
/* Set of values whose file equals `file` */
|
||||
/* Set of values whose file equals GPR */
|
||||
BITSET_WORD *in_file;
|
||||
|
||||
/* Set of values currently available in the register file */
|
||||
|
|
@ -162,8 +159,8 @@ struct spill_ctx {
|
|||
static inline jay_def
|
||||
jay_def_as_mem(struct spill_ctx *ctx, jay_def idx)
|
||||
{
|
||||
assert(idx.file == GPR || idx.file == UGPR);
|
||||
idx.file = idx.file == UGPR ? UMEM : MEM;
|
||||
assert(idx.file == GPR);
|
||||
idx.file = MEM;
|
||||
idx._payload = jay_base_index(idx) + ctx->n;
|
||||
return idx;
|
||||
}
|
||||
|
|
@ -194,7 +191,7 @@ static void
|
|||
insert_spill(jay_builder *b, struct spill_ctx *ctx, unsigned node)
|
||||
{
|
||||
if (!can_remat_node(ctx, node)) {
|
||||
jay_def idx = jay_scalar(ctx->file, node);
|
||||
jay_def idx = jay_scalar(GPR, node);
|
||||
jay_MOV(b, jay_def_as_mem(ctx, idx), idx);
|
||||
}
|
||||
}
|
||||
|
|
@ -206,7 +203,7 @@ insert_reload(struct spill_ctx *ctx,
|
|||
unsigned node)
|
||||
{
|
||||
jay_builder b = jay_init_builder(ctx->func, cursor);
|
||||
jay_def idx = jay_scalar(ctx->file, node);
|
||||
jay_def idx = jay_scalar(GPR, node);
|
||||
|
||||
/* Reloading breaks SSA, but jay_repair_ssa will repair */
|
||||
if (can_remat_node(ctx, node)) {
|
||||
|
|
@ -327,7 +324,7 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
jay_inst *phi_dst = ctx->defs[jay_phi_src_index(phi_src)];
|
||||
unsigned src = jay_index(phi_src->src[0]);
|
||||
|
||||
if (phi_src->src[0].file == ctx->file && jay_is_mem(phi_dst->dst)) {
|
||||
if (phi_src->src[0].file == GPR && phi_dst->dst.file == MEM) {
|
||||
if (!u_sparse_bitset_test(&sp->S_out, src)) {
|
||||
/* Spill the phi source. TODO: avoid redundant spills here */
|
||||
b.cursor = jay_after_block_logical(pred);
|
||||
|
|
@ -335,8 +332,8 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
}
|
||||
|
||||
if (can_remat_node(ctx, jay_index(phi_src->src[0]))) {
|
||||
jay_def idx = jay_scalar(ctx->file, src);
|
||||
jay_def tmp = jay_alloc_def(&b, ctx->file, 1);
|
||||
jay_def idx = jay_scalar(GPR, src);
|
||||
jay_def tmp = jay_alloc_def(&b, GPR, 1);
|
||||
|
||||
b.cursor = jay_before_function(ctx->func);
|
||||
remat_to(&b, tmp, ctx, src);
|
||||
|
|
@ -352,7 +349,7 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
/* Anything assumed to be spilled in succ must be spilled along all edges. */
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ss->S_in, v) {
|
||||
if (!u_sparse_bitset_test(&sp->S_out, v)) {
|
||||
b.cursor = jay_along_edge(pred, succ, GPR /* XXX */);
|
||||
b.cursor = jay_along_edge(pred, succ, GPR);
|
||||
insert_spill(&b, ctx, v);
|
||||
}
|
||||
}
|
||||
|
|
@ -367,14 +364,13 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
jay_foreach_phi_src_in_block(pred, phi_src) {
|
||||
unsigned src = jay_index(phi_src->src[0]);
|
||||
|
||||
if (phi_src->src[0].file == ctx->file &&
|
||||
!jay_is_mem(ctx->defs[jay_phi_src_index(phi_src)]->dst) &&
|
||||
if (phi_src->src[0].file == GPR &&
|
||||
ctx->defs[jay_phi_src_index(phi_src)]->dst.file != MEM &&
|
||||
!u_sparse_bitset_test(&sp->W_out, src)) {
|
||||
|
||||
/* Fill the phi source in the predecessor */
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ, ctx->file);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ, ctx->file),
|
||||
src);
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ, GPR);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ, GPR), src);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -386,9 +382,8 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
if (!u_sparse_bitset_test(&sp->W_out, v) &&
|
||||
!u_sparse_bitset_test(&ctx->phi_set, v)) {
|
||||
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ, GPR /* XXX */);
|
||||
insert_reload(ctx, reload_block,
|
||||
jay_along_edge(pred, succ, GPR /* XXX */), v);
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ, GPR);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ, GPR), v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -424,7 +419,7 @@ populate_local_next_use(struct spill_ctx *ctx, jay_block *block)
|
|||
ip -= inst_cycles(I);
|
||||
|
||||
jay_foreach_src_index(I, s, c, v) {
|
||||
if (I->src[s].file == ctx->file) {
|
||||
if (I->src[s].file == GPR) {
|
||||
if (I->op != JAY_OPCODE_PHI_SRC) {
|
||||
util_dynarray_append(&ctx->next_ip, lookup_next_use(ctx, v));
|
||||
}
|
||||
|
|
@ -434,7 +429,7 @@ populate_local_next_use(struct spill_ctx *ctx, jay_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
if (I->dst.file == ctx->file) {
|
||||
if (I->dst.file == GPR) {
|
||||
jay_foreach_index_rev(I->dst, _, v) {
|
||||
util_dynarray_append(&ctx->next_ip, lookup_next_use(ctx, v));
|
||||
}
|
||||
|
|
@ -465,7 +460,7 @@ min_algorithm(struct spill_ctx *ctx,
|
|||
* Phi sources are handled later.
|
||||
*/
|
||||
if (I->op == JAY_OPCODE_PHI_DST) {
|
||||
if (I->dst.file == ctx->file) {
|
||||
if (I->dst.file == GPR) {
|
||||
if (!u_sparse_bitset_test(&ctx->W, jay_index(I->dst))) {
|
||||
u_sparse_bitset_set(&ctx->S, jay_index(I->dst));
|
||||
I->dst = jay_def_as_mem(ctx, I->dst);
|
||||
|
|
@ -484,7 +479,7 @@ min_algorithm(struct spill_ctx *ctx,
|
|||
unsigned R[JAY_MAX_SRCS], nR = 0;
|
||||
|
||||
jay_foreach_src_index(I, s, c, v) {
|
||||
if (I->src[s].file == ctx->file && !u_sparse_bitset_test(&ctx->W, v)) {
|
||||
if (I->src[s].file == GPR && !u_sparse_bitset_test(&ctx->W, v)) {
|
||||
R[nR++] = v;
|
||||
insert_W(ctx, v);
|
||||
|
||||
|
|
@ -498,12 +493,12 @@ min_algorithm(struct spill_ctx *ctx,
|
|||
* We need to round up to power-of-two destination sizes to match the
|
||||
* rounding in demand calculation.
|
||||
*/
|
||||
bool has_dst = I->dst.file == ctx->file;
|
||||
bool has_dst = I->dst.file == GPR;
|
||||
unsigned dst_size = util_next_power_of_two(jay_num_values(I->dst));
|
||||
limit(ctx, I, ctx->k - (has_dst ? dst_size : 0));
|
||||
|
||||
/* Add destinations to the register file */
|
||||
if (I->dst.file == ctx->file) {
|
||||
if (I->dst.file == GPR) {
|
||||
jay_foreach_index(I->dst, _, index) {
|
||||
assert(next_use_cursor >= 1);
|
||||
ctx->next_uses[index] = next_ips[--next_use_cursor];
|
||||
|
|
@ -522,7 +517,7 @@ min_algorithm(struct spill_ctx *ctx,
|
|||
* how we currently estimate register demand.
|
||||
*/
|
||||
jay_foreach_src_index_rev(I, s, c, node) {
|
||||
if (I->src[s].file == ctx->file) {
|
||||
if (I->src[s].file == GPR) {
|
||||
assert(next_use_cursor >= 1);
|
||||
ctx->next_uses[node] = next_ips[--next_use_cursor];
|
||||
|
||||
|
|
@ -567,7 +562,7 @@ compute_w_entry_loop_header(struct spill_ctx *ctx, jay_block *block)
|
|||
}
|
||||
|
||||
jay_foreach_phi_dst_in_block(block, I) {
|
||||
if (I->dst.file == ctx->file) {
|
||||
if (I->dst.file == GPR) {
|
||||
ctx->candidates[j++] = (struct next_use) {
|
||||
.index = jay_index(I->dst),
|
||||
.dist = ctx->next_uses[jay_index(I->dst)],
|
||||
|
|
@ -600,7 +595,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
|||
U_SPARSE_BITSET_FOREACH_SET(&ctx->N, i) {
|
||||
bool all = true, any = false;
|
||||
|
||||
jay_foreach_predecessor(block, P, ctx->file) {
|
||||
jay_foreach_predecessor(block, P, GPR) {
|
||||
bool in = u_sparse_bitset_test(&ctx->blocks[(*P)->index].W_out, i);
|
||||
all &= in;
|
||||
any |= in;
|
||||
|
|
@ -614,7 +609,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
jay_foreach_predecessor(block, pred, ctx->file) {
|
||||
jay_foreach_predecessor(block, pred, GPR) {
|
||||
jay_foreach_phi_src_in_block(*pred, I) {
|
||||
if (!u_sparse_bitset_test(&ctx->blocks[(*pred)->index].W_out,
|
||||
jay_index(I->src[0]))) {
|
||||
|
|
@ -628,7 +623,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
|||
* this reduces pointless spills/fills with massive phi webs.
|
||||
*/
|
||||
jay_foreach_phi_dst_in_block(block, I) {
|
||||
if (I->dst.file == ctx->file &&
|
||||
if (I->dst.file == GPR &&
|
||||
!u_sparse_bitset_test(&ctx->phi_set, jay_index(I->dst))) {
|
||||
ctx->candidates[j++] = (struct next_use) {
|
||||
.index = jay_index(I->dst),
|
||||
|
|
@ -657,7 +652,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
|||
static ATTRIBUTE_NOINLINE void
|
||||
compute_s_entry(struct spill_ctx *ctx, jay_block *block)
|
||||
{
|
||||
jay_foreach_predecessor(block, pred, ctx->file) {
|
||||
jay_foreach_predecessor(block, pred, GPR) {
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ctx->blocks[(*pred)->index].S_out, v) {
|
||||
if (u_sparse_bitset_test(&block->live_in, v)) {
|
||||
u_sparse_bitset_set(&ctx->S, v);
|
||||
|
|
@ -708,7 +703,7 @@ global_next_use_distances(struct spill_ctx *ctx, void *memctx)
|
|||
jay_foreach_inst_in_block(block, I) {
|
||||
/* Record first use before def */
|
||||
jay_foreach_src_index(I, s, c, index) {
|
||||
if (I->src[s].file == ctx->file &&
|
||||
if (I->src[s].file == GPR &&
|
||||
!u_sparse_bitset_test(&ctx->W, index)) {
|
||||
|
||||
add_next_use(&sb->next_use_in, index, cycle);
|
||||
|
|
@ -733,7 +728,7 @@ global_next_use_distances(struct spill_ctx *ctx, void *memctx)
|
|||
}
|
||||
|
||||
/* Propagate successor live-in to pred live-out, joining with min */
|
||||
jay_foreach_predecessor(block, pred, ctx->file) {
|
||||
jay_foreach_predecessor(block, pred, GPR) {
|
||||
if (minimum_next_uses(&ctx->blocks[(*pred)->index].next_use_out,
|
||||
&sb->next_use_in, ctx->next_uses,
|
||||
&ctx->phi_set)) {
|
||||
|
|
@ -776,11 +771,11 @@ global_next_use_distances(struct spill_ctx *ctx, void *memctx)
|
|||
}
|
||||
|
||||
void
|
||||
jay_spill(jay_function *func, enum jay_file file, unsigned k)
|
||||
jay_spill(jay_function *func, unsigned k)
|
||||
{
|
||||
void *memctx = ralloc_context(NULL);
|
||||
void *linctx = linear_context(memctx);
|
||||
struct spill_ctx ctx = { .func = func, .file = file, .k = k };
|
||||
struct spill_ctx ctx = { .func = func, .k = k };
|
||||
|
||||
ctx.n = func->ssa_alloc;
|
||||
ctx.in_file = BITSET_LINEAR_ZALLOC(linctx, ctx.n);
|
||||
|
|
@ -795,7 +790,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k)
|
|||
ctx.defs[jay_index(I->dst)] = I;
|
||||
}
|
||||
|
||||
if (I->dst.file == file) {
|
||||
if (I->dst.file == GPR) {
|
||||
BITSET_SET_COUNT(ctx.in_file, jay_base_index(I->dst),
|
||||
jay_num_values(I->dst));
|
||||
}
|
||||
|
|
@ -831,7 +826,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k)
|
|||
* next_use_in set but are accounted for when computing W_entry.
|
||||
*/
|
||||
jay_foreach_phi_dst_in_block(block, I) {
|
||||
if (I->dst.file == file) {
|
||||
if (I->dst.file == GPR) {
|
||||
assert(nu_cursor >= 1);
|
||||
ctx.next_uses[jay_index(I->dst)] = next_ips[--nu_cursor];
|
||||
u_sparse_bitset_set(&ctx.N, jay_index(I->dst));
|
||||
|
|
@ -840,7 +835,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k)
|
|||
|
||||
if (block->loop_header) {
|
||||
compute_w_entry_loop_header(&ctx, block);
|
||||
} else if (jay_num_predecessors(block, file) /* skip start blocks */) {
|
||||
} else if (jay_num_predecessors(block, GPR) /* skip start blocks */) {
|
||||
compute_w_entry(&ctx, block);
|
||||
}
|
||||
|
||||
|
|
@ -853,7 +848,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k)
|
|||
|
||||
/* Now that all blocks are processed separately, stitch it together */
|
||||
jay_foreach_block(func, block) {
|
||||
jay_foreach_predecessor(block, pred, file) {
|
||||
jay_foreach_predecessor(block, pred, GPR) {
|
||||
u_sparse_bitset_clear_all(&ctx.phi_set);
|
||||
insert_coupling_code(&ctx, *pred, block);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue