mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-21 16:48:22 +02:00
jay: introduce accumulators into the partition
In SIMD16, map acc2/acc3 as extra GPRs. This gets us a pressure reduction. We leave acc0/acc1 reserved for mul_32 lowering and for parallel copy lowering; changing this would be very challenging due to the possibility of SIMD1 multiplies leading to uniform access on the accumulator => stuff blows up. But this is an easy win on select platforms. Note we still use acc2/acc3 for post-RA accumulator substitution; this just lets us also use them as panic registers. SIMD16: Totals from 784 (29.62% of 2647) affected shaders: Instrs: 1686724 -> 1686700 (-0.00%); split: -0.15%, +0.15% CodeSize: 23406952 -> 23409432 (+0.01%); split: -0.16%, +0.17% Number of spill instructions: 224 -> 174 (-22.32%) Number of fill instructions: 546 -> 382 (-30.04%) Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42097>
This commit is contained in:
parent
091e6976d9
commit
5e64954fe0
3 changed files with 67 additions and 17 deletions
|
|
@ -723,6 +723,9 @@ enum jay_block_type {
|
|||
/** A block reserved for post-RA spill lowering */
|
||||
JAY_BLOCK_SPILL,
|
||||
|
||||
/** A block containing accumulators mapped as GPRs */
|
||||
JAY_BLOCK_ACCUM,
|
||||
|
||||
JAY_BLOCK_TYPES,
|
||||
};
|
||||
|
||||
|
|
@ -740,7 +743,7 @@ struct jay_register_block {
|
|||
enum jay_stride stride;
|
||||
|
||||
/** Special feature of the block */
|
||||
enum jay_block_type type:2;
|
||||
enum jay_block_type type:3;
|
||||
};
|
||||
static_assert(sizeof(struct jay_register_block) == 8, "packed");
|
||||
|
||||
|
|
|
|||
|
|
@ -148,6 +148,9 @@ build_partition(jay_shader *shader, struct jay_partition_builder *b, unsigned n)
|
|||
for (unsigned b = 0; b < p->nr_blocks[file]; ++b) {
|
||||
struct jay_register_block B = p->blocks[file][b];
|
||||
unsigned len_grf = (B.len_gpr * 16) / p->units_x16[file];
|
||||
if (B.type == JAY_BLOCK_ACCUM) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(len_grf > 0 && "no empty partitions");
|
||||
assert(B.start_grf + len_grf <= JAY_NUM_PHYS_GRF && "GRF file size");
|
||||
|
|
@ -254,6 +257,7 @@ jay_partition_grf(jay_shader *shader)
|
|||
*/
|
||||
unsigned grf_8 = align(instr_req.gpr[JAY_STRIDE_8], 2) * grf_per_gpr;
|
||||
unsigned grf_2 = instr_req.gpr[JAY_STRIDE_2] * grf_per_gpr;
|
||||
unsigned mapped_accums = grf_per_gpr == 1 ? 2 : 0;
|
||||
|
||||
for (unsigned spilling = 0; spilling <= 1; spilling++) {
|
||||
/* There is an interdependence between partition choice and spilling,
|
||||
|
|
@ -296,7 +300,7 @@ jay_partition_grf(jay_shader *shader)
|
|||
nonuniform_grfs = JAY_NUM_PHYS_GRF - uniform_grfs;
|
||||
|
||||
/* Set the targets for the virtual register file accordingly */
|
||||
shader->num_regs[GPR] = nonuniform_grfs / grf_per_gpr;
|
||||
shader->num_regs[GPR] = (nonuniform_grfs / grf_per_gpr) + mapped_accums;
|
||||
shader->num_regs[UGPR] = uniform_grfs * ugpr_per_grf;
|
||||
|
||||
/* jay_gpr_limit depends on shader->num_regs[GPR]. If we're under the
|
||||
|
|
@ -335,6 +339,9 @@ jay_partition_grf(jay_shader *shader)
|
|||
/* EOT */
|
||||
{ UGPR, 0, eot_u, JAY_BLOCK_EOT },
|
||||
{ GPR, JAY_STRIDE_4, eot_4, JAY_BLOCK_EOT },
|
||||
|
||||
/* Accumulator block */
|
||||
{ GPR, JAY_STRIDE_4, mapped_accums * grf_per_gpr, JAY_BLOCK_ACCUM },
|
||||
};
|
||||
|
||||
build_partition(shader, blocks, ARRAY_SIZE(blocks));
|
||||
|
|
@ -368,7 +375,8 @@ jay_print_partition(struct jay_partition *p)
|
|||
printf(" %u-bit", jay_stride_to_bits(B.stride));
|
||||
}
|
||||
|
||||
const char *types[JAY_BLOCK_TYPES] = { "", " EOT", " Spill" };
|
||||
const char *types[JAY_BLOCK_TYPES] = { "", " EOT", " Spill",
|
||||
" Accumulator" };
|
||||
printf(ANSI_ITALIC "%s" ANSI_END "\n", types[B.type]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -277,7 +277,7 @@ push_temp(jay_builder *b,
|
|||
unsigned r = avoid_regs[0] ? (avoid_regs[1] ? 2 : 1) : 0;
|
||||
|
||||
file = file == UGPR ? UACCUM : ACCUM;
|
||||
*backing = jay_bare_reg(file, outer ? 2 : 0);
|
||||
*backing = jay_bare_reg(file, outer * 2);
|
||||
|
||||
/* Put accumulators down the float pipe - it's still a raw move. */
|
||||
jay_def new = def_from_reg(r);
|
||||
|
|
@ -301,21 +301,34 @@ pop_temp(jay_builder *b, jay_def temp, jay_def backing)
|
|||
static void
|
||||
mov(jay_builder *b, jay_def dst, jay_def src, struct jay_temp_regs temps)
|
||||
{
|
||||
jay_shader *s = b->shader;
|
||||
bool split_copy = dst.file == MEM && src.file == MEM;
|
||||
split_copy |= (dst.file == GPR && src.file == GPR) &&
|
||||
jay_def_stride(s, dst) != jay_def_stride(s, src) &&
|
||||
jay_def_stride(s, dst) != JAY_STRIDE_4 &&
|
||||
jay_def_stride(s, src) != JAY_STRIDE_4;
|
||||
bool acc_src = false, acc_dst = false;
|
||||
|
||||
if (dst.file == GPR && src.file == GPR) {
|
||||
struct jay_partition *p = &b->shader->partition;
|
||||
struct jay_register_block D = jay_lookup_block(p, dst.reg, GPR);
|
||||
struct jay_register_block S = jay_lookup_block(p, src.reg, GPR);
|
||||
|
||||
acc_dst = D.type == JAY_BLOCK_ACCUM;
|
||||
acc_src = S.type == JAY_BLOCK_ACCUM;
|
||||
|
||||
split_copy |= D.stride != S.stride &&
|
||||
D.stride != JAY_STRIDE_4 &&
|
||||
S.stride != JAY_STRIDE_4;
|
||||
|
||||
split_copy |= (acc_dst && S.stride != JAY_STRIDE_4) ||
|
||||
(acc_src && D.stride != JAY_STRIDE_4);
|
||||
}
|
||||
|
||||
if (split_copy) {
|
||||
jay_def temp = jay_null(), backing = jay_null();
|
||||
temp = push_temp(b, temps, GPR, false, &backing, jay_null(), jay_null());
|
||||
jay_MOV(b, temp, src);
|
||||
jay_MOV(b, dst, temp);
|
||||
jay_MOV(b, temp, src)->type = acc_src ? JAY_TYPE_F32 : JAY_TYPE_U32;
|
||||
jay_MOV(b, dst, temp)->type = acc_dst ? JAY_TYPE_F32 : JAY_TYPE_U32;
|
||||
pop_temp(b, temp, backing);
|
||||
} else {
|
||||
jay_MOV(b, dst, src);
|
||||
jay_MOV(b, dst, src)->type =
|
||||
(acc_src || acc_dst) ? JAY_TYPE_F32 : JAY_TYPE_U32;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -563,12 +576,14 @@ is_block_compatible(struct jay_register_block block,
|
|||
enum jay_file file,
|
||||
enum jay_stride min_stride,
|
||||
enum jay_stride max_stride,
|
||||
bool eot)
|
||||
bool eot,
|
||||
bool allow_accum)
|
||||
{
|
||||
return block.type != JAY_BLOCK_SPILL &&
|
||||
(file != GPR ||
|
||||
(min_stride <= block.stride && block.stride <= max_stride)) &&
|
||||
(!eot || block.type == JAY_BLOCK_EOT);
|
||||
(!eot || block.type == JAY_BLOCK_EOT) &&
|
||||
(allow_accum || block.type != JAY_BLOCK_ACCUM);
|
||||
}
|
||||
|
||||
static jay_reg
|
||||
|
|
@ -581,7 +596,7 @@ try_find_free_reg(jay_ra_state *ra,
|
|||
struct jay_register_block B = ra->b.shader->partition.blocks[file][b];
|
||||
|
||||
if (is_block_compatible(B, file, stride4 ? JAY_STRIDE_4 : 0,
|
||||
stride4 ? JAY_STRIDE_4 : ~0, false)) {
|
||||
stride4 ? JAY_STRIDE_4 : ~0, false, !stride4)) {
|
||||
|
||||
for (unsigned i = B.start_gpr; i < B.start_gpr + B.len_gpr; ++i) {
|
||||
if (BITSET_TEST(ra->available_regs[file], i) && i != except) {
|
||||
|
|
@ -740,7 +755,8 @@ pick_regs(jay_ra_state *ra,
|
|||
|
||||
if (!BITSET_TEST_COUNT(ra->pinned[file], cur, size) &&
|
||||
util_is_aligned(cur - block.start_gpr, alignment) &&
|
||||
is_block_compatible(block, file, min_stride, max_stride, eot) &&
|
||||
is_block_compatible(block, file, min_stride, max_stride, eot,
|
||||
false) &&
|
||||
cur + size <= (block.start_gpr + block.len_gpr)) {
|
||||
return cur;
|
||||
}
|
||||
|
|
@ -784,7 +800,8 @@ pick_regs(jay_ra_state *ra,
|
|||
|
||||
struct jay_register_block block = partition->blocks[file][b];
|
||||
|
||||
if (is_block_compatible(block, file, min_stride, max_stride, eot)) {
|
||||
if (is_block_compatible(block, file, min_stride, max_stride, eot,
|
||||
false)) {
|
||||
unsigned r = b_ == rr->block ? rr->gpr : 0;
|
||||
|
||||
if (affinity.repr == jay_channel(var, 0) && b_ == rr->block) {
|
||||
|
|
@ -1248,6 +1265,20 @@ insert_parallel_copies_for_phis(jay_function *f)
|
|||
free(phi_dsts);
|
||||
}
|
||||
|
||||
static void
|
||||
map_gpr_to_acc(jay_shader *shader, jay_def *x)
|
||||
{
|
||||
if (x->file == GPR) {
|
||||
struct jay_register_block B =
|
||||
jay_lookup_block(&shader->partition, x->reg, GPR);
|
||||
|
||||
if (B.type == JAY_BLOCK_ACCUM) {
|
||||
x->file = ACCUM;
|
||||
x->reg = (2 + (x->reg - B.start_gpr)) * 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
jay_register_allocate_function(jay_function *f)
|
||||
{
|
||||
|
|
@ -1369,6 +1400,14 @@ jay_register_allocate_function(jay_function *f)
|
|||
if (spilled) {
|
||||
jay_lower_spill(f);
|
||||
}
|
||||
|
||||
jay_foreach_inst_in_func(f, block, I) {
|
||||
map_gpr_to_acc(shader, &I->dst);
|
||||
|
||||
jay_foreach_src(I, s) {
|
||||
map_gpr_to_acc(shader, &I->src[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue