pan/bi: Track words instead of bytes in RA

Reduces RA memory footprint by 4x, fixing an OOM in the following dEQP
test that otherwise would allocate 8GB of memory...

   dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.36

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11123>
This commit is contained in:
Alyssa Rosenzweig 2021-06-08 14:49:04 -04:00 committed by Marge Bot
parent dbc346d659
commit 47e0cce820
3 changed files with 20 additions and 24 deletions

View file

@ -36,8 +36,8 @@ bi_liveness_ins_update(uint16_t *live, bi_instr *ins, unsigned max)
bi_foreach_src(ins, src) {
unsigned count = bi_count_read_registers(ins, src);
unsigned rmask = (1 << (4 * count)) - 1;
uint16_t mask = (rmask << (4 * ins->src[src].offset));
unsigned rmask = BITFIELD_MASK(count);
uint16_t mask = (rmask << ins->src[src].offset);
unsigned node = bi_get_node(ins->src[src]);
pan_liveness_gen(live, node, max, mask);

View file

@ -38,10 +38,10 @@ struct lcra_state {
* Each element is itself a bit field denoting whether (c_j - c_i) bias
* is present or not, including negative biases.
*
* Note for Midgard, there are 16 components so the bias is in range
* [-15, 15] so encoded by 32-bit field. */
* Note for Bifrost, there are 4 components so the bias is in range
* [-3, 3] so encoded by 8-bit field. */
uint32_t *linear;
uint8_t *linear;
/* Before solving, forced registers; after solving, solutions. */
unsigned *solutions;
@ -84,18 +84,18 @@ lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, u
if (i == j)
return;
uint32_t constraint_fw = 0;
uint32_t constraint_bw = 0;
uint8_t constraint_fw = 0;
uint8_t constraint_bw = 0;
for (unsigned D = 0; D < 16; ++D) {
for (unsigned D = 0; D < 4; ++D) {
if (cmask_i & (cmask_j << D)) {
constraint_bw |= (1 << (15 + D));
constraint_fw |= (1 << (15 - D));
constraint_bw |= (1 << (3 + D));
constraint_fw |= (1 << (3 - D));
}
if (cmask_i & (cmask_j >> D)) {
constraint_fw |= (1 << (15 + D));
constraint_bw |= (1 << (15 - D));
constraint_fw |= (1 << (3 + D));
constraint_bw |= (1 << (3 - D));
}
}
@ -106,7 +106,7 @@ lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, u
static bool
lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
{
unsigned *row = &l->linear[i * l->node_count];
uint8_t *row = &l->linear[i * l->node_count];
signed constant = solutions[i];
for (unsigned j = 0; j < l->node_count; ++j) {
@ -114,10 +114,10 @@ lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
signed lhs = solutions[j] - constant;
if (lhs < -15 || lhs > 15)
if (lhs < -3 || lhs > 3)
continue;
if (row[j] & (1 << (lhs + 15)))
if (row[j] & (1 << (lhs + 3)))
return false;
}
@ -134,7 +134,7 @@ lcra_solve(struct lcra_state *l)
bool succ = false;
u_foreach_bit64(r, l->affinity[step]) {
l->solutions[step] = r * 4;
l->solutions[step] = r;
if (lcra_test_linear(l, l->solutions, step)) {
succ = true;
@ -157,7 +157,7 @@ static unsigned
lcra_count_constraints(struct lcra_state *l, unsigned i)
{
unsigned count = 0;
unsigned *constraints = &l->linear[i * l->node_count];
uint8_t *constraints = &l->linear[i * l->node_count];
for (unsigned j = 0; j < l->node_count; ++j)
count += util_bitcount(constraints[j]);
@ -315,12 +315,8 @@ bi_reg_from_index(bi_context *ctx, struct lcra_state *l, bi_index index)
return index;
}
assert((solution & 0x3) == 0);
unsigned reg = solution / 4;
reg += index.offset;
/* todo: do we want to compose with the subword swizzle? */
bi_index new_index = bi_register(reg);
bi_index new_index = bi_register(solution + index.offset);
new_index.swizzle = index.swizzle;
new_index.abs = index.abs;
new_index.neg = index.neg;

View file

@ -110,8 +110,8 @@ bi_count_write_registers(bi_instr *ins, unsigned d)
unsigned
bi_writemask(bi_instr *ins, unsigned d)
{
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d) * 4);
unsigned shift = ins->dest[d].offset * 4; /* 32-bit words */
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
unsigned shift = ins->dest[d].offset;
return (mask << shift);
}