mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-15 06:40:27 +01:00
i965/vec4: handle 32 and 64 bit channels in liveness analysis
Our current data flow analysis does not take into account that channels
on 64-bit operands are 64-bit. This is a problem when the same register
is accessed using both 64-bit and 32-bit channels. This is very common
in operations where we need to access 64-bit data in 32-bit chunks,
such as the double packing and unpacking operations.
This patch changes the analysis by checking the bits that each source
or destination datatype needs. Actually, rather than bits, we use
blocks of 32 bits, which is the minimum channel size.
Because a vgrf can contain a dvec4 (256 bits), we reserve 8
32-bit blocks to map the channels.
v2 (Curro):
- Simplify code by making the var_from_reg helpers take an extra
argument with the register component we want.
- Fix a couple of cases where we had to update the code to the new
way of representing live variables.
v3:
- Fix indent in multiline expressions (Matt)
- Fix comment's closing tag (Matt)
- Use DIV_ROUND_UP(inst->size_written, 16) instead of 2 * regs_written(inst)
to avoid rounding issues. The same for regs_read(i). (Curro).
- Add asserts in var_from_reg() to avoid exceeding the allocated
registers (Curro).
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
parent
29dd5cf9d6
commit
4ea3bf8ebb
5 changed files with 50 additions and 53 deletions
|
|
@ -1140,7 +1140,7 @@ vec4_visitor::opt_register_coalesce()
|
|||
/* Can't coalesce this GRF if someone else was going to
|
||||
* read it later.
|
||||
*/
|
||||
if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip)
|
||||
if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 8) > ip)
|
||||
continue;
|
||||
|
||||
/* We need to check interference with the final destination between this
|
||||
|
|
|
|||
|
|
@ -248,7 +248,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
|
|||
* more -- a sure sign they'll fail operands_match().
|
||||
*/
|
||||
if (src->file == VGRF) {
|
||||
if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) {
|
||||
if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) {
|
||||
entry->remove();
|
||||
ralloc_free(entry);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -57,13 +57,12 @@ vec4_visitor::dead_code_eliminate()
|
|||
if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
|
||||
(inst->dst.is_null() && inst->writes_flag())){
|
||||
bool result_live[4] = { false };
|
||||
|
||||
if (inst->dst.file == VGRF) {
|
||||
for (unsigned i = 0; i < regs_written(inst); i++) {
|
||||
for (int c = 0; c < 4; c++)
|
||||
result_live[c] |= BITSET_TEST(live,
|
||||
var_from_reg(alloc,
|
||||
byte_offset(inst->dst, i * REG_SIZE), c));
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
const unsigned v = var_from_reg(alloc, inst->dst, c, i);
|
||||
result_live[c] |= BITSET_TEST(live, v);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (unsigned c = 0; c < 4; c++)
|
||||
|
|
@ -112,14 +111,11 @@ vec4_visitor::dead_code_eliminate()
|
|||
|
||||
if (inst->dst.file == VGRF && !inst->predicate &&
|
||||
!inst->is_align1_partial_write()) {
|
||||
for (unsigned i = 0; i < regs_written(inst); i++) {
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
if (inst->dst.writemask & (1 << c)) {
|
||||
BITSET_CLEAR(live,
|
||||
var_from_reg(alloc,
|
||||
byte_offset(inst->dst,
|
||||
i * REG_SIZE),
|
||||
c));
|
||||
const unsigned v = var_from_reg(alloc, inst->dst, c, i);
|
||||
BITSET_CLEAR(live, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -137,12 +133,10 @@ vec4_visitor::dead_code_eliminate()
|
|||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
||||
for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
BITSET_SET(live, var_from_reg(alloc,
|
||||
byte_offset(inst->src[i],
|
||||
j * REG_SIZE),
|
||||
c));
|
||||
const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
|
||||
BITSET_SET(live, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,12 +76,9 @@ vec4_live_variables::setup_def_use()
|
|||
/* Set use[] for this instruction */
|
||||
for (unsigned int i = 0; i < 3; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
||||
for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
const unsigned v =
|
||||
var_from_reg(alloc,
|
||||
byte_offset(inst->src[i], j * REG_SIZE),
|
||||
c);
|
||||
const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
|
||||
if (!BITSET_TEST(bd->def, v))
|
||||
BITSET_SET(bd->use, v);
|
||||
}
|
||||
|
|
@ -101,12 +98,10 @@ vec4_live_variables::setup_def_use()
|
|||
*/
|
||||
if (inst->dst.file == VGRF &&
|
||||
(!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
|
||||
for (unsigned i = 0; i < regs_written(inst); i++) {
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
if (inst->dst.writemask & (1 << c)) {
|
||||
const unsigned v =
|
||||
var_from_reg(alloc,
|
||||
byte_offset(inst->dst, i * REG_SIZE), c);
|
||||
const unsigned v = var_from_reg(alloc, inst->dst, c, i);
|
||||
if (!BITSET_TEST(bd->use, v))
|
||||
BITSET_SET(bd->def, v);
|
||||
}
|
||||
|
|
@ -191,7 +186,7 @@ vec4_live_variables::vec4_live_variables(const simple_allocator &alloc,
|
|||
{
|
||||
mem_ctx = ralloc_context(NULL);
|
||||
|
||||
num_vars = alloc.total_size * 4;
|
||||
num_vars = alloc.total_size * 8;
|
||||
block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
|
||||
|
||||
bitset_words = BITSET_WORDS(num_vars);
|
||||
|
|
@ -241,14 +236,14 @@ vec4_visitor::calculate_live_intervals()
|
|||
if (this->live_intervals)
|
||||
return;
|
||||
|
||||
int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
|
||||
int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
|
||||
int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
|
||||
int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
|
||||
ralloc_free(this->virtual_grf_start);
|
||||
ralloc_free(this->virtual_grf_end);
|
||||
this->virtual_grf_start = start;
|
||||
this->virtual_grf_end = end;
|
||||
|
||||
for (unsigned i = 0; i < this->alloc.total_size * 4; i++) {
|
||||
for (unsigned i = 0; i < this->alloc.total_size * 8; i++) {
|
||||
start[i] = MAX_INSTRUCTION;
|
||||
end[i] = -1;
|
||||
}
|
||||
|
|
@ -260,11 +255,9 @@ vec4_visitor::calculate_live_intervals()
|
|||
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
||||
for (unsigned int i = 0; i < 3; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
for (unsigned j = 0; j < regs_read(inst, i); j++) {
|
||||
for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
const unsigned v =
|
||||
var_from_reg(alloc,
|
||||
byte_offset(inst->src[i], j * REG_SIZE), c);
|
||||
const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
|
||||
start[v] = MIN2(start[v], ip);
|
||||
end[v] = ip;
|
||||
}
|
||||
|
|
@ -273,12 +266,10 @@ vec4_visitor::calculate_live_intervals()
|
|||
}
|
||||
|
||||
if (inst->dst.file == VGRF) {
|
||||
for (unsigned i = 0; i < regs_written(inst); i++) {
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
if (inst->dst.writemask & (1 << c)) {
|
||||
const unsigned v =
|
||||
var_from_reg(alloc,
|
||||
byte_offset(inst->dst, i * REG_SIZE), c);
|
||||
const unsigned v = var_from_reg(alloc, inst->dst, c, i);
|
||||
start[v] = MIN2(start[v], ip);
|
||||
end[v] = ip;
|
||||
}
|
||||
|
|
@ -345,8 +336,8 @@ vec4_visitor::var_range_end(unsigned v, unsigned n) const
|
|||
bool
|
||||
vec4_visitor::virtual_grf_interferes(int a, int b)
|
||||
{
|
||||
return !((var_range_end(4 * alloc.offsets[a], 4 * alloc.sizes[a]) <=
|
||||
var_range_start(4 * alloc.offsets[b], 4 * alloc.sizes[b])) ||
|
||||
(var_range_end(4 * alloc.offsets[b], 4 * alloc.sizes[b]) <=
|
||||
var_range_start(4 * alloc.offsets[a], 4 * alloc.sizes[a])));
|
||||
return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
|
||||
var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
|
||||
(var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
|
||||
var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,23 +78,35 @@ protected:
|
|||
void *mem_ctx;
|
||||
};
|
||||
|
||||
/* Returns the variable index for the k-th dword of the c-th component of
|
||||
* register reg.
|
||||
*/
|
||||
inline unsigned
|
||||
var_from_reg(const simple_allocator &alloc, const src_reg &reg,
|
||||
unsigned c = 0)
|
||||
unsigned c = 0, unsigned k = 0)
|
||||
{
|
||||
assert(reg.file == VGRF && reg.nr < alloc.count &&
|
||||
reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4);
|
||||
return (4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
|
||||
BRW_GET_SWZ(reg.swizzle, c));
|
||||
assert(reg.file == VGRF && reg.nr < alloc.count && c < 4);
|
||||
const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
|
||||
unsigned result =
|
||||
8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
|
||||
(BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize;
|
||||
/* Do not exceed the limit for this register */
|
||||
assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline unsigned
|
||||
var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
|
||||
unsigned c = 0)
|
||||
unsigned c = 0, unsigned k = 0)
|
||||
{
|
||||
assert(reg.file == VGRF && reg.nr < alloc.count &&
|
||||
reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4);
|
||||
return 4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + c;
|
||||
assert(reg.file == VGRF && reg.nr < alloc.count && c < 4);
|
||||
const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
|
||||
unsigned result =
|
||||
8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
|
||||
(c + k / csize * 4) * csize + k % csize;
|
||||
/* Do not exceed the limit for this register */
|
||||
assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
|
||||
return result;
|
||||
}
|
||||
|
||||
} /* namespace brw */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue