diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 7bdfc4edcab..403e2cc8031 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1140,7 +1140,7 @@ vec4_visitor::opt_register_coalesce() /* Can't coalesce this GRF if someone else was going to * read it later. */ - if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip) + if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 8) > ip) continue; /* We need to check interference with the final destination between this diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index bf7e4a4f070..eed2478cda9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -248,7 +248,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) * more -- a sure sign they'll fail operands_match(). */ if (src->file == VGRF) { - if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) { + if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) { entry->remove(); ralloc_free(entry); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp index 9185d5202b9..5b22a096dd1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -57,13 +57,12 @@ vec4_visitor::dead_code_eliminate() if ((inst->dst.file == VGRF && !inst->has_side_effects()) || (inst->dst.is_null() && inst->writes_flag())){ bool result_live[4] = { false }; - if (inst->dst.file == VGRF) { - for (unsigned i = 0; i < regs_written(inst); i++) { - for (int c = 0; c < 4; c++) - result_live[c] |= BITSET_TEST(live, - var_from_reg(alloc, - byte_offset(inst->dst, i * REG_SIZE), c)); + for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { + for (int c = 0; c < 4; c++) { + const unsigned v = 
var_from_reg(alloc, inst->dst, c, i); + result_live[c] |= BITSET_TEST(live, v); + } } } else { for (unsigned c = 0; c < 4; c++) @@ -112,14 +111,11 @@ vec4_visitor::dead_code_eliminate() if (inst->dst.file == VGRF && !inst->predicate && !inst->is_align1_partial_write()) { - for (unsigned i = 0; i < regs_written(inst); i++) { + for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { for (int c = 0; c < 4; c++) { if (inst->dst.writemask & (1 << c)) { - BITSET_CLEAR(live, - var_from_reg(alloc, - byte_offset(inst->dst, - i * REG_SIZE), - c)); + const unsigned v = var_from_reg(alloc, inst->dst, c, i); + BITSET_CLEAR(live, v); } } } @@ -137,12 +133,10 @@ vec4_visitor::dead_code_eliminate() for (int i = 0; i < 3; i++) { if (inst->src[i].file == VGRF) { - for (unsigned j = 0; j < regs_read(inst, i); j++) { + for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) { for (int c = 0; c < 4; c++) { - BITSET_SET(live, var_from_reg(alloc, - byte_offset(inst->src[i], - j * REG_SIZE), - c)); + const unsigned v = var_from_reg(alloc, inst->src[i], c, j); + BITSET_SET(live, v); } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index b70d6c2fab6..73f658cd8fa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -76,12 +76,9 @@ vec4_live_variables::setup_def_use() /* Set use[] for this instruction */ for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == VGRF) { - for (unsigned j = 0; j < regs_read(inst, i); j++) { + for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) { for (int c = 0; c < 4; c++) { - const unsigned v = - var_from_reg(alloc, - byte_offset(inst->src[i], j * REG_SIZE), - c); + const unsigned v = var_from_reg(alloc, inst->src[i], c, j); if (!BITSET_TEST(bd->def, v)) BITSET_SET(bd->use, v); } @@ -101,12 +98,10 @@ vec4_live_variables::setup_def_use() */ if (inst->dst.file 
== VGRF && (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) { - for (unsigned i = 0; i < regs_written(inst); i++) { + for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { for (int c = 0; c < 4; c++) { if (inst->dst.writemask & (1 << c)) { - const unsigned v = - var_from_reg(alloc, - byte_offset(inst->dst, i * REG_SIZE), c); + const unsigned v = var_from_reg(alloc, inst->dst, c, i); if (!BITSET_TEST(bd->use, v)) BITSET_SET(bd->def, v); } @@ -191,7 +186,7 @@ vec4_live_variables::vec4_live_variables(const simple_allocator &alloc, { mem_ctx = ralloc_context(NULL); - num_vars = alloc.total_size * 4; + num_vars = alloc.total_size * 8; block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); bitset_words = BITSET_WORDS(num_vars); @@ -241,14 +236,14 @@ vec4_visitor::calculate_live_intervals() if (this->live_intervals) return; - int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 4); - int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 4); + int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 8); + int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 8); ralloc_free(this->virtual_grf_start); ralloc_free(this->virtual_grf_end); this->virtual_grf_start = start; this->virtual_grf_end = end; - for (unsigned i = 0; i < this->alloc.total_size * 4; i++) { + for (unsigned i = 0; i < this->alloc.total_size * 8; i++) { start[i] = MAX_INSTRUCTION; end[i] = -1; } @@ -260,11 +255,9 @@ vec4_visitor::calculate_live_intervals() foreach_block_and_inst(block, vec4_instruction, inst, cfg) { for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == VGRF) { - for (unsigned j = 0; j < regs_read(inst, i); j++) { + for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) { for (int c = 0; c < 4; c++) { - const unsigned v = - var_from_reg(alloc, - byte_offset(inst->src[i], j * REG_SIZE), c); + const unsigned v = var_from_reg(alloc, inst->src[i], c, j); start[v] = MIN2(start[v], ip); end[v] = ip; } 
@@ -273,12 +266,10 @@ vec4_visitor::calculate_live_intervals() } if (inst->dst.file == VGRF) { - for (unsigned i = 0; i < regs_written(inst); i++) { + for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { for (int c = 0; c < 4; c++) { if (inst->dst.writemask & (1 << c)) { - const unsigned v = - var_from_reg(alloc, - byte_offset(inst->dst, i * REG_SIZE), c); + const unsigned v = var_from_reg(alloc, inst->dst, c, i); start[v] = MIN2(start[v], ip); end[v] = ip; } @@ -345,8 +336,8 @@ vec4_visitor::var_range_end(unsigned v, unsigned n) const bool vec4_visitor::virtual_grf_interferes(int a, int b) { - return !((var_range_end(4 * alloc.offsets[a], 4 * alloc.sizes[a]) <= - var_range_start(4 * alloc.offsets[b], 4 * alloc.sizes[b])) || - (var_range_end(4 * alloc.offsets[b], 4 * alloc.sizes[b]) <= - var_range_start(4 * alloc.offsets[a], 4 * alloc.sizes[a]))); + return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <= + var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) || + (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <= + var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a]))); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h index 2fbcaa1228a..8807c453743 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h @@ -78,23 +78,35 @@ protected: void *mem_ctx; }; +/* Returns the variable index for the k-th dword of the c-th component of + * register reg. 
+ */ inline unsigned var_from_reg(const simple_allocator &alloc, const src_reg &reg, - unsigned c = 0) + unsigned c = 0, unsigned k = 0) { - assert(reg.file == VGRF && reg.nr < alloc.count && - reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4); - return (4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + - BRW_GET_SWZ(reg.swizzle, c)); + assert(reg.file == VGRF && reg.nr < alloc.count && c < 4); + const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4); + unsigned result = + 8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + + (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize; + /* Do not exceed the limit for this register */ + assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr])); + return result; } inline unsigned var_from_reg(const simple_allocator &alloc, const dst_reg &reg, - unsigned c = 0) + unsigned c = 0, unsigned k = 0) { - assert(reg.file == VGRF && reg.nr < alloc.count && - reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4); - return 4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + c; + assert(reg.file == VGRF && reg.nr < alloc.count && c < 4); + const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4); + unsigned result = + 8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + + (c + k / csize * 4) * csize + k % csize; + /* Do not exceed the limit for this register */ + assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr])); + return result; } } /* namespace brw */