diff --git a/src/intel/compiler/brw_analysis.h b/src/intel/compiler/brw_analysis.h index 908c7aec38e..d32239ad98b 100644 --- a/src/intel/compiler/brw_analysis.h +++ b/src/intel/compiler/brw_analysis.h @@ -435,8 +435,7 @@ public: BITSET_WORD flag_livein[1]; BITSET_WORD flag_liveout[1]; - int start_ip; - int end_ip; + brw_range ip_range; }; brw_live_variables(const brw_shader *s); diff --git a/src/intel/compiler/brw_analysis_liveness.cpp b/src/intel/compiler/brw_analysis_liveness.cpp index 9a81d9aac84..ae1c834557a 100644 --- a/src/intel/compiler/brw_analysis_liveness.cpp +++ b/src/intel/compiler/brw_analysis_liveness.cpp @@ -106,9 +106,9 @@ brw_live_variables::setup_def_use() foreach_block (block, cfg) { struct block_data *bd = &block_data[block->num]; - assert(ip == bd->start_ip); + assert(ip == bd->ip_range.start); if (block->num > 0) - assert(block_data[block->num - 1].end_ip == ip - 1); + assert(block_data[block->num - 1].ip_range.end == ip - 1); foreach_inst_in_block(brw_inst, inst, block) { /* Set use[] for this instruction */ @@ -233,13 +233,13 @@ brw_live_variables::compute_start_end() unsigned i; BITSET_FOREACH_SET(i, bd->livein, (unsigned)num_vars) { - start[i] = MIN2(start[i], bd->start_ip); - end[i] = MAX2(end[i], bd->start_ip); + start[i] = MIN2(start[i], bd->ip_range.start); + end[i] = MAX2(end[i], bd->ip_range.start); } BITSET_FOREACH_SET(i, bd->liveout, (unsigned)num_vars) { - start[i] = MIN2(start[i], bd->end_ip); - end[i] = MAX2(end[i], bd->end_ip); + start[i] = MIN2(start[i], bd->ip_range.end); + end[i] = MAX2(end[i], bd->ip_range.end); } } } @@ -297,12 +297,8 @@ brw_live_variables::brw_live_variables(const brw_shader *s) } const brw_ip_ranges &ips = s->ip_ranges_analysis.require(); - for (int i = 0; i < cfg->num_blocks; i++) { - brw_range range = ips.range(cfg->blocks[i]); - - block_data[i].start_ip = range.start; - block_data[i].end_ip = range.end; - } + for (int i = 0; i < cfg->num_blocks; i++) + block_data[i].ip_range = 
ips.range(cfg->blocks[i]); setup_def_use(); compute_live_variables(); @@ -326,13 +322,17 @@ check_register_live_range(const brw_live_variables *live, int ip, const brw_reg &reg, unsigned n) { const unsigned var = live->var_from_reg(reg); + const brw_range reg_range{ live->vgrf_start[reg.nr], + live->vgrf_end[reg.nr] }; if (var + n > unsigned(live->num_vars) || - live->vgrf_start[reg.nr] > ip || live->vgrf_end[reg.nr] < ip) + !reg_range.contains(ip)) return false; for (unsigned j = 0; j < n; j++) { - if (live->start[var + j] > ip || live->end[var + j] < ip) + const brw_range var_range{ live->start[var + j], + live->end[var + j] }; + if (!var_range.contains(ip)) return false; } @@ -365,13 +365,32 @@ brw_live_variables::validate(const brw_shader *s) const bool brw_live_variables::vars_interfere(int a, int b) const { - return !(end[b] <= start[a] || - end[a] <= start[b]); + /* Clip the ranges so the end of a live range can overlap with + * the start of another live range. See details in vgrfs_interfere(). + */ + brw_range ra{start[a], end[a]}; + brw_range rb{start[b], end[b]}; + + return overlaps(clip_end(ra, 1), + clip_end(rb, 1)); } bool brw_live_variables::vgrfs_interfere(int a, int b) const { - return !(vgrf_end[a] <= vgrf_start[b] || - vgrf_end[b] <= vgrf_start[a]); + /* The live ranges are constructed such that at the start + * of the range there's a WRITE to the VGRF and at the + * end of the range there's a READ of the VGRF. + * + * Two VGRFs will not interfere if the same IP has both + * the READ at the end for one and the WRITE at the start + * for the other. + * + * Clip the ranges to cover this edge case. 
+ */ + brw_range ra{vgrf_start[a], vgrf_end[a]}; + brw_range rb{vgrf_start[b], vgrf_end[b]}; + + return overlaps(clip_end(ra, 1), + clip_end(rb, 1)); } diff --git a/src/intel/compiler/brw_opt_copy_propagation.cpp b/src/intel/compiler/brw_opt_copy_propagation.cpp index 860682d0c16..4c62ef0602c 100644 --- a/src/intel/compiler/brw_opt_copy_propagation.cpp +++ b/src/intel/compiler/brw_opt_copy_propagation.cpp @@ -1525,11 +1525,13 @@ brw_opt_copy_propagation(brw_shader &s) for (auto iter = out_acp[block->num].begin(); iter != out_acp[block->num].end(); ++iter) { assert((*iter)->dst.file == VGRF); + brw_range block_range = ips.range(block); - if (block_range.start <= live.vgrf_start[(*iter)->dst.nr] && - live.vgrf_end[(*iter)->dst.nr] <= block_range.end) { + brw_range vgrf_range{live.vgrf_start[(*iter)->dst.nr], + live.vgrf_end[(*iter)->dst.nr]}; + + if (block_range.contains(vgrf_range)) out_acp[block->num].remove(*iter); - } } } diff --git a/src/intel/compiler/brw_opt_register_coalesce.cpp b/src/intel/compiler/brw_opt_register_coalesce.cpp index b6d7af7ef73..5a06050b0cf 100644 --- a/src/intel/compiler/brw_opt_register_coalesce.cpp +++ b/src/intel/compiler/brw_opt_register_coalesce.cpp @@ -105,24 +105,22 @@ can_coalesce_vars(const intel_device_info *devinfo, if (!live.vars_interfere(src_var, dst_var)) return true; - int dst_start = live.start[dst_var]; - int dst_end = live.end[dst_var]; - int src_start = live.start[src_var]; - int src_end = live.end[src_var]; + brw_range dst_range{ live.start[dst_var], live.end[dst_var] }; + brw_range src_range{ live.start[src_var], live.end[src_var] }; - /* Variables interfere and one line range isn't a subset of the other. */ - if ((dst_end > src_end && src_start < dst_start) || - (src_end > dst_end && dst_start < src_start)) + /* Variables interfere and one live range isn't a subset of the other. 
*/ + if (!dst_range.contains(src_range) && + !src_range.contains(dst_range)) return false; /* Check for a write to either register in the intersection of their live * ranges. */ - int start_ip = MAX2(dst_start, src_start); - int end_ip = MIN2(dst_end, src_end); + brw_range intersection = intersect(dst_range, src_range); + assert(!intersection.is_empty()); foreach_block(scan_block, cfg) { - if (ips.range(scan_block).end < start_ip) + if (ips.range(scan_block).end < intersection.start) continue; int scan_ip = ips.range(scan_block).start - 1; @@ -133,7 +131,7 @@ can_coalesce_vars(const intel_device_info *devinfo, scan_ip++; /* Ignore anything before the intersection of the live ranges */ - if (scan_ip < start_ip) + if (scan_ip < intersection.start) continue; /* Ignore the copying instruction itself */ @@ -142,7 +140,7 @@ can_coalesce_vars(const intel_device_info *devinfo, continue; } - if (scan_ip > end_ip) + if (scan_ip > intersection.end) return true; /* registers do not interfere */ if (seen_src_write && !seen_copy) { diff --git a/src/intel/compiler/brw_reg_allocate.cpp b/src/intel/compiler/brw_reg_allocate.cpp index 617af758097..30bce0cf9bc 100644 --- a/src/intel/compiler/brw_reg_allocate.cpp +++ b/src/intel/compiler/brw_reg_allocate.cpp @@ -284,8 +284,7 @@ public: bool assign_regs(bool allow_spilling, bool spill_all); private: - void setup_live_interference(unsigned node, - int node_start_ip, int node_end_ip); + void setup_live_interference(unsigned node, brw_range ip_range); void setup_inst_interference(const brw_inst *inst); void build_interference_graph(bool allow_spilling); @@ -369,8 +368,7 @@ namespace { } void -brw_reg_alloc::setup_live_interference(unsigned node, - int node_start_ip, int node_end_ip) +brw_reg_alloc::setup_live_interference(unsigned node, brw_range ip_range) { /* Mark any virtual grf that is live between the start of the program and * the last use of a payload node interfering with that payload node. 
@@ -383,10 +381,12 @@ brw_reg_alloc::setup_live_interference(unsigned node, * in order to not have to worry about the uniform issue described in * calculate_live_intervals(). */ - if (node_start_ip <= payload_last_use_ip[i]) + if (ip_range.start <= payload_last_use_ip[i]) ra_add_node_interference(g, node, first_payload_node + i); } + const brw_range clipped_ip_range = clip_end(ip_range, 1); + /* Add interference with every vgrf whose live range intersects this * node's. We only need to look at nodes below this one as the reflexivity * of interference will take care of the rest. @@ -394,8 +394,14 @@ brw_reg_alloc::setup_live_interference(unsigned node, for (unsigned n2 = first_vgrf_node; n2 <= (unsigned)last_vgrf_node && n2 < node; n2++) { unsigned vgrf = n2 - first_vgrf_node; - if (!(node_end_ip <= live.vgrf_start[vgrf] || - live.vgrf_end[vgrf] <= node_start_ip)) + + /* Clip the ranges so the end of a live range can overlap with + * the start of another live range. See details in vgrfs_interfere(). + */ + brw_range vgrf_range { live.vgrf_start[vgrf], + live.vgrf_end[vgrf] }; + if (overlaps(clip_end(vgrf_range, 1), + clipped_ip_range)) ra_add_node_interference(g, node, n2); } } @@ -663,9 +669,9 @@ brw_reg_alloc::build_interference_graph(bool allow_spilling) /* Add interference based on the live range of the register */ for (unsigned i = 0; i < fs->alloc.count; i++) { - setup_live_interference(first_vgrf_node + i, - live.vgrf_start[i], - live.vgrf_end[i]); + brw_range vgrf_range{ live.vgrf_start[i], + live.vgrf_end[i] }; + setup_live_interference(first_vgrf_node + i, vgrf_range); } /* Add interference based on the instructions in which a register is used. 
@@ -1085,7 +1091,8 @@ brw_reg_alloc::alloc_spill_reg(unsigned size, int ip) assert(n == first_vgrf_node + vgrf); assert(n == first_spill_node + spill_node_count); - setup_live_interference(n, ip - 1, ip + 1); + brw_range spill_reg_range{ ip - 1, ip + 1 }; + setup_live_interference(n, spill_reg_range); /* Add interference between this spill node and any other spill nodes for * the same instruction. diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 40f454e6892..7459c2700c8 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -856,11 +856,11 @@ brw_instruction_scheduler::setup_liveness(cfg_t *cfg) */ for (int block = 0; block < cfg->num_blocks - 1; block++) { for (int i = 0; i < grf_count; i++) { - const int this_block_end = ips.range(cfg->blocks[block]).end; - const int next_block_start = ips.range(cfg->blocks[block + 1]).start; + const int block_end = ips.range(cfg->blocks[block]).end; + const brw_range vgrf_range{live.vgrf_start[i], live.vgrf_end[i]}; - if (live.vgrf_start[i] <= this_block_end && - live.vgrf_end[i] >= next_block_start) { + if (vgrf_range.contains(block_end) && + vgrf_range.contains(block_end + 1)) { if (!BITSET_TEST(livein[block + 1], i)) { reg_pressure_in[block + 1] += s->alloc.sizes[i]; BITSET_SET(livein[block + 1], i);