diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 585f16505f1..60603ba4557 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -90,6 +90,25 @@ struct block_data { * have a fully uninitialized destination at the end of this block. */ BITSET_WORD *undef; + + /** + * Which entries in the fs_copy_prop_dataflow acp table can the + * start of this block be reached from. Note that this is a weaker + * condition than livein. + */ + BITSET_WORD *reachin; + + /** + * Which entries in the fs_copy_prop_dataflow acp table are + * overwritten by an instruction with channel masks inconsistent + * with the copy instruction (e.g. due to force_writemask_all). + * Such an overwrite can cause the copy entry to become invalid + * even if the copy instruction is subsequently re-executed for any + * given channel i, since the execution of the overwrite for + * channel i may corrupt other channels j!=i inactive for the + * subsequent copy. + */ + BITSET_WORD *exec_mismatch; }; class fs_copy_prop_dataflow @@ -141,6 +160,8 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, bd[block->num].copy = rzalloc_array(bd, BITSET_WORD, bitset_words); bd[block->num].kill = rzalloc_array(bd, BITSET_WORD, bitset_words); bd[block->num].undef = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd[block->num].reachin = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd[block->num].exec_mismatch = rzalloc_array(bd, BITSET_WORD, bitset_words); for (int i = 0; i < ACP_HASH_SIZE; i++) { foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) { @@ -208,8 +229,11 @@ fs_copy_prop_dataflow::setup_initial_values() unsigned idx = reg_space(inst->dst) & (acp_table_size - 1); foreach_in_list(acp_entry, entry, &acp_table[idx]) { if (regions_overlap(inst->dst, inst->size_written, - entry->dst, entry->size_written)) + entry->dst, entry->size_written)) { BITSET_SET(bd[block->num].kill, entry->global_idx); + if (inst->force_writemask_all && !entry->force_writemask_all) + BITSET_SET(bd[block->num].exec_mismatch, entry->global_idx); + } } } } @@ -235,8 +259,11 @@ fs_copy_prop_dataflow::setup_initial_values() unsigned idx = reg_space(inst->dst) & (acp_table_size - 1); foreach_in_list(acp_entry, entry, &acp_table[idx]) { if (regions_overlap(inst->dst, inst->size_written, - entry->src, entry->size_read)) + entry->src, entry->size_read)) { BITSET_SET(bd[block->num].kill, entry->global_idx); + if (inst->force_writemask_all && !entry->force_writemask_all) + BITSET_SET(bd[block->num].exec_mismatch, entry->global_idx); + } } } } @@ -293,6 +320,7 @@ fs_copy_prop_dataflow::run() for (int i = 0; i < bitset_words; i++) { const BITSET_WORD old_liveout = bd[block->num].liveout[i]; + const BITSET_WORD old_reachin = bd[block->num].reachin[i]; BITSET_WORD livein_from_any_block = 0; /* Update livein for this block. If a copy is live out of all @@ -310,6 +338,13 @@ fs_copy_prop_dataflow::run() bd[block->num].livein[i] &= (bd[parent->num].liveout[i] | bd[parent->num].undef[i]); livein_from_any_block |= bd[parent->num].liveout[i]; + + /* Update reachin for this block. If the end of any + * parent block is reachable from the copy, the start + * of this block is reachable from it as well. + */ + bd[block->num].reachin[i] |= (bd[parent->num].reachin[i] | + bd[parent->num].copy[i]); } /* Limit to the set of ACP entries that can possibly be available @@ -325,7 +360,44 @@ fs_copy_prop_dataflow::run() bd[block->num].copy[i] | (bd[block->num].livein[i] & ~bd[block->num].kill[i]); - if (old_liveout != bd[block->num].liveout[i]) + if (old_liveout != bd[block->num].liveout[i] || + old_reachin != bd[block->num].reachin[i]) + progress = true; + } + } + } while (progress); + + /* Perform a second fixed-point pass in order to propagate the + * exec_mismatch bitsets. Note that this requires an accurate + * value of the reachin bitsets as input, which isn't available + * until the end of the first propagation pass, so this loop cannot + * be folded into the previous one. + */ + do { + progress = false; + + foreach_block (block, cfg) { + for (int i = 0; i < bitset_words; i++) { + const BITSET_WORD old_exec_mismatch = bd[block->num].exec_mismatch[i]; + + /* Update exec_mismatch for this block. If the end of a + * parent block is reachable by an overwrite with + * inconsistent execution masking, the start of this block + * is reachable by such an overwrite as well. + */ + foreach_list_typed(bblock_link, parent_link, link, &block->parents) { + bblock_t *parent = parent_link->block; + bd[block->num].exec_mismatch[i] |= (bd[parent->num].exec_mismatch[i] & + bd[parent->num].reachin[i]); + } + + /* Only consider overwrites with inconsistent execution + * masking if they are reachable from the copy, since + * overwrites unreachable from a copy are harmless to that + * copy. + */ + bd[block->num].exec_mismatch[i] &= bd[block->num].reachin[i]; + if (old_exec_mismatch != bd[block->num].exec_mismatch[i]) progress = true; } } @@ -1182,7 +1254,8 @@ fs_visitor::opt_copy_propagation() exec_list in_acp[ACP_HASH_SIZE]; for (int i = 0; i < dataflow.num_acp; i++) { - if (BITSET_TEST(dataflow.bd[block->num].livein, i)) { + if (BITSET_TEST(dataflow.bd[block->num].livein, i) && + !BITSET_TEST(dataflow.bd[block->num].exec_mismatch, i)) { struct acp_entry *entry = dataflow.acp[i]; in_acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry); }