diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index a52beb50a17..ca2c81ef609 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -671,7 +671,6 @@ bool brw_fs_opt_cmod_propagation(fs_visitor &s); bool brw_fs_opt_combine_constants(fs_visitor &s); bool brw_fs_opt_compact_virtual_grfs(fs_visitor &s); bool brw_fs_opt_copy_propagation(fs_visitor &s); -bool brw_fs_opt_cse(fs_visitor &s); bool brw_fs_opt_cse_defs(fs_visitor &s); bool brw_fs_opt_dead_code_eliminate(fs_visitor &s); bool brw_fs_opt_dead_control_flow_eliminate(fs_visitor &s); diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 098856cc0e2..9ec292ba34b 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -30,24 +30,11 @@ /** @file brw_fs_cse.cpp * - * Support for local common subexpression elimination. - * - * See Muchnick's Advanced Compiler Design and Implementation, section - * 13.1 (p378). + * Support for SSA-based global Common Subexpression Elimination (CSE). */ using namespace brw; -namespace { -struct aeb_entry : public exec_node { - /** The instruction that generates the expression value. */ - fs_inst *generator; - - /** The temporary where the value is stored. */ - fs_reg tmp; -}; -} - static bool is_expression(const fs_visitor *v, const fs_inst *const inst) { @@ -251,200 +238,6 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) operands_match(a, b, negate); } -static void -create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) -{ - unsigned written = regs_written(inst); - unsigned dst_width = - DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); - fs_inst *copy; - - if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { - assert(src.file == VGRF); - fs_reg *payload = ralloc_array(bld.shader->mem_ctx, fs_reg, - inst->sources); - for (int i = 0; i < inst->header_size; i++) { - payload[i] = src; - src.offset += REG_SIZE; - } - for (int i = inst->header_size; i < inst->sources; i++) { - src.type = inst->src[i].type; - payload[i] = src; - src = offset(src, bld, 1); - } - copy = bld.LOAD_PAYLOAD(inst->dst, payload, inst->sources, - inst->header_size); - } else if (written != dst_width) { - assert(src.file == VGRF); - assert(written % dst_width == 0); - const int sources = written / dst_width; - fs_reg *payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); - for (int i = 0; i < sources; i++) { - payload[i] = src; - src = offset(src, bld, 1); - } - copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, 0); - } else { - copy = bld.MOV(inst->dst, src); - copy->group = inst->group; - copy->force_writemask_all = inst->force_writemask_all; - copy->src[0].negate = negate; - } - assert(regs_written(copy) == written); -} - -static bool -brw_fs_opt_cse_local(fs_visitor &s, const fs_live_variables &live, bblock_t *block, int &ip) -{ - const intel_device_info *devinfo = s.devinfo; - bool progress = false; - exec_list aeb; - - void *cse_ctx = ralloc_context(NULL); - - foreach_inst_in_block(fs_inst, inst, block) { - /* Skip some cases. */ - if (is_expression(&s, inst) && !inst->is_partial_write() && - ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) || - inst->dst.is_null())) - { - bool found = false; - bool negate = false; - - foreach_in_list_use_after(aeb_entry, entry, &aeb) { - /* Match current instruction's expression against those in AEB. */ - if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) && - instructions_match(inst, entry->generator, &negate)) { - found = true; - progress = true; - break; - } - } - - if (!found) { - if (inst->opcode != BRW_OPCODE_MOV || - (inst->opcode == BRW_OPCODE_MOV && - inst->src[0].file == IMM && - inst->src[0].type == BRW_TYPE_VF)) { - /* Our first sighting of this expression. Create an entry. */ - aeb_entry *entry = ralloc(cse_ctx, aeb_entry); - entry->tmp = reg_undef; - entry->generator = inst; - aeb.push_tail(entry); - } - } else { - /* This is at least our second sighting of this expression. - * If we don't have a temporary already, make one. - */ - bool no_existing_temp = entry->tmp.file == BAD_FILE; - if (no_existing_temp && !entry->generator->dst.is_null()) { - const fs_builder ibld = fs_builder(&s, block, entry->generator) - .at(block, entry->generator->next); - int written = regs_written(entry->generator); - - entry->tmp = fs_reg(VGRF, s.alloc.allocate(written), - entry->generator->dst.type); - - create_copy_instr(ibld, entry->generator, entry->tmp, false); - - entry->generator->dst = entry->tmp; - } - - /* dest <- temp */ - if (!inst->dst.is_null()) { - assert(inst->size_written == entry->generator->size_written); - assert(inst->dst.type == entry->tmp.type); - const fs_builder ibld(&s, block, inst); - - create_copy_instr(ibld, inst, entry->tmp, negate); - } - - /* Set our iterator so that next time through the loop inst->next - * will get the instruction in the basic block after the one we've - * removed. - */ - fs_inst *prev = (fs_inst *)inst->prev; - - inst->remove(block); - inst = prev; - } - } - - /* Discard jumps aren't represented in the CFG unfortunately, so we need - * to make sure that they behave as a CSE barrier, since we lack global - * dataflow information. This is particularly likely to cause problems - * with instructions dependent on the current execution mask like - * SHADER_OPCODE_FIND_LIVE_CHANNEL. - */ - if (inst->opcode == BRW_OPCODE_HALT || - inst->opcode == SHADER_OPCODE_HALT_TARGET) - aeb.make_empty(); - - foreach_in_list_safe(aeb_entry, entry, &aeb) { - /* Kill all AEB entries that write a different value to or read from - * the flag register if we just wrote it. - */ - if (inst->flags_written(devinfo)) { - bool negate; /* dummy */ - if (entry->generator->flags_read(devinfo) || - (entry->generator->flags_written(devinfo) && - !instructions_match(inst, entry->generator, &negate))) { - entry->remove(); - ralloc_free(entry); - continue; - } - } - - for (int i = 0; i < entry->generator->sources; i++) { - fs_reg *src_reg = &entry->generator->src[i]; - - /* Kill all AEB entries that use the destination we just - * overwrote. - */ - if (regions_overlap(inst->dst, inst->size_written, - entry->generator->src[i], - entry->generator->size_read(i))) { - entry->remove(); - ralloc_free(entry); - break; - } - - /* Kill any AEB entries using registers that don't get reused any - * more -- a sure sign they'll fail operands_match(). - */ - if (src_reg->file == VGRF && live.vgrf_end[src_reg->nr] < ip) { - entry->remove(); - ralloc_free(entry); - break; - } - } - } - - ip++; - } - - ralloc_free(cse_ctx); - - return progress; -} - -bool -brw_fs_opt_cse(fs_visitor &s) -{ - const fs_live_variables &live = s.live_analysis.require(); - bool progress = false; - int ip = 0; - - foreach_block (block, s.cfg) { - progress = brw_fs_opt_cse_local(s, live, block, ip) || progress; - } - - if (progress) - s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); - - return progress; -} - /* -------------------------------------------------------------------- */ #define HASH(hash, data) XXH32(&(data), sizeof(data), hash)