diff --git a/src/intel/compiler/brw_def_analysis.cpp b/src/intel/compiler/brw_def_analysis.cpp
new file mode 100644
index 00000000000..e8c877d2669
--- /dev/null
+++ b/src/intel/compiler/brw_def_analysis.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "brw_fs.h"
+#include "brw_cfg.h"
+#include "brw_ir_analysis.h"
+
+/**
+ * An opportunistic SSA-def analysis pass.
+ *
+ * VGRFs are considered defs (SSA values) when:
+ *
+ * 1. One instruction wholly defines the register (including all offsets)
+ * 2. The single defining write dominates all uses
+ * 3. All VGRF sources of the definition are also defs
+ *
+ * Non-VGRF sources generally do not prevent an instruction from forming
+ * an SSA def. The other files represent immediates, pushed uniforms, inputs
+ * to shaders, thread payload fields, and so on. In theory, we could mutate
+ * FIXED_GRF register values, but we don't today, so it isn't an issue. The
+ * exception is ARF reads of the address, accumulator, or flag registers:
+ * we do not track those, so they make the destination not a def.
+ *
+ * Limitations:
+ * - We do not track uses, only definitions.
+ * - We do not handle flags, address registers, or accumulators yet.
+ *
+ * Usage:
+ *
+ *    const def_analysis &defs = s.def_analysis.require();
+ *    fs_inst *def = defs.get(inst->src[i]); // returns NULL if non-SSA
+ *    bblock_t *block = defs.get_block(inst->src[i]); // block containing def
+ *
+ * Def analysis requires the dominator tree, but not liveness information.
+ */
+
+using namespace brw;
+
+/* Sentinel stored in def_insts[] for a VGRF that no instruction visited so
+ * far has read or written. It differs from NULL, which marks a VGRF that is
+ * known not to be a def.
+ */
+static fs_inst *const UNSEEN = (fs_inst *) (uintptr_t) 1;
+
+/** Record that VGRF nr is not an SSA def. */
+void
+def_analysis::mark_invalid(int nr)
+{
+   def_blocks[nr] = NULL;
+   def_insts[nr] = NULL;
+}
+
+/**
+ * Update def information for the registers read by inst, located in block.
+ *
+ * This may invalidate the VGRFs the instruction reads as well as its
+ * destination. The destination is only invalidated when it is a VGRF:
+ * def_insts[] and def_blocks[] are indexed by VGRF number, so the register
+ * number of any other file must not be used as an index into them.
+ */
+void
+def_analysis::update_for_reads(const idom_tree &idom,
+                               bblock_t *block,
+                               fs_inst *inst)
+{
+   /* We don't track accumulator use for def analysis, so if an instruction
+    * implicitly reads the accumulator, we don't consider it to produce a def.
+    */
+   if (inst->dst.file == VGRF && inst->reads_accumulator_implicitly())
+      mark_invalid(inst->dst.nr);
+
+   for (int i = 0; i < inst->sources; i++) {
+      const int nr = inst->src[i].nr;
+
+      if (inst->src[i].file != VGRF) {
+         /* Similarly, explicit reads of accumulators, address registers,
+          * and flags make the destination not a def, as we don't track those.
+          */
+         if (inst->dst.file == VGRF && inst->src[i].file == ARF &&
+             (nr == BRW_ARF_ADDRESS ||
+              nr == BRW_ARF_ACCUMULATOR ||
+              nr == BRW_ARF_FLAG))
+            mark_invalid(inst->dst.nr);
+
+         continue;
+      }
+
+      if (def_insts[nr]) {
+         /* Mark the source def invalid in two cases:
+          *
+          * 1. The register is used before being written
+          * 2. The def doesn't dominate our use.
+          */
+         if (def_insts[nr] == UNSEEN ||
+             !idom.dominates(def_blocks[nr], block))
+            mark_invalid(nr);
+      }
+
+      /* Additionally, if one of our sources is not a def, then our
+       * destination may have multiple dynamic assignments.
+       */
+      if (!def_insts[nr] && inst->dst.file == VGRF)
+         mark_invalid(inst->dst.nr);
+   }
+}
+
+/**
+ * Return whether inst writes the whole of its destination VGRF: the number
+ * of bytes written equals the allocated size and the write is not partial.
+ */
+bool
+def_analysis::fully_defines(const fs_visitor *v, fs_inst *inst)
+{
+   return v->alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
+          !inst->is_partial_write();
+}
+
+/**
+ * Update def information for the VGRF written by inst, located in block.
+ */
+void
+def_analysis::update_for_write(const fs_visitor *v,
+                               bblock_t *block,
+                               fs_inst *inst)
+{
+   const int nr = inst->dst.nr;
+
+   if (inst->dst.file != VGRF || !def_insts[nr])
+      return;
+
+   /* If this is our first write to the destination, and it fully defines
+    * the destination, then consider it an SSA def for now.
+    */
+   if (def_insts[nr] == UNSEEN && fully_defines(v, inst)) {
+      def_insts[nr] = inst;
+      def_blocks[nr] = block;
+   } else {
+      /* Otherwise this is a second write or a partial write, in which
+       * case we know with certainty that this isn't an SSA def.
+       */
+      mark_invalid(nr);
+   }
+}
+
+/**
+ * Compute the defs of v: walk every instruction once to collect candidate
+ * defs, then iterate to a fixed point, dropping any candidate that reads a
+ * VGRF which turned out not to be a def.
+ */
+def_analysis::def_analysis(const fs_visitor *v)
+{
+   const idom_tree &idom = v->idom_analysis.require();
+
+   def_count = v->alloc.count;
+
+   def_insts = new fs_inst*[def_count]();
+   def_blocks = new bblock_t*[def_count]();
+
+   for (unsigned i = 0; i < def_count; i++)
+      def_insts[i] = UNSEEN;
+
+   foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
+      if (inst->opcode != SHADER_OPCODE_UNDEF) {
+         update_for_reads(idom, block, inst);
+         update_for_write(v, block, inst);
+      }
+   }
+
+   bool iterate;
+   do {
+      iterate = false;
+
+      for (unsigned d = 0; d < def_count; d++) {
+         /* Anything still unseen was never written and thus dead code. */
+         if (def_insts[d] == UNSEEN)
+            def_insts[d] = NULL;
+
+         fs_inst *def = def_insts[d];
+         if (!def)
+            continue;
+
+         for (int i = 0; i < def->sources; i++) {
+            if (def->src[i].file != VGRF)
+               continue;
+
+            const int nr = def->src[i].nr;
+
+            /* If our "def" reads a non-SSA source, then it isn't a def. */
+            if (!def_insts[nr] || def_insts[nr] == UNSEEN) {
+               mark_invalid(def->dst.nr);
+               iterate = true;
+               break;
+            }
+         }
+      }
+   } while (iterate);
+}
+
+def_analysis::~def_analysis()
+{
+   delete[] def_insts;
+   delete[] def_blocks;
+}
+
+/**
+ * Assert that each VGRF has either both a defining instruction and a
+ * defining block recorded, or neither.
+ */
+bool
+def_analysis::validate(const fs_visitor *v) const
+{
+   for (unsigned i = 0; i < def_count; i++) {
+      assert(!def_insts[i] == !def_blocks[i]);
+   }
+
+   return true;
+}
+
+/** Print how many VGRFs are and are not SSA defs to stderr. */
+void
+def_analysis::print_stats(const fs_visitor *v) const
+{
+   unsigned defs = 0;
+
+   for (unsigned i = 0; i < def_count; i++) {
+      if (def_insts[i])
+         ++defs;
+   }
+
+   /* Avoid dividing zero by zero when there are no VGRFs at all. */
+   const float percent =
+      def_count ? 100.0f * float(defs) / float(def_count) : 0.0f;
+
+   fprintf(stderr, "DEFS: %u registers, %u SSA, %u non-SSA => %.1f SSA\n",
+           def_count, defs, def_count - defs, percent);
+}
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 6ca33a4e313..91fc6729fc9 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2739,6 +2739,7 @@ fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
    live_analysis.invalidate(c);
    regpressure_analysis.invalidate(c);
    idom_analysis.invalidate(c);
+   def_analysis.invalidate(c);
 }
 
 void
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index dde29f8a284..a5150192204 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -69,6 +69,65 @@ namespace brw {
 
       unsigned *regs_live_at_ip;
    };
+
+   /**
+    * Opportunistic SSA-def analysis. For each VGRF it records the single
+    * instruction that defines it and the block containing that instruction,
+    * or NULL for both when the VGRF is not an SSA def.
+    */
+   class def_analysis {
+   public:
+      def_analysis(const fs_visitor *v);
+      ~def_analysis();
+
+      /** Instruction defining reg, or NULL if reg is not a VGRF SSA def. */
+      fs_inst *
+      get(const fs_reg &reg) const
+      {
+         return reg.file == VGRF && reg.nr < def_count ?
+                def_insts[reg.nr] : NULL;
+      }
+
+      /** Block containing reg's def, or NULL if reg is not a VGRF SSA def. */
+      bblock_t *
+      get_block(const fs_reg &reg) const
+      {
+         return reg.file == VGRF && reg.nr < def_count ?
+                def_blocks[reg.nr] : NULL;
+      }
+
+      /** Number of VGRFs covered by the analysis. */
+      unsigned count() const { return def_count; }
+
+      void print_stats(const fs_visitor *) const;
+
+      analysis_dependency_class
+      dependency_class() const
+      {
+         return DEPENDENCY_INSTRUCTION_IDENTITY |
+                DEPENDENCY_INSTRUCTION_DATA_FLOW |
+                DEPENDENCY_VARIABLES |
+                DEPENDENCY_BLOCKS;
+      }
+
+      bool validate(const fs_visitor *) const;
+
+   private:
+      void mark_invalid(int);
+      bool fully_defines(const fs_visitor *v, fs_inst *);
+      void update_for_reads(const idom_tree &idom, bblock_t *block, fs_inst *);
+      void update_for_write(const fs_visitor *v, bblock_t *block, fs_inst *);
+
+      /* Arrays of def_count entries indexed by VGRF number, allocated in the
+       * constructor and freed in the destructor.
+       *
+       * NOTE(review): the class is still implicitly copyable, and a copy
+       * would free these arrays twice; confirm nothing copies it.
+       */
+      fs_inst **def_insts;
+      bblock_t **def_blocks;
+      unsigned def_count;
+   };
 }
 
 #define UBO_START ((1 << 16) - 4)
@@ -349,6 +408,7 @@ public:
    brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
    brw_analysis<brw::performance, fs_visitor> performance_analysis;
    brw_analysis<brw::idom_tree, fs_visitor> idom_analysis;
+   brw_analysis<brw::def_analysis, fs_visitor> def_analysis;
 
    /** Number of uniform variable components visited. */
    unsigned uniforms;
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index e827e45bc97..4eb73ff4f7e 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -1036,7 +1036,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler,
      debug_enabled(debug_enabled),
      key(key), gs_compile(NULL), prog_data(prog_data),
      live_analysis(this), regpressure_analysis(this),
-     performance_analysis(this), idom_analysis(this),
+     performance_analysis(this), idom_analysis(this), def_analysis(this),
      needs_register_pressure(needs_register_pressure),
      dispatch_width(dispatch_width),
      max_polygons(0),
@@ -1060,7 +1060,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler,
      debug_enabled(debug_enabled),
      key(&key->base), gs_compile(NULL), prog_data(&prog_data->base),
      live_analysis(this), regpressure_analysis(this),
-     performance_analysis(this), idom_analysis(this),
+     performance_analysis(this), idom_analysis(this), def_analysis(this),
      needs_register_pressure(needs_register_pressure),
      dispatch_width(dispatch_width),
      max_polygons(max_polygons),
@@ -1088,7 +1088,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler,
      key(&c->key.base), gs_compile(c),
      prog_data(&prog_data->base.base),
      live_analysis(this), regpressure_analysis(this),
-     performance_analysis(this), idom_analysis(this),
+     performance_analysis(this), idom_analysis(this), def_analysis(this),
      needs_register_pressure(needs_register_pressure),
      dispatch_width(compiler->devinfo->ver >= 20 ? 16 : 8),
      max_polygons(0),
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index 2dac3132a2d..e7dc806b185 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -47,6 +47,7 @@ libintel_compiler_brw_files = files(
   'brw_compiler.h',
   'brw_dead_control_flow.cpp',
   'brw_debug_recompile.c',
+  'brw_def_analysis.cpp',
   'brw_disasm.c',
   'brw_disasm_info.cpp',
   'brw_disasm_info.h',