diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 4f59f7cdaa7..fdb74cbd9e5 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -108,21 +108,6 @@ uint8_t get_counters_for_event(wait_event ev) } } -uint16_t get_events_for_counter(counter_type ctr) -{ - switch (ctr) { - case counter_exp: - return exp_events; - case counter_lgkm: - return lgkm_events; - case counter_vm: - return vm_events; - case counter_vs: - return vs_events; - } - return 0; -} - struct wait_entry { wait_imm imm; uint16_t events; /* use wait_event notion */ @@ -207,13 +192,6 @@ struct wait_ctx { std::map gpr_map; - /* used for vmem/smem scores */ - bool collect_statistics; - Instruction *gen_instr; - std::map unwaited_instrs[num_counters]; - std::map> reg_instrs[num_counters]; - std::vector wait_distances[num_events]; - wait_ctx() {} wait_ctx(Program *program_) : program(program_), @@ -222,8 +200,7 @@ struct wait_ctx { max_exp_cnt(6), max_lgkm_cnt(program_->chip_class >= GFX10 ? 62 : 14), max_vs_cnt(program_->chip_class >= GFX10 ? 62 : 0), - unordered_events(event_smem | (program_->chip_class < GFX10 ? event_flat : 0)), - collect_statistics(program_->collect_statistics) {} + unordered_events(event_smem | (program_->chip_class < GFX10 ? event_flat : 0)) {} bool join(const wait_ctx* other, bool logical) { @@ -262,56 +239,12 @@ struct wait_ctx { barrier_events[i] |= other->barrier_events[i]; } - /* these are used for statistics, so don't update "changed" */ - for (unsigned i = 0; i < num_counters; i++) { - for (const auto& instr : other->unwaited_instrs[i]) { - using iterator = std::map::iterator; - const std::pair insert_pair = unwaited_instrs[i].insert(instr); - if (!insert_pair.second) { - const iterator pos = insert_pair.first; - pos->second = std::min(pos->second, instr.second); - } - } - for (const auto& instr_pair : other->reg_instrs[i]) { - const PhysReg reg = instr_pair.first; - const std::set& instrs = instr_pair.second; - reg_instrs[i][reg].insert(instrs.begin(), instrs.end()); - } - } - return changed; } void wait_and_remove_from_entry(PhysReg reg, wait_entry& entry, counter_type counter) { - if (collect_statistics && (entry.counters & counter)) { - unsigned counter_idx = ffs(counter) - 1; - for (Instruction *instr : reg_instrs[counter_idx][reg]) { - auto pos = unwaited_instrs[counter_idx].find(instr); - if (pos == unwaited_instrs[counter_idx].end()) - continue; - - unsigned distance = pos->second; - unsigned events = entry.events & get_events_for_counter(counter); - while (events) { - unsigned event_idx = u_bit_scan(&events); - wait_distances[event_idx].push_back(distance); - } - - unwaited_instrs[counter_idx].erase(pos); - } - reg_instrs[counter_idx][reg].clear(); - } - entry.remove_counter(counter); } - - void advance_unwaited_instrs() - { - for (unsigned i = 0; i < num_counters; i++) { - for (std::pair& instr : unwaited_instrs[i]) - instr.second++; - } - } }; wait_imm check_instr(Instruction* instr, wait_ctx& ctx) @@ -661,16 +594,6 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event if (!it.second) it.first->second.join(new_entry); } - - if (ctx.collect_statistics) { - unsigned counters_todo = counters; - while (counters_todo) { - unsigned i = u_bit_scan(&counters_todo); - ctx.unwaited_instrs[i].insert(std::make_pair(ctx.gen_instr, 0u)); - for (unsigned j = 0; j < rc.size(); j++) - ctx.reg_instrs[i][PhysReg{reg.reg()+j}].insert(ctx.gen_instr); - } - } } void insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, bool has_sampler=false) @@ -819,7 +742,6 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx) memory_sync_info sync_info = get_sync_info(instr.get()); queued_imm.combine(kill(instr.get(), ctx, sync_info)); - ctx.gen_instr = instr.get(); gen(instr.get(), ctx); if (instr->format != Format::PSEUDO_BARRIER && !is_wait) { @@ -830,9 +752,6 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx) new_instructions.emplace_back(std::move(instr)); queued_imm.combine(perform_barrier(ctx, sync_info, semantic_acquire)); - - if (ctx.collect_statistics) - ctx.advance_unwaited_instrs(); } } @@ -844,51 +763,6 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx) } /* end namespace */ -static uint32_t calculate_score(std::vector &ctx_vec, uint32_t event_mask) -{ - double result = 0.0; - unsigned num_waits = 0; - while (event_mask) { - unsigned event_index = u_bit_scan(&event_mask); - for (const wait_ctx &ctx : ctx_vec) { - for (unsigned dist : ctx.wait_distances[event_index]) { - double score = dist; - /* for many events, excessive distances provide little benefit, so - * decrease the score in that case. */ - double threshold = INFINITY; - double inv_strength = 0.000001; - switch (1 << event_index) { - case event_smem: - threshold = 70.0; - inv_strength = 75.0; - break; - case event_vmem: - case event_vmem_store: - case event_flat: - threshold = 230.0; - inv_strength = 150.0; - break; - case event_lds: - threshold = 16.0; - break; - default: - break; - } - if (score > threshold) { - score -= threshold; - score = threshold + score / (1.0 + score / inv_strength); - } - - /* we don't want increases in high scores to hide decreases in low scores, - * so raise to the power of 0.1 before averaging. */ - result += pow(score, 0.1); - num_waits++; - } - } - } - return round(pow(result / num_waits, 10.0) * 10.0); -} - void insert_wait_states(Program* program) { /* per BB ctx */ @@ -942,13 +816,6 @@ void insert_wait_states(Program* program) out_ctx[current.index] = std::move(ctx); } - - if (program->collect_statistics) { - program->statistics[statistic_vmem_score] = - calculate_score(out_ctx, event_vmem | event_flat | event_vmem_store); - program->statistics[statistic_smem_score] = - calculate_score(out_ctx, event_smem); - } } } diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp index b02c3b29a70..7b4a2f40e5f 100644 --- a/src/amd/compiler/aco_interface.cpp +++ b/src/amd/compiler/aco_interface.cpp @@ -41,8 +41,6 @@ static const std::array statis ret[aco::statistic_inv_throughput] = aco_compiler_statistic_info{"Inverse Throughput", "Estimated busy cycles to execute one wave"}; ret[aco::statistic_vmem_clauses] = aco_compiler_statistic_info{"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"}; ret[aco::statistic_smem_clauses] = aco_compiler_statistic_info{"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"}; - ret[aco::statistic_vmem_score] = aco_compiler_statistic_info{"VMEM Score", "Average VMEM def-use distances"}; - ret[aco::statistic_smem_score] = aco_compiler_statistic_info{"SMEM Score", "Average SMEM def-use distances"}; ret[aco::statistic_sgpr_presched] = aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"}; ret[aco::statistic_vgpr_presched] = aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"}; return ret; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 037f3de3697..ff3f3e63e7b 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1833,8 +1833,6 @@ enum statistic { statistic_inv_throughput, statistic_vmem_clauses, statistic_smem_clauses, - statistic_vmem_score, - statistic_smem_score, statistic_sgpr_presched, statistic_vgpr_presched, num_statistics