2019-09-17 13:22:17 +02:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2018 Valve Corporation
|
|
|
|
|
* Copyright © 2018 Google
|
|
|
|
|
*
|
2024-04-08 09:02:30 +02:00
|
|
|
* SPDX-License-Identifier: MIT
|
2019-09-17 13:22:17 +02:00
|
|
|
*/
|
|
|
|
|
|
2019-10-24 18:27:25 +02:00
|
|
|
#include "aco_builder.h"
|
2021-06-09 15:40:03 +02:00
|
|
|
#include "aco_ir.h"
|
2021-07-20 17:05:43 +02:00
|
|
|
#include "aco_util.h"
|
2021-06-09 10:14:54 +02:00
|
|
|
|
2024-05-16 17:42:17 +02:00
|
|
|
#include "common/ac_descriptors.h"
|
2021-06-10 11:33:15 +02:00
|
|
|
#include "common/sid.h"
|
2019-10-24 18:27:25 +02:00
|
|
|
|
2021-07-20 11:48:15 +02:00
|
|
|
#include <algorithm>
|
aco/spill: Prefer unordered_map over map for next use distances
This changes the iteration order of next use distances, so some "random"
changes to shader metrics are expected.
fossil-db for Navi14:
Totals from 1239 (0.82% of 150305) affected shaders:
SpillSGPRs: 10559 -> 10562 (+0.03%); split: -0.05%, +0.08%
SpillVGPRs: 1632 -> 1863 (+14.15%)
CodeSize: 19321468 -> 19304164 (-0.09%); split: -0.09%, +0.01%
Instrs: 3593957 -> 3591647 (-0.06%); split: -0.07%, +0.01%
Latency: 103120695 -> 102475647 (-0.63%); split: -0.63%, +0.01%
InvThroughput: 23897614 -> 23575320 (-1.35%); split: -1.36%, +0.02%
VClause: 66406 -> 66943 (+0.81%); split: -0.01%, +0.81%
SClause: 118559 -> 118548 (-0.01%)
Copies: 310871 -> 308950 (-0.62%); split: -0.69%, +0.08%
Branches: 123386 -> 123413 (+0.02%); split: -0.00%, +0.03%
These numbers mostly come from parallel-rdp ubershaders. Small changes are
also found in the rdr2 and rage2 shader metrics, whereas others are not
significantly affected.
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11925>
2021-07-13 12:31:12 +02:00
|
|
|
#include <cstring>
|
2019-09-17 13:22:17 +02:00
|
|
|
#include <map>
|
2020-03-11 11:02:20 +01:00
|
|
|
#include <set>
|
aco/spill: Prefer unordered_map over map for next use distances
This changes the iteration order of next use distances, so some "random"
changes to shader metrics are expected.
fossil-db for Navi14:
Totals from 1239 (0.82% of 150305) affected shaders:
SpillSGPRs: 10559 -> 10562 (+0.03%); split: -0.05%, +0.08%
SpillVGPRs: 1632 -> 1863 (+14.15%)
CodeSize: 19321468 -> 19304164 (-0.09%); split: -0.09%, +0.01%
Instrs: 3593957 -> 3591647 (-0.06%); split: -0.07%, +0.01%
Latency: 103120695 -> 102475647 (-0.63%); split: -0.63%, +0.01%
InvThroughput: 23897614 -> 23575320 (-1.35%); split: -1.36%, +0.02%
VClause: 66406 -> 66943 (+0.81%); split: -0.01%, +0.81%
SClause: 118559 -> 118548 (-0.01%)
Copies: 310871 -> 308950 (-0.62%); split: -0.69%, +0.08%
Branches: 123386 -> 123413 (+0.02%); split: -0.00%, +0.03%
These numbers mostly come from parallel-rdp ubershaders. Small changes are
also found in the rdr2 and rage2 shader metrics, whereas others are not
significantly affected.
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11925>
2021-07-13 12:31:12 +02:00
|
|
|
#include <unordered_map>
|
2021-06-09 15:40:03 +02:00
|
|
|
#include <unordered_set>
|
|
|
|
|
#include <vector>
|
2019-09-17 13:22:17 +02:00
|
|
|
|
aco/spill: Prefer unordered_map over map for next use distances
This changes the iteration order of next use distances, so some "random"
changes to shader metrics are expected.
fossil-db for Navi14:
Totals from 1239 (0.82% of 150305) affected shaders:
SpillSGPRs: 10559 -> 10562 (+0.03%); split: -0.05%, +0.08%
SpillVGPRs: 1632 -> 1863 (+14.15%)
CodeSize: 19321468 -> 19304164 (-0.09%); split: -0.09%, +0.01%
Instrs: 3593957 -> 3591647 (-0.06%); split: -0.07%, +0.01%
Latency: 103120695 -> 102475647 (-0.63%); split: -0.63%, +0.01%
InvThroughput: 23897614 -> 23575320 (-1.35%); split: -1.36%, +0.02%
VClause: 66406 -> 66943 (+0.81%); split: -0.01%, +0.81%
SClause: 118559 -> 118548 (-0.01%)
Copies: 310871 -> 308950 (-0.62%); split: -0.69%, +0.08%
Branches: 123386 -> 123413 (+0.02%); split: -0.00%, +0.03%
These numbers mostly come from parallel-rdp ubershaders. Small changes are
also found in the rdr2 and rage2 shader metrics, whereas others are not
significantly affected.
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11925>
2021-07-13 12:31:12 +02:00
|
|
|
namespace std {
|
|
|
|
|
template <> struct hash<aco::Temp> {
|
|
|
|
|
size_t operator()(aco::Temp temp) const noexcept
|
|
|
|
|
{
|
|
|
|
|
uint32_t v;
|
|
|
|
|
std::memcpy(&v, &temp, sizeof(temp));
|
|
|
|
|
return std::hash<uint32_t>{}(v);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
} // namespace std
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/*
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
* Implements the spilling algorithm on SSA-form based on
|
2019-09-17 13:22:17 +02:00
|
|
|
* "Register Spilling and Live-Range Splitting for SSA-Form Programs"
|
|
|
|
|
* by Matthias Braun and Sebastian Hack.
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
*
|
|
|
|
|
* Key difference between this algorithm and the min-algorithm from the paper
|
|
|
|
|
* is the use of average use distances rather than next-use distances per
|
|
|
|
|
* instruction.
|
|
|
|
|
* As we decrement the number of remaining uses, the average use distances
|
|
|
|
|
* give an approximation of the next-use distances while being computationally
|
|
|
|
|
* and memory-wise less expensive.
|
2019-09-17 13:22:17 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
namespace aco {
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
struct remat_info {
|
|
|
|
|
Instruction* instr;
|
|
|
|
|
};
|
|
|
|
|
|
2024-03-14 14:56:18 +01:00
|
|
|
struct loop_info {
|
|
|
|
|
uint32_t index;
|
|
|
|
|
aco::unordered_map<Temp, uint32_t> spills;
|
2024-02-16 10:53:10 +01:00
|
|
|
IDSet live_in;
|
2024-03-14 14:56:18 +01:00
|
|
|
};
|
|
|
|
|
|
2024-02-16 11:20:13 +01:00
|
|
|
/* Use statistics of a single SSA value. */
struct use_info {
   /* Number of (remaining) uses of the value. */
   uint32_t num_uses = 0;

   /* Largest instruction index at which the value is used. */
   uint32_t last_use = 0;

   /**
    * Average use distance: last_use divided by num_uses, computed in
    * floating point.
    *
    * The division is not guarded: with num_uses == 0 the result is
    * +inf (last_use != 0) or NaN (last_use == 0) on IEEE-754 targets,
    * so callers should only score values which still have uses.
    */
   float score() const { return static_cast<float>(last_use) / static_cast<float>(num_uses); }
};
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
struct spill_ctx {
|
|
|
|
|
RegisterDemand target_pressure;
|
|
|
|
|
Program* program;
|
2021-07-20 17:05:43 +02:00
|
|
|
aco::monotonic_buffer_resource memory;
|
|
|
|
|
|
2021-07-21 17:19:13 +02:00
|
|
|
std::vector<aco::map<Temp, Temp>> renames;
|
|
|
|
|
std::vector<aco::unordered_map<Temp, uint32_t>> spills_entry;
|
|
|
|
|
std::vector<aco::unordered_map<Temp, uint32_t>> spills_exit;
|
2021-07-19 19:32:38 +02:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
std::vector<bool> processed;
|
2024-03-14 14:56:18 +01:00
|
|
|
std::vector<loop_info> loop;
|
2021-07-20 17:05:43 +02:00
|
|
|
|
2024-02-16 11:20:13 +01:00
|
|
|
std::vector<use_info> ssa_infos;
|
2020-07-07 13:11:07 +01:00
|
|
|
std::vector<std::pair<RegClass, std::unordered_set<uint32_t>>> interferences;
|
2019-10-16 16:39:06 +02:00
|
|
|
std::vector<std::vector<uint32_t>> affinities;
|
2019-09-17 13:22:17 +02:00
|
|
|
std::vector<bool> is_reloaded;
|
2021-07-21 17:19:13 +02:00
|
|
|
aco::unordered_map<Temp, remat_info> remat;
|
2021-07-15 15:11:44 +02:00
|
|
|
std::set<Instruction*> unused_remats;
|
2019-10-28 17:15:17 +01:00
|
|
|
unsigned wave_size;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2022-05-19 16:09:13 +01:00
|
|
|
unsigned sgpr_spill_slots;
|
|
|
|
|
unsigned vgpr_spill_slots;
|
|
|
|
|
Temp scratch_rsrc;
|
|
|
|
|
|
2024-06-13 11:55:27 +02:00
|
|
|
spill_ctx(const RegisterDemand target_pressure_, Program* program_)
|
|
|
|
|
: target_pressure(target_pressure_), program(program_), memory(),
|
2021-07-21 17:19:13 +02:00
|
|
|
renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
|
|
|
|
|
spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
|
|
|
|
|
spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
processed(program->blocks.size(), false), ssa_infos(program->peekAllocationId()),
|
|
|
|
|
remat(memory), wave_size(program->wave_size), sgpr_spill_slots(0), vgpr_spill_slots(0)
|
2019-10-28 17:15:17 +01:00
|
|
|
{}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2019-10-16 16:39:06 +02:00
|
|
|
void add_affinity(uint32_t first, uint32_t second)
|
|
|
|
|
{
|
|
|
|
|
unsigned found_first = affinities.size();
|
|
|
|
|
unsigned found_second = affinities.size();
|
|
|
|
|
for (unsigned i = 0; i < affinities.size(); i++) {
|
|
|
|
|
std::vector<uint32_t>& vec = affinities[i];
|
|
|
|
|
for (uint32_t entry : vec) {
|
|
|
|
|
if (entry == first)
|
|
|
|
|
found_first = i;
|
|
|
|
|
else if (entry == second)
|
|
|
|
|
found_second = i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (found_first == affinities.size() && found_second == affinities.size()) {
|
|
|
|
|
affinities.emplace_back(std::vector<uint32_t>({first, second}));
|
|
|
|
|
} else if (found_first < affinities.size() && found_second == affinities.size()) {
|
|
|
|
|
affinities[found_first].push_back(second);
|
|
|
|
|
} else if (found_second < affinities.size() && found_first == affinities.size()) {
|
|
|
|
|
affinities[found_second].push_back(first);
|
|
|
|
|
} else if (found_first != found_second) {
|
|
|
|
|
/* merge second into first */
|
|
|
|
|
affinities[found_first].insert(affinities[found_first].end(),
|
|
|
|
|
affinities[found_second].begin(),
|
|
|
|
|
affinities[found_second].end());
|
|
|
|
|
affinities.erase(std::next(affinities.begin(), found_second));
|
|
|
|
|
} else {
|
|
|
|
|
assert(found_first == found_second);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-14 13:54:39 +01:00
|
|
|
uint32_t add_to_spills(Temp to_spill, aco::unordered_map<Temp, uint32_t>& spills)
|
|
|
|
|
{
|
|
|
|
|
const uint32_t spill_id = allocate_spill_id(to_spill.regClass());
|
|
|
|
|
for (auto pair : spills)
|
|
|
|
|
add_interference(spill_id, pair.second);
|
2024-03-14 15:16:02 +01:00
|
|
|
if (!loop.empty()) {
|
|
|
|
|
for (auto pair : loop.back().spills)
|
2024-03-14 14:56:18 +01:00
|
|
|
add_interference(spill_id, pair.second);
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-14 13:54:39 +01:00
|
|
|
spills[to_spill] = spill_id;
|
|
|
|
|
return spill_id;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-16 11:08:50 +01:00
|
|
|
void add_interference(uint32_t first, uint32_t second)
|
|
|
|
|
{
|
|
|
|
|
if (interferences[first].first.type() != interferences[second].first.type())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
bool inserted = interferences[first].second.insert(second).second;
|
|
|
|
|
if (inserted)
|
|
|
|
|
interferences[second].second.insert(first);
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
uint32_t allocate_spill_id(RegClass rc)
|
|
|
|
|
{
|
2020-07-07 13:11:07 +01:00
|
|
|
interferences.emplace_back(rc, std::unordered_set<uint32_t>());
|
2019-09-17 13:22:17 +02:00
|
|
|
is_reloaded.push_back(false);
|
|
|
|
|
return next_spill_id++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t next_spill_id = 0;
|
|
|
|
|
};
|
|
|
|
|
|
2024-02-16 11:20:13 +01:00
|
|
|
/**
|
|
|
|
|
* Gathers information about the number of uses and point of last use
|
|
|
|
|
* per SSA value.
|
|
|
|
|
*
|
2024-07-08 18:51:56 +02:00
|
|
|
* Phi definitions are added to live-ins.
|
2024-02-16 11:20:13 +01:00
|
|
|
*/
|
2024-02-15 16:01:47 +01:00
|
|
|
void
|
2024-02-16 11:20:13 +01:00
|
|
|
gather_ssa_use_info(spill_ctx& ctx)
|
2024-02-15 16:01:47 +01:00
|
|
|
{
|
2024-02-16 11:20:13 +01:00
|
|
|
unsigned instruction_idx = 0;
|
2024-02-15 16:01:47 +01:00
|
|
|
for (Block& block : ctx.program->blocks) {
|
|
|
|
|
for (int i = block.instructions.size() - 1; i >= 0; i--) {
|
|
|
|
|
aco_ptr<Instruction>& instr = block.instructions[i];
|
|
|
|
|
for (const Operand& op : instr->operands) {
|
|
|
|
|
if (op.isTemp()) {
|
2024-02-16 11:20:13 +01:00
|
|
|
use_info& info = ctx.ssa_infos[op.tempId()];
|
|
|
|
|
info.num_uses++;
|
|
|
|
|
info.last_use = std::max(info.last_use, instruction_idx + i);
|
2024-02-15 16:01:47 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-02-16 11:20:13 +01:00
|
|
|
|
|
|
|
|
/* All live-in variables at loop headers get an additional artificial use.
|
|
|
|
|
* As we decrement the number of uses while processing the blocks, this
|
|
|
|
|
* ensures that the number of uses won't becomes zero before the loop
|
|
|
|
|
* (and the variables' live-ranges) end.
|
|
|
|
|
*/
|
|
|
|
|
if (block.kind & block_kind_loop_header) {
|
2024-07-08 18:51:56 +02:00
|
|
|
for (unsigned t : ctx.program->live.live_in[block.index])
|
2024-02-16 11:20:13 +01:00
|
|
|
ctx.ssa_infos[t].num_uses++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
instruction_idx += block.instructions.size();
|
2024-02-15 16:01:47 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
bool
|
|
|
|
|
should_rematerialize(aco_ptr<Instruction>& instr)
|
|
|
|
|
{
|
|
|
|
|
/* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
|
2019-11-06 17:47:06 +01:00
|
|
|
if (instr->format != Format::VOP1 && instr->format != Format::SOP1 &&
|
|
|
|
|
instr->format != Format::PSEUDO && instr->format != Format::SOPK)
|
2019-09-17 13:22:17 +02:00
|
|
|
return false;
|
aco: always use p_parallelcopy for pre-RA copies
Most fossil-db changes are because literals are applied earlier
(in label_instruction), so use counts are more accurate and more literals
are applied.
fossil-db (Navi):
Totals from 79551 (57.89% of 137413) affected shaders:
SGPRs: 4549610 -> 4542802 (-0.15%); split: -0.19%, +0.04%
VGPRs: 3326764 -> 3324172 (-0.08%); split: -0.10%, +0.03%
SpillSGPRs: 38886 -> 34562 (-11.12%); split: -11.14%, +0.02%
CodeSize: 240143456 -> 240001008 (-0.06%); split: -0.11%, +0.05%
MaxWaves: 1078919 -> 1079281 (+0.03%); split: +0.04%, -0.01%
Instrs: 46627073 -> 46528490 (-0.21%); split: -0.22%, +0.01%
fossil-db (Polaris):
Totals from 98463 (70.90% of 138881) affected shaders:
SGPRs: 5164689 -> 5164353 (-0.01%); split: -0.02%, +0.01%
VGPRs: 3920936 -> 3921856 (+0.02%); split: -0.00%, +0.03%
SpillSGPRs: 56298 -> 52259 (-7.17%); split: -7.22%, +0.04%
CodeSize: 258680092 -> 258692712 (+0.00%); split: -0.02%, +0.03%
MaxWaves: 620863 -> 620823 (-0.01%); split: +0.00%, -0.01%
Instrs: 50776289 -> 50757577 (-0.04%); split: -0.04%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7216>
2020-10-14 13:50:24 +01:00
|
|
|
/* TODO: pseudo-instruction rematerialization is only supported for
|
|
|
|
|
* p_create_vector/p_parallelcopy */
|
2021-01-20 15:27:16 +00:00
|
|
|
if (instr->isPseudo() && instr->opcode != aco_opcode::p_create_vector &&
|
aco: always use p_parallelcopy for pre-RA copies
Most fossil-db changes are because literals are applied earlier
(in label_instruction), so use counts are more accurate and more literals
are applied.
fossil-db (Navi):
Totals from 79551 (57.89% of 137413) affected shaders:
SGPRs: 4549610 -> 4542802 (-0.15%); split: -0.19%, +0.04%
VGPRs: 3326764 -> 3324172 (-0.08%); split: -0.10%, +0.03%
SpillSGPRs: 38886 -> 34562 (-11.12%); split: -11.14%, +0.02%
CodeSize: 240143456 -> 240001008 (-0.06%); split: -0.11%, +0.05%
MaxWaves: 1078919 -> 1079281 (+0.03%); split: +0.04%, -0.01%
Instrs: 46627073 -> 46528490 (-0.21%); split: -0.22%, +0.01%
fossil-db (Polaris):
Totals from 98463 (70.90% of 138881) affected shaders:
SGPRs: 5164689 -> 5164353 (-0.01%); split: -0.02%, +0.01%
VGPRs: 3920936 -> 3921856 (+0.02%); split: -0.00%, +0.03%
SpillSGPRs: 56298 -> 52259 (-7.17%); split: -7.22%, +0.04%
CodeSize: 258680092 -> 258692712 (+0.00%); split: -0.02%, +0.03%
MaxWaves: 620863 -> 620823 (-0.01%); split: +0.00%, -0.01%
Instrs: 50776289 -> 50757577 (-0.04%); split: -0.04%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7216>
2020-10-14 13:50:24 +01:00
|
|
|
instr->opcode != aco_opcode::p_parallelcopy)
|
2019-09-17 13:22:17 +02:00
|
|
|
return false;
|
2021-01-20 15:27:16 +00:00
|
|
|
if (instr->isSOPK() && instr->opcode != aco_opcode::s_movk_i32)
|
2019-11-06 17:47:06 +01:00
|
|
|
return false;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
for (const Operand& op : instr->operands) {
|
|
|
|
|
/* TODO: rematerialization using temporaries isn't yet supported */
|
2021-02-23 15:07:43 +01:00
|
|
|
if (!op.isConstant())
|
2019-09-17 13:22:17 +02:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* TODO: rematerialization with multiple definitions isn't yet supported */
|
|
|
|
|
if (instr->definitions.size() > 1)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Creates the instruction which makes the spilled variable 'tmp'
 * available again under the name 'new_name'.
 *
 * If 'tmp' has an entry in ctx.remat, a copy of the recorded instruction
 * (same opcode, format and operands, plus the immediate for SOPK) is
 * created with 'new_name' as its definition. Otherwise, a p_reload
 * pseudo-instruction with 'spill_id' as constant operand is created and
 * the spill id is marked as reloaded.
 *
 * The returned instruction is not inserted anywhere by this function.
 */
aco_ptr<Instruction>
do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
{
   /* ctx.remat is an aco::unordered_map: let the compiler deduce the
    * iterator type instead of naming the one of a different
    * std::unordered_map specialization.
    */
   const auto remat = ctx.remat.find(tmp);
   if (remat != ctx.remat.end()) {
      Instruction* instr = remat->second.instr;
      assert((instr->isVOP1() || instr->isSOP1() || instr->isPseudo() || instr->isSOPK()) &&
             "unsupported");
      assert((instr->format != Format::PSEUDO || instr->opcode == aco_opcode::p_create_vector ||
              instr->opcode == aco_opcode::p_parallelcopy) &&
             "unsupported");
      assert(instr->definitions.size() == 1 && "unsupported");

      aco_ptr<Instruction> res;
      res.reset(create_instruction(instr->opcode, instr->format, instr->operands.size(),
                                   instr->definitions.size()));
      if (instr->isSOPK())
         res->salu().imm = instr->salu().imm;

      for (unsigned i = 0; i < instr->operands.size(); i++) {
         res->operands[i] = instr->operands[i];
         if (instr->operands[i].isTemp()) {
            /* should_rematerialize() only accepts constant operands */
            assert(false && "unsupported");
            /* single lookup instead of count() followed by operator[] */
            const auto operand_remat = ctx.remat.find(instr->operands[i].getTemp());
            if (operand_remat != ctx.remat.end())
               ctx.unused_remats.erase(operand_remat->second.instr);
         }
      }
      res->definitions[0] = Definition(new_name);
      return res;
   } else {
      aco_ptr<Instruction> reload{create_instruction(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
      reload->operands[0] = Operand::c32(spill_id);
      reload->definitions[0] = Definition(new_name);
      ctx.is_reloaded[spill_id] = true;
      return reload;
   }
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
get_rematerialize_info(spill_ctx& ctx)
|
|
|
|
|
{
|
|
|
|
|
for (Block& block : ctx.program->blocks) {
|
|
|
|
|
bool logical = false;
|
|
|
|
|
for (aco_ptr<Instruction>& instr : block.instructions) {
|
|
|
|
|
if (instr->opcode == aco_opcode::p_logical_start)
|
|
|
|
|
logical = true;
|
|
|
|
|
else if (instr->opcode == aco_opcode::p_logical_end)
|
|
|
|
|
logical = false;
|
|
|
|
|
if (logical && should_rematerialize(instr)) {
|
|
|
|
|
for (const Definition& def : instr->definitions) {
|
|
|
|
|
if (def.isTemp()) {
|
2020-11-26 22:03:27 -08:00
|
|
|
ctx.remat[def.getTemp()] = remat_info{instr.get()};
|
2021-07-15 15:11:44 +02:00
|
|
|
ctx.unused_remats.insert(instr.get());
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
RegisterDemand
|
|
|
|
|
init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
|
|
|
|
|
{
|
|
|
|
|
RegisterDemand spilled_registers;
|
|
|
|
|
|
|
|
|
|
/* first block, nothing was spilled before */
|
2023-03-10 20:59:36 +01:00
|
|
|
if (block->linear_preds.empty())
|
2019-09-17 13:22:17 +02:00
|
|
|
return {0, 0};
|
|
|
|
|
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
/* live-in variables at the beginning of the current block */
|
2024-07-08 18:51:56 +02:00
|
|
|
const IDSet& live_in = ctx.program->live.live_in[block_idx];
|
2021-03-26 13:12:43 +00:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* loop header block */
|
aco/spill: select more loop-carried variables to be spilled
This changes the heuristic to also spill live-through variables
if they are used within the loop if the register pressure is too high.
Totals from 68 (0.09% of 79395) affected shaders: (GFX11)
Instrs: 3385477 -> 3386659 (+0.03%); split: -0.11%, +0.14%
CodeSize: 17329668 -> 17353796 (+0.14%); split: -0.12%, +0.26%
SpillSGPRs: 4246 -> 3659 (-13.82%); split: -17.12%, +3.30%
SpillVGPRs: 978 -> 884 (-9.61%); split: -10.02%, +0.41%
Scratch: 58880 -> 59648 (+1.30%)
Latency: 20567445 -> 20753412 (+0.90%); split: -0.08%, +0.98%
InvThroughput: 5091128 -> 5188098 (+1.90%); split: -0.07%, +1.97%
VClause: 90446 -> 90499 (+0.06%); split: -0.05%, +0.11%
SClause: 68270 -> 68072 (-0.29%); split: -0.35%, +0.06%
Copies: 280689 -> 278003 (-0.96%); split: -1.08%, +0.12%
Branches: 112577 -> 112608 (+0.03%); split: -0.10%, +0.13%
VALU: 1863415 -> 1866878 (+0.19%); split: -0.07%, +0.26%
SALU: 415572 -> 413062 (-0.60%); split: -0.63%, +0.03%
VMEM: 140890 -> 141045 (+0.11%); split: -0.04%, +0.15%
VOPD: 650 -> 670 (+3.08%); split: +3.54%, -0.46%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
2024-02-21 14:57:36 +01:00
|
|
|
if (block->kind & block_kind_loop_header) {
|
2019-09-17 13:22:17 +02:00
|
|
|
assert(block->linear_preds[0] == block_idx - 1);
|
|
|
|
|
assert(block->logical_preds[0] == block_idx - 1);
|
|
|
|
|
|
|
|
|
|
/* check how many live-through variables should be spilled */
|
2024-06-19 14:11:51 +02:00
|
|
|
RegisterDemand reg_pressure = block->live_in_demand;
|
2021-02-22 18:02:24 +01:00
|
|
|
RegisterDemand loop_demand = reg_pressure;
|
2019-09-17 13:22:17 +02:00
|
|
|
unsigned i = block_idx;
|
aco/spill: select more loop-carried variables to be spilled
This changes the heuristic to also spill live-through variables
if they are used within the loop if the register pressure is too high.
Totals from 68 (0.09% of 79395) affected shaders: (GFX11)
Instrs: 3385477 -> 3386659 (+0.03%); split: -0.11%, +0.14%
CodeSize: 17329668 -> 17353796 (+0.14%); split: -0.12%, +0.26%
SpillSGPRs: 4246 -> 3659 (-13.82%); split: -17.12%, +3.30%
SpillVGPRs: 978 -> 884 (-9.61%); split: -10.02%, +0.41%
Scratch: 58880 -> 59648 (+1.30%)
Latency: 20567445 -> 20753412 (+0.90%); split: -0.08%, +0.98%
InvThroughput: 5091128 -> 5188098 (+1.90%); split: -0.07%, +1.97%
VClause: 90446 -> 90499 (+0.06%); split: -0.05%, +0.11%
SClause: 68270 -> 68072 (-0.29%); split: -0.35%, +0.06%
Copies: 280689 -> 278003 (-0.96%); split: -1.08%, +0.12%
Branches: 112577 -> 112608 (+0.03%); split: -0.10%, +0.13%
VALU: 1863415 -> 1866878 (+0.19%); split: -0.07%, +0.26%
SALU: 415572 -> 413062 (-0.60%); split: -0.63%, +0.03%
VMEM: 140890 -> 141045 (+0.11%); split: -0.04%, +0.15%
VOPD: 650 -> 670 (+3.08%); split: +3.54%, -0.46%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
2024-02-21 14:57:36 +01:00
|
|
|
while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth)
|
2021-02-22 18:02:24 +01:00
|
|
|
loop_demand.update(ctx.program->blocks[i++].register_demand);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-11-25 19:57:05 +01:00
|
|
|
for (auto spilled : ctx.spills_exit[block_idx - 1]) {
|
2021-03-26 13:12:43 +00:00
|
|
|
/* variable is not live at loop entry: probably a phi operand */
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
if (!live_in.count(spilled.first.id()))
|
2020-08-06 17:38:41 +01:00
|
|
|
continue;
|
|
|
|
|
|
aco/spill: keep loop-carried variables spilled at loop headers
Now, that we avoid re-spilling loop-carried variables, we can
keep them spilled at loop-headers.
Totals from 31 (0.04% of 79395) affected shaders: (GFX11)
Instrs: 1539175 -> 1538109 (-0.07%); split: -0.14%, +0.07%
CodeSize: 7877948 -> 7871916 (-0.08%); split: -0.16%, +0.08%
SpillSGPRs: 1936 -> 1523 (-21.33%)
SpillVGPRs: 160 -> 146 (-8.75%)
Scratch: 18176 -> 16896 (-7.04%)
Latency: 9439964 -> 9441197 (+0.01%); split: -0.01%, +0.02%
InvThroughput: 2406253 -> 2406700 (+0.02%); split: -0.01%, +0.03%
VClause: 38903 -> 38922 (+0.05%)
SClause: 33347 -> 33349 (+0.01%); split: -0.01%, +0.02%
Copies: 131420 -> 130974 (-0.34%); split: -0.57%, +0.23%
Branches: 53091 -> 53212 (+0.23%); split: -0.03%, +0.26%
VALU: 844372 -> 843876 (-0.06%); split: -0.12%, +0.06%
SALU: 190329 -> 189891 (-0.23%); split: -0.40%, +0.17%
VMEM: 60491 -> 60510 (+0.03%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
2024-02-22 17:12:17 +01:00
|
|
|
/* keep live-through variables spilled */
|
|
|
|
|
ctx.spills_entry[block_idx][spilled.first] = spilled.second;
|
|
|
|
|
spilled_registers += spilled.first;
|
|
|
|
|
loop_demand -= spilled.first;
|
2020-08-06 17:38:41 +01:00
|
|
|
}
|
2024-03-14 15:16:02 +01:00
|
|
|
if (!ctx.loop.empty()) {
|
|
|
|
|
/* If this is a nested loop, keep variables from the outer loop spilled. */
|
|
|
|
|
for (auto spilled : ctx.loop.back().spills) {
|
2024-03-25 18:08:29 +01:00
|
|
|
/* If the inner loop comes after the last continue statement of the outer loop,
|
|
|
|
|
* the loop-carried variables might not be live-in for the inner loop.
|
|
|
|
|
*/
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
if (live_in.count(spilled.first.id()) &&
|
2024-03-25 18:08:29 +01:00
|
|
|
ctx.spills_entry[block_idx].insert(spilled).second) {
|
2024-03-14 15:16:02 +01:00
|
|
|
spilled_registers += spilled.first;
|
|
|
|
|
loop_demand -= spilled.first;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-08-06 17:38:41 +01:00
|
|
|
|
aco/spill: keep loop-carried variables spilled at loop headers
Now, that we avoid re-spilling loop-carried variables, we can
keep them spilled at loop-headers.
Totals from 31 (0.04% of 79395) affected shaders: (GFX11)
Instrs: 1539175 -> 1538109 (-0.07%); split: -0.14%, +0.07%
CodeSize: 7877948 -> 7871916 (-0.08%); split: -0.16%, +0.08%
SpillSGPRs: 1936 -> 1523 (-21.33%)
SpillVGPRs: 160 -> 146 (-8.75%)
Scratch: 18176 -> 16896 (-7.04%)
Latency: 9439964 -> 9441197 (+0.01%); split: -0.01%, +0.02%
InvThroughput: 2406253 -> 2406700 (+0.02%); split: -0.01%, +0.03%
VClause: 38903 -> 38922 (+0.05%)
SClause: 33347 -> 33349 (+0.01%); split: -0.01%, +0.02%
Copies: 131420 -> 130974 (-0.34%); split: -0.57%, +0.23%
Branches: 53091 -> 53212 (+0.23%); split: -0.03%, +0.26%
VALU: 844372 -> 843876 (-0.06%); split: -0.12%, +0.06%
SALU: 190329 -> 189891 (-0.23%); split: -0.40%, +0.17%
VMEM: 60491 -> 60510 (+0.03%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
2024-02-22 17:12:17 +01:00
|
|
|
/* select more live-through variables and constants */
|
2021-02-22 15:58:46 +01:00
|
|
|
RegType type = RegType::vgpr;
|
2021-02-22 18:02:24 +01:00
|
|
|
while (loop_demand.exceeds(ctx.target_pressure)) {
|
2021-02-22 15:58:46 +01:00
|
|
|
/* if VGPR demand is low enough, select SGPRs */
|
2021-02-22 18:02:24 +01:00
|
|
|
if (type == RegType::vgpr && loop_demand.vgpr <= ctx.target_pressure.vgpr)
|
2021-02-22 15:58:46 +01:00
|
|
|
type = RegType::sgpr;
|
|
|
|
|
/* if SGPR demand is low enough, break */
|
2021-02-22 18:02:24 +01:00
|
|
|
if (type == RegType::sgpr && loop_demand.sgpr <= ctx.target_pressure.sgpr)
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
float score = 0.0;
|
aco/spill: select more loop-carried variables to be spilled
This changes the heuristic to also spill live-through variables
if they are used within the loop if the register pressure is too high.
Totals from 68 (0.09% of 79395) affected shaders: (GFX11)
Instrs: 3385477 -> 3386659 (+0.03%); split: -0.11%, +0.14%
CodeSize: 17329668 -> 17353796 (+0.14%); split: -0.12%, +0.26%
SpillSGPRs: 4246 -> 3659 (-13.82%); split: -17.12%, +3.30%
SpillVGPRs: 978 -> 884 (-9.61%); split: -10.02%, +0.41%
Scratch: 58880 -> 59648 (+1.30%)
Latency: 20567445 -> 20753412 (+0.90%); split: -0.08%, +0.98%
InvThroughput: 5091128 -> 5188098 (+1.90%); split: -0.07%, +1.97%
VClause: 90446 -> 90499 (+0.06%); split: -0.05%, +0.11%
SClause: 68270 -> 68072 (-0.29%); split: -0.35%, +0.06%
Copies: 280689 -> 278003 (-0.96%); split: -1.08%, +0.12%
Branches: 112577 -> 112608 (+0.03%); split: -0.10%, +0.13%
VALU: 1863415 -> 1866878 (+0.19%); split: -0.07%, +0.26%
SALU: 415572 -> 413062 (-0.60%); split: -0.63%, +0.03%
VMEM: 140890 -> 141045 (+0.11%); split: -0.04%, +0.15%
VOPD: 650 -> 670 (+3.08%); split: +3.54%, -0.46%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
2024-02-21 14:57:36 +01:00
|
|
|
unsigned remat = 0;
|
2019-09-17 13:22:17 +02:00
|
|
|
Temp to_spill;
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
for (unsigned t : live_in) {
|
|
|
|
|
Temp var = Temp(t, ctx.program->temp_rc[t]);
|
|
|
|
|
if (var.type() != type || ctx.spills_entry[block_idx].count(var) ||
|
2024-06-13 11:55:27 +02:00
|
|
|
var.regClass().is_linear_vgpr())
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
unsigned can_remat = ctx.remat.count(var);
|
|
|
|
|
if (can_remat > remat || (can_remat == remat && ctx.ssa_infos[t].score() > score)) {
|
|
|
|
|
to_spill = var;
|
|
|
|
|
score = ctx.ssa_infos[t].score();
|
aco/spill: select more loop-carried variables to be spilled
This changes the heuristic to also spill live-through variables
if they are used within the loop if the register pressure is too high.
Totals from 68 (0.09% of 79395) affected shaders: (GFX11)
Instrs: 3385477 -> 3386659 (+0.03%); split: -0.11%, +0.14%
CodeSize: 17329668 -> 17353796 (+0.14%); split: -0.12%, +0.26%
SpillSGPRs: 4246 -> 3659 (-13.82%); split: -17.12%, +3.30%
SpillVGPRs: 978 -> 884 (-9.61%); split: -10.02%, +0.41%
Scratch: 58880 -> 59648 (+1.30%)
Latency: 20567445 -> 20753412 (+0.90%); split: -0.08%, +0.98%
InvThroughput: 5091128 -> 5188098 (+1.90%); split: -0.07%, +1.97%
VClause: 90446 -> 90499 (+0.06%); split: -0.05%, +0.11%
SClause: 68270 -> 68072 (-0.29%); split: -0.35%, +0.06%
Copies: 280689 -> 278003 (-0.96%); split: -1.08%, +0.12%
Branches: 112577 -> 112608 (+0.03%); split: -0.10%, +0.13%
VALU: 1863415 -> 1866878 (+0.19%); split: -0.07%, +0.26%
SALU: 415572 -> 413062 (-0.60%); split: -0.63%, +0.03%
VMEM: 140890 -> 141045 (+0.11%); split: -0.04%, +0.15%
VOPD: 650 -> 670 (+3.08%); split: +3.54%, -0.46%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
2024-02-21 14:57:36 +01:00
|
|
|
remat = can_remat;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
2021-02-22 15:58:46 +01:00
|
|
|
|
|
|
|
|
/* select SGPRs or break */
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
if (score == 0.0) {
|
2021-02-22 15:58:46 +01:00
|
|
|
if (type == RegType::sgpr)
|
|
|
|
|
break;
|
|
|
|
|
type = RegType::sgpr;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
aco/spill: select more loop-carried variables to be spilled
This changes the heuristic to also spill live-through variables
if they are used within the loop if the register pressure is too high.
Totals from 68 (0.09% of 79395) affected shaders: (GFX11)
Instrs: 3385477 -> 3386659 (+0.03%); split: -0.11%, +0.14%
CodeSize: 17329668 -> 17353796 (+0.14%); split: -0.12%, +0.26%
SpillSGPRs: 4246 -> 3659 (-13.82%); split: -17.12%, +3.30%
SpillVGPRs: 978 -> 884 (-9.61%); split: -10.02%, +0.41%
Scratch: 58880 -> 59648 (+1.30%)
Latency: 20567445 -> 20753412 (+0.90%); split: -0.08%, +0.98%
InvThroughput: 5091128 -> 5188098 (+1.90%); split: -0.07%, +1.97%
VClause: 90446 -> 90499 (+0.06%); split: -0.05%, +0.11%
SClause: 68270 -> 68072 (-0.29%); split: -0.35%, +0.06%
Copies: 280689 -> 278003 (-0.96%); split: -1.08%, +0.12%
Branches: 112577 -> 112608 (+0.03%); split: -0.10%, +0.13%
VALU: 1863415 -> 1866878 (+0.19%); split: -0.07%, +0.26%
SALU: 415572 -> 413062 (-0.60%); split: -0.63%, +0.03%
VMEM: 140890 -> 141045 (+0.11%); split: -0.04%, +0.15%
VOPD: 650 -> 670 (+3.08%); split: +3.54%, -0.46%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
2024-02-21 14:57:36 +01:00
|
|
|
ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
|
2021-02-22 15:58:46 +01:00
|
|
|
spilled_registers += to_spill;
|
2021-02-22 18:02:24 +01:00
|
|
|
loop_demand -= to_spill;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
2024-03-14 14:56:18 +01:00
|
|
|
/* create new loop_info */
|
2024-02-16 11:37:49 +01:00
|
|
|
loop_info info = {block_idx, ctx.spills_entry[block_idx], live_in};
|
2024-03-14 14:56:18 +01:00
|
|
|
ctx.loop.emplace_back(std::move(info));
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* shortcut */
|
2021-02-22 18:02:24 +01:00
|
|
|
if (!loop_demand.exceeds(ctx.target_pressure))
|
2019-09-17 13:22:17 +02:00
|
|
|
return spilled_registers;
|
|
|
|
|
|
|
|
|
|
/* if reg pressure is too high at beginning of loop, add variables with furthest use */
|
2021-02-22 18:02:24 +01:00
|
|
|
reg_pressure -= spilled_registers;
|
2020-08-04 17:08:43 +01:00
|
|
|
|
2021-02-22 15:58:46 +01:00
|
|
|
while (reg_pressure.exceeds(ctx.target_pressure)) {
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
float score = 0;
|
2024-10-21 17:25:10 +02:00
|
|
|
Temp to_spill = Temp();
|
2021-02-22 15:58:46 +01:00
|
|
|
type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr;
|
2024-07-11 12:48:18 +02:00
|
|
|
for (aco_ptr<Instruction>& phi : block->instructions) {
|
|
|
|
|
if (!is_phi(phi))
|
|
|
|
|
break;
|
2024-08-13 12:31:24 +02:00
|
|
|
if (!phi->definitions[0].isTemp() || phi->definitions[0].isKill())
|
2024-07-11 12:48:18 +02:00
|
|
|
continue;
|
|
|
|
|
Temp var = phi->definitions[0].getTemp();
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
if (var.type() == type && !ctx.spills_entry[block_idx].count(var) &&
|
2024-07-11 12:48:18 +02:00
|
|
|
ctx.ssa_infos[var.id()].score() > score) {
|
aco/spill: use average use distances to spill loop variables
Totals from 83 (0.10% of 79395) affected shaders: (GFX11)
Instrs: 3429510 -> 3427917 (-0.05%); split: -0.05%, +0.01%
CodeSize: 17587884 -> 17570224 (-0.10%); split: -0.11%, +0.01%
SpillSGPRs: 4660 -> 4630 (-0.64%); split: -1.07%, +0.43%
Latency: 20054145 -> 20054454 (+0.00%); split: -0.00%, +0.01%
InvThroughput: 4989606 -> 4989740 (+0.00%); split: -0.00%, +0.01%
VClause: 90844 -> 90843 (-0.00%)
SClause: 69534 -> 69535 (+0.00%); split: -0.04%, +0.04%
Copies: 283288 -> 283415 (+0.04%); split: -0.11%, +0.15%
Branches: 113543 -> 113409 (-0.12%); split: -0.12%, +0.01%
VALU: 1888933 -> 1887489 (-0.08%); split: -0.08%, +0.00%
SALU: 423548 -> 423609 (+0.01%); split: -0.07%, +0.09%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-22 16:41:07 +01:00
|
|
|
to_spill = var;
|
2024-07-11 12:48:18 +02:00
|
|
|
score = ctx.ssa_infos[var.id()].score();
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
2024-10-21 17:25:10 +02:00
|
|
|
assert(to_spill != Temp());
|
2024-03-14 13:54:39 +01:00
|
|
|
ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
|
2021-02-22 15:58:46 +01:00
|
|
|
spilled_registers += to_spill;
|
|
|
|
|
reg_pressure -= to_spill;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return spilled_registers;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* branch block */
|
2019-10-30 12:04:22 +01:00
|
|
|
if (block->linear_preds.size() == 1 && !(block->kind & block_kind_loop_exit)) {
|
2024-02-20 10:19:33 +01:00
|
|
|
/* keep variables spilled */
|
2019-09-17 13:22:17 +02:00
|
|
|
unsigned pred_idx = block->linear_preds[0];
|
|
|
|
|
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
|
2024-02-20 10:19:33 +01:00
|
|
|
if (pair.first.type() != RegType::sgpr)
|
2021-07-13 12:59:58 +02:00
|
|
|
continue;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2024-02-16 11:37:49 +01:00
|
|
|
if (live_in.count(pair.first.id())) {
|
2024-02-20 10:19:33 +01:00
|
|
|
spilled_registers += pair.first;
|
|
|
|
|
ctx.spills_entry[block_idx].emplace(pair);
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
2024-02-20 10:19:33 +01:00
|
|
|
|
|
|
|
|
if (block->logical_preds.empty())
|
|
|
|
|
return spilled_registers;
|
|
|
|
|
|
|
|
|
|
pred_idx = block->logical_preds[0];
|
|
|
|
|
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
|
|
|
|
|
if (pair.first.type() != RegType::vgpr)
|
|
|
|
|
continue;
|
|
|
|
|
|
2024-02-16 11:37:49 +01:00
|
|
|
if (live_in.count(pair.first.id())) {
|
2024-02-20 10:19:33 +01:00
|
|
|
spilled_registers += pair.first;
|
|
|
|
|
ctx.spills_entry[block_idx].emplace(pair);
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return spilled_registers;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* else: merge block */
|
2023-08-30 10:57:49 +01:00
|
|
|
std::map<Temp, bool> partial_spills;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* keep variables spilled on all incoming paths */
|
2024-02-16 11:37:49 +01:00
|
|
|
for (unsigned t : live_in) {
|
|
|
|
|
const RegClass rc = ctx.program->temp_rc[t];
|
|
|
|
|
Temp var = Temp(t, rc);
|
|
|
|
|
Block::edge_vec& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* If it can be rematerialized, keep the variable spilled if all predecessors do not reload
|
|
|
|
|
* it. Otherwise, if any predecessor reloads it, ensure it's reloaded on all other
|
|
|
|
|
* predecessors. The idea is that it's better in practice to rematerialize redundantly than to
|
|
|
|
|
* create lots of phis. */
|
2024-02-16 11:37:49 +01:00
|
|
|
const bool remat = ctx.remat.count(var);
|
2024-02-22 17:21:05 +01:00
|
|
|
/* If the variable is spilled at the current loop-header, spilling is essentially for free
|
|
|
|
|
* while reloading is not. Thus, keep them spilled if they are at least partially spilled.
|
|
|
|
|
*/
|
2024-02-16 11:37:49 +01:00
|
|
|
const bool avoid_respill = block->loop_nest_depth && ctx.loop.back().spills.count(var);
|
2024-02-22 17:21:05 +01:00
|
|
|
bool spill = true;
|
|
|
|
|
bool partial_spill = false;
|
2019-09-17 13:22:17 +02:00
|
|
|
uint32_t spill_id = 0;
|
|
|
|
|
for (unsigned pred_idx : preds) {
|
2024-02-16 11:37:49 +01:00
|
|
|
if (!ctx.spills_exit[pred_idx].count(var)) {
|
2024-02-22 17:21:05 +01:00
|
|
|
spill = false;
|
2019-09-17 13:22:17 +02:00
|
|
|
} else {
|
2024-02-22 17:21:05 +01:00
|
|
|
partial_spill = true;
|
2019-09-17 13:22:17 +02:00
|
|
|
/* it might be that on one incoming path, the variable has a different spill_id, but
|
|
|
|
|
* add_coupling_code() will take care of that. */
|
2024-02-16 11:37:49 +01:00
|
|
|
spill_id = ctx.spills_exit[pred_idx][var];
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
2024-02-22 17:21:05 +01:00
|
|
|
spill |= (remat && partial_spill);
|
|
|
|
|
spill |= (avoid_respill && partial_spill);
|
2019-09-17 13:22:17 +02:00
|
|
|
if (spill) {
|
2024-02-16 11:37:49 +01:00
|
|
|
ctx.spills_entry[block_idx][var] = spill_id;
|
|
|
|
|
partial_spills.erase(var);
|
|
|
|
|
spilled_registers += var;
|
2024-02-22 17:21:05 +01:00
|
|
|
} else {
|
2024-02-16 11:37:49 +01:00
|
|
|
partial_spills[var] = partial_spill;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* same for phis */
|
2021-02-22 18:02:24 +01:00
|
|
|
for (aco_ptr<Instruction>& phi : block->instructions) {
|
|
|
|
|
if (!is_phi(phi))
|
|
|
|
|
break;
|
2024-08-13 12:31:24 +02:00
|
|
|
if (!phi->definitions[0].isTemp() || phi->definitions[0].isKill())
|
2021-04-27 17:26:09 +02:00
|
|
|
continue;
|
|
|
|
|
|
2023-12-18 11:21:08 +01:00
|
|
|
Block::edge_vec& preds =
|
2019-09-17 13:22:17 +02:00
|
|
|
phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
|
2024-06-07 12:08:32 +02:00
|
|
|
bool is_all_undef = true;
|
2022-05-25 16:12:19 +02:00
|
|
|
bool is_all_spilled = true;
|
2024-02-19 14:19:52 +01:00
|
|
|
bool is_partial_spill = false;
|
2019-09-17 13:22:17 +02:00
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
2022-05-25 16:12:19 +02:00
|
|
|
if (phi->operands[i].isUndefined())
|
2021-02-22 14:06:05 +01:00
|
|
|
continue;
|
2024-02-19 14:19:52 +01:00
|
|
|
bool spilled = phi->operands[i].isTemp() &&
|
|
|
|
|
ctx.spills_exit[preds[i]].count(phi->operands[i].getTemp());
|
|
|
|
|
is_all_spilled &= spilled;
|
|
|
|
|
is_partial_spill |= spilled;
|
2024-06-07 12:08:32 +02:00
|
|
|
is_all_undef = false;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
2022-05-25 16:12:19 +02:00
|
|
|
|
2024-06-07 12:08:32 +02:00
|
|
|
if (is_all_spilled && !is_all_undef) {
|
2022-05-25 16:12:19 +02:00
|
|
|
/* The phi is spilled at all predecessors. Keep it spilled. */
|
2024-03-14 13:54:39 +01:00
|
|
|
ctx.add_to_spills(phi->definitions[0].getTemp(), ctx.spills_entry[block_idx]);
|
2019-09-17 13:22:17 +02:00
|
|
|
spilled_registers += phi->definitions[0].getTemp();
|
2024-02-22 17:21:05 +01:00
|
|
|
partial_spills.erase(phi->definitions[0].getTemp());
|
2022-05-25 16:12:19 +02:00
|
|
|
} else {
|
|
|
|
|
/* Phis might increase the register pressure. */
|
2024-02-19 14:19:52 +01:00
|
|
|
partial_spills[phi->definitions[0].getTemp()] = is_partial_spill;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* if reg pressure at first instruction is still too high, add partially spilled variables */
|
2024-06-19 14:11:51 +02:00
|
|
|
RegisterDemand reg_pressure = block->live_in_demand;
|
2021-02-22 18:02:24 +01:00
|
|
|
reg_pressure -= spilled_registers;
|
2020-08-04 17:08:43 +01:00
|
|
|
|
2021-02-22 15:58:46 +01:00
|
|
|
while (reg_pressure.exceeds(ctx.target_pressure)) {
|
2019-09-17 13:22:17 +02:00
|
|
|
assert(!partial_spills.empty());
|
2023-08-30 10:57:49 +01:00
|
|
|
std::map<Temp, bool>::iterator it = partial_spills.begin();
|
2020-08-05 13:29:58 +01:00
|
|
|
Temp to_spill = Temp();
|
2024-02-19 14:19:52 +01:00
|
|
|
bool is_partial_spill = false;
|
2024-02-16 11:37:49 +01:00
|
|
|
float score = 0.0;
|
2021-02-22 15:58:46 +01:00
|
|
|
RegType type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
while (it != partial_spills.end()) {
|
2023-08-30 10:57:49 +01:00
|
|
|
assert(!ctx.spills_entry[block_idx].count(it->first));
|
|
|
|
|
|
2024-02-16 11:37:49 +01:00
|
|
|
if (it->first.type() == type && !it->first.regClass().is_linear_vgpr() &&
|
|
|
|
|
((it->second && !is_partial_spill) ||
|
|
|
|
|
(it->second == is_partial_spill && ctx.ssa_infos[it->first.id()].score() > score))) {
|
|
|
|
|
score = ctx.ssa_infos[it->first.id()].score();
|
2023-08-30 10:57:49 +01:00
|
|
|
to_spill = it->first;
|
2024-02-19 14:19:52 +01:00
|
|
|
is_partial_spill = it->second;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
++it;
|
|
|
|
|
}
|
2024-10-21 17:25:10 +02:00
|
|
|
assert(to_spill != Temp());
|
2024-03-14 13:54:39 +01:00
|
|
|
ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
|
2019-09-17 13:22:17 +02:00
|
|
|
partial_spills.erase(to_spill);
|
2021-02-22 15:58:46 +01:00
|
|
|
spilled_registers += to_spill;
|
|
|
|
|
reg_pressure -= to_spill;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return spilled_registers;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
2024-02-15 14:57:06 +01:00
|
|
|
add_coupling_code(spill_ctx& ctx, Block* block, IDSet& live_in)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
2024-02-15 14:57:06 +01:00
|
|
|
const unsigned block_idx = block->index;
|
2024-02-20 10:19:33 +01:00
|
|
|
/* No coupling code necessary */
|
2019-09-17 13:22:17 +02:00
|
|
|
if (block->linear_preds.size() == 0)
|
|
|
|
|
return;
|
|
|
|
|
|
2024-02-20 10:19:33 +01:00
|
|
|
/* Branch block: update renames */
|
2020-01-02 14:54:31 +00:00
|
|
|
if (block->linear_preds.size() == 1 &&
|
|
|
|
|
!(block->kind & (block_kind_loop_exit | block_kind_loop_header))) {
|
2019-09-17 13:22:17 +02:00
|
|
|
assert(ctx.processed[block->linear_preds[0]]);
|
2019-10-30 14:42:00 +01:00
|
|
|
|
2024-02-20 10:19:33 +01:00
|
|
|
ctx.renames[block_idx] = ctx.renames[block->linear_preds[0]];
|
|
|
|
|
if (!block->logical_preds.empty() && block->logical_preds[0] != block->linear_preds[0]) {
|
|
|
|
|
for (auto it : ctx.renames[block->logical_preds[0]]) {
|
|
|
|
|
if (it.first.type() == RegType::vgpr)
|
|
|
|
|
ctx.renames[block_idx].insert_or_assign(it.first, it.second);
|
2019-10-30 14:42:00 +01:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-20 10:19:33 +01:00
|
|
|
std::vector<aco_ptr<Instruction>> instructions;
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* loop header and merge blocks: check if all (linear) predecessors have been processed */
|
|
|
|
|
for (ASSERTED unsigned pred : block->linear_preds)
|
|
|
|
|
assert(ctx.processed[pred]);
|
|
|
|
|
|
|
|
|
|
/* iterate the phi nodes for which operands to spill at the predecessor */
|
|
|
|
|
for (aco_ptr<Instruction>& phi : block->instructions) {
|
2021-02-22 18:02:24 +01:00
|
|
|
if (!is_phi(phi))
|
2019-09-17 13:22:17 +02:00
|
|
|
break;
|
|
|
|
|
|
2024-02-16 11:20:13 +01:00
|
|
|
for (const Operand& op : phi->operands) {
|
|
|
|
|
if (op.isTemp())
|
|
|
|
|
ctx.ssa_infos[op.tempId()].num_uses--;
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-20 12:36:05 +02:00
|
|
|
/* The phi is not spilled */
|
2021-02-04 15:55:23 +01:00
|
|
|
if (!phi->definitions[0].isTemp() ||
|
2024-06-20 12:36:05 +02:00
|
|
|
!ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp()))
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
|
2023-12-18 11:21:08 +01:00
|
|
|
Block::edge_vec& preds =
|
2019-09-17 13:22:17 +02:00
|
|
|
phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
|
|
|
|
|
uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()];
|
2024-08-13 12:31:24 +02:00
|
|
|
phi->definitions[0].setKill(true);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
2019-10-15 18:23:52 +02:00
|
|
|
if (phi->operands[i].isUndefined())
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
|
2019-10-15 18:23:52 +02:00
|
|
|
unsigned pred_idx = preds[i];
|
2021-02-22 14:06:05 +01:00
|
|
|
Operand spill_op = phi->operands[i];
|
2024-06-20 12:36:05 +02:00
|
|
|
phi->operands[i] = Operand(phi->definitions[0].regClass());
|
2021-02-22 14:06:05 +01:00
|
|
|
|
|
|
|
|
if (spill_op.isTemp()) {
|
2024-06-20 12:36:05 +02:00
|
|
|
assert(spill_op.isKill());
|
|
|
|
|
Temp var = spill_op.getTemp();
|
2021-02-22 14:06:05 +01:00
|
|
|
|
|
|
|
|
std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
|
2023-04-12 07:10:19 +03:00
|
|
|
/* prevent the defining instruction from being DCE'd if it could be rematerialized */
|
2021-02-22 14:06:05 +01:00
|
|
|
if (rename_it == ctx.renames[preds[i]].end() && ctx.remat.count(var))
|
2021-07-15 15:11:44 +02:00
|
|
|
ctx.unused_remats.erase(ctx.remat[var].instr);
|
2021-02-22 14:06:05 +01:00
|
|
|
|
|
|
|
|
/* check if variable is already spilled at predecessor */
|
2021-07-16 12:17:29 +02:00
|
|
|
auto spilled = ctx.spills_exit[pred_idx].find(var);
|
2021-02-22 14:06:05 +01:00
|
|
|
if (spilled != ctx.spills_exit[pred_idx].end()) {
|
|
|
|
|
if (spilled->second != def_spill_id)
|
|
|
|
|
ctx.add_affinity(def_spill_id, spilled->second);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
2021-02-22 14:06:05 +01:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2024-06-20 12:36:05 +02:00
|
|
|
/* If the phi operand has the same name as the definition,
|
|
|
|
|
* add to predecessor's spilled variables, so that it gets
|
|
|
|
|
* skipped in the loop below.
|
|
|
|
|
*/
|
|
|
|
|
if (var == phi->definitions[0].getTemp())
|
|
|
|
|
ctx.spills_exit[pred_idx][var] = def_spill_id;
|
|
|
|
|
|
2021-02-22 14:06:05 +01:00
|
|
|
/* rename if necessary */
|
|
|
|
|
if (rename_it != ctx.renames[pred_idx].end()) {
|
|
|
|
|
spill_op.setTemp(rename_it->second);
|
|
|
|
|
ctx.renames[pred_idx].erase(rename_it);
|
|
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
2024-02-28 10:41:12 +01:00
|
|
|
/* add interferences */
|
2021-02-22 14:06:05 +01:00
|
|
|
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx])
|
2024-02-28 10:41:12 +01:00
|
|
|
ctx.add_interference(def_spill_id, pair.second);
|
2021-02-22 14:06:05 +01:00
|
|
|
|
2024-03-25 15:55:27 +01:00
|
|
|
aco_ptr<Instruction> spill{create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
|
2021-02-22 14:06:05 +01:00
|
|
|
spill->operands[0] = spill_op;
|
2024-02-28 10:41:12 +01:00
|
|
|
spill->operands[1] = Operand::c32(def_spill_id);
|
2019-09-17 13:22:17 +02:00
|
|
|
Block& pred = ctx.program->blocks[pred_idx];
|
|
|
|
|
unsigned idx = pred.instructions.size();
|
|
|
|
|
do {
|
|
|
|
|
assert(idx != 0);
|
|
|
|
|
idx--;
|
|
|
|
|
} while (phi->opcode == aco_opcode::p_phi &&
|
|
|
|
|
pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
|
|
|
|
|
std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
|
|
|
|
|
pred.instructions.insert(it, std::move(spill));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* iterate all (other) spilled variables for which to spill at the predecessor */
|
|
|
|
|
// TODO: would be better to have them sorted: first vgprs and first with longest distance
|
|
|
|
|
for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block_idx]) {
|
2024-07-11 12:48:18 +02:00
|
|
|
/* if variable is not live-in, it must be from a phi: this works because of CSSA form */
|
|
|
|
|
if (!live_in.count(pair.first.id()))
|
|
|
|
|
continue;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2024-07-11 12:48:18 +02:00
|
|
|
Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds;
|
2019-09-17 13:22:17 +02:00
|
|
|
for (unsigned pred_idx : preds) {
|
|
|
|
|
/* variable is already spilled at predecessor */
|
2021-07-16 12:17:29 +02:00
|
|
|
auto spilled = ctx.spills_exit[pred_idx].find(pair.first);
|
2019-09-17 13:22:17 +02:00
|
|
|
if (spilled != ctx.spills_exit[pred_idx].end()) {
|
|
|
|
|
if (spilled->second != pair.second)
|
2019-10-16 16:39:06 +02:00
|
|
|
ctx.add_affinity(pair.second, spilled->second);
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-21 17:01:17 +01:00
|
|
|
/* If this variable is spilled through the entire loop, no need to re-spill.
|
|
|
|
|
* It can be reloaded from the same spill-slot it got at the loop-preheader.
|
|
|
|
|
* No need to add interferences since every spilled variable in the loop already
|
|
|
|
|
* interferes with the spilled loop-variables. Make sure that the spill_ids match.
|
|
|
|
|
*/
|
|
|
|
|
const uint32_t loop_nest_depth = std::min(ctx.program->blocks[pred_idx].loop_nest_depth,
|
|
|
|
|
ctx.program->blocks[block_idx].loop_nest_depth);
|
|
|
|
|
if (loop_nest_depth) {
|
|
|
|
|
auto spill = ctx.loop[loop_nest_depth - 1].spills.find(pair.first);
|
|
|
|
|
if (spill != ctx.loop[loop_nest_depth - 1].spills.end() && spill->second == pair.second)
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2019-10-30 12:32:32 +01:00
|
|
|
/* add interferences between spilled variable and predecessors exit spills */
|
2024-02-28 11:43:19 +01:00
|
|
|
for (std::pair<Temp, uint32_t> exit_spill : ctx.spills_exit[pred_idx])
|
2020-07-16 11:08:50 +01:00
|
|
|
ctx.add_interference(exit_spill.second, pair.second);
|
2019-10-30 12:32:32 +01:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* variable is in register at predecessor and has to be spilled */
|
|
|
|
|
/* rename if necessary */
|
|
|
|
|
Temp var = pair.first;
|
|
|
|
|
std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
|
|
|
|
|
if (rename_it != ctx.renames[pred_idx].end()) {
|
|
|
|
|
var = rename_it->second;
|
|
|
|
|
ctx.renames[pred_idx].erase(rename_it);
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-25 15:55:27 +01:00
|
|
|
aco_ptr<Instruction> spill{create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
|
2019-09-17 13:22:17 +02:00
|
|
|
spill->operands[0] = Operand(var);
|
2021-07-13 11:22:46 +02:00
|
|
|
spill->operands[1] = Operand::c32(pair.second);
|
2019-09-17 13:22:17 +02:00
|
|
|
Block& pred = ctx.program->blocks[pred_idx];
|
|
|
|
|
unsigned idx = pred.instructions.size();
|
|
|
|
|
do {
|
|
|
|
|
assert(idx != 0);
|
|
|
|
|
idx--;
|
|
|
|
|
} while (pair.first.type() == RegType::vgpr &&
|
|
|
|
|
pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
|
|
|
|
|
std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
|
|
|
|
|
pred.instructions.insert(it, std::move(spill));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* iterate phis for which operands to reload */
|
2024-06-20 12:36:05 +02:00
|
|
|
for (aco_ptr<Instruction>& phi : block->instructions) {
|
|
|
|
|
if (!is_phi(phi))
|
|
|
|
|
break;
|
2024-08-13 12:31:24 +02:00
|
|
|
if (phi->definitions[0].isKill())
|
|
|
|
|
continue;
|
2024-06-20 12:36:05 +02:00
|
|
|
|
2021-02-04 15:55:23 +01:00
|
|
|
assert(!phi->definitions[0].isTemp() ||
|
2024-08-13 12:31:24 +02:00
|
|
|
!ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp()));
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2023-12-18 11:21:08 +01:00
|
|
|
Block::edge_vec& preds =
|
2019-09-17 13:22:17 +02:00
|
|
|
phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
|
|
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
|
|
|
|
if (!phi->operands[i].isTemp())
|
|
|
|
|
continue;
|
|
|
|
|
unsigned pred_idx = preds[i];
|
|
|
|
|
|
2021-02-22 11:00:22 +01:00
|
|
|
/* if the operand was reloaded, rename */
|
2021-07-13 12:59:58 +02:00
|
|
|
if (!ctx.spills_exit[pred_idx].count(phi->operands[i].getTemp())) {
|
2019-09-17 13:22:17 +02:00
|
|
|
std::map<Temp, Temp>::iterator it =
|
|
|
|
|
ctx.renames[pred_idx].find(phi->operands[i].getTemp());
|
2021-07-13 12:59:58 +02:00
|
|
|
if (it != ctx.renames[pred_idx].end()) {
|
2019-09-17 13:22:17 +02:00
|
|
|
phi->operands[i].setTemp(it->second);
|
2023-04-12 07:10:19 +03:00
|
|
|
/* prevent the defining instruction from being DCE'd if it could be rematerialized */
|
2021-07-13 12:59:58 +02:00
|
|
|
} else {
|
|
|
|
|
auto remat_it = ctx.remat.find(phi->operands[i].getTemp());
|
|
|
|
|
if (remat_it != ctx.remat.end()) {
|
2021-07-15 15:11:44 +02:00
|
|
|
ctx.unused_remats.erase(remat_it->second.instr);
|
2021-07-13 12:59:58 +02:00
|
|
|
}
|
|
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Temp tmp = phi->operands[i].getTemp();
|
|
|
|
|
|
|
|
|
|
/* reload phi operand at end of predecessor block */
|
2020-09-14 20:58:33 +01:00
|
|
|
Temp new_name = ctx.program->allocateTmp(tmp.regClass());
|
2019-09-17 13:22:17 +02:00
|
|
|
Block& pred = ctx.program->blocks[pred_idx];
|
|
|
|
|
unsigned idx = pred.instructions.size();
|
|
|
|
|
do {
|
|
|
|
|
assert(idx != 0);
|
|
|
|
|
idx--;
|
|
|
|
|
} while (phi->opcode == aco_opcode::p_phi &&
|
|
|
|
|
pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
|
|
|
|
|
std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
|
|
|
|
|
aco_ptr<Instruction> reload =
|
|
|
|
|
do_reload(ctx, tmp, new_name, ctx.spills_exit[pred_idx][tmp]);
|
|
|
|
|
|
2021-02-22 11:00:22 +01:00
|
|
|
/* reload spilled exec mask directly to exec */
|
|
|
|
|
if (!phi->definitions[0].isTemp()) {
|
|
|
|
|
assert(phi->definitions[0].isFixed() && phi->definitions[0].physReg() == exec);
|
|
|
|
|
reload->definitions[0] = phi->definitions[0];
|
|
|
|
|
phi->operands[i] = Operand(exec, ctx.program->lane_mask);
|
|
|
|
|
} else {
|
|
|
|
|
ctx.spills_exit[pred_idx].erase(tmp);
|
|
|
|
|
ctx.renames[pred_idx][tmp] = new_name;
|
|
|
|
|
phi->operands[i].setTemp(new_name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pred.instructions.insert(it, std::move(reload));
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* iterate live variables for which to reload */
|
2024-02-15 14:57:06 +01:00
|
|
|
for (unsigned t : live_in) {
|
|
|
|
|
const RegClass rc = ctx.program->temp_rc[t];
|
|
|
|
|
Temp var = Temp(t, rc);
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* skip spilled variables */
|
2024-02-15 14:57:06 +01:00
|
|
|
if (ctx.spills_entry[block_idx].count(var))
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
|
2024-02-15 14:57:06 +01:00
|
|
|
Block::edge_vec& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
|
2019-09-17 13:22:17 +02:00
|
|
|
for (unsigned pred_idx : preds) {
|
2024-02-15 14:57:06 +01:00
|
|
|
/* skip if the variable is not spilled at the predecessor */
|
|
|
|
|
if (!ctx.spills_exit[pred_idx].count(var))
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* variable is spilled at predecessor and has to be reloaded */
|
2024-02-15 14:57:06 +01:00
|
|
|
Temp new_name = ctx.program->allocateTmp(rc);
|
2019-09-17 13:22:17 +02:00
|
|
|
Block& pred = ctx.program->blocks[pred_idx];
|
|
|
|
|
unsigned idx = pred.instructions.size();
|
|
|
|
|
do {
|
|
|
|
|
assert(idx != 0);
|
|
|
|
|
idx--;
|
2024-02-15 14:57:06 +01:00
|
|
|
} while (rc.type() == RegType::vgpr &&
|
2019-09-17 13:22:17 +02:00
|
|
|
pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
|
|
|
|
|
std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
|
|
|
|
|
|
|
|
|
|
aco_ptr<Instruction> reload =
|
2024-02-15 14:57:06 +01:00
|
|
|
do_reload(ctx, var, new_name, ctx.spills_exit[pred.index][var]);
|
2019-09-17 13:22:17 +02:00
|
|
|
pred.instructions.insert(it, std::move(reload));
|
|
|
|
|
|
2024-02-15 14:57:06 +01:00
|
|
|
ctx.spills_exit[pred.index].erase(var);
|
|
|
|
|
ctx.renames[pred.index][var] = new_name;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* check if we have to create a new phi for this variable */
|
|
|
|
|
Temp rename = Temp();
|
|
|
|
|
bool is_same = true;
|
|
|
|
|
for (unsigned pred_idx : preds) {
|
2024-02-15 14:57:06 +01:00
|
|
|
if (!ctx.renames[pred_idx].count(var)) {
|
2019-09-17 13:22:17 +02:00
|
|
|
if (rename == Temp())
|
2024-02-15 14:57:06 +01:00
|
|
|
rename = var;
|
2019-09-17 13:22:17 +02:00
|
|
|
else
|
2024-02-15 14:57:06 +01:00
|
|
|
is_same = rename == var;
|
2019-09-17 13:22:17 +02:00
|
|
|
} else {
|
|
|
|
|
if (rename == Temp())
|
2024-02-15 14:57:06 +01:00
|
|
|
rename = ctx.renames[pred_idx][var];
|
2019-09-17 13:22:17 +02:00
|
|
|
else
|
2024-02-15 14:57:06 +01:00
|
|
|
is_same = rename == ctx.renames[pred_idx][var];
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!is_same)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!is_same) {
|
|
|
|
|
/* the variable was renamed differently in the predecessors: we have to create a phi */
|
2024-02-15 14:57:06 +01:00
|
|
|
aco_opcode opcode = rc.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
|
2024-03-25 15:55:27 +01:00
|
|
|
aco_ptr<Instruction> phi{create_instruction(opcode, Format::PSEUDO, preds.size(), 1)};
|
2024-02-15 14:57:06 +01:00
|
|
|
rename = ctx.program->allocateTmp(rc);
|
2019-09-17 13:22:17 +02:00
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
|
|
|
|
Temp tmp;
|
2024-02-15 14:57:06 +01:00
|
|
|
if (ctx.renames[preds[i]].count(var)) {
|
|
|
|
|
tmp = ctx.renames[preds[i]][var];
|
2020-12-11 19:37:50 +01:00
|
|
|
} else if (preds[i] >= block_idx) {
|
2019-09-17 13:22:17 +02:00
|
|
|
tmp = rename;
|
2020-12-11 19:37:50 +01:00
|
|
|
} else {
|
2024-02-15 14:57:06 +01:00
|
|
|
tmp = var;
|
2023-04-12 07:10:19 +03:00
|
|
|
/* prevent the defining instruction from being DCE'd if it could be rematerialized */
|
2020-12-11 19:37:50 +01:00
|
|
|
if (ctx.remat.count(tmp))
|
2021-07-15 15:11:44 +02:00
|
|
|
ctx.unused_remats.erase(ctx.remat[tmp].instr);
|
2020-12-11 19:37:50 +01:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
phi->operands[i] = Operand(tmp);
|
|
|
|
|
}
|
|
|
|
|
phi->definitions[0] = Definition(rename);
|
2024-06-21 15:45:22 +02:00
|
|
|
phi->register_demand = block->live_in_demand;
|
2024-06-20 12:36:05 +02:00
|
|
|
block->instructions.insert(block->instructions.begin(), std::move(phi));
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* the variable was renamed: add new name to renames */
|
2024-02-15 14:57:06 +01:00
|
|
|
if (!(rename == Temp() || rename == var))
|
|
|
|
|
ctx.renames[block_idx][var] = rename;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
2021-07-10 14:06:50 +02:00
|
|
|
process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand spilled_registers)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
2020-01-02 14:57:02 +00:00
|
|
|
assert(!ctx.processed[block_idx]);
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
std::vector<aco_ptr<Instruction>> instructions;
|
|
|
|
|
unsigned idx = 0;
|
|
|
|
|
|
2023-04-12 07:10:19 +03:00
|
|
|
/* phis are handled separately */
|
2019-09-17 13:22:17 +02:00
|
|
|
while (block->instructions[idx]->opcode == aco_opcode::p_phi ||
|
|
|
|
|
block->instructions[idx]->opcode == aco_opcode::p_linear_phi) {
|
2024-07-11 12:48:18 +02:00
|
|
|
const Definition def = block->instructions[idx]->definitions[0];
|
|
|
|
|
if (def.isTemp() && !def.isKill() && def.tempId() < ctx.ssa_infos.size())
|
|
|
|
|
ctx.program->live.live_in[block_idx].insert(def.tempId());
|
2020-12-11 14:30:30 +01:00
|
|
|
instructions.emplace_back(std::move(block->instructions[idx++]));
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
2021-07-10 14:06:50 +02:00
|
|
|
auto& current_spills = ctx.spills_exit[block_idx];
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
while (idx < block->instructions.size()) {
|
|
|
|
|
aco_ptr<Instruction>& instr = block->instructions[idx];
|
|
|
|
|
|
2023-08-25 19:21:54 +01:00
|
|
|
/* Spilling is handled as part of phis (they should always have the same or higher register
|
|
|
|
|
* demand). If we try to spill here, we might not be able to reduce the register demand enough
|
|
|
|
|
* because there is no path to spill constant/undef phi operands. */
|
|
|
|
|
if (instr->opcode == aco_opcode::p_branch) {
|
|
|
|
|
instructions.emplace_back(std::move(instr));
|
|
|
|
|
idx++;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
std::map<Temp, std::pair<Temp, uint32_t>> reloads;
|
2021-07-10 12:25:45 +02:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* rename and reload operands */
|
|
|
|
|
for (Operand& op : instr->operands) {
|
|
|
|
|
if (!op.isTemp())
|
|
|
|
|
continue;
|
2024-02-16 10:53:10 +01:00
|
|
|
|
|
|
|
|
if (op.isFirstKill())
|
2024-07-08 18:51:56 +02:00
|
|
|
ctx.program->live.live_in[block_idx].erase(op.tempId());
|
2024-02-16 11:20:13 +01:00
|
|
|
ctx.ssa_infos[op.tempId()].num_uses--;
|
2024-02-16 10:53:10 +01:00
|
|
|
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
if (!current_spills.count(op.getTemp()))
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* the Operand is spilled: add it to reloads */
|
2020-09-14 20:58:33 +01:00
|
|
|
Temp new_tmp = ctx.program->allocateTmp(op.regClass());
|
2019-09-17 13:22:17 +02:00
|
|
|
ctx.renames[block_idx][op.getTemp()] = new_tmp;
|
|
|
|
|
reloads[new_tmp] = std::make_pair(op.getTemp(), current_spills[op.getTemp()]);
|
|
|
|
|
current_spills.erase(op.getTemp());
|
|
|
|
|
spilled_registers -= new_tmp;
|
|
|
|
|
}
|
|
|
|
|
|
aco: calculate register demand per instruction as maximum necessary to execute the instruction
Previously, the register demand per instruction was calculated as the number of
live variables in the register file after executing an instruction plus additional
temporary registers, necessary during the execution of the instruction.
With this change, now it also includes all variables which are live right before
executing an instruction, i.e. killed Operands.
Care has been taken so that the invariant
register_demand[idx] = register_demand[idx - 1] - get_temp_registers(prev_instr)
+ get_live_changes(instr) + get_temp_registers(instr)
still holds.
Slight changes in scheduling:
Totals from 316 (0.40% of 79395) affected shaders: (GFX11)
Instrs: 301329 -> 300777 (-0.18%); split: -0.31%, +0.12%
CodeSize: 1577976 -> 1576204 (-0.11%); split: -0.21%, +0.10%
SpillSGPRs: 448 -> 447 (-0.22%)
Latency: 1736349 -> 1726182 (-0.59%); split: -2.01%, +1.42%
InvThroughput: 243894 -> 243883 (-0.00%); split: -0.03%, +0.03%
VClause: 6134 -> 6280 (+2.38%); split: -1.04%, +3.42%
SClause: 6142 -> 6137 (-0.08%); split: -0.13%, +0.05%
Copies: 14037 -> 14032 (-0.04%); split: -0.56%, +0.52%
Branches: 3284 -> 3283 (-0.03%)
VALU: 182750 -> 182718 (-0.02%); split: -0.04%, +0.03%
SALU: 18522 -> 18538 (+0.09%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29804>
2024-06-20 10:03:11 +02:00
|
|
|
/* check if register demand is low enough during and after the current instruction */
|
2019-09-17 13:22:17 +02:00
|
|
|
if (block->register_demand.exceeds(ctx.target_pressure)) {
|
2024-06-21 15:45:22 +02:00
|
|
|
RegisterDemand new_demand = instr->register_demand;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* if reg pressure is too high, spill variable with furthest next use */
|
2021-02-22 15:58:46 +01:00
|
|
|
while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) {
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
float score = 0.0;
|
2024-10-21 17:25:10 +02:00
|
|
|
Temp to_spill = Temp();
|
2024-02-21 16:48:18 +01:00
|
|
|
unsigned do_rematerialize = 0;
|
|
|
|
|
unsigned avoid_respill = 0;
|
2021-02-17 18:29:48 +01:00
|
|
|
RegType type = RegType::sgpr;
|
|
|
|
|
if (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr)
|
|
|
|
|
type = RegType::vgpr;
|
|
|
|
|
|
2024-07-08 18:51:56 +02:00
|
|
|
for (unsigned t : ctx.program->live.live_in[block_idx]) {
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
RegClass rc = ctx.program->temp_rc[t];
|
|
|
|
|
Temp var = Temp(t, rc);
|
|
|
|
|
if (rc.type() != type || current_spills.count(var) || rc.is_linear_vgpr())
|
2021-02-17 18:29:48 +01:00
|
|
|
continue;
|
2024-02-21 16:48:18 +01:00
|
|
|
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
unsigned can_rematerialize = ctx.remat.count(var);
|
|
|
|
|
unsigned loop_variable = block->loop_nest_depth && ctx.loop.back().spills.count(var);
|
2024-02-21 16:48:18 +01:00
|
|
|
if (avoid_respill > loop_variable || do_rematerialize > can_rematerialize)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (can_rematerialize > do_rematerialize || loop_variable > avoid_respill ||
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
ctx.ssa_infos[t].score() > score) {
|
2024-02-21 16:48:18 +01:00
|
|
|
/* Don't spill operands */
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
if (std::any_of(instr->operands.begin(), instr->operands.end(),
|
|
|
|
|
[&](Operand& op) { return op.isTemp() && op.getTemp() == var; }))
|
2024-02-21 16:48:18 +01:00
|
|
|
continue;
|
|
|
|
|
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
to_spill = var;
|
|
|
|
|
score = ctx.ssa_infos[t].score();
|
2021-02-17 18:29:48 +01:00
|
|
|
do_rematerialize = can_rematerialize;
|
2024-02-21 16:48:18 +01:00
|
|
|
avoid_respill = loop_variable;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
2024-10-21 17:25:10 +02:00
|
|
|
assert(to_spill != Temp());
|
2024-02-21 16:48:18 +01:00
|
|
|
|
|
|
|
|
if (avoid_respill) {
|
|
|
|
|
/* This variable is spilled at the loop-header of the current loop.
|
|
|
|
|
* Re-use the spill-slot in order to avoid an extra store.
|
|
|
|
|
*/
|
|
|
|
|
current_spills[to_spill] = ctx.loop.back().spills[to_spill];
|
|
|
|
|
spilled_registers += to_spill;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-14 13:54:39 +01:00
|
|
|
uint32_t spill_id = ctx.add_to_spills(to_spill, current_spills);
|
|
|
|
|
/* add interferences with reloads */
|
2021-07-10 12:17:37 +02:00
|
|
|
for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads)
|
2020-07-16 11:08:50 +01:00
|
|
|
ctx.add_interference(spill_id, pair.second.second);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
spilled_registers += to_spill;
|
|
|
|
|
|
|
|
|
|
/* rename if necessary */
|
2021-07-13 12:59:58 +02:00
|
|
|
if (ctx.renames[block_idx].count(to_spill)) {
|
2019-09-17 13:22:17 +02:00
|
|
|
to_spill = ctx.renames[block_idx][to_spill];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* add spill to new instructions */
|
2024-03-25 12:05:50 +01:00
|
|
|
aco_ptr<Instruction> spill{
|
2024-03-25 15:55:27 +01:00
|
|
|
create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
|
2019-09-17 13:22:17 +02:00
|
|
|
spill->operands[0] = Operand(to_spill);
|
2021-07-13 11:22:46 +02:00
|
|
|
spill->operands[1] = Operand::c32(spill_id);
|
2019-09-17 13:22:17 +02:00
|
|
|
instructions.emplace_back(std::move(spill));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-16 10:53:10 +01:00
|
|
|
for (const Definition& def : instr->definitions) {
|
|
|
|
|
if (def.isTemp() && !def.isKill())
|
2024-07-08 18:51:56 +02:00
|
|
|
ctx.program->live.live_in[block_idx].insert(def.tempId());
|
2024-02-16 10:53:10 +01:00
|
|
|
}
|
aco/spill: use average use distances in process_block()
Totals from 128 (0.16% of 79395) affected shaders: (GFX11)
Instrs: 672936 -> 672086 (-0.13%); split: -0.40%, +0.28%
CodeSize: 3574396 -> 3565540 (-0.25%); split: -0.55%, +0.30%
SpillSGPRs: 6254 -> 6636 (+6.11%); split: -0.32%, +6.43%
SpillVGPRs: 967 -> 839 (-13.24%)
Latency: 6489362 -> 6469427 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 2216723 -> 2212417 (-0.19%); split: -0.24%, +0.05%
VClause: 11670 -> 11517 (-1.31%); split: -1.50%, +0.19%
SClause: 15711 -> 15754 (+0.27%); split: -0.29%, +0.57%
Copies: 85185 -> 84155 (-1.21%); split: -2.14%, +0.93%
Branches: 18706 -> 18708 (+0.01%); split: -0.05%, +0.06%
VALU: 386892 -> 386204 (-0.18%); split: -0.40%, +0.23%
SALU: 85754 -> 86211 (+0.53%); split: -0.46%, +1.00%
VMEM: 18777 -> 18524 (-1.35%); split: -1.37%, +0.02%
VOPD: 670 -> 628 (-6.27%); split: +0.75%, -7.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27799>
2024-02-20 11:25:09 +01:00
|
|
|
/* rename operands */
|
|
|
|
|
for (Operand& op : instr->operands) {
|
|
|
|
|
if (op.isTemp()) {
|
|
|
|
|
auto rename_it = ctx.renames[block_idx].find(op.getTemp());
|
|
|
|
|
if (rename_it != ctx.renames[block_idx].end()) {
|
|
|
|
|
op.setTemp(rename_it->second);
|
|
|
|
|
} else {
|
|
|
|
|
/* prevent its defining instruction from being DCE'd if it could be rematerialized */
|
|
|
|
|
auto remat_it = ctx.remat.find(op.getTemp());
|
|
|
|
|
if (remat_it != ctx.remat.end()) {
|
|
|
|
|
ctx.unused_remats.erase(remat_it->second.instr);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-02-16 10:53:10 +01:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* add reloads and instruction to new instructions */
|
2021-07-10 12:17:37 +02:00
|
|
|
for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads) {
|
2019-09-17 13:22:17 +02:00
|
|
|
aco_ptr<Instruction> reload =
|
|
|
|
|
do_reload(ctx, pair.second.first, pair.first, pair.second.second);
|
|
|
|
|
instructions.emplace_back(std::move(reload));
|
|
|
|
|
}
|
|
|
|
|
instructions.emplace_back(std::move(instr));
|
|
|
|
|
idx++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
block->instructions = std::move(instructions);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
spill_block(spill_ctx& ctx, unsigned block_idx)
|
|
|
|
|
{
|
|
|
|
|
Block* block = &ctx.program->blocks[block_idx];
|
|
|
|
|
|
|
|
|
|
/* determine set of variables which are spilled at the beginning of the block */
|
|
|
|
|
RegisterDemand spilled_registers = init_live_in_vars(ctx, block, block_idx);
|
|
|
|
|
|
2024-03-14 14:56:18 +01:00
|
|
|
if (!(block->kind & block_kind_loop_header)) {
|
2019-09-17 13:22:17 +02:00
|
|
|
/* add spill/reload code on incoming control flow edges */
|
2024-07-08 18:51:56 +02:00
|
|
|
add_coupling_code(ctx, block, ctx.program->live.live_in[block_idx]);
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
2021-07-10 14:06:50 +02:00
|
|
|
assert(ctx.spills_exit[block_idx].empty());
|
2024-02-16 10:53:10 +01:00
|
|
|
ctx.spills_exit[block_idx] = ctx.spills_entry[block_idx];
|
|
|
|
|
process_block(ctx, block_idx, block, spilled_registers);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-01-02 14:57:02 +00:00
|
|
|
ctx.processed[block_idx] = true;
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* check if the next block leaves the current loop */
|
|
|
|
|
if (block->loop_nest_depth == 0 ||
|
|
|
|
|
ctx.program->blocks[block_idx + 1].loop_nest_depth >= block->loop_nest_depth)
|
|
|
|
|
return;
|
|
|
|
|
|
2024-03-14 14:56:18 +01:00
|
|
|
uint32_t loop_header_idx = ctx.loop.back().index;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* preserve original renames at end of loop header block */
|
2024-03-14 14:56:18 +01:00
|
|
|
aco::map<Temp, Temp> renames = std::move(ctx.renames[loop_header_idx]);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* add coupling code to all loop header predecessors */
|
2024-02-16 11:20:13 +01:00
|
|
|
for (unsigned t : ctx.loop.back().live_in)
|
|
|
|
|
ctx.ssa_infos[t].num_uses--;
|
2024-02-15 14:57:06 +01:00
|
|
|
add_coupling_code(ctx, &ctx.program->blocks[loop_header_idx], ctx.loop.back().live_in);
|
2024-03-14 14:56:18 +01:00
|
|
|
renames.swap(ctx.renames[loop_header_idx]);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2024-02-20 12:45:56 +01:00
|
|
|
/* remove loop header info from stack */
|
2024-03-14 14:56:18 +01:00
|
|
|
ctx.loop.pop_back();
|
2024-02-20 12:45:56 +01:00
|
|
|
if (renames.empty())
|
|
|
|
|
return;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2024-02-20 12:45:56 +01:00
|
|
|
/* Add the new renames to each block */
|
|
|
|
|
for (std::pair<Temp, Temp> rename : renames) {
|
|
|
|
|
/* If there is already a rename, don't overwrite it. */
|
2024-03-14 14:56:18 +01:00
|
|
|
for (unsigned idx = loop_header_idx; idx <= block_idx; idx++)
|
2024-02-20 12:45:56 +01:00
|
|
|
ctx.renames[idx].insert(rename);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* propagate new renames through loop: i.e. repair the SSA */
|
2024-03-14 14:56:18 +01:00
|
|
|
for (unsigned idx = loop_header_idx; idx <= block_idx; idx++) {
|
2024-02-20 12:45:56 +01:00
|
|
|
Block& current = ctx.program->blocks[idx];
|
|
|
|
|
/* rename all uses in this block */
|
|
|
|
|
for (aco_ptr<Instruction>& instr : current.instructions) {
|
|
|
|
|
/* no need to rename the loop header phis once again. */
|
2024-03-14 14:56:18 +01:00
|
|
|
if (idx == loop_header_idx && is_phi(instr))
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
|
2024-02-20 12:45:56 +01:00
|
|
|
for (Operand& op : instr->operands) {
|
|
|
|
|
if (!op.isTemp())
|
|
|
|
|
continue;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2024-02-20 12:45:56 +01:00
|
|
|
auto rename = renames.find(op.getTemp());
|
|
|
|
|
if (rename != renames.end())
|
|
|
|
|
op.setTemp(rename->second);
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-24 18:27:25 +02:00
|
|
|
/**
 * Emits the instructions that build the 4-dword scratch buffer resource.
 *
 * The base address comes from the program's private segment buffer:
 *  - if that temporary is empty (zero bytes), the address is loaded from the
 *    scratch address symbols,
 *  - otherwise, for non-compute hardware stages, two dwords are loaded through
 *    it with s_load_dwordx2,
 *  - otherwise it is used as is.
 * Dwords 2 and 3 are taken from ac_build_buffer_descriptor().
 *
 * \param ctx                  spiller state (program, gfx level, wave size).
 * \param bld                  builder used to emit the instructions.
 * \param apply_scratch_offset when true, the program's scratch offset is added
 *                             (with carry) to the 64-bit base address.
 * \return the s4 temporary holding the descriptor.
 */
Temp
load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
{
   Temp private_segment_buffer = ctx.program->private_segment_buffer;
   if (!private_segment_buffer.bytes()) {
      Temp addr_lo =
         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
      Temp addr_hi =
         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
      private_segment_buffer =
         bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
   } else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
      private_segment_buffer =
         bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
   }

   if (apply_scratch_offset) {
      Temp addr_lo = bld.tmp(s1);
      Temp addr_hi = bld.tmp(s1);
      bld.pseudo(aco_opcode::p_split_vector, Definition(addr_lo), Definition(addr_hi),
                 private_segment_buffer);

      /* 64-bit add: the carry of the low dword is fed into the high dword */
      Temp carry = bld.tmp(s1);
      addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
                         ctx.program->scratch_offset);
      addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
                         Operand::c32(0), bld.scc(carry));

      private_segment_buffer =
         bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
   }

   /* value-initialize every field before filling in the ones we care about */
   struct ac_buffer_state ac_state = {};
   uint32_t desc[4];

   ac_state.size = 0xffffffff;
   ac_state.format = PIPE_FORMAT_R32_FLOAT;
   for (int i = 0; i < 4; i++)
      ac_state.swizzle[i] = PIPE_SWIZZLE_0;
   /* older generations need element size = 4 bytes. element size removed in GFX9 */
   ac_state.element_size = ctx.program->gfx_level <= GFX8 ? 1u : 0u;
   ac_state.index_stride = ctx.program->wave_size == 64 ? 3u : 2u;
   ac_state.add_tid = true;
   ac_state.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW;

   ac_build_buffer_descriptor(ctx.program->gfx_level, &ac_state, desc);

   /* only dwords 2 and 3 of the generated descriptor are used; the address
    * dwords come from private_segment_buffer */
   return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
                     Operand::c32(desc[2]), Operand::c32(desc[3]));
}
|
|
|
|
|
|
2022-05-19 16:09:13 +01:00
|
|
|
void
|
|
|
|
|
setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
|
|
|
|
|
std::vector<aco_ptr<Instruction>>& instructions, uint32_t spill_slot,
|
2023-08-15 14:55:10 +02:00
|
|
|
Temp& scratch_offset, unsigned* offset)
|
2022-05-19 16:09:13 +01:00
|
|
|
{
|
2023-08-15 14:55:10 +02:00
|
|
|
uint32_t scratch_size = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
|
|
|
|
|
|
|
|
|
uint32_t offset_range;
|
|
|
|
|
if (ctx.program->gfx_level >= GFX9) {
|
|
|
|
|
offset_range =
|
|
|
|
|
ctx.program->dev.scratch_global_offset_max - ctx.program->dev.scratch_global_offset_min;
|
|
|
|
|
} else {
|
|
|
|
|
if (scratch_size < 4095)
|
|
|
|
|
offset_range = 4095 - scratch_size;
|
|
|
|
|
else
|
|
|
|
|
offset_range = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool overflow = (ctx.vgpr_spill_slots - 1) * 4 > offset_range;
|
|
|
|
|
|
|
|
|
|
Builder rsrc_bld(ctx.program);
|
|
|
|
|
if (block.kind & block_kind_top_level) {
|
|
|
|
|
rsrc_bld.reset(&instructions);
|
|
|
|
|
} else if (ctx.scratch_rsrc == Temp() && (!overflow || ctx.program->gfx_level < GFX9)) {
|
|
|
|
|
Block* tl_block = █
|
|
|
|
|
while (!(tl_block->kind & block_kind_top_level))
|
|
|
|
|
tl_block = &ctx.program->blocks[tl_block->linear_idom];
|
|
|
|
|
|
|
|
|
|
/* find p_logical_end */
|
|
|
|
|
std::vector<aco_ptr<Instruction>>& prev_instructions = tl_block->instructions;
|
|
|
|
|
unsigned idx = prev_instructions.size() - 1;
|
|
|
|
|
while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
|
|
|
|
|
idx--;
|
|
|
|
|
rsrc_bld.reset(&prev_instructions, std::next(prev_instructions.begin(), idx));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If spilling overflows the constant offset range at any point, we need to emit the soffset
|
|
|
|
|
* before every spill/reload to avoid increasing register demand.
|
|
|
|
|
*/
|
|
|
|
|
Builder offset_bld = rsrc_bld;
|
|
|
|
|
if (overflow)
|
|
|
|
|
offset_bld.reset(&instructions);
|
2022-05-19 16:09:13 +01:00
|
|
|
|
|
|
|
|
*offset = spill_slot * 4;
|
2022-05-19 16:09:13 +01:00
|
|
|
if (ctx.program->gfx_level >= GFX9) {
|
|
|
|
|
*offset += ctx.program->dev.scratch_global_offset_min;
|
|
|
|
|
|
2023-08-15 14:55:10 +02:00
|
|
|
if (ctx.scratch_rsrc == Temp() || overflow) {
|
|
|
|
|
int32_t saddr = scratch_size - ctx.program->dev.scratch_global_offset_min;
|
|
|
|
|
if ((int32_t)*offset > (int32_t)ctx.program->dev.scratch_global_offset_max) {
|
|
|
|
|
saddr += (int32_t)*offset;
|
|
|
|
|
*offset = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* GFX9+ uses scratch_* instructions, which don't use a resource. */
|
|
|
|
|
ctx.scratch_rsrc = offset_bld.copy(offset_bld.def(s1), Operand::c32(saddr));
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
|
|
|
|
} else {
|
2023-08-15 14:55:10 +02:00
|
|
|
if (ctx.scratch_rsrc == Temp())
|
|
|
|
|
ctx.scratch_rsrc = load_scratch_resource(ctx, rsrc_bld, overflow);
|
|
|
|
|
|
|
|
|
|
if (overflow) {
|
|
|
|
|
uint32_t soffset =
|
|
|
|
|
ctx.program->config->scratch_bytes_per_wave + *offset * ctx.program->wave_size;
|
|
|
|
|
*offset = 0;
|
|
|
|
|
|
|
|
|
|
scratch_offset = offset_bld.copy(offset_bld.def(s1), Operand::c32(soffset));
|
|
|
|
|
} else {
|
|
|
|
|
*offset += scratch_size;
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& instructions,
|
|
|
|
|
aco_ptr<Instruction>& spill, std::vector<uint32_t>& slots)
|
|
|
|
|
{
|
|
|
|
|
ctx.program->config->spilled_vgprs += spill->operands[0].size();
|
|
|
|
|
|
|
|
|
|
uint32_t spill_id = spill->operands[1].constantValue();
|
|
|
|
|
uint32_t spill_slot = slots[spill_id];
|
|
|
|
|
|
2023-08-15 14:55:10 +02:00
|
|
|
Temp scratch_offset = ctx.program->scratch_offset;
|
2022-05-19 16:09:13 +01:00
|
|
|
unsigned offset;
|
2023-08-15 14:55:10 +02:00
|
|
|
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
|
2022-05-19 16:09:13 +01:00
|
|
|
|
|
|
|
|
assert(spill->operands[0].isTemp());
|
|
|
|
|
Temp temp = spill->operands[0].getTemp();
|
|
|
|
|
assert(temp.type() == RegType::vgpr && !temp.is_linear());
|
|
|
|
|
|
|
|
|
|
Builder bld(ctx.program, &instructions);
|
|
|
|
|
if (temp.size() > 1) {
|
2024-03-25 15:55:27 +01:00
|
|
|
Instruction* split{
|
|
|
|
|
create_instruction(aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())};
|
2022-05-19 16:09:13 +01:00
|
|
|
split->operands[0] = Operand(temp);
|
|
|
|
|
for (unsigned i = 0; i < temp.size(); i++)
|
|
|
|
|
split->definitions[i] = bld.def(v1);
|
|
|
|
|
bld.insert(split);
|
|
|
|
|
for (unsigned i = 0; i < temp.size(); i++, offset += 4) {
|
|
|
|
|
Temp elem = split->definitions[i].getTemp();
|
2022-05-19 16:09:13 +01:00
|
|
|
if (ctx.program->gfx_level >= GFX9) {
|
|
|
|
|
bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, elem,
|
|
|
|
|
offset, memory_sync_info(storage_vgpr_spill, semantic_private));
|
|
|
|
|
} else {
|
2023-08-15 14:55:10 +02:00
|
|
|
Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc,
|
2024-05-14 18:34:01 +01:00
|
|
|
Operand(v1), scratch_offset, elem, offset, false);
|
2022-05-19 16:09:13 +01:00
|
|
|
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
2024-05-14 18:34:01 +01:00
|
|
|
instr->mubuf().cache.value = ac_swizzled;
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
2022-05-19 16:09:13 +01:00
|
|
|
} else if (ctx.program->gfx_level >= GFX9) {
|
|
|
|
|
bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, temp, offset,
|
|
|
|
|
memory_sync_info(storage_vgpr_spill, semantic_private));
|
2022-05-19 16:09:13 +01:00
|
|
|
} else {
|
|
|
|
|
Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1),
|
2024-05-14 18:34:01 +01:00
|
|
|
scratch_offset, temp, offset, false);
|
2022-05-19 16:09:13 +01:00
|
|
|
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
2024-05-14 18:34:01 +01:00
|
|
|
instr->mubuf().cache.value = ac_swizzled;
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& instructions,
|
|
|
|
|
aco_ptr<Instruction>& reload, std::vector<uint32_t>& slots)
|
|
|
|
|
{
|
|
|
|
|
uint32_t spill_id = reload->operands[0].constantValue();
|
|
|
|
|
uint32_t spill_slot = slots[spill_id];
|
|
|
|
|
|
2023-08-15 14:55:10 +02:00
|
|
|
Temp scratch_offset = ctx.program->scratch_offset;
|
2022-05-19 16:09:13 +01:00
|
|
|
unsigned offset;
|
2023-08-15 14:55:10 +02:00
|
|
|
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
|
2022-05-19 16:09:13 +01:00
|
|
|
|
|
|
|
|
Definition def = reload->definitions[0];
|
|
|
|
|
|
|
|
|
|
Builder bld(ctx.program, &instructions);
|
|
|
|
|
if (def.size() > 1) {
|
2024-03-25 15:55:27 +01:00
|
|
|
Instruction* vec{
|
|
|
|
|
create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
|
2022-05-19 16:09:13 +01:00
|
|
|
vec->definitions[0] = def;
|
|
|
|
|
for (unsigned i = 0; i < def.size(); i++, offset += 4) {
|
|
|
|
|
Temp tmp = bld.tmp(v1);
|
|
|
|
|
vec->operands[i] = Operand(tmp);
|
2022-05-19 16:09:13 +01:00
|
|
|
if (ctx.program->gfx_level >= GFX9) {
|
|
|
|
|
bld.scratch(aco_opcode::scratch_load_dword, Definition(tmp), Operand(v1),
|
|
|
|
|
ctx.scratch_rsrc, offset,
|
|
|
|
|
memory_sync_info(storage_vgpr_spill, semantic_private));
|
|
|
|
|
} else {
|
|
|
|
|
Instruction* instr =
|
|
|
|
|
bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc,
|
2024-05-14 18:34:01 +01:00
|
|
|
Operand(v1), scratch_offset, offset, false);
|
2022-05-19 16:09:13 +01:00
|
|
|
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
2024-05-14 18:34:01 +01:00
|
|
|
instr->mubuf().cache.value = ac_swizzled;
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
|
|
|
|
bld.insert(vec);
|
2022-05-19 16:09:13 +01:00
|
|
|
} else if (ctx.program->gfx_level >= GFX9) {
|
|
|
|
|
bld.scratch(aco_opcode::scratch_load_dword, def, Operand(v1), ctx.scratch_rsrc, offset,
|
|
|
|
|
memory_sync_info(storage_vgpr_spill, semantic_private));
|
2022-05-19 16:09:13 +01:00
|
|
|
} else {
|
|
|
|
|
Instruction* instr = bld.mubuf(aco_opcode::buffer_load_dword, def, ctx.scratch_rsrc,
|
2024-05-14 18:34:01 +01:00
|
|
|
Operand(v1), scratch_offset, offset, false);
|
2022-05-19 16:09:13 +01:00
|
|
|
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
2024-05-14 18:34:01 +01:00
|
|
|
instr->mubuf().cache.value = ac_swizzled;
|
2022-05-19 16:09:13 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
void
|
|
|
|
|
add_interferences(spill_ctx& ctx, std::vector<bool>& is_assigned, std::vector<uint32_t>& slots,
|
|
|
|
|
std::vector<bool>& slots_used, unsigned id)
|
|
|
|
|
{
|
|
|
|
|
for (unsigned other : ctx.interferences[id].second) {
|
|
|
|
|
if (!is_assigned[other])
|
|
|
|
|
continue;
|
2019-10-16 16:39:06 +02:00
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
RegClass other_rc = ctx.interferences[other].first;
|
2020-07-16 11:08:50 +01:00
|
|
|
unsigned slot = slots[other];
|
|
|
|
|
std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true);
|
2020-07-07 13:10:38 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-09 10:14:54 +02:00
|
|
|
unsigned
|
2022-05-19 16:09:13 +01:00
|
|
|
find_available_slot(std::vector<bool>& used, unsigned wave_size, unsigned size, bool is_sgpr)
|
2020-07-07 13:10:38 +01:00
|
|
|
{
|
|
|
|
|
unsigned wave_size_minus_one = wave_size - 1;
|
|
|
|
|
unsigned slot = 0;
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
bool available = true;
|
|
|
|
|
for (unsigned i = 0; i < size; i++) {
|
|
|
|
|
if (slot + i < used.size() && used[slot + i]) {
|
|
|
|
|
available = false;
|
|
|
|
|
break;
|
2019-10-16 16:39:06 +02:00
|
|
|
}
|
|
|
|
|
}
|
2020-07-07 13:10:38 +01:00
|
|
|
if (!available) {
|
|
|
|
|
slot++;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (is_sgpr && ((slot & wave_size_minus_one) > wave_size - size)) {
|
|
|
|
|
slot = align(slot, wave_size);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::fill(used.begin(), used.end(), false);
|
|
|
|
|
|
|
|
|
|
if (slot + size > used.size())
|
|
|
|
|
used.resize(slot + size);
|
|
|
|
|
|
|
|
|
|
return slot;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
2020-07-07 13:10:38 +01:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
void
|
|
|
|
|
assign_spill_slots_helper(spill_ctx& ctx, RegType type, std::vector<bool>& is_assigned,
|
|
|
|
|
std::vector<uint32_t>& slots, unsigned* num_slots)
|
|
|
|
|
{
|
2022-05-19 16:09:13 +01:00
|
|
|
std::vector<bool> slots_used;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
/* assign slots for ids with affinities first */
|
|
|
|
|
for (std::vector<uint32_t>& vec : ctx.affinities) {
|
|
|
|
|
if (ctx.interferences[vec[0]].first.type() != type)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
for (unsigned id : vec) {
|
|
|
|
|
if (!ctx.is_reloaded[id])
|
2019-09-17 13:22:17 +02:00
|
|
|
continue;
|
|
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
add_interferences(ctx, is_assigned, slots, slots_used, id);
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-19 16:09:13 +01:00
|
|
|
unsigned slot = find_available_slot(
|
|
|
|
|
slots_used, ctx.wave_size, ctx.interferences[vec[0]].first.size(), type == RegType::sgpr);
|
2020-07-07 13:10:38 +01:00
|
|
|
|
|
|
|
|
for (unsigned id : vec) {
|
|
|
|
|
assert(!is_assigned[id]);
|
|
|
|
|
|
|
|
|
|
if (ctx.is_reloaded[id]) {
|
|
|
|
|
slots[id] = slot;
|
|
|
|
|
is_assigned[id] = true;
|
2019-10-16 16:39:06 +02:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
/* assign slots for ids without affinities */
|
|
|
|
|
for (unsigned id = 0; id < ctx.interferences.size(); id++) {
|
|
|
|
|
if (is_assigned[id] || !ctx.is_reloaded[id] || ctx.interferences[id].first.type() != type)
|
|
|
|
|
continue;
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
add_interferences(ctx, is_assigned, slots, slots_used, id);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2022-05-19 16:09:13 +01:00
|
|
|
unsigned slot = find_available_slot(
|
|
|
|
|
slots_used, ctx.wave_size, ctx.interferences[id].first.size(), type == RegType::sgpr);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
slots[id] = slot;
|
|
|
|
|
is_assigned[id] = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*num_slots = slots_used.size();
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-10 17:33:52 +00:00
|
|
|
void
|
|
|
|
|
end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector<Temp>& vgpr_spill_temps,
|
|
|
|
|
const std::vector<uint32_t>& slots,
|
2021-07-21 17:19:13 +02:00
|
|
|
const aco::unordered_map<Temp, uint32_t>& spills)
|
2023-01-10 17:33:52 +00:00
|
|
|
{
|
|
|
|
|
std::vector<bool> is_used(vgpr_spill_temps.size());
|
|
|
|
|
for (std::pair<Temp, uint32_t> pair : spills) {
|
|
|
|
|
if (pair.first.type() == RegType::sgpr && ctx.is_reloaded[pair.second])
|
|
|
|
|
is_used[slots[pair.second] / ctx.wave_size] = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<Temp> temps;
|
|
|
|
|
for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
|
|
|
|
|
if (vgpr_spill_temps[i].id() && !is_used[i]) {
|
|
|
|
|
temps.push_back(vgpr_spill_temps[i]);
|
|
|
|
|
vgpr_spill_temps[i] = Temp();
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-03-10 20:59:36 +01:00
|
|
|
if (temps.empty() || block.linear_preds.empty())
|
2023-01-10 17:33:52 +00:00
|
|
|
return;
|
|
|
|
|
|
2024-03-25 15:55:27 +01:00
|
|
|
aco_ptr<Instruction> destr{
|
|
|
|
|
create_instruction(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)};
|
2024-07-25 17:15:15 +02:00
|
|
|
for (unsigned i = 0; i < temps.size(); i++)
|
2023-01-10 17:33:52 +00:00
|
|
|
destr->operands[i] = Operand(temps[i]);
|
|
|
|
|
|
|
|
|
|
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
|
|
|
|
|
while (is_phi(*it))
|
|
|
|
|
++it;
|
|
|
|
|
block.instructions.insert(it, std::move(destr));
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
void
|
|
|
|
|
assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
|
|
|
|
|
{
|
|
|
|
|
std::vector<uint32_t> slots(ctx.interferences.size());
|
|
|
|
|
std::vector<bool> is_assigned(ctx.interferences.size());
|
|
|
|
|
|
|
|
|
|
/* first, handle affinities: just merge all interferences into both spill ids */
|
|
|
|
|
for (std::vector<uint32_t>& vec : ctx.affinities) {
|
|
|
|
|
for (unsigned i = 0; i < vec.size(); i++) {
|
|
|
|
|
for (unsigned j = i + 1; j < vec.size(); j++) {
|
|
|
|
|
assert(vec[i] != vec[j]);
|
|
|
|
|
bool reloaded = ctx.is_reloaded[vec[i]] || ctx.is_reloaded[vec[j]];
|
|
|
|
|
ctx.is_reloaded[vec[i]] = reloaded;
|
|
|
|
|
ctx.is_reloaded[vec[j]] = reloaded;
|
2019-10-24 18:27:25 +02:00
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
2020-07-07 13:10:38 +01:00
|
|
|
for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++)
|
|
|
|
|
for (ASSERTED uint32_t id : ctx.interferences[i].second)
|
|
|
|
|
assert(i != id);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
/* for each spill slot, assign as many spill ids as possible */
|
2022-05-19 16:09:13 +01:00
|
|
|
assign_spill_slots_helper(ctx, RegType::sgpr, is_assigned, slots, &ctx.sgpr_spill_slots);
|
|
|
|
|
assign_spill_slots_helper(ctx, RegType::vgpr, is_assigned, slots, &ctx.vgpr_spill_slots);
|
2019-10-24 18:27:25 +02:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
for (unsigned id = 0; id < is_assigned.size(); id++)
|
|
|
|
|
assert(is_assigned[id] || !ctx.is_reloaded[id]);
|
|
|
|
|
|
2019-10-16 16:39:06 +02:00
|
|
|
for (std::vector<uint32_t>& vec : ctx.affinities) {
|
|
|
|
|
for (unsigned i = 0; i < vec.size(); i++) {
|
|
|
|
|
for (unsigned j = i + 1; j < vec.size(); j++) {
|
|
|
|
|
assert(is_assigned[vec[i]] == is_assigned[vec[j]]);
|
|
|
|
|
if (!is_assigned[vec[i]])
|
|
|
|
|
continue;
|
|
|
|
|
assert(ctx.is_reloaded[vec[i]] == ctx.is_reloaded[vec[j]]);
|
|
|
|
|
assert(ctx.interferences[vec[i]].first.type() ==
|
|
|
|
|
ctx.interferences[vec[j]].first.type());
|
2020-07-07 13:10:38 +01:00
|
|
|
assert(slots[vec[i]] == slots[vec[j]]);
|
2019-10-16 16:39:06 +02:00
|
|
|
}
|
|
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* hope, we didn't mess up */
|
2022-05-19 16:09:13 +01:00
|
|
|
std::vector<Temp> vgpr_spill_temps((ctx.sgpr_spill_slots + ctx.wave_size - 1) / ctx.wave_size);
|
2019-09-17 13:22:17 +02:00
|
|
|
assert(vgpr_spill_temps.size() <= spills_to_vgpr);
|
|
|
|
|
|
|
|
|
|
/* replace pseudo instructions with actual hardware instructions */
|
|
|
|
|
unsigned last_top_level_block_idx = 0;
|
|
|
|
|
for (Block& block : ctx.program->blocks) {
|
|
|
|
|
|
2023-03-10 20:59:36 +01:00
|
|
|
if (block.kind & block_kind_top_level) {
|
2019-09-17 13:22:17 +02:00
|
|
|
last_top_level_block_idx = block.index;
|
|
|
|
|
|
2023-01-10 17:33:52 +00:00
|
|
|
end_unused_spill_vgprs(ctx, block, vgpr_spill_temps, slots, ctx.spills_entry[block.index]);
|
2023-06-12 10:18:37 +02:00
|
|
|
|
|
|
|
|
/* If the block has no predecessors (for example in RT resume shaders),
|
|
|
|
|
* we cannot reuse the current scratch_rsrc temp because its definition is unreachable */
|
|
|
|
|
if (block.linear_preds.empty())
|
|
|
|
|
ctx.scratch_rsrc = Temp();
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<aco_ptr<Instruction>>::iterator it;
|
|
|
|
|
std::vector<aco_ptr<Instruction>> instructions;
|
|
|
|
|
instructions.reserve(block.instructions.size());
|
2019-10-24 18:27:25 +02:00
|
|
|
Builder bld(ctx.program, &instructions);
|
2019-09-17 13:22:17 +02:00
|
|
|
for (it = block.instructions.begin(); it != block.instructions.end(); ++it) {
|
|
|
|
|
|
|
|
|
|
if ((*it)->opcode == aco_opcode::p_spill) {
|
|
|
|
|
uint32_t spill_id = (*it)->operands[1].constantValue();
|
|
|
|
|
|
|
|
|
|
if (!ctx.is_reloaded[spill_id]) {
|
|
|
|
|
/* never reloaded, so don't spill */
|
2020-07-07 13:10:38 +01:00
|
|
|
} else if (!is_assigned[spill_id]) {
|
|
|
|
|
unreachable("No spill slot assigned for spill id");
|
|
|
|
|
} else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
|
2022-05-19 16:09:13 +01:00
|
|
|
spill_vgpr(ctx, block, instructions, *it, slots);
|
2020-07-07 13:10:38 +01:00
|
|
|
} else {
|
2019-09-17 13:22:17 +02:00
|
|
|
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
|
|
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
uint32_t spill_slot = slots[spill_id];
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* check if the linear vgpr already exists */
|
2019-10-28 17:15:17 +01:00
|
|
|
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
|
2020-09-14 20:58:33 +01:00
|
|
|
Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
|
2019-10-28 17:15:17 +01:00
|
|
|
vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
|
2024-03-25 15:55:27 +01:00
|
|
|
aco_ptr<Instruction> create{
|
|
|
|
|
create_instruction(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
|
2019-09-17 13:22:17 +02:00
|
|
|
create->definitions[0] = Definition(linear_vgpr);
|
|
|
|
|
/* find the right place to insert this definition */
|
|
|
|
|
if (last_top_level_block_idx == block.index) {
|
|
|
|
|
/* insert right before the current instruction */
|
|
|
|
|
instructions.emplace_back(std::move(create));
|
|
|
|
|
} else {
|
|
|
|
|
assert(last_top_level_block_idx < block.index);
|
2024-05-14 06:50:55 +02:00
|
|
|
/* insert after p_logical_end of the last top-level block */
|
2020-11-03 14:40:05 +01:00
|
|
|
std::vector<aco_ptr<Instruction>>& block_instrs =
|
|
|
|
|
ctx.program->blocks[last_top_level_block_idx].instructions;
|
2024-05-14 06:50:55 +02:00
|
|
|
auto insert_point =
|
|
|
|
|
std::find_if(block_instrs.rbegin(), block_instrs.rend(),
|
|
|
|
|
[](const auto& iter) {
|
|
|
|
|
return iter->opcode == aco_opcode::p_logical_end;
|
|
|
|
|
})
|
|
|
|
|
.base();
|
|
|
|
|
block_instrs.insert(insert_point, std::move(create));
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* spill sgpr: just add the vgpr temp to operands */
|
2024-03-25 15:55:27 +01:00
|
|
|
Instruction* spill = create_instruction(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
|
2019-10-28 17:15:17 +01:00
|
|
|
spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
|
2021-07-13 11:22:46 +02:00
|
|
|
spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
|
2019-09-17 13:22:17 +02:00
|
|
|
spill->operands[2] = (*it)->operands[0];
|
|
|
|
|
instructions.emplace_back(aco_ptr<Instruction>(spill));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} else if ((*it)->opcode == aco_opcode::p_reload) {
|
|
|
|
|
uint32_t spill_id = (*it)->operands[0].constantValue();
|
|
|
|
|
assert(ctx.is_reloaded[spill_id]);
|
|
|
|
|
|
2020-07-07 13:10:38 +01:00
|
|
|
if (!is_assigned[spill_id]) {
|
|
|
|
|
unreachable("No spill slot assigned for spill id");
|
|
|
|
|
} else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
|
2022-05-19 16:09:13 +01:00
|
|
|
reload_vgpr(ctx, block, instructions, *it, slots);
|
2020-07-07 13:10:38 +01:00
|
|
|
} else {
|
|
|
|
|
uint32_t spill_slot = slots[spill_id];
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* check if the linear vgpr already exists */
|
2019-10-28 17:15:17 +01:00
|
|
|
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
|
2020-09-14 20:58:33 +01:00
|
|
|
Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
|
2019-10-28 17:15:17 +01:00
|
|
|
vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
|
2024-03-25 15:55:27 +01:00
|
|
|
aco_ptr<Instruction> create{
|
|
|
|
|
create_instruction(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
|
2019-09-17 13:22:17 +02:00
|
|
|
create->definitions[0] = Definition(linear_vgpr);
|
|
|
|
|
/* find the right place to insert this definition */
|
|
|
|
|
if (last_top_level_block_idx == block.index) {
|
|
|
|
|
/* insert right before the current instruction */
|
|
|
|
|
instructions.emplace_back(std::move(create));
|
|
|
|
|
} else {
|
|
|
|
|
assert(last_top_level_block_idx < block.index);
|
2024-05-14 06:50:55 +02:00
|
|
|
/* insert after p_logical_end of the last top-level block */
|
2020-11-03 14:40:05 +01:00
|
|
|
std::vector<aco_ptr<Instruction>>& block_instrs =
|
|
|
|
|
ctx.program->blocks[last_top_level_block_idx].instructions;
|
2024-05-14 06:50:55 +02:00
|
|
|
auto insert_point =
|
|
|
|
|
std::find_if(block_instrs.rbegin(), block_instrs.rend(),
|
|
|
|
|
[](const auto& iter) {
|
|
|
|
|
return iter->opcode == aco_opcode::p_logical_end;
|
|
|
|
|
})
|
|
|
|
|
.base();
|
|
|
|
|
block_instrs.insert(insert_point, std::move(create));
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* reload sgpr: just add the vgpr temp to operands */
|
2024-03-25 15:55:27 +01:00
|
|
|
Instruction* reload = create_instruction(aco_opcode::p_reload, Format::PSEUDO, 2, 1);
|
2019-10-28 17:15:17 +01:00
|
|
|
reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
|
2021-07-13 11:22:46 +02:00
|
|
|
reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
|
2019-09-17 13:22:17 +02:00
|
|
|
reload->definitions[0] = (*it)->definitions[0];
|
|
|
|
|
instructions.emplace_back(aco_ptr<Instruction>(reload));
|
|
|
|
|
}
|
2021-07-15 15:11:44 +02:00
|
|
|
} else if (!ctx.unused_remats.count(it->get())) {
|
2019-09-17 13:22:17 +02:00
|
|
|
instructions.emplace_back(std::move(*it));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
block.instructions = std::move(instructions);
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-24 18:27:25 +02:00
|
|
|
/* update required scratch memory */
|
2023-01-05 14:01:21 +00:00
|
|
|
ctx.program->config->scratch_bytes_per_wave += ctx.vgpr_spill_slots * 4 * ctx.program->wave_size;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} /* end namespace */
|
|
|
|
|
|
2020-10-08 10:12:58 +02:00
|
|
|
void
|
2024-06-13 11:55:27 +02:00
|
|
|
spill(Program* program)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
|
|
|
|
program->config->spilled_vgprs = 0;
|
|
|
|
|
program->config->spilled_sgprs = 0;
|
|
|
|
|
|
2021-04-20 17:35:41 +01:00
|
|
|
program->progress = CompilationProgress::after_spilling;
|
|
|
|
|
|
2019-10-24 11:38:37 +02:00
|
|
|
/* no spilling when register pressure is low enough */
|
|
|
|
|
if (program->num_waves > 0)
|
2019-09-17 13:22:17 +02:00
|
|
|
return;
|
|
|
|
|
|
2019-10-15 18:23:52 +02:00
|
|
|
/* lower to CSSA before spilling to ensure correctness w.r.t. phis */
|
2024-06-13 11:55:27 +02:00
|
|
|
lower_to_cssa(program);
|
2019-10-15 18:23:52 +02:00
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
/* calculate target register demand */
|
2021-05-11 22:58:27 +02:00
|
|
|
const RegisterDemand demand = program->max_reg_demand; /* current max */
|
|
|
|
|
const uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
|
|
|
|
|
const uint16_t vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
|
|
|
|
|
uint16_t extra_vgprs = 0;
|
|
|
|
|
uint16_t extra_sgprs = 0;
|
|
|
|
|
|
|
|
|
|
/* calculate extra VGPRs required for spilling SGPRs */
|
|
|
|
|
if (demand.sgpr > sgpr_limit) {
|
|
|
|
|
unsigned sgpr_spills = demand.sgpr - sgpr_limit;
|
2024-01-11 10:38:40 +01:00
|
|
|
extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
|
2021-05-11 22:58:27 +02:00
|
|
|
}
|
|
|
|
|
/* add extra SGPRs required for spilling VGPRs */
|
|
|
|
|
if (demand.vgpr + extra_vgprs > vgpr_limit) {
|
2022-05-19 16:09:13 +01:00
|
|
|
if (program->gfx_level >= GFX9)
|
|
|
|
|
extra_sgprs = 1; /* SADDR */
|
|
|
|
|
else
|
|
|
|
|
extra_sgprs = 5; /* scratch_resource (s4) + scratch_offset (s1) */
|
2021-05-11 22:58:27 +02:00
|
|
|
if (demand.sgpr + extra_sgprs > sgpr_limit) {
|
|
|
|
|
/* re-calculate in case something has changed */
|
|
|
|
|
unsigned sgpr_spills = demand.sgpr + extra_sgprs - sgpr_limit;
|
2024-01-11 10:38:40 +01:00
|
|
|
extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
|
2021-05-11 22:58:27 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
/* the spiller has to target the following register demand */
|
|
|
|
|
const RegisterDemand target(vgpr_limit - extra_vgprs, sgpr_limit - extra_sgprs);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* initialize ctx */
|
2024-06-13 11:55:27 +02:00
|
|
|
spill_ctx ctx(target, program);
|
2024-02-16 11:20:13 +01:00
|
|
|
gather_ssa_use_info(ctx);
|
2019-09-17 13:22:17 +02:00
|
|
|
get_rematerialize_info(ctx);
|
|
|
|
|
|
|
|
|
|
/* create spills and reloads */
|
|
|
|
|
for (unsigned i = 0; i < program->blocks.size(); i++)
|
|
|
|
|
spill_block(ctx, i);
|
|
|
|
|
|
|
|
|
|
/* assign spill slots and DCE rematerialized code */
|
2021-05-11 22:58:27 +02:00
|
|
|
assign_spill_slots(ctx, extra_vgprs);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
/* update live variable information */
|
2024-06-13 11:55:27 +02:00
|
|
|
live_var_analysis(program);
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2020-01-10 16:16:43 +00:00
|
|
|
assert(program->num_waves > 0);
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace aco
|