mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 22:10:11 +01:00
aco: prefer 4-byte aligned definitions
shader-db (Navi, fp16 enabled):
Totals from 42 (0.03% of 127638) affected shaders:
CodeSize: 811984 -> 806224 (-0.71%)
Instrs: 155733 -> 155939 (+0.13%); split: -0.04%, +0.18%
Cycles: 1982568 -> 1984400 (+0.09%); split: -0.06%, +0.15%
VMEM: 7187 -> 7121 (-0.92%); split: +0.86%, -1.78%
SMEM: 1770 -> 1769 (-0.06%)
VClause: 1475 -> 1476 (+0.07%)
Copies: 12406 -> 12606 (+1.61%); split: -0.46%, +2.07%
Branches: 5901 -> 5900 (-0.02%); split: -0.25%, +0.24%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5040>
This commit is contained in:
parent
56345b8c61
commit
62ea429a99
1 changed file with 30 additions and 30 deletions
|
|
@@ -567,38 +567,9 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
|
|||
uint32_t lb = info.lb;
|
||||
uint32_t ub = info.ub;
|
||||
uint32_t size = info.size;
|
||||
uint32_t stride = info.stride;
|
||||
uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride;
|
||||
RegClass rc = info.rc;
|
||||
|
||||
if (rc.is_subdword()) {
|
||||
for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
|
||||
assert(reg_file[entry.first] == 0xF0000000);
|
||||
if (lb > entry.first || entry.first >= ub)
|
||||
continue;
|
||||
|
||||
for (unsigned i = 0; i < 4; i+= stride) {
|
||||
if (entry.second[i] != 0)
|
||||
continue;
|
||||
|
||||
bool reg_found = true;
|
||||
for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
|
||||
reg_found &= entry.second[i + j] == 0;
|
||||
|
||||
/* check neighboring reg if needed */
|
||||
reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
|
||||
if (reg_found) {
|
||||
PhysReg res{entry.first};
|
||||
res.reg_b += i;
|
||||
adjust_max_used_regs(ctx, rc, entry.first);
|
||||
return {res, true};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stride = 1; /* stride in full registers */
|
||||
rc = info.rc = RegClass(RegType::vgpr, size);
|
||||
}
|
||||
|
||||
if (stride == 1) {
|
||||
|
||||
for (unsigned stride = 8; stride > 1; stride /= 2) {
|
||||
|
|
@@ -689,6 +660,35 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
|
|||
reg_lo += stride;
|
||||
}
|
||||
|
||||
/* do this late because using the upper bytes of a register can require
|
||||
* larger instruction encodings or copies
|
||||
* TODO: don't do this in situations where it doesn't benefit */
|
||||
if (rc.is_subdword()) {
|
||||
for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
|
||||
assert(reg_file[entry.first] == 0xF0000000);
|
||||
if (lb > entry.first || entry.first >= ub)
|
||||
continue;
|
||||
|
||||
for (unsigned i = 0; i < 4; i+= info.stride) {
|
||||
if (entry.second[i] != 0)
|
||||
continue;
|
||||
|
||||
bool reg_found = true;
|
||||
for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
|
||||
reg_found &= entry.second[i + j] == 0;
|
||||
|
||||
/* check neighboring reg if needed */
|
||||
reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
|
||||
if (reg_found) {
|
||||
PhysReg res{entry.first};
|
||||
res.reg_b += i;
|
||||
adjust_max_used_regs(ctx, rc, entry.first);
|
||||
return {res, true};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {{}, false};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue