aco: fix edge check with sub-dword temporaries

Fixes a register allocation (RA) failure for a parallel-rdp shader on Pitcairn.

fossil-db (Navi):
Totals from 2 (0.00% of 128733) affected shaders:
CodeSize: 203656 -> 205724 (+1.02%)
Instrs: 32267 -> 32529 (+0.81%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5544>
This commit is contained in:
Rhys Perry 2020-06-18 13:30:50 +01:00 committed by Marge Bot
parent 747e808697
commit 897a47d847
2 changed files with 9 additions and 5 deletions

View file

@ -307,7 +307,7 @@ struct PhysReg {
constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
constexpr PhysReg advance(unsigned bytes) const { PhysReg res = *this; res.reg_b += bytes; return res; }
constexpr PhysReg advance(int bytes) const { PhysReg res = *this; res.reg_b += bytes; return res; }
uint16_t reg_b = 0;
};

View file

@ -214,6 +214,10 @@ public:
clear(def.physReg(), def.regClass());
}
/* Look up the id recorded for the location addressed by `reg`.
 *
 * The per-register entry in `regs` is returned as-is unless it equals
 * 0xF0000000; in that case the id is read from `subdword_regs`, indexed by
 * the register and then by `reg.byte()`.
 * NOTE(review): 0xF0000000 presumably marks a register that is shared by
 * several sub-dword temporaries -- confirm against the rest of the class.
 */
unsigned get_id(PhysReg reg) {
   const auto dword_entry = regs[reg];
   if (dword_entry != 0xF0000000)
      return dword_entry;

   /* Only consult the per-byte table when the sentinel is present. */
   return subdword_regs[reg][reg.byte()];
}
private:
void fill(PhysReg start, unsigned size, uint32_t val) {
for (unsigned i = 0; i < size; i++)
@ -942,9 +946,9 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
unsigned reg_hi = lb + size - 1;
for (reg_lo = lb, reg_hi = lb + size - 1; reg_hi < ub; reg_lo += stride, reg_hi += stride) {
/* first check the edges: this is what we have to fix to allow for num_moves > size */
if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file[reg_lo] == reg_file[reg_lo - 1])
if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
continue;
if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file[reg_hi] == reg_file[reg_hi + 1])
if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
continue;
/* second, check that we have at most k=num_moves elements in the window
@ -1300,9 +1304,9 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
// TODO: this can be improved */
if (reg_lo < lb || reg_hi >= ub || reg_lo % stride != 0)
continue;
if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file[reg_lo] == reg_file[reg_lo - 1])
if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
continue;
if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file[reg_hi] == reg_file[reg_hi + 1])
if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
continue;
/* count variables to be moved and check war_hint */