nvc0/ir: limit max number of regs based on availability in SM

This effectively limits registers to 32 on Fermi and 64 on Kepler when
1024 threads are used, but allows the full amount to be used with
smaller thread counts.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Ilia Mirkin 2016-05-28 14:28:07 -04:00
parent 27a51ff9b4
commit 1f895caba0
2 changed files with 4 additions and 2 deletions

View file

@@ -455,7 +455,7 @@ NVC0LegalizePostRA::visit(Function *fn)
pOne = new_LValue(fn, FILE_PREDICATE);
carry = new_LValue(fn, FILE_FLAGS);
rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_GK20A_CHIPSET) ? 255 : 63;
carry->reg.data.id = 0;
pOne->reg.data.id = 7;

View file

@@ -238,9 +238,11 @@ void TargetNVC0::initOpInfo()
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
switch (file) {
case FILE_NULL: return 0;
case FILE_GPR: return (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
case FILE_GPR: return MIN2(gprs, smregs / threads);
case FILE_PREDICATE: return 7;
case FILE_FLAGS: return 1;
case FILE_ADDRESS: return 0;