nvc0: fix num_gprs for Volta+

Overallocating by 2 gprs for ugprs is a wild guess on my part. It does make
sense, though, since each subgroup shares 64 ugprs, which works out to 2 per thread.

Signed-off-by: Karol Herbst <git@karolherbst.de>
Reviewed-by: M Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24261>
This commit is contained in:
Karol Herbst 2023-07-20 15:38:13 +02:00 committed by Marge Bot
parent fa8634388b
commit 23795dc318
2 changed files with 12 additions and 3 deletions

View file

@ -686,9 +686,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
prog->relocs = info_out.bin.relocData;
prog->fixups = info_out.bin.fixupData;
if (info_out.target >= NVISA_GV100_CHIPSET)
prog->num_gprs = MIN2(info_out.bin.maxGPR + 5, 255); //XXX: why?
prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 3);
else
prog->num_gprs = MAX2(4, (info_out.bin.maxGPR + 1));
prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 1);
prog->cp.smem_size = info_out.bin.smemSize;
prog->num_barriers = info_out.numBarriers;

View file

@ -265,9 +265,18 @@ void TargetNVC0::initOpInfo()
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
const unsigned int bs = (chipset >= NVISA_GV100_CHIPSET) ? 16 : 0;
unsigned int gprs;
/* probably because of ugprs? */
if (chipset >= NVISA_GV100_CHIPSET)
gprs = 253;
else if (chipset >= NVISA_GK20A_CHIPSET)
gprs = 255;
else
gprs = 63;
switch (file) {
case FILE_NULL: return 0;
case FILE_GPR: return MIN2(gprs, smregs / threads);