Add a max_blocks_per_mp field to struct nv_device_info and fill it in from
nouveau_ws_device_new() using a new per-SM lookup, max_blocks_per_mp_for_sm().
The table values are hardcoded from the CUDA programming guide section
"Compute Capabilities"; an unrecognised SM version asserts and otherwise falls
back to the smallest known value (8).

diff --git a/src/nouveau/headers/nv_device_info.h b/src/nouveau/headers/nv_device_info.h
index 1532983025c..ee0afb68ea9 100644
--- a/src/nouveau/headers/nv_device_info.h
+++ b/src/nouveau/headers/nv_device_info.h
@@ -55,6 +55,7 @@ struct nv_device_info {
    uint16_t tpc_count;
    uint8_t mp_per_tpc;
    uint8_t max_warps_per_mp;
+   uint8_t max_blocks_per_mp;
 
    bool has_transfer_queue;
 
diff --git a/src/nouveau/winsys/nouveau_device.c b/src/nouveau/winsys/nouveau_device.c
index f5ad9d5db0f..ae7cbd3f34b 100644
--- a/src/nouveau/winsys/nouveau_device.c
+++ b/src/nouveau/winsys/nouveau_device.c
@@ -159,6 +159,49 @@ max_warps_per_mp_for_sm(uint8_t sm)
    }
 }
 
+static uint8_t
+max_blocks_per_mp_for_sm(uint8_t sm)
+{
+   /* Values taken from CUDA programming guide section "Compute Capabilities" */
+   switch (sm) {
+   case 10:
+   case 11:
+   case 12:
+   case 13:
+   case 20:
+   case 21:
+      return 8;
+   case 30:
+   case 32:
+   case 35:
+   case 37:
+   case 75:
+   case 86:
+   case 87:
+      return 16;
+   case 89:
+   case 110:
+   case 120:
+      return 24;
+   case 50:
+   case 52:
+   case 53:
+   case 60:
+   case 61:
+   case 62:
+   case 70:
+   case 72:
+   case 80:
+   case 90:
+   case 100:
+      return 32;
+   default:
+      assert(!"unkown SM version");
+      /* return the smallest known value */
+      return 8;
+   }
+}
+
 static uint8_t
 mp_per_tpc_for_chipset(uint16_t chipset)
 {
@@ -538,6 +581,7 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
    // for now we hardcode those values, but in the future Nouveau could provide that information to
    // us instead.
    device->info.max_warps_per_mp = max_warps_per_mp_for_sm(device->info.sm);
+   device->info.max_blocks_per_mp = max_blocks_per_mp_for_sm(device->info.sm);
    device->info.mp_per_tpc = mp_per_tpc_for_chipset(device->info.chipset);
 
    /* Transfer queues require two kernel fixes: