diff --git a/src/nouveau/vulkan/nvk_device.c b/src/nouveau/vulkan/nvk_device.c
index 1e18621882e..3cfd4bfc40a 100644
--- a/src/nouveau/vulkan/nvk_device.c
+++ b/src/nouveau/vulkan/nvk_device.c
@@ -60,16 +60,13 @@ nvk_slm_area_ensure(struct nvk_device *dev,
     */
    bytes_per_warp = ALIGN(bytes_per_warp, 0x200);
 
-   uint64_t bytes_per_tpc = bytes_per_warp * 64; /* max warps */
+   uint64_t bytes_per_mp = bytes_per_warp * dev->ws_dev->max_warps_per_mp;
+   uint64_t bytes_per_tpc = bytes_per_mp * dev->ws_dev->mp_per_tpc;
 
    /* The hardware seems to require this alignment for
     * NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A_SIZE_LOWER.
-    *
-    * Fortunately, this is just the alignment for bytes_per_warp multiplied
-    * by the number of warps, 64. It might matter for real on a GPU with 48
-    * warps but we don't support any of those yet.
     */
-   assert(bytes_per_tpc == ALIGN(bytes_per_tpc, 0x8000));
+   bytes_per_tpc = ALIGN(bytes_per_tpc, 0x8000);
 
    /* nvk_slm_area::bytes_per_mp only ever increases so we can check this
     * outside the lock and exit early in the common case. We only need to
diff --git a/src/nouveau/winsys/nouveau_device.c b/src/nouveau/winsys/nouveau_device.c
index 7c6c1d10037..49284b6c768 100644
--- a/src/nouveau/winsys/nouveau_device.c
+++ b/src/nouveau/winsys/nouveau_device.c
@@ -75,6 +75,57 @@ sm_for_chipset(uint16_t chipset)
    return 0x00;
 }
 
+/* Maximum number of warps per MP for the given SM version. */
+static uint8_t
+max_warps_per_mp_for_sm(uint8_t sm)
+{
+   switch (sm) {
+   case 10:
+   case 11:
+      return 24;
+   case 12:
+   case 13:
+   case 75:
+      return 32;
+   case 20:
+   case 21:
+   case 86:
+   case 87:
+   case 89:
+      return 48;
+   case 30:
+   case 32:
+   case 35:
+   case 37:
+   case 50:
+   case 52:
+   case 53:
+   case 60:
+   case 61:
+   case 62:
+   case 70:
+   case 72:
+   case 80:
+   case 90:
+      return 64;
+   default:
+      assert(!"unkown SM version");
+      // asserts vanish with NDEBUG: return the biggest known value rather
+      // than falling off the end of a non-void function
+      return 64;
+   }
+}
+
+/* Number of MPs per TPC for the given chipset. */
+static uint8_t
+mp_per_tpc_for_chipset(uint16_t chipset)
+{
+   // GP100 is special and has two, otherwise it's a Volta and newer thing to have two
+   if (chipset == 0x130 || chipset >= 0x140)
+      return 2;
+   return 1;
+}
+
 static void
 nouveau_ws_device_set_dbg_flags(struct nouveau_ws_device *dev)
 {
@@ -256,8 +307,9 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
 
    if (nouveau_ws_param(fd, NOUVEAU_GETPARAM_GRAPH_UNITS, &value))
       goto out_err;
-   device->gpc_count = value & 0x000000ff;
-   device->tpc_count = value >> 8;
+
+   device->gpc_count = (value >> 0) & 0x000000ff;
+   device->tpc_count = (value >> 8) & 0x0000ffff;
 
    nouveau_ws_device_set_dbg_flags(device);
 
@@ -272,6 +324,11 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
    device->info.cls_m2mf = tmp_ctx->m2mf.cls;
    device->info.cls_compute = tmp_ctx->compute.cls;
 
+   // for now we hardcode those values, but in the future Nouveau could provide that information to
+   // us instead.
+   device->max_warps_per_mp = max_warps_per_mp_for_sm(device->info.sm);
+   device->mp_per_tpc = mp_per_tpc_for_chipset(device->info.chipset);
+
    nouveau_ws_context_destroy(tmp_ctx);
 
    simple_mtx_init(&device->bos_lock, mtx_plain);
diff --git a/src/nouveau/winsys/nouveau_device.h b/src/nouveau/winsys/nouveau_device.h
index b16b3d314fc..903a4c1f6bd 100644
--- a/src/nouveau/winsys/nouveau_device.h
+++ b/src/nouveau/winsys/nouveau_device.h
@@ -38,6 +38,8 @@ struct nouveau_ws_device {
    uint32_t local_mem_domain;
 
    uint8_t gpc_count;
+   uint8_t mp_per_tpc;
+   uint8_t max_warps_per_mp;
    uint16_t tpc_count;
 
    enum nvk_debug debug_flags;