nvk: properly calculate SLM region by taking per arch limits into account

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
Karol Herbst 2023-07-20 22:50:16 +02:00 committed by Marge Bot
parent 4b66a0a70c
commit 61c0d86831
3 changed files with 59 additions and 8 deletions

View file

@ -60,16 +60,13 @@ nvk_slm_area_ensure(struct nvk_device *dev,
*/
bytes_per_warp = ALIGN(bytes_per_warp, 0x200);
uint64_t bytes_per_tpc = bytes_per_warp * 64; /* max warps */
uint64_t bytes_per_mp = bytes_per_warp * dev->ws_dev->max_warps_per_mp;
uint64_t bytes_per_tpc = bytes_per_mp * dev->ws_dev->mp_per_tpc;
/* The hardware seems to require this alignment for
* NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A_SIZE_LOWER.
*
* Fortunately, this is just the alignment for bytes_per_warp multiplied
* by the number of warps, 64. It might matter for real on a GPU with 48
* warps but we don't support any of those yet.
*/
assert(bytes_per_tpc == ALIGN(bytes_per_tpc, 0x8000));
bytes_per_tpc = ALIGN(bytes_per_tpc, 0x8000);
/* nvk_slm_area::bytes_per_mp only ever increases so we can check this
* outside the lock and exit early in the common case. We only need to

View file

@ -75,6 +75,52 @@ sm_for_chipset(uint16_t chipset)
return 0x00;
}
/**
 * Look up the maximum number of warps per MP for an SM version.
 *
 * The table is hardcoded. The sm values look like major * 10 + minor
 * (e.g. 75, 86) -- NOTE(review): confirm against the code that fills in
 * the SM version.
 *
 * \param sm  SM version to look up.
 * \return    24, 32, 48 or 64 for a known SM version. An unknown version
 *            trips the assert in debug builds; in builds where asserts are
 *            compiled out it yields 64, the largest value in the table.
 */
static uint8_t
max_warps_per_mp_for_sm(uint8_t sm)
{
   switch (sm) {
   case 10:
   case 11:
      return 24;
   case 12:
   case 13:
   case 75:
      return 32;
   case 20:
   case 21:
   case 86:
   case 87:
   case 89:
      return 48;
   case 30:
   case 32:
   case 35:
   case 37:
   case 50:
   case 52:
   case 53:
   case 60:
   case 61:
   case 62:
   case 70:
   case 72:
   case 80:
   case 90:
      return 64;
   default:
      assert(!"unkown SM version");
      /* With NDEBUG the assert vanishes; without an explicit return the
       * function would fall off its end, which is undefined behaviour.
       * Fall back to the largest known value so that anything sized from
       * this result errs on the large side.
       */
      return 64;
   }
}
/**
 * Number of MPs contained in each TPC for the given chipset id.
 *
 * \param chipset  Chipset id to look up.
 * \return         2 for chipset 0x130 and for every chipset id >= 0x140,
 *                 1 for everything else.
 */
static uint8_t
mp_per_tpc_for_chipset(uint16_t chipset)
{
   /* Single-MP TPCs: everything below 0x140 except 0x130. GP100 is the
    * special case that already has two; otherwise having two is a Volta
    * and newer thing.
    */
   if (chipset != 0x130 && chipset < 0x140)
      return 1;

   return 2;
}
static void
nouveau_ws_device_set_dbg_flags(struct nouveau_ws_device *dev)
{
@ -256,8 +302,9 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
if (nouveau_ws_param(fd, NOUVEAU_GETPARAM_GRAPH_UNITS, &value))
goto out_err;
device->gpc_count = value & 0x000000ff;
device->tpc_count = value >> 8;
device->gpc_count = (value >> 0) & 0x000000ff;
device->tpc_count = (value >> 8) & 0x0000ffff;
nouveau_ws_device_set_dbg_flags(device);
@ -272,6 +319,11 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
device->info.cls_m2mf = tmp_ctx->m2mf.cls;
device->info.cls_compute = tmp_ctx->compute.cls;
// for now we hardcode those values, but in the future Nouveau could provide that information to
// us instead.
device->max_warps_per_mp = max_warps_per_mp_for_sm(device->info.sm);
device->mp_per_tpc = mp_per_tpc_for_chipset(device->info.chipset);
nouveau_ws_context_destroy(tmp_ctx);
simple_mtx_init(&device->bos_lock, mtx_plain);

View file

@ -38,6 +38,8 @@ struct nouveau_ws_device {
uint32_t local_mem_domain;
uint8_t gpc_count;
uint8_t mp_per_tpc;
uint8_t max_warps_per_mp;
uint16_t tpc_count;
enum nvk_debug debug_flags;