mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
nvk: properly calculate SLM region by taking per arch limits into account
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
parent
4b66a0a70c
commit
61c0d86831
3 changed files with 59 additions and 8 deletions
|
|
@ -60,16 +60,13 @@ nvk_slm_area_ensure(struct nvk_device *dev,
|
|||
*/
|
||||
bytes_per_warp = ALIGN(bytes_per_warp, 0x200);
|
||||
|
||||
uint64_t bytes_per_tpc = bytes_per_warp * 64; /* max warps */
|
||||
uint64_t bytes_per_mp = bytes_per_warp * dev->ws_dev->max_warps_per_mp;
|
||||
uint64_t bytes_per_tpc = bytes_per_mp * dev->ws_dev->mp_per_tpc;
|
||||
|
||||
/* The hardware seems to require this alignment for
|
||||
* NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A_SIZE_LOWER.
|
||||
*
|
||||
* Fortunately, this is just the alignment for bytes_per_warp multiplied
|
||||
* by the number of warps, 64. It might matter for real on a GPU with 48
|
||||
* warps but we don't support any of those yet.
|
||||
*/
|
||||
assert(bytes_per_tpc == ALIGN(bytes_per_tpc, 0x8000));
|
||||
bytes_per_tpc = ALIGN(bytes_per_tpc, 0x8000);
|
||||
|
||||
/* nvk_slm_area::bytes_per_mp only ever increases so we can check this
|
||||
* outside the lock and exit early in the common case. We only need to
|
||||
|
|
|
|||
|
|
@ -75,6 +75,52 @@ sm_for_chipset(uint16_t chipset)
|
|||
return 0x00;
|
||||
}
|
||||
|
||||
/*
 * Returns the hardcoded maximum number of warps per MP for the given SM
 * version.
 *
 * sm: SM version number as used by the case labels below (10 ... 90).
 *
 * Unknown SM versions trip an assert in debug builds.  In builds where
 * assert() is compiled out, the function used to fall off its end without
 * returning a value; it now returns the largest value in the table so that
 * anything sized from the result is over- rather than under-estimated.
 */
static uint8_t
max_warps_per_mp_for_sm(uint8_t sm)
{
   switch (sm) {
   case 10:
   case 11:
      return 24;
   case 12:
   case 13:
   case 75:
      return 32;
   case 20:
   case 21:
   case 86:
   case 87:
   case 89:
      return 48;
   case 30:
   case 32:
   case 35:
   case 37:
   case 50:
   case 52:
   case 53:
   case 60:
   case 61:
   case 62:
   case 70:
   case 72:
   case 80:
   case 90:
      return 64;
   default:
      assert(!"unkown SM version");
      /* Only reached when asserts are disabled: pick the largest known
       * limit instead of returning an indeterminate value.
       */
      return 64;
   }
}
|
||||
|
||||
/*
 * Returns how many MPs make up one TPC on the given chipset.
 *
 * Two MPs per TPC is a Volta-and-newer property (chipset 0x140 and up);
 * GP100 (chipset 0x130) is the one earlier chip that also has two.
 * Every other chipset has a single MP per TPC.
 */
static uint8_t
mp_per_tpc_for_chipset(uint16_t chipset)
{
   const int has_two_mps = (chipset >= 0x140) || (chipset == 0x130);

   return has_two_mps ? 2 : 1;
}
|
||||
|
||||
static void
|
||||
nouveau_ws_device_set_dbg_flags(struct nouveau_ws_device *dev)
|
||||
{
|
||||
|
|
@ -256,8 +302,9 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
|
|||
|
||||
if (nouveau_ws_param(fd, NOUVEAU_GETPARAM_GRAPH_UNITS, &value))
|
||||
goto out_err;
|
||||
device->gpc_count = value & 0x000000ff;
|
||||
device->tpc_count = value >> 8;
|
||||
|
||||
device->gpc_count = (value >> 0) & 0x000000ff;
|
||||
device->tpc_count = (value >> 8) & 0x0000ffff;
|
||||
|
||||
nouveau_ws_device_set_dbg_flags(device);
|
||||
|
||||
|
|
@ -272,6 +319,11 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
|
|||
device->info.cls_m2mf = tmp_ctx->m2mf.cls;
|
||||
device->info.cls_compute = tmp_ctx->compute.cls;
|
||||
|
||||
// for now we hardcode those values, but in the future Nouveau could provide that information to
|
||||
// us instead.
|
||||
device->max_warps_per_mp = max_warps_per_mp_for_sm(device->info.sm);
|
||||
device->mp_per_tpc = mp_per_tpc_for_chipset(device->info.chipset);
|
||||
|
||||
nouveau_ws_context_destroy(tmp_ctx);
|
||||
|
||||
simple_mtx_init(&device->bos_lock, mtx_plain);
|
||||
|
|
|
|||
|
|
@ -38,6 +38,8 @@ struct nouveau_ws_device {
|
|||
uint32_t local_mem_domain;
|
||||
|
||||
uint8_t gpc_count;
|
||||
uint8_t mp_per_tpc;
|
||||
uint8_t max_warps_per_mp;
|
||||
uint16_t tpc_count;
|
||||
|
||||
enum nvk_debug debug_flags;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue