diff --git a/src/asahi/vulkan/hk_instance.c b/src/asahi/vulkan/hk_instance.c index e0db7a46633..699e63acb78 100644 --- a/src/asahi/vulkan/hk_instance.c +++ b/src/asahi/vulkan/hk_instance.c @@ -14,6 +14,7 @@ #include "util/build_id.h" #include "util/driconf.h" #include "util/mesa-blake3.h" +#include "util/os_misc.h" VKAPI_ATTR VkResult VKAPI_CALL hk_EnumerateInstanceVersion(uint32_t *pApiVersion) @@ -77,6 +78,9 @@ hk_EnumerateInstanceExtensionProperties(const char *pLayerName, &instance_extensions, pPropertyCount, pProperties); } +/* Use 1/2 of total size to avoid swapping */ +#define HK_HEAP_MEMORY_PERCENT (0.5f) + /* clang-format off */ static const driOptionDescription hk_dri_options[] = { DRI_CONF_SECTION_PERFORMANCE @@ -94,6 +98,7 @@ static const driOptionDescription hk_dri_options[] = { DRI_CONF_SECTION_END DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(HK_HEAP_MEMORY_PERCENT) DRI_CONF_HK_DISABLE_BORDER_EMULATION(false) DRI_CONF_HK_FAKE_MINMAX(false) DRI_CONF_HK_IMAGE_VIEW_MIN_LOD(false) @@ -115,6 +120,11 @@ hk_init_dri_options(struct hk_instance *instance) instance->force_vk_vendor = driQueryOptioni(&instance->dri_options, "force_vk_vendor"); + instance->heap_memory_percent = + driQueryOptionf(&instance->dri_options, "heap_memory_percent"); + if (instance->heap_memory_percent == OS_GPU_HEAP_SIZE_HEURISTIC) + instance->heap_memory_percent = HK_HEAP_MEMORY_PERCENT; + instance->no_border = driQueryOptionb(&instance->dri_options, "hk_disable_border_emulation"); diff --git a/src/asahi/vulkan/hk_instance.h b/src/asahi/vulkan/hk_instance.h index d4d1e2f031b..eded4e3675d 100644 --- a/src/asahi/vulkan/hk_instance.h +++ b/src/asahi/vulkan/hk_instance.h @@ -18,6 +18,7 @@ struct hk_instance { uint8_t driver_build_sha[BLAKE3_KEY_LEN]; uint32_t force_vk_vendor; + float heap_memory_percent; bool no_border; bool fake_minmax; diff --git a/src/asahi/vulkan/hk_physical_device.c b/src/asahi/vulkan/hk_physical_device.c index fc1f737e868..05d54f764d6 100644 --- 
a/src/asahi/vulkan/hk_physical_device.c +++ b/src/asahi/vulkan/hk_physical_device.c @@ -11,6 +11,7 @@ #include "asahi/lib/agx_nir_lower_vbo.h" #include "util/disk_cache.h" #include "util/mesa-blake3.h" +#include "util/os_misc.h" #include "git_sha1.h" #include "hk_buffer.h" #include "hk_entrypoints.h" @@ -24,6 +25,7 @@ #include "vulkan/vulkan_core.h" #include "vulkan/wsi/wsi_common.h" #include "vk_drm_syncobj.h" +#include "vk_physical_device.h" #include "vk_shader_module.h" #include @@ -1158,42 +1160,30 @@ hk_physical_device_free_disk_cache(struct hk_physical_device *pdev) #endif } -/* Use 1/2 of total size to avoid swapping */ -#define SYSMEM_HEAP_FRACTION(x) (x * 1 / 2) - static uint64_t hk_get_sysmem_heap_size(struct hk_physical_device *pdev) { if (pdev->sysmem) return pdev->sysmem; - uint64_t sysmem_size_B = 0; - if (!os_get_total_physical_memory(&sysmem_size_B)) - return 0; - - return ROUND_DOWN_TO(SYSMEM_HEAP_FRACTION(sysmem_size_B), 1 << 20); + struct hk_instance *instance = hk_physical_device_instance(pdev); + return os_get_gpu_heap_size(instance->heap_memory_percent, + &instance->heap_memory_percent); } static uint64_t hk_get_sysmem_heap_available(struct hk_physical_device *pdev) { - if (pdev->sysmem) { - uint64_t total_used = 0; - for (unsigned i = 0; i < pdev->mem_heap_count; i++) { - const struct hk_memory_heap *heap = &pdev->mem_heaps[i]; - uint64_t used = p_atomic_read(&heap->used); - total_used += used; - } - return pdev->sysmem - total_used; + assert(pdev->sysmem); + + uint64_t total_used = 0; + for (unsigned i = 0; i < pdev->mem_heap_count; i++) { + const struct hk_memory_heap *heap = &pdev->mem_heaps[i]; + uint64_t used = p_atomic_read(&heap->used); + total_used += used; } - uint64_t sysmem_size_B = 0; - if (!os_get_available_system_memory(&sysmem_size_B)) { - vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory"); - return 0; - } - - return ROUND_DOWN_TO(SYSMEM_HEAP_FRACTION(sysmem_size_B), 1 << 20); + return pdev->sysmem - 
total_used; } VkResult @@ -1309,7 +1299,6 @@ hk_create_drm_physical_device(struct vk_instance *_instance, pdev->mem_heaps[sysmem_heap_idx] = (struct hk_memory_heap){ .size = sysmem_size_B, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - .available = hk_get_sysmem_heap_available, }; pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){ @@ -1393,6 +1382,7 @@ hk_GetPhysicalDeviceMemoryProperties2( VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice); + struct hk_instance *instance = hk_physical_device_instance(pdev); pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count; for (int i = 0; i < pdev->mem_heap_count; i++) { @@ -1412,6 +1402,9 @@ hk_GetPhysicalDeviceMemoryProperties2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: { VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext; + uint64_t sysmem_budget = + pdev->sysmem ? hk_get_sysmem_heap_available(pdev) : 0; + for (unsigned i = 0; i < pdev->mem_heap_count; i++) { const struct hk_memory_heap *heap = &pdev->mem_heaps[i]; uint64_t used = p_atomic_read(&heap->used); @@ -1427,33 +1420,22 @@ hk_GetPhysicalDeviceMemoryProperties2( */ p->heapUsage[i] = used; - uint64_t available = heap->size; - if (heap->available) - available = heap->available(pdev); + /* Set the budget at 90% to avoid thrashing. */ + float percent = 0.9f; - /* From the Vulkan 1.3.278 spec: - * - * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize - * values in which memory budgets are returned, with one - * element for each memory heap. A heap’s budget is a rough - * estimate of how much memory the process can allocate from - * that heap before allocations may fail or cause performance - * degradation. The budget includes any currently allocated - * device memory." - * - * and - * - * "The heapBudget value must be less than or equal to - * VkMemoryHeap::size for each heap." 
- * - * available (queried above) is the total amount free memory - * system-wide and does not include our allocations so we need - * to add that in. - */ - uint64_t budget = MIN2(available + used, heap->size); + uint64_t budget; + if (sysmem_budget) { + budget = MIN2(sysmem_budget + used, heap->size); + budget = ROUND_DOWN_TO((uint64_t)(budget * percent), 1 << 20); + } else { + /* Scale the budget the same way the heap was scaled. */ + percent *= instance->heap_memory_percent; - /* Set the budget at 90% of available to avoid thrashing */ - p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20); + budget = vk_physical_device_heap_budget_from_system( + &pdev->vk, percent, heap->size, used); + } + + p->heapBudget[i] = budget; } /* From the Vulkan 1.3.278 spec: diff --git a/src/asahi/vulkan/hk_physical_device.h b/src/asahi/vulkan/hk_physical_device.h index ea286c92ace..6446672aced 100644 --- a/src/asahi/vulkan/hk_physical_device.h +++ b/src/asahi/vulkan/hk_physical_device.h @@ -26,7 +26,6 @@ struct hk_memory_heap { uint64_t size; uint64_t used; VkMemoryHeapFlags flags; - uint64_t (*available)(struct hk_physical_device *pdev); }; struct hk_physical_device { diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index cf7fa43675e..8e415dfa023 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -26,7 +26,6 @@ #include #include #include -#include <sys/sysinfo.h> #include #include #include @@ -53,12 +52,15 @@ #include "util/disk_cache.h" #include "util/driconf.h" #include "util/os_file.h" +#include "util/os_misc.h" +#include "util/u_atomic.h" #include "util/u_debug.h" #include "util/format/u_format.h" #include "perfcntrs/v3d_perfcntrs.h" #include "vk_shader_module.h" #include "vk_format.h" #include "vk_ycbcr_conversion.h" +#include "vk_physical_device.h" #include @@ -589,6 +591,10 @@ static const driOptionDescription v3dv_dri_options[] = { DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false) 
DRI_CONF_VK_XWAYLAND_WAIT_READY(true) DRI_CONF_SECTION_END + + DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(OS_GPU_HEAP_SIZE_HEURISTIC) + DRI_CONF_SECTION_END }; static void @@ -599,6 +605,9 @@ v3dv_init_dri_options(struct v3dv_instance *instance) driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "v3dv", NULL, NULL, instance->vk.app_info.app_name, instance->vk.app_info.app_version, instance->vk.app_info.engine_name, instance->vk.app_info.engine_version); + + instance->heap_memory_percent = + driQueryOptionf(&instance->dri_options, "heap_memory_percent"); } VKAPI_ATTR VkResult VKAPI_CALL @@ -749,51 +758,45 @@ v3dv_DestroyInstance(VkInstance _instance, } static uint64_t -compute_heap_size() +compute_heap_size(struct v3dv_instance *instance) { -#if !USE_V3D_SIMULATOR - /* Query the total ram from the system */ - struct sysinfo info; - sysinfo(&info); + const uint64_t MAX_HEAP_SIZE = 4ull * 1024ull * 1024ull * 1024ull; + uint64_t memory; - uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit; +#if !USE_V3D_SIMULATOR + memory = os_get_gpu_heap_size(instance->heap_memory_percent, + &instance->heap_memory_percent); #else uint64_t total_ram = (uint64_t) v3d_simulator_get_mem_size(); + memory = os_gpu_heap_size_calculate(total_ram, + instance->heap_memory_percent, + &instance->heap_memory_percent); #endif - /* We don't want to burn too much ram with the GPU. If the user has 4GB - * or less, we use at most half. If they have more than 4GB we limit it - * to 3/4 with a max. of 4GB since the GPU cannot address more than that. 
- */ - const uint64_t MAX_HEAP_SIZE = 4ull * 1024ull * 1024ull * 1024ull; - uint64_t available; - if (total_ram <= MAX_HEAP_SIZE) - available = total_ram / 2; - else - available = MIN2(MAX_HEAP_SIZE, total_ram * 3 / 4); - - return available; + return MIN2(MAX_HEAP_SIZE, memory); } static uint64_t compute_memory_budget(struct v3dv_physical_device *device) { + struct v3dv_instance *instance = + (struct v3dv_instance*) device->vk.instance; + uint64_t heap_size = device->memory.memoryHeaps[0].size; - uint64_t heap_used = device->heap_used; - uint64_t sys_available; -#if !USE_V3D_SIMULATOR - ASSERTED bool has_available_memory = - os_get_available_system_memory(&sys_available); - assert(has_available_memory); -#else - sys_available = (uint64_t) v3d_simulator_get_mem_free(); -#endif + uint64_t heap_used = p_atomic_read(&device->heap_used); /* Let's not incite the app to starve the system: report at most 90% of * available system memory. */ - uint64_t heap_available = sys_available * 9 / 10; - return MIN2(heap_size, heap_used + heap_available); + const float percentage = 0.9f; + +#if !USE_V3D_SIMULATOR + return vk_physical_device_heap_budget_from_system( + &device->vk, percentage, heap_size, heap_used); +#else + return vk_physical_device_heap_budget(v3d_simulator_get_mem_free(), + percentage, heap_size, heap_used); +#endif } static bool @@ -896,8 +899,10 @@ get_device_properties(const struct v3dv_physical_device *device, STATIC_ASSERT(MAX_UNIFORM_BUFFERS >= MAX_DYNAMIC_UNIFORM_BUFFERS); STATIC_ASSERT(MAX_STORAGE_BUFFERS >= MAX_DYNAMIC_STORAGE_BUFFERS); + V3DV_FROM_HANDLE(v3dv_instance, instance, device->vk.instance); + const uint32_t page_size = 4096; - const uint64_t mem_size = compute_heap_size(); + const uint64_t mem_size = compute_heap_size(instance); const uint32_t max_varying_components = 16 * 4; @@ -1430,7 +1435,7 @@ create_physical_device(struct v3dv_instance *instance, /* Setup available memory heaps and types */ VkPhysicalDeviceMemoryProperties *mem = 
&device->memory; mem->memoryHeapCount = 1; - mem->memoryHeaps[0].size = compute_heap_size(); + mem->memoryHeaps[0].size = compute_heap_size(instance); mem->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; /* This is the only combination required by the spec */ diff --git a/src/broadcom/vulkan/v3dv_device.h b/src/broadcom/vulkan/v3dv_device.h index 68635f868fc..e077540c7fd 100644 --- a/src/broadcom/vulkan/v3dv_device.h +++ b/src/broadcom/vulkan/v3dv_device.h @@ -158,6 +158,8 @@ struct v3dv_instance { struct driOptionCache dri_options; struct driOptionCache available_dri_options; + float heap_memory_percent; + bool pipeline_cache_enabled; bool default_pipeline_cache_enabled; bool meta_cache_enabled; diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 2e755551dca..f7eba8b5bee 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -19,10 +19,12 @@ #include "util/driconf.h" #include "util/hex.h" #include "util/os_misc.h" +#include "util/u_atomic.h" #include "util/u_debug.h" #include "util/u_process.h" #include "vk_android.h" #include "vk_debug_utils.h" +#include "vk_physical_device.h" #include "vk_shader_module.h" #include "vk_util.h" @@ -1841,6 +1843,7 @@ static const driOptionDescription tu_dri_options[] = { DRI_CONF_SECTION_END DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(OS_GPU_HEAP_SIZE_HEURISTIC) DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false) DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false) DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false) @@ -1866,6 +1869,9 @@ tu_init_dri_options(struct tu_instance *instance) driQueryOptioni(&instance->dri_options, "force_vk_vendor"); instance->dont_care_as_load = driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load"); + + instance->heap_memory_percent = + driQueryOptionf(&instance->dri_options, "heap_memory_percent"); instance->conservative_lrz = !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz"); 
instance->reserve_descriptor_set = @@ -2051,19 +2057,10 @@ tu_GetPhysicalDeviceQueueFamilyProperties2( uint64_t tu_get_system_heap_size(struct tu_physical_device *physical_device) { - uint64_t total_ram = 0; - ASSERTED bool has_physical_memory = - os_get_total_physical_memory(&total_ram); - assert(has_physical_memory); + float *percent = &physical_device->instance->heap_memory_percent; - /* We don't want to burn too much ram with the GPU. If the user has 4GiB - * or less, we use at most half. If they have more than 4GiB, we use 3/4. - */ - uint64_t available_ram; - if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull) - available_ram = total_ram / 2; - else - available_ram = total_ram * 3 / 4; + uint64_t available_ram = os_get_gpu_heap_size(*percent, percent); + assert(available_ram); if (physical_device->va_size) available_ram = MIN2(available_ram, physical_device->va_size); @@ -2071,25 +2068,23 @@ tu_get_system_heap_size(struct tu_physical_device *physical_device) return available_ram; } -static VkDeviceSize +static inline VkDeviceSize tu_get_budget_memory(struct tu_physical_device *physical_device) { - uint64_t heap_size = physical_device->heap.size; - uint64_t heap_used = physical_device->heap.used; - uint64_t sys_available; - ASSERTED bool has_available_memory = - os_get_available_system_memory(&sys_available); - assert(has_available_memory); + struct tu_instance *instance = physical_device->instance; - if (physical_device->va_size) - sys_available = MIN2(sys_available, physical_device->va_size); + uint64_t heap_size = physical_device->heap.size; + uint64_t heap_used = p_atomic_read(&physical_device->heap.used); /* * Let's not incite the app to starve the system: report at most 90% of * available system memory. 
*/ - uint64_t heap_available = sys_available * 9 / 10; - return MIN2(heap_size, heap_used + heap_available); + const float percent = 0.9f; + + return vk_physical_device_heap_budget_from_system(&physical_device->vk, + percent, heap_size, + heap_used); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index c9f521fcc15..fe2a4da45ba 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -189,6 +189,7 @@ struct tu_instance uint32_t force_vk_vendor; bool dont_care_as_load; + float heap_memory_percent; /* Conservative LRZ (default true) invalidates LRZ on draws with * blend and depth-write enabled, because this can lead to incorrect diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index f232a974b4f..7b574c69caf 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -30,6 +30,7 @@ #include "util/format/u_formats.h" #include "util/half_float.h" #include "util/macros.h" +#include "util/os_misc.h" #include "util/simple_mtx.h" #include "util/timespec.h" #include "util/u_drm.h" @@ -1887,10 +1888,9 @@ static void agx_query_memory_info(struct pipe_screen *pscreen, struct pipe_memory_info *info) { - uint64_t mem_B = 0; - os_get_total_physical_memory(&mem_B); - - uint64_t mem_kB = mem_B / 1024; + struct agx_screen *screen = agx_screen(pscreen); + uint64_t mem_kB = + os_get_gpu_heap_size(screen->heap_memory_percent, NULL) / 1024; *info = (struct pipe_memory_info){ .total_device_memory = mem_kB, @@ -1967,6 +1967,7 @@ agx_init_compute_caps(struct pipe_screen *pscreen) { struct pipe_compute_caps *caps = (struct pipe_compute_caps *)&pscreen->compute_caps; + struct agx_screen *screen = agx_screen(pscreen); struct agx_device *dev = agx_device(pscreen); caps->address_bits = 64; @@ -1981,10 +1982,8 @@ agx_init_compute_caps(struct pipe_screen *pscreen) caps->max_threads_per_block = 1024; - uint64_t system_memory; - if 
(os_get_total_physical_memory(&system_memory)) { - caps->max_global_size = caps->max_mem_alloc_size = system_memory; - } + caps->max_global_size = caps->max_mem_alloc_size = + os_get_gpu_heap_size(screen->heap_memory_percent, NULL); caps->max_local_size = 32768; @@ -2001,6 +2000,7 @@ static void agx_init_screen_caps(struct pipe_screen *pscreen) { struct pipe_caps *caps = (struct pipe_caps *)&pscreen->caps; + struct agx_screen *screen = agx_screen(pscreen); u_init_pipe_screen_caps(pscreen, 1); @@ -2148,9 +2148,8 @@ agx_init_screen_caps(struct pipe_screen *pscreen) caps->max_viewports = AGX_MAX_VIEWPORTS; - uint64_t system_memory; caps->video_memory = - os_get_total_physical_memory(&system_memory) ? (system_memory >> 20) : 0; + os_get_gpu_heap_size(screen->heap_memory_percent, NULL) >> 20; caps->device_reset_status_query = true; caps->robust_buffer_access_behavior = true; @@ -2454,6 +2453,11 @@ agx_screen_create(int fd, struct renderonly *ro, simple_mtx_init(&agx_screen->flush_seqid_lock, mtx_plain); + agx_screen->heap_memory_percent = + driQueryOptionf(config->options, "heap_memory_percent"); + if (agx_screen->heap_memory_percent == OS_GPU_HEAP_SIZE_HEURISTIC) + agx_screen->heap_memory_percent = 1.0f; + screen->destroy = agx_destroy_screen; screen->get_screen_fd = agx_screen_get_fd; screen->get_name = agx_get_name; diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 396317165be..967cd2bd7a9 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -877,6 +877,8 @@ struct agx_screen { /* Lock to protect syncobj usage vs. 
destruction in context destroy */ struct u_rwlock destroy_lock; + + float heap_memory_percent; }; static inline struct agx_screen * diff --git a/src/gallium/drivers/asahi/driinfo_asahi.h b/src/gallium/drivers/asahi/driinfo_asahi.h index e9eb7ea849d..8ca4b1b2c5e 100644 --- a/src/gallium/drivers/asahi/driinfo_asahi.h +++ b/src/gallium/drivers/asahi/driinfo_asahi.h @@ -3,4 +3,5 @@ /* clang-format off */ DRI_CONF_SECTION_MISCELLANEOUS DRI_CONF_NO_FP16(false) + DRI_CONF_HEAP_MEMORY_PERCENT(1.0f) DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c index 882fc969c8a..71131141ccd 100644 --- a/src/gallium/drivers/crocus/crocus_screen.c +++ b/src/gallium/drivers/crocus/crocus_screen.c @@ -37,6 +37,7 @@ #include "pipe/p_state.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" +#include "util/os_misc.h" #include "util/u_debug.h" #include "util/u_inlines.h" #include "util/format/u_format.h" @@ -383,8 +384,9 @@ crocus_init_screen_caps(struct crocus_screen *screen) const unsigned gpu_mappable_megabytes = (screen->aperture_threshold) / (1024 * 1024); - uint64_t system_memory_bytes; - if (!os_get_total_physical_memory(&system_memory_bytes)) { + uint64_t system_memory_bytes = + os_get_gpu_heap_size(screen->driconf.heap_memory_percent, NULL); + if (!system_memory_bytes) { caps->video_memory = -1; } else { const unsigned system_memory_megabytes = @@ -604,6 +606,11 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config) screen->driconf.lower_depth_range_rate = driQueryOptionf(config->options, "lower_depth_range_rate"); + screen->driconf.heap_memory_percent = + driQueryOptionf(config->options, "heap_memory_percent"); + if (screen->driconf.heap_memory_percent == OS_GPU_HEAP_SIZE_HEURISTIC) + screen->driconf.heap_memory_percent = 1.0f; + screen->precompile = debug_get_bool_option("shader_precompile", true); isl_device_init(&screen->isl_dev, &screen->devinfo); diff --git 
a/src/gallium/drivers/crocus/crocus_screen.h b/src/gallium/drivers/crocus/crocus_screen.h index 3a367ddedee..8a58bbd1583 100644 --- a/src/gallium/drivers/crocus/crocus_screen.h +++ b/src/gallium/drivers/crocus/crocus_screen.h @@ -201,6 +201,7 @@ struct crocus_screen { bool always_flush_cache; bool limit_trig_input_range; float lower_depth_range_rate; + float heap_memory_percent; } driconf; uint64_t aperture_bytes; diff --git a/src/gallium/drivers/crocus/driinfo_crocus.h b/src/gallium/drivers/crocus/driinfo_crocus.h index c81b9a45182..e325353316c 100644 --- a/src/gallium/drivers/crocus/driinfo_crocus.h +++ b/src/gallium/drivers/crocus/driinfo_crocus.h @@ -14,3 +14,7 @@ DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE() DRI_CONF_SECTION_END + +DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(1.0f) +DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/freedreno/driinfo_freedreno.h b/src/gallium/drivers/freedreno/driinfo_freedreno.h index 1920b3e59e3..4b0dcff4386 100644 --- a/src/gallium/drivers/freedreno/driinfo_freedreno.h +++ b/src/gallium/drivers/freedreno/driinfo_freedreno.h @@ -1,6 +1,7 @@ // freedreno specific driconf options DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(0) DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false) DRI_CONF_DISABLE_EXPLICIT_SYNC_HEURISTIC(false) DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 4b128d437f1..8c85c33bade 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -12,6 +12,7 @@ #include "util/format/u_format.h" #include "util/format/u_format_s3tc.h" +#include "util/os_misc.h" #include "util/u_debug.h" #include "util/u_inlines.h" #include "util/u_memory.h" @@ -25,7 +26,6 @@ #include #include #include "drm-uapi/drm_fourcc.h" -#include <sys/sysinfo.h> #include "freedreno_fence.h" #include "freedreno_perfetto.h" @@ -186,18 +186,21 @@ 
fd_screen_destroy(struct pipe_screen *pscreen) static uint64_t get_memory_size(struct fd_screen *screen) { - uint64_t system_memory; + float percent = screen->driconf.heap_memory_percent; + uint64_t va_size = 0; - if (!os_get_total_physical_memory(&system_memory)) - return 0; - if (fd_device_version(screen->dev) >= FD_VERSION_VA_SIZE) { - uint64_t va_size; - if (!fd_pipe_get_param(screen->pipe, FD_VA_SIZE, &va_size)) { - system_memory = MIN2(system_memory / 2, va_size); - } - } + if (fd_device_version(screen->dev) >= FD_VERSION_VA_SIZE) + fd_pipe_get_param(screen->pipe, FD_VA_SIZE, &va_size); - return system_memory; + if (percent == OS_GPU_HEAP_SIZE_HEURISTIC) + percent = va_size ? 0.5f : 1.0f; + + uint64_t memory = os_get_gpu_heap_size(percent, NULL); + + if (va_size) + memory = MIN2(memory, va_size); + + return memory; } static void @@ -351,11 +354,11 @@ fd_init_compute_caps(struct fd_screen *screen) caps->max_threads_per_block = options->max_workgroup_invocations; - caps->max_global_size = screen->ram_size; + caps->max_global_size = os_get_gpu_heap_size(1.0f, NULL); caps->max_local_size = screen->info->cs_shared_mem_size; - caps->max_mem_alloc_size = screen->ram_size; + caps->max_mem_alloc_size = caps->max_global_size; caps->max_clock_frequency = screen->max_freq / 1000000; @@ -1000,6 +1003,8 @@ fd_screen_create(int fd, driParseConfigFiles(config->options, config->options_info, 0, "msm", NULL, fd_dev_name(screen->dev_id), NULL, 0, NULL, 0); + screen->driconf.heap_memory_percent = + driQueryOptionf(config->options, "heap_memory_percent"); screen->driconf.conservative_lrz = !driQueryOptionb(config->options, "disable_conservative_lrz"); screen->driconf.enable_throttling = @@ -1009,10 +1014,6 @@ fd_screen_create(int fd, if (driQueryOptionb(config->options, "disable_explicit_sync_heuristic")) fd_device_disable_explicit_sync_heuristic(dev); - struct sysinfo si; - sysinfo(&si); - screen->ram_size = si.totalram; - DBG("Pipe Info:"); DBG(" GPU-id: %s", 
fd_dev_name(screen->dev_id)); DBG(" Chip-id: 0x%016"PRIx64, screen->chip_id); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 137fea1c5b8..91c98f4b9bf 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -69,7 +69,6 @@ struct fd_screen { uint32_t gpu_id; /* 220, 305, etc */ uint64_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */ uint32_t max_freq; - uint32_t ram_size; uint32_t max_rts; /* max # of render targets */ uint32_t priority_mask; unsigned prio_low, prio_norm, prio_high; /* remap low/norm/high priority to kernel priority */ @@ -93,6 +92,8 @@ struct fd_screen { /* If "dual_color_blend_by_location" workaround is enabled */ bool dual_color_blend_by_location; + + float heap_memory_percent; } driconf; struct fd_dev_info dev_info; diff --git a/src/gallium/drivers/panfrost/driinfo_panfrost.h b/src/gallium/drivers/panfrost/driinfo_panfrost.h index 908aa319ebd..e1f7cbe2b26 100644 --- a/src/gallium/drivers/panfrost/driinfo_panfrost.h +++ b/src/gallium/drivers/panfrost/driinfo_panfrost.h @@ -22,6 +22,7 @@ DRI_CONF_SECTION_PERFORMANCE DRI_CONF_SECTION_END DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(1.0f) DRI_CONF_PAN_COMPUTE_CORE_MASK(~0ull) DRI_CONF_PAN_FRAGMENT_CORE_MASK(~0ull) DRI_CONF_OPT_B(pan_relax_afbc_yuv_imports, false, "Use relaxed import rules for AFBC(YUV)") diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 5ac43cdeeae..d3274e48cab 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -14,6 +14,7 @@ #include "frontend/winsys_handle.h" #include "util/format/u_format.h" +#include "util/os_misc.h" #include "util/u_debug_image.h" #include "util/u_drm.h" #include "util/u_gen_mipmap.h" @@ -1049,10 +1050,10 @@ panfrost_can_create_resource(struct pipe_screen *screen, if 
(!os_get_total_physical_memory(&system_memory)) return false; - /* Limit maximum texture size to a quarter of the system memory, to avoid - * allocating huge textures on systems with little memory. - */ - return tmp.plane.layout.data_size_B <= system_memory / 4; + uint64_t memory = + os_get_gpu_heap_size(pan_screen(screen)->heap_memory_percent, NULL); + + return tmp.plane.layout.data_size_B <= memory; } static struct pipe_resource * diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 86d28d2de7a..c17fd869af2 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -12,6 +12,7 @@ #include "pipe/p_screen.h" #include "util/format/u_format.h" #include "util/format/u_format_s3tc.h" +#include "util/os_misc.h" #include "util/os_time.h" #include "util/u_debug.h" #include "util/u_memory.h" @@ -634,18 +635,8 @@ panfrost_init_compute_caps(struct panfrost_screen *screen) */ caps->max_threads_per_block = dev->arch >= 6 ? 256 : 128; - uint64_t total_ram; - if (!os_get_total_physical_memory(&total_ram)) - total_ram = 0; - - /* We don't want to burn too much ram with the GPU. If the user has 4GiB - * or less, we use at most half. If they have more than 4GiB, we use 3/4. - */ - uint64_t available_ram; - if (total_ram <= 4ull * 1024 * 1024 * 1024) - available_ram = total_ram / 2; - else - available_ram = total_ram * 3 / 4; + const uint64_t available_ram = + os_get_gpu_heap_size(screen->heap_memory_percent, NULL); /* 48bit address space max, with the lower 32MB reserved. We clamp * things so it matches kmod VA range limitations. @@ -843,9 +834,8 @@ panfrost_init_screen_caps(struct panfrost_screen *screen) caps->max_texture_gather_offset = 7; - uint64_t system_memory; - caps->video_memory = os_get_total_physical_memory(&system_memory) ? 
- system_memory >> 20 : 0; + caps->video_memory = + os_get_gpu_heap_size(screen->heap_memory_percent, NULL) >> 20; caps->shader_stencil_export = true; caps->conditional_render = true; @@ -1063,6 +1053,11 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config, snprintf(screen->renderer_string, sizeof(screen->renderer_string), "%s MC%u (Panfrost)", dev->model->name, core_count); + screen->heap_memory_percent = + driQueryOptionf(config->options, "heap_memory_percent"); + if (screen->heap_memory_percent == OS_GPU_HEAP_SIZE_HEURISTIC) + screen->heap_memory_percent = 1.0f; + screen->afbc_tiled = driQueryOptionb(config->options, "pan_afbc_tiled"); screen->force_afbc_packing = dev->debug & PAN_DBG_FORCE_PACK; diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index 14eb7ea59fd..3cc822d8100 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -106,6 +106,8 @@ struct panfrost_screen { struct panfrost_vtable vtbl; struct disk_cache *disk_cache; + float heap_memory_percent; + /* Use AFBC tiled layout whenever possible */ bool afbc_tiled; diff --git a/src/gallium/drivers/v3d/driinfo_v3d.h b/src/gallium/drivers/v3d/driinfo_v3d.h index 147ad0b49bd..72c3f5dbd75 100644 --- a/src/gallium/drivers/v3d/driinfo_v3d.h +++ b/src/gallium/drivers/v3d/driinfo_v3d.h @@ -1,5 +1,6 @@ // v3d-specific driconf options DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(1.0f) DRI_CONF_V3D_NONMSAA_TEXTURE_SIZE_LIMIT(false) DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index f2a979ee9a6..93d7e0ba5c7 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -22,8 +22,6 @@ * IN THE SOFTWARE. 
*/ -#include <sys/sysinfo.h> - #include "common/v3d_device_info.h" #include "common/v3d_limits.h" #include "util/os_misc.h" @@ -218,10 +216,10 @@ v3d_init_compute_caps(struct v3d_screen *screen) /* GL_MAX_COMPUTE_SHARED_MEMORY_SIZE */ caps->max_local_size = 32768; - struct sysinfo si; - sysinfo(&si); - caps->max_global_size = si.totalram; - caps->max_mem_alloc_size = MIN2(V3D_MAX_BUFFER_RANGE, si.totalram); + caps->max_global_size = + os_get_gpu_heap_size(screen->heap_memory_percent, NULL); + caps->max_mem_alloc_size = + MIN2(V3D_MAX_BUFFER_RANGE, caps->max_global_size); caps->max_compute_units = 1; caps->subgroup_sizes = 16; @@ -338,9 +336,8 @@ v3d_init_screen_caps(struct v3d_screen *screen) caps->vendor_id = 0x14E4; - uint64_t system_memory; - caps->video_memory = os_get_total_physical_memory(&system_memory) ? - system_memory >> 20 : 0; + caps->video_memory = + os_get_gpu_heap_size(screen->heap_memory_percent, NULL) >> 20; caps->uma = true; @@ -817,6 +814,11 @@ v3d_screen_create(int fd, const struct pipe_screen_config *config, driCheckOption(config->options, nonmsaa_name, DRI_BOOL) && driQueryOptionb(config->options, nonmsaa_name); + screen->heap_memory_percent = + driQueryOptionf(config->options, "heap_memory_percent"); + if (screen->heap_memory_percent == OS_GPU_HEAP_SIZE_HEURISTIC) + screen->heap_memory_percent = 1.0f; + slab_create_parent(&screen->transfer_pool, sizeof(struct v3d_transfer), 16); screen->has_csd = v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_CSD); diff --git a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h index 831ce8d7213..4f6a7d1fb6a 100644 --- a/src/gallium/drivers/v3d/v3d_screen.h +++ b/src/gallium/drivers/v3d/v3d_screen.h @@ -89,6 +89,8 @@ struct v3d_screen { bool has_cpu_queue; bool has_multisync; + float heap_memory_percent; + #if USE_V3D_SIMULATOR struct v3d_simulator_file *sim_file; #endif diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index ac4a019d0e9..efc9832e322 100644 --- 
a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -27,6 +27,7 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" +#include "util/os_misc.h" #include "util/u_debug.h" #include "util/u_memory.h" #include "util/format/u_format.h" @@ -199,9 +200,7 @@ vc4_init_screen_caps(struct vc4_screen *screen) caps->vendor_id = 0x14E4; - uint64_t system_memory; - caps->video_memory = os_get_total_physical_memory(&system_memory) ? - system_memory >> 20 : 0; + caps->video_memory = os_get_gpu_heap_size(1.0f, NULL) >> 20; caps->uma = true; diff --git a/src/imagination/vulkan/pvr_physical_device.c b/src/imagination/vulkan/pvr_physical_device.c index 4d7f58eb3dd..9124e839bc9 100644 --- a/src/imagination/vulkan/pvr_physical_device.c +++ b/src/imagination/vulkan/pvr_physical_device.c @@ -18,6 +18,7 @@ #include "util/disk_cache.h" #include "util/ralloc.h" +#include "util/os_misc.h" #include "vk_util.h" #include "vk_log.h" @@ -987,30 +988,13 @@ static bool pvr_device_is_conformant(const struct pvr_device_info *info) return false; } -/* Minimum required by the Vulkan 1.1 spec (see Table 32. Required Limits) */ +/* Minimum required by the Vulkan spec Limits (maxMemoryAllocationSize) */ #define PVR_MAX_MEMORY_ALLOCATION_SIZE (1ull << 30) -static uint64_t pvr_compute_heap_size(void) +static inline uint64_t pvr_compute_heap_size(void) { - /* Query the total ram from the system */ - uint64_t total_ram; - if (!os_get_total_physical_memory(&total_ram)) - return 0; - - if (total_ram < PVR_MAX_MEMORY_ALLOCATION_SIZE) { - mesa_logw( - "Warning: The available RAM is below the minimum required by the Vulkan specification!"); - } - - /* We don't want to burn too much ram with the GPU. If the user has 4GiB - * or less, we use at most half. If they have more than 4GiB, we use 3/4. 
- */ - uint64_t available_ram; - if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL) - available_ram = total_ram / 2U; - else - available_ram = total_ram * 3U / 4U; - + uint64_t available_ram = + os_get_gpu_heap_size(OS_GPU_HEAP_SIZE_HEURISTIC, NULL); return MAX2(available_ram, PVR_MAX_MEMORY_ALLOCATION_SIZE); } diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c index 86cf601ce04..6b48f809d75 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.c +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -18,13 +18,17 @@ #include "util/disk_cache.h" #include "util/mesa-blake3.h" +#include "util/os_misc.h" #include "git_sha1.h" #include "vulkan/wsi/wsi_common.h" #include "vk_device.h" #include "vk_drm_syncobj.h" +#include "vk_physical_device.h" #include "vk_shader_module.h" +#define KK_HEAP_SIZE_PERCENT (0.75f) + static uint32_t kk_get_vk_version() { @@ -776,30 +780,6 @@ kk_physical_device_free_disk_cache(struct kk_physical_device *pdev) #endif } -static uint64_t -kk_get_sysmem_heap_size(void) -{ - uint64_t sysmem_size_B = 0; - if (!os_get_total_physical_memory(&sysmem_size_B)) - return 0; - - /* Use 3/4 of total size to avoid swapping */ - return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20); -} - -static uint64_t -kk_get_sysmem_heap_available(struct kk_physical_device *pdev) -{ - uint64_t sysmem_size_B = 0; - if (!os_get_available_system_memory(&sysmem_size_B)) { - vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory"); - return 0; - } - - /* Use 3/4 of available to avoid swapping */ - return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20); -} - static void get_metal_limits(struct kk_physical_device *pdev) { @@ -863,7 +843,7 @@ kk_enumerate_physical_devices(struct vk_instance *_instance) kk_physical_device_init_pipeline_cache(pdev); - uint64_t sysmem_size_B = kk_get_sysmem_heap_size(); + uint64_t sysmem_size_B = os_get_gpu_heap_size(KK_HEAP_SIZE_PERCENT, NULL); if (sysmem_size_B == 0) { result = 
vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "Failed to query total system memory"); @@ -874,7 +854,6 @@ kk_enumerate_physical_devices(struct vk_instance *_instance) pdev->mem_heaps[sysmem_heap_idx] = (struct kk_memory_heap){ .size = sysmem_size_B, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - .available = kk_get_sysmem_heap_available, }; pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){ @@ -973,33 +952,12 @@ kk_GetPhysicalDeviceMemoryProperties2( */ p->heapUsage[i] = used; - uint64_t available = heap->size; - if (heap->available) - available = heap->available(pdev); - - /* From the Vulkan 1.3.278 spec: - * - * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize - * values in which memory budgets are returned, with one - * element for each memory heap. A heap’s budget is a rough - * estimate of how much memory the process can allocate from - * that heap before allocations may fail or cause performance - * degradation. The budget includes any currently allocated - * device memory." - * - * and - * - * "The heapBudget value must be less than or equal to - * VkMemoryHeap::size for each heap." - * - * available (queried above) is the total amount free memory - * system-wide and does not include our allocations so we need - * to add that in. + /* Set the budget at 90% to avoid thrashing. Multiplying with + * KK_HEAP_SIZE_PERCENT to scale the budget the same way the heap + * was scaled. 
*/ - uint64_t budget = MIN2(available + used, heap->size); - - /* Set the budget at 90% of available to avoid thrashing */ - p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20); + p->heapBudget[i] = vk_physical_device_heap_budget_from_system( + &pdev->vk, KK_HEAP_SIZE_PERCENT * 0.9f, heap->size, used); } /* From the Vulkan 1.3.278 spec: diff --git a/src/kosmickrisp/vulkan/kk_physical_device.h b/src/kosmickrisp/vulkan/kk_physical_device.h index 91c053a8310..1878a381ab5 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.h +++ b/src/kosmickrisp/vulkan/kk_physical_device.h @@ -32,7 +32,6 @@ struct kk_memory_heap { uint64_t size; uint64_t used; VkMemoryHeapFlags flags; - uint64_t (*available)(struct kk_physical_device *pdev); }; struct kk_device_info { diff --git a/src/nouveau/vulkan/nvk_instance.c b/src/nouveau/vulkan/nvk_instance.c index 5a8464d0510..4f8ec0ae2a6 100644 --- a/src/nouveau/vulkan/nvk_instance.c +++ b/src/nouveau/vulkan/nvk_instance.c @@ -13,6 +13,7 @@ #include "util/detect_os.h" #include "util/driconf.h" #include "util/mesa-blake3.h" +#include "util/os_misc.h" #include "util/u_debug.h" VKAPI_ATTR VkResult VKAPI_CALL @@ -97,6 +98,8 @@ nvk_init_debug_flags(struct nvk_instance *instance) instance->debug_flags = parse_debug_string(os_get_option("NVK_DEBUG"), flags); } +#define NVK_HEAP_MEMORY_PERCENT (0.75f) + static const driOptionDescription nvk_dri_options[] = { DRI_CONF_SECTION_PERFORMANCE DRI_CONF_ADAPTIVE_SYNC(true) @@ -113,6 +116,10 @@ static const driOptionDescription nvk_dri_options[] = { DRI_CONF_VK_ZERO_VRAM(false) DRI_CONF_NVK_APP_LAYER() DRI_CONF_SECTION_END + + DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(NVK_HEAP_MEMORY_PERCENT) + DRI_CONF_SECTION_END }; static void @@ -126,6 +133,11 @@ nvk_init_dri_options(struct nvk_instance *instance) instance->force_vk_vendor = driQueryOptioni(&instance->dri_options, "force_vk_vendor"); + instance->heap_memory_percent = + driQueryOptionf(&instance->dri_options, 
"heap_memory_percent"); + if (instance->heap_memory_percent == OS_GPU_HEAP_SIZE_HEURISTIC) + instance->heap_memory_percent = NVK_HEAP_MEMORY_PERCENT; + if (driQueryOptionb(&instance->dri_options, "vk_zero_vram")) instance->debug_flags |= NVK_DEBUG_ZERO_MEMORY; diff --git a/src/nouveau/vulkan/nvk_instance.h b/src/nouveau/vulkan/nvk_instance.h index 2303e261b33..e9248c8d79c 100644 --- a/src/nouveau/vulkan/nvk_instance.h +++ b/src/nouveau/vulkan/nvk_instance.h @@ -22,6 +22,7 @@ struct nvk_instance { uint8_t driver_build_sha[BLAKE3_KEY_LEN]; uint32_t force_vk_vendor; + float heap_memory_percent; }; VK_DEFINE_HANDLE_CASTS(nvk_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 1a610c5e4c2..afd9496cebd 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -21,10 +21,12 @@ #include "util/detect_os.h" #include "util/disk_cache.h" #include "util/mesa-blake3.h" +#include "util/os_misc.h" #include "vk_android.h" #include "vk_device.h" #include "vk_drm_syncobj.h" +#include "vk_physical_device.h" #include "vk_shader_module.h" #include "vulkan/wsi/wsi_common.h" @@ -1379,17 +1381,6 @@ nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev) #endif } -static uint64_t -nvk_get_sysmem_heap_size(void) -{ - uint64_t sysmem_size_B = 0; - if (!os_get_total_physical_memory(&sysmem_size_B)) - return 0; - - /* Use 3/4 of total size to avoid swapping */ - return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20); -} - static uint64_t nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev) { @@ -1399,8 +1390,7 @@ nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev) return 0; } - /* Use 3/4 of available to avoid swapping */ - return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20); + return ROUND_DOWN_TO(sysmem_size_B, 1 << 20); } static uint64_t @@ -1509,8 +1499,10 @@ nvk_create_drm_physical_device(struct 
vk_instance *_instance, nvk_physical_device_init_pipeline_cache(pdev); - uint64_t sysmem_size_B = nvk_get_sysmem_heap_size(); - if (sysmem_size_B == 0) { + uint64_t heap_size = + os_get_gpu_heap_size(instance->heap_memory_percent, + &instance->heap_memory_percent); + if (heap_size == 0) { result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "Failed to query total system memory"); goto fail_disk_cache; @@ -1555,7 +1547,7 @@ nvk_create_drm_physical_device(struct vk_instance *_instance, uint32_t sysmem_heap_idx = pdev->mem_heap_count++; pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) { - .size = sysmem_size_B, + .size = heap_size, .flags = 0, .available = nvk_get_sysmem_heap_available, }; @@ -1677,6 +1669,8 @@ nvk_GetPhysicalDeviceMemoryProperties2( switch (ext->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: { VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext; + const struct nvk_instance *instance = + nvk_physical_device_instance(pdev); for (unsigned i = 0; i < pdev->mem_heap_count; i++) { const struct nvk_memory_heap *heap = &pdev->mem_heaps[i]; @@ -1693,33 +1687,22 @@ nvk_GetPhysicalDeviceMemoryProperties2( */ p->heapUsage[i] = used; + /* Set the budget at 90% to avoid thrashing */ + float percent = 0.9f; + uint64_t available = heap->size; - if (heap->available) + if (heap->available) { available = heap->available(pdev); - /* From the Vulkan 1.3.278 spec: - * - * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize - * values in which memory budgets are returned, with one - * element for each memory heap. A heap’s budget is a rough - * estimate of how much memory the process can allocate from - * that heap before allocations may fail or cause performance - * degradation. The budget includes any currently allocated - * device memory." - * - * and - * - * "The heapBudget value must be less than or equal to - * VkMemoryHeap::size for each heap." 
- * - * available (queried above) is the total amount free memory - * system-wide and does not include our allocations so we need - * to add that in. - */ - uint64_t budget = MIN2(available + used, heap->size); + if (heap->available == nvk_get_sysmem_heap_available) { + /* Scale the budget the same way the heap was scaled. */ + percent *= instance->heap_memory_percent; + } + } - /* Set the budget at 90% of available to avoid thrashing */ - p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20); + p->heapBudget[i] = + vk_physical_device_heap_budget(available, percent, heap->size, + used); } /* From the Vulkan 1.3.278 spec: diff --git a/src/panfrost/vulkan/panvk_instance.c b/src/panfrost/vulkan/panvk_instance.c index daf7da2102a..d6360c66ae7 100644 --- a/src/panfrost/vulkan/panvk_instance.c +++ b/src/panfrost/vulkan/panvk_instance.c @@ -204,6 +204,7 @@ static const driOptionDescription panvk_dri_options[] = { DRI_CONF_SECTION_END DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_HEAP_MEMORY_PERCENT(OS_GPU_HEAP_SIZE_HEURISTIC) DRI_CONF_PAN_COMPUTE_CORE_MASK(~0ull) DRI_CONF_PAN_FRAGMENT_CORE_MASK(~0ull) DRI_CONF_PAN_ENABLE_VERTEX_PIPELINE_STORES_ATOMICS(false) @@ -221,6 +222,8 @@ panvk_init_dri_options(struct panvk_instance *instance) instance->force_vk_vendor = driQueryOptioni(&instance->dri_options, "force_vk_vendor"); + instance->heap_memory_percent = + driQueryOptionf(&instance->dri_options, "heap_memory_percent"); instance->enable_vertex_pipeline_stores_atomics = driQueryOptionb( &instance->dri_options, "pan_enable_vertex_pipeline_stores_atomics"); diff --git a/src/panfrost/vulkan/panvk_instance.h b/src/panfrost/vulkan/panvk_instance.h index 7ea91a9bac7..1c04c824ea6 100644 --- a/src/panfrost/vulkan/panvk_instance.h +++ b/src/panfrost/vulkan/panvk_instance.h @@ -55,6 +55,7 @@ struct panvk_instance { uint8_t driver_build_sha[BLAKE3_KEY_LEN]; uint32_t force_vk_vendor; + float heap_memory_percent; bool enable_vertex_pipeline_stores_atomics; bool 
force_enable_shader_atomics; diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index 1e95c5c9390..ae36e06f76b 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -10,7 +10,6 @@ */ #include -#include #include "util/disk_cache.h" #include "util/os_misc.h" @@ -22,6 +21,7 @@ #include "vk_drm_syncobj.h" #include "vk_format.h" #include "vk_log.h" +#include "vk_physical_device.h" #include "vk_util.h" #include "panvk_device.h" @@ -220,36 +220,19 @@ get_core_masks(struct panvk_physical_device *device, return result; } -static uint64_t -get_system_heap_size() -{ - struct sysinfo info; - sysinfo(&info); - - uint64_t total_ram = (uint64_t)info.totalram * info.mem_unit; - - /* We don't want to burn too much ram with the GPU. If the user has 4GiB - * or less, we use at most half. If they have more than 4GiB, we use 3/4. - */ - uint64_t available_ram; - if (total_ram <= 4ull * 1024 * 1024 * 1024) - available_ram = total_ram / 2; - else - available_ram = total_ram * 3 / 4; - - return available_ram; -} - static VkResult get_device_heaps(struct panvk_physical_device *device, - const struct panvk_instance *instance) + struct panvk_instance *instance) { int host_coherent_not_cached_idx = -1; int host_cached_not_coherent_idx = -1; + const uint64_t heap_size = os_get_gpu_heap_size( + instance->heap_memory_percent, &instance->heap_memory_percent); + device->memory.heap_count = 1; - device->memory.heaps[0] = (VkMemoryHeap) { - .size = get_system_heap_size(), + device->memory.heaps[0] = (VkMemoryHeap){ + .size = heap_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, }; @@ -627,13 +610,11 @@ panvk_GetPhysicalDeviceMemoryProperties2( switch (ext->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: { VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext; + const struct panvk_instance *instance = + to_panvk_instance(physical_device->vk.instance); uint64_t 
used = p_atomic_read(&physical_device->memory.heap_used); uint64_t heap_size = physical_device->memory.heaps[0].size; - uint64_t available; - - if (!os_get_available_system_memory(&available)) - available = heap_size; /* From the Vulkan 1.3.278 spec: * @@ -644,29 +625,9 @@ panvk_GetPhysicalDeviceMemoryProperties2( */ p->heapUsage[0] = used; - /* From the Vulkan 1.3.278 spec: - * - * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize - * values in which memory budgets are returned, with one - * element for each memory heap. A heap’s budget is a rough - * estimate of how much memory the process can allocate from - * that heap before allocations may fail or cause performance - * degradation. The budget includes any currently allocated - * device memory." - * - * and - * - * "The heapBudget value must be less than or equal to - * VkMemoryHeap::size for each heap." - * - * available (queried above) is the total amount of free memory - * system-wide and does not include our allocations so we need - * to add that in. 
- */ - uint64_t budget = MIN2(available + used, heap_size); - /* Set the budget at 90% of available to avoid thrashing */ - p->heapBudget[0] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20); + p->heapBudget[0] = vk_physical_device_heap_budget_from_system( + &physical_device->vk, 0.9f, heap_size, used); /* From the Vulkan 1.3.278 spec: * diff --git a/src/util/driconf.h b/src/util/driconf.h index e618123d817..b1892a9c30a 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -545,6 +545,10 @@ DRI_CONF_OPT_B(no_fp16, def, \ "Disable 16-bit float support") +#define DRI_CONF_HEAP_MEMORY_PERCENT(def) \ + DRI_CONF_OPT_F(heap_memory_percent, def, 0.0, 1.0, \ + "Percentage of total system memory to report as gpu heap memory (0 = driver default)") + #define DRI_CONF_VK_ZERO_VRAM(def) \ DRI_CONF_OPT_B(vk_zero_vram, def, \ "Initialize to zero all VRAM allocations") diff --git a/src/util/os_misc.c b/src/util/os_misc.c index 014a93fe9c1..7c1bf83034c 100644 --- a/src/util/os_misc.c +++ b/src/util/os_misc.c @@ -33,6 +33,7 @@ #include "ralloc.h" #include "simple_mtx.h" #include "u_debug.h" +#include "u_math.h" #include diff --git a/src/util/os_misc.h b/src/util/os_misc.h index 9b411ac7126..b27ff720739 100644 --- a/src/util/os_misc.h +++ b/src/util/os_misc.h @@ -39,6 +39,7 @@ #include #include "util/detect.h" +#include "util/u_math.h" #if DETECT_OS_POSIX @@ -158,6 +159,53 @@ os_unset_option(const char *name) bool os_get_total_physical_memory(uint64_t *size); +#define OS_GPU_HEAP_SIZE_HEURISTIC (0.0f) + +/* + * Calculate the gpu heap size based on a percentage of @memory. + * If @percent is OS_GPU_HEAP_SIZE_HEURISTIC: + * Use a heuristic. + */ +static inline uint64_t +os_gpu_heap_size_calculate(uint64_t memory, float percent, float *percent_out) +{ + if (percent == OS_GPU_HEAP_SIZE_HEURISTIC) { + /* We don't want to burn too much ram with the GPU on devices with a small + * amount of memory. 
+ */ + if (memory <= 1ull * 1024ull * 1024ull * 1024ull) + percent = 0.25f; + else if (memory <= 4ull * 1024ull * 1024ull * 1024ull) + percent = 0.5f; + else + percent = 0.75f; + } + + if (percent_out) + *percent_out = percent; + + return ROUND_DOWN_TO((uint64_t)(memory * percent), 1 << 20); +} + +/* + * Calculate the gpu heap size based on a percentage of the system memory. + * If @percent is OS_GPU_HEAP_SIZE_HEURISTIC: + * Use a heuristic. + * + * @percent_out is preserved on failure. + */ +static inline uint64_t +os_get_gpu_heap_size(float percent, float *percent_out) +{ + uint64_t memory; + + const bool success = os_get_total_physical_memory(&memory); + if (!success) + return 0; + + return os_gpu_heap_size_calculate(memory, percent, percent_out); +} + /* * Amount of physical memory available to a process */ diff --git a/src/vulkan/runtime/vk_physical_device.h b/src/vulkan/runtime/vk_physical_device.h index e36d7f777ef..6033a9adb15 100644 --- a/src/vulkan/runtime/vk_physical_device.h +++ b/src/vulkan/runtime/vk_physical_device.h @@ -23,8 +23,11 @@ #ifndef VK_PHYSICAL_DEVICE_H #define VK_PHYSICAL_DEVICE_H +#include + #include "vk_dispatch_table.h" #include "vk_extensions.h" +#include "vk_log.h" #include "vk_object.h" #include "vk_physical_device_features.h" #include "vk_physical_device_properties.h" @@ -32,6 +35,7 @@ #include "compiler/spirv/spirv_info.h" #include "util/list.h" +#include "util/os_misc.h" #ifdef __cplusplus extern "C" { @@ -150,6 +154,68 @@ vk_physical_device_check_device_features(struct vk_physical_device *physical_dev struct spirv_capabilities vk_physical_device_get_spirv_capabilities(const struct vk_physical_device *pdev); +/** Calculate GPU heap budget based on the provided available memory + * + * :param available_memory: |in| Total available memory + * :param available_percent: |in| Percentage to apply to the available memory + * :param heap_size: |in| Size of the system memory exposed as a GPU + * heap + * :param used: |in| Heap memory used 
up. Can be `0` if the driver + * doesn't track allocations and relies on just + * the available system memory + */ +static inline uint64_t +vk_physical_device_heap_budget(uint64_t available_memory, + float available_percent, + uint64_t heap_size, + uint64_t used) +{ + available_memory *= available_percent; + + /* From the Vulkan 1.3.278 spec: + * + * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize + * values in which memory budgets are returned, with one + * element for each memory heap. A heap’s budget is a rough + * estimate of how much memory the process can allocate from + * that heap before allocations may fail or cause performance + * degradation. The budget includes any currently allocated + * device memory." + * + * and + * + * "The heapBudget value must be less than or equal to + * VkMemoryHeap::size for each heap." + * + * available (queried above) is the total amount free memory + * system-wide and does not include our allocations so we need + * to add that in. + */ + available_memory += used; + available_memory = MIN2(heap_size, available_memory); + + return ROUND_DOWN_TO(available_memory, 1 << 20); +} + +static inline uint64_t +vk_physical_device_heap_budget_from_system(struct vk_physical_device *physical_device, + float available_percent, + uint64_t heap_size, + uint64_t used) +{ + uint64_t available_memory; + + const bool success = os_get_available_system_memory(&available_memory); + if (!success) { + vk_loge(VK_LOG_OBJS(physical_device), + "Failed to query available system memory"); + return 0; + } + + return vk_physical_device_heap_budget(available_memory, available_percent, + heap_size, used); +} + #ifdef __cplusplus } #endif