/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */
|
|
|
|
|
|
|
|
|
|
#include "anv_private.h"
|
|
|
|
|
#include "anv_api_version.h"
|
|
|
|
|
|
|
|
|
|
#include "util/driconf.h"
|
|
|
|
|
|
|
|
|
|
static const driOptionDescription anv_dri_options[] = {
|
|
|
|
|
DRI_CONF_SECTION_PERFORMANCE
|
|
|
|
|
DRI_CONF_ADAPTIVE_SYNC(true)
|
|
|
|
|
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
|
|
|
|
|
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
|
|
|
|
|
DRI_CONF_VK_KHR_PRESENT_WAIT(false)
|
|
|
|
|
DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
|
|
|
|
|
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
|
2024-08-09 00:36:23 +03:00
|
|
|
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(false)
|
2025-03-11 05:14:57 +02:00
|
|
|
DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_SHARED_MEMORY(false)
|
2024-07-21 11:17:41 +03:00
|
|
|
DRI_CONF_ANV_DISABLE_FCV(false)
|
anv: Allow compressed memtypes with default buffer types
Source 2 games segfault if certain buffers are not able to use the same
memory types as images. CS2 specifically expects this to be the case for
vertex and index buffers (VK_BUFFER_USAGE_2_INDEX_BUFFER_BIT,
VK_BUFFER_USAGE_2_VERTEX_BUFFER_BIT). I have not tested other Source 2
games to see how much the requirement differs for the usage (if at all).
Up until now, we've disabled CCS for the Source 2 engine with the
anv_disable_xe2_ccs driconf option. However, this option is not great
for performance. So, replace this with a new option to allow the same
memory types we use for images on buffers - anv_enable_buffer_comp.
Compression of buffers is generally not good for performance. I
collected the result of unconditionally enabling the feature in the
performance CI on BMG. I used the default configuration to average the
result of two runs of each trace.
The CI reports that 4 game traces would regress between 0.44-1.01% FPS
with buffer compression. However, the CI actually shows it to be
beneficial in three of our game traces:
* Cyberpunk-trace-dx12-1080p-high 106.51%
* Hitman3-trace-dx12-1080p-med 101.59%
* Blackops3-trace-dx11-1080p-high 100.44%
So, enable the option for the two games we already have driconf entries
for, Cyberpunk and Hitman3.
Of course, also enable the option for Source 2 games. Casey Bowman
reports that on BMG, some frame times drop from ~15ms to ~7ms in CS2.
This is in large part due to the removal of HiZ resolves, which is a
consequence of the game now using of HIZ_CCS_WT instead of plain HIZ.
Ref: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11520
Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32519>
2024-12-05 13:03:33 -05:00
|
|
|
DRI_CONF_ANV_ENABLE_BUFFER_COMP(false)
|
2024-07-21 11:17:41 +03:00
|
|
|
DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(true)
|
|
|
|
|
DRI_CONF_ANV_FORCE_GUC_LOW_LATENCY(false)
|
|
|
|
|
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
|
|
|
|
|
DRI_CONF_ANV_FORCE_FILTER_ADDR_ROUNDING(false)
|
|
|
|
|
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
|
|
|
|
|
DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
|
|
|
|
|
DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(100)
|
|
|
|
|
DRI_CONF_NO_16BIT(false)
|
|
|
|
|
DRI_CONF_INTEL_ENABLE_WA_14018912822(false)
|
|
|
|
|
DRI_CONF_INTEL_SAMPLER_ROUTE_TO_LSC(false)
|
|
|
|
|
DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6)
|
|
|
|
|
DRI_CONF_ANV_QUERY_COPY_WITH_SHADER_THRESHOLD(6)
|
|
|
|
|
DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(false)
|
2024-11-05 13:16:40 -05:00
|
|
|
DRI_CONF_SHADER_SPILLING_RATE(11)
|
2025-02-27 11:07:19 +02:00
|
|
|
DRI_CONFIG_INTEL_TBIMR(true)
|
2025-02-27 11:26:35 +02:00
|
|
|
DRI_CONFIG_INTEL_VF_DISTRIBUTION(true)
|
|
|
|
|
DRI_CONFIG_INTEL_TE_DISTRIBUTION(true)
|
2025-03-05 14:45:18 +02:00
|
|
|
DRI_CONFIG_INTEL_STORAGE_CACHE_POLICY_WT(false)
|
2024-07-21 11:17:41 +03:00
|
|
|
DRI_CONF_ANV_COMPRESSION_CONTROL_ENABLED(false)
|
|
|
|
|
DRI_CONF_ANV_FAKE_NONLOCAL_MEMORY(false)
|
|
|
|
|
DRI_CONF_OPT_E(intel_stack_id, 512, 256, 2048,
|
|
|
|
|
"Control the number stackIDs (i.e. number of unique rays in the RT subsytem)",
|
|
|
|
|
DRI_CONF_ENUM(256, "256 stackids")
|
|
|
|
|
DRI_CONF_ENUM(512, "512 stackids")
|
|
|
|
|
DRI_CONF_ENUM(1024, "1024 stackids")
|
|
|
|
|
DRI_CONF_ENUM(2048, "2048 stackids"))
|
2024-11-21 20:34:29 +02:00
|
|
|
DRI_CONF_ANV_UPPER_BOUND_DESCRIPTOR_POOL_SAMPLER(false)
|
2024-07-21 11:17:41 +03:00
|
|
|
DRI_CONF_SECTION_END
|
|
|
|
|
|
|
|
|
|
DRI_CONF_SECTION_DEBUG
|
|
|
|
|
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
|
|
|
|
|
DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
|
|
|
|
|
DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
|
|
|
|
|
DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
|
|
|
|
|
DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
|
|
|
|
|
DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(-2)
|
|
|
|
|
DRI_CONF_FORCE_VK_VENDOR()
|
|
|
|
|
DRI_CONF_FAKE_SPARSE(false)
|
2025-01-24 10:54:00 +02:00
|
|
|
DRI_CONF_CUSTOM_BORDER_COLORS_WITHOUT_FORMAT(!DETECT_OS_ANDROID)
|
2024-07-21 11:17:41 +03:00
|
|
|
#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 34
|
|
|
|
|
DRI_CONF_VK_REQUIRE_ASTC(true)
|
|
|
|
|
#else
|
|
|
|
|
DRI_CONF_VK_REQUIRE_ASTC(false)
|
|
|
|
|
#endif
|
anv: enable vertex fetching component packing
DG2 a/b testing:
Borderlands3 -0.55%
Cyberpunk +0.38%
Superposition -0.67%
The shader stats mostly don't look like an improvement :
DG2 shader stats:
Blackops 3:
Totals from 265 (16.44% of 1612) affected shaders:
Instrs: 109055 -> 109080 (+0.02%); split: -0.01%, +0.04%
Cycle count: 6166549 -> 6021371 (-2.35%); split: -2.53%, +0.17%
Cyberpunk 2077:
Totals from 297 (23.50% of 1264) affected shaders:
Instrs: 197305 -> 197297 (-0.00%); split: -0.03%, +0.02%
Cycle count: 3374325 -> 3356562 (-0.53%); split: -1.23%, +0.70%
Fortnite:
Totals from 2090 (27.97% of 7471) affected shaders:
Instrs: 1777944 -> 1781070 (+0.18%); split: -0.01%, +0.18%
Cycle count: 25188758 -> 25162910 (-0.10%); split: -0.86%, +0.76%
Spill count: 1439 -> 1729 (+20.15%); split: -0.69%, +20.85%
Fill count: 1226 -> 1395 (+13.78%); split: -0.82%, +14.60%
Scratch Memory Size: 122880 -> 138240 (+12.50%); split: -1.67%, +14.17%
Hitman 3:
Totals from 490 (9.09% of 5392) affected shaders:
Instrs: 407489 -> 407486 (-0.00%); split: -0.00%, +0.00%
Cycle count: 1831149 -> 1831890 (+0.04%); split: -0.33%, +0.38%
Metro Exodus:
Totals from 4169 (9.68% of 43076) affected shaders:
Instrs: 817730 -> 817726 (-0.00%); split: -0.00%, +0.00%
Cycle count: 4646954 -> 4641559 (-0.12%); split: -0.61%, +0.50%
Xe2 shader stats :
Blackops 3:
Totals from 283 (19.46% of 1454) affected shaders:
Cycle count: 7662980 -> 7916316 (+3.31%); split: -0.38%, +3.69%
Cyberpunk 2077:
Totals from 329 (26.79% of 1228) affected shaders:
Instrs: 203312 -> 203327 (+0.01%); split: -0.01%, +0.02%
Cycle count: 4415812 -> 4434906 (+0.43%); split: -0.69%, +1.12%
Fortnite:
Totals from 1981 (30.18% of 6565) affected shaders:
Instrs: 1709583 -> 1711379 (+0.11%); split: -0.00%, +0.11%
Cycle count: 26882682 -> 26914014 (+0.12%); split: -0.66%, +0.78%
Spill count: 863 -> 1020 (+18.19%)
Fill count: 1195 -> 1271 (+6.36%)
Scratch Memory Size: 116736 -> 122880 (+5.26%)
Hitman 3:
Totals from 540 (10.56% of 5115) affected shaders:
Instrs: 478993 -> 478994 (+0.00%)
Cycle count: 3198740 -> 3198416 (-0.01%); split: -0.27%, +0.26%
Metro Exodus:
Totals from 4554 (12.28% of 37071) affected shaders:
Cycle count: 6460340 -> 6475666 (+0.24%); split: -0.38%, +0.62%
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
2024-11-29 12:38:27 +02:00
|
|
|
DRI_CONF_ANV_VF_COMPONENT_PACKING(true)
|
2024-07-21 11:17:41 +03:00
|
|
|
DRI_CONF_SECTION_END
|
|
|
|
|
|
|
|
|
|
DRI_CONF_SECTION_QUALITY
|
|
|
|
|
DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
|
|
|
|
|
DRI_CONF_SECTION_END
|
|
|
|
|
};
|
|
|
|
|
|
2025-04-01 23:39:36 +03:00
|
|
|
static const struct debug_control debug_control[] = {
|
|
|
|
|
{ "bindless", ANV_DEBUG_BINDLESS},
|
|
|
|
|
{ "no-gpl", ANV_DEBUG_NO_GPL},
|
|
|
|
|
{ "no-sparse", ANV_DEBUG_NO_SPARSE},
|
|
|
|
|
{ "sparse-trtt", ANV_DEBUG_SPARSE_TRTT},
|
|
|
|
|
{ "video-decode", ANV_DEBUG_VIDEO_DECODE},
|
|
|
|
|
{ "video-encode", ANV_DEBUG_VIDEO_ENCODE},
|
anv: add shader-hash debug option
Emits a dummy MI_STORE_DATA_IMM with the shader hash in front of :
- 3DSTATE_VS
- 3DSTATE_HS
- 3DSTATE_DS
- 3DSTATE_HS
- 3DSTATE_PS
- COMPUTE_WALKER / GPGPU_WALKER
Example :
0x00000000: 0x10000002: MI_STORE_DATA_IMM
0x00000000: 0x10000002 : Dword 0
DWord Length: 2
Force Write Completion Check : false
Store Qword: 0
Use Global GTT: false
0x00000004: 0xffffe0c0 : Dword 1
Core Mode Enable: 0
0x00000008: 0x0000effe : Dword 2
Address: 0xeffeffffe0c0
0x0000000c: 0x126e815a : Dword 3 <------------ shader hash
0x00000010: 0x78100007 : Dword 4
Immediate Data: 309231962
0x00000000: 0x78100007: 3DSTATE_VS
0x00000000: 0x78100007 : Dword 0
DWord Length: 7
0x00000004: 0x00000000 : Dword 1
0x00000008: 0x00000000 : Dword 2
Kernel Start Pointer: 0x00000000
0x0000000c: 0x00040000 : Dword 3
Software Exception Enable: false
Accesses UAV: false
It'll correlate with the value emitted in the pipeline stats from fossil replay :
$ grep -i 126e815a /tmp/stats.csv
fossilize.aab93c5c3f965151.1.foz,GRAPHICS,de1b925dec8a8083,507378,498283,303434,vertex,8,50,4,0,1826,0,0,0,8,17,0,0x00000000126e815a,15
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34332>
2025-04-02 00:02:10 +03:00
|
|
|
{ "shader-hash", ANV_DEBUG_SHADER_HASH},
|
2025-04-01 23:39:36 +03:00
|
|
|
{ NULL, 0 }
|
|
|
|
|
};
|
|
|
|
|
|
2024-07-21 11:17:41 +03:00
|
|
|
VkResult anv_EnumerateInstanceVersion(
|
|
|
|
|
uint32_t* pApiVersion)
|
|
|
|
|
{
|
|
|
|
|
*pApiVersion = ANV_API_VERSION;
|
|
|
|
|
return VK_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static const struct vk_instance_extension_table instance_extensions = {
|
|
|
|
|
.KHR_device_group_creation = true,
|
|
|
|
|
.KHR_external_fence_capabilities = true,
|
|
|
|
|
.KHR_external_memory_capabilities = true,
|
|
|
|
|
.KHR_external_semaphore_capabilities = true,
|
|
|
|
|
.KHR_get_physical_device_properties2 = true,
|
|
|
|
|
.EXT_debug_report = true,
|
|
|
|
|
.EXT_debug_utils = true,
|
|
|
|
|
|
|
|
|
|
#ifdef ANV_USE_WSI_PLATFORM
|
|
|
|
|
.KHR_get_surface_capabilities2 = true,
|
|
|
|
|
.KHR_surface = true,
|
|
|
|
|
.KHR_surface_protected_capabilities = true,
|
|
|
|
|
.EXT_surface_maintenance1 = true,
|
|
|
|
|
.EXT_swapchain_colorspace = true,
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
|
|
|
|
|
.KHR_wayland_surface = true,
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef VK_USE_PLATFORM_XCB_KHR
|
|
|
|
|
.KHR_xcb_surface = true,
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef VK_USE_PLATFORM_XLIB_KHR
|
|
|
|
|
.KHR_xlib_surface = true,
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
|
|
|
|
|
.EXT_acquire_xlib_display = true,
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
|
|
|
|
|
.KHR_display = true,
|
|
|
|
|
.KHR_get_display_properties2 = true,
|
|
|
|
|
.EXT_direct_mode_display = true,
|
|
|
|
|
.EXT_display_surface_counter = true,
|
|
|
|
|
.EXT_acquire_drm_display = true,
|
|
|
|
|
#endif
|
|
|
|
|
#ifndef VK_USE_PLATFORM_WIN32_KHR
|
|
|
|
|
.EXT_headless_surface = true,
|
|
|
|
|
#endif
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
VkResult anv_EnumerateInstanceExtensionProperties(
|
|
|
|
|
const char* pLayerName,
|
|
|
|
|
uint32_t* pPropertyCount,
|
|
|
|
|
VkExtensionProperties* pProperties)
|
|
|
|
|
{
|
|
|
|
|
if (pLayerName)
|
|
|
|
|
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
|
|
|
|
|
|
|
|
|
|
return vk_enumerate_instance_extension_properties(
|
|
|
|
|
&instance_extensions, pPropertyCount, pProperties);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_init_dri_options(struct anv_instance *instance)
|
|
|
|
|
{
|
|
|
|
|
driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
|
|
|
|
|
ARRAY_SIZE(anv_dri_options));
|
|
|
|
|
driParseConfigFiles(&instance->dri_options,
|
|
|
|
|
&instance->available_dri_options, 0, "anv", NULL, NULL,
|
|
|
|
|
instance->vk.app_info.app_name,
|
|
|
|
|
instance->vk.app_info.app_version,
|
|
|
|
|
instance->vk.app_info.engine_name,
|
|
|
|
|
instance->vk.app_info.engine_version);
|
|
|
|
|
|
|
|
|
|
instance->assume_full_subgroups =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
|
|
|
|
|
instance->assume_full_subgroups_with_barrier =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_barrier");
|
2025-03-11 05:14:57 +02:00
|
|
|
instance->assume_full_subgroups_with_shared_memory =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_shared_memory");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->limit_trig_input_range =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->sample_mask_out_opengl_behaviour =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->force_filter_addr_rounding =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "anv_force_filter_addr_rounding");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->lower_depth_range_rate =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->no_16bit =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "no_16bit");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->intel_enable_wa_14018912822 =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "intel_enable_wa_14018912822");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->mesh_conv_prim_attrs_to_vert_attrs =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->fp64_workaround_enabled =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->generated_indirect_threshold =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptioni(&instance->dri_options, "generated_indirect_threshold");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->generated_indirect_ring_threshold =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptioni(&instance->dri_options, "generated_indirect_ring_threshold");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->query_clear_with_blorp_threshold =
|
|
|
|
|
driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold");
|
|
|
|
|
instance->query_copy_with_shader_threshold =
|
|
|
|
|
driQueryOptioni(&instance->dri_options, "query_copy_with_shader_threshold");
|
|
|
|
|
instance->force_vk_vendor =
|
|
|
|
|
driQueryOptioni(&instance->dri_options, "force_vk_vendor");
|
|
|
|
|
instance->has_fake_sparse =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "fake_sparse");
|
|
|
|
|
instance->enable_tbimr = driQueryOptionb(&instance->dri_options, "intel_tbimr");
|
2025-02-27 11:26:35 +02:00
|
|
|
instance->enable_vf_distribution =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "intel_vf_distribution");
|
|
|
|
|
instance->enable_te_distribution =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "intel_te_distribution");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->disable_fcv =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "anv_disable_fcv");
|
anv: Allow compressed memtypes with default buffer types
Source 2 games segfault if certain buffers are not able to use the same
memory types as images. CS2 specifically expects this to be the case for
vertex and index buffers (VK_BUFFER_USAGE_2_INDEX_BUFFER_BIT,
VK_BUFFER_USAGE_2_VERTEX_BUFFER_BIT). I have not tested other Source 2
games to see how much the requirement differs for the usage (if at all).
Up until now, we've disabled CCS for the Source 2 engine with the
anv_disable_xe2_ccs driconf option. However, this option is not great
for performance. So, replace this with a new option to allow the same
memory types we use for images on buffers - anv_enable_buffer_comp.
Compression of buffers is generally not good for performance. I
collected the result of unconditionally enabling the feature in the
performance CI on BMG. I used the default configuration to average the
result of two runs of each trace.
The CI reports that 4 game traces would regress between 0.44-1.01% FPS
with buffer compression. However, the CI actually shows it to be
beneficial in three of our game traces:
* Cyberpunk-trace-dx12-1080p-high 106.51%
* Hitman3-trace-dx12-1080p-med 101.59%
* Blackops3-trace-dx11-1080p-high 100.44%
So, enable the option for the two games we already have driconf entries
for, Cyberpunk and Hitman3.
Of course, also enable the option for Source 2 games. Casey Bowman
reports that on BMG, some frame times drop from ~15ms to ~7ms in CS2.
This is in large part due to the removal of HiZ resolves, which is a
consequence of the game now using of HIZ_CCS_WT instead of plain HIZ.
Ref: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11520
Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32519>
2024-12-05 13:03:33 -05:00
|
|
|
instance->enable_buffer_comp =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "anv_enable_buffer_comp");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->external_memory_implicit_sync =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "anv_external_memory_implicit_sync");
|
2024-07-21 11:17:41 +03:00
|
|
|
instance->compression_control_enabled =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "compression_control_enabled");
|
|
|
|
|
instance->anv_fake_nonlocal_memory =
|
2024-11-21 20:33:49 +02:00
|
|
|
driQueryOptionb(&instance->dri_options, "anv_fake_nonlocal_memory");
|
2024-11-21 20:34:29 +02:00
|
|
|
instance->anv_upper_bound_descriptor_pool_sampler =
|
|
|
|
|
driQueryOptionb(&instance->dri_options,
|
|
|
|
|
"anv_upper_bound_descriptor_pool_sampler");
|
2025-01-24 10:54:00 +02:00
|
|
|
instance->custom_border_colors_without_format =
|
|
|
|
|
driQueryOptionb(&instance->dri_options,
|
|
|
|
|
"custom_border_colors_without_format");
|
anv: enable vertex fetching component packing
DG2 a/b testing:
Borderlands3 -0.55%
Cyberpunk +0.38%
Superposition -0.67%
The shader stats mostly don't look like an improvement :
DG2 shader stats:
Blackops 3:
Totals from 265 (16.44% of 1612) affected shaders:
Instrs: 109055 -> 109080 (+0.02%); split: -0.01%, +0.04%
Cycle count: 6166549 -> 6021371 (-2.35%); split: -2.53%, +0.17%
Cyberpunk 2077:
Totals from 297 (23.50% of 1264) affected shaders:
Instrs: 197305 -> 197297 (-0.00%); split: -0.03%, +0.02%
Cycle count: 3374325 -> 3356562 (-0.53%); split: -1.23%, +0.70%
Fortnite:
Totals from 2090 (27.97% of 7471) affected shaders:
Instrs: 1777944 -> 1781070 (+0.18%); split: -0.01%, +0.18%
Cycle count: 25188758 -> 25162910 (-0.10%); split: -0.86%, +0.76%
Spill count: 1439 -> 1729 (+20.15%); split: -0.69%, +20.85%
Fill count: 1226 -> 1395 (+13.78%); split: -0.82%, +14.60%
Scratch Memory Size: 122880 -> 138240 (+12.50%); split: -1.67%, +14.17%
Hitman 3:
Totals from 490 (9.09% of 5392) affected shaders:
Instrs: 407489 -> 407486 (-0.00%); split: -0.00%, +0.00%
Cycle count: 1831149 -> 1831890 (+0.04%); split: -0.33%, +0.38%
Metro Exodus:
Totals from 4169 (9.68% of 43076) affected shaders:
Instrs: 817730 -> 817726 (-0.00%); split: -0.00%, +0.00%
Cycle count: 4646954 -> 4641559 (-0.12%); split: -0.61%, +0.50%
Xe2 shader stats :
Blackops 3:
Totals from 283 (19.46% of 1454) affected shaders:
Cycle count: 7662980 -> 7916316 (+3.31%); split: -0.38%, +3.69%
Cyberpunk 2077:
Totals from 329 (26.79% of 1228) affected shaders:
Instrs: 203312 -> 203327 (+0.01%); split: -0.01%, +0.02%
Cycle count: 4415812 -> 4434906 (+0.43%); split: -0.69%, +1.12%
Fortnite:
Totals from 1981 (30.18% of 6565) affected shaders:
Instrs: 1709583 -> 1711379 (+0.11%); split: -0.00%, +0.11%
Cycle count: 26882682 -> 26914014 (+0.12%); split: -0.66%, +0.78%
Spill count: 863 -> 1020 (+18.19%)
Fill count: 1195 -> 1271 (+6.36%)
Scratch Memory Size: 116736 -> 122880 (+5.26%)
Hitman 3:
Totals from 540 (10.56% of 5115) affected shaders:
Instrs: 478993 -> 478994 (+0.00%)
Cycle count: 3198740 -> 3198416 (-0.01%); split: -0.27%, +0.26%
Metro Exodus:
Totals from 4554 (12.28% of 37071) affected shaders:
Cycle count: 6460340 -> 6475666 (+0.24%); split: -0.38%, +0.62%
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
2024-11-29 12:38:27 +02:00
|
|
|
instance->vf_component_packing =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "anv_vf_component_packing");
|
2024-07-21 11:17:41 +03:00
|
|
|
|
|
|
|
|
instance->stack_ids = driQueryOptioni(&instance->dri_options, "intel_stack_id");
|
|
|
|
|
switch (instance->stack_ids) {
|
|
|
|
|
case 256:
|
|
|
|
|
case 512:
|
|
|
|
|
case 1024:
|
|
|
|
|
case 2048:
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
mesa_logw("Invalid value provided for drirc intel_stack_id=%u, reverting to 512.",
|
|
|
|
|
instance->stack_ids);
|
|
|
|
|
instance->stack_ids = 512;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2024-11-21 20:33:49 +02:00
|
|
|
instance->force_guc_low_latency =
|
|
|
|
|
driQueryOptionb(&instance->dri_options, "force_guc_low_latency");
|
2024-07-21 11:17:41 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VkResult anv_CreateInstance(
|
|
|
|
|
const VkInstanceCreateInfo* pCreateInfo,
|
|
|
|
|
const VkAllocationCallbacks* pAllocator,
|
|
|
|
|
VkInstance* pInstance)
|
|
|
|
|
{
|
|
|
|
|
struct anv_instance *instance;
|
|
|
|
|
VkResult result;
|
|
|
|
|
|
|
|
|
|
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
|
|
|
|
|
|
|
|
|
|
if (pAllocator == NULL)
|
|
|
|
|
pAllocator = vk_default_allocator();
|
|
|
|
|
|
|
|
|
|
instance = vk_alloc(pAllocator, sizeof(*instance), 8,
|
|
|
|
|
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
|
|
|
|
|
if (!instance)
|
|
|
|
|
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
|
|
|
|
|
|
struct vk_instance_dispatch_table dispatch_table;
|
|
|
|
|
vk_instance_dispatch_table_from_entrypoints(
|
|
|
|
|
&dispatch_table, &anv_instance_entrypoints, true);
|
|
|
|
|
vk_instance_dispatch_table_from_entrypoints(
|
|
|
|
|
&dispatch_table, &wsi_instance_entrypoints, false);
|
|
|
|
|
|
|
|
|
|
result = vk_instance_init(&instance->vk, &instance_extensions,
|
|
|
|
|
&dispatch_table, pCreateInfo, pAllocator);
|
|
|
|
|
if (result != VK_SUCCESS) {
|
|
|
|
|
vk_free(pAllocator, instance);
|
|
|
|
|
return vk_error(NULL, result);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
|
|
|
|
|
instance->vk.physical_devices.destroy = anv_physical_device_destroy;
|
|
|
|
|
|
|
|
|
|
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
|
|
|
|
|
|
|
|
|
anv_init_dri_options(instance);
|
|
|
|
|
|
2025-04-01 23:39:36 +03:00
|
|
|
instance->debug = parse_debug_string(os_get_option("ANV_DEBUG"),
|
|
|
|
|
debug_control);
|
|
|
|
|
|
2024-07-21 11:17:41 +03:00
|
|
|
intel_driver_ds_init();
|
|
|
|
|
|
|
|
|
|
*pInstance = anv_instance_to_handle(instance);
|
|
|
|
|
|
|
|
|
|
return VK_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void anv_DestroyInstance(
|
|
|
|
|
VkInstance _instance,
|
|
|
|
|
const VkAllocationCallbacks* pAllocator)
|
|
|
|
|
{
|
|
|
|
|
ANV_FROM_HANDLE(anv_instance, instance, _instance);
|
|
|
|
|
|
|
|
|
|
if (!instance)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
|
|
|
|
|
|
|
|
|
driDestroyOptionCache(&instance->dri_options);
|
|
|
|
|
driDestroyOptionInfo(&instance->available_dri_options);
|
|
|
|
|
|
|
|
|
|
vk_instance_finish(&instance->vk);
|
|
|
|
|
vk_free(&instance->vk.alloc, instance);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PFN_vkVoidFunction anv_GetInstanceProcAddr(
|
|
|
|
|
VkInstance _instance,
|
|
|
|
|
const char* pName)
|
|
|
|
|
{
|
|
|
|
|
ANV_FROM_HANDLE(anv_instance, instance, _instance);
|
2024-11-11 14:53:26 -05:00
|
|
|
return vk_instance_get_proc_addr(instance ? &instance->vk : NULL,
|
2024-07-21 11:17:41 +03:00
|
|
|
&anv_instance_entrypoints,
|
|
|
|
|
pName);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* With version 1+ of the loader interface the ICD should expose
|
|
|
|
|
* vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
|
|
|
|
|
*/
|
|
|
|
|
PUBLIC
|
|
|
|
|
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
|
|
|
|
|
VkInstance instance,
|
|
|
|
|
const char* pName)
|
|
|
|
|
{
|
|
|
|
|
return anv_GetInstanceProcAddr(instance, pName);
|
|
|
|
|
}
|