mesa/src/intel/vulkan/anv_instance.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

394 lines
16 KiB
C
Raw Normal View History

/* Copyright © 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "anv_private.h"
#include "anv_api_version.h"
#include "util/driconf.h"
/* driconf options understood by anv, grouped by section.
 *
 * The table is handed to driParseOptionInfo() in anv_init_dri_options(),
 * which then reads the individual values back into struct anv_instance.
 */
static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_WSI_DISABLE_UNORDERED_SUBMITS(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(false)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_SHARED_MEMORY(false)
      DRI_CONF_ANV_BARRIER_POST_TYPED_CLEAR_SHADER(false)
      DRI_CONF_ANV_BARRIER_POST_UNTYPED_CLEAR_SHADER(false)
      DRI_CONF_ANV_DISABLE_FCV(false)
      /* Lets buffers use the same (compressed) memory types as images.
       * Source 2 titles expect this for vertex and index buffers. Buffer
       * compression is generally not a performance win, so it stays off by
       * default and is enabled per application through driconf.
       */
      DRI_CONF_ANV_ENABLE_BUFFER_COMP(false)
      DRI_CONF_ANV_DISABLE_DRM_AUX_MODIFIERS(false)
      DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(true)
      DRI_CONF_ANV_FORCE_GUC_LOW_LATENCY(false)
      DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
      DRI_CONF_ANV_FORCE_FILTER_ADDR_ROUNDING(false)
      DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
      DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
      DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(100)
      DRI_CONF_ANV_PROMOTE_CBV_TO_PUSH_BUFFERS(false)
      DRI_CONF_ANV_STATE_CACHE_PERF_FIX(false)
      DRI_CONF_NO_16BIT(false)
      DRI_CONF_INTEL_BINDING_TABLE_BLOCK_SIZE(BINDING_TABLE_POOL_DEFAULT_BLOCK_SIZE,
                                              1024, 128 * 1024)
      DRI_CONF_INTEL_ENABLE_WA_14018912822(false)
      DRI_CONF_INTEL_ENABLE_WA_14024015672_MSAA(false)
      DRI_CONF_INTEL_SAMPLER_ROUTE_TO_LSC(false)
      DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6)
      DRI_CONF_ANV_QUERY_COPY_WITH_SHADER_THRESHOLD(6)
      DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(false)
      DRI_CONF_ANV_DISABLE_LINK_TIME_OPTIMIZATION(false)
      DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS(0)
      DRI_CONF_SHADER_SPILLING_RATE(11)
      DRI_CONFIG_INTEL_FORCE_COMPUTE_SURFACE_PREFETCH(true)
      DRI_CONFIG_INTEL_FORCE_SAMPLER_PREFETCH(false)
      DRI_CONFIG_INTEL_TBIMR(true)
      DRI_CONFIG_INTEL_VF_DISTRIBUTION(true)
      DRI_CONFIG_INTEL_TE_DISTRIBUTION(true)
      DRI_CONFIG_INTEL_STORAGE_CACHE_POLICY_WT(false)
      DRI_CONF_ANV_LARGE_WORKGROUP_NON_COHERENT_IMAGE_WORKAROUND(false)
      /* Compression control defaults to on only for Android API level 37+. */
#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 37
      DRI_CONF_ANV_COMPRESSION_CONTROL_ENABLED(true)
#else
      DRI_CONF_ANV_COMPRESSION_CONTROL_ENABLED(false)
#endif
      DRI_CONF_ANV_FAKE_NONLOCAL_MEMORY(false)
      DRI_CONF_OPT_E(intel_stack_id, 512, 256, 2048,
                     "Control the number stackIDs (i.e. number of unique rays in the RT subsytem)",
                     DRI_CONF_ENUM(256, "256 stackids")
                     DRI_CONF_ENUM(512, "512 stackids")
                     DRI_CONF_ENUM(1024, "1024 stackids")
                     DRI_CONF_ENUM(2048, "2048 stackids"))
      DRI_CONF_ANV_UPPER_BOUND_DESCRIPTOR_POOL_SAMPLER(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_LOWER_TERMINATE_TO_DISCARD(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
      DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
      /* Read-without-format emulation defaults to on only for Android API
       * level 35+.
       */
#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 35
      DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(true)
#else
      DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(false)
#endif
      DRI_CONF_FORCE_VK_VENDOR()
      DRI_CONF_FAKE_SPARSE(false)
      DRI_CONF_CUSTOM_BORDER_COLORS_WITHOUT_FORMAT(!DETECT_OS_ANDROID)
      /* ASTC is required by default only for Android API level 34+. */
#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 34
      DRI_CONF_VK_REQUIRE_ASTC(true)
#else
      DRI_CONF_VK_REQUIRE_ASTC(false)
#endif
      /* Vertex fetching component packing, enabled by default. */
      DRI_CONF_ANV_VF_COMPONENT_PACKING(true)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_QUALITY
      DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
   DRI_CONF_SECTION_END
};
/* Keywords accepted in the ANV_DEBUG option string, each paired with the
 * ANV_DEBUG_* flag it selects. The table ends with a { NULL, 0 } entry.
 */
static const struct debug_control debug_control[] = {
   { "bindless",     ANV_DEBUG_BINDLESS },
   { "no-gpl",       ANV_DEBUG_NO_GPL },
   { "no-sparse",    ANV_DEBUG_NO_SPARSE },
   { "sparse-trtt",  ANV_DEBUG_SPARSE_TRTT },
   { "video-decode", ANV_DEBUG_VIDEO_DECODE },
   { "video-encode", ANV_DEBUG_VIDEO_ENCODE },
   { "shader-hash",  ANV_DEBUG_SHADER_HASH },
   { "no-slab",      ANV_DEBUG_NO_SLAB },
   { "desc-dirty",   ANV_DEBUG_DESCRIPTOR_DIRTY },
   { "shader-print", ANV_DEBUG_SHADER_PRINT },
   { NULL,           0 }
};
/* Debug flags for anv. Assigned by process_anv_debug_variable_once() from
 * the value of the ANV_DEBUG option.
 */
enum anv_debug anv_debug;
/* Read the ANV_DEBUG option and store the parsed flags in anv_debug.
 *
 * Invoked through call_once() from anv_CreateInstance().
 */
static void
process_anv_debug_variable_once(void)
{
   const char *debug_string = os_get_option("ANV_DEBUG");

   anv_debug = parse_debug_string(debug_string, debug_control);
}
/* Report the instance-level API version of the driver.
 *
 * Writes ANV_API_VERSION to *pApiVersion and always returns VK_SUCCESS.
 */
VkResult anv_EnumerateInstanceVersion(
    uint32_t*                                   pApiVersion)
{
   const uint32_t api_version = ANV_API_VERSION;

   *pApiVersion = api_version;

   return VK_SUCCESS;
}
/* Instance extensions advertised by anv. Entries inside preprocessor
 * conditionals are only present when the matching platform macro is
 * defined at build time.
 */
static const struct vk_instance_extension_table instance_extensions = {
   /* Always available. */
   .KHR_device_group_creation                = true,
   .KHR_external_fence_capabilities          = true,
   .KHR_external_memory_capabilities         = true,
   .KHR_external_semaphore_capabilities      = true,
   .KHR_get_physical_device_properties2      = true,
   .EXT_debug_report                         = true,
   .EXT_debug_utils                          = true,

   /* Generic surface support. */
#if defined(ANV_USE_WSI_PLATFORM)
   .KHR_get_surface_capabilities2            = true,
   .KHR_surface                              = true,
   .KHR_surface_maintenance1                 = true,
   .KHR_surface_protected_capabilities       = true,
   .EXT_surface_maintenance1                 = true,
   .EXT_swapchain_colorspace                 = true,
#endif

   /* Per-platform surface extensions. */
#if defined(VK_USE_PLATFORM_WAYLAND_KHR)
   .KHR_wayland_surface                      = true,
#endif
#if defined(VK_USE_PLATFORM_XCB_KHR)
   .KHR_xcb_surface                          = true,
#endif
#if defined(VK_USE_PLATFORM_XLIB_KHR)
   .KHR_xlib_surface                         = true,
#endif
#if defined(VK_USE_PLATFORM_XLIB_XRANDR_EXT)
   .EXT_acquire_xlib_display                 = true,
#endif
#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
   .KHR_display                              = true,
   .KHR_get_display_properties2              = true,
   .EXT_direct_mode_display                  = true,
   .EXT_display_surface_counter              = true,
   .EXT_acquire_drm_display                  = true,
#endif
#if !defined(VK_USE_PLATFORM_WIN32_KHR)
   .EXT_headless_surface                     = true,
#endif
};
/* Enumerate the instance extensions listed in instance_extensions.
 *
 * Any non-NULL pLayerName is rejected with VK_ERROR_LAYER_NOT_PRESENT;
 * otherwise the result of vk_enumerate_instance_extension_properties() is
 * returned unchanged.
 */
VkResult anv_EnumerateInstanceExtensionProperties(
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
   if (pLayerName != NULL)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}
static void
anv_init_dri_options(struct anv_instance *instance)
{
driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
ARRAY_SIZE(anv_dri_options));
driParseConfigFiles(&instance->dri_options,
&instance->available_dri_options, 0, "anv", NULL, NULL,
instance->vk.app_info.app_name,
instance->vk.app_info.app_version,
instance->vk.app_info.engine_name,
instance->vk.app_info.engine_version);
instance->assume_full_subgroups =
driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
instance->assume_full_subgroups_with_barrier =
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_barrier");
instance->assume_full_subgroups_with_shared_memory =
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_shared_memory");
instance->limit_trig_input_range =
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
instance->sample_mask_out_opengl_behaviour =
driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
instance->force_filter_addr_rounding =
driQueryOptionb(&instance->dri_options, "anv_force_filter_addr_rounding");
instance->promote_cbv_to_push_buffers =
driQueryOptionb(&instance->dri_options, "anv_promote_cbv_to_push_buffers");
instance->state_cache_perf_fix =
driQueryOptionb(&instance->dri_options, "anv_state_cache_perf_fix");
instance->lower_depth_range_rate =
driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
instance->no_16bit =
driQueryOptionb(&instance->dri_options, "no_16bit");
instance->intel_enable_wa_14018912822 =
driQueryOptionb(&instance->dri_options, "intel_enable_wa_14018912822");
instance->intel_enable_wa_14024015672_msaa =
driQueryOptionb(&instance->dri_options, "intel_enable_wa_14024015672_msaa");
instance->emulate_read_without_format =
driQueryOptionb(&instance->dri_options, "anv_emulate_read_without_format");
instance->fp64_workaround_enabled =
driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
instance->generated_indirect_threshold =
driQueryOptioni(&instance->dri_options, "generated_indirect_threshold");
instance->generated_indirect_ring_threshold =
driQueryOptioni(&instance->dri_options, "generated_indirect_ring_threshold");
instance->query_clear_with_blorp_threshold =
driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold");
instance->query_copy_with_shader_threshold =
driQueryOptioni(&instance->dri_options, "query_copy_with_shader_threshold");
instance->force_vk_vendor =
driQueryOptioni(&instance->dri_options, "force_vk_vendor");
instance->has_fake_sparse =
driQueryOptionb(&instance->dri_options, "fake_sparse");
instance->force_sampler_prefetch =
driQueryOptionb(&instance->dri_options, "intel_force_sampler_prefetch");
instance->force_compute_surface_prefetch =
driQueryOptionb(&instance->dri_options, "intel_force_compute_surface_prefetch");
instance->enable_tbimr = driQueryOptionb(&instance->dri_options, "intel_tbimr");
instance->enable_vf_distribution =
driQueryOptionb(&instance->dri_options, "intel_vf_distribution");
instance->enable_te_distribution =
driQueryOptionb(&instance->dri_options, "intel_te_distribution");
instance->large_workgroup_non_coherent_image_workaround =
driQueryOptionb(&instance->dri_options, "anv_large_workgroup_non_coherent_image_workaround");
instance->disable_fcv =
driQueryOptionb(&instance->dri_options, "anv_disable_fcv");
anv: Allow compressed memtypes with default buffer types Source 2 games segfault if certain buffers are not able to use the same memory types as images. CS2 specifically expects this to be the case for vertex and index buffers (VK_BUFFER_USAGE_2_INDEX_BUFFER_BIT, VK_BUFFER_USAGE_2_VERTEX_BUFFER_BIT). I have not tested other Source 2 games to see how much the requirement differs for the usage (if at all). Up until now, we've disabled CCS for the Source 2 engine with the anv_disable_xe2_ccs driconf option. However, this option is not great for performance. So, replace this with a new option to allow the same memory types we use for images on buffers - anv_enable_buffer_comp. Compression of buffers is generally not good for performance. I collected the result of unconditionally enabling the feature in the performance CI on BMG. I used the default configuration to average the result of two runs of each trace. The CI reports that 4 game traces would regress between 0.44-1.01% FPS with buffer compression. However, the CI actually shows it to be beneficial in three of our game traces: * Cyberpunk-trace-dx12-1080p-high 106.51% * Hitman3-trace-dx12-1080p-med 101.59% * Blackops3-trace-dx11-1080p-high 100.44% So, enable the option for the two games we already have driconf entries for, Cyberpunk and Hitman3. Of course, also enable the option for Source 2 games. Casey Bowman reports that on BMG, some frame times drop from ~15ms to ~7ms in CS2. This is in large part due to the removal of HiZ resolves, which is a consequence of the game now using of HIZ_CCS_WT instead of plain HIZ. Ref: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11520 Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32519>
2024-12-05 13:03:33 -05:00
instance->enable_buffer_comp =
driQueryOptionb(&instance->dri_options, "anv_enable_buffer_comp");
instance->external_memory_implicit_sync =
driQueryOptionb(&instance->dri_options, "anv_external_memory_implicit_sync");
instance->compression_control_enabled =
driQueryOptionb(&instance->dri_options, "compression_control_enabled");
instance->anv_fake_nonlocal_memory =
driQueryOptionb(&instance->dri_options, "anv_fake_nonlocal_memory");
instance->anv_upper_bound_descriptor_pool_sampler =
driQueryOptionb(&instance->dri_options,
"anv_upper_bound_descriptor_pool_sampler");
instance->custom_border_colors_without_format =
driQueryOptionb(&instance->dri_options,
"custom_border_colors_without_format");
anv: enable vertex fetching component packing DG2 a/b testing: Borderlands3 -0.55% Cyberpunk +0.38% Superposition -0.67% The shader stats mostly don't look like an improvement : DG2 shader stats: Blackops 3: Totals from 265 (16.44% of 1612) affected shaders: Instrs: 109055 -> 109080 (+0.02%); split: -0.01%, +0.04% Cycle count: 6166549 -> 6021371 (-2.35%); split: -2.53%, +0.17% Cyberpunk 2077: Totals from 297 (23.50% of 1264) affected shaders: Instrs: 197305 -> 197297 (-0.00%); split: -0.03%, +0.02% Cycle count: 3374325 -> 3356562 (-0.53%); split: -1.23%, +0.70% Fortnite: Totals from 2090 (27.97% of 7471) affected shaders: Instrs: 1777944 -> 1781070 (+0.18%); split: -0.01%, +0.18% Cycle count: 25188758 -> 25162910 (-0.10%); split: -0.86%, +0.76% Spill count: 1439 -> 1729 (+20.15%); split: -0.69%, +20.85% Fill count: 1226 -> 1395 (+13.78%); split: -0.82%, +14.60% Scratch Memory Size: 122880 -> 138240 (+12.50%); split: -1.67%, +14.17% Hitman 3: Totals from 490 (9.09% of 5392) affected shaders: Instrs: 407489 -> 407486 (-0.00%); split: -0.00%, +0.00% Cycle count: 1831149 -> 1831890 (+0.04%); split: -0.33%, +0.38% Metro Exodus: Totals from 4169 (9.68% of 43076) affected shaders: Instrs: 817730 -> 817726 (-0.00%); split: -0.00%, +0.00% Cycle count: 4646954 -> 4641559 (-0.12%); split: -0.61%, +0.50% Xe2 shader stats : Blackops 3: Totals from 283 (19.46% of 1454) affected shaders: Cycle count: 7662980 -> 7916316 (+3.31%); split: -0.38%, +3.69% Cyberpunk 2077: Totals from 329 (26.79% of 1228) affected shaders: Instrs: 203312 -> 203327 (+0.01%); split: -0.01%, +0.02% Cycle count: 4415812 -> 4434906 (+0.43%); split: -0.69%, +1.12% Fortnite: Totals from 1981 (30.18% of 6565) affected shaders: Instrs: 1709583 -> 1711379 (+0.11%); split: -0.00%, +0.11% Cycle count: 26882682 -> 26914014 (+0.12%); split: -0.66%, +0.78% Spill count: 863 -> 1020 (+18.19%) Fill count: 1195 -> 1271 (+6.36%) Scratch Memory Size: 116736 -> 122880 (+5.26%) Hitman 3: Totals from 540 (10.56% of 5115) 
affected shaders: Instrs: 478993 -> 478994 (+0.00%) Cycle count: 3198740 -> 3198416 (-0.01%); split: -0.27%, +0.26% Metro Exodus: Totals from 4554 (12.28% of 37071) affected shaders: Cycle count: 6460340 -> 6475666 (+0.24%); split: -0.38%, +0.62% Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
2024-11-29 12:38:27 +02:00
instance->vf_component_packing =
driQueryOptionb(&instance->dri_options, "anv_vf_component_packing");
instance->lower_terminate_to_discard =
driQueryOptionb(&instance->dri_options, "vk_lower_terminate_to_discard");
instance->disable_xe2_drm_ccs_modifiers =
driQueryOptionb(&instance->dri_options, "anv_disable_drm_ccs_modifiers");
instance->binding_table_block_size = util_next_power_of_two(
driQueryOptioni(&instance->dri_options, "intel_binding_table_block_size"));
instance->barrier_post_typed_clear_shader =
driQueryOptionb(&instance->dri_options, "anv_barrier_post_typed_clear_shader");
instance->barrier_post_untyped_clear_shader =
driQueryOptionb(&instance->dri_options, "anv_barrier_post_untyped_clear_shader");
anv: use D3D-compatible texturing for Proton Intel & AMD Direct3D drivers modify their rounding behaviour for texturing to match Direct3D expectations. Such behaviour is not conformant in Vulkan, and Intel hardware lacks a reasonable way to get NVIDIA's behaviour (which uniquely works for Vulkan & Direct3D). The second best choice is to use Direct3D-compatible behaviour for Proton (via driconf) and our current Vulkan-conformant behaviour everywhere else. Given the APIs diverge and there is no Vulkan extension to control the behaviour explicitly, driconf'ing on the engineName is the reasonable solution. anv already has a anv_force_filter_addr_rounding driconf option to force Direct3D behaviour for certain Direct3D titles. Here we simply apply it to all D3D10+ titles, aligning us with the Windows driver. Note that D3D9 does not have this behaviour. We therefore use standard Vulkan behaviour for D3D9 to avoid breaking D3D9 titles, even though the engineName is the same as D3D10+. This is the same solution radv uses, they call it radv_disable_trunc_coord. We could unify the driconf entries later. See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38098#note_3166306 for a more detailed analysis, as well as the linked references: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27337 https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25911 https://github.com/HansKristian-Work/vkd3d-proton/pull/1884 This fixes misrendering in piles of Direct3D games run on anv via Proton, including Assassin's Creed Valhalla. Cc: mesa-stable Closes: #13886 Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Co-authored-by: Calder Young <cgiacun@gmail.com> Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38114>
2025-10-28 13:02:59 -04:00
if (instance->vk.app_info.engine_name &&
!strcmp(instance->vk.app_info.engine_name, "DXVK")) {
/* Since 2.3.1+, DXVK uses the application version to signal D3D9. */
const bool is_d3d9 = instance->vk.app_info.app_version & 0x1;
/* This driconf bit enables D3D10+ behaviour for texture coordinate
* rounding. As D3D9 wants the Vulkan behaviour instead, apply the
* workaround only to D3D10+.
*/
instance->force_filter_addr_rounding &= !is_d3d9;
}
instance->disable_lto =
driQueryOptionb(&instance->dri_options, "anv_disable_link_time_optimization");
instance->enable_opt_divergent_atomics =
driQueryOptioni(&instance->dri_options, "anv_enable_opt_divergent_atomics");
instance->stack_ids = driQueryOptioni(&instance->dri_options, "intel_stack_id");
switch (instance->stack_ids) {
case 256:
case 512:
case 1024:
case 2048:
break;
default:
mesa_logw("Invalid value provided for drirc intel_stack_id=%u, reverting to 512.",
instance->stack_ids);
instance->stack_ids = 512;
break;
}
instance->force_guc_low_latency =
driQueryOptionb(&instance->dri_options, "force_guc_low_latency");
}
/* Create an anv instance.
 *
 * Allocates the instance with pAllocator (or the default allocator when
 * pAllocator is NULL), builds the dispatch table from the anv and WSI
 * entrypoints, initializes the common vk_instance, loads the driconf
 * options and processes the debug variables.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the allocation fails, the error
 * from vk_instance_init() if that fails (the allocation is freed first),
 * and VK_SUCCESS otherwise with *pInstance set to the new handle.
 */
VkResult anv_CreateInstance(
    const VkInstanceCreateInfo*                 pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkInstance*                                 pInstance)
{
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   struct anv_instance *instance =
      vk_alloc(pAllocator, sizeof(*instance), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (instance == NULL)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* anv entrypoints are loaded first, then the WSI entrypoints are added
    * in a second pass with the last argument set to false.
    */
   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   VkResult result = vk_instance_init(&instance->vk, &instance_extensions,
                                      &dispatch_table, pCreateInfo,
                                      pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   /* Physical device creation and destruction hooks. */
   instance->vk.physical_devices.try_create_for_drm =
      anv_physical_device_try_create;
   instance->vk.physical_devices.destroy = anv_physical_device_destroy;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   anv_init_dri_options(instance);

   /* ANV_DEBUG is parsed once, guarded by a static once_flag. */
   static once_flag process_anv_debug_variable_flag = ONCE_FLAG_INIT;
   call_once(&process_anv_debug_variable_flag,
             process_anv_debug_variable_once);

   process_intel_debug_variable();

   instance->vk.enable_debug_logging = INTEL_DEBUG(DEBUG_PERF);

   intel_driver_ds_init();

   *pInstance = anv_instance_to_handle(instance);

   return VK_SUCCESS;
}
/* Destroy an instance created by anv_CreateInstance().
 *
 * A NULL handle is a no-op. Otherwise the driconf caches are destroyed,
 * the common vk_instance is finished, and the instance memory is freed
 * with the allocator stored in instance->vk.alloc; pAllocator is not
 * consulted.
 */
void anv_DestroyInstance(
    VkInstance                                  _instance,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   if (instance == NULL)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   /* Release the option cache before the option info. */
   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}
/* Look up an entrypoint by name in the anv instance entrypoint table.
 *
 * _instance may be a NULL handle, in which case a NULL vk_instance is
 * forwarded to vk_instance_get_proc_addr().
 */
PFN_vkVoidFunction anv_GetInstanceProcAddr(
    VkInstance                                  _instance,
    const char*                                 pName)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   const struct vk_instance *base = NULL;

   if (instance != NULL)
      base = &instance->vk;

   return vk_instance_get_proc_addr(base, &anv_instance_entrypoints, pName);
}
/* Loader-facing entrypoint lookup.
 *
 * From version 1 of the loader interface onwards, the ICD is expected to
 * export vk_icdGetInstanceProcAddr; this works around certain LD_PRELOAD
 * issues seen in apps. It simply forwards to anv_GetInstanceProcAddr().
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance                                  instance,
    const char*                                 pName)
{
   PFN_vkVoidFunction entrypoint = anv_GetInstanceProcAddr(instance, pName);

   return entrypoint;
}