diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 38eb0e6c9a6..103d61c49d7 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -136,7 +136,7 @@ anv_device_init_blorp(struct anv_device *device) device->blorp.context.get_fp64_nir = get_fp64_nir; device->blorp.context.lookup_shader = lookup_blorp_shader; device->blorp.context.upload_shader = upload_blorp_shader; - device->blorp.context.enable_tbimr = device->physical->instance->enable_tbimr; + device->blorp.context.enable_tbimr = device->physical->instance->drirc.debug.tbimr; device->blorp.context.get_surface_address = blorp_get_surface_address; device->blorp.context.exec = anv_genX(device->info, blorp_exec); device->blorp.context.upload_dynamic_state = upload_dynamic_state; @@ -184,7 +184,7 @@ anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer, */ flags |= BLORP_BATCH_EMIT_3DSTATE_VF; - if (!cmd_buffer->device->physical->instance->enable_vf_distribution) + if (!cmd_buffer->device->physical->instance->drirc.debug.vf_distribution) flags |= BLORP_BATCH_DISABLE_VF_DISTRIBUTION; blorp_batch_init(&cmd_buffer->device->blorp.context, batch, cmd_buffer, flags); diff --git a/src/intel/vulkan/anv_buffer.c b/src/intel/vulkan/anv_buffer.c index 7bb3ed3c8e3..80a17daef52 100644 --- a/src/intel/vulkan/anv_buffer.c +++ b/src/intel/vulkan/anv_buffer.c @@ -91,7 +91,7 @@ anv_get_buffer_memory_requirements(struct anv_device *device, memory_types = device->physical->memory.protected_mem_types; else if (need_dynamic_visible_buffer) memory_types = device->physical->memory.dynamic_visible_mem_types; - else if (device->physical->instance->enable_buffer_comp) + else if (device->physical->instance->drirc.debug.enable_buffer_comp) memory_types = device->physical->memory.default_buffer_mem_types | device->physical->memory.compressed_mem_types; else diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 4118647a95f..0bb63b23bfd 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -1253,7 +1253,7 @@ VkResult anv_CreateDescriptorPool( * samplers. */ uint32_t max_descriptor_count = 0; - if (device->physical->instance->anv_upper_bound_descriptor_pool_sampler && + if (device->physical->instance->drirc.debug.upper_bound_desc_pool_sampler && !device->physical->indirect_descriptors) { for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) { max_descriptor_count = MAX2(pCreateInfo->pPoolSizes[i].descriptorCount, diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index ee19a0a0341..219cb615829 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -583,7 +583,7 @@ anv_state_pools_init(struct anv_device *device) &(struct anv_state_pool_params) { .name = "binding table pool", .base_address = device->physical->va.binding_table_pool.addr, - .block_size = device->physical->instance->binding_table_block_size, + .block_size = device->physical->instance->drirc.perf.bt_block_size, .max_size = device->physical->va.binding_table_pool.size, }); } else { @@ -1242,7 +1242,7 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_meta_device; - device->vk.disable_lto = device->physical->instance->disable_lto; + device->vk.disable_lto = device->physical->instance->drirc.debug.disable_lto; simple_mtx_init(&device->accel_struct_build.mutex, mtx_plain); simple_mtx_init(&device->fp64_mutex, mtx_plain); @@ -1783,7 +1783,7 @@ VkResult anv_AllocateMemory( * consumer side relying on implicit fencing can have a fence to * wait for render complete. */ - if (pdevice->instance->external_memory_implicit_sync && + if (pdevice->instance->drirc.debug.external_memory_implicit_sync && (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) alloc_flags |= ANV_BO_ALLOC_IMPLICIT_WRITE; } diff --git a/src/intel/vulkan/anv_dricrc_gen.py b/src/intel/vulkan/anv_dricrc_gen.py new file mode 100644 index 00000000000..d0bad545688 --- /dev/null +++ b/src/intel/vulkan/anv_dricrc_gen.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +# Copyright © 2026 Intel Corporation +# SPDX-License-Identifier: MIT + +import argparse +import sys + +def declare_options(android_version): + from drirc_gen import DrircBool as B + from drirc_gen import DrircInt as I + from drirc_gen import DrircFloat as F + from drirc_gen import DrircUint64 as U64 + from drirc_gen import DrircEnum as E + from drirc_gen import DrircEnumValue as EV + + from drirc_gen import DrircSection as Section + + debug_options = [ + # WSI stuff + I("vk_x11_override_min_image_count", 0, 0, 999, + "Override the VkSurfaceCapabilitiesKHR::minImageCount (0 = no override)"), + B("vk_x11_strict_image_count", False, + "Force the X11 WSI to create exactly the number of image specified " + "by the application in VkSwapchainCreateInfoKHR::minImageCount"), + B("vk_x11_ensure_min_image_count", False, + "Force the X11 WSI to create at least the number of image specified " + "by the driver in VkSurfaceCapabilitiesKHR::minImageCount"), + B("vk_xwayland_wait_ready", False, + "Wait for fences before submitting buffers to Xwayland"), + B("vk_wsi_force_bgra8_unorm_first", False, + "Force vkGetPhysicalDeviceSurfaceFormatsKHR to return VK_FORMAT_B8G8R8A8_UNORM as the first format"), + B("vk_wsi_force_swapchain_to_current_extent", False, + "Force VkSwapchainCreateInfoKHR::imageExtent to be VkSurfaceCapabilities2KHR::currentExtent"), + B("vk_wsi_disable_unordered_submits", False, + "Disable unordered WSI submits to workaround application synchronization bugs"), + B("vk_x11_ignore_suboptimal", False, + "Force the X11 WSI to never report VK_SUBOPTIMAL_KHR"), + + # Workaround subgroups + I("anv_assume_full_subgroups", 0, 0, 32, + "Allow assuming full subgroups requirement even when it's not specified explicitly and set the given size", + c_name="assume_full_subgroups"), + B("anv_assume_full_subgroups_with_barrier", False, + "Assume full subgroups requirement for compute shaders that use control barriers", + c_name="assume_full_subgroups_with_barrier"), + B("anv_assume_full_subgroups_with_shared_memory", False, + "Allow assuming full subgroups requirement for shaders using shared memory even when it's not specified explicitly", + c_name="assume_full_subgroups_with_shared_memory"), + B("anv_brw_disable_subgroup_size_control", False, + "Disable EXT_subgroup_size_control support when using brw compiler.", + c_name="disable_subgroup_size_control"), + B("anv_large_workgroup_non_coherent_image_workaround", False, + "Fixup image coherency qualifier for certain shaders.", + c_name="large_workgroup_non_coherent_image_workaround"), + + # Workaround various compiler related + B("anv_emulate_read_without_format", android_version >= 35, + "Emulate shaderStorageImageReadWithoutFormat with shader conversions", + c_name="read_without_format_emu"), + B("anv_sample_mask_out_opengl_behaviour", False, + "Ignore sample mask out when having single sampled target", + c_name="sample_mask_out_opengl_behaviour"), + B("anv_disable_link_time_optimization", False, + "Disable linking of graphics pipeline shaders", + c_name="disable_lto"), + F("lower_depth_range_rate", 1.0, 0.0, 1.0, + "Lower depth range for fixing misrendering issues due to z coordinate float point interpolation accuracy", + c_name="lower_depth_range_rate"), + B("force_indirect_descriptors", False, + "Use an indirection to access buffer/image/texture/sampler handles", + c_name="force_indirect_descriptors"), + B("limit_trig_input_range", False, + "Limit trig input range to [-2p : 2p] to improve sin/cos calculation precision on Intel", + c_name="limit_trig_input_range"), + B("fp64_workaround_enabled", False, + "Use softpf64 when the shader uses float64, but the device doesn't support that type", + c_name="fp64_emu"), + B("no_16bit", False, + "Disable 16-bit instructions", + c_name="no_16bit"), + B("vk_lower_terminate_to_discard", False, + "Lower terminate to discard (which is implicitly demote)", + c_name="lower_terminate_to_discard"), + I("shader_spilling_rate", 11, 0, 100, + "Speed up shader compilation by increasing number of spilled registers after ra_allocate failure", + c_name="shader_spilling_rate"), + + # Workaround various driver + B("always_flush_cache", False, + "Enable flushing GPU caches with each draw call", c_name="always_flush_cache"), + B("anv_force_filter_addr_rounding", False, + "Force min/mag filter address rounding to be enabled even for NEAREST sampling", + c_name="force_filter_addr_rounding"), + B("anv_disable_fcv", False, + "Disable FCV optimization", + c_name="disable_fcv"), + B("anv_enable_buffer_comp", False, + "Enable CCS on buffers where possible", + c_name="enable_buffer_comp"), + B("anv_external_memory_implicit_sync", False, + "Implicit sync on external BOs", + c_name="external_memory_implicit_sync"), + B("anv_fake_nonlocal_memory", False, + "Present host-visible device-local memory types as non device-local", + c_name="fake_nonlocal_mem"), + B("anv_upper_bound_descriptor_pool_sampler", False, + "Overallocate samplers in descriptor pools to workaround app bug", + c_name="upper_bound_desc_pool_sampler"), + B("anv_disable_drm_ccs_modifiers", False, + "Disable DRM CCS modifier usage", + c_name="disable_xe2_ccs_modifiers"), + B("compression_control_enabled", android_version >= 37, + "Enable VK_EXT_image_compression_control support", + c_name="compression_control_enabled"), + B("custom_border_colors_without_format", android_version == 0, + "Enable custom border colors without format", + c_name="custom_border_colors_without_format"), + B("fake_sparse", False, + "Advertise support for sparse binding of textures regardless of real support", + c_name="fake_sparse"), + I("force_vk_vendor", 0, -1, 2147483647, + "Override GPU vendor id", + c_name="force_vk_vendor"), + B("intel_sampler_route_to_lsc", False, + "Specific toggle to enable sampler route to LSC", + c_name="sampler_route_to_lsc"), + B("intel_storage_cache_policy_wt", False, + "Enable write-through cache policy for storage buffers/images", + c_name="storage_l1_wt"), + B("intel_tbimr", True, + "Enable TBIMR tiled rendering", + c_name="tbimr"), + B("intel_te_distribution", True, + "Enable tesselation distribution", + c_name="te_distribution"), + B("intel_vf_distribution", True, + "Enable geometry distribution", + c_name="vf_distribution"), + B("vk_require_astc", android_version >= 34, + "Implement emulated ASTC on HW that does not support it", + c_name="vk_require_astc"), + + # Workaround command emission + B("anv_barrier_post_untyped_clear_shader", False, + "Insert pipeline barriers post clearing shader on untyped data", + c_name="barrier_post_untyped_clear_shader"), + B("anv_barrier_post_typed_clear_shader", False, + "Insert pipeline barriers post clearing shader on typed data", + c_name="barrier_post_typed_clear_shader"), + B("intel_enable_wa_14018912822", False, + "Workaround for using zero blend constants", + c_name="wa_14018912822"), + B("intel_enable_wa_14024015672_msaa", False, + "Workaround for RHWO MSAA", + c_name="wa_14024015672_msaa"), + ] + + perf_options = [ + B("adaptive_sync", True, + "Adapt the monitor sync to the application performance (when possible)", + c_name="adaptive_sync"), + I("generated_indirect_threshold", 4, 0, 0x7fffffff, + "Indirect threshold count above which we start generating commands", + c_name="generated_indirect_threshold"), + I("generated_indirect_ring_threshold", 100, 0, 0x7fffffff, + "Indirect threshold count above which we start generating commands in a ring buffer", + c_name="generated_indirect_ring_threshold"), + I("query_clear_with_blorp_threshold", 6, 0, 0x7fffffff, + "Query threshold count above which query buffers are cleared with blorp", + c_name="query_clear_with_blorp_threshold"), + I("query_copy_with_shader_threshold", 6, 0, 0x7fffffff, + "Query threshold count above which query copies are executed with a shader", + c_name="query_copy_with_shader_threshold"), + + B("anv_disable_push_constant_alloc", True, + "Disable push constant space allocations", + c_name="disable_push_const_alloc"), + I("anv_binding_table_block_size", + 4096, 1024, 128 * 1024, + "Binding table block allocation size (3DSTATE_BINDING_TABLE_POOL_ALLOC)", + c_name="bt_block_size"), + B("anv_promote_cbv_to_push_buffers", False, + "Promote CBV 64bit pointers in push constant data to push buffers", + c_name="promote_cbv_push_buffer"), + B("anv_state_cache_perf_fix", False, + "Whether COMMON_SLICE_CHICKEN3 bit13 should be programmed to enable BTP+BTI RCC keying", + c_name="state_cache_perf_fix"), + B("anv_vf_component_packing", True, + "Vertex fetching component packing", + c_name="vf_comp_packing"), + I("anv_enable_opt_divergent_atomics", 0, 0, 3, + "Enable fusion of divergent atomics (see brw_divergent_atomics_flags)", + c_name="opt_divergent_atomics"), + I("anv_enable_opt_divergent_atomics_compute_only", 0, 0, 3, + "Enable fusion of divergent atomics for compute shaders only (see brw_divergent_atomics_flags)", + c_name="opt_divergent_atomics_compute_only"), + B("intel_force_compute_surface_prefetch", True, + "Enable binding table surface prefteching for compute shaders", + c_name="cs_surface_prefetch"), + B("intel_force_sampler_prefetch", False, + "Enable binding table sampler prefteching", + c_name="sampler_prefetch"), + + B("force_guc_low_latency", False, + "Enable low latency GuC strategy.", + c_name="guc_low_latency"), + + E("anv_stack_ids", 512, 256, 2048, + [EV(256, "256 stackids"), + EV(512, "512 stackids"), + EV(1024, "1024 stackids"), + EV(2048, "2048 stackids")], + "Control the number stackIDs (i.e. number of unique rays in the RT subsytem)", + c_name="stack_ids"), + E("anv_rt_dispatch_timeout", 512, 64, 4096, + [EV(64, "64 clocks"), + EV(128, "128 clocks"), + EV(192, "192 clocks"), + EV(256, "256 clocks"), + EV(384, "384 clocks"), + EV(512, "512 clocks"), + EV(640, "640 clocks"), + EV(768, "768 clocks"), + EV(896, "896 clocks"), + EV(1024, "1024 clocks"), + EV(1152, "1152 clocks"), + EV(1280, "1280 clocks"), + EV(1408, "1408 clocks"), + EV(1536, "1536 clocks"), + EV(1664, "1664 clocks"), + EV(1792, "1792 clocks"), + EV(1920, "1920 clocks"), + EV(2048, "2048 clocks"), + EV(4096, "4096 clocks")], + "Force BTD child dispatches if dispatches do not happen naturally for number of clocks equal to the programmed timeout counter", + c_name="rt_dispatch_timeout"), + ] + + feature_options = [ + B("anv_enable_scratch_page", True, + "Disables surface padding and suppresses all page faults, drops writes and returns zeros on reads.", + c_name="scratch_page"), + B("anv_enable_fully_covered", False, + "Enable fullyCoveredFragmentShaderInputVariable (Alchemist and newer only).", + c_name="fully_covered"), + ] + + return [Section("Debugging", debug_options, c_name="debug"), + Section("Features", feature_options, c_name="features"), + Section("Performance", perf_options, c_name="perf")] + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', required=True) + parser.add_argument('--drirc-src', required=True) + parser.add_argument('--drirc-hdr', required=True) + parser.add_argument('--android-ver', type=int, default=0, required=False) + parser.add_argument('--validate', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + + options = declare_options(args.android_ver) + + from drirc_gen import drirc_validate + drirc_validate([args.validate], options, driver="anv") + + from drirc_gen import drirc_generate + drirc_generate(args.drirc_src, args.drirc_hdr, "anv", options) + + +if __name__ == '__main__': + main() diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index f06b3a6fcd1..fca036879b8 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -541,7 +541,7 @@ anv_get_format(const struct anv_physical_device *device, VkFormat vk_format) * disabled. */ if ((format->flags & ANV_FORMAT_FLAG_NO_CBCWF) && - device->instance->custom_border_colors_without_format) + device->instance->drirc.debug.custom_border_colors_without_format) return NULL; return format; @@ -898,7 +898,7 @@ anv_get_color_format_features(const struct anv_physical_device *physical_device, */ if ((anv_format->flags & ANV_FORMAT_FLAG_STORAGE_FORMAT_EMULATED) == 0) { if (isl_format_supports_typed_reads(devinfo, base_isl_format) || - (physical_device->instance->emulate_read_without_format && + (physical_device->instance->drirc.debug.read_without_format_emu && isl_is_storage_image_format(devinfo, plane_format.isl_format))) flags |= VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT; if (isl_format_supports_typed_writes(devinfo, base_isl_format)) @@ -1240,7 +1240,7 @@ get_drm_format_modifier_properties_list(const struct anv_physical_device *physic continue; if (physical_device->info.ver >= 20 && - physical_device->instance->disable_xe2_drm_ccs_modifiers && + physical_device->instance->drirc.debug.disable_xe2_ccs_modifiers && isl_mod_info->supports_render_compression) continue; diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 96346e8653b..9d14dac1927 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -575,7 +575,7 @@ genX(cmd_buffer_rhwo_wa_14024015672)(struct anv_cmd_buffer *cmd_buffer, { struct anv_device *device = cmd_buffer->device; const bool rhwo_opt_enable = - !device->physical->instance->intel_enable_wa_14024015672_msaa && + !device->physical->instance->drirc.debug.wa_14024015672_msaa && msaa_enabled; if (intel_needs_workaround(device->info, 14024015672) && cmd_buffer->state.pending_rhwo_optimization_enabled != rhwo_opt_enable) diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 18382a454c1..730b1a32169 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -2039,7 +2039,7 @@ anv_image_init(struct anv_device *device, struct anv_image *image, /* Workaround to disable XE2 CCS modifiers from drirc. */ if (device->info->ver >= 20 && image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && - device->physical->instance->disable_xe2_drm_ccs_modifiers) { + device->physical->instance->drirc.debug.disable_xe2_ccs_modifiers) { anv_perf_warn(VK_LOG_OBJS(&image->vk.base), "Disabling aux: " "drirc disable_xe2_drm_ccs_modifiers"); diff --git a/src/intel/vulkan/anv_instance.c b/src/intel/vulkan/anv_instance.c index 8fe85d7e1e2..81eb3123163 100644 --- a/src/intel/vulkan/anv_instance.c +++ b/src/intel/vulkan/anv_instance.c @@ -7,118 +7,6 @@ #include "util/driconf.h" -static const driOptionDescription anv_dri_options[] = { - DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_ADAPTIVE_SYNC(true) - DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0) - DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false) - DRI_CONF_VK_WSI_DISABLE_UNORDERED_SUBMITS(false) - DRI_CONF_VK_XWAYLAND_WAIT_READY(false) - DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0) - DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(false) - DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_SHARED_MEMORY(false) - DRI_CONF_ANV_BARRIER_POST_TYPED_CLEAR_SHADER(false) - DRI_CONF_ANV_BARRIER_POST_UNTYPED_CLEAR_SHADER(false) - DRI_CONF_ANV_DISABLE_FCV(false) - DRI_CONF_ANV_ENABLE_BUFFER_COMP(false) - DRI_CONF_ANV_DISABLE_DRM_AUX_MODIFIERS(false) - DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(true) - DRI_CONF_ANV_FORCE_GUC_LOW_LATENCY(false) - DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false) - DRI_CONF_ANV_FORCE_FILTER_ADDR_ROUNDING(false) - DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false) - DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4) - DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(100) - DRI_CONF_ANV_PROMOTE_CBV_TO_PUSH_BUFFERS(false) - DRI_CONF_ANV_STATE_CACHE_PERF_FIX(false) - DRI_CONF_NO_16BIT(false) - DRI_CONF_INTEL_BINDING_TABLE_BLOCK_SIZE(BINDING_TABLE_POOL_DEFAULT_BLOCK_SIZE, - 1024, 128 * 1024) - DRI_CONF_INTEL_DISABLE_PUSH_CONSTANT_ALLOC(true) - DRI_CONF_INTEL_ENABLE_WA_14018912822(false) - DRI_CONF_INTEL_ENABLE_WA_14024015672_MSAA(false) - DRI_CONF_INTEL_SAMPLER_ROUTE_TO_LSC(false) - DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6) - DRI_CONF_ANV_QUERY_COPY_WITH_SHADER_THRESHOLD(6) - DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(false) - DRI_CONF_ANV_DISABLE_LINK_TIME_OPTIMIZATION(false) - DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS(0) - DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS_COMPUTE_ONLY(0) - DRI_CONF_ANV_BRW_DISABLE_SUBGROUP_SIZE_CONTROL(false) - DRI_CONF_SHADER_SPILLING_RATE(11) - DRI_CONFIG_INTEL_FORCE_COMPUTE_SURFACE_PREFETCH(true) - DRI_CONFIG_INTEL_FORCE_SAMPLER_PREFETCH(false) - DRI_CONFIG_INTEL_TBIMR(true) - DRI_CONFIG_INTEL_VF_DISTRIBUTION(true) - DRI_CONFIG_INTEL_TE_DISTRIBUTION(true) - DRI_CONFIG_INTEL_STORAGE_CACHE_POLICY_WT(false) - DRI_CONF_ANV_LARGE_WORKGROUP_NON_COHERENT_IMAGE_WORKAROUND(false) -#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 37 - DRI_CONF_ANV_COMPRESSION_CONTROL_ENABLED(true) -#else - DRI_CONF_ANV_COMPRESSION_CONTROL_ENABLED(false) -#endif - DRI_CONF_ANV_FAKE_NONLOCAL_MEMORY(false) - DRI_CONF_OPT_E(intel_stack_id, 512, 256, 2048, - "Control the number stackIDs (i.e. number of unique rays in the RT subsytem)", - DRI_CONF_ENUM(256, "256 stackids") - DRI_CONF_ENUM(512, "512 stackids") - DRI_CONF_ENUM(1024, "1024 stackids") - DRI_CONF_ENUM(2048, "2048 stackids")) - DRI_CONF_OPT_E(dispatch_timeout_counter, 512, 64, 4096, - "Force BTD child dispatches if dispatches do not happen naturally for number of clocks equal to the programmed timeout counter", - DRI_CONF_ENUM(64, "64 clocks") - DRI_CONF_ENUM(128, "128 clocks") - DRI_CONF_ENUM(192, "192 clocks") - DRI_CONF_ENUM(256, "256 clocks") - DRI_CONF_ENUM(384, "384 clocks") - DRI_CONF_ENUM(512, "512 clocks") - DRI_CONF_ENUM(640, "640 clocks") - DRI_CONF_ENUM(768, "768 clocks") - DRI_CONF_ENUM(896, "896 clocks") - DRI_CONF_ENUM(1024, "1024 clocks") - DRI_CONF_ENUM(1152, "1152 clocks") - DRI_CONF_ENUM(1280, "1280 clocks") - DRI_CONF_ENUM(1408, "1408 clocks") - DRI_CONF_ENUM(1536, "1536 clocks") - DRI_CONF_ENUM(1664, "1664 clocks") - DRI_CONF_ENUM(1792, "1792 clocks") - DRI_CONF_ENUM(1920, "1920 clocks") - DRI_CONF_ENUM(2048, "2048 clocks") - DRI_CONF_ENUM(4096, "4096 clocks")) - DRI_CONF_ANV_UPPER_BOUND_DESCRIPTOR_POOL_SAMPLER(false) - DRI_CONF_ANV_ENABLE_FULLY_COVERED(false) - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_DEBUG - DRI_CONF_ALWAYS_FLUSH_CACHE(false) - DRI_CONF_VK_LOWER_TERMINATE_TO_DISCARD(false) - DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false) - DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false) - DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false) - DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false) -#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 35 - DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(true) -#else - DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(false) -#endif - DRI_CONF_FORCE_VK_VENDOR() - DRI_CONF_FAKE_SPARSE(false) - DRI_CONF_CUSTOM_BORDER_COLORS_WITHOUT_FORMAT(!DETECT_OS_ANDROID) -#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 34 - DRI_CONF_VK_REQUIRE_ASTC(true) -#else - DRI_CONF_VK_REQUIRE_ASTC(false) -#endif - DRI_CONF_ANV_VF_COMPONENT_PACKING(true) - DRI_CONF_ANV_ENABLE_SCRATCH_PAGE(true) - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_QUALITY - DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE() - DRI_CONF_SECTION_END -}; - static const struct debug_control debug_control[] = { { "bindless", ANV_DEBUG_BINDLESS}, { "desc-dirty", ANV_DEBUG_DESCRIPTOR_DIRTY}, @@ -208,139 +96,41 @@ VkResult anv_EnumerateInstanceExtensionProperties( static void anv_init_dri_options(struct anv_instance *instance) { - driParseOptionInfo(&instance->available_dri_options, anv_dri_options, - ARRAY_SIZE(anv_dri_options)); - driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, - &(driConfigFileParseParams) { - .driverName = "anv", - .applicationName = instance->vk.app_info.app_name, - .applicationVersion = instance->vk.app_info.app_version, - .engineName = instance->vk.app_info.engine_name, - .engineVersion = instance->vk.app_info.engine_version, - }); - - instance->assume_full_subgroups = - driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups"); - instance->assume_full_subgroups_with_barrier = - driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_barrier"); - instance->assume_full_subgroups_with_shared_memory = - driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups_with_shared_memory"); - instance->limit_trig_input_range = - driQueryOptionb(&instance->dri_options, "limit_trig_input_range"); - instance->sample_mask_out_opengl_behaviour = - driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour"); - instance->force_filter_addr_rounding = - driQueryOptionb(&instance->dri_options, "anv_force_filter_addr_rounding"); - instance->promote_cbv_to_push_buffers = - driQueryOptionb(&instance->dri_options, "anv_promote_cbv_to_push_buffers"); - instance->state_cache_perf_fix = - driQueryOptionb(&instance->dri_options, "anv_state_cache_perf_fix"); - instance->lower_depth_range_rate = - driQueryOptionf(&instance->dri_options, "lower_depth_range_rate"); - instance->no_16bit = - driQueryOptionb(&instance->dri_options, "no_16bit"); - instance->intel_enable_wa_14018912822 = - driQueryOptionb(&instance->dri_options, "intel_enable_wa_14018912822"); - instance->intel_enable_wa_14024015672_msaa = - driQueryOptionb(&instance->dri_options, "intel_enable_wa_14024015672_msaa"); - instance->emulate_read_without_format = - driQueryOptionb(&instance->dri_options, "anv_emulate_read_without_format"); - instance->fp64_workaround_enabled = - driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled"); - instance->generated_indirect_threshold = - driQueryOptioni(&instance->dri_options, "generated_indirect_threshold"); - instance->generated_indirect_ring_threshold = - driQueryOptioni(&instance->dri_options, "generated_indirect_ring_threshold"); - instance->query_clear_with_blorp_threshold = - driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold"); - instance->query_copy_with_shader_threshold = - driQueryOptioni(&instance->dri_options, "query_copy_with_shader_threshold"); - instance->force_vk_vendor = - driQueryOptioni(&instance->dri_options, "force_vk_vendor"); - instance->has_fake_sparse = - driQueryOptionb(&instance->dri_options, "fake_sparse"); - instance->force_sampler_prefetch = - driQueryOptionb(&instance->dri_options, "intel_force_sampler_prefetch"); - instance->force_compute_surface_prefetch = - driQueryOptionb(&instance->dri_options, "intel_force_compute_surface_prefetch"); - instance->enable_tbimr = driQueryOptionb(&instance->dri_options, "intel_tbimr"); - instance->enable_vf_distribution = - driQueryOptionb(&instance->dri_options, "intel_vf_distribution"); - instance->enable_te_distribution = - driQueryOptionb(&instance->dri_options, "intel_te_distribution"); - instance->large_workgroup_non_coherent_image_workaround = - driQueryOptionb(&instance->dri_options, "anv_large_workgroup_non_coherent_image_workaround"); - instance->disable_fcv = - driQueryOptionb(&instance->dri_options, "anv_disable_fcv"); - instance->enable_buffer_comp = - driQueryOptionb(&instance->dri_options, "anv_enable_buffer_comp"); - instance->external_memory_implicit_sync = - driQueryOptionb(&instance->dri_options, "anv_external_memory_implicit_sync"); - instance->compression_control_enabled = - driQueryOptionb(&instance->dri_options, "compression_control_enabled"); - instance->anv_fake_nonlocal_memory = - driQueryOptionb(&instance->dri_options, "anv_fake_nonlocal_memory"); - instance->anv_upper_bound_descriptor_pool_sampler = - driQueryOptionb(&instance->dri_options, - "anv_upper_bound_descriptor_pool_sampler"); - instance->custom_border_colors_without_format = - driQueryOptionb(&instance->dri_options, - "custom_border_colors_without_format"); - instance->vf_component_packing = - driQueryOptionb(&instance->dri_options, "anv_vf_component_packing"); - instance->lower_terminate_to_discard = - driQueryOptionb(&instance->dri_options, "vk_lower_terminate_to_discard"); - instance->disable_xe2_drm_ccs_modifiers = - driQueryOptionb(&instance->dri_options, "anv_disable_drm_ccs_modifiers"); - instance->binding_table_block_size = util_next_power_of_two( - driQueryOptioni(&instance->dri_options, "intel_binding_table_block_size")); - instance->barrier_post_typed_clear_shader = - driQueryOptionb(&instance->dri_options, "anv_barrier_post_typed_clear_shader"); - instance->barrier_post_untyped_clear_shader = - driQueryOptionb(&instance->dri_options, "anv_barrier_post_untyped_clear_shader"); - instance->disable_push_constant_alloc = - driQueryOptionb(&instance->dri_options, "intel_disable_push_constant_alloc"); - instance->enable_fully_covered = - driQueryOptionb(&instance->dri_options, "anv_enable_fully_covered"); + anv_parse_dri_options(&instance->drirc, + &(driConfigFileParseParams) { + .driverName = "anv", + .applicationName = instance->vk.app_info.app_name, + .applicationVersion = instance->vk.app_info.app_version, + .engineName = instance->vk.app_info.engine_name, + .engineVersion = instance->vk.app_info.engine_version, + }); if (instance->vk.app_info.engine_name && !strcmp(instance->vk.app_info.engine_name, "DXVK")) { - /* Since 2.3.1+, DXVK uses the application version to signal D3D9. */ - const bool is_d3d9 = instance->vk.app_info.app_version & 0x1; + /* Since 2.3.1+, DXVK uses the application version to signal D3D9. */ + const bool is_d3d9 = instance->vk.app_info.app_version & 0x1; - /* This driconf bit enables D3D10+ behaviour for texture coordinate - * rounding. As D3D9 wants the Vulkan behaviour instead, apply the - * workaround only to D3D10+. - */ - instance->force_filter_addr_rounding &= !is_d3d9; + /* This driconf bit enables D3D10+ behaviour for texture coordinate + * rounding. As D3D9 wants the Vulkan behaviour instead, apply the + * workaround only to D3D10+. + */ + instance->drirc.debug.force_filter_addr_rounding &= !is_d3d9; } - instance->disable_lto = - driQueryOptionb(&instance->dri_options, "anv_disable_link_time_optimization"); - instance->enable_opt_divergent_atomics = - driQueryOptioni(&instance->dri_options, "anv_enable_opt_divergent_atomics"); - instance->enable_opt_divergent_atomics_compute_only = - driQueryOptioni(&instance->dri_options, "anv_enable_opt_divergent_atomics_compute_only"); - - instance->stack_ids = driQueryOptioni(&instance->dri_options, "intel_stack_id"); - switch (instance->stack_ids) { + switch (instance->drirc.perf.stack_ids) { case 256: case 512: case 1024: case 2048: break; default: - mesa_logw("Invalid value provided for drirc intel_stack_id=%u, reverting to 512.", - instance->stack_ids); - instance->stack_ids = 512; + mesa_logw("Invalid value provided for drirc anv_stack_id=%u, reverting to 512.", + instance->drirc.perf.stack_ids); + instance->drirc.perf.stack_ids = 512; break; } - instance->force_guc_low_latency = - driQueryOptionb(&instance->dri_options, "force_guc_low_latency"); - instance->dispatch_timeout_counter = - driQueryOptioni(&instance->dri_options, "dispatch_timeout_counter"); - switch(instance->dispatch_timeout_counter) { + switch(instance->drirc.perf.rt_dispatch_timeout) { case 64: case 128: case 192: @@ -362,9 +152,9 @@ anv_init_dri_options(struct anv_instance *instance) case 4096: break; default: - mesa_logw("Invalid value provided for drirc dispatch_timeout_counter=%u, reverting to 512.", - instance->dispatch_timeout_counter); - instance->dispatch_timeout_counter = 512; + mesa_logw("Invalid value provided for drirc anv_rt_dispatch_timeout=%u, reverting to 512.", + instance->drirc.perf.rt_dispatch_timeout); + instance->drirc.perf.rt_dispatch_timeout = 512; break; } } @@ -432,8 +222,8 @@ void anv_DestroyInstance( VG(VALGRIND_DESTROY_MEMPOOL(instance)); - driDestroyOptionCache(&instance->dri_options); - driDestroyOptionInfo(&instance->available_dri_options); + driDestroyOptionCache(&instance->drirc.options); + driDestroyOptionInfo(&instance->drirc.available_options); vk_instance_finish(&instance->vk); vk_free(&instance->vk.alloc, instance); diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index c327fd0bc76..6cc42f892d7 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -143,7 +143,7 @@ get_device_extensions(const struct anv_physical_device *device, *ext = (struct vk_device_extension_table) { .KHR_8bit_storage = true, - .KHR_16bit_storage = !device->instance->no_16bit, + .KHR_16bit_storage = !device->instance->drirc.debug.no_16bit, .KHR_acceleration_structure = rt_enabled, .KHR_bind_memory2 = true, .KHR_buffer_device_address = true, @@ -227,7 +227,7 @@ get_device_extensions(const struct anv_physical_device *device, .KHR_shader_constant_data = true, .KHR_shader_draw_parameters = true, .KHR_shader_expect_assume = true, - .KHR_shader_float16_int8 = !device->instance->no_16bit, + .KHR_shader_float16_int8 = !device->instance->drirc.debug.no_16bit, .KHR_shader_float_controls = true, .KHR_shader_float_controls2 = true, .KHR_shader_integer_dot_product = true, @@ -469,7 +469,7 @@ get_features(const struct anv_physical_device *pdevice, * read/writes, on Gfx11 & Gfx12.0 we emulate for 3 formats. */ .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125 || - pdevice->instance->emulate_read_without_format, + pdevice->instance->drirc.debug.read_without_format_emu, .shaderStorageImageWriteWithoutFormat = true, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = true, @@ -478,7 +478,7 @@ get_features(const struct anv_physical_device *pdevice, .shaderClipDistance = true, .shaderCullDistance = true, .shaderFloat64 = pdevice->info.has_64bit_float || - pdevice->instance->fp64_workaround_enabled, + pdevice->instance->drirc.debug.fp64_emu, .shaderInt64 = true, .shaderInt16 = true, .shaderResourceMinLod = true, @@ -499,8 +499,8 @@ get_features(const struct anv_physical_device *pdevice, .inheritedQueries = true, /* Vulkan 1.1 */ - .storageBuffer16BitAccess = !pdevice->instance->no_16bit, - .uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit, + .storageBuffer16BitAccess = !pdevice->instance->drirc.debug.no_16bit, + .uniformAndStorageBuffer16BitAccess = !pdevice->instance->drirc.debug.no_16bit, .storagePushConstant16 = true, .storageInputOutput16 = true, .multiview = true, @@ -520,8 +520,8 @@ get_features(const struct anv_physical_device *pdevice, .storagePushConstant8 = true, .shaderBufferInt64Atomics = true, .shaderSharedInt64Atomics = false, - .shaderFloat16 = !pdevice->instance->no_16bit, - .shaderInt8 = !pdevice->instance->no_16bit, + .shaderFloat16 = !pdevice->instance->drirc.debug.no_16bit, + .shaderInt8 = !pdevice->instance->drirc.debug.no_16bit, .descriptorIndexing = true, .shaderInputAttachmentArrayDynamicIndexing = false, @@ -619,7 +619,7 @@ get_features(const struct anv_physical_device *pdevice, /* VK_EXT_custom_border_color */ .customBorderColors = true, .customBorderColorWithoutFormat = - pdevice->instance->custom_border_colors_without_format, + pdevice->instance->drirc.debug.custom_border_colors_without_format, /* VK_KHR_depth_clamp_zero_one */ .depthClampZeroOne = true, @@ -1356,8 +1356,8 @@ get_properties(const struct anv_physical_device *pdevice, *props = (struct vk_properties) { .apiVersion = ANV_API_VERSION, .driverVersion = vk_get_driver_version(), - .vendorID = pdevice->instance->force_vk_vendor != 0 ? - pdevice->instance->force_vk_vendor : 0x8086, + .vendorID = pdevice->instance->drirc.debug.force_vk_vendor != 0 ? + pdevice->instance->drirc.debug.force_vk_vendor : 0x8086, .deviceID = pdevice->info.pci_device_id, .deviceType = pdevice->info.has_local_mem ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : @@ -1702,7 +1702,8 @@ get_properties(const struct anv_physical_device *pdevice, props->degenerateTrianglesRasterized = true; props->degenerateLinesRasterized = false; - bool fully_covered = pdevice->instance->enable_fully_covered && + const bool fully_covered = + pdevice->instance->drirc.features.fully_covered && pdevice->info.verx10 >= 125; props->fullyCoveredFragmentShaderInputVariable = fully_covered; @@ -2386,7 +2387,7 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd) * is now inconsistent with some of the memory types, but the game doesn't * seem to care about it. */ - if (device->instance->anv_fake_nonlocal_memory && + if (device->instance->drirc.debug.fake_nonlocal_mem && !anv_physical_device_has_vram(device)) { const uint32_t base_types_count = device->memory.type_count; for (int i = 0; i < base_types_count; i++) { @@ -2844,7 +2845,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->info.xe_has_state_cache_perf_fix); device->rt_change_needs_flush = - !instance->state_cache_perf_fix || !platform_supports_btp_bit_rcc; + !instance->drirc.perf.state_cache_perf_fix || + !platform_supports_btp_bit_rcc; device->gtt_size = device->info.gtt_size ? device->info.gtt_size : device->info.aperture_bytes; @@ -2866,19 +2868,17 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->has_astc_ldr = isl_format_supports_sampling(&device->info, ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16); - if (!device->has_astc_ldr && - driQueryOptionb(&device->instance->dri_options, "vk_require_astc")) + if (!device->has_astc_ldr && instance->drirc.debug.vk_require_astc) device->emu_astc_ldr = true; if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) { device->flush_astc_ldr_void_extent_denorms = device->has_astc_ldr && !device->emu_astc_ldr; } device->disable_fcv = device->info.verx10 >= 125 || - instance->disable_fcv; + instance->drirc.debug.disable_fcv; device->brw_disable_subgroup_size_control = !intel_use_jay(&device->info, MESA_SHADER_COMPUTE) && - driQueryOptionb(&device->instance->dri_options, - "anv_brw_disable_subgroup_size_control"); + instance->drirc.debug.disable_subgroup_size_control; result = anv_physical_device_init_heaps(device, fd); if (result != VK_SUCCESS) @@ -2894,7 +2894,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, * we can't support EXT_image_compression_control on Xe2+. */ device->has_compression_control = - instance->compression_control_enabled && device->info.ver < 20; + instance->drirc.debug.compression_control_enabled && + device->info.ver < 20; if (is_virtio) { struct util_sync_provider *sync = intel_virtio_sync_provider(fd); @@ -2915,7 +2916,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->indirect_descriptors = !intel_has_extended_bindless(&devinfo) || - driQueryOptionb(&instance->dri_options, "force_indirect_descriptors"); + instance->drirc.debug.force_indirect_descriptors; device->alloc_aux_tt_mem = device->info.has_aux_map && device->info.verx10 >= 125; @@ -2940,12 +2941,12 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, } } if (device->sparse_type == ANV_SPARSE_TYPE_NOT_SUPPORTED) { - if (instance->has_fake_sparse) + if (instance->drirc.debug.fake_sparse) device->sparse_type = ANV_SPARSE_TYPE_FAKE; } device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) || - driQueryOptionb(&instance->dri_options, "always_flush_cache"); + instance->drirc.debug.always_flush_cache; /* The ring buffer mechanism for page fault reporting is not supported until * PVC (unsupported by our Mesa driver), so we keep the scratch page enabled @@ -2953,7 +2954,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, */ device->has_scratch_page = device->info.ver < 20 || device->info.kmd_type == INTEL_KMD_TYPE_I915 || - driQueryOptionb(&instance->dri_options, "anv_enable_scratch_page"); + instance->drirc.features.scratch_page; device->compiler = brw_compiler_create(NULL, &device->info); if (device->compiler == NULL) { @@ -2962,15 +2963,12 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, } device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; - device->compiler->spilling_rate = - driQueryOptioni(&instance->dri_options, "shader_spilling_rate"); + device->compiler->spilling_rate = instance->drirc.debug.shader_spilling_rate; isl_device_init(&device->isl_dev, &device->info); device->isl_dev.buffer_length_in_aux_addr = !intel_needs_workaround(device->isl_dev.info, 14019708328); - device->isl_dev.sampler_route_to_lsc = - driQueryOptionb(&instance->dri_options, "intel_sampler_route_to_lsc"); - device->isl_dev.l1_storage_wt = - driQueryOptionb(&instance->dri_options, "intel_storage_cache_policy_wt"); + device->isl_dev.sampler_route_to_lsc = instance->drirc.debug.sampler_route_to_lsc; + device->isl_dev.l1_storage_wt = instance->drirc.debug.storage_l1_wt; device->isl_dev.requires_padding = !device->has_scratch_page; result = anv_physical_device_init_uuids(device); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 4f632340ac3..989f32057fe 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -113,6 +113,8 @@ #include "vk_video.h" #include "vk_meta.h" +#include "anv_drirc.h" + #ifdef __cplusplus extern "C" { #endif @@ -1764,73 +1766,7 @@ static inline bool anv_needs_printf_buffer(void) struct anv_instance { struct vk_instance vk; - struct driOptionCache dri_options; - struct driOptionCache available_dri_options; - - bool enable_tbimr; - bool enable_vf_distribution; - bool enable_te_distribution; - bool external_memory_implicit_sync; - bool force_guc_low_latency; - bool emulate_read_without_format; - bool promote_cbv_to_push_buffers; - bool enable_fully_covered; - - /** - * Workarounds for game bugs. - */ - uint8_t assume_full_subgroups; - bool assume_full_subgroups_with_barrier; - bool assume_full_subgroups_with_shared_memory; - bool limit_trig_input_range; - bool lower_terminate_to_discard; - bool sample_mask_out_opengl_behaviour; - bool force_filter_addr_rounding; - bool fp64_workaround_enabled; - float lower_depth_range_rate; - unsigned force_vk_vendor; - bool has_fake_sparse; - bool disable_fcv; - bool enable_buffer_comp; - bool disable_xe2_drm_ccs_modifiers; - bool compression_control_enabled; - bool anv_fake_nonlocal_memory; - bool anv_upper_bound_descriptor_pool_sampler; - bool custom_border_colors_without_format; - bool large_workgroup_non_coherent_image_workaround; - bool barrier_post_typed_clear_shader; - bool barrier_post_untyped_clear_shader; - - /* HW workarounds */ - bool no_16bit; - bool intel_enable_wa_14018912822; - bool intel_enable_wa_14024015672_msaa; - - /** - * Performance workarounds - */ - unsigned binding_table_block_size; - bool disable_lto; - bool disable_push_constant_alloc; - enum brw_divergent_atomics_flags enable_opt_divergent_atomics; - enum brw_divergent_atomics_flags enable_opt_divergent_atomics_compute_only; - bool force_sampler_prefetch; - bool force_compute_surface_prefetch; - unsigned generated_indirect_threshold; - unsigned generated_indirect_ring_threshold; - unsigned query_clear_with_blorp_threshold; - unsigned query_copy_with_shader_threshold; - bool state_cache_perf_fix; - bool vf_component_packing; - - /** - * Ray tracing configuration. - */ - unsigned stack_ids; - /** - * 3DSTATE_BTD dispatch timeout counter configuration. - */ - unsigned dispatch_timeout_counter; + struct anv_drirc drirc; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c index 498d7e3ac89..2739cee8600 100644 --- a/src/intel/vulkan/anv_shader_compile.c +++ b/src/intel/vulkan/anv_shader_compile.c @@ -245,32 +245,32 @@ anv_shader_init_uuid(struct anv_physical_device *device) const int spilling_rate = device->compiler->spilling_rate; _mesa_blake3_update(&ctx, &spilling_rate, sizeof(spilling_rate)); - const uint8_t afs = device->instance->assume_full_subgroups; + const uint8_t afs = device->instance->drirc.debug.assume_full_subgroups; _mesa_blake3_update(&ctx, &afs, sizeof(afs)); - const bool afswb = device->instance->assume_full_subgroups_with_barrier; + const bool afswb = device->instance->drirc.debug.assume_full_subgroups_with_barrier; _mesa_blake3_update(&ctx, &afswb, sizeof(afswb)); - const bool afs_shm = device->instance->assume_full_subgroups_with_shared_memory; + const bool afs_shm = device->instance->drirc.debug.assume_full_subgroups_with_shared_memory; _mesa_blake3_update(&ctx, &afs_shm, sizeof(afs_shm)); - const bool erwf = device->instance->emulate_read_without_format; - _mesa_blake3_update(&ctx, &erwf, sizeof(erwf)); + const bool rwfe = device->instance->drirc.debug.read_without_format_emu; + _mesa_blake3_update(&ctx, &rwfe, sizeof(rwfe)); - const bool lttd = device->instance->lower_terminate_to_discard; + const bool lttd = device->instance->drirc.debug.lower_terminate_to_discard; _mesa_blake3_update(&ctx, <td, sizeof(lttd)); const bool large_wg_wa = - device->instance->large_workgroup_non_coherent_image_workaround; + device->instance->drirc.debug.large_workgroup_non_coherent_image_workaround; _mesa_blake3_update(&ctx, &large_wg_wa, sizeof(large_wg_wa)); - const bool lto_disable = device->instance->disable_lto; + const bool lto_disable = device->instance->drirc.debug.disable_lto; _mesa_blake3_update(&ctx, <o_disable, sizeof(lto_disable)); const bool btp_bti_rcc = device->rt_change_needs_flush; _mesa_blake3_update(&ctx, &btp_bti_rcc, sizeof(btp_bti_rcc)); - const bool cbv_push_buffer = device->instance->promote_cbv_to_push_buffers; + const bool cbv_push_buffer = device->instance->drirc.perf.promote_cbv_push_buffer; _mesa_blake3_update(&ctx, &cbv_push_buffer, sizeof(cbv_push_buffer)); uint8_t blake3[BLAKE3_KEY_LEN]; @@ -318,7 +318,7 @@ anv_shader_get_spirv_options(struct vk_physical_device *device, .min_ssbo_alignment = ANV_SSBO_ALIGNMENT, .workarounds = { - .lower_terminate_to_discard = pdevice->instance->lower_terminate_to_discard, + .lower_terminate_to_discard = pdevice->instance->drirc.debug.lower_terminate_to_discard, }, }; } @@ -381,8 +381,8 @@ populate_base_prog_key(struct brw_base_prog_key *key, */ if (rs != NULL) key->robust_flags = anv_get_robust_flags(rs); - key->divergent_atomics_flags = pdevice->instance->enable_opt_divergent_atomics; - key->limit_trig_input_range = pdevice->instance->limit_trig_input_range; + key->divergent_atomics_flags = pdevice->instance->drirc.perf.opt_divergent_atomics; + key->limit_trig_input_range = pdevice->instance->drirc.debug.limit_trig_input_range; } static void @@ -418,7 +418,7 @@ populate_vs_prog_key(struct brw_vs_prog_key *key, populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages); - key->vf_component_packing = pdevice->instance->vf_component_packing; + key->vf_component_packing = pdevice->instance->drirc.perf.vf_comp_packing; } static void @@ -609,7 +609,7 @@ populate_fs_prog_key(struct brw_fs_prog_key *key, (state->ms->alpha_to_coverage_enable ? INTEL_ALWAYS : INTEL_NEVER); /* TODO: We should make this dynamic */ - if (pdevice->instance->sample_mask_out_opengl_behaviour) + if (pdevice->instance->drirc.debug.sample_mask_out_opengl_behaviour) key->ignore_sample_mask_out = !key->multisample_fbo; } else { /* Consider all inputs as valid until we look at the NIR variables. */ @@ -672,7 +672,7 @@ populate_cs_prog_key(struct brw_cs_prog_key *key, populate_base_prog_key(&key->base, device, rs); key->base.divergent_atomics_flags |= - pdevice->instance->enable_opt_divergent_atomics_compute_only; + pdevice->instance->drirc.perf.opt_divergent_atomics_compute_only; } static void @@ -872,7 +872,7 @@ anv_fixup_subgroup_size(struct anv_device *device, nir_shader *shader) * which can cause bugs, as they may expect bigger size of the * subgroup than we choose for the execution. */ - if (instance->assume_full_subgroups && + if (instance->drirc.debug.assume_full_subgroups && info->uses_wide_subgroup_intrinsics && info->api_subgroup_size == BRW_SUBGROUP_SIZE && local_size && @@ -881,7 +881,7 @@ anv_fixup_subgroup_size(struct anv_device *device, nir_shader *shader) info->min_subgroup_size = BRW_SUBGROUP_SIZE; } - if (instance->assume_full_subgroups_with_barrier && + if (instance->drirc.debug.assume_full_subgroups_with_barrier && info->stage == MESA_SHADER_COMPUTE && device->info->verx10 <= 125 && info->uses_control_barrier && @@ -895,7 +895,7 @@ anv_fixup_subgroup_size(struct anv_device *device, nir_shader *shader) /* Similarly, sometimes games rely on the implicit synchronization of * the shared memory accesses, and choosing smaller subgroups than the game * expects will cause bugs. */ - if (instance->assume_full_subgroups_with_shared_memory && + if (instance->drirc.debug.assume_full_subgroups_with_shared_memory && info->shared_size > 0 && info->min_subgroup_size != info->max_subgroup_size && local_size && @@ -1550,7 +1550,7 @@ anv_shader_lower_nir(struct anv_device *device, } if (nir->info.stage == MESA_SHADER_COMPUTE && - pdevice->instance->large_workgroup_non_coherent_image_workaround) { + pdevice->instance->drirc.debug.large_workgroup_non_coherent_image_workaround) { const unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; @@ -1653,7 +1653,7 @@ anv_shader_lower_nir(struct anv_device *device, .lower_loads = true, .lower_stores_64bit = true, .lower_loads_without_formats = - pdevice->instance->emulate_read_without_format, + pdevice->instance->drirc.debug.read_without_format_emu, }); if (lower_64bit_atomics) { @@ -1674,7 +1674,7 @@ anv_shader_lower_nir(struct anv_device *device, nir_address_format_32bit_offset); /* Realign pointers to CBV on stages that can promote to push buffers. */ - if (pdevice->instance->promote_cbv_to_push_buffers && + if (pdevice->instance->drirc.perf.promote_cbv_push_buffer && nir->info.stage <= MESA_SHADER_FRAGMENT) { /* Cleanup for the analysis, we don't want any ALU */ cleanup_nir(nir); diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index d7ba71126a8..f9ee1e2bb39 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -48,7 +48,7 @@ anv_init_wsi(struct anv_physical_device *physical_device) anv_wsi_proc_addr, &physical_device->instance->vk.alloc, physical_device->master_fd, - &physical_device->instance->dri_options, + &physical_device->instance->drirc.options, &(struct wsi_device_options){ .sw_device = false, .emulate_24as32 = true, diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index 56fe5a8c481..2ba75d704c2 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -62,7 +62,7 @@ genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer, total_scratch, protected); #if GFX_VER >= 20 - switch (cmd_buffer->device->physical->instance->stack_ids) { + switch (cmd_buffer->device->physical->instance->drirc.perf.stack_ids) { case 256: cfe.StackIDControl = StackIDs256; break; case 512: cfe.StackIDControl = StackIDs512; break; case 1024: cfe.StackIDControl = StackIDs1024; break; @@ -478,12 +478,13 @@ cmd_buffer_post_dispatch_wa(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute; + const struct anv_instance *instance = cmd_buffer->device->physical->instance; /* Workaround WaW hazards in applications that clear a buffer and start * writing to it immediately without a barrier between the clear & write * operations. */ - if (cmd_buffer->device->physical->instance->barrier_post_typed_clear_shader && + if (instance->drirc.debug.barrier_post_typed_clear_shader && (comp_state->shader->bind_map.inferred_behavior & ANV_PIPELINE_BEHAVIOR_CLEAR_TYPED)) { anv_add_pending_pipe_bits(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, @@ -491,7 +492,7 @@ cmd_buffer_post_dispatch_wa(struct anv_cmd_buffer *cmd_buffer) ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, "clear shader typed L1 flush app wa"); } - if (cmd_buffer->device->physical->instance->barrier_post_untyped_clear_shader && + if (instance->drirc.debug.barrier_post_untyped_clear_shader && (comp_state->shader->bind_map.inferred_behavior & ANV_PIPELINE_BEHAVIOR_CLEAR_UNTYPED)) { anv_add_pending_pipe_bits(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, @@ -1290,8 +1291,9 @@ cmd_buffer_flush_rt_state(struct anv_cmd_buffer *cmd_buffer, #endif anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) { + const struct anv_instance *instance = device->physical->instance; uint32_t dispatch_timeout_counter = - cmd_buffer->device->physical->instance->dispatch_timeout_counter; + instance->drirc.perf.rt_dispatch_timeout; uint32_t clamped_timeout_counter = genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter); #if GFX_VERx10 >= 200 @@ -1328,7 +1330,7 @@ cmd_buffer_flush_rt_state(struct anv_cmd_buffer *cmd_buffer, btd.DynamicstackmanagementmechanismHITREWARD = HIT_REWARD_1; btd.DynamicstackmanagementmechanismSCALINGFACTOR = SCALING_FACTOR_4; btd.DynamicstackmanagementmechanismREDUCTIONCAP = - get_stack_id_reduction_cap(cmd_buffer->device->physical->instance->stack_ids); + get_stack_id_reduction_cap(cmd_buffer->device->physical->instance->drirc.perf.stack_ids); #endif } diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index d86a7a0ef12..4c7771ebd64 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -107,7 +107,7 @@ genX(batch_emit_push_constants_alloc)(struct anv_batch *batch, static void cmd_buffer_alloc_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer) { - if (cmd_buffer->device->physical->instance->disable_push_constant_alloc) + if (cmd_buffer->device->physical->instance->drirc.perf.disable_push_const_alloc) return; struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; @@ -1086,7 +1086,7 @@ genX(cmd_buffer_flush_gfx)(struct anv_cmd_buffer *cmd_buffer) ALWAYS_INLINE static bool anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count) { - const struct anv_device *device = cmd_buffer->device; + const struct anv_instance *instance = cmd_buffer->device->physical->instance; /* We cannot generate readable commands in protected mode. */ if (cmd_buffer->vk.pool->flags & VK_COMMAND_POOL_CREATE_PROTECTED_BIT) @@ -1099,7 +1099,7 @@ anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count) anv_gfx_has_stage(&cmd_buffer->state.gfx, MESA_SHADER_TESS_CTRL)) return false; - return count >= device->physical->instance->generated_indirect_threshold; + return count >= instance->drirc.perf.generated_indirect_threshold; } #include "genX_cmd_draw_helpers.h" diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index 20e8214d532..d8077a03877 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -673,6 +673,8 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer uint32_t max_draw_count, bool indexed) { + const struct anv_instance *instance = cmd_buffer->device->physical->instance; + /* In order to have the vertex fetch gather the data we need to have a non * 0 stride. It's possible to have a 0 stride given by the application when * draw_count is 1, but we need a correct value for the @@ -686,7 +688,7 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer assert(indirect_data_stride > 0); const bool use_ring_buffer = max_draw_count >= - cmd_buffer->device->physical->instance->generated_indirect_ring_threshold; + instance->drirc.perf.generated_indirect_ring_threshold; if (use_ring_buffer) { genX(cmd_buffer_emit_indirect_generated_draws_inring)(cmd_buffer, indirect_data_addr, diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index 3040ebe9be1..c8d5e081656 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -1405,7 +1405,7 @@ update_te(struct anv_gfx_dynamic_state *hw_state, distrib_mode = TEDMODE_OFF; /* Debug feature for hang analysis */ - if (!device->physical->instance->enable_te_distribution) + if (!device->physical->instance->drirc.debug.te_distribution) distrib_mode = TEDMODE_OFF; SET(TE, te.TessellationDistributionMode, distrib_mode); @@ -1920,7 +1920,7 @@ update_blend_state(struct anv_gfx_dynamic_state *hw_state, DestinationBlendFactor = BLENDFACTOR_ONE; } - if (instance->intel_enable_wa_14018912822 && + if (instance->drirc.debug.wa_14018912822 && intel_needs_workaround(device->info, 14018912822) && dyn->ms.rasterization_samples > 1) { if (DestinationBlendFactor == BLENDFACTOR_ZERO) { @@ -2021,8 +2021,8 @@ update_viewports(struct anv_gfx_dynamic_state *hw_state, }; /* Fix depth test misrenderings by lowering translated depth range */ - if (instance->lower_depth_range_rate != 1.0f) - sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate; + if (instance->drirc.debug.lower_depth_range_rate != 1.0f) + sfv.ViewportMatrixElementm32 *= instance->drirc.debug.lower_depth_range_rate; const uint32_t fb_size_max = 1 << 14; uint32_t x_min = 0, x_max = fb_size_max; @@ -2217,9 +2217,10 @@ update_tbimr_info(struct anv_gfx_dynamic_state *hw_state, const struct anv_cmd_graphics_state *gfx, const struct intel_l3_config *l3_config) { + const struct anv_instance *instance = device->physical->instance; unsigned fb_width, fb_height, tile_width, tile_height; - if (device->physical->instance->enable_tbimr && + if (instance->drirc.debug.tbimr && calculate_render_area(gfx, &fb_width, &fb_height) && calculate_tile_dimensions(device, gfx, l3_config, fb_width, fb_height, @@ -2782,9 +2783,9 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state, if (IS_DIRTY(VF)) { anv_gfx_pack(vf, GENX(3DSTATE_VF), vf) { #if GFX_VERx10 >= 125 - vf.GeometryDistributionEnable = instance->enable_vf_distribution; + vf.GeometryDistributionEnable = instance->drirc.debug.vf_distribution; #endif - vf.ComponentPackingEnable = instance->vf_component_packing; + vf.ComponentPackingEnable = instance->drirc.perf.vf_comp_packing; SET(vf, vf, IndexedDrawCutIndexEnable); SET(vf, vf, CutIndex); } @@ -2839,7 +2840,8 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state, if (IS_DIRTY(VF_SGVS_INSTANCING)) anv_gfx_copy_variable(vf_sgvs_instancing, MESA_SHADER_VERTEX, vs.vf_sgvs_instancing); - if (instance->vf_component_packing && IS_DIRTY(VF_COMPONENT_PACKING)) { + if (instance->drirc.perf.vf_comp_packing && + IS_DIRTY(VF_COMPONENT_PACKING)) { anv_gfx_copy(vf_component_packing, GENX(3DSTATE_VF_COMPONENT_PACKING), MESA_SHADER_VERTEX, vs.vf_component_packing); } @@ -3488,7 +3490,7 @@ emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer) } anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) { vf.GeometryDistributionEnable = - cmd_buffer->device->physical->instance->enable_vf_distribution; + cmd_buffer->device->physical->instance->drirc.debug.vf_distribution; } #endif @@ -3625,6 +3627,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) { struct anv_batch *batch = &cmd_buffer->batch; struct anv_device *device = cmd_buffer->device; + const struct anv_instance *instance = device->physical->instance; struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; const struct vk_dynamic_graphics_state *dyn = &cmd_buffer->vk.dynamic_graphics_state; @@ -3748,7 +3751,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_SGVS_2), vf_sgvs_2); #endif - if (device->physical->instance->vf_component_packing && + if (instance->drirc.perf.vf_comp_packing && IS_DIRTY(VF_COMPONENT_PACKING)) { anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_COMPONENT_PACKING), vf_component_packing); diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index bdac04819a0..94cfe44a44f 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -84,7 +84,7 @@ emit_common_so_memcpy(struct anv_memcpy_state *state, * distribution. */ vf.GeometryDistributionEnable = - device->physical->instance->enable_vf_distribution; + device->physical->instance->drirc.debug.vf_distribution; #endif } anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs); diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index f44c68bc370..e5d2f3e31e5 100644 --- a/src/intel/vulkan/genX_init_state.c +++ b/src/intel/vulkan/genX_init_state.c @@ -354,7 +354,7 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) if (ANV_SUPPORT_RT && device->info->has_ray_tracing) { anv_batch_emit(batch, GENX(3DSTATE_BTD), btd) { uint32_t dispatch_timeout_counter = - device->physical->instance->dispatch_timeout_counter; + device->physical->instance->drirc.perf.rt_dispatch_timeout; uint32_t clamped_timeout_counter = genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter); #if GFX_VERx10 >= 200 @@ -404,6 +404,7 @@ static VkResult init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch) { struct anv_device *device = queue->device; + const struct anv_instance *instance = device->physical->instance; UNUSED const struct intel_device_info *devinfo = queue->device->info; struct anv_async_submit *submit; @@ -783,7 +784,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch) } #endif - if (device->physical->instance->disable_push_constant_alloc) { + if (instance->drirc.perf.disable_push_const_alloc) { genX(batch_emit_push_constants_alloc)( batch, device, VK_SHADER_STAGE_VERTEX_BIT | @@ -1343,6 +1344,7 @@ genX(emit_sampler_state)(const struct anv_device *device, uint32_t border_color_offset, struct anv_sampler_state *state) { + const struct anv_instance *instance = device->physical->instance; const bool seamless_cube = !(vk_state->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT); @@ -1360,7 +1362,7 @@ genX(emit_sampler_state)(const struct anv_device *device, const VkFilter mag_filter = plane_has_chroma ? vk_state->ycbcr_conversion.chroma_filter : vk_state->mag_filter; const bool force_addr_rounding = - device->physical->instance->force_filter_addr_rounding; + instance->drirc.debug.force_filter_addr_rounding; const bool enable_min_filter_addr_rounding = force_addr_rounding || min_filter != VK_FILTER_NEAREST; const bool enable_mag_filter_addr_rounding = diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index c39a2f8b305..80c9f7cd152 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -862,14 +862,15 @@ void genX(CmdResetQueryPool)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - struct anv_physical_device *pdevice = cmd_buffer->device->physical; + const struct anv_physical_device *pdevice = cmd_buffer->device->physical; + const struct anv_instance *instance = pdevice->instance; /* Shader clearing is only possible on render/compute when not in protected * mode. */ if (anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer) && (cmd_buffer->vk.pool->flags & VK_COMMAND_POOL_CREATE_PROTECTED_BIT) == 0 && - queryCount >= pdevice->instance->query_clear_with_blorp_threshold) { + queryCount >= instance->drirc.perf.query_clear_with_blorp_threshold) { trace_intel_begin_query_clear_blorp(&cmd_buffer->trace); anv_cmd_buffer_fill_area(cmd_buffer, @@ -2040,9 +2041,10 @@ void genX(CmdCopyQueryPoolResults)( ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); struct anv_device *device = cmd_buffer->device; - struct anv_physical_device *pdevice = device->physical; + const struct anv_physical_device *pdevice = device->physical; + const struct anv_instance *instance = pdevice->instance; - if (queryCount > pdevice->instance->query_copy_with_shader_threshold && + if (queryCount > instance->drirc.perf.query_copy_with_shader_threshold && anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer)) { copy_query_results_with_shader(cmd_buffer, pool, anv_address_add(buffer->address, @@ -2074,12 +2076,13 @@ void genX(CmdCopyQueryPoolResultsToMemoryKHR)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); struct anv_device *device = cmd_buffer->device; - struct anv_physical_device *pdevice = device->physical; + const struct anv_physical_device *pdevice = device->physical; + const struct anv_instance *instance = pdevice->instance; struct anv_address dst_addr = anv_address_from_strided_range_flags(*pDstRange, dstFlags); - if (queryCount > pdevice->instance->query_copy_with_shader_threshold) { + if (queryCount > instance->drirc.perf.query_copy_with_shader_threshold) { copy_query_results_with_shader(cmd_buffer, pool, dst_addr, pDstRange->stride, diff --git a/src/intel/vulkan/genX_shader.c b/src/intel/vulkan/genX_shader.c index 410e5aa244d..b74676adc99 100644 --- a/src/intel/vulkan/genX_shader.c +++ b/src/intel/vulkan/genX_shader.c @@ -32,7 +32,7 @@ get_surface_count(const struct anv_device *device, { #if GFX_VERx10 >= 125 if (shader->vk.stage == MESA_SHADER_COMPUTE && - !device->physical->instance->force_compute_surface_prefetch) + !device->physical->instance->drirc.perf.cs_surface_prefetch) return 0; #endif return shader->bind_map.surface_count; @@ -51,7 +51,7 @@ get_sampler_count(const struct anv_device *device, */ return 0; #else - if (!device->physical->instance->force_sampler_prefetch) + if (!device->physical->instance->drirc.perf.sampler_prefetch) return 0; return DIV_ROUND_UP( @@ -533,7 +533,7 @@ emit_vs_shader(struct anv_batch *batch, } #endif - if (device->physical->instance->vf_component_packing) { + if (device->physical->instance->drirc.perf.vf_comp_packing) { anv_shader_emit(batch, shader, vs.vf_component_packing, GENX(3DSTATE_VF_COMPONENT_PACKING), vfc) { vfc.VertexElementEnablesDW[0] = vs_prog_data->vf_component_packing[0]; diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index fcbd46cc8d1..ce9cf622edf 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -44,6 +44,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) struct anv_batch *batch = state->batch; struct anv_device *device = state->device; + const struct anv_instance *instance = device->physical->instance; const struct brw_fs_prog_data *prog_data = brw_fs_prog_data_const(state->kernel->prog_data); @@ -88,7 +89,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) * distribution. */ vf.GeometryDistributionEnable = - device->physical->instance->enable_vf_distribution; + instance->drirc.debug.vf_distribution; #endif } anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) { @@ -273,7 +274,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr); #endif - if (!device->physical->instance->disable_push_constant_alloc) { + if (!instance->drirc.perf.disable_push_const_alloc) { VkShaderStageFlags push_stages = genX(push_constant_alloc_stages)(VK_SHADER_STAGE_FRAGMENT_BIT); genX(batch_emit_push_constants_alloc)(batch, device, push_stages); diff --git a/src/intel/vulkan/i915/anv_queue.c b/src/intel/vulkan/i915/anv_queue.c index 97961cdbe2e..d079279b697 100644 --- a/src/intel/vulkan/i915/anv_queue.c +++ b/src/intel/vulkan/i915/anv_queue.c @@ -65,7 +65,7 @@ anv_i915_create_engine(struct anv_device *device, if (pCreateInfo->flags & VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT) flags |= INTEL_GEM_CREATE_CONTEXT_EXT_PROTECTED_FLAG; - if (device->physical->instance->force_guc_low_latency && + if (device->physical->instance->drirc.perf.guc_low_latency && physical->info.supports_low_latency_hint) flags |= INTEL_GEM_CREATE_CONTEXT_EXT_LOW_LATENCY_FLAG; diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index 9d13c7bbda4..e479c6e0e61 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -85,6 +85,38 @@ _dev_icd = custom_target( devenv.append('VK_DRIVER_FILES', _dev_icd.full_path()) +libanv_drirc_args = [] +if with_platform_android + libanv_drirc_args += [ '--android-ver', get_option('platform-sdk-version').to_string() ] +endif + +libanv_drirc_files = custom_target( + 'libanv_drirc', + input : ['anv_dricrc_gen.py', drirc_gen], + output : ['anv_drirc.c', 'anv_drirc.h'], + command : [prog_python, '@INPUT0@', + '-p', join_paths(dir_source_root, 'src/util/'), + libanv_drirc_args, + '--drirc-src', '@OUTPUT0@', + '--drirc-hdr', '@OUTPUT1@', + '--validate', join_paths(dir_source_root, 'src/util/00-mesa-defaults.conf')], +) + +libanv_drirc = static_library( + 'libanv_drirc', + libanv_drirc_files, + include_directories : [ + inc_include, inc_src, inc_intel, + ], + c_args : anv_flags, + gnu_symbol_visibility : 'hidden', + dependencies : idep_mesautil) + +idep_anv_drirc = declare_dependency( + sources : libanv_drirc_files[1], + link_with : libanv_drirc, +) + libanv_per_hw_ver_libs = [] anv_per_hw_ver_files = files( 'genX_blorp_exec.c', @@ -125,6 +157,7 @@ foreach _gfx_ver : ['90', '110', '120', '125', '200', '300', '350'] idep_vulkan_runtime_headers, idep_mesautil, idep_intel_dev, idep_intel_driver_ds_headers, idep_intel_shaders, idep_intel_blorp, + idep_anv_drirc, ], ) endforeach @@ -225,6 +258,7 @@ anv_deps = [ idep_intel_shaders, idep_intel_blorp, idep_mda, + idep_anv_drirc, ] if with_platform_x11 diff --git a/src/intel/vulkan/xe/anv_queue.c b/src/intel/vulkan/xe/anv_queue.c index 72c9eb9373d..87079354bee 100644 --- a/src/intel/vulkan/xe/anv_queue.c +++ b/src/intel/vulkan/xe/anv_queue.c @@ -146,7 +146,7 @@ create_engine(struct anv_device *device, DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY, &state_cache_perf_ext.base); } - if (device->physical->instance->force_guc_low_latency && + if (device->physical->instance->drirc.perf.guc_low_latency && physical->info.supports_low_latency_hint) create.flags |= DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT; diff --git a/src/util/driconf.h b/src/util/driconf.h index b5ce1d3a320..4f58e51a414 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -353,18 +353,6 @@ #define DRI_CONF_FORCE_VK_VENDOR() \ DRI_CONF_OPT_I(force_vk_vendor, 0, -1, 2147483647, "Override GPU vendor id") -#define DRI_CONF_FAKE_SPARSE(def) \ - DRI_CONF_OPT_B(fake_sparse, def, \ - "Advertise support for sparse binding of textures regardless of real support") - -#define DRI_CONF_INTEL_BINDING_TABLE_BLOCK_SIZE(def,min,max) \ - DRI_CONF_OPT_I(intel_binding_table_block_size, def, min, max, \ - "Intel binding table block allocation size (3DSTATE_BINDING_TABLE_POOL_ALLOC)") - -#define DRI_CONF_INTEL_DISABLE_PUSH_CONSTANT_ALLOC(def) \ - DRI_CONF_OPT_B(intel_disable_push_constant_alloc, def, \ - "Disable push constant space allocations") - #define DRI_CONFIG_INTEL_TBIMR(def) \ DRI_CONF_OPT_B(intel_tbimr, def, "Enable TBIMR tiled rendering") @@ -537,10 +525,6 @@ DRI_CONF_OPT_B(vertex_program_default_out, def, \ "Initialize outputs of vertex program to a default value vec4(0, 0, 0, 1)") -#define DRI_CONF_CUSTOM_BORDER_COLORS_WITHOUT_FORMAT(def) \ - DRI_CONF_OPT_B(custom_border_colors_without_format, def, \ - "Enable custom border colors without format") - #define DRI_CONF_NO_FP16(def) \ DRI_CONF_OPT_B(no_fp16, def, \ "Disable 16-bit float support") @@ -715,136 +699,6 @@ DRI_CONF_OPT_B(venus_wsi_multi_plane_modifiers, def, \ "Enable support of multi-plane format modifiers for wsi images") -/** - * \brief ANV specific configuration options - */ - -#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(def) \ - DRI_CONF_OPT_I(anv_assume_full_subgroups, def, 0, 32, \ - "Allow assuming full subgroups requirement even when it's not specified explicitly and set the given size") - -#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_BARRIER(def) \ - DRI_CONF_OPT_B(anv_assume_full_subgroups_with_barrier, def, \ - "Assume full subgroups requirement for compute shaders that use control barriers") - -#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS_WITH_SHARED_MEMORY(def) \ - DRI_CONF_OPT_B(anv_assume_full_subgroups_with_shared_memory, def, \ - "Allow assuming full subgroups requirement for shaders using shared memory even when it's not specified explicitly") - -#define DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(def) \ - DRI_CONF_OPT_B(anv_emulate_read_without_format, def, \ - "Emulate shaderStorageImageReadWithoutFormat with shader conversions") - -#define DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(def) \ - DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \ - "Ignore sample mask out when having single sampled target") - -#define DRI_CONF_ANV_FORCE_FILTER_ADDR_ROUNDING(def) \ - DRI_CONF_OPT_B(anv_force_filter_addr_rounding, def, \ - "Force min/mag filter address rounding to be enabled even for NEAREST sampling") - -#define DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(def) \ - DRI_CONF_OPT_B(fp64_workaround_enabled, def, \ - "Use softpf64 when the shader uses float64, but the device doesn't support that type") - -#define DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(def) \ - DRI_CONF_OPT_I(generated_indirect_threshold, def, 0, INT32_MAX, \ - "Indirect threshold count above which we start generating commands") - -#define DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(def) \ - DRI_CONF_OPT_I(generated_indirect_ring_threshold, def, 0, INT32_MAX, \ - "Indirect threshold count above which we start generating commands in a ring buffer") - -#define DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(def) \ - DRI_CONF_OPT_I(query_clear_with_blorp_threshold, def, 0, INT32_MAX, \ - "Query threshold count above which query buffers are cleared with blorp") - -#define DRI_CONF_ANV_QUERY_COPY_WITH_SHADER_THRESHOLD(def) \ - DRI_CONF_OPT_I(query_copy_with_shader_threshold, def, 0, INT32_MAX, \ - "Query threshold count above which query copies are executed with a shader") - -#define DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(def) \ - DRI_CONF_OPT_B(force_indirect_descriptors, def, \ - "Use an indirection to access buffer/image/texture/sampler handles") - -#define DRI_CONF_ANV_DISABLE_FCV(def) \ - DRI_CONF_OPT_B(anv_disable_fcv, def, \ - "Disable FCV optimization") - -#define DRI_CONF_ANV_ENABLE_BUFFER_COMP(def) \ - DRI_CONF_OPT_B(anv_enable_buffer_comp, def, \ - "Enable CCS on buffers where possible") - -#define DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(def) \ - DRI_CONF_OPT_B(anv_external_memory_implicit_sync, def, "Implicit sync on external BOs") - -#define DRI_CONF_ANV_PROMOTE_CBV_TO_PUSH_BUFFERS(def) \ - DRI_CONF_OPT_B(anv_promote_cbv_to_push_buffers, def, \ - "Promote CBV 64bit pointers in push constant data to push buffers") - -#define DRI_CONF_ANV_STATE_CACHE_PERF_FIX(def) \ - DRI_CONF_OPT_B(anv_state_cache_perf_fix, def, \ - "Whether COMMON_SLICE_CHICKEN3 bit13 should be programmed to enable BTP+BTI RCC keying") - -#define DRI_CONF_ANV_COMPRESSION_CONTROL_ENABLED(def) \ - DRI_CONF_OPT_B(compression_control_enabled, def, "Enable VK_EXT_image_compression_control support") - -#define DRI_CONF_ANV_FAKE_NONLOCAL_MEMORY(def) \ - DRI_CONF_OPT_B(anv_fake_nonlocal_memory, def, \ - "Present host-visible device-local memory types as non device-local") - -#define DRI_CONF_ANV_UPPER_BOUND_DESCRIPTOR_POOL_SAMPLER(def) \ - DRI_CONF_OPT_B(anv_upper_bound_descriptor_pool_sampler, def, \ - "Overallocate samplers in descriptor pools to workaround app bug") - -#define DRI_CONF_ANV_VF_COMPONENT_PACKING(def) \ - DRI_CONF_OPT_B(anv_vf_component_packing, def, \ - "Vertex fetching component packing") - -#define DRI_CONF_ANV_LARGE_WORKGROUP_NON_COHERENT_IMAGE_WORKAROUND(def) \ - DRI_CONF_OPT_B(anv_large_workgroup_non_coherent_image_workaround, def, \ - "Fixup image coherency qualifier for certain shaders.") - -#define DRI_CONF_ANV_FORCE_GUC_LOW_LATENCY(def) \ - DRI_CONF_OPT_B(force_guc_low_latency, def, \ - "Enable low latency GuC strategy.") - -#define DRI_CONF_ANV_DISABLE_DRM_AUX_MODIFIERS(def) \ - DRI_CONF_OPT_B(anv_disable_drm_ccs_modifiers, def, \ - "Disable DRM CCS modifier usage") - -#define DRI_CONF_ANV_DISABLE_LINK_TIME_OPTIMIZATION(def) \ - DRI_CONF_OPT_B(anv_disable_link_time_optimization, def, \ - "Disable linking of graphics pipeline shaders") - -#define DRI_CONF_ANV_BARRIER_POST_UNTYPED_CLEAR_SHADER(def) \ - DRI_CONF_OPT_B(anv_barrier_post_untyped_clear_shader, def, \ - "Insert pipeline barriers post clearing shader on untyped data") - -#define DRI_CONF_ANV_BARRIER_POST_TYPED_CLEAR_SHADER(def) \ - DRI_CONF_OPT_B(anv_barrier_post_typed_clear_shader, def, \ - "Insert pipeline barriers post clearing shader on typed data") - -#define DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS(def) \ - DRI_CONF_OPT_I(anv_enable_opt_divergent_atomics, def, 0, 3,\ - "Enable fusion of divergent atomics (see brw_divergent_atomics_flags)") - -#define DRI_CONF_ANV_ENABLE_OPT_DIVERGENT_ATOMICS_COMPUTE_ONLY(def) \ - DRI_CONF_OPT_I(anv_enable_opt_divergent_atomics_compute_only, def, 0, 3,\ - "Enable fusion of divergent atomics for compute shaders only (see brw_divergent_atomics_flags)") - -#define DRI_CONF_ANV_BRW_DISABLE_SUBGROUP_SIZE_CONTROL(def) \ - DRI_CONF_OPT_B(anv_brw_disable_subgroup_size_control, def, \ - "Disable EXT_subgroup_size_control support when using brw compiler.") - -#define DRI_CONF_ANV_ENABLE_SCRATCH_PAGE(def) \ - DRI_CONF_OPT_B(anv_enable_scratch_page, def, \ - "Disables surface padding and suppresses all page faults, drops writes and returns zeros on reads.") - -#define DRI_CONF_ANV_ENABLE_FULLY_COVERED(def) \ - DRI_CONF_OPT_B(anv_enable_fully_covered, def, \ - "Enable fullyCoveredFragmentShaderInputVariable (Alchemist and newer only).") - /** * \brief HASVK specific configuration options */