diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 1905e41aac9..4e536057969 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -2267,8 +2267,8 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs) if (CHIP >= A8XX) tu_cs_emit_regs(cs, SP_ALPHA_TEST_CNTL(CHIP)); - tu_cs_emit_regs(cs, A6XX_TPL1_GFX_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor))); - tu_cs_emit_regs(cs, A6XX_TPL1_CS_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor))); + tu_cs_emit_regs(cs, A6XX_TPL1_GFX_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor_builtin))); + tu_cs_emit_regs(cs, A6XX_TPL1_CS_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor_builtin))); /* BR-only registers */ /* non-ctx regs programmed by KMD (and blocked from UMD) on gen8+ */ diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 213d9587d68..afe8a26f001 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -25,6 +25,7 @@ #include "vk_debug_utils.h" #include "vk_shader_module.h" #include "vk_util.h" +#include "vk_sampler.h" #include "common/freedreno_uuid.h" #include "fdl/freedreno_layout.h" @@ -1435,7 +1436,8 @@ tu_get_properties(struct tu_physical_device *pdevice, /* VK_KHR_maintenance5 */ props->earlyFragmentMultisampleCoverageAfterSampleCounting = true; props->earlyFragmentSampleMaskTestBeforeSampleCounting = true; - props->depthStencilSwizzleOneSupport = true; + props->depthStencilSwizzleOneSupport = + pdevice->info->props.has_z24uint_s8uint && pdevice->instance->enable_d24s8_border_color_workaround; props->polygonModePointSize = true; props->nonStrictWideLinesUseParallelogram = false; props->nonStrictSinglePixelWideLinesUseParallelogram = false; @@ -1852,7 +1854,8 @@ static const driOptionDescription tu_dri_options[] = { DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false) DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false) DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false) - DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false) + DRI_CONF_TU_ENABLE_D24S8_BORDER_COLOR_WORKAROUND(false) + DRI_CONF_TU_ENABLE_FAST_BORDER_COLOR_FOR_UNDEFINED_FORMATS(false) DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(false) DRI_CONF_TU_IGNORE_FRAG_DEPTH_DIRECTION(false) DRI_CONF_TU_ENABLE_SOFTFLOAT32(false) @@ -1881,8 +1884,10 @@ tu_init_dri_options(struct tu_instance *instance) !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set"); instance->allow_oob_indirect_ubo_loads = driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads"); - instance->disable_d24s8_border_color_workaround = - driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround"); + instance->enable_d24s8_border_color_workaround = + driQueryOptionb(&instance->dri_options, "tu_enable_d24s8_border_color_workaround"); + instance->enable_fast_border_color_for_undefined_formats = + driQueryOptionb(&instance->dri_options, "tu_enable_fast_border_color_for_undefined_formats"); instance->use_tex_coord_round_nearest_even_mode = driQueryOptionb(&instance->dri_options, "tu_use_tex_coord_round_nearest_even_mode"); instance->ignore_frag_depth_direction = @@ -3042,6 +3047,12 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, global->zero_64b = 0; + for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) { + VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i); + tu6_pack_border_color(&global->bcolor_builtin[i], &border_color, + vk_border_color_is_int((VkBorderColor) i)); + } + /* initialize to ones so ffs can be used to find unused slots */ BITSET_ONES(device->custom_border_color); @@ -3139,7 +3150,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, device->use_z24uint_s8uint = physical_device->info->props.has_z24uint_s8uint && (!border_color_without_format || - physical_device->instance->disable_d24s8_border_color_workaround); + !physical_device->instance->enable_d24s8_border_color_workaround); device->use_lrz = !TU_DEBUG_START(NOLRZ); tu_gpu_tracepoint_config_variable(); diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index e5e58c99e2b..7083561ac31 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -35,6 +35,7 @@ #define TU_MAX_QUEUE_FAMILIES 2 #define TU_BORDER_COLOR_COUNT 4096 +#define TU_BORDER_COLOR_BUILTIN 6 #define TU_BLIT_SHADER_SIZE 4096 @@ -217,13 +218,25 @@ struct tu_instance */ bool allow_oob_indirect_ubo_loads; - /* DXVK and VKD3D-Proton use customBorderColorWithoutFormat - * and have most of D24S8 images with USAGE_SAMPLED, in such case we - * disable UBWC for correctness. However, games don't use border color for - * depth-stencil images. So we elect to ignore this edge case and force - * UBWC to be enabled. + /* The hardware doesn't support Vulkan's stencil swizzling rules for + * custom border colors. Vulkan requires stencil to be sampled as the red + * component, but hardware samples it as the green component. Without + * customBorderColorWithoutFormat we can work around this issue without + * perf loss, but with customBorderColorWithoutFormat we have to disable + * UBWC for D24S8 images with USAGE_SAMPLED set. + * However, VkPhysicalDeviceMaintenance5Properties.depthStencilSwizzleOneSupport + * forbids this state combination when false. It was added after the HW + * deficiency was discovered, and we want to work around apps that aren't + * aware of this. */ - bool disable_d24s8_border_color_workaround; + bool enable_d24s8_border_color_workaround; + + /* When D24S8 is used without enable_d24s8_border_color_workaround, the + * fast border color HW feature results in an incorrect color being used. + * However, we want to enable fast border colors for apps that are known + * not to use border colors with D24S8, such as DXVK and vkd3d-proton. + */ + bool enable_fast_border_color_for_undefined_formats; /* D3D emulation requires texture coordinates to be rounded to nearest even value. */ bool use_tex_coord_round_nearest_even_mode; @@ -329,6 +342,7 @@ struct tu6_global uint64_t preemption_latency_cmp_scratch; uint64_t zero_64b; + struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN]; struct bcolor_entry bcolor[]; }; #define gb_offset(member) offsetof(struct tu6_global, member) diff --git a/src/freedreno/vulkan/tu_sampler.cc b/src/freedreno/vulkan/tu_sampler.cc index b6138206c6a..7a7710ce9e6 100644 --- a/src/freedreno/vulkan/tu_sampler.cc +++ b/src/freedreno/vulkan/tu_sampler.cc @@ -58,7 +58,9 @@ tu_CreateSampler(VkDevice _device, tu6_pack_border_color( &device->global_bo_map->bcolor[border_color], &color, pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT); - } else { + border_color += TU_BORDER_COLOR_BUILTIN; + } else if (sampler->vk.format != VK_FORMAT_UNDEFINED || + device->instance->enable_fast_border_color_for_undefined_formats) { fast_border_color_enable = true; switch (pCreateInfo->borderColor) { case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: @@ -192,7 +194,8 @@ tu_DestroySampler(VkDevice _device, pkt_field_get(A6XX_TEX_SAMP_2_BCOLOR, sampler->descriptor[2]); } - if (!fast_border_color) { + if (!fast_border_color && border_color >= TU_BORDER_COLOR_BUILTIN) { + border_color -= TU_BORDER_COLOR_BUILTIN; /* if the sampler had a custom border color, free it. TODO: no lock */ mtx_lock(&device->mutex); assert(!BITSET_TEST(device->custom_border_color, border_color)); diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index c5eca519f84..53a0e46f2f3 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -1402,11 +1402,9 @@ TODO: document the other workarounds. - diff --git a/src/util/driconf.h b/src/util/driconf.h index ca447061760..20de0497ccb 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -648,9 +648,13 @@ DRI_CONF_OPT_B(tu_allow_oob_indirect_ubo_loads, def, \ "Some D3D11 games rely on out-of-bounds indirect UBO loads to return real values from underlying bound descriptor, this prevents us from lowering indirectly accessed UBOs to consts") -#define DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(def) \ - DRI_CONF_OPT_B(tu_disable_d24s8_border_color_workaround, def, \ - "Use UBWC for D24S8 images with VK_IMAGE_USAGE_SAMPLED_BIT when customBorderColorWithoutFormat is enabled") +#define DRI_CONF_TU_ENABLE_D24S8_BORDER_COLOR_WORKAROUND(def) \ + DRI_CONF_OPT_B(tu_enable_d24s8_border_color_workaround, def, \ + "Disable UBWC for D24S8 images with VK_IMAGE_USAGE_SAMPLED_BIT when customBorderColorWithoutFormat is enabled") + +#define DRI_CONF_TU_ENABLE_FAST_BORDER_COLOR_FOR_UNDEFINED_FORMATS(def) \ + DRI_CONF_OPT_B(tu_enable_fast_border_color_for_undefined_formats, def, \ + "Enables fast border color HW feature for VK_FORMAT_UNDEFINED sampler formats.") #define DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(def) \ DRI_CONF_OPT_B(tu_use_tex_coord_round_nearest_even_mode, def, \