diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 1905e41aac9..4e536057969 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -2267,8 +2267,8 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
if (CHIP >= A8XX)
tu_cs_emit_regs(cs, SP_ALPHA_TEST_CNTL(CHIP));
- tu_cs_emit_regs(cs, A6XX_TPL1_GFX_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor)));
- tu_cs_emit_regs(cs, A6XX_TPL1_CS_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor)));
+ tu_cs_emit_regs(cs, A6XX_TPL1_GFX_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor_builtin)));
+ tu_cs_emit_regs(cs, A6XX_TPL1_CS_BORDER_COLOR_BASE(.qword = dev->global_bo->iova + gb_offset(bcolor_builtin)));
/* BR-only registers */
/* non-ctx regs programmed by KMD (and blocked from UMD) on gen8+ */
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index 213d9587d68..afe8a26f001 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -25,6 +25,7 @@
#include "vk_debug_utils.h"
#include "vk_shader_module.h"
#include "vk_util.h"
+#include "vk_sampler.h"
#include "common/freedreno_uuid.h"
#include "fdl/freedreno_layout.h"
@@ -1435,7 +1436,8 @@ tu_get_properties(struct tu_physical_device *pdevice,
/* VK_KHR_maintenance5 */
props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
- props->depthStencilSwizzleOneSupport = true;
+ props->depthStencilSwizzleOneSupport =
+ pdevice->info->props.has_z24uint_s8uint && pdevice->instance->enable_d24s8_border_color_workaround;
props->polygonModePointSize = true;
props->nonStrictWideLinesUseParallelogram = false;
props->nonStrictSinglePixelWideLinesUseParallelogram = false;
@@ -1852,7 +1854,8 @@ static const driOptionDescription tu_dri_options[] = {
DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
- DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
+ DRI_CONF_TU_ENABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
+ DRI_CONF_TU_ENABLE_FAST_BORDER_COLOR_FOR_UNDEFINED_FORMATS(false)
DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(false)
DRI_CONF_TU_IGNORE_FRAG_DEPTH_DIRECTION(false)
DRI_CONF_TU_ENABLE_SOFTFLOAT32(false)
@@ -1881,8 +1884,10 @@ tu_init_dri_options(struct tu_instance *instance)
!driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
instance->allow_oob_indirect_ubo_loads =
driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
- instance->disable_d24s8_border_color_workaround =
- driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround");
+ instance->enable_d24s8_border_color_workaround =
+ driQueryOptionb(&instance->dri_options, "tu_enable_d24s8_border_color_workaround");
+ instance->enable_fast_border_color_for_undefined_formats =
+ driQueryOptionb(&instance->dri_options, "tu_enable_fast_border_color_for_undefined_formats");
instance->use_tex_coord_round_nearest_even_mode =
driQueryOptionb(&instance->dri_options, "tu_use_tex_coord_round_nearest_even_mode");
instance->ignore_frag_depth_direction =
@@ -3042,6 +3047,12 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
global->zero_64b = 0;
+ for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
+ VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
+ tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
+ vk_border_color_is_int((VkBorderColor) i));
+ }
+
/* initialize to ones so ffs can be used to find unused slots */
BITSET_ONES(device->custom_border_color);
@@ -3139,7 +3150,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
device->use_z24uint_s8uint =
physical_device->info->props.has_z24uint_s8uint &&
(!border_color_without_format ||
- physical_device->instance->disable_d24s8_border_color_workaround);
+ !physical_device->instance->enable_d24s8_border_color_workaround);
device->use_lrz = !TU_DEBUG_START(NOLRZ);
tu_gpu_tracepoint_config_variable();
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index e5e58c99e2b..7083561ac31 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -35,6 +35,7 @@
#define TU_MAX_QUEUE_FAMILIES 2
#define TU_BORDER_COLOR_COUNT 4096
+#define TU_BORDER_COLOR_BUILTIN 6
#define TU_BLIT_SHADER_SIZE 4096
@@ -217,13 +218,25 @@ struct tu_instance
*/
bool allow_oob_indirect_ubo_loads;
- /* DXVK and VKD3D-Proton use customBorderColorWithoutFormat
- * and have most of D24S8 images with USAGE_SAMPLED, in such case we
- * disable UBWC for correctness. However, games don't use border color for
- * depth-stencil images. So we elect to ignore this edge case and force
- * UBWC to be enabled.
+ /* The hardware doesn't support Vulkan's stencil swizzling rules for
+ * custom border colors. Vulkan requires stencil to be sampled as the red
+ * component, but hardware samples it as the green component. Without
+ * customBorderColorWithoutFormat we can work around this issue without
+ * perf loss, but with customBorderColorWithoutFormat we have to disable
+ * UBWC for D24S8 images with USAGE_SAMPLED set.
+ * However, when VkPhysicalDeviceMaintenance5Properties.depthStencilSwizzleOneSupport
+ * is false, apps are forbidden from using this state combination. That
+ * property was added after the HW deficiency was discovered, so this
+ * workaround is off by default and enabled only for apps unaware of it.
*/
- bool disable_d24s8_border_color_workaround;
+ bool enable_d24s8_border_color_workaround;
+
+ /* When D24S8 is used without enable_d24s8_border_color_workaround, the
+ * fast border color HW feature results in an incorrect color being used.
+ * However, we want to enable fast border colors for apps that are known
+ * not to use border colors with D24S8, such as DXVK and vkd3d-proton.
+ */
+ bool enable_fast_border_color_for_undefined_formats;
/* D3D emulation requires texture coordinates to be rounded to nearest even value. */
bool use_tex_coord_round_nearest_even_mode;
@@ -329,6 +342,7 @@ struct tu6_global
uint64_t preemption_latency_cmp_scratch;
uint64_t zero_64b;
+ struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN];
struct bcolor_entry bcolor[];
};
#define gb_offset(member) offsetof(struct tu6_global, member)
diff --git a/src/freedreno/vulkan/tu_sampler.cc b/src/freedreno/vulkan/tu_sampler.cc
index b6138206c6a..7a7710ce9e6 100644
--- a/src/freedreno/vulkan/tu_sampler.cc
+++ b/src/freedreno/vulkan/tu_sampler.cc
@@ -58,7 +58,9 @@ tu_CreateSampler(VkDevice _device,
tu6_pack_border_color(
&device->global_bo_map->bcolor[border_color], &color,
pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
- } else {
+ border_color += TU_BORDER_COLOR_BUILTIN;
+ } else if (sampler->vk.format != VK_FORMAT_UNDEFINED ||
+ device->instance->enable_fast_border_color_for_undefined_formats) {
fast_border_color_enable = true;
switch (pCreateInfo->borderColor) {
case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
@@ -192,7 +194,8 @@ tu_DestroySampler(VkDevice _device,
pkt_field_get(A6XX_TEX_SAMP_2_BCOLOR, sampler->descriptor[2]);
}
- if (!fast_border_color) {
+ if (!fast_border_color && border_color >= TU_BORDER_COLOR_BUILTIN) {
+ border_color -= TU_BORDER_COLOR_BUILTIN;
/* if the sampler had a custom border color, free it. TODO: no lock */
mtx_lock(&device->mutex);
assert(!BITSET_TEST(device->custom_border_color, border_color));
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index c5eca519f84..53a0e46f2f3 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -1402,11 +1402,9 @@ TODO: document the other workarounds.
-
+
+
+
diff --git a/src/util/driconf.h b/src/util/driconf.h
index ca447061760..20de0497ccb 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -648,9 +648,13 @@
DRI_CONF_OPT_B(tu_allow_oob_indirect_ubo_loads, def, \
"Some D3D11 games rely on out-of-bounds indirect UBO loads to return real values from underlying bound descriptor, this prevents us from lowering indirectly accessed UBOs to consts")
-#define DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(def) \
- DRI_CONF_OPT_B(tu_disable_d24s8_border_color_workaround, def, \
- "Use UBWC for D24S8 images with VK_IMAGE_USAGE_SAMPLED_BIT when customBorderColorWithoutFormat is enabled")
+#define DRI_CONF_TU_ENABLE_D24S8_BORDER_COLOR_WORKAROUND(def) \
+ DRI_CONF_OPT_B(tu_enable_d24s8_border_color_workaround, def, \
+ "Disable UBWC for D24S8 images with VK_IMAGE_USAGE_SAMPLED_BIT when customBorderColorWithoutFormat is enabled")
+
+#define DRI_CONF_TU_ENABLE_FAST_BORDER_COLOR_FOR_UNDEFINED_FORMATS(def) \
+ DRI_CONF_OPT_B(tu_enable_fast_border_color_for_undefined_formats, def, \
+ "Enables fast border color HW feature for VK_FORMAT_UNDEFINED sampler formats.")
#define DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(def) \
DRI_CONF_OPT_B(tu_use_tex_coord_round_nearest_even_mode, def, \