diff --git a/src/intel/vulkan/anv_instance.c b/src/intel/vulkan/anv_instance.c
index a8c667d24ba..8002a5cc5f7 100644
--- a/src/intel/vulkan/anv_instance.c
+++ b/src/intel/vulkan/anv_instance.c
@@ -29,6 +29,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(100)
+ DRI_CONF_ANV_PROMOTE_CBV_TO_PUSH_BUFFERS(false)
DRI_CONF_ANV_STATE_CACHE_PERF_FIX(false)
DRI_CONF_NO_16BIT(false)
DRI_CONF_INTEL_BINDING_TABLE_BLOCK_SIZE(BINDING_TABLE_POOL_DEFAULT_BLOCK_SIZE,
@@ -198,6 +199,8 @@ anv_init_dri_options(struct anv_instance *instance)
driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
instance->force_filter_addr_rounding =
driQueryOptionb(&instance->dri_options, "anv_force_filter_addr_rounding");
+ instance->promote_cbv_to_push_buffers =
+ driQueryOptionb(&instance->dri_options, "anv_promote_cbv_to_push_buffers");
instance->state_cache_perf_fix =
driQueryOptionb(&instance->dri_options, "anv_state_cache_perf_fix");
instance->lower_depth_range_rate =
diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h
index f7664e5afb4..637d3bdc810 100644
--- a/src/intel/vulkan/anv_nir.h
+++ b/src/intel/vulkan/anv_nir.h
@@ -125,6 +125,8 @@ struct anv_nir_push_layout_info {
bool anv_nir_shrink_push_constant_ranges(nir_shader *nir);
+bool anv_nir_realign_cbv(nir_shader *shader);
+
bool anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
enum brw_robustness_flags robust_flags,
diff --git a/src/intel/vulkan/anv_nir_realign_cbv.c b/src/intel/vulkan/anv_nir_realign_cbv.c
new file mode 100644
index 00000000000..85a437bfbd9
--- /dev/null
+++ b/src/intel/vulkan/anv_nir_realign_cbv.c
@@ -0,0 +1,79 @@
+/* Copyright © 2026 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "anv_nir.h"
+#include "nir/nir_builder.h"
+
+/**
+ * This file implements a pass that looks for global read-only loads through a
+ * pointer read from the push constant data. When the block size is 64KiB
+ * (indicating a CBV resource), the access is marked as 256B aligned, which is
+ * the alignment applications are expected to guarantee for CBVs. This
+ * alignment guarantee can later be used to promote those 64bit pointers to
+ * push buffers (HW needs 32B alignment).
+ */
+
+/**
+ * Per-intrinsic callback for anv_nir_realign_cbv().
+ *
+ * Matches read-only load_deref intrinsics whose deref chain is rooted at a
+ * cast with a pointee type of 64KiB explicit size, where the cast source is
+ * (possibly through a pack_64_2x32_split) a load_push_constant. On a match,
+ * a 256B alignment is recorded on that root cast.
+ *
+ * \param b       unused
+ * \param intrin  the intrinsic being visited
+ * \param data    unused
+ *
+ * \return true when the alignment information of the root cast was updated.
+ */
+static bool
+realign_cbv(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
+   const unsigned cbv_align = 256;
+
+   if (intrin->intrinsic != nir_intrinsic_load_deref)
+      return false;
+
+   /* If writable, it's not CBV. */
+   if ((nir_intrinsic_access(intrin) & ACCESS_NON_WRITEABLE) == 0)
+      return false;
+
+   /* Walk up to the root of the deref chain to see if it's a pointer in the
+    * push constant data. A chain rooted at a variable cannot be one.
+    */
+   nir_deref_instr *root = nir_src_as_deref(intrin->src[0]);
+   while (true) {
+      if (root->deref_type == nir_deref_type_var)
+         return false;
+
+      nir_deref_instr *parent = nir_src_as_deref(root->parent);
+      if (!parent)
+         break;
+
+      root = parent;
+   }
+   assert(root->deref_type == nir_deref_type_cast);
+
+   /* This is the magic value vkd3d-proton puts allowing us to recognize a
+    * CBV.
+    */
+   if (glsl_get_explicit_size(root->type, true) != 64 * 1024)
+      return false;
+
+   nir_scalar val = { root->parent.ssa, 0 };
+
+   if (nir_scalar_is_alu(val)) {
+      nir_alu_instr *pack_alu = nir_def_as_alu(val.def);
+      if (pack_alu->op != nir_op_pack_64_2x32_split)
+         return false;
+
+      /* NOTE(review): only the first source of the pack is traced back to
+       * the push constants; the second source is not inspected. Confirm this
+       * is sufficient for the later promotion.
+       */
+      val = (nir_scalar){ pack_alu->src[0].src.ssa, pack_alu->src[0].swizzle[0] };
+   }
+
+   if (!nir_scalar_is_intrinsic(val))
+      return false;
+
+   /* If it's not a value coming from the push constant data, give up. */
+   nir_intrinsic_instr *push_intrin = nir_def_as_intrinsic(val.def);
+   if (push_intrin->intrinsic != nir_intrinsic_load_push_constant)
+      return false;
+
+   /* Several loads can share the same root cast, don't report progress again
+    * once it has been realigned.
+    */
+   if (root->cast.align_mul == cbv_align && root->cast.align_offset == 0)
+      return false;
+
+   /* Realign to the CBV requirement.
+    *
+    * The alignment is a property of the base pointer, so it has to be
+    * recorded on the root cast and not on the deref feeding the load: that
+    * one can be an array/struct deref, for which the `cast` fields are not
+    * valid and writing them would corrupt the deref.
+    */
+   root->cast.align_mul = cbv_align;
+   root->cast.align_offset = 0;
+
+   return true;
+}
+
+/**
+ * Shader-level entry point of the CBV realignment pass.
+ *
+ * Hands realign_cbv() to nir_shader_intrinsics_pass() with no user data and
+ * with all metadata declared as preserved, and forwards its result.
+ */
+bool
+anv_nir_realign_cbv(nir_shader *shader)
+{
+   const bool progress =
+      nir_shader_intrinsics_pass(shader, realign_cbv, nir_metadata_all, NULL);
+
+   return progress;
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f9e812ab8c5..44c2d1c7a82 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1820,6 +1820,7 @@ struct anv_instance {
bool external_memory_implicit_sync;
bool force_guc_low_latency;
bool emulate_read_without_format;
+ bool promote_cbv_to_push_buffers;
/**
* Workarounds for game bugs.
diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c
index 1315df9d27f..f8c27ac217e 100644
--- a/src/intel/vulkan/anv_shader_compile.c
+++ b/src/intel/vulkan/anv_shader_compile.c
@@ -186,6 +186,9 @@ anv_shader_init_uuid(struct anv_physical_device *device)
const bool btp_bti_rcc = device->rt_change_needs_flush;
_mesa_blake3_update(&ctx, &btp_bti_rcc, sizeof(btp_bti_rcc));
+ const bool cbv_push_buffer = device->instance->promote_cbv_to_push_buffers;
+ _mesa_blake3_update(&ctx, &cbv_push_buffer, sizeof(cbv_push_buffer));
+
uint8_t blake3[BLAKE3_KEY_LEN];
_mesa_blake3_final(&ctx, blake3);
memcpy(device->shader_binary_uuid, blake3, sizeof(device->shader_binary_uuid));
@@ -1530,11 +1533,24 @@ anv_shader_lower_nir(struct anv_device *device,
pdevice->isl_dev.shader_tiling);
}
- NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
- nir_address_format_64bit_global);
+ /* Lower push constants variables prior to global realignment for CBV
+ * resources, it makes identifying a 64bit pointer from the push constants
+ * easier.
+ */
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
nir_address_format_32bit_offset);
+ /* Realign pointers to CBV on stages that can promote to push buffers. */
+ if (pdevice->instance->promote_cbv_to_push_buffers &&
+ nir->info.stage <= MESA_SHADER_FRAGMENT) {
+ /* Cleanup for the analysis, we don't want any ALU */
+ cleanup_nir(nir);
+ NIR_PASS(_, nir, anv_nir_realign_cbv);
+ }
+
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
+ nir_address_format_64bit_global);
+
NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
shader_data->push_desc_info.used_descriptors =
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 22ed0712599..5894e535e9d 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -185,6 +185,7 @@ libanv_files = files(
'anv_nir_lower_unaligned_dispatch.c',
'anv_nir_push_constants_analysis.c',
'anv_nir_push_descriptor_analysis.c',
+ 'anv_nir_realign_cbv.c',
'anv_perf.c',
'anv_physical_device.c',
'anv_pipeline_cache.c',
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index 9b5dcfe8728..58f1db2bc9b 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -1056,6 +1056,7 @@ TODO: document the other workarounds.
+
diff --git a/src/util/driconf.h b/src/util/driconf.h
index efd756fe765..348425a00c6 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -937,6 +937,10 @@
#define DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(def) \
DRI_CONF_OPT_B(anv_external_memory_implicit_sync, def, "Implicit sync on external BOs")
+#define DRI_CONF_ANV_PROMOTE_CBV_TO_PUSH_BUFFERS(def) \
+ DRI_CONF_OPT_B(anv_promote_cbv_to_push_buffers, def, \
+ "Promote CBV 64bit pointers in push constant data to push buffers")
+
#define DRI_CONF_ANV_STATE_CACHE_PERF_FIX(def) \
DRI_CONF_OPT_B(anv_state_cache_perf_fix, def, \
"Whether COMMON_SLICE_CHICKEN3 bit13 should be programmed to enable BTP+BTI RCC keying")