mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 20:18:12 +02:00
anv: add a pass to realign global loads on DX CBV resources
CBV resources are supposed to be 256B aligned (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT). vkd3d-proton puts CBV addresses in the push constant data and does global loads on them. Unfortunately those loads don't carry a 256B alignment value, so when looking at what we can promote to HW push buffers, we can't consider them. This change introduces a detection pass for CBV resources (according to vkd3d-proton devs those are 64KiB in size) and realigns the loads to be 256B aligned. This is only enabled on DX emulation. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Felix DeGrood <felix.j.degrood@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39451>
This commit is contained in:
parent
bba428ce3f
commit
eda83bc2b6
8 changed files with 109 additions and 2 deletions
|
|
@ -29,6 +29,7 @@ static const driOptionDescription anv_dri_options[] = {
|
|||
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
|
||||
DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
|
||||
DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(100)
|
||||
DRI_CONF_ANV_PROMOTE_CBV_TO_PUSH_BUFFERS(false)
|
||||
DRI_CONF_ANV_STATE_CACHE_PERF_FIX(false)
|
||||
DRI_CONF_NO_16BIT(false)
|
||||
DRI_CONF_INTEL_BINDING_TABLE_BLOCK_SIZE(BINDING_TABLE_POOL_DEFAULT_BLOCK_SIZE,
|
||||
|
|
@ -198,6 +199,8 @@ anv_init_dri_options(struct anv_instance *instance)
|
|||
driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
|
||||
instance->force_filter_addr_rounding =
|
||||
driQueryOptionb(&instance->dri_options, "anv_force_filter_addr_rounding");
|
||||
instance->promote_cbv_to_push_buffers =
|
||||
driQueryOptionb(&instance->dri_options, "anv_promote_cbv_to_push_buffers");
|
||||
instance->state_cache_perf_fix =
|
||||
driQueryOptionb(&instance->dri_options, "anv_state_cache_perf_fix");
|
||||
instance->lower_depth_range_rate =
|
||||
|
|
|
|||
|
|
@ -125,6 +125,8 @@ struct anv_nir_push_layout_info {
|
|||
|
||||
bool anv_nir_shrink_push_constant_ranges(nir_shader *nir);
|
||||
|
||||
bool anv_nir_realign_cbv(nir_shader *shader);
|
||||
|
||||
bool anv_nir_compute_push_layout(nir_shader *nir,
|
||||
const struct anv_physical_device *pdevice,
|
||||
enum brw_robustness_flags robust_flags,
|
||||
|
|
|
|||
79
src/intel/vulkan/anv_nir_realign_cbv.c
Normal file
79
src/intel/vulkan/anv_nir_realign_cbv.c
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
/* Copyright © 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "anv_nir.h"
|
||||
#include "nir/nir_builder.h"
|
||||
|
||||
/**
 * This file implements a pass that looks for global read-only loads from a
 * pointer stored in the push constant data. When the block size is 64KiB
 * (indicating a CBV resource), the pass aligns the load to 256B, which is the
 * alignment guarantee applications are expected to provide. This alignment
 * guarantee can later be used to promote those 64bit pointers to push buffers
 * (HW needs 32B alignment).
 */
|
||||
|
||||
static bool
|
||||
realign_cbv(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_deref)
|
||||
return false;
|
||||
|
||||
/* If writable, it's not CBV. */
|
||||
if ((nir_intrinsic_access(intrin) & ACCESS_NON_WRITEABLE) == 0)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
|
||||
/* Find the root of the deref to see if it's a pointer in the push constant
|
||||
* data.
|
||||
*/
|
||||
while (true) {
|
||||
if (deref->deref_type == nir_deref_type_var)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *parent = nir_src_as_deref(deref->parent);
|
||||
if (!parent)
|
||||
break;
|
||||
|
||||
deref = parent;
|
||||
}
|
||||
assert(deref->deref_type == nir_deref_type_cast);
|
||||
|
||||
/* This is the magic value vkd3d-proton puts allowing us to recognize a
|
||||
* CBV.
|
||||
*/
|
||||
if (glsl_get_explicit_size(deref->type, true) != 64 * 1024)
|
||||
return false;
|
||||
|
||||
nir_scalar val = { deref->parent.ssa, 0 };
|
||||
|
||||
if (nir_scalar_is_alu(val)) {
|
||||
nir_alu_instr *pack_alu = nir_def_as_alu(val.def);
|
||||
if (pack_alu->op != nir_op_pack_64_2x32_split)
|
||||
return false;
|
||||
|
||||
val = (nir_scalar){ pack_alu->src[0].src.ssa, pack_alu->src[0].swizzle[0] };
|
||||
}
|
||||
|
||||
if (!nir_scalar_is_intrinsic(val))
|
||||
return false;
|
||||
|
||||
/* If it's not a value coming from the push constant data, give up. */
|
||||
nir_intrinsic_instr *push_intrin = nir_def_as_intrinsic(val.def);
|
||||
if (push_intrin->intrinsic != nir_intrinsic_load_push_constant)
|
||||
return false;
|
||||
|
||||
/* Realign to the CBV requirement */
|
||||
deref = nir_src_as_deref(intrin->src[0]);
|
||||
deref->cast.align_mul = 256;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
anv_nir_realign_cbv(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader, realign_cbv, nir_metadata_all, NULL);
|
||||
}
|
||||
|
|
@ -1820,6 +1820,7 @@ struct anv_instance {
|
|||
bool external_memory_implicit_sync;
|
||||
bool force_guc_low_latency;
|
||||
bool emulate_read_without_format;
|
||||
bool promote_cbv_to_push_buffers;
|
||||
|
||||
/**
|
||||
* Workarounds for game bugs.
|
||||
|
|
|
|||
|
|
@ -186,6 +186,9 @@ anv_shader_init_uuid(struct anv_physical_device *device)
|
|||
const bool btp_bti_rcc = device->rt_change_needs_flush;
|
||||
_mesa_blake3_update(&ctx, &btp_bti_rcc, sizeof(btp_bti_rcc));
|
||||
|
||||
const bool cbv_push_buffer = device->instance->promote_cbv_to_push_buffers;
|
||||
_mesa_blake3_update(&ctx, &cbv_push_buffer, sizeof(cbv_push_buffer));
|
||||
|
||||
uint8_t blake3[BLAKE3_KEY_LEN];
|
||||
_mesa_blake3_final(&ctx, blake3);
|
||||
memcpy(device->shader_binary_uuid, blake3, sizeof(device->shader_binary_uuid));
|
||||
|
|
@ -1530,11 +1533,24 @@ anv_shader_lower_nir(struct anv_device *device,
|
|||
pdevice->isl_dev.shader_tiling);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
|
||||
nir_address_format_64bit_global);
|
||||
/* Lower push constants variables prior to global realignment for CBV
|
||||
* resources, it makes identifying a 64bit pointer from the push constants
|
||||
* easier.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
|
||||
nir_address_format_32bit_offset);
|
||||
|
||||
/* Realign pointers to CBV on stages that can promote to push buffers. */
|
||||
if (pdevice->instance->promote_cbv_to_push_buffers &&
|
||||
nir->info.stage <= MESA_SHADER_FRAGMENT) {
|
||||
/* Cleanup for the analysis, we don't want any ALU */
|
||||
cleanup_nir(nir);
|
||||
NIR_PASS(_, nir, anv_nir_realign_cbv);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
|
||||
nir_address_format_64bit_global);
|
||||
|
||||
NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
|
||||
|
||||
shader_data->push_desc_info.used_descriptors =
|
||||
|
|
|
|||
|
|
@ -185,6 +185,7 @@ libanv_files = files(
|
|||
'anv_nir_lower_unaligned_dispatch.c',
|
||||
'anv_nir_push_constants_analysis.c',
|
||||
'anv_nir_push_descriptor_analysis.c',
|
||||
'anv_nir_realign_cbv.c',
|
||||
'anv_perf.c',
|
||||
'anv_physical_device.c',
|
||||
'anv_pipeline_cache.c',
|
||||
|
|
|
|||
|
|
@ -1056,6 +1056,7 @@ TODO: document the other workarounds.
|
|||
</application>
|
||||
<engine engine_name_match="vkd3d|DXVK">
|
||||
<option name="anv_force_filter_addr_rounding" value="true" />
|
||||
<option name="anv_promote_cbv_to_push_buffers" value="true" />
|
||||
</engine>
|
||||
<!-- Needed to avoid XeSS code paths. -->
|
||||
<application name="Marvel's Spider-Man Remastered" executable="Spider-Man.exe">
|
||||
|
|
|
|||
|
|
@ -937,6 +937,10 @@
|
|||
#define DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(def) \
|
||||
DRI_CONF_OPT_B(anv_external_memory_implicit_sync, def, "Implicit sync on external BOs")
|
||||
|
||||
#define DRI_CONF_ANV_PROMOTE_CBV_TO_PUSH_BUFFERS(def) \
|
||||
DRI_CONF_OPT_B(anv_promote_cbv_to_push_buffers, def, \
|
||||
"Promote CBV 64bit pointers in push constant data to push buffers")
|
||||
|
||||
#define DRI_CONF_ANV_STATE_CACHE_PERF_FIX(def) \
|
||||
DRI_CONF_OPT_B(anv_state_cache_perf_fix, def, \
|
||||
"Whether COMMON_SLICE_CHICKEN3 bit13 should be programmed to enable BTP+BTI RCC keying")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue