drirc/anv: implement steps to disable RHWO for Wa_14024015672

Disable RHWO by default for single-sample draws; for MSAA draws, disable
it only if a drirc key is set (avoids the perf hit when not needed).

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39404>
This commit is contained in:
Tapani Pälli 2026-01-20 09:04:41 +02:00 committed by Marge Bot
parent 055a89cffb
commit f66ff97d58
6 changed files with 46 additions and 7 deletions

View file

@ -29,6 +29,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(100)
DRI_CONF_NO_16BIT(false)
DRI_CONF_INTEL_ENABLE_WA_14018912822(false)
DRI_CONF_INTEL_ENABLE_WA_14024015672_MSAA(false)
DRI_CONF_INTEL_SAMPLER_ROUTE_TO_LSC(false)
DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6)
DRI_CONF_ANV_QUERY_COPY_WITH_SHADER_THRESHOLD(6)
@ -180,6 +181,8 @@ anv_init_dri_options(struct anv_instance *instance)
driQueryOptionb(&instance->dri_options, "no_16bit");
instance->intel_enable_wa_14018912822 =
driQueryOptionb(&instance->dri_options, "intel_enable_wa_14018912822");
instance->intel_enable_wa_14024015672_msaa =
driQueryOptionb(&instance->dri_options, "intel_enable_wa_14024015672_msaa");
instance->emulate_read_without_format =
driQueryOptionb(&instance->dri_options, "anv_emulate_read_without_format");
instance->fp64_workaround_enabled =

View file

@ -1789,6 +1789,7 @@ struct anv_instance {
/* HW workarounds */
bool no_16bit;
bool intel_enable_wa_14018912822;
bool intel_enable_wa_14024015672_msaa;
/**
* Ray tracing configuration.
@ -4732,7 +4733,7 @@ struct anv_cmd_state {
enum isl_aux_op color_aux_op;
/**
* Whether RHWO optimization is enabled (Wa_1508744258).
* Whether RHWO optimization is enabled (Wa_1508744258 and Wa_14024015672).
*/
bool rhwo_optimization_enabled;

View file

@ -2435,7 +2435,7 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
ALWAYS_INLINE void
genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
{
#if INTEL_NEEDS_WA_1508744258
#if INTEL_WA_1508744258_GFX_VER || INTEL_WA_14024015672_GFX_VER
/* If we're changing the state of the RHWO optimization, we need to have
* sb_stall+cs_stall.
*/
@ -2510,18 +2510,27 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
&emitted_bits);
anv_cmd_buffer_update_pending_query_bits(cmd_buffer, emitted_bits);
#if INTEL_NEEDS_WA_1508744258
#if INTEL_WA_1508744258_GFX_VER || INTEL_WA_14024015672_GFX_VER
if (rhwo_opt_change) {
#if GFX_VERx10 == 120
anv_batch_write_reg(&cmd_buffer->batch, GENX(COMMON_SLICE_CHICKEN1), c1) {
c1.RCCRHWOOptimizationDisable =
!cmd_buffer->state.pending_rhwo_optimization_enabled;
c1.RCCRHWOOptimizationDisableMask = true;
}
#else
if (intel_needs_workaround(cmd_buffer->device->info, 14024015672)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_3D_MODE), p) {
p.RCCRHWOOptimizationDisable =
!cmd_buffer->state.pending_rhwo_optimization_enabled;
p.RCCRHWOOptimizationDisableMask = true;
}
}
#endif
cmd_buffer->state.rhwo_optimization_enabled =
cmd_buffer->state.pending_rhwo_optimization_enabled;
}
#endif
}
static inline struct anv_state

View file

@ -790,6 +790,19 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PS))
cmd_buffer_maybe_flush_rt_writes(cmd_buffer, gfx, dyn);
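/* With Wa_14024015672, RHWO is initially disabled. We enable it only for
* MSAA draws (rasterization_samples > 1), and only when the
* intel_enable_wa_14024015672_msaa drirc key is not set; it stays disabled
* for single-sample draws and, if the key is set, for MSAA draws as well.
*/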
#if INTEL_WA_14024015672_GFX_VER
if (intel_needs_workaround(device->info, 14024015672) &&
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES)) {
cmd_buffer->state.pending_rhwo_optimization_enabled =
!device->physical->instance->intel_enable_wa_14024015672_msaa &&
dyn->ms.rasterization_samples > 1;
}
#endif
/* Apply any pending pipeline flushes we may have. We want to apply them
* now because, if any of those flushes are for things like push constants,
* the GPU will read the state at weird times.

View file

@ -701,14 +701,23 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
genX(emit_pipeline_select)(batch, _3D, device);
#endif
#if GFX_VER >= 20
#if GFX_VERx10 >= 125
anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), p) {
p.DX10OGLBorderModeforYCRCB = true;
p.DX10OGLBorderModeforYCRCBMask = true;
if (device->info->verx10 > 125 ||
intel_device_info_is_mtl_or_arl(device->info)) {
p.DX10OGLBorderModeforYCRCB = true;
p.DX10OGLBorderModeforYCRCBMask = true;
}
#if INTEL_NEEDS_WA_14019857787
p.EnableOOOreadsinRCPB = true;
p.EnableOOOreadsinRCPBMask = true;
#endif
/* Disable RHWO optimization by default and turn it on only for MSAA draws
* later unless Wa_14024015672 drirc is set.
*/
p.RCCRHWOOptimizationDisable =
intel_needs_workaround(device->info, 14024015672);
p.RCCRHWOOptimizationDisableMask = true;
}
#endif

View file

@ -366,6 +366,10 @@
DRI_CONF_OPT_B(intel_enable_wa_14018912822, def, \
"Intel workaround for using zero blend constants")
/* drirc option "intel_enable_wa_14024015672_msaa"; `def` is its default
 * value. The driver queries it as a boolean. When set, the RHWO optimization
 * is kept disabled for MSAA draws too on hardware that needs Wa_14024015672;
 * when unset, only single-sample draws run with RHWO disabled.
 */
#define DRI_CONF_INTEL_ENABLE_WA_14024015672_MSAA(def) \
DRI_CONF_OPT_B(intel_enable_wa_14024015672_msaa, def, \
"Intel workaround for RHWO MSAA")
/* drirc option "intel_sampler_route_to_lsc"; `def` is its default value.
 * NOTE(review): presumably a boolean like the other DRI_CONF_OPT_B options
 * here; the code consuming it is not visible in this view.
 */
#define DRI_CONF_INTEL_SAMPLER_ROUTE_TO_LSC(def) \
DRI_CONF_OPT_B(intel_sampler_route_to_lsc, def, \
"Intel specific toggle to enable sampler route to LSC")