anv: implement workaround for Wa_18038825448

The workaround description states that we need to set the
3DSTATE_PS_EXTRA field EnablePSDependencyOnCPsizeChange whenever the
PixelShaderIsPerCoarsePixel state changes.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30475>
This commit is contained in:
Tapani Pälli 2024-08-06 15:23:38 +03:00
parent 9582de9ee3
commit 7a4020e129
6 changed files with 70 additions and 9 deletions

View file

@ -51,6 +51,7 @@ anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
state->current_pipeline = UINT32_MAX;
state->gfx.restart_index = UINT32_MAX;
state->gfx.object_preemption = true;
state->gfx.coarse_pixel_active = ANV_COARSE_PIXEL_STATE_UNKNOWN;
state->gfx.dirty = 0;
memcpy(state->gfx.dyn_state.dirty,

View file

@ -153,6 +153,25 @@ genX(cmd_buffer_ensure_wa_14018283232)(struct anv_cmd_buffer *cmd_buffer,
}
#endif
/**
 * Record the coarse pixel state about to be used by @cmd_buffer, as needed
 * for Wa_18038825448.
 *
 * Returns true only when the workaround applies to the device and @state
 * differs from the state currently tracked in the graphics state. In that
 * case the new state is stored and ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE is
 * raised in the graphics dirty mask. In every other case nothing is modified
 * and false is returned, including builds where INTEL_WA_18038825448_GFX_VER
 * evaluates to 0.
 */
static inline bool
genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer,
                                         enum anv_coarse_pixel_state state)
{
#if INTEL_WA_18038825448_GFX_VER
   /* Nothing to track on devices that do not need the workaround. */
   if (!intel_needs_workaround(cmd_buffer->device->info, 18038825448))
      return false;

   struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;

   /* Same state as before: no toggle to report. */
   if (gfx_state->coarse_pixel_active == state)
      return false;

   gfx_state->coarse_pixel_active = state;
   gfx_state->dirty |= ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE;
   return true;
#else
   return false;
#endif
}
void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
struct anv_device *device,
struct anv_cmd_buffer *cmd_buffer,

View file

@ -3315,6 +3315,7 @@ enum anv_cmd_dirty_bits {
ANV_CMD_DIRTY_RESTART_INDEX = 1 << 5,
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE = 1 << 6,
ANV_CMD_DIRTY_FS_MSAA_FLAGS = 1 << 7,
ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE = 1 << 8,
};
typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
@ -3787,6 +3788,12 @@ struct anv_cmd_pipeline_state {
struct anv_pipeline *pipeline;
};
/** Coarse pixel state tracked per command buffer for Wa_18038825448. */
enum anv_coarse_pixel_state {
   /* No coarse pixel state has been recorded yet. */
   ANV_COARSE_PIXEL_STATE_UNKNOWN  = 0,
   /* The last recorded state had coarse pixel dispatch disabled. */
   ANV_COARSE_PIXEL_STATE_DISABLED = 1,
   /* The last recorded state had coarse pixel dispatch enabled. */
   ANV_COARSE_PIXEL_STATE_ENABLED  = 2,
};
/** State tracking for graphics pipeline
*
* This has anv_cmd_pipeline_state as a base struct to track things which get
@ -3850,6 +3857,11 @@ struct anv_cmd_graphics_state {
*/
bool viewport_set;
/**
* State tracking for Wa_18038825448.
*/
enum anv_coarse_pixel_state coarse_pixel_active;
struct intel_urb_config urb_cfg;
uint32_t n_occlusion_queries;

View file

@ -339,6 +339,13 @@ blorp_exec_on_render(struct blorp_batch *batch,
genX(cmd_buffer_ensure_wa_14018283232)(cmd_buffer, false);
#endif
#if INTEL_WA_18038825448_GFX_VER
if (genX(cmd_buffer_set_coarse_pixel_active)
(cmd_buffer, ANV_COARSE_PIXEL_STATE_DISABLED)) {
batch->flags |= BLORP_BATCH_FORCE_CPS_DEPENDENCY;
}
#endif
if (params->depth.enabled &&
!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);
@ -421,7 +428,8 @@ blorp_exec_on_render(struct blorp_batch *batch,
ANV_CMD_DIRTY_XFB_ENABLE |
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE |
ANV_CMD_DIRTY_FS_MSAA_FLAGS |
ANV_CMD_DIRTY_RESTART_INDEX);
ANV_CMD_DIRTY_RESTART_INDEX |
ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE);
cmd_buffer->state.gfx.vb_dirty = ~0;
cmd_buffer->state.gfx.dirty |= dirty;

View file

@ -566,7 +566,8 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
}
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_FS_MSAA_FLAGS)) {
(gfx->dirty & ANV_CMD_DIRTY_FS_MSAA_FLAGS) ||
(gfx->dirty & ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE)) {
if (wm_prog_data) {
const struct anv_shader_bin *fs_bin =
pipeline->base.shaders[MESA_SHADER_FRAGMENT];
@ -617,15 +618,24 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample,
brw_wm_prog_data_is_persample(wm_prog_data, gfx->fs_msaa_flags));
#if GFX_VER >= 11
SET(PS_EXTRA, ps_extra.PixelShaderIsPerCoarsePixel,
brw_wm_prog_data_is_coarse(wm_prog_data, gfx->fs_msaa_flags));
const bool uses_coarse_pixel =
brw_wm_prog_data_is_coarse(wm_prog_data, gfx->fs_msaa_flags);
SET(PS_EXTRA, ps_extra.PixelShaderIsPerCoarsePixel, uses_coarse_pixel);
#endif
#if GFX_VERx10 >= 125
/* TODO: We should only require this when the last geometry shader
* uses a fragment shading rate that is not constant.
*/
SET(PS_EXTRA, ps_extra.EnablePSDependencyOnCPsizeChange,
brw_wm_prog_data_is_coarse(wm_prog_data, gfx->fs_msaa_flags));
enum anv_coarse_pixel_state cps_state = uses_coarse_pixel ?
ANV_COARSE_PIXEL_STATE_ENABLED : ANV_COARSE_PIXEL_STATE_DISABLED;
bool cps_state_toggled =
genX(cmd_buffer_set_coarse_pixel_active)(cmd_buffer, cps_state);
if (cps_state_toggled)
dirty_state_mask |= ANV_CMD_DIRTY_COARSE_PIXEL_ACTIVE;
const bool needs_ps_dependency =
/* TODO: We should only require this when the last geometry shader
* uses a fragment shading rate that is not constant.
*/
uses_coarse_pixel || cps_state_toggled;
SET(PS_EXTRA, ps_extra.EnablePSDependencyOnCPsizeChange, needs_ps_dependency);
#endif
SET(WM, wm.BarycentricInterpolationMode,
wm_prog_data_barycentric_modes(wm_prog_data, gfx->fs_msaa_flags));

View file

@ -214,6 +214,13 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}
#if INTEL_WA_18038825448_GFX_VER
const bool needs_ps_dependency =
state->cmd_buffer != NULL &&
genX(cmd_buffer_set_coarse_pixel_active)
(state->cmd_buffer, ANV_COARSE_PIXEL_STATE_DISABLED);
#endif
anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
psx.PixelShaderValid = true;
#if GFX_VER < 20
@ -222,6 +229,10 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
psx.PixelShaderComputesStencil = prog_data->computed_stencil;
#if INTEL_WA_18038825448_GFX_VER
psx.EnablePSDependencyOnCPsizeChange = needs_ps_dependency;
#endif
}
anv_batch_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {