anv: WA 16014538804 for DG2, MTL A0

Send empty/dummy PIPE_CONTROL after every third 3DPRIMITIVE command.

Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25039>
This commit is contained in:
Rohan Garg 2022-04-11 13:33:07 +03:00 committed by Marge Bot
parent 1da1bcd038
commit de6653dc5d
5 changed files with 108 additions and 59 deletions

View file

@ -179,10 +179,11 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
enum anv_timestamp_capture_type type,
void *data);
void genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
struct anv_device *device,
uint32_t primitive_topology,
uint32_t vertex_count);
void
genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch,
const struct anv_device *device,
uint32_t primitive_topology,
uint32_t vertex_count);
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,

View file

@ -1965,6 +1965,11 @@ struct anv_batch {
VkResult status;
enum intel_engine_class engine_class;
/**
* Number of 3DPRIMITIVE's emitted for WA 16014538804
*/
uint8_t num_3d_primitives_emitted;
};
void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);

View file

@ -87,15 +87,6 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) {
fprintf(stdout, ") reason: %s\n", reason); \
}
ALWAYS_INLINE static void
genX(emit_dummy_post_sync_op)(struct anv_cmd_buffer *cmd_buffer,
uint32_t vertex_count)
{
genX(batch_emit_dummy_post_sync_op)(&cmd_buffer->batch, cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
vertex_count);
}
void
genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
{
@ -3595,6 +3586,8 @@ genX(CmdExecuteCommands)(
{
ANV_FROM_HANDLE(anv_cmd_buffer, container, commandBuffer);
struct anv_device *device = container->device;
if (anv_batch_has_error(&container->batch))
return;
@ -3642,7 +3635,7 @@ genX(CmdExecuteCommands)(
* regardless of conditional rendering being enabled in container.
*/
struct mi_builder b;
mi_builder_init(&b, container->device->info, &container->batch);
mi_builder_init(&b, device->info, &container->batch);
mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG),
mi_imm(UINT64_MAX));
}
@ -3660,9 +3653,9 @@ genX(CmdExecuteCommands)(
genX(cmd_buffer_so_memcpy)(
container,
anv_state_pool_state_address(&container->device->internal_surface_state_pool,
anv_state_pool_state_address(&device->internal_surface_state_pool,
dst_state),
anv_state_pool_state_address(&container->device->internal_surface_state_pool,
anv_state_pool_state_address(&device->internal_surface_state_pool,
src_state),
src_state.alloc_size);
}
@ -3704,6 +3697,11 @@ genX(CmdExecuteCommands)(
"Secondary cmd buffer not tracked in VF cache");
}
#if INTEL_NEEDS_WA_16014538804
if (intel_needs_workaround(device->info, 16014538804))
anv_batch_emit(&container->batch, GENX(PIPE_CONTROL), pc);
#endif
/* The secondary may have selected a different pipeline (3D or compute) and
* may have changed the current L3$ configuration. Reset our tracking
* variables to invalid values to ensure that we re-emit these in the case
@ -3716,7 +3714,7 @@ genX(CmdExecuteCommands)(
container->state.gfx.push_constant_stages = 0;
container->state.gfx.ds_write_state = false;
memcpy(container->state.gfx.dyn_state.dirty,
container->device->gfx_dirty_state,
device->gfx_dirty_state,
sizeof(container->state.gfx.dyn_state.dirty));
/* Each of the secondary command buffers will use its own state base
@ -3729,7 +3727,6 @@ genX(CmdExecuteCommands)(
genX(cmd_buffer_emit_state_base_address)(container);
/* Copy of utrace timestamp buffers from secondary into container */
struct anv_device *device = container->device;
if (u_trace_enabled(&device->ds.trace_context)) {
trace_intel_begin_trace_copy(&container->trace);
@ -4246,12 +4243,12 @@ void genX(CmdDraw)(
#endif
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
vertexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
#if GFX_VERx10 == 125
genX(emit_dummy_post_sync_op)(cmd_buffer, vertexCount);
#endif
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
trace_intel_end_draw(&cmd_buffer->trace, count);
@ -4304,6 +4301,12 @@ void genX(CmdDrawMultiEXT)(
prim.BaseVertexLocation = 0;
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
drawCount == 0 ? 0 :
pVertexInfo[drawCount - 1].vertexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
trace_intel_end_draw_multi(&cmd_buffer->trace, count);
}
@ -4342,16 +4345,17 @@ void genX(CmdDrawMultiEXT)(
prim.ExtendedParameter2 = i;
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
drawCount == 0 ? 0 :
pVertexInfo[drawCount - 1].vertexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
trace_intel_end_draw_multi(&cmd_buffer->trace, count);
}
#endif
#if GFX_VERx10 == 125
genX(emit_dummy_post_sync_op)(cmd_buffer,
drawCount == 0 ? 0 :
pVertexInfo[drawCount - 1].vertexCount);
#endif
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
}
@ -4416,12 +4420,12 @@ void genX(CmdDrawIndexed)(
#endif
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
indexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
#if GFX_VERx10 == 125
genX(emit_dummy_post_sync_op)(cmd_buffer, indexCount);
#endif
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
trace_intel_end_draw_indexed(&cmd_buffer->trace, count);
@ -4489,6 +4493,13 @@ void genX(CmdDrawMultiIndexedEXT)(
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = *pVertexOffset;
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
drawCount == 0 ? 0 :
pIndexInfo[drawCount - 1].indexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device,
false);
trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count);
@ -4524,6 +4535,13 @@ void genX(CmdDrawMultiIndexedEXT)(
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = *pVertexOffset;
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
drawCount == 0 ? 0 :
pIndexInfo[drawCount - 1].indexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device,
false);
trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count);
@ -4554,6 +4572,13 @@ void genX(CmdDrawMultiIndexedEXT)(
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = draw->vertexOffset;
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
drawCount == 0 ? 0 :
pIndexInfo[drawCount - 1].indexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count);
}
@ -4594,16 +4619,18 @@ void genX(CmdDrawMultiIndexedEXT)(
prim.ExtendedParameter1 = firstInstance;
prim.ExtendedParameter2 = i;
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
drawCount == 0 ? 0 :
pIndexInfo[drawCount - 1].indexCount);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count);
}
#endif
#if GFX_VERx10 == 125
genX(emit_dummy_post_sync_op)(cmd_buffer,
drawCount == 0 ? 0 :
pIndexInfo[drawCount - 1].indexCount);
#endif
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
}
@ -4717,10 +4744,11 @@ void genX(CmdDrawIndirectByteCountEXT)(
#endif
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
1);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
#if GFX_VERx10 == 125
genX(emit_dummy_post_sync_op)(cmd_buffer, 1);
#endif
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
@ -4849,10 +4877,12 @@ emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer,
#endif
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
1);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
#if GFX_VERx10 == 125
genX(emit_dummy_post_sync_op)(cmd_buffer, 1);
#endif
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer,
indexed ? RANDOM : SEQUENTIAL);
@ -5076,10 +5106,11 @@ emit_indirect_count_draws(struct anv_cmd_buffer *cmd_buffer,
#endif
}
genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.gfx.primitive_topology,
1);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
#if GFX_VERx10 == 125
genX(emit_dummy_post_sync_op)(cmd_buffer, 1);
#endif
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
}
@ -8189,15 +8220,14 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
}
void
genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
struct anv_device *device,
uint32_t primitive_topology,
uint32_t vertex_count)
genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch,
const struct anv_device *device,
uint32_t primitive_topology,
uint32_t vertex_count)
{
if (!intel_needs_workaround(device->info, 22014412737))
return;
if ((primitive_topology == _3DPRIM_POINTLIST ||
#if INTEL_NEEDS_WA_22014412737 || INTEL_NEEDS_WA_16014538804
if (intel_needs_workaround(device->info, 22014412737) &&
(primitive_topology == _3DPRIM_POINTLIST ||
primitive_topology == _3DPRIM_LINELIST ||
primitive_topology == _3DPRIM_LINESTRIP ||
primitive_topology == _3DPRIM_LINELIST_ADJ ||
@ -8212,7 +8242,20 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
(batch, device->info, 0, WriteImmediateData,
device->workaround_address, 0, 0);
/* Reset counter because we just emitted a PC */
batch->num_3d_primitives_emitted = 0;
} else if (intel_needs_workaround(device->info, 16014538804)) {
batch->num_3d_primitives_emitted++;
/* WA 16014538804:
* After every 3 3D_Primitive command,
* atleast 1 pipe_control must be inserted.
*/
if (batch->num_3d_primitives_emitted == 3) {
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc);
batch->num_3d_primitives_emitted = 0;
}
}
#endif
}
struct anv_state

View file

@ -236,12 +236,12 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
prim.StartInstanceLocation = 0;
prim.BaseVertexLocation = 0;
}
genX(emit_breakpoint)(batch, device, false);
#if GFX_VERx10 == 125
genX(batch_emit_dummy_post_sync_op)(batch, device, _3DPRIM_POINTLIST,
size / bs);
#endif
genX(batch_emit_post_3dprimitive_was)(batch,
device,
_3DPRIM_POINTLIST, size / bs);
genX(emit_breakpoint)(batch, device, false);
}
void

View file

@ -524,8 +524,8 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
prim.VertexCountPerInstance = 3;
prim.InstanceCount = 1;
}
genX(batch_emit_post_3dprimitive_was)(batch, device, _3DPRIM_RECTLIST, 3);
genX(emit_breakpoint)(batch, device, false);
genX(batch_emit_dummy_post_sync_op)(batch, device, _3DPRIM_RECTLIST, 3);
} else {
const struct intel_device_info *devinfo = device->info;
const struct brw_cs_prog_data *prog_data =