mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 11:08:03 +02:00
anv: Batch MCS and CCS aux-op flushes
The PRMs suggest that certain classes of auxiliary surface operations
will automatically synchronize when performed back-to-back:
Any transition from any value in {Clear, Render, Resolve} to a
different value in {Clear, Render, Resolve} requires end of pipe
synchronization.
Make use of this functionality by batching CCS and MCS flushes when
compatible auxiliary surface operations are performed within a command
buffer.
Ref: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11325
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29922>
This commit is contained in:
parent
f854161928
commit
54631ebc68
7 changed files with 424 additions and 239 deletions
|
|
@ -1522,7 +1522,6 @@ exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
anv_image_aux_layers(image, aspect, level));
|
||||
|
||||
const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
|
||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||
|
||||
struct blorp_surf surf;
|
||||
get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
|
||||
|
|
@ -1540,107 +1539,8 @@ exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
if (clear_value)
|
||||
surf.clear_color = *clear_value;
|
||||
|
||||
char flush_reason[64];
|
||||
int ret =
|
||||
snprintf(flush_reason, sizeof(flush_reason),
|
||||
"ccs op start: %s", isl_aux_op_to_name(ccs_op));
|
||||
assert(ret < sizeof(flush_reason));
|
||||
|
||||
/* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
|
||||
*
|
||||
* "After Render target fast clear, pipe-control with color cache
|
||||
* write-flush must be issued before sending any DRAW commands on
|
||||
* that render target."
|
||||
*
|
||||
* This comment is a bit cryptic and doesn't really tell you what's going
|
||||
* or what's really needed. It appears that fast clear ops are not
|
||||
* properly synchronized with other drawing. This means that we cannot
|
||||
* have a fast clear operation in the pipe at the same time as other
|
||||
* regular drawing operations. We need to use a PIPE_CONTROL to ensure
|
||||
* that the contents of the previous draw hit the render target before we
|
||||
* resolve and then use a second PIPE_CONTROL after the resolve to ensure
|
||||
* that it is completed before any additional drawing occurs.
|
||||
*
|
||||
* Bspec 57340 (r59562):
|
||||
*
|
||||
* Synchronization:
|
||||
* Due to interaction of scaled clearing rectangle with pixel
|
||||
* scoreboard, we require one of the following commands to be issued.
|
||||
* (Rows of PIPE_CONTROL command in the table)
|
||||
*
|
||||
* Requiring tile cache flush bit has been dropped since Xe2.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
(devinfo->verx10 < 200 ?
|
||||
ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) |
|
||||
(devinfo->verx10 == 120 ?
|
||||
ANV_PIPE_DEPTH_STALL_BIT : 0) |
|
||||
(devinfo->verx10 == 125 ?
|
||||
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
|
||||
ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
flush_reason);
|
||||
|
||||
switch (ccs_op) {
|
||||
case ISL_AUX_OP_FAST_CLEAR:
|
||||
/* From the ICL PRMs, Volume 9: Render Engine, State Caching :
|
||||
*
|
||||
* "Any values referenced by pointers within the RENDER_SURFACE_STATE
|
||||
* or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or
|
||||
* Indirect State Pointer) are considered to be part of that state
|
||||
* and any changes to these referenced values requires an
|
||||
* invalidation of the L1 state cache to ensure the new values are
|
||||
* being used as part of the state. In the case of surface data
|
||||
* pointed to by the Surface Base Address in RENDER SURFACE STATE,
|
||||
* the Texture Cache must be invalidated if the surface data
|
||||
* changes."
|
||||
*
|
||||
* and From the Render Target Fast Clear section,
|
||||
*
|
||||
* "HwManaged FastClear allows SW to store FastClearValue in separate
|
||||
* graphics allocation, instead of keeping them in
|
||||
* RENDER_SURFACE_STATE. This behavior can be enabled by setting
|
||||
* ClearValueAddressEnable in RENDER_SURFACE_STATE.
|
||||
*
|
||||
* Proper sequence of commands is as follows:
|
||||
*
|
||||
* 1. Storing clear color to allocation
|
||||
* 2. Ensuring that step 1. is finished and visible for TextureCache
|
||||
* 3. Performing FastClear
|
||||
*
|
||||
* Step 2. is required on products with ClearColorConversion feature.
|
||||
* This feature is enabled by setting ClearColorConversionEnable.
|
||||
* This causes HW to read stored color from ClearColorAllocation and
|
||||
* write back with the native format or RenderTarget - and clear
|
||||
* color needs to be present and visible. Reading is done from
|
||||
* TextureCache, writing is done to RenderCache."
|
||||
*
|
||||
* We're going to change the clear color. Invalidate the texture cache
|
||||
* now to ensure the clear color conversion feature works properly.
|
||||
* Although the docs seem to require invalidating the texture cache
|
||||
* after updating the clear color allocation, we can do this beforehand
|
||||
* so long as we ensure:
|
||||
*
|
||||
* 1. Step 1 is complete before the texture cache is accessed in step 3
|
||||
* 2. We don't access the texture cache between invalidation and step 3
|
||||
*
|
||||
* The second requirement is satisfied because we'll be performing step
|
||||
* 1 and 3 right after invalidating. The first is satisfied because
|
||||
* BLORP updates the clear color before performing the fast clear and it
|
||||
* performs the synchronizations suggested by the Render Target Fast
|
||||
* Clear section (not quoted here) to ensure its completion.
|
||||
*
|
||||
* While we're here, also invalidate the state cache as suggested.
|
||||
*/
|
||||
if (devinfo->ver >= 11) {
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
|
||||
"before blorp clear color update");
|
||||
}
|
||||
|
||||
blorp_fast_clear(batch, &surf, format, swizzle,
|
||||
level, base_layer, layer_count,
|
||||
0, 0, level_width, level_height);
|
||||
|
|
@ -1670,15 +1570,6 @@ exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
default:
|
||||
unreachable("Unsupported CCS operation");
|
||||
}
|
||||
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
(devinfo->verx10 == 120 ?
|
||||
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_DEPTH_STALL_BIT : 0) |
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
"ccs op finish");
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1697,7 +1588,6 @@ exec_mcs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
/* Multisampling with multi-planar formats is not supported */
|
||||
assert(image->n_planes == 1);
|
||||
|
||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||
struct blorp_surf surf;
|
||||
get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
|
||||
0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
|
||||
|
|
@ -1710,101 +1600,8 @@ exec_mcs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
if (clear_value)
|
||||
surf.clear_color = *clear_value;
|
||||
|
||||
/* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
|
||||
*
|
||||
* "After Render target fast clear, pipe-control with color cache
|
||||
* write-flush must be issued before sending any DRAW commands on
|
||||
* that render target."
|
||||
*
|
||||
* This comment is a bit cryptic and doesn't really tell you what's going
|
||||
* or what's really needed. It appears that fast clear ops are not
|
||||
* properly synchronized with other drawing. This means that we cannot
|
||||
* have a fast clear operation in the pipe at the same time as other
|
||||
* regular drawing operations. We need to use a PIPE_CONTROL to ensure
|
||||
* that the contents of the previous draw hit the render target before we
|
||||
* resolve and then use a second PIPE_CONTROL after the resolve to ensure
|
||||
* that it is completed before any additional drawing occurs.
|
||||
*
|
||||
* Bspec 57340 (r59562):
|
||||
*
|
||||
* Synchronization:
|
||||
* Due to interaction of scaled clearing rectangle with pixel
|
||||
* scoreboard, we require one of the following commands to be issued.
|
||||
* (Rows of PIPE_CONTROL command in the table)
|
||||
*
|
||||
* Requiring tile cache flush bit has been dropped since Xe2.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
(devinfo->verx10 < 200 ?
|
||||
ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) |
|
||||
(devinfo->verx10 == 120 ?
|
||||
ANV_PIPE_DEPTH_STALL_BIT : 0) |
|
||||
(devinfo->verx10 == 125 ?
|
||||
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
|
||||
ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
"before fast clear mcs");
|
||||
|
||||
switch (mcs_op) {
|
||||
case ISL_AUX_OP_FAST_CLEAR:
|
||||
/* From the ICL PRMs, Volume 9: Render Engine, State Caching :
|
||||
*
|
||||
* "Any values referenced by pointers within the RENDER_SURFACE_STATE
|
||||
* or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or
|
||||
* Indirect State Pointer) are considered to be part of that state
|
||||
* and any changes to these referenced values requires an
|
||||
* invalidation of the L1 state cache to ensure the new values are
|
||||
* being used as part of the state. In the case of surface data
|
||||
* pointed to by the Surface Base Address in RENDER SURFACE STATE,
|
||||
* the Texture Cache must be invalidated if the surface data
|
||||
* changes."
|
||||
*
|
||||
* and From the Render Target Fast Clear section,
|
||||
*
|
||||
* "HwManaged FastClear allows SW to store FastClearValue in separate
|
||||
* graphics allocation, instead of keeping them in
|
||||
* RENDER_SURFACE_STATE. This behavior can be enabled by setting
|
||||
* ClearValueAddressEnable in RENDER_SURFACE_STATE.
|
||||
*
|
||||
* Proper sequence of commands is as follows:
|
||||
*
|
||||
* 1. Storing clear color to allocation
|
||||
* 2. Ensuring that step 1. is finished and visible for TextureCache
|
||||
* 3. Performing FastClear
|
||||
*
|
||||
* Step 2. is required on products with ClearColorConversion feature.
|
||||
* This feature is enabled by setting ClearColorConversionEnable.
|
||||
* This causes HW to read stored color from ClearColorAllocation and
|
||||
* write back with the native format or RenderTarget - and clear
|
||||
* color needs to be present and visible. Reading is done from
|
||||
* TextureCache, writing is done to RenderCache."
|
||||
*
|
||||
* We're going to change the clear color. Invalidate the texture cache
|
||||
* now to ensure the clear color conversion feature works properly.
|
||||
* Although the docs seem to require invalidating the texture cache
|
||||
* after updating the clear color allocation, we can do this beforehand
|
||||
* so long as we ensure:
|
||||
*
|
||||
* 1. Step 1 is complete before the texture cache is accessed in step 3
|
||||
* 2. We don't access the texture cache between invalidation and step 3
|
||||
*
|
||||
* The second requirement is satisfied because we'll be performing step
|
||||
* 1 and 3 right after invalidating. The first is satisfied because
|
||||
* BLORP updates the clear color before performing the fast clear and it
|
||||
* performs the synchronizations suggested by the Render Target Fast
|
||||
* Clear section (not quoted here) to ensure its completion.
|
||||
*
|
||||
* While we're here, also invalidate the state cache as suggested.
|
||||
*/
|
||||
if (devinfo->ver >= 11) {
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
|
||||
"before blorp clear color update");
|
||||
}
|
||||
|
||||
blorp_fast_clear(batch, &surf, format, swizzle,
|
||||
0, base_layer, layer_count,
|
||||
0, 0, image->vk.extent.width, image->vk.extent.height);
|
||||
|
|
@ -1820,15 +1617,6 @@ exec_mcs_op(struct anv_cmd_buffer *cmd_buffer,
|
|||
default:
|
||||
unreachable("Unsupported MCS operation");
|
||||
}
|
||||
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
(devinfo->verx10 == 120 ?
|
||||
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_DEPTH_STALL_BIT : 0) |
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
"after fast clear mcs");
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -88,6 +88,10 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void
|
||||
genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
|
||||
enum isl_aux_op aux_op);
|
||||
|
||||
void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct isl_surf *surf);
|
||||
|
||||
|
|
|
|||
|
|
@ -3993,6 +3993,11 @@ struct anv_cmd_state {
|
|||
*/
|
||||
enum anv_depth_reg_mode depth_reg_mode;
|
||||
|
||||
/* The last auxiliary surface operation (or equivalent operation) provided
|
||||
* to genX(cmd_buffer_update_color_aux_op).
|
||||
*/
|
||||
enum isl_aux_op color_aux_op;
|
||||
|
||||
/**
|
||||
* Whether RHWO optimization is enabled (Wa_1508744258).
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -458,6 +458,55 @@ blorp_exec_on_blitter(struct blorp_batch *batch,
|
|||
blorp_exec(batch, params);
|
||||
}
|
||||
|
||||
static enum isl_aux_op
|
||||
get_color_aux_op(const struct blorp_params *params)
|
||||
{
|
||||
switch (params->op) {
|
||||
case BLORP_OP_CCS_RESOLVE:
|
||||
case BLORP_OP_CCS_PARTIAL_RESOLVE:
|
||||
case BLORP_OP_CCS_COLOR_CLEAR:
|
||||
case BLORP_OP_MCS_COLOR_CLEAR:
|
||||
assert(params->fast_clear_op != ISL_AUX_OP_NONE);
|
||||
return params->fast_clear_op;
|
||||
|
||||
/* Some auxiliary surface operations are not provided by hardware. To
|
||||
* provide that functionality, BLORP sometimes tries to emulate what
|
||||
* hardware would do with custom pixel shaders. For now, we assume that
|
||||
* BLORP's implementation has the same cache invalidation and flushing
|
||||
* requirements as similar hardware operations.
|
||||
*/
|
||||
case BLORP_OP_CCS_AMBIGUATE:
|
||||
assert(GFX_VER >= 11 || params->fast_clear_op == ISL_AUX_OP_NONE);
|
||||
return ISL_AUX_OP_AMBIGUATE;
|
||||
case BLORP_OP_MCS_AMBIGUATE:
|
||||
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
|
||||
return ISL_AUX_OP_AMBIGUATE;
|
||||
case BLORP_OP_MCS_PARTIAL_RESOLVE:
|
||||
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
|
||||
return ISL_AUX_OP_PARTIAL_RESOLVE;
|
||||
|
||||
/* If memory aliasing is being done on an image, a pending fast clear
|
||||
* could hit the destination address at an unknown time. Go back to the
|
||||
* regular drawing mode to avoid this case.
|
||||
*/
|
||||
case BLORP_OP_HIZ_AMBIGUATE:
|
||||
case BLORP_OP_HIZ_CLEAR:
|
||||
case BLORP_OP_HIZ_RESOLVE:
|
||||
case BLORP_OP_SLOW_DEPTH_CLEAR:
|
||||
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
|
||||
return ISL_AUX_OP_NONE;
|
||||
|
||||
/* The remaining operations are considered regular draws. */
|
||||
case BLORP_OP_SLOW_COLOR_CLEAR:
|
||||
case BLORP_OP_BLIT:
|
||||
case BLORP_OP_COPY:
|
||||
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
|
||||
return ISL_AUX_OP_NONE;
|
||||
}
|
||||
|
||||
unreachable("Invalid value in params->op");
|
||||
}
|
||||
|
||||
void
|
||||
genX(blorp_exec)(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
|
|
@ -474,6 +523,10 @@ genX(blorp_exec)(struct blorp_batch *batch,
|
|||
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
|
||||
}
|
||||
|
||||
/* Flush any in-progress CCS/MCS operations as needed. */
|
||||
const enum isl_aux_op aux_op = get_color_aux_op(params);
|
||||
genX(cmd_buffer_update_color_aux_op(cmd_buffer, aux_op));
|
||||
|
||||
if (batch->flags & BLORP_BATCH_USE_BLITTER)
|
||||
blorp_exec_on_blitter(batch, params);
|
||||
else if (batch->flags & BLORP_BATCH_USE_COMPUTE)
|
||||
|
|
|
|||
|
|
@ -1342,28 +1342,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||
if (resolve_op == ISL_AUX_OP_NONE)
|
||||
return;
|
||||
|
||||
/* Perform a resolve to synchronize data between the main and aux buffer.
|
||||
* Before we begin, we must satisfy the cache flushing requirement specified
|
||||
* in the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*
|
||||
* We perform a flush of the write cache before and after the clear and
|
||||
* resolve operations to meet this requirement.
|
||||
*
|
||||
* Unlike other drawing, fast clear operations are not properly
|
||||
* synchronized. The first PIPE_CONTROL here likely ensures that the
|
||||
* contents of the previous render or clear hit the render target before we
|
||||
* resolve and the second likely ensures that the resolve is complete before
|
||||
* we do any more rendering or clearing.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
"before transition RT");
|
||||
|
||||
for (uint32_t l = 0; l < level_count; l++) {
|
||||
uint32_t level = base_level + l;
|
||||
|
||||
|
|
@ -1406,11 +1384,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
"after transition RT");
|
||||
}
|
||||
|
||||
static MUST_CHECK VkResult
|
||||
|
|
@ -2806,6 +2779,352 @@ genX(cmd_buffer_begin_companion)(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
aux_op_resolves(enum isl_aux_op aux_op)
|
||||
{
|
||||
return aux_op == ISL_AUX_OP_FULL_RESOLVE ||
|
||||
aux_op == ISL_AUX_OP_PARTIAL_RESOLVE;
|
||||
}
|
||||
|
||||
static bool
|
||||
aux_op_clears(enum isl_aux_op aux_op)
|
||||
{
|
||||
return aux_op == ISL_AUX_OP_FAST_CLEAR ||
|
||||
aux_op == ISL_AUX_OP_AMBIGUATE;
|
||||
}
|
||||
|
||||
static bool
|
||||
aux_op_renders(enum isl_aux_op aux_op)
|
||||
{
|
||||
return aux_op == ISL_AUX_OP_NONE;
|
||||
}
|
||||
|
||||
static void
|
||||
add_pending_pipe_bits_for_color_aux_op(struct anv_cmd_buffer *cmd_buffer,
|
||||
enum isl_aux_op next_aux_op,
|
||||
enum anv_pipe_bits pipe_bits)
|
||||
{
|
||||
const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op;
|
||||
assert(next_aux_op != last_aux_op);
|
||||
|
||||
char flush_reason[64] = {};
|
||||
if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) ||
|
||||
u_trace_enabled(&cmd_buffer->device->ds.trace_context)) {
|
||||
int ret = snprintf(flush_reason, sizeof(flush_reason),
|
||||
"color aux-op: %s -> %s",
|
||||
isl_aux_op_to_name(last_aux_op),
|
||||
isl_aux_op_to_name(next_aux_op));
|
||||
assert(ret < sizeof(flush_reason));
|
||||
}
|
||||
|
||||
anv_add_pending_pipe_bits(cmd_buffer, pipe_bits, flush_reason);
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
|
||||
enum isl_aux_op next_aux_op)
|
||||
{
|
||||
const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op;
|
||||
|
||||
if (!aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op)) {
|
||||
#if GFX_VER >= 20
|
||||
/* From the Xe2 Bspec 57340 (r59562),
|
||||
* "MCS/CCS Buffers, Fast Clear for Render Target(s)":
|
||||
*
|
||||
* Synchronization:
|
||||
* Due to interaction of scaled clearing rectangle with pixel
|
||||
* scoreboard, we require one of the following commands to be
|
||||
* issued. [...]
|
||||
*
|
||||
* PIPE_CONTROL
|
||||
* PSS Stall Sync Enable [...] 1b (Enable)
|
||||
* Machine-wide Stall at Pixel Stage, wait for all Prior Pixel
|
||||
* Work to Reach End of Pipe
|
||||
* Render Target Cache Flush Enable [...] 1b (Enable)
|
||||
* Post-Sync Op Flushes Render Cache before Unblocking Stall
|
||||
*
|
||||
* This synchronization step is required before and after the fast
|
||||
* clear pass, to ensure correct ordering between pixels.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
|
||||
|
||||
#elif GFX_VERx10 == 125
|
||||
/* From the ACM Bspec 47704 (r52663), "Render Target Fast Clear":
|
||||
*
|
||||
* Preamble pre fast clear synchronization
|
||||
*
|
||||
* PIPE_CONTROL:
|
||||
* PS sync stall = 1
|
||||
* Tile Cache Flush = 1
|
||||
* RT Write Flush = 1
|
||||
* HDC Flush = 1
|
||||
* DC Flush = 1
|
||||
* Texture Invalidate = 1
|
||||
*
|
||||
* [...]
|
||||
*
|
||||
* Objective of the preamble flushes is to ensure all data is
|
||||
* evicted from L1 caches prior to fast clear.
|
||||
*
|
||||
* From the ACM PRM Vol. 9, "MCS/CCS Buffers for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
|
||||
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
|
||||
#elif GFX_VERx10 == 120
|
||||
/* From the TGL Bspec 47704 (r52663), "Render Target Fast Clear":
|
||||
*
|
||||
* Preamble pre fast clear synchronization
|
||||
*
|
||||
* PIPE_CONTROL:
|
||||
* Depth Stall = 1
|
||||
* Tile Cache Flush = 1
|
||||
* RT Write Flush = 1
|
||||
* Texture Invalidate = 1
|
||||
*
|
||||
* [...]
|
||||
*
|
||||
* Objective of the preamble flushes is to ensure all data is
|
||||
* evicted from L1 caches prior to fast clear.
|
||||
*
|
||||
* From the TGL PRM Vol. 9, "MCS/CCS Buffers for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_DEPTH_STALL_BIT |
|
||||
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
|
||||
#else
|
||||
/* From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*
|
||||
* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
|
||||
*
|
||||
* After Render target fast clear, pipe-control with color cache
|
||||
* write-flush must be issued before sending any DRAW commands on
|
||||
* that render target.
|
||||
*
|
||||
* The last comment is a bit cryptic and doesn't really tell you what's
|
||||
* going or what's really needed. It appears that fast clear ops are
|
||||
* not properly synchronized with other drawing. This means that we
|
||||
* cannot have a fast clear operation in the pipe at the same time as
|
||||
* other regular drawing operations. We need to use a PIPE_CONTROL
|
||||
* to ensure that the contents of the previous draw hit the render
|
||||
* target before we resolve and then use a second PIPE_CONTROL after
|
||||
* the resolve to ensure that it is completed before any additional
|
||||
* drawing occurs.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
#endif
|
||||
|
||||
} else if (aux_op_clears(last_aux_op) && !aux_op_clears(next_aux_op)) {
|
||||
#if GFX_VER >= 20
|
||||
/* From the Xe2 Bspec 57340 (r59562),
|
||||
* "MCS/CCS Buffers, Fast Clear for Render Target(s)":
|
||||
*
|
||||
* Synchronization:
|
||||
* Due to interaction of scaled clearing rectangle with pixel
|
||||
* scoreboard, we require one of the following commands to be
|
||||
* issued. [...]
|
||||
*
|
||||
* PIPE_CONTROL
|
||||
* PSS Stall Sync Enable [...] 1b (Enable)
|
||||
* Machine-wide Stall at Pixel Stage, wait for all Prior Pixel
|
||||
* Work to Reach End of Pipe
|
||||
* Render Target Cache Flush Enable [...] 1b (Enable)
|
||||
* Post-Sync Op Flushes Render Cache before Unblocking Stall
|
||||
*
|
||||
* This synchronization step is required before and after the fast
|
||||
* clear pass, to ensure correct ordering between pixels.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
|
||||
|
||||
#elif GFX_VERx10 == 125
|
||||
/* From the ACM PRM Vol. 9, "Color Fast Clear Synchronization":
|
||||
*
|
||||
* Postamble post fast clear synchronization
|
||||
*
|
||||
* PIPE_CONTROL:
|
||||
* PS sync stall = 1
|
||||
* RT flush = 1
|
||||
*
|
||||
* From the ACM PRM Vol. 9, "MCS/CCS Buffers for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT |
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
|
||||
#elif GFX_VERx10 == 120
|
||||
/* From the TGL PRM Vol. 9, "Color Fast Clear Synchronization":
|
||||
*
|
||||
* Postamble post fast clear synchronization
|
||||
*
|
||||
* PIPE_CONTROL:
|
||||
* Depth Stall = 1
|
||||
* Tile Cache Flush = 1
|
||||
* RT Write Flush = 1
|
||||
*
|
||||
* From the TGL PRM Vol. 9, "MCS/CCS Buffers for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_DEPTH_STALL_BIT |
|
||||
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
|
||||
#else
|
||||
/* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
|
||||
*
|
||||
* After Render target fast clear, pipe-control with color cache
|
||||
* write-flush must be issued before sending any DRAW commands on
|
||||
* that render target.
|
||||
*
|
||||
* From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
#endif
|
||||
|
||||
} else if (aux_op_renders(last_aux_op) != aux_op_renders(next_aux_op)) {
|
||||
assert(aux_op_resolves(last_aux_op) != aux_op_resolves(next_aux_op));
|
||||
/* From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*
|
||||
* We perform a flush of the write cache before and after the clear and
|
||||
* resolve operations to meet this requirement.
|
||||
*
|
||||
* Unlike other drawing, fast clear operations are not properly
|
||||
* synchronized. The first PIPE_CONTROL here likely ensures that the
|
||||
* contents of the previous render or clear hit the render target before
|
||||
* we resolve and the second likely ensures that the resolve is complete
|
||||
* before we do any more rendering or clearing.
|
||||
*/
|
||||
add_pending_pipe_bits_for_color_aux_op(
|
||||
cmd_buffer, next_aux_op,
|
||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
}
|
||||
|
||||
if (next_aux_op == ISL_AUX_OP_FAST_CLEAR &&
|
||||
cmd_buffer->device->isl_dev.ss.clear_color_state_size > 0) {
|
||||
/* From the ICL PRM Vol. 9, "Render Target Fast Clear":
|
||||
*
|
||||
* HwManaged FastClear allows SW to store FastClearValue in separate
|
||||
* graphics allocation, instead of keeping them in
|
||||
* RENDER_SURFACE_STATE. This behavior can be enabled by setting
|
||||
* ClearValueAddressEnable in RENDER_SURFACE_STATE.
|
||||
*
|
||||
* Proper sequence of commands is as follows:
|
||||
*
|
||||
* 1. Storing clear color to allocation
|
||||
* 2. Ensuring that step 1. is finished and visible for
|
||||
* TextureCache
|
||||
* 3. Performing FastClear
|
||||
*
|
||||
* Step 2. is required on products with ClearColorConversion feature.
|
||||
* This feature is enabled by setting ClearColorConversionEnable.
|
||||
* This causes HW to read stored color from ClearColorAllocation and
|
||||
* write back with the native format or RenderTarget - and clear
|
||||
* color needs to be present and visible. Reading is done from
|
||||
* TextureCache, writing is done to RenderCache.
|
||||
*
|
||||
* Invalidate the texture cache so that the clear color conversion
|
||||
* feature works properly.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
|
||||
"Invalidate for clear color conversion");
|
||||
|
||||
/* From the ICL PRM Vol. 9, "State Caching":
|
||||
*
|
||||
* Any values referenced by pointers within the RENDER_SURFACE_STATE
|
||||
* or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or
|
||||
* Indirect State Pointer) are considered to be part of that state
|
||||
* and any changes to these referenced values requires an
|
||||
* invalidation of the L1 state cache to ensure the new values are
|
||||
* being used as part of the state. In the case of surface data
|
||||
* pointed to by the Surface Base Address in RENDER SURFACE STATE,
|
||||
* the Texture Cache must be invalidated if the surface data changes.
|
||||
*
|
||||
* We could alternatively perform this invalidation when we stop
|
||||
* fast-clearing. A benefit to doing it now, when transitioning to a
|
||||
* fast clear, is that we save a pipe control by combining the state
|
||||
* cache invalidation with the texture cache invalidation.
|
||||
*/
|
||||
if (last_aux_op != ISL_AUX_OP_FAST_CLEAR) {
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
|
||||
"Invalidate for new clear color");
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the auxiliary surface operation, but with one exception. */
|
||||
if (last_aux_op == ISL_AUX_OP_FAST_CLEAR &&
|
||||
next_aux_op == ISL_AUX_OP_AMBIGUATE) {
|
||||
assert(aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op));
|
||||
/* Fast clears and ambiguates are in the same class of operation, but
|
||||
* fast clears have more stringent synchronization requirements. For
|
||||
* better performance, don't replace the current fast clear operation
|
||||
* state with ambiguate. This allows us to perform one state cache
|
||||
* invalidation when leaving a sequence which alternates between
|
||||
* ambiguates and clears, instead of multiple such invalidations.
|
||||
*/
|
||||
} else {
|
||||
cmd_buffer->state.color_aux_op = next_aux_op;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
genX(cmd_buffer_set_protected_memory)(struct anv_cmd_buffer *cmd_buffer,
|
||||
bool enabled)
|
||||
|
|
@ -3103,6 +3422,9 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer)
|
|||
"query clear flush prior command buffer end");
|
||||
}
|
||||
|
||||
/* Flush any in-progress CCS/MCS operations in preparation for chaining. */
|
||||
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
|
||||
|
||||
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
|
||||
|
||||
/* Turn on object level preemption if it is disabled to have it in known
|
||||
|
|
@ -3200,6 +3522,11 @@ genX(CmdExecuteCommands)(
|
|||
"query clear flush prior to secondary buffer");
|
||||
}
|
||||
|
||||
/* Ensure we're in a regular drawing cache mode (assumption for all
|
||||
* secondary).
|
||||
*/
|
||||
genX(cmd_buffer_update_color_aux_op(container, ISL_AUX_OP_NONE));
|
||||
|
||||
/* The secondary command buffer doesn't know which textures etc. have been
|
||||
* flushed prior to their execution. Apply those flushes now.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -97,6 +97,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
|
||||
|
||||
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
|
||||
|
||||
genX(flush_descriptor_buffers)(cmd_buffer, &comp_state->base);
|
||||
|
||||
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
|
||||
|
|
@ -600,6 +602,8 @@ genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, kernel->l3_config);
|
||||
|
||||
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
|
||||
|
||||
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
|
||||
|
||||
/* Apply any pending pipeline flushes we may have. We want to apply them
|
||||
|
|
@ -889,6 +893,8 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
|
||||
|
||||
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
|
||||
|
||||
genX(flush_descriptor_buffers)(cmd_buffer, &rt->base);
|
||||
|
||||
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
|
||||
|
|
|
|||
|
|
@ -706,6 +706,8 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.base.l3_config);
|
||||
|
||||
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
|
||||
|
||||
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1);
|
||||
|
||||
genX(flush_descriptor_buffers)(cmd_buffer, &cmd_buffer->state.gfx.base);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue