diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 185cd70095b..b2c0c1e19ce 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1522,7 +1522,6 @@ exec_ccs_op(struct anv_cmd_buffer *cmd_buffer, anv_image_aux_layers(image, aspect, level)); const uint32_t plane = anv_image_aspect_to_plane(image, aspect); - const struct intel_device_info *devinfo = cmd_buffer->device->info; struct blorp_surf surf; get_blorp_surf_for_anv_image(cmd_buffer, image, aspect, @@ -1540,107 +1539,8 @@ exec_ccs_op(struct anv_cmd_buffer *cmd_buffer, if (clear_value) surf.clear_color = *clear_value; - char flush_reason[64]; - int ret = - snprintf(flush_reason, sizeof(flush_reason), - "ccs op start: %s", isl_aux_op_to_name(ccs_op)); - assert(ret < sizeof(flush_reason)); - - /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": - * - * "After Render target fast clear, pipe-control with color cache - * write-flush must be issued before sending any DRAW commands on - * that render target." - * - * This comment is a bit cryptic and doesn't really tell you what's going - * or what's really needed. It appears that fast clear ops are not - * properly synchronized with other drawing. This means that we cannot - * have a fast clear operation in the pipe at the same time as other - * regular drawing operations. We need to use a PIPE_CONTROL to ensure - * that the contents of the previous draw hit the render target before we - * resolve and then use a second PIPE_CONTROL after the resolve to ensure - * that it is completed before any additional drawing occurs. - * - * Bspec 57340 (r59562): - * - * Synchronization: - * Due to interaction of scaled clearing rectangle with pixel - * scoreboard, we require one of the following commands to be issued. - * (Rows of PIPE_CONTROL command in the table) - * - * Requiring tile cache flush bit has been dropped since Xe2. 
- */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | - (devinfo->verx10 < 200 ? - ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | - (devinfo->verx10 == 120 ? - ANV_PIPE_DEPTH_STALL_BIT : 0) | - (devinfo->verx10 == 125 ? - ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | - ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) | - ANV_PIPE_PSS_STALL_SYNC_BIT | - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - flush_reason); - switch (ccs_op) { case ISL_AUX_OP_FAST_CLEAR: - /* From the ICL PRMs, Volume 9: Render Engine, State Caching : - * - * "Any values referenced by pointers within the RENDER_SURFACE_STATE - * or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or - * Indirect State Pointer) are considered to be part of that state - * and any changes to these referenced values requires an - * invalidation of the L1 state cache to ensure the new values are - * being used as part of the state. In the case of surface data - * pointed to by the Surface Base Address in RENDER SURFACE STATE, - * the Texture Cache must be invalidated if the surface data - * changes." - * - * and From the Render Target Fast Clear section, - * - * "HwManaged FastClear allows SW to store FastClearValue in separate - * graphics allocation, instead of keeping them in - * RENDER_SURFACE_STATE. This behavior can be enabled by setting - * ClearValueAddressEnable in RENDER_SURFACE_STATE. - * - * Proper sequence of commands is as follows: - * - * 1. Storing clear color to allocation - * 2. Ensuring that step 1. is finished and visible for TextureCache - * 3. Performing FastClear - * - * Step 2. is required on products with ClearColorConversion feature. - * This feature is enabled by setting ClearColorConversionEnable. - * This causes HW to read stored color from ClearColorAllocation and - * write back with the native format or RenderTarget - and clear - * color needs to be present and visible. Reading is done from - * TextureCache, writing is done to RenderCache." - * - * We're going to change the clear color. 
Invalidate the texture cache - * now to ensure the clear color conversion feature works properly. - * Although the docs seem to require invalidating the texture cache - * after updating the clear color allocation, we can do this beforehand - * so long as we ensure: - * - * 1. Step 1 is complete before the texture cache is accessed in step 3 - * 2. We don't access the texture cache between invalidation and step 3 - * - * The second requirement is satisfied because we'll be performing step - * 1 and 3 right after invalidating. The first is satisfied because - * BLORP updates the clear color before performing the fast clear and it - * performs the synchronizations suggested by the Render Target Fast - * Clear section (not quoted here) to ensure its completion. - * - * While we're here, also invalidate the state cache as suggested. - */ - if (devinfo->ver >= 11) { - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, - "before blorp clear color update"); - } - blorp_fast_clear(batch, &surf, format, swizzle, level, base_layer, layer_count, 0, 0, level_width, level_height); @@ -1670,15 +1570,6 @@ exec_ccs_op(struct anv_cmd_buffer *cmd_buffer, default: unreachable("Unsupported CCS operation"); } - - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | - (devinfo->verx10 == 120 ? 
- ANV_PIPE_TILE_CACHE_FLUSH_BIT | - ANV_PIPE_DEPTH_STALL_BIT : 0) | - ANV_PIPE_PSS_STALL_SYNC_BIT | - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "ccs op finish"); } static void @@ -1697,7 +1588,6 @@ exec_mcs_op(struct anv_cmd_buffer *cmd_buffer, /* Multisampling with multi-planar formats is not supported */ assert(image->n_planes == 1); - const struct intel_device_info *devinfo = cmd_buffer->device->info; struct blorp_surf surf; get_blorp_surf_for_anv_image(cmd_buffer, image, aspect, 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, @@ -1710,101 +1600,8 @@ exec_mcs_op(struct anv_cmd_buffer *cmd_buffer, if (clear_value) surf.clear_color = *clear_value; - /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": - * - * "After Render target fast clear, pipe-control with color cache - * write-flush must be issued before sending any DRAW commands on - * that render target." - * - * This comment is a bit cryptic and doesn't really tell you what's going - * or what's really needed. It appears that fast clear ops are not - * properly synchronized with other drawing. This means that we cannot - * have a fast clear operation in the pipe at the same time as other - * regular drawing operations. We need to use a PIPE_CONTROL to ensure - * that the contents of the previous draw hit the render target before we - * resolve and then use a second PIPE_CONTROL after the resolve to ensure - * that it is completed before any additional drawing occurs. - * - * Bspec 57340 (r59562): - * - * Synchronization: - * Due to interaction of scaled clearing rectangle with pixel - * scoreboard, we require one of the following commands to be issued. - * (Rows of PIPE_CONTROL command in the table) - * - * Requiring tile cache flush bit has been dropped since Xe2. - */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | - (devinfo->verx10 < 200 ? - ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | - (devinfo->verx10 == 120 ? - ANV_PIPE_DEPTH_STALL_BIT : 0) | - (devinfo->verx10 == 125 ? 
- ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | - ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) | - ANV_PIPE_PSS_STALL_SYNC_BIT | - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "before fast clear mcs"); - switch (mcs_op) { case ISL_AUX_OP_FAST_CLEAR: - /* From the ICL PRMs, Volume 9: Render Engine, State Caching : - * - * "Any values referenced by pointers within the RENDER_SURFACE_STATE - * or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or - * Indirect State Pointer) are considered to be part of that state - * and any changes to these referenced values requires an - * invalidation of the L1 state cache to ensure the new values are - * being used as part of the state. In the case of surface data - * pointed to by the Surface Base Address in RENDER SURFACE STATE, - * the Texture Cache must be invalidated if the surface data - * changes." - * - * and From the Render Target Fast Clear section, - * - * "HwManaged FastClear allows SW to store FastClearValue in separate - * graphics allocation, instead of keeping them in - * RENDER_SURFACE_STATE. This behavior can be enabled by setting - * ClearValueAddressEnable in RENDER_SURFACE_STATE. - * - * Proper sequence of commands is as follows: - * - * 1. Storing clear color to allocation - * 2. Ensuring that step 1. is finished and visible for TextureCache - * 3. Performing FastClear - * - * Step 2. is required on products with ClearColorConversion feature. - * This feature is enabled by setting ClearColorConversionEnable. - * This causes HW to read stored color from ClearColorAllocation and - * write back with the native format or RenderTarget - and clear - * color needs to be present and visible. Reading is done from - * TextureCache, writing is done to RenderCache." - * - * We're going to change the clear color. Invalidate the texture cache - * now to ensure the clear color conversion feature works properly. 
- * Although the docs seem to require invalidating the texture cache - * after updating the clear color allocation, we can do this beforehand - * so long as we ensure: - * - * 1. Step 1 is complete before the texture cache is accessed in step 3 - * 2. We don't access the texture cache between invalidation and step 3 - * - * The second requirement is satisfied because we'll be performing step - * 1 and 3 right after invalidating. The first is satisfied because - * BLORP updates the clear color before performing the fast clear and it - * performs the synchronizations suggested by the Render Target Fast - * Clear section (not quoted here) to ensure its completion. - * - * While we're here, also invalidate the state cache as suggested. - */ - if (devinfo->ver >= 11) { - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, - "before blorp clear color update"); - } - blorp_fast_clear(batch, &surf, format, swizzle, 0, base_layer, layer_count, 0, 0, image->vk.extent.width, image->vk.extent.height); @@ -1820,15 +1617,6 @@ exec_mcs_op(struct anv_cmd_buffer *cmd_buffer, default: unreachable("Unsupported MCS operation"); } - - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | - (devinfo->verx10 == 120 ? 
- ANV_PIPE_TILE_CACHE_FLUSH_BIT | - ANV_PIPE_DEPTH_STALL_BIT : 0) | - ANV_PIPE_PSS_STALL_SYNC_BIT | - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "after fast clear mcs"); } static void diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 8894841df45..dc231950e80 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -88,6 +88,10 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer); +void +genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, + enum isl_aux_op aux_op); + void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer, const struct isl_surf *surf); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ab077016c40..8a17b9675ce 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3993,6 +3993,11 @@ struct anv_cmd_state { */ enum anv_depth_reg_mode depth_reg_mode; + /* The last auxiliary surface operation (or equivalent operation) provided + * to genX(cmd_buffer_update_color_aux_op). + */ + enum isl_aux_op color_aux_op; + /** * Whether RHWO optimization is enabled (Wa_1508744258). */ diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 6786be68de4..b99afcba0c7 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -458,6 +458,55 @@ blorp_exec_on_blitter(struct blorp_batch *batch, blorp_exec(batch, params); } +static enum isl_aux_op +get_color_aux_op(const struct blorp_params *params) +{ + switch (params->op) { + case BLORP_OP_CCS_RESOLVE: + case BLORP_OP_CCS_PARTIAL_RESOLVE: + case BLORP_OP_CCS_COLOR_CLEAR: + case BLORP_OP_MCS_COLOR_CLEAR: + assert(params->fast_clear_op != ISL_AUX_OP_NONE); + return params->fast_clear_op; + + /* Some auxiliary surface operations are not provided by hardware. 
To + * provide that functionality, BLORP sometimes tries to emulate what + * hardware would do with custom pixel shaders. For now, we assume that + * BLORP's implementation has the same cache invalidation and flushing + * requirements as similar hardware operations. + */ + case BLORP_OP_CCS_AMBIGUATE: + assert(GFX_VER >= 11 || params->fast_clear_op == ISL_AUX_OP_NONE); + return ISL_AUX_OP_AMBIGUATE; + case BLORP_OP_MCS_AMBIGUATE: + assert(params->fast_clear_op == ISL_AUX_OP_NONE); + return ISL_AUX_OP_AMBIGUATE; + case BLORP_OP_MCS_PARTIAL_RESOLVE: + assert(params->fast_clear_op == ISL_AUX_OP_NONE); + return ISL_AUX_OP_PARTIAL_RESOLVE; + + /* If memory aliasing is being done on an image, a pending fast clear + * could hit the destination address at an unknown time. Go back to the + * regular drawing mode to avoid this case. + */ + case BLORP_OP_HIZ_AMBIGUATE: + case BLORP_OP_HIZ_CLEAR: + case BLORP_OP_HIZ_RESOLVE: + case BLORP_OP_SLOW_DEPTH_CLEAR: + assert(params->fast_clear_op == ISL_AUX_OP_NONE); + return ISL_AUX_OP_NONE; + + /* The remaining operations are considered regular draws. */ + case BLORP_OP_SLOW_COLOR_CLEAR: + case BLORP_OP_BLIT: + case BLORP_OP_COPY: + assert(params->fast_clear_op == ISL_AUX_OP_NONE); + return ISL_AUX_OP_NONE; + } + + unreachable("Invalid value in params->op"); +} + void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params) @@ -474,6 +523,10 @@ genX(blorp_exec)(struct blorp_batch *batch, genX(cmd_buffer_config_l3)(cmd_buffer, cfg); } + /* Flush any in-progress CCS/MCS operations as needed. 
*/ + const enum isl_aux_op aux_op = get_color_aux_op(params); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, aux_op); + if (batch->flags & BLORP_BATCH_USE_BLITTER) blorp_exec_on_blitter(batch, params); else if (batch->flags & BLORP_BATCH_USE_COMPUTE) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index f8b136c26ac..57ddc0b76ac 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1342,28 +1342,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, if (resolve_op == ISL_AUX_OP_NONE) return; - /* Perform a resolve to synchronize data between the main and aux buffer. - * Before we begin, we must satisfy the cache flushing requirement specified - * in the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)": - * - * Any transition from any value in {Clear, Render, Resolve} to a - * different value in {Clear, Render, Resolve} requires end of pipe - * synchronization. - * - * We perform a flush of the write cache before and after the clear and - * resolve operations to meet this requirement. - * - * Unlike other drawing, fast clear operations are not properly - * synchronized. The first PIPE_CONTROL here likely ensures that the - * contents of the previous render or clear hit the render target before we - * resolve and the second likely ensures that the resolve is complete before - * we do any more rendering or clearing. 
- */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "before transition RT"); - for (uint32_t l = 0; l < level_count; l++) { uint32_t level = base_level + l; @@ -1406,11 +1384,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, } } } - - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "after transition RT"); } static MUST_CHECK VkResult @@ -2806,6 +2779,352 @@ genX(cmd_buffer_begin_companion)(struct anv_cmd_buffer *cmd_buffer, } } +static bool +aux_op_resolves(enum isl_aux_op aux_op) +{ + return aux_op == ISL_AUX_OP_FULL_RESOLVE || + aux_op == ISL_AUX_OP_PARTIAL_RESOLVE; +} + +static bool +aux_op_clears(enum isl_aux_op aux_op) +{ + return aux_op == ISL_AUX_OP_FAST_CLEAR || + aux_op == ISL_AUX_OP_AMBIGUATE; +} + +static bool +aux_op_renders(enum isl_aux_op aux_op) +{ + return aux_op == ISL_AUX_OP_NONE; +} + +static void +add_pending_pipe_bits_for_color_aux_op(struct anv_cmd_buffer *cmd_buffer, + enum isl_aux_op next_aux_op, + enum anv_pipe_bits pipe_bits) +{ + const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op; + assert(next_aux_op != last_aux_op); + + char flush_reason[64] = {}; + if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) || + u_trace_enabled(&cmd_buffer->device->ds.trace_context)) { + int ret = snprintf(flush_reason, sizeof(flush_reason), + "color aux-op: %s -> %s", + isl_aux_op_to_name(last_aux_op), + isl_aux_op_to_name(next_aux_op)); + assert(ret < sizeof(flush_reason)); + } + + anv_add_pending_pipe_bits(cmd_buffer, pipe_bits, flush_reason); +} + +void +genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, + enum isl_aux_op next_aux_op) +{ + const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op; + + if (!aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op)) { +#if GFX_VER >= 20 + /* From the Xe2 Bspec 57340 (r59562), + * "MCS/CCS Buffers, Fast Clear for Render 
Target(s)": + * + * Synchronization: + * Due to interaction of scaled clearing rectangle with pixel + * scoreboard, we require one of the following commands to be + * issued. [...] + * + * PIPE_CONTROL + * PSS Stall Sync Enable [...] 1b (Enable) + * Machine-wide Stall at Pixel Stage, wait for all Prior Pixel + * Work to Reach End of Pipe + * Render Target Cache Flush Enable [...] 1b (Enable) + * Post-Sync Op Flushes Render Cache before Unblocking Stall + * + * This synchronization step is required before and after the fast + * clear pass, to ensure correct ordering between pixels. + */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_PSS_STALL_SYNC_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT); + +#elif GFX_VERx10 == 125 + /* From the ACM Bspec 47704 (r52663), "Render Target Fast Clear": + * + * Preamble pre fast clear synchronization + * + * PIPE_CONTROL: + * PS sync stall = 1 + * Tile Cache Flush = 1 + * RT Write Flush = 1 + * HDC Flush = 1 + * DC Flush = 1 + * Texture Invalidate = 1 + * + * [...] + * + * Objective of the preamble flushes is to ensure all data is + * evicted from L1 caches prior to fast clear. + * + * From the ACM PRM Vol. 9, "MCS/CCS Buffers for Render Target(s)": + * + * Any transition from any value in {Clear, Render, Resolve} to a + * different value in {Clear, Render, Resolve} requires end of pipe + * synchronization. + */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_PSS_STALL_SYNC_BIT | + ANV_PIPE_TILE_CACHE_FLUSH_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | + ANV_PIPE_DATA_CACHE_FLUSH_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_BIT); + +#elif GFX_VERx10 == 120 + /* From the TGL Bspec 47704 (r52663), "Render Target Fast Clear": + * + * Preamble pre fast clear synchronization + * + * PIPE_CONTROL: + * Depth Stall = 1 + * Tile Cache Flush = 1 + * RT Write Flush = 1 + * Texture Invalidate = 1 + * + * [...] 
+ * + * Objective of the preamble flushes is to ensure all data is + * evicted from L1 caches prior to fast clear. + * + * From the TGL PRM Vol. 9, "MCS/CCS Buffers for Render Target(s)": + * + * Any transition from any value in {Clear, Render, Resolve} to a + * different value in {Clear, Render, Resolve} requires end of pipe + * synchronization. + */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_DEPTH_STALL_BIT | + ANV_PIPE_TILE_CACHE_FLUSH_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_BIT); + +#else + /* From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)": + * + * Any transition from any value in {Clear, Render, Resolve} to a + * different value in {Clear, Render, Resolve} requires end of pipe + * synchronization. + * + * From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": + * + * After Render target fast clear, pipe-control with color cache + * write-flush must be issued before sending any DRAW commands on + * that render target. + * + * The last comment is a bit cryptic and doesn't really tell you what's + * going on or what's really needed. It appears that fast clear ops are + * not properly synchronized with other drawing. This means that we + * cannot have a fast clear operation in the pipe at the same time as + * other regular drawing operations. We need to use a PIPE_CONTROL + * to ensure that the contents of the previous draw hit the render + * target before we resolve and then use a second PIPE_CONTROL after + * the resolve to ensure that it is completed before any additional + * drawing occurs. 
+ */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_BIT); +#endif + + } else if (aux_op_clears(last_aux_op) && !aux_op_clears(next_aux_op)) { +#if GFX_VER >= 20 + /* From the Xe2 Bspec 57340 (r59562), + * "MCS/CCS Buffers, Fast Clear for Render Target(s)": + * + * Synchronization: + * Due to interaction of scaled clearing rectangle with pixel + * scoreboard, we require one of the following commands to be + * issued. [...] + * + * PIPE_CONTROL + * PSS Stall Sync Enable [...] 1b (Enable) + * Machine-wide Stall at Pixel Stage, wait for all Prior Pixel + * Work to Reach End of Pipe + * Render Target Cache Flush Enable [...] 1b (Enable) + * Post-Sync Op Flushes Render Cache before Unblocking Stall + * + * This synchronization step is required before and after the fast + * clear pass, to ensure correct ordering between pixels. + */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_PSS_STALL_SYNC_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT); + +#elif GFX_VERx10 == 125 + /* From the ACM PRM Vol. 9, "Color Fast Clear Synchronization": + * + * Postamble post fast clear synchronization + * + * PIPE_CONTROL: + * PS sync stall = 1 + * RT flush = 1 + * + * From the ACM PRM Vol. 9, "MCS/CCS Buffers for Render Target(s)": + * + * Any transition from any value in {Clear, Render, Resolve} to a + * different value in {Clear, Render, Resolve} requires end of pipe + * synchronization. + */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_PSS_STALL_SYNC_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_BIT); + +#elif GFX_VERx10 == 120 + /* From the TGL PRM Vol. 9, "Color Fast Clear Synchronization": + * + * Postamble post fast clear synchronization + * + * PIPE_CONTROL: + * Depth Stall = 1 + * Tile Cache Flush = 1 + * RT Write Flush = 1 + * + * From the TGL PRM Vol. 
9, "MCS/CCS Buffers for Render Target(s)": + * + * Any transition from any value in {Clear, Render, Resolve} to a + * different value in {Clear, Render, Resolve} requires end of pipe + * synchronization. + * + */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_DEPTH_STALL_BIT | + ANV_PIPE_TILE_CACHE_FLUSH_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_BIT); + +#else + /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": + * + * After Render target fast clear, pipe-control with color cache + * write-flush must be issued before sending any DRAW commands on + * that render target. + * + * From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)": + * + * Any transition from any value in {Clear, Render, Resolve} to a + * different value in {Clear, Render, Resolve} requires end of pipe + * synchronization. + */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_BIT); +#endif + + } else if (aux_op_renders(last_aux_op) != aux_op_renders(next_aux_op)) { + assert(aux_op_resolves(last_aux_op) != aux_op_resolves(next_aux_op)); + /* From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)": + * + * Any transition from any value in {Clear, Render, Resolve} to a + * different value in {Clear, Render, Resolve} requires end of pipe + * synchronization. + * + * We perform a flush of the write cache before and after the clear and + * resolve operations to meet this requirement. + * + * Unlike other drawing, fast clear operations are not properly + * synchronized. The first PIPE_CONTROL here likely ensures that the + * contents of the previous render or clear hit the render target before + * we resolve and the second likely ensures that the resolve is complete + * before we do any more rendering or clearing. 
+ */ + add_pending_pipe_bits_for_color_aux_op( + cmd_buffer, next_aux_op, + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_BIT); + } + + if (next_aux_op == ISL_AUX_OP_FAST_CLEAR && + cmd_buffer->device->isl_dev.ss.clear_color_state_size > 0) { + /* From the ICL PRM Vol. 9, "Render Target Fast Clear": + * + * HwManaged FastClear allows SW to store FastClearValue in separate + * graphics allocation, instead of keeping them in + * RENDER_SURFACE_STATE. This behavior can be enabled by setting + * ClearValueAddressEnable in RENDER_SURFACE_STATE. + * + * Proper sequence of commands is as follows: + * + * 1. Storing clear color to allocation + * 2. Ensuring that step 1. is finished and visible for + * TextureCache + * 3. Performing FastClear + * + * Step 2. is required on products with ClearColorConversion feature. + * This feature is enabled by setting ClearColorConversionEnable. + * This causes HW to read stored color from ClearColorAllocation and + * write back with the native format or RenderTarget - and clear + * color needs to be present and visible. Reading is done from + * TextureCache, writing is done to RenderCache. + * + * Invalidate the texture cache so that the clear color conversion + * feature works properly. + */ + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, + "Invalidate for clear color conversion"); + + /* From the ICL PRM Vol. 9, "State Caching": + * + * Any values referenced by pointers within the RENDER_SURFACE_STATE + * or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or + * Indirect State Pointer) are considered to be part of that state + * and any changes to these referenced values requires an + * invalidation of the L1 state cache to ensure the new values are + * being used as part of the state. In the case of surface data + * pointed to by the Surface Base Address in RENDER SURFACE STATE, + * the Texture Cache must be invalidated if the surface data changes. 
+ * + * We could alternatively perform this invalidation when we stop + * fast-clearing. A benefit to doing it now, when transitioning to a + * fast clear, is that we save a pipe control by combining the state + * cache invalidation with the texture cache invalidation. + */ + if (last_aux_op != ISL_AUX_OP_FAST_CLEAR) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, + "Invalidate for new clear color"); + } + } + + /* Update the auxiliary surface operation, but with one exception. */ + if (last_aux_op == ISL_AUX_OP_FAST_CLEAR && + next_aux_op == ISL_AUX_OP_AMBIGUATE) { + assert(aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op)); + /* Fast clears and ambiguates are in the same class of operation, but + * fast clears have more stringent synchronization requirements. For + * better performance, don't replace the current fast clear operation + * state with ambiguate. This allows us to perform one state cache + * invalidation when leaving a sequence which alternates between + * ambiguates and clears, instead of multiple such invalidations. + */ + } else { + cmd_buffer->state.color_aux_op = next_aux_op; + } +} + static void genX(cmd_buffer_set_protected_memory)(struct anv_cmd_buffer *cmd_buffer, bool enabled) @@ -3103,6 +3422,9 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer) "query clear flush prior command buffer end"); } + /* Flush any in-progress CCS/MCS operations in preparation for chaining. */ + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ISL_AUX_OP_NONE); + genX(cmd_buffer_flush_generated_draws)(cmd_buffer); /* Turn on object level preemption if it is disabled to have it in known @@ -3200,6 +3522,11 @@ genX(CmdExecuteCommands)( "query clear flush prior to secondary buffer"); } + /* Ensure we're in a regular drawing cache mode (assumption for all + * secondary). + */ + genX(cmd_buffer_update_color_aux_op)(container, ISL_AUX_OP_NONE); + /* The secondary command buffer doesn't know which textures etc. 
have been * flushed prior to their execution. Apply those flushes now. */ diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index 1c07616595d..6162c724b70 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -97,6 +97,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ISL_AUX_OP_NONE); + genX(flush_descriptor_buffers)(cmd_buffer, &comp_state->base); genX(flush_pipeline_select_gpgpu)(cmd_buffer); @@ -600,6 +602,8 @@ genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_config_l3)(cmd_buffer, kernel->l3_config); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ISL_AUX_OP_NONE); + genX(flush_pipeline_select_gpgpu)(cmd_buffer); /* Apply any pending pipeline flushes we may have. We want to apply them @@ -889,6 +893,8 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ISL_AUX_OP_NONE); + genX(flush_descriptor_buffers)(cmd_buffer, &rt->base); genX(flush_pipeline_select_gpgpu)(cmd_buffer); diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index f823fbe68f6..08664e43e55 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -706,6 +706,8 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.base.l3_config); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ISL_AUX_OP_NONE); + genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1); genX(flush_descriptor_buffers)(cmd_buffer, &cmd_buffer->state.gfx.base);