diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index a7cfe5b7553..64f2e57243c 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -1516,6 +1516,8 @@ aspect_write_mask_generic_clear(enum pipe_format format, VkImageAspectFlags aspe mask = 0x1; if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) mask = 0x2; + if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) + mask = 0x3; } return mask; } @@ -1882,6 +1884,7 @@ pack_blit_event_clear_value(const VkClearValue *val, enum pipe_format format, ui static void event_blit_setup(struct tu_cs *cs, + uint32_t buffer_id, const struct tu_render_pass_attachment *att, enum a6xx_blit_event_type blit_event_type, uint32_t clear_mask) @@ -1899,7 +1902,8 @@ event_blit_setup(struct tu_cs *cs, vk_format_is_int(att->format) || vk_format_is_depth_or_stencil(att->format), .depth = vk_format_is_depth_or_stencil(att->format), - .clear_mask = clear_mask, )); + .clear_mask = clear_mask, + .buffer_id = buffer_id)); } struct event_blit_dst_view { @@ -1984,6 +1988,7 @@ event_blit_run(struct tu_cmd_buffer *cmd, static void tu7_generic_layer_clear(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + uint32_t buffer_id, enum pipe_format format, uint8_t clear_mask, bool separate_stencil, @@ -2003,7 +2008,7 @@ tu7_generic_layer_clear(struct tu_cmd_buffer *cmd, event_blit_dst_view blt_view = blt_view_from_tu_view(iview, layer); - event_blit_setup(cs, att, BLIT_EVENT_CLEAR, clear_mask); + event_blit_setup(cs, buffer_id, att, BLIT_EVENT_CLEAR, clear_mask); event_blit_run(cmd, cs, att, &blt_view, separate_stencil); } @@ -3476,6 +3481,70 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd, } TU_GENX(tu_resolve_sysmem); +enum tu_resolve_group_buffer_type { + TU_RESOLVE_GROUP_COLOR_BUFFER, + TU_RESOLVE_GROUP_DEPTH_BUFFER, + TU_RESOLVE_GROUP_STENCIL_BUFFER, +}; + +template +static uint32_t +tu_resolve_group_include_buffer(struct tu_resolve_group *resolve_group, + enum tu_resolve_group_buffer_type buffer_type) +{ + /* Resolve groups are not usable on a6xx, so no pending resolve is + * established. The default value of 0 is returned as the buffer ID. + */ + if (CHIP == A6XX) + return 0; + + resolve_group->pending_resolves = true; + + if (buffer_type == TU_RESOLVE_GROUP_DEPTH_BUFFER) + return 0x8; + if (buffer_type == TU_RESOLVE_GROUP_STENCIL_BUFFER) + return 0x9; + + const uint32_t max_color_buffers = 8; + uint32_t buffer_id = resolve_group->color_buffer_id++; + return buffer_id % max_color_buffers; +} + +template +static uint32_t +tu_resolve_group_include_buffer_for_format(struct tu_resolve_group *resolve_group, + VkFormat format) +{ + enum tu_resolve_group_buffer_type buffer_type = TU_RESOLVE_GROUP_COLOR_BUFFER; + + /* D24_UNORM_S8_UINT should be assigned the depth buffer type, regardless of + * whether depth, stencil or both are being resolved. + */ + if (format == VK_FORMAT_D24_UNORM_S8_UINT) + buffer_type = TU_RESOLVE_GROUP_DEPTH_BUFFER; + + return tu_resolve_group_include_buffer(resolve_group, buffer_type); +} + +template +void +tu_emit_resolve_group(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + struct tu_resolve_group *resolve_group) +{ + /* Resolve groups are not usable on A6XX, so that template instantiation + * should behave as a no-op. + */ + if (CHIP == A6XX || !resolve_group->pending_resolves) + return; + + resolve_group->color_buffer_id = 0; + resolve_group->pending_resolves = false; + + tu_emit_raw_event_write(cmd, cs, CCU_END_RESOLVE_GROUP, false); +} +TU_GENX(tu_emit_resolve_group); + template static void clear_image_cp_blit(struct tu_cmd_buffer *cmd, @@ -3538,6 +3607,7 @@ clear_image_cp_blit(struct tu_cmd_buffer *cmd, static void clear_image_event_blit(struct tu_cmd_buffer *cmd, struct tu_image *image, + uint32_t buffer_id, const VkClearValue *clear_value, const VkImageSubresourceRange *range, VkImageAspectFlags aspect_mask) @@ -3573,7 +3643,8 @@ clear_image_event_blit(struct tu_cmd_buffer *cmd, .sample_0 = vk_format_is_int(vk_format) || vk_format_is_depth_or_stencil(vk_format), .depth = vk_format_is_depth_or_stencil(vk_format), - .clear_mask = aspect_write_mask_generic_clear(format, aspect_mask))); + .clear_mask = aspect_write_mask_generic_clear(format, aspect_mask), + .buffer_id = buffer_id)); uint32_t clear_vals[4] = {}; pack_blit_event_clear_value(clear_value, format, clear_vals); @@ -3656,12 +3727,13 @@ template static void clear_image(struct tu_cmd_buffer *cmd, struct tu_image *image, + uint32_t buffer_id, const VkClearValue *clear_value, const VkImageSubresourceRange *range, VkImageAspectFlags aspect_mask) { if (use_generic_clear_for_image_clear(cmd, image)) { - clear_image_event_blit(cmd, image, clear_value, range, aspect_mask); + clear_image_event_blit(cmd, image, buffer_id, clear_value, range, aspect_mask); } else { clear_image_cp_blit(cmd, image, clear_value, range, aspect_mask); } @@ -3686,9 +3758,14 @@ tu_CmdClearColorImage(VkCommandBuffer commandBuffer, tu_emit_cache_flush(cmd); } + struct tu_resolve_group resolve_group = {}; + for (unsigned i = 0; i < rangeCount; i++) { - clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT); + uint32_t buffer_id = tu_resolve_group_include_buffer(&resolve_group, TU_RESOLVE_GROUP_COLOR_BUFFER); + clear_image(cmd, image, buffer_id, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT); } + + tu_emit_resolve_group(cmd, &cmd->cs, &resolve_group); } TU_GENX(tu_CmdClearColorImage); @@ -3712,19 +3789,31 @@ tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, tu_emit_cache_flush(cmd); } + struct tu_resolve_group resolve_group = {}; + for (unsigned i = 0; i < rangeCount; i++) { const VkImageSubresourceRange *range = &pRanges[i]; if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { /* can't clear both depth and stencil at once, split up the aspect mask */ - u_foreach_bit(b, range->aspectMask) - clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b)); + u_foreach_bit(b, range->aspectMask) { + uint32_t buffer_id = 0; + if (BIT(b) == VK_IMAGE_ASPECT_DEPTH_BIT) + buffer_id = tu_resolve_group_include_buffer(&resolve_group, TU_RESOLVE_GROUP_DEPTH_BUFFER); + if (BIT(b) == VK_IMAGE_ASPECT_STENCIL_BIT) + buffer_id = tu_resolve_group_include_buffer(&resolve_group, TU_RESOLVE_GROUP_STENCIL_BUFFER); + + clear_image(cmd, image, buffer_id, (const VkClearValue*) pDepthStencil, range, BIT(b)); + } continue; } - clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask); + uint32_t buffer_id = tu_resolve_group_include_buffer_for_format(&resolve_group, image->vk.format); + clear_image(cmd, image, buffer_id, (const VkClearValue*) pDepthStencil, range, range->aspectMask); } + tu_emit_resolve_group(cmd, &cmd->cs, &resolve_group); + tu_lrz_clear_depth_image(cmd, image, pDepthStencil, rangeCount, pRanges); } TU_GENX(tu_CmdClearDepthStencilImage); @@ -3933,6 +4022,7 @@ template static void clear_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + uint32_t buffer_id, enum pipe_format format, uint8_t clear_mask, uint32_t gmem_offset, @@ -3943,7 +4033,8 @@ clear_gmem_attachment(struct tu_cmd_buffer *cmd, blit_base_format(format, false, true))); tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.type = BLIT_EVENT_CLEAR, - .clear_mask = clear_mask)); + .clear_mask = clear_mask, + .buffer_id = buffer_id)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); tu_cs_emit(cs, gmem_offset); @@ -3964,6 +4055,7 @@ template static void tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t attachment, uint32_t base_layer, uint32_t layers, @@ -3984,15 +4076,18 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, uint32_t layer = i + base_layer; if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) { - clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, + uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, TU_RESOLVE_GROUP_DEPTH_BUFFER); + clear_gmem_attachment(cmd, cs, buffer_id, PIPE_FORMAT_Z32_FLOAT, 0xf, tu_attachment_gmem_offset(cmd, att, layer), value); } if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) { - clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, + uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, TU_RESOLVE_GROUP_STENCIL_BUFFER); + clear_gmem_attachment(cmd, cs, buffer_id, PIPE_FORMAT_S8_UINT, 0xf, tu_attachment_gmem_offset_stencil(cmd, att, layer), value); } } else { - clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask), + uint32_t buffer_id = tu_resolve_group_include_buffer_for_format(resolve_group, att->format); + clear_gmem_attachment(cmd, cs, buffer_id, format, aspect_write_mask(format, mask), tu_attachment_gmem_offset(cmd, att, layer), value); } } @@ -4016,6 +4111,8 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, if (rect_count > 1) perf_debug(cmd->device, "TODO: Swap tu_clear_gmem_attachments() loop for smaller command stream"); + struct tu_resolve_group resolve_group = {}; + for (unsigned i = 0; i < rect_count; i++) { unsigned x1 = rects[i].rect.offset.x; unsigned y1 = rects[i].rect.offset.y; @@ -4036,13 +4133,16 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, if (a == VK_ATTACHMENT_UNUSED) continue; - tu_emit_clear_gmem_attachment(cmd, cs, a, rects[i].baseArrayLayer, + tu_emit_clear_gmem_attachment(cmd, cs, &resolve_group, a, + rects[i].baseArrayLayer, rects[i].layerCount, subpass->multiview_mask, attachments[j].aspectMask, &attachments[j].clearValue); } } + + tu_emit_resolve_group(cmd, cs, &resolve_group); } template @@ -4109,6 +4209,7 @@ static void tu7_clear_attachment_generic_single_rect( struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, const struct tu_render_pass_attachment *att, const VkClearAttachment *clear_att, uint32_t a, @@ -4136,15 +4237,18 @@ tu7_clear_attachment_generic_single_rect( if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (clear_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { - tu7_generic_layer_clear(cmd, cs, PIPE_FORMAT_Z32_FLOAT, mask, + uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, TU_RESOLVE_GROUP_DEPTH_BUFFER); + tu7_generic_layer_clear(cmd, cs, buffer_id, PIPE_FORMAT_Z32_FLOAT, mask, false, layer, value, a); } if (clear_att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - tu7_generic_layer_clear(cmd, cs, PIPE_FORMAT_S8_UINT, mask, true, + uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, TU_RESOLVE_GROUP_STENCIL_BUFFER); + tu7_generic_layer_clear(cmd, cs, buffer_id, PIPE_FORMAT_S8_UINT, mask, true, layer, value, a); } } else { - tu7_generic_layer_clear(cmd, cs, format, mask, false, layer, value, a); + uint32_t buffer_id = tu_resolve_group_include_buffer_for_format(resolve_group, att->format); + tu7_generic_layer_clear(cmd, cs, buffer_id, format, mask, false, layer, value, a); } } } @@ -4178,6 +4282,8 @@ tu_clear_attachments_generic(struct tu_cmd_buffer *cmd, tu_cs_emit_wfi(cs); tu_cond_exec_end(cs); + struct tu_resolve_group resolve_group = {}; + const struct tu_subpass *subpass = cmd->state.subpass; for (uint32_t i = 0; i < attachmentCount; i++) { uint32_t a; @@ -4194,11 +4300,13 @@ tu_clear_attachments_generic(struct tu_cmd_buffer *cmd, iview->view.ubwc_enabled, att->samples); for (unsigned j = 0; j < rectCount; j++) { tu7_clear_attachment_generic_single_rect( - cmd, cs, att, &pAttachments[i], a, &pRects[j]); + cmd, cs, &resolve_group, att, &pAttachments[i], a, &pRects[j]); } trace_end_generic_clear(&cmd->trace, cs); } } + + tu_emit_resolve_group(cmd, cs, &resolve_group); } template @@ -4330,6 +4438,7 @@ template void tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t a) { const struct tu_render_pass_attachment *attachment = @@ -4338,7 +4447,8 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, if (!attachment->clear_mask) return; - tu_emit_clear_gmem_attachment(cmd, cs, a, 0, cmd->state.framebuffer->layers, + tu_emit_clear_gmem_attachment(cmd, cs, resolve_group, a, 0, + cmd->state.framebuffer->layers, attachment->clear_views, attachment->clear_mask, &cmd->state.clear_values[a]); @@ -4346,7 +4456,10 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, TU_GENX(tu_clear_gmem_attachment); void -tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a) +tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + struct tu_resolve_group *resolve_group, + uint32_t a) { const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a]; @@ -4363,15 +4476,18 @@ tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32 aspect_write_mask_generic_clear(format, att->clear_mask); if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (att->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) { - tu7_generic_layer_clear(cmd, cs, PIPE_FORMAT_Z32_FLOAT, mask, + uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, TU_RESOLVE_GROUP_DEPTH_BUFFER); + tu7_generic_layer_clear(cmd, cs, buffer_id, PIPE_FORMAT_Z32_FLOAT, mask, false, layer, value, a); } if (att->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) { - tu7_generic_layer_clear(cmd, cs, PIPE_FORMAT_S8_UINT, mask, true, + uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, TU_RESOLVE_GROUP_STENCIL_BUFFER); + tu7_generic_layer_clear(cmd, cs, buffer_id, PIPE_FORMAT_S8_UINT, mask, true, layer, value, a); } } else { - tu7_generic_layer_clear(cmd, cs, format, mask, false, layer, value, a); + uint32_t buffer_id = tu_resolve_group_include_buffer_for_format(resolve_group, att->format); + tu7_generic_layer_clear(cmd, cs, buffer_id, format, mask, false, layer, value, a); } } @@ -4385,6 +4501,7 @@ template static void tu_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, const struct tu_image_view *iview, const struct tu_render_pass_attachment *attachment, const VkClearValue *clear_value, @@ -4426,7 +4543,18 @@ tu_emit_blit(struct tu_cmd_buffer *cmd, tu_cs_emit_array(cs, clear_vals, 4); } - event_blit_setup(cs, attachment, blit_event_type, clear_mask); + enum tu_resolve_group_buffer_type buffer_type = TU_RESOLVE_GROUP_COLOR_BUFFER; + if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + if (!separate_stencil) + buffer_type = TU_RESOLVE_GROUP_DEPTH_BUFFER; + else + buffer_type = TU_RESOLVE_GROUP_STENCIL_BUFFER; + } else if (attachment->format == VK_FORMAT_D24_UNORM_S8_UINT) { + buffer_type = TU_RESOLVE_GROUP_DEPTH_BUFFER; + } + + uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, buffer_type); + event_blit_setup(cs, buffer_id, attachment, blit_event_type, clear_mask); for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) { event_blit_dst_view blt_view = blt_view_from_tu_view(iview, i); @@ -4618,6 +4746,7 @@ template void tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t a, bool cond_exec_allowed, bool force_load) @@ -4659,10 +4788,10 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, load_3d_blit(cmd, cs, iview, attachment, true); } else { if (load_common) - tu_emit_blit(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, false); + tu_emit_blit(cmd, cs, resolve_group, iview, attachment, NULL, BLIT_EVENT_LOAD, false); if (load_stencil) - tu_emit_blit(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, true); + tu_emit_blit(cmd, cs, resolve_group, iview, attachment, NULL, BLIT_EVENT_LOAD, true); } if (cond_exec) @@ -4928,6 +5057,7 @@ template void tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t a, uint32_t gmem_a, uint32_t layers, @@ -4982,9 +5112,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, /* use fast path when render area is aligned, except for unsupported resolve cases */ if (use_fast_path) { if (store_common) - tu_emit_blit(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, false); + tu_emit_blit(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, false); if (store_separate_stencil) - tu_emit_blit(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, true); + tu_emit_blit(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, true); if (cond_exec) { tu_end_load_store_cond_exec(cmd, cs, false); diff --git a/src/freedreno/vulkan/tu_clear_blit.h b/src/freedreno/vulkan/tu_clear_blit.h index cd938dc184d..d2a3c754f88 100644 --- a/src/freedreno/vulkan/tu_clear_blit.h +++ b/src/freedreno/vulkan/tu_clear_blit.h @@ -34,6 +34,17 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd, uint32_t layers, const VkRect2D *rect); +struct tu_resolve_group { + uint32_t color_buffer_id; + bool pending_resolves; +}; + +template +void +tu_emit_resolve_group(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + struct tu_resolve_group *resolve_group); + template void tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, @@ -44,17 +55,20 @@ template void tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t a); void tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t a); template void tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t a, bool cond_exec_allowed, bool force_load); @@ -64,6 +78,7 @@ template void tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + struct tu_resolve_group *resolve_group, uint32_t a, uint32_t gmem_a, uint32_t layers, diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 18777d52299..4dc597de9f9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -44,7 +44,7 @@ tu_clone_trace(struct tu_cmd_buffer *cmd, struct tu_cs *cs, } template -static void +void tu_emit_raw_event_write(struct tu_cmd_buffer *cmd, struct tu_cs *cs, enum vgt_event_type event, @@ -67,6 +67,7 @@ tu_emit_raw_event_write(struct tu_cmd_buffer *cmd, tu_cs_emit(cs, 0); } } +TU_GENX(tu_emit_raw_event_write); template void @@ -1241,6 +1242,8 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_emit_blit_scissor(cmd, cs, true); + struct tu_resolve_group resolve_group = {}; + /* Resolve should happen before store in case BLIT_EVENT_STORE_AND_CLEAR is * used for a store. */ @@ -1249,8 +1252,8 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) uint32_t a = subpass->resolve_attachments[i].attachment; if (a != VK_ATTACHMENT_UNUSED) { uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i); - tu_store_gmem_attachment(cmd, cs, a, gmem_a, fb->layers, - subpass->multiview_mask, false); + tu_store_gmem_attachment(cmd, cs, &resolve_group, a, gmem_a, + fb->layers, subpass->multiview_mask, false); } } } @@ -1259,12 +1262,14 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) if (pass->attachments[a].gmem) { const bool cond_exec_allowed = cmd->state.tiling->binning_possible && cmd->state.pass->has_cond_load_store; - tu_store_gmem_attachment(cmd, cs, a, a, + tu_store_gmem_attachment(cmd, cs, &resolve_group, a, a, fb->layers, subpass->multiview_mask, cond_exec_allowed); } } + tu_emit_resolve_group(cmd, cs, &resolve_group); + if (pass->has_fdm) tu_cs_set_writeable(cs, false); } @@ -1295,10 +1300,20 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs) * change per-RP and don't require a WFI to take effect, only CCU inval/flush * events are required. */ - tu_cs_emit_regs(cs, RB_CCU_CNTL(CHIP, + + enum a7xx_concurrent_resolve_mode resolve_mode = CONCURRENT_RESOLVE_MODE_2; + if (TU_DEBUG(NO_CONCURRENT_RESOLVES)) + resolve_mode = CONCURRENT_RESOLVE_MODE_DISABLED; + + enum a7xx_concurrent_unresolve_mode unresolve_mode = CONCURRENT_UNRESOLVE_MODE_FULL; + if (TU_DEBUG(NO_CONCURRENT_UNRESOLVES)) + unresolve_mode = CONCURRENT_UNRESOLVE_MODE_DISABLED; + + tu_cs_emit_regs(cs, RB_CCU_CNTL(A7XX, .gmem_fast_clear_disable = - !dev->physical_device->info->a6xx.has_gmem_fast_clear, - .concurrent_resolve = dev->physical_device->info->a6xx.concurrent_resolve, + !dev->physical_device->info->a6xx.has_gmem_fast_clear, + .concurrent_resolve_mode = resolve_mode, + .concurrent_unresolve_mode = unresolve_mode, )); } @@ -4467,7 +4482,7 @@ tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer, template static void -tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd) +tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *resolve_group) { struct tu_cs *cs = &cmd->draw_cs; uint32_t subpass_idx = cmd->state.subpass - cmd->state.pass->subpasses; @@ -4498,7 +4513,8 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd) tu6_emit_blit_scissor(cmd, cs, true); emitted_scissor = true; } - tu_load_gmem_attachment(cmd, cs, i, cond_load_allowed, false); + tu_load_gmem_attachment(cmd, cs, resolve_group, i, + cond_load_allowed, false); } } @@ -4513,7 +4529,7 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd) tu6_emit_blit_scissor(cmd, cs, false); emitted_scissor = true; } - tu_clear_gmem_attachment(cmd, cs, i); + tu_clear_gmem_attachment(cmd, cs, resolve_group, i); } } } @@ -4546,7 +4562,7 @@ tu_emit_subpass_begin_sysmem(struct tu_cmd_buffer *cmd) } static void -tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd) +tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd, struct tu_resolve_group *resolve_group) { if (cmd->state.render_area.extent.width == 0 || cmd->state.render_area.extent.height == 0) @@ -4564,7 +4580,7 @@ tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd) tu6_emit_blit_scissor(cmd, cs, false); emitted_scissor = true; } - tu7_generic_clear_attachment(cmd, cs, i); + tu7_generic_clear_attachment(cmd, cs, resolve_group, i); } } } @@ -4582,12 +4598,16 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd) { tu_fill_render_pass_state(&cmd->state.vk_rp, cmd->state.pass, cmd->state.subpass); - tu_emit_subpass_begin_gmem(cmd); + struct tu_resolve_group resolve_group = {}; + + tu_emit_subpass_begin_gmem(cmd, &resolve_group); tu_emit_subpass_begin_sysmem(cmd); if (cmd->device->physical_device->info->a7xx.has_generic_clear) { - tu7_emit_subpass_clear(cmd); + tu7_emit_subpass_clear(cmd, &resolve_group); } + tu_emit_resolve_group(cmd, &cmd->draw_cs, &resolve_group); + tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs); tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs); tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false); @@ -4964,6 +4984,8 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer, if (subpass->resolve_attachments) { tu6_emit_blit_scissor(cmd, cs, true); + struct tu_resolve_group resolve_group = {}; + for (unsigned i = 0; i < subpass->resolve_count; i++) { uint32_t a = subpass->resolve_attachments[i].attachment; if (a == VK_ATTACHMENT_UNUSED) @@ -4971,8 +4993,8 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer, uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i); - tu_store_gmem_attachment(cmd, cs, a, gmem_a, fb->layers, - subpass->multiview_mask, false); + tu_store_gmem_attachment(cmd, cs, &resolve_group, a, gmem_a, + fb->layers, subpass->multiview_mask, false); if (!pass->attachments[a].gmem) continue; @@ -4981,8 +5003,10 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer, * if it is, should be doing a GMEM->GMEM resolve instead of GMEM->MEM->GMEM.. */ perf_debug(cmd->device, "TODO: missing GMEM->GMEM resolve path\n"); - tu_load_gmem_attachment(cmd, cs, a, false, true); + tu_load_gmem_attachment(cmd, cs, &resolve_group, a, false, true); } + + tu_emit_resolve_group(cmd, cs, &resolve_group); } tu_cond_exec_end(cs); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index 669b8a0efc8..0be7f06e7e3 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -679,6 +679,13 @@ void tu_cmd_render(struct tu_cmd_buffer *cmd); enum fd_gpu_event : uint32_t; +template +void +tu_emit_raw_event_write(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + enum vgt_event_type event, + bool needs_seqno); + template void tu_emit_event_write(struct tu_cmd_buffer *cmd, diff --git a/src/freedreno/vulkan/tu_util.cc b/src/freedreno/vulkan/tu_util.cc index 992a410129e..dee233656c5 100644 --- a/src/freedreno/vulkan/tu_util.cc +++ b/src/freedreno/vulkan/tu_util.cc @@ -43,6 +43,8 @@ static const struct debug_control tu_debug_options[] = { { "noconform", TU_DEBUG_NOCONFORM }, { "rd", TU_DEBUG_RD }, { "hiprio", TU_DEBUG_HIPRIO }, + { "noconcurrentresolves", TU_DEBUG_NO_CONCURRENT_RESOLVES }, + { "noconcurrentunresolves", TU_DEBUG_NO_CONCURRENT_UNRESOLVES }, { NULL, 0 } }; diff --git a/src/freedreno/vulkan/tu_util.h b/src/freedreno/vulkan/tu_util.h index 0024d9ead2f..0910acc183e 100644 --- a/src/freedreno/vulkan/tu_util.h +++ b/src/freedreno/vulkan/tu_util.h @@ -49,6 +49,8 @@ enum tu_debug_flags TU_DEBUG_NOCONFORM = 1 << 24, TU_DEBUG_RD = 1 << 25, TU_DEBUG_HIPRIO = 1 << 26, + TU_DEBUG_NO_CONCURRENT_RESOLVES = 1 << 27, + TU_DEBUG_NO_CONCURRENT_UNRESOLVES = 1 << 28, }; struct tu_env {