From b157a5d0d68ee8a1b4cb862a56b97bd881841413 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 28 Jul 2021 14:42:08 +0200 Subject: [PATCH] tu: Implement non-aligned multisample GMEM STORE_OP_STORE We have to be a bit careful here when disabling draw states. This also necessitates moving the actual recording of the stores to the end so that we set the dirty flag correctly. Closes: #4462 Part-of: --- .../ci/deqp-freedreno-a630-fails.txt | 3 - src/freedreno/vulkan/tu_clear_blit.c | 113 +++++++++++++++--- src/freedreno/vulkan/tu_cmd_buffer.c | 13 +- src/freedreno/vulkan/tu_private.h | 2 + 4 files changed, 104 insertions(+), 27 deletions(-) diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index 463a09a0726..0de08181475 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -17,9 +17,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail dEQP-VK.compute.basic.max_local_size_x,Crash dEQP-VK.compute.basic.max_local_size_y,Crash -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4462 -dEQP-VK.pipeline.framebuffer_attachment.diff_attachments_2d_19x27_32x32_ms,Fail - # https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019 # should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745 dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index fd8e75b13f2..5ecf11ce530 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -894,6 +894,36 @@ r3d_src_buffer(struct tu_cmd_buffer *cmd, r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); } +static void +r3d_src_gmem(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + VkFormat format, + uint32_t gmem_offset, + uint32_t cpp) +{ + uint32_t desc[A6XX_TEX_CONST_DWORDS]; + memcpy(desc, 
iview->descriptor, sizeof(desc)); + + /* patch the format so that depth/stencil get the right format */ + desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK; + desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt); + + /* patched for gmem */ + desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); + desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2); + desc[2] = + A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) | + A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp); + desc[3] = 0; + desc[4] = cmd->device->physical_device->gmem_base + gmem_offset; + desc[5] = A6XX_TEX_CONST_5_DEPTH(1); + for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++) + desc[i] = 0; + + r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); +} + static void r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) { @@ -2733,6 +2763,42 @@ store_cp_blit(struct tu_cmd_buffer *cmd, tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); } +static void +store_3d_blit(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + uint32_t dst_samples, + bool separate_stencil, + VkFormat format, + const VkRect2D *render_area, + uint32_t gmem_offset, + uint32_t cpp) +{ + r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, + iview->ubwc_enabled, dst_samples); + + r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); + + if (separate_stencil) + r3d_dst_stencil(cs, iview, 0); + else + r3d_dst(cs, iview, 0); + + r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp); + + /* sync GMEM writes with CACHE. */ + tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); + + r3d_run(cmd, cs); + + /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to + * sysmem, and we generally assume that GMEM renderpasses leave their + * results in sysmem, so we need to flush manually here. The 3d blit path + * writes to depth images as a color RT, so there's no need to flush depth. 
+ */ + tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); +} + void tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, @@ -2782,26 +2848,39 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, return; } - if (dst->samples > 1) { - /* I guess we need to use shader path in this case? - * need a testcase which fails because of this - */ - tu_finishme("unaligned store of msaa attachment\n"); - return; - } - - r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); - VkFormat format = src->format; if (format == VK_FORMAT_D32_SFLOAT_S8_UINT) format = VK_FORMAT_D32_SFLOAT; - if (dst->store) { - store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format, - src->gmem_offset, src->cpp); - } - if (dst->store_stencil) { - store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT, - src->gmem_offset_stencil, src->samples); + if (dst->samples > 1) { + /* If we hit this path, we have to disable draw states after every tile + * instead of once at the end of the renderpass, so that they aren't + * executed when calling CP_DRAW. + * + * TODO: store a flag somewhere so we don't do this more than once and + * don't do it after the renderpass when this happens. 
+ */ + if (dst->store || dst->store_stencil) + tu_disable_draw_states(cmd, cs); + + if (dst->store) { + store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format, + render_area, src->gmem_offset, src->cpp); + } + if (dst->store_stencil) { + store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT, + render_area, src->gmem_offset_stencil, src->samples); + } + } else { + r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); + + if (dst->store) { + store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format, + src->gmem_offset, src->cpp); + } + if (dst->store_stencil) { + store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT, + src->gmem_offset_stencil, src->samples); + } } } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 45eeea2fbdf..47aa21789fa 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -684,7 +684,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } } -static void +void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3); @@ -2913,8 +2913,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, cmd->state.framebuffer = fb; cmd->state.render_area = pRenderPassBegin->renderArea; - tu6_emit_tile_store(cmd, &cmd->tile_store_cs); - /* Note: because this is external, any flushes will happen before draw_cs * gets called. However deferred flushes could have to happen later as part * of the subpass. 
@@ -4349,6 +4347,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer, { TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs); + tu_cs_end(&cmd_buffer->draw_cs); tu_cs_end(&cmd_buffer->tile_store_cs); tu_cs_end(&cmd_buffer->draw_epilogue_cs); @@ -4358,10 +4358,9 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer, else tu_cmd_render_tiles(cmd_buffer); - /* outside of renderpasses we assume all draw states are disabled - * we can do this in the main cs because no resolve/store commands - * should use a draw command (TODO: this will change if unaligned - * GMEM stores are supported) + /* Outside of renderpasses we assume all draw states are disabled. We do + * this outside the draw CS for the normal case where 3d gmem stores aren't + * used. */ tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs); diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 153e1880e0a..5400072c859 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1239,6 +1239,8 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_ void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1); +void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs); + struct tu_pvtmem_config { uint64_t iova; uint32_t per_fiber_size;