mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 22:00:13 +01:00
tu: Implement non-aligned multisample GMEM STORE_OP_STORE
We have to a bit careful here when disabling draw states. This also necessitates moving the actual recording of the stores to the end so that we set the dirty flag correctly. Closes: #4462 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12102>
This commit is contained in:
parent
7948c4b0b4
commit
b157a5d0d6
4 changed files with 104 additions and 27 deletions
|
|
@ -17,9 +17,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail
|
|||
dEQP-VK.compute.basic.max_local_size_x,Crash
|
||||
dEQP-VK.compute.basic.max_local_size_y,Crash
|
||||
|
||||
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4462
|
||||
dEQP-VK.pipeline.framebuffer_attachment.diff_attachments_2d_19x27_32x32_ms,Fail
|
||||
|
||||
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
|
||||
# should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
|
||||
dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
|
||||
|
|
|
|||
|
|
@ -894,6 +894,36 @@ r3d_src_buffer(struct tu_cmd_buffer *cmd,
|
|||
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
|
||||
}
|
||||
|
||||
static void
|
||||
r3d_src_gmem(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
VkFormat format,
|
||||
uint32_t gmem_offset,
|
||||
uint32_t cpp)
|
||||
{
|
||||
uint32_t desc[A6XX_TEX_CONST_DWORDS];
|
||||
memcpy(desc, iview->descriptor, sizeof(desc));
|
||||
|
||||
/* patch the format so that depth/stencil get the right format */
|
||||
desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
|
||||
desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt);
|
||||
|
||||
/* patched for gmem */
|
||||
desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
|
||||
desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
|
||||
desc[2] =
|
||||
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
|
||||
A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp);
|
||||
desc[3] = 0;
|
||||
desc[4] = cmd->device->physical_device->gmem_base + gmem_offset;
|
||||
desc[5] = A6XX_TEX_CONST_5_DEPTH(1);
|
||||
for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
|
||||
desc[i] = 0;
|
||||
|
||||
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
|
||||
}
|
||||
|
||||
static void
|
||||
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
||||
{
|
||||
|
|
@ -2733,6 +2763,42 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
|
|||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
|
||||
}
|
||||
|
||||
static void
|
||||
store_3d_blit(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
uint32_t dst_samples,
|
||||
bool separate_stencil,
|
||||
VkFormat format,
|
||||
const VkRect2D *render_area,
|
||||
uint32_t gmem_offset,
|
||||
uint32_t cpp)
|
||||
{
|
||||
r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
|
||||
iview->ubwc_enabled, dst_samples);
|
||||
|
||||
r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
|
||||
|
||||
if (separate_stencil)
|
||||
r3d_dst_stencil(cs, iview, 0);
|
||||
else
|
||||
r3d_dst(cs, iview, 0);
|
||||
|
||||
r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp);
|
||||
|
||||
/* sync GMEM writes with CACHE. */
|
||||
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
|
||||
|
||||
r3d_run(cmd, cs);
|
||||
|
||||
/* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
|
||||
* sysmem, and we generally assume that GMEM renderpasses leave their
|
||||
* results in sysmem, so we need to flush manually here. The 3d blit path
|
||||
* writes to depth images as a color RT, so there's no need to flush depth.
|
||||
*/
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
|
||||
}
|
||||
|
||||
void
|
||||
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
|
|
@ -2782,20 +2848,32 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
return;
|
||||
}
|
||||
|
||||
if (dst->samples > 1) {
|
||||
/* I guess we need to use shader path in this case?
|
||||
* need a testcase which fails because of this
|
||||
*/
|
||||
tu_finishme("unaligned store of msaa attachment\n");
|
||||
return;
|
||||
}
|
||||
|
||||
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
|
||||
|
||||
VkFormat format = src->format;
|
||||
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||
format = VK_FORMAT_D32_SFLOAT;
|
||||
|
||||
if (dst->samples > 1) {
|
||||
/* If we hit this path, we have to disable draw states after every tile
|
||||
* instead of once at the end of the renderpass, so that they aren't
|
||||
* executed when calling CP_DRAW.
|
||||
*
|
||||
* TODO: store a flag somewhere so we don't do this more than once and
|
||||
* don't do it after the renderpass when this happens.
|
||||
*/
|
||||
if (dst->store || dst->store_stencil)
|
||||
tu_disable_draw_states(cmd, cs);
|
||||
|
||||
if (dst->store) {
|
||||
store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format,
|
||||
render_area, src->gmem_offset, src->cpp);
|
||||
}
|
||||
if (dst->store_stencil) {
|
||||
store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT,
|
||||
render_area, src->gmem_offset, src->samples);
|
||||
}
|
||||
} else {
|
||||
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
|
||||
|
||||
if (dst->store) {
|
||||
store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
|
||||
src->gmem_offset, src->cpp);
|
||||
|
|
@ -2805,3 +2883,4 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
src->gmem_offset_stencil, src->samples);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -684,7 +684,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
|
||||
|
|
@ -2913,8 +2913,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
|
|||
cmd->state.framebuffer = fb;
|
||||
cmd->state.render_area = pRenderPassBegin->renderArea;
|
||||
|
||||
tu6_emit_tile_store(cmd, &cmd->tile_store_cs);
|
||||
|
||||
/* Note: because this is external, any flushes will happen before draw_cs
|
||||
* gets called. However deferred flushes could have to happen later as part
|
||||
* of the subpass.
|
||||
|
|
@ -4349,6 +4347,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
|||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
|
||||
|
||||
tu_cs_end(&cmd_buffer->draw_cs);
|
||||
tu_cs_end(&cmd_buffer->tile_store_cs);
|
||||
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
||||
|
|
@ -4358,10 +4358,9 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
|||
else
|
||||
tu_cmd_render_tiles(cmd_buffer);
|
||||
|
||||
/* outside of renderpasses we assume all draw states are disabled
|
||||
* we can do this in the main cs because no resolve/store commands
|
||||
* should use a draw command (TODO: this will change if unaligned
|
||||
* GMEM stores are supported)
|
||||
/* Outside of renderpasses we assume all draw states are disabled. We do
|
||||
* this outside the draw CS for the normal case where 3d gmem stores aren't
|
||||
* used.
|
||||
*/
|
||||
tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
|
||||
|
||||
|
|
|
|||
|
|
@ -1239,6 +1239,8 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_
|
|||
|
||||
void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
|
||||
|
||||
void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
||||
|
||||
struct tu_pvtmem_config {
|
||||
uint64_t iova;
|
||||
uint32_t per_fiber_size;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue