mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
tu: Implement non-aligned multisample GMEM STORE_OP_STORE
We have to be a bit careful here when disabling draw states. This also necessitates moving the actual recording of the stores to the end so that we set the dirty flag correctly. Closes: #4462 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12102>
This commit is contained in:
parent
7948c4b0b4
commit
b157a5d0d6
4 changed files with 104 additions and 27 deletions
|
|
@ -17,9 +17,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail
|
||||||
dEQP-VK.compute.basic.max_local_size_x,Crash
|
dEQP-VK.compute.basic.max_local_size_x,Crash
|
||||||
dEQP-VK.compute.basic.max_local_size_y,Crash
|
dEQP-VK.compute.basic.max_local_size_y,Crash
|
||||||
|
|
||||||
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4462
|
|
||||||
dEQP-VK.pipeline.framebuffer_attachment.diff_attachments_2d_19x27_32x32_ms,Fail
|
|
||||||
|
|
||||||
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
|
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
|
||||||
# should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
|
# should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
|
||||||
dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
|
dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
|
||||||
|
|
|
||||||
|
|
@ -894,6 +894,36 @@ r3d_src_buffer(struct tu_cmd_buffer *cmd,
|
||||||
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
|
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
r3d_src_gmem(struct tu_cmd_buffer *cmd,
|
||||||
|
struct tu_cs *cs,
|
||||||
|
const struct tu_image_view *iview,
|
||||||
|
VkFormat format,
|
||||||
|
uint32_t gmem_offset,
|
||||||
|
uint32_t cpp)
|
||||||
|
{
|
||||||
|
uint32_t desc[A6XX_TEX_CONST_DWORDS];
|
||||||
|
memcpy(desc, iview->descriptor, sizeof(desc));
|
||||||
|
|
||||||
|
/* patch the format so that depth/stencil get the right format */
|
||||||
|
desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
|
||||||
|
desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt);
|
||||||
|
|
||||||
|
/* patched for gmem */
|
||||||
|
desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
|
||||||
|
desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
|
||||||
|
desc[2] =
|
||||||
|
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
|
||||||
|
A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp);
|
||||||
|
desc[3] = 0;
|
||||||
|
desc[4] = cmd->device->physical_device->gmem_base + gmem_offset;
|
||||||
|
desc[5] = A6XX_TEX_CONST_5_DEPTH(1);
|
||||||
|
for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
|
||||||
|
desc[i] = 0;
|
||||||
|
|
||||||
|
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
||||||
{
|
{
|
||||||
|
|
@ -2733,6 +2763,42 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
|
||||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
|
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
store_3d_blit(struct tu_cmd_buffer *cmd,
|
||||||
|
struct tu_cs *cs,
|
||||||
|
const struct tu_image_view *iview,
|
||||||
|
uint32_t dst_samples,
|
||||||
|
bool separate_stencil,
|
||||||
|
VkFormat format,
|
||||||
|
const VkRect2D *render_area,
|
||||||
|
uint32_t gmem_offset,
|
||||||
|
uint32_t cpp)
|
||||||
|
{
|
||||||
|
r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
|
||||||
|
iview->ubwc_enabled, dst_samples);
|
||||||
|
|
||||||
|
r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
|
||||||
|
|
||||||
|
if (separate_stencil)
|
||||||
|
r3d_dst_stencil(cs, iview, 0);
|
||||||
|
else
|
||||||
|
r3d_dst(cs, iview, 0);
|
||||||
|
|
||||||
|
r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp);
|
||||||
|
|
||||||
|
/* sync GMEM writes with CACHE. */
|
||||||
|
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
|
||||||
|
|
||||||
|
r3d_run(cmd, cs);
|
||||||
|
|
||||||
|
/* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
|
||||||
|
* sysmem, and we generally assume that GMEM renderpasses leave their
|
||||||
|
* results in sysmem, so we need to flush manually here. The 3d blit path
|
||||||
|
* writes to depth images as a color RT, so there's no need to flush depth.
|
||||||
|
*/
|
||||||
|
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
struct tu_cs *cs,
|
struct tu_cs *cs,
|
||||||
|
|
@ -2782,20 +2848,32 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dst->samples > 1) {
|
|
||||||
/* I guess we need to use shader path in this case?
|
|
||||||
* need a testcase which fails because of this
|
|
||||||
*/
|
|
||||||
tu_finishme("unaligned store of msaa attachment\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
|
|
||||||
|
|
||||||
VkFormat format = src->format;
|
VkFormat format = src->format;
|
||||||
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||||
format = VK_FORMAT_D32_SFLOAT;
|
format = VK_FORMAT_D32_SFLOAT;
|
||||||
|
|
||||||
|
if (dst->samples > 1) {
|
||||||
|
/* If we hit this path, we have to disable draw states after every tile
|
||||||
|
* instead of once at the end of the renderpass, so that they aren't
|
||||||
|
* executed when calling CP_DRAW.
|
||||||
|
*
|
||||||
|
* TODO: store a flag somewhere so we don't do this more than once and
|
||||||
|
* don't do it after the renderpass when this happens.
|
||||||
|
*/
|
||||||
|
if (dst->store || dst->store_stencil)
|
||||||
|
tu_disable_draw_states(cmd, cs);
|
||||||
|
|
||||||
|
if (dst->store) {
|
||||||
|
store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format,
|
||||||
|
render_area, src->gmem_offset, src->cpp);
|
||||||
|
}
|
||||||
|
if (dst->store_stencil) {
|
||||||
|
store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT,
|
||||||
|
render_area, src->gmem_offset, src->samples);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
|
||||||
|
|
||||||
if (dst->store) {
|
if (dst->store) {
|
||||||
store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
|
store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
|
||||||
src->gmem_offset, src->cpp);
|
src->gmem_offset, src->cpp);
|
||||||
|
|
@ -2805,3 +2883,4 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
src->gmem_offset_stencil, src->samples);
|
src->gmem_offset_stencil, src->samples);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -684,7 +684,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
{
|
{
|
||||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
|
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
|
||||||
|
|
@ -2913,8 +2913,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
|
||||||
cmd->state.framebuffer = fb;
|
cmd->state.framebuffer = fb;
|
||||||
cmd->state.render_area = pRenderPassBegin->renderArea;
|
cmd->state.render_area = pRenderPassBegin->renderArea;
|
||||||
|
|
||||||
tu6_emit_tile_store(cmd, &cmd->tile_store_cs);
|
|
||||||
|
|
||||||
/* Note: because this is external, any flushes will happen before draw_cs
|
/* Note: because this is external, any flushes will happen before draw_cs
|
||||||
* gets called. However deferred flushes could have to happen later as part
|
* gets called. However deferred flushes could have to happen later as part
|
||||||
* of the subpass.
|
* of the subpass.
|
||||||
|
|
@ -4349,6 +4347,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
||||||
{
|
{
|
||||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
|
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
|
||||||
|
|
||||||
|
tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
|
||||||
|
|
||||||
tu_cs_end(&cmd_buffer->draw_cs);
|
tu_cs_end(&cmd_buffer->draw_cs);
|
||||||
tu_cs_end(&cmd_buffer->tile_store_cs);
|
tu_cs_end(&cmd_buffer->tile_store_cs);
|
||||||
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
||||||
|
|
@ -4358,10 +4358,9 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
||||||
else
|
else
|
||||||
tu_cmd_render_tiles(cmd_buffer);
|
tu_cmd_render_tiles(cmd_buffer);
|
||||||
|
|
||||||
/* outside of renderpasses we assume all draw states are disabled
|
/* Outside of renderpasses we assume all draw states are disabled. We do
|
||||||
* we can do this in the main cs because no resolve/store commands
|
* this outside the draw CS for the normal case where 3d gmem stores aren't
|
||||||
* should use a draw command (TODO: this will change if unaligned
|
* used.
|
||||||
* GMEM stores are supported)
|
|
||||||
*/
|
*/
|
||||||
tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
|
tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1239,6 +1239,8 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_
|
||||||
|
|
||||||
void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
|
void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
|
||||||
|
|
||||||
|
void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
||||||
|
|
||||||
struct tu_pvtmem_config {
|
struct tu_pvtmem_config {
|
||||||
uint64_t iova;
|
uint64_t iova;
|
||||||
uint32_t per_fiber_size;
|
uint32_t per_fiber_size;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue