mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
turnip: add support for D32_SFLOAT_S8_UINT
Add support for D32_SFLOAT_S8_UINT, which requires special handling because it is actually two images. Signed-off-by: Jonathan Marek <jonathan@marek.ca> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5537>
This commit is contained in:
parent
a133f7d288
commit
67b1163f9f
7 changed files with 351 additions and 104 deletions
|
|
@ -228,6 +228,17 @@ r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
|||
tu_cs_image_flag_ref(cs, iview, layer);
|
||||
}
|
||||
|
||||
static void
|
||||
r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
||||
{
|
||||
assert(iview->image->samples == 1);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
|
||||
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS);
|
||||
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
|
||||
tu_cs_emit(cs, iview->stencil_PITCH);
|
||||
}
|
||||
|
||||
static void
|
||||
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
|
||||
{
|
||||
|
|
@ -681,6 +692,19 @@ r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
|||
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
|
||||
}
|
||||
|
||||
static void
|
||||
r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
||||
{
|
||||
tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
|
||||
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO));
|
||||
tu_cs_image_stencil_ref(cs, iview, layer);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
|
||||
}
|
||||
|
||||
static void
|
||||
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
|
||||
{
|
||||
|
|
@ -885,6 +909,11 @@ copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer)
|
|||
return format;
|
||||
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
|
||||
return VK_FORMAT_R32_UINT;
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
return VK_FORMAT_S8_UINT;
|
||||
assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT);
|
||||
return VK_FORMAT_D32_SFLOAT;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1640,14 +1669,15 @@ static void
|
|||
clear_image(struct tu_cmd_buffer *cmd,
|
||||
struct tu_image *image,
|
||||
const VkClearValue *clear_value,
|
||||
const VkImageSubresourceRange *range)
|
||||
const VkImageSubresourceRange *range,
|
||||
VkImageAspectFlags aspect_mask)
|
||||
{
|
||||
uint32_t level_count = tu_get_levelCount(image, range);
|
||||
uint32_t layer_count = tu_get_layerCount(image, range);
|
||||
struct tu_cs *cs = &cmd->cs;
|
||||
VkFormat format = image->vk_format;
|
||||
if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
format = VK_FORMAT_R32_UINT;
|
||||
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
format = copy_format(format, aspect_mask, false);
|
||||
|
||||
if (image->type == VK_IMAGE_TYPE_3D) {
|
||||
assert(layer_count == 1);
|
||||
|
|
@ -1656,8 +1686,11 @@ clear_image(struct tu_cmd_buffer *cmd,
|
|||
|
||||
const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;
|
||||
|
||||
ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true, image->layout[0].ubwc);
|
||||
ops->clear_value(cs, image->vk_format, clear_value);
|
||||
ops->setup(cmd, cs, format, aspect_mask, ROTATE_0, true, image->layout[0].ubwc);
|
||||
if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value);
|
||||
else
|
||||
ops->clear_value(cs, format, clear_value);
|
||||
|
||||
for (unsigned j = 0; j < level_count; j++) {
|
||||
if (image->type == VK_IMAGE_TYPE_3D)
|
||||
|
|
@ -1670,7 +1703,7 @@ clear_image(struct tu_cmd_buffer *cmd,
|
|||
|
||||
struct tu_image_view dst;
|
||||
tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
|
||||
.aspectMask = range->aspectMask,
|
||||
.aspectMask = aspect_mask,
|
||||
.mipLevel = range->baseMipLevel + j,
|
||||
.baseArrayLayer = range->baseArrayLayer,
|
||||
.layerCount = 1,
|
||||
|
|
@ -1697,7 +1730,7 @@ tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
|
|||
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (unsigned i = 0; i < rangeCount; i++)
|
||||
clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i);
|
||||
clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1713,8 +1746,19 @@ tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
|
|||
|
||||
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (unsigned i = 0; i < rangeCount; i++)
|
||||
clear_image(cmd, image, (const VkClearValue*) pDepthStencil, pRanges + i);
|
||||
for (unsigned i = 0; i < rangeCount; i++) {
|
||||
const VkImageSubresourceRange *range = &pRanges[i];
|
||||
|
||||
if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
/* can't clear both depth and stencil at once, split up the aspect mask */
|
||||
uint32_t b;
|
||||
for_each_bit(b, range->aspectMask)
|
||||
clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b));
|
||||
continue;
|
||||
}
|
||||
|
||||
clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1905,6 +1949,34 @@ pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_v
|
|||
util_format_pack_rgba(pformat, clear_value, color.uint32, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
VkFormat format,
|
||||
uint8_t clear_mask,
|
||||
uint32_t gmem_offset,
|
||||
const VkClearValue *value)
|
||||
{
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format)));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
|
||||
tu_cs_emit(cs, gmem_offset);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
uint32_t clear_vals[4] = {};
|
||||
pack_gmem_clear_value(value, format, clear_vals);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
|
||||
tu_cs_emit_array(cs, clear_vals, 4);
|
||||
|
||||
tu6_emit_event_write(cmd, cs, BLIT);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
|
|
@ -1912,28 +1984,18 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
VkImageAspectFlags mask,
|
||||
const VkClearValue *value)
|
||||
{
|
||||
VkFormat vk_format = cmd->state.pass->attachments[attachment].format;
|
||||
const struct tu_render_pass_attachment *att =
|
||||
&cmd->state.pass->attachments[attachment];
|
||||
|
||||
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value);
|
||||
if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value);
|
||||
return;
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1,
|
||||
.clear_mask = aspect_write_mask(vk_format, mask)));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
|
||||
tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
uint32_t clear_vals[4] = {};
|
||||
pack_gmem_clear_value(value, vk_format, clear_vals);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
|
||||
tu_cs_emit_array(cs, clear_vals, 4);
|
||||
|
||||
tu6_emit_event_write(cmd, cs, BLIT);
|
||||
clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1997,35 +2059,65 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
|
|||
tu_cond_exec_end(cs);
|
||||
}
|
||||
|
||||
static void
|
||||
clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
VkFormat format,
|
||||
VkImageAspectFlags clear_mask,
|
||||
const VkRenderPassBeginInfo *info,
|
||||
uint32_t a,
|
||||
bool separate_stencil)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const struct blit_ops *ops = &r2d_ops;
|
||||
if (cmd->state.pass->attachments[a].samples > 1)
|
||||
ops = &r3d_ops;
|
||||
|
||||
ops->setup(cmd, cs, format, clear_mask, ROTATE_0, true, iview->ubwc_enabled);
|
||||
ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
|
||||
ops->clear_value(cs, format, &info->pClearValues[a]);
|
||||
|
||||
for (uint32_t i = 0; i < fb->layers; i++) {
|
||||
if (separate_stencil) {
|
||||
if (ops == &r3d_ops)
|
||||
r3d_dst_stencil(cs, iview, i);
|
||||
else
|
||||
r2d_dst_stencil(cs, iview, i);
|
||||
} else {
|
||||
ops->dst(cs, iview, i);
|
||||
}
|
||||
ops->run(cmd, cs);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
const VkRenderPassBeginInfo *info)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const struct tu_render_pass_attachment *attachment =
|
||||
&cmd->state.pass->attachments[a];
|
||||
|
||||
if (!attachment->clear_mask)
|
||||
return;
|
||||
|
||||
const struct blit_ops *ops = &r2d_ops;
|
||||
if (attachment->samples > 1)
|
||||
ops = &r3d_ops;
|
||||
|
||||
ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0,
|
||||
true, iview->ubwc_enabled);
|
||||
ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
|
||||
ops->clear_value(cs, attachment->format, &info->pClearValues[a]);
|
||||
|
||||
/* Wait for any flushes at the beginning of the renderpass to complete */
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
for (uint32_t i = 0; i < fb->layers; i++) {
|
||||
ops->dst(cs, iview, i);
|
||||
ops->run(cmd, cs);
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||||
clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
info, a, false);
|
||||
}
|
||||
if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
info, a, true);
|
||||
}
|
||||
} else {
|
||||
clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask,
|
||||
info, a, false);
|
||||
}
|
||||
|
||||
/* The spec doesn't explicitly say, but presumably the initial renderpass
|
||||
|
|
@ -2069,7 +2161,8 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
const struct tu_render_pass_attachment *attachment,
|
||||
bool resolve)
|
||||
bool resolve,
|
||||
bool separate_stencil)
|
||||
{
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
|
||||
|
|
@ -2081,14 +2174,23 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
|
|||
.integer = vk_format_is_int(attachment->format)));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
|
||||
tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
|
||||
tu_cs_image_ref_2d(cs, iview, 0, false);
|
||||
if (separate_stencil) {
|
||||
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
|
||||
tu_cs_emit_qw(cs, iview->stencil_base_addr);
|
||||
tu_cs_emit(cs, iview->stencil_PITCH);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
|
||||
tu_cs_image_flag_ref(cs, iview, 0);
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil));
|
||||
} else {
|
||||
tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
|
||||
tu_cs_image_ref_2d(cs, iview, 0, false);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
|
||||
tu_cs_image_flag_ref(cs, iview, 0);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
|
||||
}
|
||||
|
||||
tu6_emit_event_write(cmd, cs, BLIT);
|
||||
}
|
||||
|
|
@ -2140,7 +2242,58 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
&cmd->state.pass->attachments[a];
|
||||
|
||||
if (attachment->load || force_load)
|
||||
tu_emit_blit(cmd, cs, iview, attachment, false);
|
||||
tu_emit_blit(cmd, cs, iview, attachment, false, false);
|
||||
|
||||
if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
|
||||
tu_emit_blit(cmd, cs, iview, attachment, false, true);
|
||||
}
|
||||
|
||||
static void
|
||||
store_cp_blit(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
struct tu_image_view *iview,
|
||||
uint32_t samples,
|
||||
bool separate_stencil,
|
||||
VkFormat format,
|
||||
uint32_t gmem_offset,
|
||||
uint32_t cpp)
|
||||
{
|
||||
r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false,
|
||||
iview->ubwc_enabled, true);
|
||||
if (separate_stencil)
|
||||
r2d_dst_stencil(cs, iview, 0);
|
||||
else
|
||||
r2d_dst(cs, iview, 0);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_SP_PS_2D_SRC_INFO(
|
||||
.color_format = tu6_format_texture(format, TILE6_2).fmt,
|
||||
.tile_mode = TILE6_2,
|
||||
.srgb = vk_format_is_srgb(format),
|
||||
.samples = tu_msaa_samples(samples),
|
||||
.samples_average = !vk_format_is_int(format),
|
||||
.unk20 = 1,
|
||||
.unk22 = 1),
|
||||
/* note: src size does not matter when not scaling */
|
||||
A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
|
||||
A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + gmem_offset),
|
||||
A6XX_SP_PS_2D_SRC_HI(),
|
||||
A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.framebuffer->tile0.width * cpp));
|
||||
|
||||
/* sync GMEM writes with CACHE. */
|
||||
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
|
||||
|
||||
/* Wait for CACHE_INVALIDATE to land */
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_BLIT, 1);
|
||||
tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
|
||||
|
||||
/* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
|
||||
* sysmem, and we generally assume that GMEM renderpasses leave their
|
||||
* results in sysmem, so we need to flush manually here.
|
||||
*/
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -2149,13 +2302,12 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
uint32_t a,
|
||||
uint32_t gmem_a)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const VkRect2D *render_area = &cmd->state.render_area;
|
||||
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
|
||||
struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
|
||||
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
|
||||
|
||||
if (!dst->store)
|
||||
if (!dst->store && !dst->store_stencil)
|
||||
return;
|
||||
|
||||
uint32_t x1 = render_area->offset.x;
|
||||
|
|
@ -2176,7 +2328,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
|
||||
/* use fast path when render area is aligned, except for unsupported resolve cases */
|
||||
if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
|
||||
tu_emit_blit(cmd, cs, iview, src, true);
|
||||
if (dst->store)
|
||||
tu_emit_blit(cmd, cs, iview, src, true, false);
|
||||
if (dst->store_stencil)
|
||||
tu_emit_blit(cmd, cs, iview, src, true, true);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -2188,38 +2343,18 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
return;
|
||||
}
|
||||
|
||||
r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
ROTATE_0, false, iview->ubwc_enabled, true);
|
||||
r2d_dst(cs, iview, 0);
|
||||
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_SP_PS_2D_SRC_INFO(
|
||||
.color_format = tu6_format_texture(src->format, TILE6_2).fmt,
|
||||
.tile_mode = TILE6_2,
|
||||
.srgb = vk_format_is_srgb(src->format),
|
||||
.samples = tu_msaa_samples(src->samples),
|
||||
.samples_average = !vk_format_is_int(src->format),
|
||||
.unk20 = 1,
|
||||
.unk22 = 1),
|
||||
/* note: src size does not matter when not scaling */
|
||||
A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
|
||||
A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
|
||||
A6XX_SP_PS_2D_SRC_HI(),
|
||||
A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));
|
||||
VkFormat format = src->format;
|
||||
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||
format = VK_FORMAT_D32_SFLOAT;
|
||||
|
||||
/* sync GMEM writes with CACHE. */
|
||||
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
|
||||
|
||||
/* Wait for CACHE_INVALIDATE to land */
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_BLIT, 1);
|
||||
tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
|
||||
|
||||
/* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
|
||||
* sysmem, and we generally assume that GMEM renderpasses leave their
|
||||
* results in sysmem, so we need to flush manually here.
|
||||
*/
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
|
||||
if (dst->store) {
|
||||
store_cp_blit(cmd, cs, iview, src->samples, false, format,
|
||||
src->gmem_offset, src->cpp);
|
||||
}
|
||||
if (dst->store_stencil) {
|
||||
store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
|
||||
src->gmem_offset_stencil, src->samples);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -290,11 +290,18 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
|
|||
A6XX_GRAS_LRZ_BUFFER_PITCH(0),
|
||||
A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(0));
|
||||
|
||||
if (attachment->format == VK_FORMAT_S8_UINT) {
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
|
||||
attachment->format == VK_FORMAT_S8_UINT) {
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 6);
|
||||
tu_cs_emit(cs, A6XX_RB_STENCIL_INFO(.separate_stencil = true).value);
|
||||
tu_cs_image_ref(cs, iview, 0);
|
||||
tu_cs_emit(cs, attachment->gmem_offset);
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
tu_cs_image_stencil_ref(cs, iview, 0);
|
||||
tu_cs_emit(cs, attachment->gmem_offset_stencil);
|
||||
} else {
|
||||
tu_cs_image_ref(cs, iview, 0);
|
||||
tu_cs_emit(cs, attachment->gmem_offset);
|
||||
}
|
||||
} else {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_STENCIL_INFO(0));
|
||||
|
|
@ -1053,7 +1060,7 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
|
|||
* renderpass, this would avoid emitting both sysmem/gmem versions
|
||||
*
|
||||
* emit two texture descriptors for each input, as a workaround for
|
||||
* d24s8, which can be sampled as both float (depth) and integer (stencil)
|
||||
* d24s8/d32s8, which can be sampled as both float (depth) and integer (stencil)
|
||||
* tu_shader lowers uint input attachment loads to use the 2nd descriptor
|
||||
* in the pair
|
||||
* TODO: a smarter workaround
|
||||
|
|
@ -1077,6 +1084,8 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
|
|||
const struct tu_render_pass_attachment *att =
|
||||
&cmd->state.pass->attachments[a];
|
||||
uint32_t *dst = &texture.map[A6XX_TEX_CONST_DWORDS * i];
|
||||
uint32_t gmem_offset = att->gmem_offset;
|
||||
uint32_t cpp = att->cpp;
|
||||
|
||||
memcpy(dst, iview->descriptor, A6XX_TEX_CONST_DWORDS * 4);
|
||||
|
||||
|
|
@ -1102,6 +1111,19 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
}
|
||||
|
||||
if (i % 2 == 1 && att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
dst[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
|
||||
dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_8_UINT);
|
||||
dst[2] &= ~(A6XX_TEX_CONST_2_PITCHALIGN__MASK | A6XX_TEX_CONST_2_PITCH__MASK);
|
||||
dst[2] |= A6XX_TEX_CONST_2_PITCH(iview->stencil_PITCH << 6);
|
||||
dst[3] = 0;
|
||||
dst[4] = iview->stencil_base_addr;
|
||||
dst[5] = (dst[5] & 0xffff) | iview->stencil_base_addr >> 32;
|
||||
|
||||
cpp = att->samples;
|
||||
gmem_offset = att->gmem_offset_stencil;
|
||||
}
|
||||
|
||||
if (!gmem)
|
||||
continue;
|
||||
|
||||
|
|
@ -1110,9 +1132,9 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
|
|||
dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
|
||||
dst[2] =
|
||||
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
|
||||
A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * att->cpp);
|
||||
A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp);
|
||||
dst[3] = 0;
|
||||
dst[4] = cmd->device->physical_device->gmem_base + att->gmem_offset;
|
||||
dst[4] = cmd->device->physical_device->gmem_base + gmem_offset;
|
||||
dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
|
||||
for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
|
||||
dst[i] = 0;
|
||||
|
|
|
|||
|
|
@ -226,7 +226,7 @@ static const struct tu_native_format tu6_format_table[] = {
|
|||
TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */
|
||||
TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */
|
||||
TU6_xTC(D24_UNORM_S8_UINT, 8_8_8_8_UNORM, WZYX), /* 129 */
|
||||
TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */
|
||||
TU6_xTC(D32_SFLOAT_S8_UINT, NONE, WZYX), /* 130 */
|
||||
|
||||
/* compressed */
|
||||
TU6_xTx(BC1_RGB_UNORM_BLOCK, DXT1, WZYX), /* 131 */
|
||||
|
|
@ -449,6 +449,12 @@ tu_physical_device_get_format_properties(
|
|||
if (tu6_pipe2depth(format) != (enum a6xx_depth_format)~0)
|
||||
optimal |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
|
||||
/* D32_SFLOAT_S8_UINT is tiled as two images, so no linear format
|
||||
* blob enables some linear features, but it's not useful, so don't bother.
|
||||
*/
|
||||
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||
linear = 0;
|
||||
|
||||
end:
|
||||
out_properties->linearTilingFeatures = linear;
|
||||
out_properties->optimalTilingFeatures = optimal;
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ tu6_plane_count(VkFormat format)
|
|||
default:
|
||||
return 1;
|
||||
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
return 2;
|
||||
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
|
||||
return 3;
|
||||
|
|
@ -58,13 +59,15 @@ tu6_plane_format(VkFormat format, uint32_t plane)
|
|||
return plane ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
|
||||
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
|
||||
return VK_FORMAT_R8_UNORM;
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
return plane ? VK_FORMAT_S8_UINT : VK_FORMAT_D32_SFLOAT;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu6_plane_index(VkImageAspectFlags aspect_mask)
|
||||
tu6_plane_index(VkFormat format, VkImageAspectFlags aspect_mask)
|
||||
{
|
||||
switch (aspect_mask) {
|
||||
default:
|
||||
|
|
@ -73,6 +76,8 @@ tu6_plane_index(VkImageAspectFlags aspect_mask)
|
|||
return 1;
|
||||
case VK_IMAGE_ASPECT_PLANE_2_BIT:
|
||||
return 2;
|
||||
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
||||
return format == VK_FORMAT_D32_SFLOAT_S8_UINT;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -228,6 +233,10 @@ tu_image_create(VkDevice _device,
|
|||
width0 = (width0 + 1) >> 1;
|
||||
height0 = (height0 + 1) >> 1;
|
||||
break;
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
/* no UBWC for separate stencil */
|
||||
ubwc_enabled = false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -372,6 +381,14 @@ tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t la
|
|||
tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer);
|
||||
}
|
||||
|
||||
void
|
||||
tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
||||
{
|
||||
tu_cs_emit(cs, iview->stencil_PITCH);
|
||||
tu_cs_emit(cs, iview->stencil_layer_size >> 6);
|
||||
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
|
||||
}
|
||||
|
||||
void
|
||||
tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src)
|
||||
{
|
||||
|
|
@ -420,7 +437,8 @@ tu_image_view_init(struct tu_image_view *iview,
|
|||
|
||||
memset(iview->descriptor, 0, sizeof(iview->descriptor));
|
||||
|
||||
struct fdl_layout *layout = &image->layout[tu6_plane_index(aspect_mask)];
|
||||
struct fdl_layout *layout =
|
||||
&image->layout[tu6_plane_index(image->vk_format, aspect_mask)];
|
||||
|
||||
uint32_t width = u_minify(layout->width0, range->baseMipLevel);
|
||||
uint32_t height = u_minify(layout->height0, range->baseMipLevel);
|
||||
|
|
@ -447,6 +465,9 @@ tu_image_view_init(struct tu_image_view *iview,
|
|||
uint32_t ubwc_pitch = fdl_ubwc_pitch(layout, range->baseMipLevel);
|
||||
uint32_t layer_size = fdl_layer_stride(layout, range->baseMipLevel);
|
||||
|
||||
if (aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
|
||||
format = tu6_plane_format(format, tu6_plane_index(format, aspect_mask));
|
||||
|
||||
struct tu_native_format fmt = tu6_format_texture(format, layout->tile_mode);
|
||||
/* note: freedreno layout assumes no TILE_ALL bit for non-UBWC
|
||||
* this means smaller mipmap levels have a linear tile mode
|
||||
|
|
@ -642,6 +663,14 @@ tu_image_view_init(struct tu_image_view *iview,
|
|||
.color_format = cfmt.fmt,
|
||||
.color_swap = cfmt.swap,
|
||||
.flags = ubwc_enabled).value;
|
||||
|
||||
if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
layout = &image->layout[1];
|
||||
iview->stencil_base_addr = image->bo->iova + image->bo_offset +
|
||||
fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer);
|
||||
iview->stencil_layer_size = fdl_layer_stride(layout, range->baseMipLevel);
|
||||
iview->stencil_PITCH = A6XX_RB_STENCIL_BUFFER_PITCH(fdl_pitch(layout, range->baseMipLevel)).value;
|
||||
}
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
@ -720,7 +749,7 @@ tu_GetImageSubresourceLayout(VkDevice _device,
|
|||
TU_FROM_HANDLE(tu_image, image, _image);
|
||||
|
||||
struct fdl_layout *layout =
|
||||
&image->layout[tu6_plane_index(pSubresource->aspectMask)];
|
||||
&image->layout[tu6_plane_index(image->vk_format, pSubresource->aspectMask)];
|
||||
const struct fdl_slice *slice = layout->slices + pSubresource->mipLevel;
|
||||
|
||||
pLayout->offset =
|
||||
|
|
|
|||
|
|
@ -350,12 +350,20 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
|
|||
uint32_t cpp_total = 0;
|
||||
for (uint32_t i = 0; i < pass->attachment_count; i++) {
|
||||
struct tu_render_pass_attachment *att = &pass->attachments[i];
|
||||
bool cpp1 = (att->cpp == 1);
|
||||
if (att->gmem_offset >= 0) {
|
||||
cpp_total += att->cpp;
|
||||
|
||||
/* take into account the separate stencil: */
|
||||
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
cpp1 = (att->samples == 1);
|
||||
cpp_total += att->samples;
|
||||
}
|
||||
|
||||
/* texture pitch must be aligned to 64, use a tile_align_w that is
|
||||
* a multiple of 64 for cpp==1 attachment to work as input attachment
|
||||
*/
|
||||
if (att->cpp == 1 && tile_align_w % 64 != 0) {
|
||||
if (cpp1 && tile_align_w % 64 != 0) {
|
||||
tile_align_w *= 2;
|
||||
block_align_shift -= 1;
|
||||
}
|
||||
|
|
@ -379,8 +387,8 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
|
|||
* optimal: nblocks = {13, 51}, pixels = 208896
|
||||
*/
|
||||
uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
|
||||
uint32_t offset = 0, pixels = ~0u;
|
||||
for (uint32_t i = 0; i < pass->attachment_count; i++) {
|
||||
uint32_t offset = 0, pixels = ~0u, i;
|
||||
for (i = 0; i < pass->attachment_count; i++) {
|
||||
struct tu_render_pass_attachment *att = &pass->attachments[i];
|
||||
if (att->gmem_offset < 0)
|
||||
continue;
|
||||
|
|
@ -390,18 +398,33 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
|
|||
uint32_t align = MAX2(1, att->cpp >> block_align_shift);
|
||||
uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
|
||||
|
||||
if (nblocks > gmem_blocks) {
|
||||
pixels = 0;
|
||||
if (nblocks > gmem_blocks)
|
||||
break;
|
||||
}
|
||||
|
||||
gmem_blocks -= nblocks;
|
||||
cpp_total -= att->cpp;
|
||||
offset += nblocks * gmem_align;
|
||||
pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
|
||||
|
||||
/* repeat the same for separate stencil */
|
||||
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
att->gmem_offset_stencil = offset;
|
||||
|
||||
/* note: for s8_uint, block align is always 1 */
|
||||
uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
|
||||
if (nblocks > gmem_blocks)
|
||||
break;
|
||||
|
||||
gmem_blocks -= nblocks;
|
||||
cpp_total -= att->samples;
|
||||
offset += nblocks * gmem_align;
|
||||
pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
|
||||
}
|
||||
}
|
||||
|
||||
pass->gmem_pixels = pixels;
|
||||
/* if the loop didn't complete then the gmem config is impossible */
|
||||
if (i == pass->attachment_count)
|
||||
pass->gmem_pixels = pixels;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -437,6 +460,16 @@ attachment_set_ops(struct tu_render_pass_attachment *att,
|
|||
att->load = stencil_load;
|
||||
att->store = stencil_store;
|
||||
break;
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
|
||||
if (att->clear_mask)
|
||||
att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
if (stencil_clear)
|
||||
att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
if (stencil_load)
|
||||
att->load_stencil = true;
|
||||
if (stencil_store)
|
||||
att->store_stencil = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -600,7 +633,13 @@ tu_CreateRenderPass2(VkDevice _device,
|
|||
|
||||
att->format = pCreateInfo->pAttachments[i].format;
|
||||
att->samples = pCreateInfo->pAttachments[i].samples;
|
||||
att->cpp = vk_format_get_blocksize(att->format) * att->samples;
|
||||
/* for d32s8, cpp is for the depth image, and
|
||||
* att->samples will be used as the cpp for the stencil image
|
||||
*/
|
||||
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||
att->cpp = 4 * att->samples;
|
||||
else
|
||||
att->cpp = vk_format_get_blocksize(att->format) * att->samples;
|
||||
att->gmem_offset = -1;
|
||||
|
||||
attachment_set_ops(att,
|
||||
|
|
|
|||
|
|
@ -1349,6 +1349,11 @@ struct tu_image_view
|
|||
uint32_t RB_2D_DST_INFO;
|
||||
|
||||
uint32_t RB_BLIT_DST_INFO;
|
||||
|
||||
/* for d32s8 separate stencil */
|
||||
uint64_t stencil_base_addr;
|
||||
uint32_t stencil_layer_size;
|
||||
uint32_t stencil_PITCH;
|
||||
};
|
||||
|
||||
struct tu_sampler_ycbcr_conversion {
|
||||
|
|
@ -1378,6 +1383,12 @@ tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t
|
|||
void
|
||||
tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
|
||||
|
||||
void
|
||||
tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
|
||||
|
||||
/* Build the register value for the separate stencil plane of a d32s8 view:
 * take the view's precomputed register word `x` and replace its
 * COLOR_FORMAT field with FMT6_8_UINT.
 *
 * `iview` is parenthesized so that any pointer expression (e.g. `views + 1`)
 * can be passed; `x` is a field/register name and is token-pasted, so it
 * must be a plain identifier.
 */
#define tu_image_view_stencil(iview, x) \
   (((iview)->x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_8_UINT))
|
||||
|
||||
VkResult
|
||||
tu_image_create(VkDevice _device,
|
||||
const VkImageCreateInfo *pCreateInfo,
|
||||
|
|
@ -1484,6 +1495,10 @@ struct tu_render_pass_attachment
|
|||
bool load;
|
||||
bool store;
|
||||
int32_t gmem_offset;
|
||||
/* for D32S8 separate stencil: */
|
||||
bool load_stencil;
|
||||
bool store_stencil;
|
||||
int32_t gmem_offset_stencil;
|
||||
};
|
||||
|
||||
struct tu_render_pass
|
||||
|
|
|
|||
|
|
@ -227,6 +227,7 @@ tu6_pipe2depth(VkFormat format)
|
|||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
return DEPTH6_24_8;
|
||||
case VK_FORMAT_D32_SFLOAT:
|
||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
||||
case VK_FORMAT_S8_UINT:
|
||||
return DEPTH6_32;
|
||||
default:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue