diff --git a/src/nouveau/vulkan/nvk_cmd_clear.c b/src/nouveau/vulkan/nvk_cmd_clear.c index 6352678a430..7538b684b70 100644 --- a/src/nouveau/vulkan/nvk_cmd_clear.c +++ b/src/nouveau/vulkan/nvk_cmd_clear.c @@ -142,6 +142,21 @@ emit_clear_rects(struct nvk_cmd_buffer *cmd, } } +static uint32_t +get_color_target_index(const struct vk_dynamic_graphics_state *dyn, + uint32_t attachment) +{ + if (attachment == VK_ATTACHMENT_UNUSED) + return VK_ATTACHMENT_UNUSED; + + for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) { + if (dyn->rp.color_attachment_remap[a] == attachment) + return a; + } + + return VK_ATTACHMENT_UNUSED; +} + VKAPI_ATTR void VKAPI_CALL nvk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, @@ -189,7 +204,9 @@ nvk_CmdClearAttachments(VkCommandBuffer commandBuffer, if (pAttachments[i].aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) continue; - if (pAttachments[i].colorAttachment == VK_ATTACHMENT_UNUSED) + const uint32_t ct = + get_color_target_index(dyn, pAttachments[i].colorAttachment); + if (ct == VK_ATTACHMENT_UNUSED) continue; VkClearColorValue color = pAttachments[i].clearValue.color; @@ -201,8 +218,7 @@ nvk_CmdClearAttachments(VkCommandBuffer commandBuffer, P_NV9097_SET_COLOR_CLEAR_VALUE(p, 2, color.uint32[2]); P_NV9097_SET_COLOR_CLEAR_VALUE(p, 3, color.uint32[3]); - emit_clear_rects(cmd, pAttachments[i].colorAttachment, - clear_depth, clear_stencil, rectCount, pRects); + emit_clear_rects(cmd, ct, clear_depth, clear_stencil, rectCount, pRects); /* We only need to clear depth/stencil once */ clear_depth = clear_stencil = false; diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index 35a59c618d3..2069e262bc9 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -749,6 +749,11 @@ nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd, render->stencil_att.vk_format = inheritance_info->stencilAttachmentFormat; + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(inheritance_info->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + vk_cmd_set_rendering_attachment_remap(&cmd->vk, rar_info); + const VkRenderingAttachmentLocationInfoKHR att_loc_info_default = { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR, .colorAttachmentCount = inheritance_info->colorAttachmentCount, @@ -1404,8 +1409,17 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer, if (sample_layout != NIL_SAMPLE_LAYOUT_INVALID) nvk_cmd_set_sample_layout(cmd, sample_layout); - if (render->flags & VK_RENDERING_RESUMING_BIT) + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(pRenderingInfo->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + + if (render->flags & VK_RENDERING_RESUMING_BIT) { + vk_cmd_set_rendering_attachment_remap(&cmd->vk, rar_info); return; + } + + /* We don't want a previous remap messing up our clears */ + vk_cmd_set_rendering_attachment_remap(&cmd->vk, NULL); for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { const struct nvk_image_view *iview = render->color_att[i].iview; @@ -1477,7 +1491,11 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer, P_NV9097_SET_RENDER_ENABLE_OVERRIDE(p, MODE_USE_RENDER_ENABLE); } - /* TODO: Attachment clears */ + /* This needs to be set after the clears because the clears that are part + * of CmdBeginRendering() apply to the entire render, not just the + * attachments selected by the remap. 
+ */ + vk_cmd_set_rendering_attachment_remap(&cmd->vk, rar_info); } VKAPI_ATTR void VKAPI_CALL @@ -3179,14 +3197,18 @@ nvk_flush_ds_state(struct nvk_cmd_buffer *cmd) const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { bool enable = dyn->ds.depth.test_enable && + dyn->rp.depth_stencil_attachment_enable && render->depth_att.vk_format != VK_FORMAT_UNDEFINED; P_IMMD(p, NV9097, SET_DEPTH_TEST, enable); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { bool enable = dyn->ds.depth.write_enable && + dyn->rp.depth_stencil_attachment_enable && render->depth_att.vk_format != VK_FORMAT_UNDEFINED; P_IMMD(p, NV9097, SET_DEPTH_WRITE, enable); } @@ -3208,8 +3230,10 @@ nvk_flush_ds_state(struct nvk_cmd_buffer *cmd) P_NV9097_SET_DEPTH_BOUNDS_MAX(p, fui(dyn->ds.depth.bounds_test.max)); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { bool enable = dyn->ds.stencil.test_enable && + dyn->rp.depth_stencil_attachment_enable && render->stencil_att.vk_format != VK_FORMAT_UNDEFINED; P_IMMD(p, NV9097, SET_STENCIL_TEST, enable); } @@ -3376,6 +3400,33 @@ nvk_mme_set_write_mask(struct mme_builder *b) mme_emit(b, common_mask); } +static const struct vk_color_blend_attachment_state * +get_blend_attachment_state(const struct vk_dynamic_graphics_state *dyn, + uint32_t a) +{ + uint32_t remap = dyn->rp.color_attachment_remap[a]; + return remap == MESA_VK_ATTACHMENT_UNUSED ? + NULL : &dyn->cb.attachments[remap]; +} + +static bool +get_color_write_enabled(const struct vk_dynamic_graphics_state *dyn, + uint32_t a) +{ + uint32_t remap = dyn->rp.color_attachment_remap[a]; + return remap == MESA_VK_ATTACHMENT_UNUSED ? + false : (dyn->cb.color_write_enables & BITFIELD_BIT(remap)); +} + +static uint8_t +get_color_attachment_location(const struct vk_dynamic_graphics_state *dyn, + uint32_t a) +{ + uint32_t remap = dyn->rp.color_attachment_remap[a]; + return remap == MESA_VK_ATTACHMENT_UNUSED ? 
+ MESA_VK_ATTACHMENT_UNUSED : dyn->cal.color_map[remap]; +} + static void nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) { @@ -3394,16 +3445,23 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) P_IMMD(p, NV9097, SET_LOGIC_OP_FUNC, func); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { for (uint8_t a = 0; a < render->color_att_count; a++) { - P_IMMD(p, NV9097, SET_BLEND(a), dyn->cb.attachments[a].blend_enable); + const struct vk_color_blend_attachment_state *att = + get_blend_attachment_state(dyn, a); + P_IMMD(p, NV9097, SET_BLEND(a), att != NULL && att->blend_enable); } } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { for (uint8_t a = 0; a < render->color_att_count; a++) { const struct vk_color_blend_attachment_state *att = - &dyn->cb.attachments[a]; + get_blend_attachment_state(dyn, a); + if (att == NULL) + continue; + P_MTHD(p, NV9097, SET_BLEND_PER_TARGET_SEPARATE_FOR_ALPHA(a)); P_NV9097_SET_BLEND_PER_TARGET_SEPARATE_FOR_ALPHA(p, a, ENABLE_TRUE); P_NV9097_SET_BLEND_PER_TARGET_COLOR_OP(p, a, @@ -3424,26 +3482,31 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_ATTACHMENTS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP)) { uint32_t color_write_enables = 0x0; for (uint8_t a = 0; a < render->color_att_count; a++) { - if (dyn->cb.color_write_enables & BITFIELD_BIT(a)) + if (get_color_write_enabled(dyn, a)) color_write_enables |= 0xf << (4 * a); } uint32_t cb_att_write_mask = 0x0; - for (uint8_t a = 0; a < render->color_att_count; a++) - cb_att_write_mask |= dyn->cb.attachments[a].write_mask << (a * 4); + for (uint8_t a = 0; a < render->color_att_count; a++) { + const struct vk_color_blend_attachment_state *att = + get_blend_attachment_state(dyn, a); + cb_att_write_mask |= (att != NULL ? 
att->write_mask : 0) << (a * 4); + } uint32_t rp_att_write_mask = 0x0; for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) { - if (dyn->rp.attachments & (MESA_VK_RP_ATTACHMENT_COLOR_0_BIT << a)) + if (dyn->rp.attachments & MESA_VK_RP_ATTACHMENT_COLOR_BIT(a)) rp_att_write_mask |= 0xf << (4 * a); } uint32_t att_has_loc_mask = 0x0; for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) { - if (dyn->cal.color_map[a] != MESA_VK_ATTACHMENT_UNUSED) + uint8_t loc = get_color_attachment_location(dyn, a); + if (loc != MESA_VK_ATTACHMENT_UNUSED) att_has_loc_mask |= 0xf << (4 * a); } @@ -3455,19 +3518,22 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) att_has_loc_mask); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_ATTACHMENTS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP)) { int8_t loc_att[NVK_MAX_RTS] = { -1, -1, -1, -1, -1, -1, -1, -1}; uint8_t max_loc = 0; uint32_t att_used = 0; for (uint8_t a = 0; a < render->color_att_count; a++) { - if (dyn->cal.color_map[a] == MESA_VK_ATTACHMENT_UNUSED) + uint8_t loc = get_color_attachment_location(dyn, a); + if (loc == MESA_VK_ATTACHMENT_UNUSED) continue; att_used |= BITFIELD_BIT(a); - assert(dyn->cal.color_map[a] < NVK_MAX_RTS); - loc_att[dyn->cal.color_map[a]] = a; - max_loc = MAX2(max_loc, dyn->cal.color_map[a]); + assert(loc < NVK_MAX_RTS); + loc_att[loc] = a; + max_loc = MAX2(max_loc, loc); } for (uint8_t l = 0; l < NVK_MAX_RTS; l++) { diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index 3bdb4b71425..564981b334b 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -918,7 +918,12 @@ pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, unsigned idx, cfg.internal_buffer_offset = cbuf_offset; cfg.clear = rt_clear(&fb->rts[idx]); cfg.dithering_enable = true; - cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; + if (rt) { + get_rt_formats(rt->format, &cfg.writeback_format, + &cfg.internal_format, &cfg.swizzle); + } else { + cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; + } #if PAN_ARCH >= 7 cfg.writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; #endif diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index b0c56c5f0aa..a1d346b840d 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -455,6 +455,8 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) static bool has_depth_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0; } @@ -462,6 +464,8 @@ has_depth_att(struct panvk_cmd_buffer *cmdbuf) static bool has_stencil_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0; } @@ -653,6 +657,7 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf) dyn_gfx_state_dirty(cmdbuf, CB_BLEND_EQUATIONS) || dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) || dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || dyn_gfx_state_dirty(cmdbuf, COLOR_ATTACHMENT_MAP) || fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE); @@ 
-1799,6 +1804,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf, struct pan_earlyzs_state earlyzs) dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) || dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) || fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, OQ); @@ -1960,6 +1966,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf, dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || /* writes_depth() uses vk_depth_stencil_state */ dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) || dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) || @@ -2915,6 +2922,11 @@ panvk_per_arch(cmd_inherit_render_state)( att_loc_info = &att_loc_info_default; vk_cmd_set_rendering_attachment_locations(&cmdbuf->vk, att_loc_info); + + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(inheritance_info->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + vk_cmd_set_rendering_attachment_remap(&cmdbuf->vk, rar_info); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index e381fd614a8..bfd01f1e93d 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -92,6 +92,8 @@ is_indirect_draw(const struct panvk_draw_data *draw) static bool has_depth_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0; } @@ -99,6 +101,8 @@ has_depth_att(struct panvk_cmd_buffer *cmdbuf) static bool has_stencil_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0; } @@ -225,6 +229,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || gfx_state_dirty(cmdbuf, FS) || gfx_state_dirty(cmdbuf, OQ) || gfx_state_dirty(cmdbuf, RENDER_STATE); diff --git a/src/panfrost/vulkan/panvk_vX_blend.c b/src/panfrost/vulkan/panvk_vX_blend.c index 1a2853a6354..7bef529116f 100644 --- a/src/panfrost/vulkan/panvk_vX_blend.c +++ b/src/panfrost/vulkan/panvk_vX_blend.c @@ -326,7 +326,7 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, uint64_t blend_shaders[8] = {}; /* All bits set to one encodes unused fixed-function blend constant. 
*/ unsigned ff_blend_constant = ~0; - uint8_t remap_catts[MAX_RTS] = { + uint8_t loc_rt[MAX_RTS] = { MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, @@ -334,41 +334,50 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, }; uint32_t blend_count = MAX2(cmdbuf->state.gfx.render.fb.info.rt_count, 1); - static_assert(ARRAY_SIZE(remap_catts) <= ARRAY_SIZE(cal->color_map), - "vk_color_attachment_location_state::color_map is too small"); - - for (uint32_t i = 0; i < ARRAY_SIZE(remap_catts); i++) { - if (cal->color_map[i] != MESA_VK_ATTACHMENT_UNUSED) { - assert(cal->color_map[i] < MAX_RTS); - remap_catts[cal->color_map[i]] = i; - } - } - memset(blend_info, 0, sizeof(*blend_info)); - for (uint8_t i = 0; i < cb->attachment_count; i++) { - struct pan_blend_rt_state *rt = &bs.rts[i]; - if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED) { + for (uint8_t rt_idx = 0; rt_idx < MAX_RTS; rt_idx++) { + struct pan_blend_rt_state *rt = &bs.rts[rt_idx]; + + /* This is the API-level color attachment index. State that is indexed + * by attachment rather than by bound render target, such as the blend + * attachment state and the color write enables, uses this index. + */ + const uint8_t att = dyns->rp.color_attachment_remap[rt_idx]; + if (att == MESA_VK_ATTACHMENT_UNUSED) { + rt->equation.color_mask = 0; + continue; + } + assert(att < MAX_RTS); + + /* This is the location inside the shader where this color attachment + * will be bound. + */ + const uint8_t loc = cal->color_map[rt_idx]; + if (loc == MESA_VK_ATTACHMENT_UNUSED) { + rt->equation.color_mask = 0; + continue; + } + assert(loc < MAX_RTS); + loc_rt[loc] = rt_idx; + + if (!(cb->color_write_enables & BITFIELD_BIT(att))) { rt->equation.color_mask = 0; continue; } - if (!(cb->color_write_enables & BITFIELD_BIT(i))) { + if (color_attachment_formats[rt_idx] == VK_FORMAT_UNDEFINED) { rt->equation.color_mask = 0; continue; } - if (color_attachment_formats[i] == VK_FORMAT_UNDEFINED) { + if (!cb->attachments[att].write_mask) { rt->equation.color_mask = 0; continue; } - if (!cb->attachments[i].write_mask) { - rt->equation.color_mask = 0; - continue; - } - - rt->format = vk_format_to_pipe_format(color_attachment_formats[i]); + rt->format = vk_format_to_pipe_format(color_attachment_formats[rt_idx]); + rt->nr_samples = color_attachment_samples[rt_idx]; /* Disable blending for LOGICOP_NOOP unless the format is float/srgb */ if (bs.logicop_enable && bs.logicop_func == PIPE_LOGICOP_NOOP && @@ -378,21 +387,20 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, continue; } - rt->nr_samples = color_attachment_samples[i]; - rt->equation.blend_enable = cb->attachments[i].blend_enable; - rt->equation.color_mask = cb->attachments[i].write_mask; + rt->equation.blend_enable = cb->attachments[att].blend_enable; + rt->equation.color_mask = cb->attachments[att].write_mask; rt->equation.rgb_func = - vk_blend_op_to_pipe(cb->attachments[i].color_blend_op); + vk_blend_op_to_pipe(cb->attachments[att].color_blend_op); rt->equation.rgb_src_factor = - vk_blend_factor_to_pipe(cb->attachments[i].src_color_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].src_color_blend_factor); rt->equation.rgb_dst_factor = - vk_blend_factor_to_pipe(cb->attachments[i].dst_color_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].dst_color_blend_factor); rt->equation.alpha_func = - vk_blend_op_to_pipe(cb->attachments[i].alpha_blend_op); +
vk_blend_op_to_pipe(cb->attachments[att].alpha_blend_op); rt->equation.alpha_src_factor = - vk_blend_factor_to_pipe(cb->attachments[i].src_alpha_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].src_alpha_blend_factor); rt->equation.alpha_dst_factor = - vk_blend_factor_to_pipe(cb->attachments[i].dst_alpha_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].dst_alpha_blend_factor); bool dest_has_alpha = util_format_has_alpha(rt->format); if (!dest_has_alpha) { @@ -409,12 +417,12 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, blend_info->any_dest_read |= pan_blend_reads_dest(rt->equation); - if (blend_needs_shader(&bs, i, &ff_blend_constant)) { - nir_alu_type src0_type = fs_info->bifrost.blend[i].type; + if (blend_needs_shader(&bs, rt_idx, &ff_blend_constant)) { + nir_alu_type src0_type = fs_info->bifrost.blend[loc].type; nir_alu_type src1_type = fs_info->bifrost.blend_src1_type; VkResult result = get_blend_shader(dev, &bs, src0_type, src1_type, - i, &blend_shaders[i]); + rt_idx, &blend_shaders[rt_idx]); if (result != VK_SUCCESS) return result; @@ -429,13 +437,13 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, ff_blend_constant = 0; /* Now that we've collected all the information, we can emit. */ - for (uint8_t i = 0; i < blend_count; i++) { - uint32_t catt_idx = remap_catts[i]; + for (uint8_t loc = 0; loc < blend_count; loc++) { + uint32_t rt_idx = loc_rt[loc]; uint64_t blend_shader = - catt_idx != MESA_VK_ATTACHMENT_UNUSED ? blend_shaders[catt_idx] : 0; + rt_idx != MESA_VK_ATTACHMENT_UNUSED ? blend_shaders[rt_idx] : 0; - emit_blend_desc(fs_info, fs_code, &bs, i, catt_idx, - blend_shader, ff_blend_constant, &bds[i]); + emit_blend_desc(fs_info, fs_code, &bs, loc, rt_idx, + blend_shader, ff_blend_constant, &bds[loc]); } if (blend_info->shader_loads_blend_const) diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index 6ee4fd43513..c35f66406ce 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -463,6 +463,11 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf, } assert(fbinfo->width && fbinfo->height); + + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(pRenderingInfo->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + vk_cmd_set_rendering_attachment_remap(&cmdbuf->vk, rar_info); } void diff --git a/src/panfrost/vulkan/panvk_vX_cmd_meta.c b/src/panfrost/vulkan/panvk_vX_cmd_meta.c index 1022274a6a8..359037f9592 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_meta.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_meta.c @@ -235,6 +235,8 @@ panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer, const VkClearRect *pRects) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + const struct vk_dynamic_graphics_state *dyns = + &cmdbuf->vk.dynamic_graphics_state; struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_cmd_meta_graphics_save_ctx save = {0}; struct vk_meta_rendering_info render = { @@ -252,9 +254,37 @@ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; } + STACK_ARRAY(VkClearAttachment, attachments, attachmentCount); + + uint32_t attachment_count = 0; + for (uint32_t i = 0; i < attachmentCount; i++) { + if (pAttachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) { + attachments[attachment_count++] = pAttachments[i]; + continue; + } + + uint32_t rt; + 
for (rt = 0; rt < MAX_RTS; rt++) { + if (dyns->rp.color_attachment_remap[rt] == + pAttachments[i].colorAttachment) + break; + } + if (rt == MAX_RTS) + continue; + + attachments[attachment_count++] = (VkClearAttachment) { + .aspectMask = pAttachments[i].aspectMask, + .colorAttachment = rt, + .clearValue = pAttachments[i].clearValue, + }; + } + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); - vk_meta_clear_attachments(&cmdbuf->vk, &dev->meta, &render, attachmentCount, - pAttachments, rectCount, pRects); + vk_meta_clear_attachments(&cmdbuf->vk, &dev->meta, &render, + attachment_count, attachments, + rectCount, pRects); + STACK_ARRAY_FINISH(attachments); panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); } diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 60dd109f215..3f457b14aef 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -488,6 +488,10 @@ panvk_hash_state(struct vk_physical_device *device, _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask, sizeof(state->rp->view_mask)); + if (state->rp) + _mesa_blake3_update(&blake3_ctx, state->rp->color_attachment_remap, + sizeof(state->rp->color_attachment_remap)); + if (state->ial) _mesa_blake3_update(&blake3_ctx, state->ial, sizeof(*state->ial)); } @@ -1373,6 +1377,29 @@ panvk_compile_shader(struct panvk_device *dev, if (state && state->ms && state->ms->sample_shading_enable) nir->info.fs.uses_sample_shading = true; + bool demoted_output = false; + nir_foreach_shader_out_variable(var, nir) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + uint32_t loc = var->data.location - FRAG_RESULT_DATA0; + uint32_t rt; + for (rt = 0; rt < MAX_RTS; rt++) { + if (state->rp->color_attachment_remap[rt] == loc) + break; + } + if (rt < MAX_RTS) { + var->data.location = FRAG_RESULT_DATA0 + rt; + } else { + var->data.mode = nir_var_shader_temp; + demoted_output = true; + } + } + if (demoted_output) { + NIR_PASS(_, nir, nir_fixup_deref_modes); + NIR_PASS(_, nir, nir_lower_global_vars_to_local); + } + /* We need to lower input attachments before we lower descriptors */ NIR_PASS(_, nir, panvk_per_arch(nir_lower_input_attachment_loads), state, &variant->fs.input_attachment_read); diff --git a/src/vulkan/runtime/vk_graphics_state.c b/src/vulkan/runtime/vk_graphics_state.c index 2c6415ee796..8f60bbc41e8 100644 --- a/src/vulkan/runtime/vk_graphics_state.c +++ b/src/vulkan/runtime/vk_graphics_state.c @@ -1303,6 +1303,17 @@ vk_render_pass_state_init(struct vk_render_pass_state *rp, rp->depth_stencil_attachment_samples = asc_info->depthStencilAttachmentSamples; } + const VkRenderingAttachmentRemapInfoMESA *rar_info = + !driver_rp ? vk_get_pipeline_rendering_ar_info(info) : NULL; + for (uint32_t i = 0; i < MESA_VK_MAX_COLOR_ATTACHMENTS; i++) { + rp->color_attachment_remap[i] = + rar_info == NULL ? i : + rar_info->colorAttachmentRemap[i] == VK_ATTACHMENT_UNUSED ? 
+ MESA_VK_ATTACHMENT_UNUSED : rar_info->colorAttachmentRemap[i]; + } + rp->depth_stencil_attachment_enable = + rar_info == NULL || rar_info->depthStencilAttachmentEnable; + for (uint32_t i = 0; i < r_info->colorAttachmentCount; i++) { if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i); @@ -1320,6 +1331,14 @@ vk_dynamic_graphics_state_init_rp(struct vk_dynamic_graphics_state *dst, const struct vk_render_pass_state *rp) { dst->rp.attachments = rp->attachments; + + STATIC_ASSERT(sizeof(dst->rp.color_attachment_remap) == + sizeof(rp->color_attachment_remap)); + memcpy(dst->rp.color_attachment_remap, rp->color_attachment_remap, + sizeof(rp->color_attachment_remap)); + + dst->rp.depth_stencil_attachment_enable = + rp->depth_stencil_attachment_enable; } #define FOREACH_STATE_GROUP(f) \ @@ -2264,6 +2283,11 @@ vk_dynamic_graphics_state_copy(struct vk_dynamic_graphics_state *dst, COPY_ARRAY(CB_BLEND_CONSTANTS, cb.blend_constants, 4); COPY_IF_SET(RP_ATTACHMENTS, rp.attachments); + if (IS_SET_IN_SRC(RP_REMAP)) { + COPY_ARRAY(RP_REMAP, rp.color_attachment_remap, + MESA_VK_MAX_COLOR_ATTACHMENTS); + COPY_MEMBER(RP_REMAP, rp.depth_stencil_attachment_enable); + } if (IS_SET_IN_SRC(INPUT_ATTACHMENT_MAP)) { COPY_MEMBER(INPUT_ATTACHMENT_MAP, ial.color_attachment_count); @@ -3191,6 +3215,23 @@ vk_common_CmdSetRenderingAttachmentLocationsKHR( vk_cmd_set_rendering_attachment_locations(cmd, pLocationInfo); } +void +vk_cmd_set_rendering_attachment_remap(struct vk_command_buffer *cmd, + const VkRenderingAttachmentRemapInfoMESA *info) +{ + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + for (uint32_t i = 0; i < MESA_VK_MAX_COLOR_ATTACHMENTS; i++) { + const uint8_t remap = + info == NULL ? i : + info->colorAttachmentRemap[i] == VK_ATTACHMENT_UNUSED ? + MESA_VK_ATTACHMENT_UNUSED : info->colorAttachmentRemap[i]; + SET_DYN_VALUE(dyn, RP_REMAP, rp.color_attachment_remap[i], remap); + } + SET_DYN_VALUE(dyn, RP_REMAP, rp.depth_stencil_attachment_enable, + info == NULL || info->depthStencilAttachmentEnable); +} + VKAPI_ATTR void VKAPI_CALL vk_common_CmdSetRenderingInputAttachmentIndicesKHR( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/runtime/vk_graphics_state.h b/src/vulkan/runtime/vk_graphics_state.h index 8713759df39..dd12ba5200b 100644 --- a/src/vulkan/runtime/vk_graphics_state.h +++ b/src/vulkan/runtime/vk_graphics_state.h @@ -27,6 +27,7 @@ #include "vulkan/vulkan_core.h" #include "vk_limits.h" +#include "vk_internal_exts.h" #include "util/bitset.h" #include "util/enum_operators.h" @@ -105,6 +106,7 @@ enum mesa_vk_dynamic_graphics_state { MESA_VK_DYNAMIC_CB_WRITE_MASKS, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS, MESA_VK_DYNAMIC_RP_ATTACHMENTS, + MESA_VK_DYNAMIC_RP_REMAP, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP, MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP, @@ -768,6 +770,10 @@ struct vk_color_attachment_location_state { /***/ struct vk_render_pass_state { /** Set of image aspects bound as color/depth/stencil attachments + * + * If VkRenderingAttachmentRemapInfoMESA is included in the pNext + * chain of VkPipelineRenderingCreateInfo, the depth and stencil bits will + * only be included in attachments if they are enabled. * * Set to MESA_VK_RP_ATTACHMENT_INFO_INVALID to indicate that attachment * info is invalid. 
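[review note, not part of the patch] To make the remap semantics concrete: rp.color_attachment_remap[] is indexed by hardware render target and holds the API-level color attachment that feeds that RT, with the identity mapping as the default and MESA_VK_ATTACHMENT_UNUSED marking a disabled RT. Below is a minimal, self-contained sketch of the two lookups the drivers in this series perform (RT to attachment, as in the blend and write-mask code, and attachment to RT, as in the CmdClearAttachments paths). The names and the 0xff sentinel here are illustrative stand-ins for MESA_VK_MAX_COLOR_ATTACHMENTS / MESA_VK_ATTACHMENT_UNUSED, not part of the patch.

#include <stdint.h>
#include <stdio.h>

#define N_ATTS 8        /* stands in for MESA_VK_MAX_COLOR_ATTACHMENTS */
#define ATT_UNUSED 0xff /* stands in for MESA_VK_ATTACHMENT_UNUSED */

/* Forward lookup: which API attachment does hardware RT 'rt' carry? */
static uint8_t
rt_to_attachment(const uint8_t remap[N_ATTS], uint8_t rt)
{
   return remap[rt]; /* ATT_UNUSED means the RT is disabled */
}

/* Inverse lookup: which RT currently carries API attachment 'att'? */
static uint8_t
attachment_to_rt(const uint8_t remap[N_ATTS], uint8_t att)
{
   for (uint8_t rt = 0; rt < N_ATTS; rt++) {
      if (remap[rt] == att)
         return rt;
   }
   return ATT_UNUSED; /* clears aimed at this attachment get dropped */
}

int
main(void)
{
   /* Identity default, i.e. no VkRenderingAttachmentRemapInfoMESA given */
   uint8_t remap[N_ATTS] = {0, 1, 2, 3, 4, 5, 6, 7};

   /* Merged-subpass style remap: RT0 carries attachment 2, RT1 is off */
   remap[0] = 2;
   remap[1] = ATT_UNUSED;

   printf("RT0 -> attachment %u\n", (unsigned)rt_to_attachment(remap, 0));
   printf("attachment 2 -> RT %u\n", (unsigned)attachment_to_rt(remap, 2));
   printf("attachment 1 -> 0x%x\n", (unsigned)attachment_to_rt(remap, 1));
   return 0;
}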
@@ -797,6 +803,12 @@ struct vk_render_pass_state { /** VkCustomResolveCreateInfoEXT::customResolve */ bool custom_resolve; + + /** VkRenderingAttachmentRemapInfoMESA::colorAttachmentRemap */ + uint8_t color_attachment_remap[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** VkRenderingAttachmentRemapInfoMESA::depthStencilAttachmentEnable */ + bool depth_stencil_attachment_enable; }; static inline bool @@ -943,7 +955,14 @@ struct vk_dynamic_graphics_state { struct vk_color_blend_state cb; struct { + /** MESA_VK_DYNAMIC_RP_ATTACHMENTS */ enum vk_rp_attachment_flags attachments; + + /** MESA_VK_DYNAMIC_RP_REMAP */ + uint8_t color_attachment_remap[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** MESA_VK_DYNAMIC_RP_REMAP */ + bool depth_stencil_attachment_enable; } rp; /** MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE */ @@ -1299,6 +1318,10 @@ void vk_cmd_set_rendering_attachment_locations(struct vk_command_buffer *cmd, const VkRenderingAttachmentLocationInfoKHR *info); +void +vk_cmd_set_rendering_attachment_remap(struct vk_command_buffer *cmd, + const VkRenderingAttachmentRemapInfoMESA *info); + const char * vk_dynamic_graphic_state_to_str(enum mesa_vk_dynamic_graphics_state state); diff --git a/src/vulkan/runtime/vk_image.c b/src/vulkan/runtime/vk_image.c index e3a13845a1b..79cb8fbbc6f 100644 --- a/src/vulkan/runtime/vk_image.c +++ b/src/vulkan/runtime/vk_image.c @@ -778,6 +778,44 @@ vk_image_layout_is_depth_only(VkImageLayout layout) } } +VkImageLayout +vk_image_layout_depth_only(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL: + return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: + return VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + + default: + return layout; + } +} + +VkImageLayout +vk_image_layout_stencil_only(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL: + return VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: + return VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; + + default: + return layout; + } +} + static VkResult vk_image_create_get_format_list_uncompressed(struct vk_device *device, const VkImageCreateInfo *pCreateInfo, diff --git a/src/vulkan/runtime/vk_image.h b/src/vulkan/runtime/vk_image.h index 92689e619e2..86c5be14b01 100644 --- a/src/vulkan/runtime/vk_image.h +++ b/src/vulkan/runtime/vk_image.h @@ -401,6 +401,8 @@ vk_image_view_subresource_range(const struct vk_image_view *view) bool vk_image_layout_is_read_only(VkImageLayout layout, VkImageAspectFlagBits aspect); bool vk_image_layout_is_depth_only(VkImageLayout layout); +VkImageLayout vk_image_layout_depth_only(VkImageLayout layout); +VkImageLayout vk_image_layout_stencil_only(VkImageLayout layout); VkImageUsageFlags vk_image_layout_to_usage_flags(VkImageLayout layout, VkImageAspectFlagBits aspect); diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index b3d51d9a512..a792ccb32a6 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ 
b/src/vulkan/runtime/vk_render_pass.c @@ -31,6 +31,8 @@ #include "vk_format.h" #include "vk_framebuffer.h" #include "vk_image.h" +#include "vk_physical_device.h" +#include "vk_synchronization.h" #include "vk_util.h" #include "vk_android.h" @@ -356,6 +358,25 @@ vk_render_pass_attachment_init(struct vk_render_pass_attachment *att, .has_external_format = vk_android_rp_attachment_has_external_format(desc), }; + + /* We require separate stencil layouts */ + if (att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + att->initial_layout = vk_image_layout_depth_only(att->initial_layout); + att->final_layout = vk_image_layout_depth_only(att->final_layout); + } else if (att->aspects == VK_IMAGE_ASPECT_STENCIL_BIT) { + att->initial_layout = VK_IMAGE_LAYOUT_UNDEFINED; + att->final_layout = VK_IMAGE_LAYOUT_UNDEFINED; + } + + if (att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + att->initial_stencil_layout = + vk_image_layout_stencil_only(att->initial_stencil_layout); + att->final_stencil_layout = + vk_image_layout_stencil_only(att->final_stencil_layout); + } else { + assert(att->initial_stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + assert(att->final_stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + } } static void @@ -385,6 +406,17 @@ vk_subpass_attachment_init(struct vk_subpass_attachment *att, .stencil_layout = vk_att_ref_stencil_layout(ref, attachments), }; + /* We require separate stencil layouts */ + if (att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + att->layout = vk_image_layout_depth_only(att->layout); + else if (att->aspects == VK_IMAGE_ASPECT_STENCIL_BIT) + att->layout = VK_IMAGE_LAYOUT_UNDEFINED; + + if (att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + att->stencil_layout = vk_image_layout_stencil_only(att->stencil_layout); + else + assert(att->stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + switch (usage) { case VK_IMAGE_USAGE_TRANSFER_DST_BIT: break; /* No special aspect requirements */ @@ -426,14 +458,795 @@ vk_subpass_attachment_link_resolve(struct vk_subpass_attachment *att, att->resolve = resolve; } -VKAPI_ATTR VkResult VKAPI_CALL -vk_common_CreateRenderPass2(VkDevice _device, - const VkRenderPassCreateInfo2 *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkRenderPass *pRenderPass) +static VkResolveModeFlagBits +resolve_mode_for_format(VkFormat format) { - VK_FROM_HANDLE(vk_device, device, _device); + if (vk_format_is_int(format)) + return VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + else + return VK_RESOLVE_MODE_AVERAGE_BIT; +} +static uint32_t +vk_render_pass_find_merge_begin(const struct vk_render_pass *pass, + uint32_t subpass_idx) +{ + while (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_BEGIN)) { + /* The first subpass is always a begin */ + assert(subpass_idx > 0); + subpass_idx--; + } + return subpass_idx; +} + +static uint32_t +vk_render_pass_find_merge_end(const struct vk_render_pass *pass, + uint32_t subpass_idx) +{ + while (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_END)) { + /* The last subpass is always an end */ + assert(subpass_idx < pass->subpass_count - 1); + subpass_idx++; + } + return subpass_idx; +} + +struct vk_render { + uint32_t view_mask; + VkSampleCountFlagBits mrtss_samples; + VkSampleCountFlagBits attachment_samples; + bool legacy_dithering_enabled; + + struct vk_subpass_attachment color_attachments[MESA_VK_MAX_COLOR_ATTACHMENTS]; + struct vk_subpass_attachment depth_stencil_attachment; + + VkExtent2D fragment_shading_rate_attachment_texel_size; + struct vk_subpass_attachment fragment_shading_rate_attachment; +}; + +static void 
+vk_render_init_empty(struct vk_render *render) +{ + *render = (struct vk_render) { + .depth_stencil_attachment = { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .fragment_shading_rate_attachment = { + .attachment = VK_ATTACHMENT_UNUSED, + }, + }; + + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + render->color_attachments[c] = (struct vk_subpass_attachment) { + .attachment = VK_ATTACHMENT_UNUSED, + }; + } +} + +#ifndef NDEBUG +static bool +vk_render_has_used_attachment(const struct vk_render *render) +{ + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render->color_attachments[c].attachment != VK_ATTACHMENT_UNUSED) + return true; + } + + if (render->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) + return true; + + return false; +} +#endif + +static bool +vk_render_has_attachments(const struct vk_render *render) +{ + assert(vk_render_has_used_attachment(render) == + (render->attachment_samples != 0)); + return render->attachment_samples != 0; +} + +static bool +vk_render_is_empty(const struct vk_render *render) +{ + if (render->view_mask == 0) + assert(!vk_render_has_attachments(render)); + + return render->view_mask == 0; +} + +static uint32_t +vk_subpass_attachment_samples(const struct vk_render_pass *pass, + const struct vk_subpass_attachment *sp_att) +{ + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + return 0; + + assert(sp_att->attachment < pass->attachment_count); + return pass->attachments[sp_att->attachment].samples; +} + +static void +vk_render_init_for_single_subpass(struct vk_render *render, + const struct vk_render_pass *pass, + const struct vk_subpass *subpass) +{ + vk_render_init_empty(render); + + render->view_mask = subpass->view_mask; + + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[a]; + render->color_attachments[a] = *sp_att; + render->attachment_samples |= vk_subpass_attachment_samples(pass, sp_att); + } + + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + render->depth_stencil_attachment = *sp_att; + render->attachment_samples |= vk_subpass_attachment_samples(pass, sp_att); + } + + if (subpass->fragment_shading_rate_attachment != NULL) { + render->fragment_shading_rate_attachment = + *subpass->fragment_shading_rate_attachment; + } +} + +static bool +vk_render_can_add_subpass(const struct vk_render *render, + const struct vk_render_pass *pass, + const struct vk_subpass *subpass) +{ + const struct vk_physical_device *device = pass->base.device->physical; + + /* We can always add to an empty render */ + if (vk_render_is_empty(render)) + return true; + + /* If the subpass has no attachments, then the sample count is determined + * based on dynamic state and/or pipelines. Merging two of those is + * probably safe if variableMultisampleRate is supported but there also + * doesn't seem to be much point in doing so since there's no tile memory + * to share anyway. We definitely can't merge one with attachments into + * one without. + */ + if (!vk_render_has_attachments(render) || + (subpass->color_count == 0 && subpass->depth_stencil_attachment == NULL)) + return false; + + /* View mask is per-render */ + if (render->view_mask != subpass->view_mask) + return false; + + /* Multisample render to single sample does a multisampled render and then + * immediately resolves the results, leaving no multisampling at the end of + * the subpass. 
 If the previous subpass did MRTSS then it makes no sense + * to merge something into the end of it because that subpass would have + * access to the per-sample values, not the resolved values. + * + * However, if the sample counts otherwise match, then it's fine to have a + * regular multisampled subpass merged with a MRTSS subpass right after it. + */ + if (render->mrtss_samples != 0) + return false; + + if (subpass->mrtss_samples != 0 && + subpass->mrtss_samples != render->attachment_samples) + return false; + + if (subpass->legacy_dithering_enabled != render->legacy_dithering_enabled) + return false; + + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render->color_attachments[c].attachment != VK_ATTACHMENT_UNUSED) + color_count++; + } + + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + /* We could theoretically merge subpasses with different sample counts + * if we have VK_AMD_mixed_attachment_samples or + * VK_NV_framebuffer_mixed_samples but the rules are weird and hard to + * follow and those drivers aren't going to benefit much from subpass + * merging anyway. Just reject merges with differing sample counts. + */ + if (vk_subpass_attachment_samples(pass, sp_att) != + render->attachment_samples) + return false; + + bool found = false; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + const struct vk_subpass_attachment *r_att = + &render->color_attachments[c]; + if (r_att->attachment == sp_att->attachment) { + /* If a previous subpass resolves this attachment then we need to + * do that resolve BEFORE this subpass writes anything. We can't + * merge them. + */ + if (r_att->resolve != NULL) + return false; + + found = true; + break; + } + } + + /* If we didn't find it, we'll need a new attachment */ + if (!found) + color_count++; + } + + if (color_count > device->properties.maxColorAttachments) + return false; + + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + const struct vk_subpass_attachment *r_att = + &render->depth_stencil_attachment; + + /* We can only have one depth/stencil attachment bound and can't switch + * it mid-rendering. If there's a mismatch, we can't merge. + */ + assert(sp_att->attachment != VK_ATTACHMENT_UNUSED); + if (r_att->attachment != VK_ATTACHMENT_UNUSED && + r_att->attachment != sp_att->attachment) + return false; + + /* See the similar check for color above */ + if (vk_subpass_attachment_samples(pass, sp_att) != + render->attachment_samples) + return false; + + /* If a previous subpass resolves depth/stencil then we need to do that + * resolve BEFORE this subpass writes anything. We can't merge them + * unless this subpass treats the depth/stencil attachment as read-only. + */ + if (r_att->resolve != NULL) { + if ((sp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + !vk_image_layout_is_read_only(sp_att->layout, + VK_IMAGE_ASPECT_DEPTH_BIT)) + return false; + + if ((sp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + !vk_image_layout_is_read_only(sp_att->stencil_layout, + VK_IMAGE_ASPECT_STENCIL_BIT)) + return false; + + /* Or if this subpass also has its own resolve attachment. We can't + * resolve twice with a single CmdBegin/EndRendering(). 
+ */ + if (sp_att->resolve != NULL) + return false; + } + } + + if (subpass->fragment_shading_rate_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->fragment_shading_rate_attachment; + const struct vk_subpass_attachment *r_att = + &render->fragment_shading_rate_attachment; + + /* We can't use a fragment shading rate attachment that was written as a + * color attachment in a previous subpass. This shouldn't even really + * be possible given size and other constraints but it doesn't hurt to + * check. + */ + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render->color_attachments[c].attachment == sp_att->attachment) + return false; + } + + /* We can only have one fragment shading rate attachment bound and can't + * switch it mid-rendering. If there's a mismatch, we can't merge. + */ + assert(sp_att->attachment != VK_ATTACHMENT_UNUSED); + if (r_att->attachment != VK_ATTACHMENT_UNUSED && + r_att->attachment != sp_att->attachment) + return false; + + if (subpass->fragment_shading_rate_attachment_texel_size.width != + render->fragment_shading_rate_attachment_texel_size.width || + subpass->fragment_shading_rate_attachment_texel_size.height != + render->fragment_shading_rate_attachment_texel_size.height) + return false; + } + + return true; +} + +/* Earlier layouts win. We prefer read/write over read-only and + * GENERAL over specific layouts because, presumably, the client had a + * reason for specifying GENERAL. + */ +static const VkImageLayout color_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static const VkImageLayout depth_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static const VkImageLayout stencil_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static const VkImageLayout fsr_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static VkImageLayout +choose_best_layout(VkImageLayout a, VkImageLayout b, + const VkImageLayout *ranking) +{ + if (a == b) + return a; + + for (uint32_t i = 0; ranking[i] != VK_IMAGE_LAYOUT_MAX_ENUM; i++) { + if (ranking[i] == a) + return a; + if (ranking[i] == b) + return b; + } + + UNREACHABLE("Invalid layout for aspect"); + return VK_IMAGE_LAYOUT_GENERAL; +} + +static void +vk_subpass_attachment_merge(struct vk_subpass_attachment *r_att, + const struct vk_subpass_attachment *sp_att, + const VkImageLayout *layout_ranking) +{ + assert(r_att->aspects == sp_att->aspects); + r_att->usage |= sp_att->usage; + + r_att->layout = choose_best_layout(r_att->layout, sp_att->layout, + layout_ranking); + if (r_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + r_att->stencil_layout = choose_best_layout(r_att->stencil_layout, + sp_att->stencil_layout, + stencil_layout_ranking); + } else { + assert(r_att->stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + } + + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + + assert(r_att->resolve_mode == VK_RESOLVE_MODE_NONE); + assert(r_att->stencil_resolve_mode == VK_RESOLVE_MODE_NONE); + r_att->resolve_mode = sp_att->resolve_mode; + r_att->stencil_resolve_mode = 
sp_att->stencil_resolve_mode; + + if (sp_att->resolve != NULL) { + assert(r_att->resolve == NULL); + r_att->resolve = sp_att->resolve; + } +} + +static void +vk_render_add_subpass(struct vk_render *render, + const struct vk_render_pass *pass, + const struct vk_subpass *subpass) +{ + assert(vk_render_can_add_subpass(render, pass, subpass)); + + if (render->view_mask == 0) + render->view_mask = subpass->view_mask; + else + assert(render->view_mask == subpass->view_mask); + + if (subpass->mrtss_samples != 0) { + assert(render->mrtss_samples == 0); + render->mrtss_samples = subpass->mrtss_samples; + } + + render->legacy_dithering_enabled |= subpass->legacy_dithering_enabled; + + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + ASSERTED bool found_or_inserted = false; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + struct vk_subpass_attachment *r_att = &render->color_attachments[c]; + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + *r_att = *sp_att; + render->attachment_samples |= + vk_subpass_attachment_samples(pass, sp_att); + found_or_inserted = true; + break; + } else if (r_att->attachment == sp_att->attachment) { + vk_subpass_attachment_merge(r_att, sp_att, color_layout_ranking); + found_or_inserted = true; + break; + } + } + assert(found_or_inserted); + } + + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + struct vk_subpass_attachment *r_att = + &render->depth_stencil_attachment; + + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + *r_att = *sp_att; + render->attachment_samples |= + vk_subpass_attachment_samples(pass, sp_att); + } else { + vk_subpass_attachment_merge(r_att, sp_att, depth_layout_ranking); + } + } + + if (subpass->fragment_shading_rate_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->fragment_shading_rate_attachment; + struct vk_subpass_attachment *r_att = + &render->fragment_shading_rate_attachment; + + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + render->fragment_shading_rate_attachment_texel_size = + subpass->fragment_shading_rate_attachment_texel_size; + *r_att = *sp_att; + } else { + assert(subpass->fragment_shading_rate_attachment_texel_size.width == + render->fragment_shading_rate_attachment_texel_size.width); + assert(subpass->fragment_shading_rate_attachment_texel_size.height == + render->fragment_shading_rate_attachment_texel_size.height); + vk_subpass_attachment_merge(r_att, sp_att, fsr_layout_ranking); + } + } + + /* We don't actually bind input attachments directly here. However, since + * we extend the lifetimes of subpass attachments, we need to take input + * attachments into account in the first/last_subpass flags so they + * accurately represent the entire range described by the render. 
+ */ + for (uint32_t a = 0; a < subpass->input_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->input_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + if (sp_att->first_subpass == 0 && sp_att->last_subpass == 0) + continue; + + struct vk_subpass_attachment *r_att; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + r_att = &render->color_attachments[c]; + if (r_att->attachment == sp_att->attachment) { + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + break; + } + } + + r_att = &render->depth_stencil_attachment; + if (r_att->attachment == sp_att->attachment) { + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + } + + r_att = &render->fragment_shading_rate_attachment; + if (r_att->attachment == sp_att->attachment) { + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + } + } +} + +static bool +vk_subpass_dependency_allowed_mid_subpass(const struct vk_subpass_dependency *dep, + uint32_t view_mask) +{ + /* From the Vulkan 1.4.335 spec: + * + * "If srcSubpass is equal to dstSubpass then the VkSubpassDependency + * does not directly define a dependency. Instead, it enables pipeline + * barriers to be used in a render pass instance within the identified + * subpass, where the scopes of one pipeline barrier must be a subset of + * those described by one subpass dependency. Subpass dependencies + * specified in this way that include framebuffer-space stages in the + * srcStageMask must only include framebuffer-space stages in + * dstStageMask, and must include VK_DEPENDENCY_BY_REGION_BIT." + */ + + const VkPipelineStageFlags2 src_stage_mask = + vk_expand_src_stage_flags2(dep->src_stage_mask); + + const VkPipelineStageFlags2 framebuffer_stage_mask = + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + + if (!(src_stage_mask & framebuffer_stage_mask)) + return true; + + const VkPipelineStageFlags2 dst_stage_mask = + vk_expand_dst_stage_flags2(dep->dst_stage_mask); + + if (dst_stage_mask & ~framebuffer_stage_mask) + return false; + + if (!(dep->flags & VK_DEPENDENCY_BY_REGION_BIT)) + return false; + + /* From the Vulkan 1.4.335 spec: + * + * "When a subpass dependency is specified in this way for a subpass + * that has more than one view in its view mask, its dependencyFlags + * must include VK_DEPENDENCY_VIEW_LOCAL_BIT." + */ + if (util_bitcount(view_mask) > 1) { + if (!(dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT)) + return false; + + /* This is a bit conservative because Vulkan technically allows you to + * thread subpasses together that have different view masks but where + * views in one subpass map to views in another. We really don't want + * to deal with those shenanigans so we just disallow subpass merging + * whenever viewOffset != 0. This is safe to do (and even assert!) + * because this is required when srcSubpass == dstSubpass. 
+ */ + if (dep->view_offset != 0) + return false; + } + + return true; +} + +static uint32_t +vk_subpass_get_color_attachment_index(const struct vk_subpass *subpass, + uint32_t attachment) +{ + uint32_t color_index = VK_ATTACHMENT_UNUSED; + for (uint32_t i = 0; i < subpass->color_count; i++) { + if (subpass->color_attachments[i].attachment == attachment) { + assert(color_index == VK_ATTACHMENT_UNUSED); + color_index = i; + } + } + return color_index; +} + +static uint32_t +vk_subpass_get_input_attachment_index(const struct vk_subpass *subpass, + uint32_t attachment) +{ + uint32_t input_index = VK_ATTACHMENT_UNUSED; + for (uint32_t i = 0; i < subpass->input_count; i++) { + if (subpass->input_attachments[i].attachment == attachment) { + assert(input_index == VK_ATTACHMENT_UNUSED); + input_index = i; + } + } + return input_index; +} + +static void +vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, + uint32_t subpass_idx, + const struct vk_render *render) +{ + struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + uint32_t rar_colors[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + STATIC_ASSERT(VK_ATTACHMENT_UNUSED == (uint32_t)-1); + memset(subpass->ial.colors, -1, sizeof(subpass->ial.colors)); + memset(rar_colors, -1, sizeof(rar_colors)); + + STATIC_ASSERT(VK_FORMAT_UNDEFINED == 0); + memset(subpass->color_attachment_formats, 0, + sizeof(subpass->color_attachment_formats)); + + memset(subpass->sample_count_amd.samples, 0, + sizeof(subpass->sample_count_amd.samples)); + + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + const struct vk_subpass_attachment *r_att = &render->color_attachments[c]; + if (r_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(r_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[r_att->attachment]; + + color_count = MAX2(color_count, c + 1); + + rar_colors[c] = + vk_subpass_get_color_attachment_index(subpass, r_att->attachment); + subpass->ial.colors[c] = + vk_subpass_get_input_attachment_index(subpass, r_att->attachment); + + subpass->color_attachment_formats[c] = rp_att->format; + subpass->sample_count_amd.samples[c] = rp_att->samples; + } + + subpass->ial.depth = VK_ATTACHMENT_UNUSED; + subpass->ial.stencil = VK_ATTACHMENT_UNUSED; + + VkFormat depth_format = VK_FORMAT_UNDEFINED; + VkFormat stencil_format = VK_FORMAT_UNDEFINED; + VkSampleCountFlagBits depth_stencil_samples = VK_SAMPLE_COUNT_1_BIT; + if (render->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render->depth_stencil_attachment; + assert(r_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[r_att->attachment]; + + const uint32_t ia_idx = + vk_subpass_get_input_attachment_index(subpass, r_att->attachment); + + if (rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + depth_format = rp_att->format; + subpass->ial.depth = ia_idx; + } + + if (rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + stencil_format = rp_att->format; + subpass->ial.stencil = ia_idx; + } + + depth_stencil_samples = rp_att->samples; + } + + subpass->sample_count_amd.info = (VkAttachmentSampleCountInfoAMD) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, + .pNext = NULL, + .colorAttachmentCount = color_count, + .pColorAttachmentSamples = subpass->sample_count_amd.samples, + .depthStencilAttachmentSamples = depth_stencil_samples, + }; + + subpass->rar_info = 
(VkRenderingAttachmentRemapInfoMESA) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_REMAP_INFO_MESA, + .pNext = &subpass->sample_count_amd.info, + .depthStencilAttachmentEnable = + subpass->depth_stencil_attachment != NULL, + }; + STATIC_ASSERT(sizeof(subpass->rar_info.colorAttachmentRemap) == + sizeof(rar_colors)); + memcpy(subpass->rar_info.colorAttachmentRemap, + rar_colors, sizeof(rar_colors)); + + subpass->ial.info = (VkRenderingInputAttachmentIndexInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_INPUT_ATTACHMENT_INDEX_INFO, + .pNext = &subpass->rar_info, + .colorAttachmentCount = color_count, + .pColorAttachmentInputIndices = subpass->ial.colors, + /* From the Vulkan 1.3.204 spec: + * + * VUID-vkCmdDraw-OpTypeImage-07468 + * + * "If any shader executed by this pipeline accesses an OpTypeImage + * variable with a Dim operand of SubpassData, it must be decorated + * with an InputAttachmentIndex that corresponds to a valid input + * attachment in the current subpass." + * + * So we don't have to worry about the missing InputAttachmentIndex + * decoration (AKA NO_INDEX) here, the depth/stencil attachment is + * either not used as an input attachment, or it has an explicit + * index. + */ + .pDepthInputAttachmentIndex = &subpass->ial.depth, + .pStencilInputAttachmentIndex = &subpass->ial.stencil, + }; + + subpass->pipeline_info = (VkPipelineRenderingCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, + .pNext = &subpass->ial.info, + .viewMask = pass->is_multiview ? render->view_mask : 0, + .colorAttachmentCount = color_count, + .pColorAttachmentFormats = subpass->color_attachment_formats, + .depthAttachmentFormat = depth_format, + .stencilAttachmentFormat = stencil_format, + }; + + subpass->inheritance_info = (VkCommandBufferInheritanceRenderingInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, + .pNext = &subpass->ial.info, + /* If we're inheriting, the contents are clearly in secondaries */ + .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, + .viewMask = pass->is_multiview ? render->view_mask : 0, + .colorAttachmentCount = color_count, + .pColorAttachmentFormats = subpass->color_attachment_formats, + .depthAttachmentFormat = depth_format, + .stencilAttachmentFormat = stencil_format, + .rasterizationSamples = render->attachment_samples, + }; +} + +bool +vk_render_pass_try_merge_subpass(struct vk_render_pass *pass, + uint32_t subpass_idx) +{ + assert(subpass_idx > 0); + assert(pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE); + + const uint32_t merge_begin = + vk_render_pass_find_merge_begin(pass, subpass_idx - 1); + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->dst_subpass == VK_SUBPASS_EXTERNAL) + continue; + + assert(dep->src_subpass <= dep->dst_subpass); + + if (dep->src_subpass < merge_begin || subpass_idx < dep->dst_subpass) + continue; + + if (dep->src_subpass == dep->dst_subpass) + continue; + + /* We won't merge them unless the view masks match anyway, so it doesn't + * matter which one we choose. 
+ */ + const uint32_t view_mask = + pass->subpasses[dep->dst_subpass].view_mask; + if (!vk_subpass_dependency_allowed_mid_subpass(dep, view_mask)) + return false; + } + + struct vk_render render; + vk_render_init_empty(&render); + for (uint32_t s = merge_begin; s < subpass_idx; s++) + vk_render_add_subpass(&render, pass, &pass->subpasses[s]); + + if (!vk_render_can_add_subpass(&render, pass, + &pass->subpasses[subpass_idx])) + return false; + + vk_render_add_subpass(&render, pass, &pass->subpasses[subpass_idx]); + + /* Adjust the merge flags */ + assert(pass->subpasses[subpass_idx - 1].merge & MESA_VK_SUBPASS_MERGE_END); + assert(pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE); + + pass->subpasses[subpass_idx - 1].merge &= ~MESA_VK_SUBPASS_MERGE_END; + pass->subpasses[subpass_idx].merge = MESA_VK_SUBPASS_MERGE_END; + + /* Regenerate pipeline info */ + for (uint32_t s = merge_begin; s <= subpass_idx; s++) + vk_subpass_init_pipeline_infos(pass, s, &render); + + return true; +} + +struct vk_render_pass * +vk_render_pass_create(struct vk_device *device, + const VkRenderPassCreateInfo2 *pCreateInfo, + const VkAllocationCallbacks *alloc) +{ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2); VK_MULTIALLOC(ma); @@ -446,23 +1259,15 @@ vk_common_CreateRenderPass2(VkDevice _device, pCreateInfo->dependencyCount); uint32_t subpass_attachment_count = 0; - uint32_t subpass_color_attachment_count = 0; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { subpass_attachment_count += num_subpass_attachments2(&pCreateInfo->pSubpasses[i]); - subpass_color_attachment_count += - pCreateInfo->pSubpasses[i].colorAttachmentCount; } VK_MULTIALLOC_DECL(&ma, struct vk_subpass_attachment, subpass_attachments, subpass_attachment_count); - VK_MULTIALLOC_DECL(&ma, VkFormat, subpass_color_formats, - subpass_color_attachment_count); - VK_MULTIALLOC_DECL(&ma, VkSampleCountFlagBits, subpass_color_samples, - subpass_color_attachment_count); - if (!vk_object_multizalloc(device, &ma, pAllocator, - VK_OBJECT_TYPE_RENDER_PASS)) - return VK_ERROR_OUT_OF_HOST_MEMORY; + if (!vk_object_multizalloc(device, &ma, alloc, VK_OBJECT_TYPE_RENDER_PASS)) + return NULL; pass->attachment_count = pCreateInfo->attachmentCount; pass->attachments = attachments; @@ -477,16 +1282,11 @@ vk_common_CreateRenderPass2(VkDevice _device, } struct vk_subpass_attachment *next_subpass_attachment = subpass_attachments; - VkFormat *next_subpass_color_format = subpass_color_formats; - VkSampleCountFlagBits *next_subpass_color_samples = subpass_color_samples; for (uint32_t s = 0; s < pCreateInfo->subpassCount; s++) { const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[s]; struct vk_subpass *subpass = &pass->subpasses[s]; - const VkMultisampledRenderToSingleSampledInfoEXT *mrtss = - vk_find_struct_const(desc->pNext, MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); - if (mrtss && !mrtss->multisampledRenderToSingleSampledEnable) - mrtss = NULL; + subpass->merge = MESA_VK_SUBPASS_MERGE_SINGLE; subpass->attachment_count = num_subpass_attachments2(desc); subpass->attachments = next_subpass_attachment; @@ -512,6 +1312,11 @@ vk_common_CreateRenderPass2(VkDevice _device, subpass->view_mask = desc->viewMask ? 
desc->viewMask : 1; pass->view_mask |= subpass->view_mask; + const VkMultisampledRenderToSingleSampledInfoEXT *mrtss_info = + vk_find_struct_const(desc->pNext, MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); + if (mrtss_info && mrtss_info->multisampledRenderToSingleSampledEnable) + subpass->mrtss_samples = mrtss_info->rasterizationSamples; + subpass->input_count = desc->inputAttachmentCount; if (desc->inputAttachmentCount > 0) { subpass->input_attachments = next_subpass_attachment; @@ -557,6 +1362,35 @@ vk_common_CreateRenderPass2(VkDevice _device, } } + for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { + struct vk_subpass_attachment *sp_att = &subpass->color_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + if (sp_att->resolve != NULL) { + assert(rp_att->samples > 1); + + assert(sp_att->resolve->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *resolve_rp_att = + &pass->attachments[sp_att->resolve->attachment]; + + if (resolve_rp_att->has_external_format) { + sp_att->resolve_mode = + VK_RESOLVE_MODE_EXTERNAL_FORMAT_DOWNSAMPLE_BIT_ANDROID; + } else { + assert(resolve_rp_att->format == rp_att->format); + sp_att->resolve_mode = resolve_mode_for_format(rp_att->format); + } + } else if (subpass->mrtss_samples != 0 && + rp_att->samples == VK_SAMPLE_COUNT_1_BIT) { + sp_att->resolve_mode = resolve_mode_for_format(rp_att->format); + } + } + if (desc->pDepthStencilAttachment && desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { subpass->depth_stencil_attachment = next_subpass_attachment++; @@ -572,7 +1406,7 @@ vk_common_CreateRenderPass2(VkDevice _device, vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); - if (ds_resolve) { + if (ds_resolve && subpass->depth_stencil_attachment) { if (ds_resolve->pDepthStencilResolveAttachment && ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { subpass->depth_stencil_resolve_attachment = next_subpass_attachment++; @@ -586,7 +1420,39 @@ vk_common_CreateRenderPass2(VkDevice _device, subpass->depth_stencil_resolve_attachment, pCreateInfo); } - if (subpass->depth_stencil_resolve_attachment || mrtss) { + + struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + if (subpass->depth_stencil_resolve_attachment) + assert(rp_att->samples > 1); + + VkImageAspectFlags resolve_aspects = rp_att->aspects; + if (subpass->depth_stencil_resolve_attachment) { + struct vk_subpass_attachment *resolve_sp_att = + subpass->depth_stencil_resolve_attachment; + assert(resolve_sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *resolve_rp_att = + &pass->attachments[resolve_sp_att->attachment]; + + /* From the Vulkan 1.3.204 spec: + * + * "VkSubpassDescriptionDepthStencilResolve::depthResolveMode is + * ignored if the VkFormat of the pDepthStencilResolveAttachment + * does not have a depth component. Similarly, + * VkSubpassDescriptionDepthStencilResolve::stencilResolveMode is + * ignored if the VkFormat of the pDepthStencilResolveAttachment + * does not have a stencil component." 
+ */ + resolve_aspects &= resolve_rp_att->aspects; + } + + if (subpass->depth_stencil_resolve_attachment || + (subpass->mrtss_samples != 0 && + rp_att->samples == VK_SAMPLE_COUNT_1_BIT)) { /* From the Vulkan 1.3.204 spec: * * VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03178 * * "If pDepthStencilResolveAttachment is not NULL and does not * have the value VK_ATTACHMENT_UNUSED, depthResolveMode and * stencilResolveMode must not both be VK_RESOLVE_MODE_NONE" */ assert(ds_resolve->depthResolveMode != VK_RESOLVE_MODE_NONE || ds_resolve->stencilResolveMode != VK_RESOLVE_MODE_NONE); - subpass->depth_resolve_mode = ds_resolve->depthResolveMode; - subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; + if (resolve_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + sp_att->resolve_mode = ds_resolve->depthResolveMode; + if (resolve_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + sp_att->stencil_resolve_mode = ds_resolve->stencilResolveMode; } + + /* Technically, I don't think this is supposed to happen, but there's + * enough craziness around resolves and depth/stencil aspects that we may + * as well do one final check just to be sure. + */ + if (sp_att->resolve_mode == VK_RESOLVE_MODE_NONE && + sp_att->stencil_resolve_mode == VK_RESOLVE_MODE_NONE) + sp_att->resolve = NULL; } const VkFragmentShadingRateAttachmentInfoKHR *fsr_att_info = @@ -663,149 +1539,12 @@ vk_common_CreateRenderPass2(VkDevice _device, } } - VkFormat *color_formats = NULL; - VkSampleCountFlagBits *color_samples = NULL; - VkSampleCountFlagBits samples = 0; - if (desc->colorAttachmentCount > 0) { - color_formats = next_subpass_color_format; - color_samples = next_subpass_color_samples; - for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { - const VkAttachmentReference2 *ref = &desc->pColorAttachments[a]; - if (ref->attachment >= pCreateInfo->attachmentCount) { - color_formats[a] = VK_FORMAT_UNDEFINED; - color_samples[a] = VK_SAMPLE_COUNT_1_BIT; - } else { - const VkAttachmentDescription2 *att = - &pCreateInfo->pAttachments[ref->attachment]; - - color_formats[a] = att->format; - color_samples[a] = att->samples; - - samples |= att->samples; - } - } - next_subpass_color_format += desc->colorAttachmentCount; - next_subpass_color_samples += desc->colorAttachmentCount; - } - - subpass->ial.depth = VK_ATTACHMENT_UNUSED; - subpass->ial.stencil = VK_ATTACHMENT_UNUSED; - - VkFormat depth_format = VK_FORMAT_UNDEFINED; - VkFormat stencil_format = VK_FORMAT_UNDEFINED; - VkSampleCountFlagBits depth_stencil_samples = VK_SAMPLE_COUNT_1_BIT; - if (desc->pDepthStencilAttachment != NULL) { - const VkAttachmentReference2 *ref = desc->pDepthStencilAttachment; - if (ref->attachment < pCreateInfo->attachmentCount) { - const VkAttachmentDescription2 *att = - &pCreateInfo->pAttachments[ref->attachment]; - uint32_t ia_idx = VK_ATTACHMENT_UNUSED; - - for (uint32_t j = 0; j < subpass->input_count; j++) { - if (subpass->input_attachments[j].attachment == ref->attachment) - ia_idx = j; - } - - if (vk_format_has_depth(att->format)) { - depth_format = att->format; - subpass->ial.depth = ia_idx; - } - - if (vk_format_has_stencil(att->format)) { - stencil_format = att->format; - subpass->ial.stencil = ia_idx; - } - - depth_stencil_samples = att->samples; - - samples |= att->samples; - } - } - - subpass->sample_count_info_amd = (VkAttachmentSampleCountInfoAMD) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, - .pNext = NULL, - .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentSamples = color_samples, - .depthStencilAttachmentSamples = depth_stencil_samples, - }; - - subpass->ial.info = (VkRenderingInputAttachmentIndexInfo) 
{ - .sType = VK_STRUCTURE_TYPE_RENDERING_INPUT_ATTACHMENT_INDEX_INFO, - .pNext = &subpass->sample_count_info_amd, - .colorAttachmentCount = subpass->color_count, - .pColorAttachmentInputIndices = subpass->ial.colors, - /* From the Vulkan 1.3.204 spec: - * - * VUID-vkCmdDraw-OpTypeImage-07468 - * - * "If any shader executed by this pipeline accesses an OpTypeImage - * variable with a Dim operand of SubpassData, it must be decorated - * with an InputAttachmentIndex that corresponds to a valid input - * attachment in the current subpass." - * - * So we don't have to worry about the missing InputAttachmentIndex - * decoration (AKA NO_INDEX) here, the depth/stencil attachment is - * either not used as an input attachment, or it has an explicit - * index. - */ - .pDepthInputAttachmentIndex = &subpass->ial.depth, - .pStencilInputAttachmentIndex = &subpass->ial.stencil, - }; - - /* Build the color -> input attachment map. */ - for (uint32_t i = 0; i < subpass->color_count; i++) { - subpass->ial.colors[i] = VK_ATTACHMENT_UNUSED; - - if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) - continue; - - for (uint32_t j = 0; j < subpass->input_count; j++) { - if (subpass->input_attachments[j].attachment == - subpass->color_attachments[i].attachment) { - subpass->ial.colors[i] = j; - } - } - } - - subpass->pipeline_info = (VkPipelineRenderingCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, - .pNext = &subpass->ial.info, - .viewMask = desc->viewMask, - .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentFormats = color_formats, - .depthAttachmentFormat = depth_format, - .stencilAttachmentFormat = stencil_format, - }; - - subpass->inheritance_info = (VkCommandBufferInheritanceRenderingInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, - .pNext = &subpass->ial.info, - /* If we're inheriting, the contents are clearly in secondaries */ - .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, - .viewMask = desc->viewMask, - .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentFormats = color_formats, - .depthAttachmentFormat = depth_format, - .stencilAttachmentFormat = stencil_format, - .rasterizationSamples = samples, - }; - - if (mrtss) { - assert(mrtss->multisampledRenderToSingleSampledEnable); - subpass->mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { - .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, - .multisampledRenderToSingleSampledEnable = VK_TRUE, - .rasterizationSamples = mrtss->rasterizationSamples, - }; - } + struct vk_render render; + vk_render_init_for_single_subpass(&render, pass, subpass); + vk_subpass_init_pipeline_infos(pass, s, &render); } assert(next_subpass_attachment == subpass_attachments + subpass_attachment_count); - assert(next_subpass_color_format == - subpass_color_formats + subpass_color_attachment_count); - assert(next_subpass_color_samples == - subpass_color_samples + subpass_color_attachment_count); /* Walk forwards over the subpasses to compute first_subpass masks for all * attachments. 
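The merge bookkeeping used throughout this patch leans on vk_render_pass_find_merge_begin() and vk_render_pass_find_merge_end(), whose definitions are not shown in this excerpt. From the call sites, the MESA_VK_SUBPASS_MERGE_BEGIN/END flags partition pass->subpasses[] into contiguous merged ranges. The following is only an illustrative sketch of the walk those call sites imply, under the assumption that MESA_VK_SUBPASS_MERGE_SINGLE == (MESA_VK_SUBPASS_MERGE_BEGIN | MESA_VK_SUBPASS_MERGE_END), so an unmerged subpass forms a one-element range; it is not the patch's actual code:

static uint32_t
find_merge_begin_sketch(const struct vk_render_pass *pass, uint32_t s)
{
   /* Walk backwards to the subpass that opens this merged range */
   while (!(pass->subpasses[s].merge & MESA_VK_SUBPASS_MERGE_BEGIN)) {
      assert(s > 0);
      s--;
   }
   return s;
}

static uint32_t
find_merge_end_sketch(const struct vk_render_pass *pass, uint32_t s)
{
   /* Walk forwards to the subpass that closes this merged range */
   while (!(pass->subpasses[s].merge & MESA_VK_SUBPASS_MERGE_END)) {
      assert(s + 1 < pass->subpass_count);
      s++;
   }
   return s;
}

Under that reading, vk_render_pass_try_merge_subpass() clearing MESA_VK_SUBPASS_MERGE_END on subpass_idx - 1 and setting it on subpass_idx is exactly the step that grows a range by one subpass while keeping every subpass inside a single BEGIN..END range.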
@@ -904,6 +1643,16 @@ vk_common_CreateRenderPass2(VkDevice _device, .view_offset = dep->viewOffset, }; + if (dep->srcSubpass == VK_SUBPASS_EXTERNAL) { + assert(dep->dstSubpass < pass->subpass_count); + pass->subpasses[dep->dstSubpass].has_external_src_dependency = true; + } + + if (dep->dstSubpass == VK_SUBPASS_EXTERNAL) { + assert(dep->srcSubpass < pass->subpass_count); + pass->subpasses[dep->srcSubpass].has_external_dst_dependency = true; + } + /* From the Vulkan 1.3.204 spec: * * "If a VkMemoryBarrier2 is included in the pNext chain, @@ -931,6 +1680,25 @@ vk_common_CreateRenderPass2(VkDevice _device, pass->fragment_density_map.layout = VK_IMAGE_LAYOUT_UNDEFINED; } + return pass; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateRenderPass2(VkDevice _device, + const VkRenderPassCreateInfo2 *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + struct vk_render_pass *pass = + vk_render_pass_create(device, pCreateInfo, pAllocator); + if (pass == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + for (uint32_t s = 1; s < pass->subpass_count; s++) + vk_render_pass_try_merge_subpass(pass, s); + *pRenderPass = vk_render_pass_to_handle(pass); return VK_SUCCESS; @@ -948,6 +1716,18 @@ vk_get_pipeline_rendering_create_info(const VkGraphicsPipelineCreateInfo *info) return vk_find_struct_const(info->pNext, PIPELINE_RENDERING_CREATE_INFO); } +const VkRenderingAttachmentRemapInfoMESA * +vk_get_pipeline_rendering_ar_info(const VkGraphicsPipelineCreateInfo *info) +{ + VK_FROM_HANDLE(vk_render_pass, render_pass, info->renderPass); + if (render_pass != NULL) { + assert(info->subpass < render_pass->subpass_count); + return &render_pass->subpasses[info->subpass].rar_info; + } + + return NULL; +} + const VkRenderingInputAttachmentIndexInfo * vk_get_pipeline_rendering_ial_info(const VkGraphicsPipelineCreateInfo *info) { @@ -983,7 +1763,7 @@ vk_get_pipeline_sample_count_info_amd(const VkGraphicsPipelineCreateInfo *info) VK_FROM_HANDLE(vk_render_pass, render_pass, info->renderPass); if (render_pass != NULL) { assert(info->subpass < render_pass->subpass_count); - return &render_pass->subpasses[info->subpass].sample_count_info_amd; + return &render_pass->subpasses[info->subpass].sample_count_amd.info; } return vk_find_struct_const(info->pNext, ATTACHMENT_SAMPLE_COUNT_INFO_AMD); @@ -1083,13 +1863,40 @@ vk_get_command_buffer_inheritance_as_rendering_resume( if (pass == NULL) return NULL; - assert(inheritance->subpass < pass->subpass_count); - const struct vk_subpass *subpass = &pass->subpasses[inheritance->subpass]; + const uint32_t subpass_idx = inheritance->subpass; + assert(subpass_idx < pass->subpass_count); VK_FROM_HANDLE(vk_framebuffer, fb, inheritance->framebuffer); if (fb == NULL || (fb->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) return NULL; + const uint32_t merge_begin = + vk_render_pass_find_merge_begin(pass, subpass_idx); + const uint32_t merge_end = + vk_render_pass_find_merge_end(pass, subpass_idx); + + struct vk_render render; + if (pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE) { + /* For singular subpasses, we use init_for_single_subpass, which doesn't + * re-arrange anything so we avoid requiring DRLR and + * CmdSetRenderingAttachmentLocationInfo(). 
+ */ + vk_render_init_for_single_subpass(&render, pass, + &pass->subpasses[subpass_idx]); + } else { + vk_render_init_empty(&render); + for (uint32_t s = merge_begin; s <= merge_end; s++) + vk_render_add_subpass(&render, pass, &pass->subpasses[s]); + } + + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render.color_attachments[c].attachment != VK_ATTACHMENT_UNUSED) + color_count = MAX2(color_count, c + 1); + } + const bool has_ds = + render.depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED; + data->rendering = (VkRenderingInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, .flags = VK_RENDERING_RESUMING_BIT, @@ -1098,57 +1905,57 @@ vk_get_command_buffer_inheritance_as_rendering_resume( .extent = { fb->width, fb->height }, }, .layerCount = fb->layers, - .viewMask = pass->is_multiview ? subpass->view_mask : 0, + .viewMask = pass->is_multiview ? render.view_mask : 0, }; VkRenderingAttachmentInfo *attachments = data->attachments; - VkRenderingAttachmentFlagsInfoKHR *attachments_flags = (VkRenderingAttachmentFlagsInfoKHR *) - (data->attachments + subpass->color_count + - 2 * (subpass->depth_stencil_attachment != NULL)); + VkRenderingAttachmentFlagsInfoKHR *attachments_flags = + (VkRenderingAttachmentFlagsInfoKHR *) + (data->attachments + color_count + 2 * has_ds); - for (unsigned i = 0; i < subpass->color_count; i++) { - const struct vk_subpass_attachment *sp_att = - &subpass->color_attachments[i]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { - attachments[i] = (VkRenderingAttachmentInfo) { + for (unsigned c = 0; c < color_count; c++) { + const struct vk_subpass_attachment *r_att = &render.color_attachments[c]; + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + attachments[c] = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = VK_NULL_HANDLE, }; continue; } - assert(sp_att->attachment < pass->attachment_count); + assert(r_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = &pass->attachments[r_att->attachment]; - attachments_flags[i] = (VkRenderingAttachmentFlagsInfoKHR) { + attachments_flags[c] = (VkRenderingAttachmentFlagsInfoKHR) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_FLAGS_INFO_KHR, .flags = vk_attachment_description_flags_to_rendering_flags( - pass->attachments[sp_att->attachment].flags), + rp_att->flags), }; - attachments[i] = (VkRenderingAttachmentInfo) { + attachments[c] = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, - .pNext = &attachments_flags[i], - .imageView = fb->attachments[sp_att->attachment], - .imageLayout = sp_att->layout, + .pNext = &attachments_flags[c], + .imageView = fb->attachments[r_att->attachment], + .imageLayout = r_att->layout, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, }; } - data->rendering.colorAttachmentCount = subpass->color_count; + data->rendering.colorAttachmentCount = color_count; data->rendering.pColorAttachments = attachments; - attachments += subpass->color_count; + attachments += color_count; - if (subpass->depth_stencil_attachment) { - const struct vk_subpass_attachment *sp_att = - subpass->depth_stencil_attachment; - assert(sp_att->attachment < pass->attachment_count); + if (render.depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render.depth_stencil_attachment; + assert(r_att->attachment < pass->attachment_count); - VK_FROM_HANDLE(vk_image_view, iview, 
fb->attachments[sp_att->attachment]); + VK_FROM_HANDLE(vk_image_view, iview, fb->attachments[r_att->attachment]); if (iview->image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { *attachments = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = vk_image_view_to_handle(iview), - .imageLayout = sp_att->layout, + .imageLayout = r_att->layout, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, }; @@ -1159,7 +1966,7 @@ vk_get_command_buffer_inheritance_as_rendering_resume( *attachments = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = vk_image_view_to_handle(iview), - .imageLayout = sp_att->stencil_layout, + .imageLayout = r_att->stencil_layout, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, }; @@ -1167,26 +1974,36 @@ vk_get_command_buffer_inheritance_as_rendering_resume( } } - if (subpass->fragment_shading_rate_attachment) { - const struct vk_subpass_attachment *sp_att = - subpass->fragment_shading_rate_attachment; - assert(sp_att->attachment < pass->attachment_count); + if (render.fragment_shading_rate_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render.fragment_shading_rate_attachment; + assert(r_att->attachment < pass->attachment_count); data->fsr_att = (VkRenderingFragmentShadingRateAttachmentInfoKHR) { .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, - .imageView = fb->attachments[sp_att->attachment], - .imageLayout = sp_att->layout, + .imageView = fb->attachments[r_att->attachment], + .imageLayout = r_att->layout, .shadingRateAttachmentTexelSize = - subpass->fragment_shading_rate_attachment_texel_size, + render.fragment_shading_rate_attachment_texel_size, }; __vk_append_struct(&data->rendering, &data->fsr_att); } - /* Append this one last because it lives in the subpass and we don't want - * to be changed by appending other structures later. - */ - if (subpass->mrtss.multisampledRenderToSingleSampledEnable) - __vk_append_struct(&data->rendering, (void *)&subpass->mrtss); + if (render.mrtss_samples != 0) { + data->mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { + .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + .multisampledRenderToSingleSampledEnable = true, + .rasterizationSamples = render.mrtss_samples, + }; + __vk_append_struct(&data->rendering, (void *)&data->mrtss); + } + + const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + if (subpass->merge != MESA_VK_SUBPASS_MERGE_SINGLE) { + data->rar = subpass->rar_info; + data->rar.pNext = NULL; + __vk_append_struct(&data->rendering, (void *)&data->rar); + } return &data->rendering; } @@ -1772,6 +2589,373 @@ transition_attachment(struct vk_command_buffer *cmd_buffer, return view_mask; } +static void +barrier_before_subpass_range(struct vk_command_buffer *cmd_buffer, + uint32_t first_subpass, uint32_t last_subpass) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + assert(first_subpass <= last_subpass); + + /* At most all dependencies will need a barrier, and we might have an + * implicit one per subpass. 
+ */ + const uint32_t max_mem_barrier_count = + pass->dependency_count + last_subpass - first_subpass + 1; + STACK_ARRAY(VkMemoryBarrier2, mem_barriers, max_mem_barrier_count); + uint32_t mem_barrier_count = 0; + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->dst_subpass < first_subpass || last_subpass < dep->dst_subpass) + continue; + + const struct vk_subpass *dst_subpass = + &pass->subpasses[dep->dst_subpass]; + + assert(dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->src_subpass <= dep->dst_subpass); + + if (dep->src_subpass != VK_SUBPASS_EXTERNAL && + first_subpass <= dep->src_subpass) { + /* These get handled by barrier_mid_subpass_range() */ + assert(dep->view_offset == 0); + continue; + } + + if (dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkSubpassDependency2-dependencyFlags-03091 + * + * "If dependencyFlags includes VK_DEPENDENCY_VIEW_LOCAL_BIT, + * dstSubpass must not be equal to VK_SUBPASS_EXTERNAL" + */ + assert(dep->src_subpass != VK_SUBPASS_EXTERNAL); + + assert(dep->src_subpass < pass->subpass_count); + const struct vk_subpass *src_subpass = + &pass->subpasses[dep->src_subpass]; + + /* Figure out the set of views in the source subpass affected by this + * dependency. + */ + uint32_t src_dep_view_mask = dst_subpass->view_mask; + if (dep->view_offset >= 0) + src_dep_view_mask <<= dep->view_offset; + else + src_dep_view_mask >>= -dep->view_offset; + + /* From the Vulkan 1.3.204 spec: + * + * "If the dependency is view-local, then each view (dstView) in + * the destination subpass depends on the view dstView + + * pViewOffsets[dependency] in the source subpass. If there is not + * such a view in the source subpass, then this dependency does + * not affect that view in the destination subpass." 
+ */ + if (!(src_subpass->view_mask & src_dep_view_mask)) + continue; + } + + assert(mem_barrier_count < max_mem_barrier_count); + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = dep->src_stage_mask, + .srcAccessMask = dep->src_access_mask, + .dstStageMask = dep->dst_stage_mask, + .dstAccessMask = dep->dst_access_mask, + }; + } + + uint32_t max_image_barrier_count = 0; + for (uint32_t s = first_subpass; s <= last_subpass; s++) { + const struct vk_subpass *subpass = &pass->subpasses[s]; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + max_image_barrier_count += util_bitcount(subpass->view_mask) * + util_bitcount(rp_att->aspects); + } + if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) + max_image_barrier_count += util_bitcount(subpass->view_mask); + } + STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, max_image_barrier_count); + uint32_t image_barrier_count = 0; + + for (uint32_t s = first_subpass; s <= last_subpass; s++) { + const struct vk_subpass *subpass = &pass->subpasses[s]; + + bool has_layout_transition = false; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + /* If we're using an initial layout, the attachment will already be + * marked as transitioned and this will be a no-op. + */ + uint32_t transitioned_views = + transition_attachment(cmd_buffer, sp_att->attachment, + subpass->view_mask, + sp_att->layout, sp_att->stencil_layout, + &image_barrier_count, + max_image_barrier_count, + image_barriers); + + has_layout_transition |= (sp_att->first_subpass & transitioned_views) != 0; + } + + if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) { + transition_attachment(cmd_buffer, + pass->fragment_density_map.attachment, + subpass->view_mask, + pass->fragment_density_map.layout, + VK_IMAGE_LAYOUT_UNDEFINED, + &image_barrier_count, + max_image_barrier_count, + image_barriers); + } + + if (has_layout_transition && !subpass->has_external_src_dependency) { + /* From the Vulkan 1.3.232 spec: + * + * "If there is no subpass dependency from VK_SUBPASS_EXTERNAL to + * the first subpass that uses an attachment, then an implicit + * subpass dependency exists from VK_SUBPASS_EXTERNAL to the first + * subpass it is used in. The implicit subpass dependency only + * exists if there exists an automatic layout transition away from + * initialLayout. 
The subpass dependency operates as if defined + * with the following parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = VK_SUBPASS_EXTERNAL; + * .dstSubpass = firstSubpass; // First subpass attachment is used in + * .srcStageMask = VK_PIPELINE_STAGE_NONE; + * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .srcAccessMask = 0; + * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dependencyFlags = 0; + * };" + */ + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_NONE, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + }; + } + } + assert(image_barrier_count <= max_image_barrier_count); + + if (mem_barrier_count > 0 || image_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = mem_barrier_count, + .pMemoryBarriers = mem_barrier_count > 0 ? mem_barriers : NULL, + .imageMemoryBarrierCount = image_barrier_count, + .pImageMemoryBarriers = image_barrier_count > 0 ? image_barriers : NULL, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(image_barriers); + STACK_ARRAY_FINISH(mem_barriers); +} + +static void +barrier_mid_subpass_range(struct vk_command_buffer *cmd_buffer, + uint32_t first_subpass, uint32_t subpass_idx, + uint32_t last_subpass, uint32_t view_mask) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + assert(first_subpass <= subpass_idx); + assert(subpass_idx <= last_subpass); + + /* At most all dependencies will need a barrier, and we might have an + * implicit one per subpass. + */ + const uint32_t max_mem_barrier_count = + pass->dependency_count + last_subpass - first_subpass + 1; + STACK_ARRAY(VkMemoryBarrier2, mem_barriers, max_mem_barrier_count); + uint32_t mem_barrier_count = 0; + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->dst_subpass != subpass_idx) + continue; + + assert(dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->src_subpass <= dep->dst_subpass); + + /* This case is handled by barrier_before_subpass_range() */ + if (dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->src_subpass < first_subpass) + continue; + + assert(vk_subpass_dependency_allowed_mid_subpass(dep, view_mask)); + + /* These dependencies aren't real memory barriers. They just exist to + * say that the client is allowed to insert a barrier. 
+ */ + if (dep->src_subpass == subpass_idx) + continue; + + assert(mem_barrier_count < max_mem_barrier_count); + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = dep->src_stage_mask, + .srcAccessMask = dep->src_access_mask, + .dstStageMask = dep->dst_stage_mask, + .dstAccessMask = dep->dst_access_mask, + }; + } + + if (mem_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = mem_barrier_count, + .pMemoryBarriers = mem_barrier_count > 0 ? mem_barriers : NULL, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(mem_barriers); +} + +static void +barrier_after_subpass_range(struct vk_command_buffer *cmd_buffer, + uint32_t first_subpass, uint32_t last_subpass) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + assert(first_subpass <= last_subpass); + + /* At most all dependencies will need a barrier, and we might have an + * implicit one per subpass. + */ + const uint32_t max_mem_barrier_count = + pass->dependency_count + last_subpass - first_subpass + 1; + STACK_ARRAY(VkMemoryBarrier2, mem_barriers, max_mem_barrier_count); + uint32_t mem_barrier_count = 0; + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->src_subpass < first_subpass || last_subpass < dep->src_subpass) + continue; + + if (dep->dst_subpass != VK_SUBPASS_EXTERNAL) + continue; + + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = dep->src_stage_mask, + .srcAccessMask = dep->src_access_mask, + .dstStageMask = dep->dst_stage_mask, + .dstAccessMask = dep->dst_access_mask, + }; + } + + for (uint32_t s = first_subpass; s <= last_subpass; s++) { + const struct vk_subpass *subpass = &pass->subpasses[s]; + if (subpass->has_external_dst_dependency) + continue; + + bool has_layout_transition = false; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + uint32_t view_mask = transition_view_mask(cmd_buffer, sp_att->attachment, + subpass->view_mask, + rp_att->final_layout, + rp_att->final_stencil_layout); + + has_layout_transition |= (sp_att->last_subpass & view_mask) != 0; + } + + /* From the Vulkan 1.3.232 spec: + * + * "Similarly, if there is no subpass dependency from the last + * subpass that uses an attachment to VK_SUBPASS_EXTERNAL, then an + * implicit subpass dependency exists from the last subpass it is + * used in to VK_SUBPASS_EXTERNAL. The implicit subpass dependency + * only exists if there exists an automatic layout transition into + * finalLayout. 
The subpass dependency operates as if defined with + * the following parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = lastSubpass; // Last subpass attachment is used in + * .dstSubpass = VK_SUBPASS_EXTERNAL; + * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .dstStageMask = VK_PIPELINE_STAGE_NONE; + * .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dstAccessMask = 0; + * .dependencyFlags = 0; + * };" + */ + if (has_layout_transition) { + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_NONE, + .dstAccessMask = VK_ACCESS_2_NONE, + }; + } + } + + if (mem_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = mem_barrier_count, + .pMemoryBarriers = mem_barriers, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(mem_barriers); +} + static void load_attachment(struct vk_command_buffer *cmd_buffer, uint32_t att_idx, uint32_t view_mask, @@ -1857,36 +3041,71 @@ static void begin_subpass(struct vk_command_buffer *cmd_buffer, const VkSubpassBeginInfo *begin_info) { + VkCommandBuffer cmd_buffer_h = vk_command_buffer_to_handle(cmd_buffer); const struct vk_render_pass *pass = cmd_buffer->render_pass; const struct vk_framebuffer *framebuffer = cmd_buffer->framebuffer; const uint32_t subpass_idx = cmd_buffer->subpass_idx; assert(subpass_idx < pass->subpass_count); - const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; struct vk_device_dispatch_table *disp = &cmd_buffer->base.device->dispatch_table; + const uint32_t merge_begin = + vk_render_pass_find_merge_begin(pass, subpass_idx); + const uint32_t merge_end = + vk_render_pass_find_merge_end(pass, subpass_idx); + + if (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_BEGIN)) { + const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + + /* If this isn't a begin, just do mid-subpass barriers */ + barrier_mid_subpass_range(cmd_buffer, merge_begin, subpass_idx, + merge_end, subpass->view_mask); + + /* And update input indices and color locations */ + VkRenderingInputAttachmentIndexInfo ial_tmp = subpass->ial.info; + ial_tmp.pNext = NULL; + disp->CmdSetRenderingInputAttachmentIndices(cmd_buffer_h, &ial_tmp); + + VkRenderingAttachmentRemapInfoMESA rar_tmp = subpass->rar_info; + rar_tmp.pNext = NULL; + vk_cmd_set_rendering_attachment_remap(cmd_buffer, &rar_tmp); + + /* And that's it! */ + return; + } + + struct vk_render render; + if (pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE) { + /* For singular subpasses, we use init_for_single_subpass, which doesn't + * re-arrange anything so we avoid requiring DRLR and + * CmdSetRenderingAttachmentLocationInfo(). 
+ */ + vk_render_init_for_single_subpass(&render, pass, + &pass->subpasses[subpass_idx]); + } else { + vk_render_init_empty(&render); + for (uint32_t s = merge_begin; s <= merge_end; s++) + vk_render_add_subpass(&render, pass, &pass->subpasses[s]); + } + /* First, we figure out all our attachments and attempt to handle image * layout transitions and load ops as part of vkCmdBeginRendering if we * can. For any we can't handle this way, we'll need explicit barriers * or quick vkCmdBegin/EndRendering to do the load op. */ - STACK_ARRAY(VkRenderingAttachmentFlagsInfoKHR, color_attachments_flags, - subpass->color_count); - STACK_ARRAY(VkRenderingAttachmentInfo, color_attachments, - subpass->color_count); - STACK_ARRAY(VkRenderingAttachmentInitialLayoutInfoMESA, - color_attachment_initial_layouts, - subpass->color_count); + VkRenderingAttachmentInfo color_attachments[MESA_VK_MAX_COLOR_ATTACHMENTS]; + VkRenderingAttachmentFlagsInfoKHR color_attachments_flags[MESA_VK_MAX_COLOR_ATTACHMENTS]; + VkRenderingAttachmentInitialLayoutInfoMESA color_attachment_initial_layouts[MESA_VK_MAX_COLOR_ATTACHMENTS]; - for (uint32_t i = 0; i < subpass->color_count; i++) { - const struct vk_subpass_attachment *sp_att = - &subpass->color_attachments[i]; + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + const struct vk_subpass_attachment *r_att = &render.color_attachments[c]; + VkRenderingAttachmentInfo *color_attachment = &color_attachments[c]; VkRenderingAttachmentFlagsInfoKHR *color_attachment_flags = - &color_attachments_flags[i]; - VkRenderingAttachmentInfo *color_attachment = &color_attachments[i]; + &color_attachments_flags[c]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { *color_attachment = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = VK_NULL_HANDLE, @@ -1894,11 +3113,13 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, continue; } - assert(sp_att->attachment < pass->attachment_count); + color_count = MAX2(color_count, c + 1); + + assert(r_att->attachment < pass->attachment_count); const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; + &pass->attachments[r_att->attachment]; struct vk_attachment_state *att_state = - &cmd_buffer->attachments[sp_att->attachment]; + &cmd_buffer->attachments[r_att->attachment]; *color_attachment_flags = (VkRenderingAttachmentFlagsInfoKHR) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_FLAGS_INFO_KHR, @@ -1908,25 +3129,25 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .pNext = color_attachment_flags, .imageView = vk_image_view_to_handle(att_state->image_view), - .imageLayout = sp_att->layout, + .imageLayout = r_att->layout, }; - if (!(subpass->view_mask & att_state->views_loaded)) { + if (!(render.view_mask & att_state->views_loaded)) { /* None of these views have been used before */ color_attachment->loadOp = rp_att->load_op; color_attachment->clearValue = att_state->clear_value; - att_state->views_loaded |= subpass->view_mask; + att_state->views_loaded |= render.view_mask; VkImageLayout initial_layout; if (can_use_attachment_initial_layout(cmd_buffer, - sp_att->attachment, - subpass->view_mask, + r_att->attachment, + render.view_mask, &initial_layout, NULL) && - sp_att->layout != initial_layout) { + r_att->layout != initial_layout) { assert(color_attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); 
VkRenderingAttachmentInitialLayoutInfoMESA *color_initial_layout = - &color_attachment_initial_layouts[i]; + &color_attachment_initial_layouts[c]; *color_initial_layout = (VkRenderingAttachmentInitialLayoutInfoMESA) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA, .initialLayout = initial_layout, @@ -1934,8 +3155,8 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, __vk_append_struct(color_attachment, color_initial_layout); vk_command_buffer_set_attachment_layout(cmd_buffer, - sp_att->attachment, - sp_att->layout, + r_att->attachment, + r_att->layout, VK_IMAGE_LAYOUT_UNDEFINED); } } else { @@ -1945,7 +3166,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, color_attachment->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } - if (!(subpass->view_mask & ~sp_att->last_subpass)) { + if (!(render.view_mask & ~r_att->last_subpass)) { /* This is the last subpass for every view */ color_attachment->storeOp = rp_att->store_op; } else { @@ -1959,38 +3180,31 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, color_attachment->storeOp = VK_ATTACHMENT_STORE_OP_STORE; } - if (sp_att->resolve != NULL) { - assert(sp_att->resolve->attachment < pass->attachment_count); + /* With multisample render to single sample, we may have a resolve even + * if we don't have a resolve attachment. + */ + color_attachment->resolveMode = r_att->resolve_mode; + + if (r_att->resolve != NULL) { + assert(r_att->resolve->attachment < pass->attachment_count); struct vk_attachment_state *res_att_state = - &cmd_buffer->attachments[sp_att->resolve->attachment]; + &cmd_buffer->attachments[r_att->resolve->attachment]; /* Resolve attachments are entirely overwritten by the resolve * operation so the load op really doesn't matter. We can consider * the resolve as being the load. 
*/ - res_att_state->views_loaded |= subpass->view_mask; + res_att_state->views_loaded |= render.view_mask; const struct vk_render_pass_attachment *resolve_att = - &pass->attachments[sp_att->resolve->attachment]; - if (resolve_att->has_external_format) - color_attachment->resolveMode = VK_RESOLVE_MODE_EXTERNAL_FORMAT_DOWNSAMPLE_BIT_ANDROID; - else if (vk_format_is_int(res_att_state->image_view->format)) - color_attachment->resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; - else - color_attachment->resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; + &pass->attachments[r_att->resolve->attachment]; color_attachment->resolveImageView = vk_image_view_to_handle(res_att_state->image_view); - color_attachment->resolveImageLayout = sp_att->resolve->layout; + color_attachment->resolveImageLayout = r_att->resolve->layout; color_attachment_flags->flags = vk_attachment_description_flags_to_rendering_flags(resolve_att->flags); - } else if (subpass->mrtss.multisampledRenderToSingleSampledEnable && - rp_att->samples == VK_SAMPLE_COUNT_1_BIT) { - if (vk_format_is_int(att_state->image_view->format)) - color_attachment->resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; - else - color_attachment->resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; } } @@ -2014,21 +3228,21 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, }; const VkSampleLocationsInfoEXT *sample_locations = NULL; - if (subpass->depth_stencil_attachment != NULL) { - const struct vk_subpass_attachment *sp_att = - subpass->depth_stencil_attachment; + if (render.depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render.depth_stencil_attachment; - assert(sp_att->attachment < pass->attachment_count); + assert(r_att->attachment < pass->attachment_count); const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; + &pass->attachments[r_att->attachment]; struct vk_attachment_state *att_state = - &cmd_buffer->attachments[sp_att->attachment]; + &cmd_buffer->attachments[r_att->attachment]; - assert(sp_att->aspects == rp_att->aspects); + assert(r_att->aspects == rp_att->aspects); if (rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { depth_attachment.imageView = vk_image_view_to_handle(att_state->image_view); - depth_attachment.imageLayout = sp_att->layout; + depth_attachment.imageLayout = r_att->layout; depth_attachment_flags.flags = vk_attachment_description_flags_to_rendering_flags(rp_att->flags); } @@ -2036,7 +3250,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, if (rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { stencil_attachment.imageView = vk_image_view_to_handle(att_state->image_view); - stencil_attachment.imageLayout = sp_att->stencil_layout; + stencil_attachment.imageLayout = r_att->stencil_layout; stencil_attachment_flags.flags = vk_attachment_description_flags_to_rendering_flags(rp_att->flags); } @@ -2046,22 +3260,22 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, __vk_append_struct(&stencil_attachment, &stencil_attachment_flags); - if (!(subpass->view_mask & att_state->views_loaded)) { + if (!(render.view_mask & att_state->views_loaded)) { /* None of these views have been used before */ depth_attachment.loadOp = rp_att->load_op; depth_attachment.clearValue = att_state->clear_value; stencil_attachment.loadOp = rp_att->stencil_load_op; stencil_attachment.clearValue = att_state->clear_value; - att_state->views_loaded |= subpass->view_mask; + att_state->views_loaded |= render.view_mask; VkImageLayout initial_layout, initial_stencil_layout; if 
(can_use_attachment_initial_layout(cmd_buffer, - sp_att->attachment, - subpass->view_mask, + r_att->attachment, + render.view_mask, &initial_layout, &initial_stencil_layout)) { if ((rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - sp_att->layout != initial_layout) { + r_att->layout != initial_layout) { assert(depth_attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); depth_initial_layout.initialLayout = initial_layout; __vk_append_struct(&depth_attachment, @@ -2069,7 +3283,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, } if ((rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - sp_att->stencil_layout != initial_stencil_layout) { + r_att->stencil_layout != initial_stencil_layout) { assert(stencil_attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); stencil_initial_layout.initialLayout = initial_stencil_layout; __vk_append_struct(&stencil_attachment, @@ -2077,9 +3291,9 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, } vk_command_buffer_set_attachment_layout(cmd_buffer, - sp_att->attachment, - sp_att->layout, - sp_att->stencil_layout); + r_att->attachment, + r_att->layout, + r_att->stencil_layout); } } else { /* We've seen at least one of the views of this attachment before so @@ -2089,7 +3303,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } - if (!(subpass->view_mask & ~sp_att->last_subpass)) { + if (!(render.view_mask & ~r_att->last_subpass)) { /* This is the last subpass for every view */ depth_attachment.storeOp = rp_att->store_op; stencil_attachment.storeOp = rp_att->stencil_store_op; @@ -2101,6 +3315,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * some places where it may have wanted STORE_OP_NONE but that should * be harmless. */ + assert(subpass_idx < pass->subpass_count - 1); depth_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; } @@ -2132,74 +3347,45 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, get_subpass_sample_locations(cmd_buffer->pass_sample_locations, subpass_idx); - u_foreach_bit(view, subpass->view_mask) + u_foreach_bit(view, render.view_mask) att_state->views[view].sample_locations = sample_locations; } - if (sp_att->resolve != NULL || - (subpass->mrtss.multisampledRenderToSingleSampledEnable && - rp_att->samples == VK_SAMPLE_COUNT_1_BIT)) { - const struct vk_subpass_attachment *res_sp_att = sp_att->resolve ? sp_att->resolve : sp_att; - assert(res_sp_att->attachment < pass->attachment_count); - const struct vk_render_pass_attachment *res_rp_att = - &pass->attachments[res_sp_att->attachment]; + /* With multisample render to single sample, we may have a resolve even + * if we don't have a resolve attachment. + */ + depth_attachment.resolveMode = r_att->resolve_mode; + stencil_attachment.resolveMode = r_att->stencil_resolve_mode; + + if (r_att->resolve != NULL) { + assert(r_att->resolve->attachment < pass->attachment_count); struct vk_attachment_state *res_att_state = - &cmd_buffer->attachments[res_sp_att->attachment]; - - /* From the Vulkan 1.3.204 spec: - * - * "VkSubpassDescriptionDepthStencilResolve::depthResolveMode is - * ignored if the VkFormat of the pDepthStencilResolveAttachment - * does not have a depth component. Similarly, - * VkSubpassDescriptionDepthStencilResolve::stencilResolveMode is - * ignored if the VkFormat of the pDepthStencilResolveAttachment - * does not have a stencil component." - * - * TODO: Should we handle this here or when we create the render - * pass? 
Handling it here makes load ops "correct" in the sense - * that, if we resolve to the wrong aspect, we will still consider - * it bound and clear it if requested. - */ - VkResolveModeFlagBits depth_resolve_mode = VK_RESOLVE_MODE_NONE; - if (res_rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - depth_resolve_mode = subpass->depth_resolve_mode; - - VkResolveModeFlagBits stencil_resolve_mode = VK_RESOLVE_MODE_NONE; - if (res_rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) - stencil_resolve_mode = subpass->stencil_resolve_mode; + &cmd_buffer->attachments[r_att->resolve->attachment]; VkImageAspectFlags resolved_aspects = 0; - if (depth_resolve_mode != VK_RESOLVE_MODE_NONE) { - depth_attachment.resolveMode = depth_resolve_mode; - if (sp_att->resolve) { - depth_attachment.resolveImageView = - vk_image_view_to_handle(res_att_state->image_view); - depth_attachment.resolveImageLayout = - sp_att->resolve->layout; - } - + if (depth_attachment.resolveMode != VK_RESOLVE_MODE_NONE) { + depth_attachment.resolveImageView = + vk_image_view_to_handle(res_att_state->image_view); + depth_attachment.resolveImageLayout = + r_att->resolve->layout; resolved_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; } - if (stencil_resolve_mode != VK_RESOLVE_MODE_NONE) { - stencil_attachment.resolveMode = stencil_resolve_mode; - if (sp_att->resolve) { - stencil_attachment.resolveImageView = - vk_image_view_to_handle(res_att_state->image_view); - stencil_attachment.resolveImageLayout = - sp_att->resolve->stencil_layout; - } - + if (stencil_attachment.resolveMode != VK_RESOLVE_MODE_NONE) { + stencil_attachment.resolveImageView = + vk_image_view_to_handle(res_att_state->image_view); + stencil_attachment.resolveImageLayout = + r_att->resolve->stencil_layout; resolved_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; } - if (sp_att->resolve && resolved_aspects == rp_att->aspects) { + if (resolved_aspects == rp_att->aspects) { /* The resolve attachment is entirely overwritten by the * resolve operation so the load op really doesn't matter. * We can consider the resolve as being the load. */ - res_att_state->views_loaded |= subpass->view_mask; + res_att_state->views_loaded |= render.view_mask; } } } @@ -2208,176 +3394,21 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * VkMemoryBarriers for subpass dependencies and it may include some * number of VkImageMemoryBarriers for layout transitions. */ - - /* At most all dependencies will need a barrier, and we might have an - * implicit one. */ - STACK_ARRAY(VkMemoryBarrier2, mem_barriers, pass->dependency_count + 1); - uint32_t mem_barrier_count = 0; - bool external_dependency = false; - for (uint32_t d = 0; d < pass->dependency_count; d++) { - const struct vk_subpass_dependency *dep = &pass->dependencies[d]; - if (dep->dst_subpass != subpass_idx) - continue; - - if (dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT) { - /* From the Vulkan 1.3.204 spec: - * - * VUID-VkSubpassDependency2-dependencyFlags-03091 - * - * "If dependencyFlags includes VK_DEPENDENCY_VIEW_LOCAL_BIT, - * dstSubpass must not be equal to VK_SUBPASS_EXTERNAL" - */ - assert(dep->src_subpass != VK_SUBPASS_EXTERNAL); - - assert(dep->src_subpass < pass->subpass_count); - const struct vk_subpass *src_subpass = - &pass->subpasses[dep->src_subpass]; - - /* Figure out the set of views in the source subpass affected by this - * dependency. 
- */ - uint32_t src_dep_view_mask = subpass->view_mask; - if (dep->view_offset >= 0) - src_dep_view_mask <<= dep->view_offset; - else - src_dep_view_mask >>= -dep->view_offset; - - /* From the Vulkan 1.3.204 spec: - * - * "If the dependency is view-local, then each view (dstView) in - * the destination subpass depends on the view dstView + - * pViewOffsets[dependency] in the source subpass. If there is not - * such a view in the source subpass, then this dependency does - * not affect that view in the destination subpass." - */ - if (!(src_subpass->view_mask & src_dep_view_mask)) - continue; - } - - mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, - .srcStageMask = dep->src_stage_mask, - .srcAccessMask = dep->src_access_mask, - .dstStageMask = dep->dst_stage_mask, - .dstAccessMask = dep->dst_access_mask, - }; - external_dependency |= (dep->src_subpass == VK_SUBPASS_EXTERNAL); - } - - uint32_t max_image_barrier_count = 0; - for (uint32_t a = 0; a < subpass->attachment_count; a++) { - const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) - continue; - - assert(sp_att->attachment < pass->attachment_count); - const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; - - max_image_barrier_count += util_bitcount(subpass->view_mask) * - util_bitcount(rp_att->aspects); - } - if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) - max_image_barrier_count += util_bitcount(subpass->view_mask); - STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, max_image_barrier_count); - uint32_t image_barrier_count = 0; - bool has_layout_transition = false; - - for (uint32_t a = 0; a < subpass->attachment_count; a++) { - const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) - continue; - - /* If we're using an initial layout, the attachment will already be - * marked as transitioned and this will be a no-op. - */ - uint32_t transitioned_views = - transition_attachment(cmd_buffer, sp_att->attachment, - subpass->view_mask, - sp_att->layout, sp_att->stencil_layout, - &image_barrier_count, - max_image_barrier_count, - image_barriers); - - has_layout_transition |= (sp_att->first_subpass & transitioned_views) != 0; - } - if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) { - transition_attachment(cmd_buffer, pass->fragment_density_map.attachment, - subpass->view_mask, - pass->fragment_density_map.layout, - VK_IMAGE_LAYOUT_UNDEFINED, - &image_barrier_count, - max_image_barrier_count, - image_barriers); - } - assert(image_barrier_count <= max_image_barrier_count); - - if (has_layout_transition && !external_dependency) { - /* From the Vulkan 1.3.232 spec: - * - * "If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the - * first subpass that uses an attachment, then an implicit subpass - * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it - * is used in. The implicit subpass dependency only exists if there - * exists an automatic layout transition away from initialLayout. 
The - * subpass dependency operates as if defined with the following - * parameters: - * - * VkSubpassDependency implicitDependency = { - * .srcSubpass = VK_SUBPASS_EXTERNAL; - * .dstSubpass = firstSubpass; // First subpass attachment is used in - * .srcStageMask = VK_PIPELINE_STAGE_NONE; - * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - * .srcAccessMask = 0; - * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | - * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - * .dependencyFlags = 0; - * };" - */ - mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_NONE, - .srcAccessMask = VK_ACCESS_2_NONE, - .dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - .dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | - VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - }; - } - - if (mem_barrier_count > 0 || image_barrier_count > 0) { - const VkDependencyInfo dependency_info = { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .dependencyFlags = 0, - .memoryBarrierCount = mem_barrier_count, - .pMemoryBarriers = mem_barrier_count > 0 ? mem_barriers : NULL, - .imageMemoryBarrierCount = image_barrier_count, - .pImageMemoryBarriers = image_barrier_count > 0 ? image_barriers : NULL, - }; - cmd_buffer->runtime_rp_barrier = true; - disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), - &dependency_info); - cmd_buffer->runtime_rp_barrier = false; - } - - STACK_ARRAY_FINISH(image_barriers); - STACK_ARRAY_FINISH(mem_barriers); + barrier_before_subpass_range(cmd_buffer, merge_begin, merge_end); /* Next, handle any VK_ATTACHMENT_LOAD_OP_CLEAR that we couldn't handle * directly by emitting a quick vkCmdBegin/EndRendering to do the load. */ - for (uint32_t a = 0; a < subpass->attachment_count; a++) { - const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) - continue; + for (uint32_t s = merge_begin; s <= merge_end; s++) { + struct vk_subpass *subpass = &pass->subpasses[s]; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; - load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask, - sp_att->layout, sp_att->stencil_layout); + load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask, + sp_att->layout, sp_att->stencil_layout); + } } /* TODO: Handle preserve attachments @@ -2393,37 +3424,33 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, .flags = VK_RENDERING_LOCAL_READ_CONCURRENT_ACCESS_CONTROL_BIT_KHR, .renderArea = cmd_buffer->render_area, .layerCount = pass->is_multiview ? 1 : framebuffer->layers, - .viewMask = pass->is_multiview ? subpass->view_mask : 0, - .colorAttachmentCount = subpass->color_count, - .pColorAttachments = color_attachments, - .pDepthAttachment = &depth_attachment, - .pStencilAttachment = &stencil_attachment, + .viewMask = pass->is_multiview ? render.view_mask : 0, + .colorAttachmentCount = color_count, + .pColorAttachments = color_count > 0 ? color_attachments : NULL, + .pDepthAttachment = depth_attachment.imageView != VK_NULL_HANDLE ? 
+                          &depth_attachment : NULL,
+      .pStencilAttachment = stencil_attachment.imageView != VK_NULL_HANDLE ?
+                            &stencil_attachment : NULL,
    };
 
-   if (subpass->legacy_dithering_enabled)
+   if (render.legacy_dithering_enabled)
       rendering.flags |= VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT;
 
    VkRenderingFragmentShadingRateAttachmentInfoKHR fsr_attachment;
-   if (subpass->fragment_shading_rate_attachment) {
-      const struct vk_subpass_attachment *sp_att =
-         subpass->fragment_shading_rate_attachment;
+   if (render.fragment_shading_rate_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+      const struct vk_subpass_attachment *r_att =
+         &render.fragment_shading_rate_attachment;
 
-      assert(sp_att->attachment < pass->attachment_count);
+      assert(r_att->attachment < pass->attachment_count);
       struct vk_attachment_state *att_state =
-         &cmd_buffer->attachments[sp_att->attachment];
-
-      /* Fragment shading rate attachments have no loadOp (it's implicitly
-       * LOAD_OP_LOAD) so we need to ensure the load op happens.
-       */
-      load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask,
-                      sp_att->layout, sp_att->stencil_layout);
+         &cmd_buffer->attachments[r_att->attachment];
 
       fsr_attachment = (VkRenderingFragmentShadingRateAttachmentInfoKHR) {
         .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR,
         .imageView = vk_image_view_to_handle(att_state->image_view),
-        .imageLayout = sp_att->layout,
+        .imageLayout = r_att->layout,
         .shadingRateAttachmentTexelSize =
-           subpass->fragment_shading_rate_attachment_texel_size,
+           render.fragment_shading_rate_attachment_texel_size,
      };
      __vk_append_struct(&rendering, &fsr_attachment);
   }
@@ -2463,43 +3490,32 @@ begin_subpass(struct vk_command_buffer *cmd_buffer,
    /* Append this one last because it lives in the subpass and we don't want
    * to be changed by appending other structures later.
    */
-   if (subpass->mrtss.multisampledRenderToSingleSampledEnable)
-      __vk_append_struct(&rendering, (void *)&subpass->mrtss);
-
-   disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd_buffer),
-                           &rendering);
-
-   if (disp->CmdSetRenderingInputAttachmentIndices) {
-      /* From the Vulkan 1.4.312 spec:
-       * "
-       *    Until this command is called, mappings in the command buffer state
-       *    are treated as each color attachment specified in vkCmdBeginRendering
-       *    mapping to subpass inputs with a InputAttachmentIndex equal to its
-       *    index in VkRenderingInfo::pColorAttachments, and depth/stencil
-       *    attachments mapping to input attachments without these decorations.
-       *    This state is reset whenever vkCmdBeginRendering is called.
-       * "
-       *
-       * In practice, CmdBindPipeline() should apply exactly the same
-       * state to the vk_command_buffer dynamic state, and that's exactly
-       * what the Vulkan spec wants:
-       *
-       * "
-       *    This command sets the input attachment index mappings for subsequent
-       *    drawing commands, and must match the mappings provided to the bound
-       *    pipeline, if one is bound, which can be set by chaining
-       *    VkRenderingInputAttachmentIndexInfo to VkGraphicsPipelineCreateInfo.
-       * "
-       *
-       * So I'm not sure this CmdSetRenderingInputAttachmentIndices() is
-       * really needed, but let's keep it to play by the rules.
-       */
-      disp->CmdSetRenderingInputAttachmentIndices(vk_command_buffer_to_handle(cmd_buffer),
-                                                  &subpass->ial.info);
+   VkMultisampledRenderToSingleSampledInfoEXT mrtss;
+   if (render.mrtss_samples != 0) {
+      mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) {
+         .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT,
+         .multisampledRenderToSingleSampledEnable = true,
+         .rasterizationSamples = render.mrtss_samples,
+      };
+      __vk_append_struct(&rendering, (void *)&mrtss);
    }
 
-   STACK_ARRAY_FINISH(color_attachments);
-   STACK_ARRAY_FINISH(color_attachment_initial_layouts);
+   const struct vk_subpass *subpass = &pass->subpasses[subpass_idx];
+
+   VkRenderingAttachmentRemapInfoMESA rar_tmp;
+   if (subpass->merge != MESA_VK_SUBPASS_MERGE_SINGLE) {
+      rar_tmp = subpass->rar_info;
+      rar_tmp.pNext = NULL;
+      __vk_append_struct(&rendering, (void *)&rar_tmp);
+   }
+
+   disp->CmdBeginRendering(cmd_buffer_h, &rendering);
+
+   if (disp->CmdSetRenderingInputAttachmentIndices) {
+      VkRenderingInputAttachmentIndexInfo ial_tmp = subpass->ial.info;
+      ial_tmp.pNext = NULL;
+      disp->CmdSetRenderingInputAttachmentIndices(cmd_buffer_h, &ial_tmp);
+   }
 }
 
 static void
@@ -2509,100 +3525,20 @@ end_subpass(struct vk_command_buffer *cmd_buffer,
    const struct vk_render_pass *pass = cmd_buffer->render_pass;
    const uint32_t subpass_idx = cmd_buffer->subpass_idx;
    assert(subpass_idx < pass->subpass_count);
-   const struct vk_subpass *subpass = &pass->subpasses[subpass_idx];
 
    struct vk_device_dispatch_table *disp =
       &cmd_buffer->base.device->dispatch_table;
 
+   if (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_END))
+      return;
+
    disp->CmdEndRendering(vk_command_buffer_to_handle(cmd_buffer));
 
-   /* At most all dependencies will need a barrier, and we might have an
-    * implicit one. */
-   STACK_ARRAY(VkMemoryBarrier2, mem_barriers, pass->dependency_count + 1);
-   uint32_t mem_barrier_count = 0;
-   for (uint32_t d = 0; d < pass->dependency_count; d++) {
-      const struct vk_subpass_dependency *dep = &pass->dependencies[d];
-      if (dep->src_subpass != subpass_idx)
-         continue;
+   const uint32_t merge_begin =
+      vk_render_pass_find_merge_begin(pass, subpass_idx);
+   const uint32_t merge_end =
+      vk_render_pass_find_merge_end(pass, subpass_idx);
 
-      if (dep->dst_subpass != VK_SUBPASS_EXTERNAL)
-         continue;
-
-      mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){
-         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
-         .srcStageMask = dep->src_stage_mask,
-         .srcAccessMask = dep->src_access_mask,
-         .dstStageMask = dep->dst_stage_mask,
-         .dstAccessMask = dep->dst_access_mask,
-      };
-   }
-
-   /* If we have a barrier, we have an external dependency */
-   bool external_dependency = mem_barrier_count > 0;
-
-   if (!external_dependency) {
-      bool has_layout_transition = false;
-      for (uint32_t a = 0; a < subpass->attachment_count; a++) {
-         const struct vk_subpass_attachment *sp_att = &subpass->attachments[a];
-         if (sp_att->attachment == VK_ATTACHMENT_UNUSED)
-            continue;
-
-         const struct vk_render_pass_attachment *rp_att =
-            &pass->attachments[sp_att->attachment];
-         uint32_t view_mask = transition_view_mask(cmd_buffer, sp_att->attachment,
-                                                   subpass->view_mask,
-                                                   rp_att->final_layout,
-                                                   rp_att->final_stencil_layout);
-
-         has_layout_transition |= (sp_att->last_subpass & view_mask) != 0;
-      }
-
-      /* From the Vulkan 1.3.232 spec:
-       *
-       *    "Similarly, if there is no subpass dependency from the last
-       *    subpass that uses an attachment to VK_SUBPASS_EXTERNAL, then an
-       *    implicit subpass dependency exists from the last subpass it is
-       *    used in to VK_SUBPASS_EXTERNAL. The implicit subpass dependency
-       *    only exists if there exists an automatic layout transition into
-       *    finalLayout. The subpass dependency operates as if defined with
-       *    the following parameters:
-       *
-       *    VkSubpassDependency implicitDependency = {
-       *       .srcSubpass = lastSubpass; // Last subpass attachment is used in
-       *       .dstSubpass = VK_SUBPASS_EXTERNAL;
-       *       .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-       *       .dstStageMask = VK_PIPELINE_STAGE_NONE;
-       *       .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-       *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-       *       .dstAccessMask = 0;
-       *       .dependencyFlags = 0;
-       *    };"
-       */
-      if (has_layout_transition) {
-         mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){
-            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
-            .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
-            .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
-                             VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
-            .dstStageMask = VK_PIPELINE_STAGE_2_NONE,
-            .dstAccessMask = VK_ACCESS_2_NONE,
-         };
-      }
-   }
-
-   if (mem_barrier_count > 0) {
-      const VkDependencyInfo dependency_info = {
-         .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
-         .dependencyFlags = 0,
-         .memoryBarrierCount = mem_barrier_count,
-         .pMemoryBarriers = mem_barriers,
-      };
-      cmd_buffer->runtime_rp_barrier = true;
-      disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer),
-                                &dependency_info);
-      cmd_buffer->runtime_rp_barrier = false;
-   }
-
-   STACK_ARRAY_FINISH(mem_barriers);
+   barrier_after_subpass_range(cmd_buffer, merge_begin, merge_end);
 }
 
 VKAPI_ATTR void VKAPI_CALL
diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h
index 302a9606ebc..9df5aabe048 100644
--- a/src/vulkan/runtime/vk_render_pass.h
+++ b/src/vulkan/runtime/vk_render_pass.h
@@ -75,10 +75,31 @@ struct vk_subpass_attachment {
    /** Resolve attachment, if any */
    struct vk_subpass_attachment *resolve;
+
+   VkResolveModeFlagBits resolve_mode;
+   VkResolveModeFlagBits stencil_resolve_mode;
+};
+
+/***/
+enum vk_subpass_merge {
+   /** This subpass is in the middle of a merged subpass group */
+   MESA_VK_SUBPASS_MERGE_MID = 0,
+
+   /** This subpass begins a merged subpass group */
+   MESA_VK_SUBPASS_MERGE_BEGIN = 1,
+
+   /** This subpass ends a merged subpass group */
+   MESA_VK_SUBPASS_MERGE_END = 2,
+
+   /** This subpass is in its own subpass group */
+   MESA_VK_SUBPASS_MERGE_SINGLE = MESA_VK_SUBPASS_MERGE_BEGIN |
+                                  MESA_VK_SUBPASS_MERGE_END,
 };
 
 /***/
 struct vk_subpass {
+   enum vk_subpass_merge merge;
+
    /** Count of all attachments referenced by this subpass */
    uint32_t attachment_count;
@@ -120,11 +141,10 @@ struct vk_subpass {
     */
    uint32_t view_mask;
 
-   /** VkSubpassDescriptionDepthStencilResolve::depthResolveMode */
-   VkResolveModeFlagBits depth_resolve_mode;
+   VkSampleCountFlagBits mrtss_samples;
 
-   /** VkSubpassDescriptionDepthStencilResolve::stencilResolveMode */
-   VkResolveModeFlagBits stencil_resolve_mode;
+   bool has_external_src_dependency;
+   bool has_external_dst_dependency;
 
    /** VkFragmentShadingRateAttachmentInfoKHR::shadingRateAttachmentTexelSize */
    VkExtent2D fragment_shading_rate_attachment_texel_size;
@@ -136,7 +156,16 @@ struct vk_subpass {
     *
     * This is in the pNext chain of pipeline_info and inheritance_info.
    */
-   VkAttachmentSampleCountInfoAMD sample_count_info_amd;
+   struct {
+      VkAttachmentSampleCountInfoAMD info;
+      VkSampleCountFlagBits samples[MESA_VK_MAX_COLOR_ATTACHMENTS];
+   } sample_count_amd;
+
+   /** VkRenderingAttachmentRemapInfoMESA for this subpass
+    *
+    * This is in the pNext chain of pipeline_info and inheritance_info.
+    */
+   VkRenderingAttachmentRemapInfoMESA rar_info;
 
    /** VkRenderingInputAttachmentIndexInfo for this subpass
    *
@@ -152,6 +181,8 @@ struct vk_subpass {
       uint32_t stencil;
    } ial;
 
+   VkFormat color_attachment_formats[MESA_VK_MAX_COLOR_ATTACHMENTS];
+
    /** VkPipelineRenderingCreateInfo for this subpass
    *
    * Returned by vk_get_pipeline_rendering_create_info() if
@@ -166,9 +197,6 @@ struct vk_subpass {
    */
    VkCommandBufferInheritanceRenderingInfo inheritance_info;
 
-   /** VkMultisampledRenderToSingleSampledInfoEXT for this subpass */
-   VkMultisampledRenderToSingleSampledInfoEXT mrtss;
-
    /** True if legacy dithering is enabled for this subpass. */
    bool legacy_dithering_enabled;
 };
@@ -296,6 +324,19 @@ struct vk_render_pass {
 VK_DEFINE_NONDISP_HANDLE_CASTS(vk_render_pass, base, VkRenderPass,
                                VK_OBJECT_TYPE_RENDER_PASS);
 
+struct vk_render_pass *
+vk_render_pass_create(struct vk_device *device,
+                      const VkRenderPassCreateInfo2 *pCreateInfo,
+                      const VkAllocationCallbacks *alloc);
+
+/** Attempts to merge the given subpass index into the previous subpass
+ *
+ * Returns true if the merge succeeded.
+ */
+bool
+vk_render_pass_try_merge_subpass(struct vk_render_pass *pass,
+                                 uint32_t subpass_idx);
+
 /** Returns the VkPipelineRenderingCreateInfo for a graphics pipeline
  *
  * For render-pass-free drivers, this can be used in the implementation of
@@ -311,6 +352,20 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(vk_render_pass, base, VkRenderPass,
 const VkPipelineRenderingCreateInfo *
 vk_get_pipeline_rendering_create_info(const VkGraphicsPipelineCreateInfo *info);
 
+/** Returns the VkRenderingAttachmentRemapInfoMESA for a graphics pipeline
+ *
+ * For render-pass-free drivers, this can be used in the implementation of
+ * vkCreateGraphicsPipelines to get the VkRenderingAttachmentRemapInfoMESA.
+ * If VkGraphicsPipelineCreateInfo::renderPass is not VK_NULL_HANDLE, it will
+ * return the VkRenderingAttachmentRemapInfoMESA for the specified subpass.
+ * If VkGraphicsPipelineCreateInfo::renderPass is VK_NULL_HANDLE, it will
+ * return NULL.
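+ *
+ * See struct vk_subpass::rar_info and vk_render_pass_try_merge_subpass()
+ * for where this structure comes from.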
+ *
+ * :param info: |in| One of the pCreateInfos from vkCreateGraphicsPipelines
+ */
+const VkRenderingAttachmentRemapInfoMESA *
+vk_get_pipeline_rendering_ar_info(const VkGraphicsPipelineCreateInfo *info);
+
 /** Returns the VkRenderingInputAttachmentIndexInfo for a graphics pipeline
  *
  * For render-pass-free drivers, this can be used in the implementation of
@@ -388,6 +443,8 @@ vk_get_rendering_attachment_flags(const VkRenderingAttachmentInfo *att);
 struct vk_gcbiarr_data {
    VkRenderingInfo rendering;
    VkRenderingFragmentShadingRateAttachmentInfoKHR fsr_att;
+   VkMultisampledRenderToSingleSampledInfoEXT mrtss;
+   VkRenderingAttachmentRemapInfoMESA rar;
    VkRenderingAttachmentInfo attachments[];
 };
 
diff --git a/src/vulkan/util/vk_internal_exts.h b/src/vulkan/util/vk_internal_exts.h
index 497d28bd40b..2d665bb809a 100644
--- a/src/vulkan/util/vk_internal_exts.h
+++ b/src/vulkan/util/vk_internal_exts.h
@@ -15,6 +15,8 @@
 #ifndef VK_INTERNAL_EXTS_H
 #define VK_INTERNAL_EXTS_H
 
+#include "vulkan/runtime/vk_limits.h"
+
 #include
 #include
 
@@ -123,6 +125,43 @@ typedef struct VkRenderingAttachmentInitialLayoutInfoMESA {
 #define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA_cast \
    VkRenderingAttachmentInitialLayoutInfoMESA
 
+/* Can extend any of the following:
+ *
+ *  - VkPipelineCreateInfo
+ *  - VkRenderingInfo
+ *  - VkCommandBufferInheritanceRenderingInfo
+ *
+ * When chained into VkRenderingInfo, the driver must first begin the render,
+ * including any attachment loads, and then set the remap. The remap does not
+ * apply to the VkRenderingInfo itself.
+ */
+typedef struct VkRenderingAttachmentRemapInfoMESA {
+   VkStructureType sType;
+   const void*     pNext;
+
+   /** A mapping from attachments (as per the vkBeginRendering() numbering)
+    * to logical attachments used by other Vulkan commands such as
+    * CmdClearAttachments() or CmdSetColorBlendEquationEXT().
+    *
+    * Unlike VkRenderingAttachmentLocationInfo, this applies to all Vulkan
+    * commands and structs other than CmdBeginRendering() and
+    * VkCommandBufferInheritanceRenderingInfo, into which it can be chained.
+    */
+   uint32_t colorAttachmentRemap[MESA_VK_MAX_COLOR_ATTACHMENTS];
+
+   /** True if the depth/stencil attachment should be enabled.
+    *
+    * If false, the driver will behave as if the depth attachment is not
+    * present, even though it may still be bound. This implies disabling the
+    * depth and stencil tests as well as depth writes.
+    */
+   VkBool32 depthStencilAttachmentEnable;
+} VkRenderingAttachmentRemapInfoMESA;
+
+#define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_REMAP_INFO_MESA \
+   (VkStructureType)1000044902
+#define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_REMAP_INFO_MESA_cast \
+   VkRenderingAttachmentRemapInfoMESA
 
 struct nir_shader;
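+
+/* Example (illustrative values only, assuming unused slots are marked
+ * VK_ATTACHMENT_UNUSED): a remap chained into VkRenderingInfo that backs
+ * logical attachment 2 with render target 0 and disables the depth/stencil
+ * attachment:
+ *
+ *    const VkRenderingAttachmentRemapInfoMESA remap = {
+ *       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_REMAP_INFO_MESA,
+ *       .colorAttachmentRemap = {
+ *          [0] = 2,
+ *          [1 ... MESA_VK_MAX_COLOR_ATTACHMENTS - 1] = VK_ATTACHMENT_UNUSED,
+ *       },
+ *       .depthStencilAttachmentEnable = VK_FALSE,
+ *    };
+ */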