diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 9411838002a..69144d5e3a9 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -3365,6 +3365,26 @@ tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, } TU_GENX(tu_CmdClearDepthStencilImage); +/* CmdClearAttachments uses the original color attachment index (parameter a) + * instead of the remapped index used by the shader, and our MRTs use the + * remapped indices, so we look a up in cal.color_map and return the result. + * We assert that a shader attachment is always found, thanks to this VU: + * + * VUID-vkCmdClearAttachments-colorAttachment-09503 + * "The colorAttachment member of each element of pAttachments must not + * identify a color attachment that is currently mapped to + * VK_ATTACHMENT_UNUSED in commandBuffer via + * VkRenderingAttachmentLocationInfoKHR" + */ +static unsigned +remap_attachment(struct tu_cmd_buffer *cmd, unsigned a) +{ + unsigned i = cmd->vk.dynamic_graphics_state.cal.color_map[a]; + assert(i != MESA_VK_ATTACHMENT_UNUSED && + "app violates VUID-vkCmdClearAttachments-colorAttachment-09503"); + return i; +} + template static void tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, @@ -3394,9 +3414,10 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, if (a == VK_ATTACHMENT_UNUSED) continue; - clear_rts |= 1 << c; - clear_components |= 0xf << (c * 4); - memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t)); + uint32_t remapped = remap_attachment(cmd, c); + clear_rts |= 1 << remapped; + clear_components |= 0xf << (remapped * 4); + memcpy(clear_value[remapped], &attachments[i].clearValue, 4 * sizeof(uint32_t)); } else { a = subpass->depth_stencil_attachment.attachment; if (a == VK_ATTACHMENT_UNUSED) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index a870beecc57..0e7ee111f6b 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ 
b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -449,54 +449,64 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, enum a6xx_format mrt0_format = FMT6_NONE; + uint32_t written = 0; for (uint32_t i = 0; i < subpass->color_count; ++i) { uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) { - /* From the VkPipelineRenderingCreateInfo definition: - * - * Valid formats indicate that an attachment can be used - but it - * is still valid to set the attachment to NULL when beginning - * rendering. - * - * This means that with dynamic rendering, pipelines may write to - * some attachments that are UNUSED here. Setting the format to 0 - * here should prevent them from writing to anything. This also seems - * to also be required for alpha-to-coverage which can use the alpha - * value for an otherwise-unused attachment. - */ - tu_cs_emit_regs(cs, - RB_MRT_BUF_INFO(CHIP, i), - A6XX_RB_MRT_PITCH(i), - A6XX_RB_MRT_ARRAY_PITCH(i), - A6XX_RB_MRT_BASE(i), - A6XX_RB_MRT_BASE_GMEM(i), - ); - - tu_cs_emit_regs(cs, - A6XX_SP_FS_MRT_REG(i, .dword = 0)); + unsigned remapped = cmd->vk.dynamic_graphics_state.cal.color_map[i]; + if (a == VK_ATTACHMENT_UNUSED || + remapped == MESA_VK_ATTACHMENT_UNUSED) continue; - } const struct tu_image_view *iview = cmd->state.attachments[a]; tu_cs_emit_regs(cs, - RB_MRT_BUF_INFO(CHIP, i, .dword = iview->view.RB_MRT_BUF_INFO), - A6XX_RB_MRT_PITCH(i, iview->view.pitch), - A6XX_RB_MRT_ARRAY_PITCH(i, iview->view.layer_size), - A6XX_RB_MRT_BASE(i, .qword = tu_layer_address(&iview->view, 0)), - A6XX_RB_MRT_BASE_GMEM(i, + RB_MRT_BUF_INFO(CHIP, remapped, .dword = iview->view.RB_MRT_BUF_INFO), + A6XX_RB_MRT_PITCH(remapped, iview->view.pitch), + A6XX_RB_MRT_ARRAY_PITCH(remapped, iview->view.layer_size), + A6XX_RB_MRT_BASE(remapped, .qword = tu_layer_address(&iview->view, 0)), + A6XX_RB_MRT_BASE_GMEM(remapped, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a], 0) ), ); tu_cs_emit_regs(cs, - A6XX_SP_FS_MRT_REG(i, .dword = 
iview->view.SP_FS_MRT_REG)); + A6XX_SP_FS_MRT_REG(remapped, .dword = iview->view.SP_FS_MRT_REG)); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR(i), 3); + tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR(remapped), 3); tu_cs_image_flag_ref(cs, &iview->view, 0); - if (i == 0) + if (remapped == 0) mrt0_format = (enum a6xx_format) (iview->view.SP_FS_MRT_REG & 0xff); + + written |= 1u << remapped; + } + + u_foreach_bit (i, ~written) { + if (i >= subpass->color_count) + break; + + /* From the VkPipelineRenderingCreateInfo definition: + * + * Valid formats indicate that an attachment can be used - but it + * is still valid to set the attachment to NULL when beginning + * rendering. + * + * This means that with dynamic rendering, pipelines may write to + * some MRTs that are UNUSED (or have no attachment mapped to them) here. + * Setting the format to 0 should prevent them from writing to anything. + * This also seems to be required for alpha-to-coverage, which can use the + * alpha value for an otherwise-unused attachment. 
+ */ + tu_cs_emit_regs(cs, + RB_MRT_BUF_INFO(CHIP, i), + A6XX_RB_MRT_PITCH(i), + A6XX_RB_MRT_ARRAY_PITCH(i), + A6XX_RB_MRT_BASE(i), + A6XX_RB_MRT_BASE_GMEM(i), + ); + + tu_cs_emit_regs(cs, + A6XX_SP_FS_MRT_REG(i, .dword = 0)); } tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = mrt0_format)); @@ -599,12 +609,14 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, uint32_t mrts_ubwc_enable = 0; for (uint32_t i = 0; i < subpass->color_count; ++i) { uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) + unsigned remapped = cmd->vk.dynamic_graphics_state.cal.color_map[i]; + if (a == VK_ATTACHMENT_UNUSED || + remapped == MESA_VK_ATTACHMENT_UNUSED) continue; const struct tu_image_view *iview = cmd->state.attachments[a]; if (iview->view.ubwc_enabled) - mrts_ubwc_enable |= 1 << i; + mrts_ubwc_enable |= 1 << remapped; } cntl |= A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable); @@ -4709,6 +4721,12 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, cmd->state.attachments[a] = view; } + const VkRenderingAttachmentLocationInfoKHR ral_info = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR, + .colorAttachmentCount = pRenderingInfo->colorAttachmentCount, + }; + vk_cmd_set_rendering_attachment_locations(&cmd->vk, &ral_info); + if (cmd->dynamic_pass.has_fdm) cmd->patchpoints_ctx = ralloc_context(NULL); @@ -4785,6 +4803,35 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, } TU_GENX(tu_CmdBeginRendering); +template +VKAPI_ATTR void VKAPI_CALL +tu_CmdSetRenderingAttachmentLocationsKHR( + VkCommandBuffer commandBuffer, + const VkRenderingAttachmentLocationInfoKHR *pLocationInfo) +{ + VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + vk_common_CmdSetRenderingAttachmentLocationsKHR(commandBuffer, pLocationInfo); + + tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs); + tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false); + + /* Because this is just a remapping and not a different 
"reference", no + * barrier is needed between accesses to the same attachment with a + * different index. This is different from "classic" renderpasses. However, + * before a7xx (chip == 6 below) the CCU includes the render target ID in the + * cache location calculation, so we need to manually flush/invalidate color + * CCU here since the same attachment may be in a different cache location. + */ + if (cmd->device->physical_device->info->chip == 6) { + struct tu_cache_state *cache = &cmd->state.renderpass_cache; + tu_flush_for_access(cache, TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE, + TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE); + cache->flush_bits |= TU_CMD_FLAG_WAIT_FOR_IDLE; + } +} +TU_GENX(tu_CmdSetRenderingAttachmentLocationsKHR); + template VKAPI_ATTR void VKAPI_CALL tu_CmdNextSubpass2(VkCommandBuffer commandBuffer, diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 84de40e1470..f259bbb44bd 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -2895,12 +2895,14 @@ static const enum mesa_vk_dynamic_graphics_state tu_blend_state[] = { MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE, MESA_VK_DYNAMIC_MS_SAMPLE_MASK, + MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP, }; template static unsigned tu6_blend_size(struct tu_device *dev, const struct vk_color_blend_state *cb, + const struct vk_color_attachment_location_state *cal, bool alpha_to_coverage_enable, bool alpha_to_one_enable, uint32_t sample_mask) @@ -2914,6 +2916,7 @@ template static void tu6_emit_blend(struct tu_cs *cs, const struct vk_color_blend_state *cb, + const struct vk_color_attachment_location_state *cal, bool alpha_to_coverage_enable, bool alpha_to_one_enable, uint32_t sample_mask) @@ -2923,12 +2926,14 @@ tu6_emit_blend(struct tu_cs *cs, uint32_t blend_enable_mask = 0; for (unsigned i = 0; i < cb->attachment_count; i++) { - const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; - if 
(!(cb->color_write_enables & (1u << i))) + if (!(cb->color_write_enables & (1u << i)) || + cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED) continue; + const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; + if (rop_reads_dst || att->blend_enable) { - blend_enable_mask |= 1u << i; + blend_enable_mask |= 1u << cal->color_map[i]; } } @@ -2960,6 +2965,9 @@ tu6_emit_blend(struct tu_cs *cs, .sample_mask = sample_mask)); for (unsigned i = 0; i < num_rts; i++) { + if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED) + continue; + unsigned remapped_idx = cal->color_map[i]; const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; if ((cb->color_write_enables & (1u << i)) && i < cb->attachment_count) { const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->color_blend_op); @@ -2975,13 +2983,13 @@ tu6_emit_blend(struct tu_cs *cs, tu6_blend_factor((VkBlendFactor)att->dst_alpha_blend_factor); tu_cs_emit_regs(cs, - A6XX_RB_MRT_CONTROL(i, + A6XX_RB_MRT_CONTROL(remapped_idx, .blend = att->blend_enable, .blend2 = att->blend_enable, .rop_enable = cb->logic_op_enable, .rop_code = rop, .component_enable = att->write_mask), - A6XX_RB_MRT_BLEND_CONTROL(i, + A6XX_RB_MRT_BLEND_CONTROL(remapped_idx, .rgb_src_factor = src_color_factor, .rgb_blend_opcode = color_op, .rgb_dest_factor = dst_color_factor, @@ -2990,8 +2998,8 @@ tu6_emit_blend(struct tu_cs *cs, .alpha_dest_factor = dst_alpha_factor)); } else { tu_cs_emit_regs(cs, - A6XX_RB_MRT_CONTROL(i,), - A6XX_RB_MRT_BLEND_CONTROL(i,)); + A6XX_RB_MRT_CONTROL(remapped_idx,), + A6XX_RB_MRT_BLEND_CONTROL(remapped_idx,)); } } } @@ -3394,6 +3402,7 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder, BITSET_SET(pipeline_set, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS); } DRAW_STATE(blend, TU_DYNAMIC_STATE_BLEND, cb, + builder->graphics_state.cal, builder->graphics_state.ms->alpha_to_coverage_enable, builder->graphics_state.ms->alpha_to_one_enable, builder->graphics_state.ms->sample_mask); @@ -3607,6 
+3616,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd) &cmd->vk.dynamic_graphics_state.rs); DRAW_STATE(blend, TU_DYNAMIC_STATE_BLEND, &cmd->vk.dynamic_graphics_state.cb, + &cmd->vk.dynamic_graphics_state.cal, cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable, cmd->vk.dynamic_graphics_state.ms.alpha_to_one_enable, cmd->vk.dynamic_graphics_state.ms.sample_mask);