diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 0e7ee111f6b..a2b9d4c1f1d 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -2612,6 +2612,14 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
             tu_setup_dynamic_inheritance(cmd_buffer, rendering_info);
             cmd_buffer->state.pass = &cmd_buffer->dynamic_pass;
             cmd_buffer->state.subpass = &cmd_buffer->dynamic_subpass;
+
+            const VkRenderingAttachmentLocationInfoKHR *location_info =
+               vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext,
+                                    RENDERING_ATTACHMENT_LOCATION_INFO_KHR);
+            if (location_info) {
+               vk_common_CmdSetRenderingAttachmentLocationsKHR(commandBuffer,
+                                                               location_info);
+            }
          } else {
             cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
             cmd_buffer->state.subpass =
@@ -4832,6 +4840,75 @@ tu_CmdSetRenderingAttachmentLocationsKHR(
 }
 TU_GENX(tu_CmdSetRenderingAttachmentLocationsKHR);
 
+/* Implements vkCmdSetRenderingInputAttachmentIndicesKHR for dynamic
+ * rendering.
+ *
+ * The common entrypoint is called first and the resulting mapping is read
+ * back from the dynamic graphics state. The input attachment table of the
+ * dynamic subpass is then rebuilt from that mapping and the current subpass
+ * is passed to tu_set_input_attachments().
+ *
+ * Slot 0 is used for depth/stencil without an InputAttachmentIndex, and
+ * InputAttachmentIndex n is stored in slot n + TU_DYN_INPUT_ATT_OFFSET.
+ */
+VKAPI_ATTR void VKAPI_CALL
+tu_CmdSetRenderingInputAttachmentIndicesKHR(
+   VkCommandBuffer commandBuffer,
+   const VkRenderingInputAttachmentIndexInfoKHR *pLocationInfo)
+{
+   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   vk_common_CmdSetRenderingInputAttachmentIndicesKHR(commandBuffer, pLocationInfo);
+
+   const struct vk_input_attachment_location_state *ial =
+      &cmd->vk.dynamic_graphics_state.ial;
+
+   struct tu_subpass *subpass = &cmd->dynamic_subpass;
+
+   /* Clear every slot; only the ones named by the mapping are filled below. */
+   for (unsigned i = 0; i < ARRAY_SIZE(cmd->dynamic_input_attachments); i++) {
+      subpass->input_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
+   }
+
+   /* input_count ends up as one past the highest slot in use. */
+   unsigned input_count = 0;
+   for (unsigned i = 0; i < subpass->color_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+         continue;
+      subpass->input_attachments[ial->color_map[i] + TU_DYN_INPUT_ATT_OFFSET].attachment =
+         subpass->color_attachments[i].attachment;
+      input_count = MAX2(input_count, ial->color_map[i] + TU_DYN_INPUT_ATT_OFFSET + 1);
+   }
+
+   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
+      if (ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX) {
+         subpass->input_attachments[0].attachment =
+            subpass->depth_stencil_attachment.attachment;
+         input_count = MAX2(input_count, 1);
+      } else {
+         subpass->input_attachments[ial->depth_att + TU_DYN_INPUT_ATT_OFFSET].attachment =
+            subpass->depth_stencil_attachment.attachment;
+         input_count = MAX2(input_count, ial->depth_att + TU_DYN_INPUT_ATT_OFFSET + 1);
+      }
+   }
+
+   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
+      if (ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX) {
+         subpass->input_attachments[0].attachment =
+            subpass->depth_stencil_attachment.attachment;
+         input_count = MAX2(input_count, 1);
+      } else {
+         subpass->input_attachments[ial->stencil_att + TU_DYN_INPUT_ATT_OFFSET].attachment =
+            subpass->depth_stencil_attachment.attachment;
+         input_count = MAX2(input_count, ial->stencil_att + TU_DYN_INPUT_ATT_OFFSET + 1);
+      }
+   }
+
+   subpass->input_count = input_count;
+
+   tu_set_input_attachments(cmd, cmd->state.subpass);
+}
+
 template
 VKAPI_ATTR void VKAPI_CALL
 tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h
index 8d5b894d82a..24cbcd95976 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.h
+++ b/src/freedreno/vulkan/tu_cmd_buffer.h
@@ -571,6 +571,7 @@ struct tu_cmd_buffer
 
    struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 1];
    struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS];
+   struct tu_subpass_attachment dynamic_input_attachments[MAX_RTS + 1];
    struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1];
    const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 1];
    VkClearValue dynamic_clear_values[2 * (MAX_RTS + 1)];
diff --git a/src/freedreno/vulkan/tu_common.h b/src/freedreno/vulkan/tu_common.h
index 0b07b566ef2..6660a38ff27 100644
--- a/src/freedreno/vulkan/tu_common.h
+++ b/src/freedreno/vulkan/tu_common.h
@@ -96,6 +96,12 @@
    (MAX_DYNAMIC_UNIFORM_BUFFERS + 2 * MAX_DYNAMIC_STORAGE_BUFFERS) * \
    A6XX_TEX_CONST_DWORDS
 
+/* With dynamic rendering, input attachment indices are shifted by 1 and
+ * attachment 0 is used for input attachments without an InputAttachmentIndex
+ * (which can only be depth/stencil).
+ */
+#define TU_DYN_INPUT_ATT_OFFSET 1
+
 #define SAMPLE_LOCATION_MIN 0.f
 #define SAMPLE_LOCATION_MAX 0.9375f
 
diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc
index a41a8854c3b..04a413681b2 100644
--- a/src/freedreno/vulkan/tu_pass.cc
+++ b/src/freedreno/vulkan/tu_pass.cc
@@ -1084,12 +1084,26 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
    pass->attachments = cmd_buffer->dynamic_rp_attachments;
 
    subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
+   subpass->input_count = info->colorAttachmentCount + TU_DYN_INPUT_ATT_OFFSET;
    subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
+   subpass->input_attachments = cmd_buffer->dynamic_input_attachments;
    subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
    subpass->multiview_mask = info->viewMask;
    subpass->legacy_dithering_enabled = info->flags &
       VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT;
 
+   /* Because we don't know with dynamic rendering when input attachments
+    * are used relative to color attachments, we have to always assume
+    * they may be written as a color or depth/stencil attachment first. This
+    * means we can't apply the optimization in
+    * tu_render_pass_patch_input_gmem(). Initialize this for all possible
+    * attachments now so we don't have to update it later.
+    */
+   for (unsigned i = 0; i < ARRAY_SIZE(cmd_buffer->dynamic_input_attachments);
+        i++) {
+      subpass->input_attachments[i].patch_input_gmem = true;
+   }
+
    uint32_t a = 0;
    for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
       struct tu_render_pass_attachment *att = &pass->attachments[a];
@@ -1097,6 +1111,8 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
 
       if (att_info->imageView == VK_NULL_HANDLE) {
          subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
+         subpass->input_attachments[i + TU_DYN_INPUT_ATT_OFFSET].attachment =
+            VK_ATTACHMENT_UNUSED;
          subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
          continue;
       }
@@ -1109,6 +1125,8 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
                          VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp,
                          VK_ATTACHMENT_STORE_OP_DONT_CARE);
       subpass->color_attachments[i].attachment = a++;
+      subpass->input_attachments[i + TU_DYN_INPUT_ATT_OFFSET].attachment =
+         subpass->color_attachments[i].attachment;
 
       subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;
 
@@ -1147,6 +1165,8 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
          att->gmem = true;
          att->clear_views = info->viewMask;
          subpass->depth_stencil_attachment.attachment = a++;
+         subpass->input_attachments[0].attachment =
+            subpass->depth_stencil_attachment.attachment;
 
          subpass->depth_used = (bool) info->pDepthAttachment;
          subpass->stencil_used = (bool) info->pStencilAttachment;
@@ -1184,9 +1204,11 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
          }
       } else {
          subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
+         subpass->input_attachments[0].attachment = VK_ATTACHMENT_UNUSED;
       }
    } else {
       subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
+      subpass->input_attachments[0].attachment = VK_ATTACHMENT_UNUSED;
    }
 
    pass->attachment_count = a;
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index f259bbb44bd..d8fbbc25279 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -1722,6 +1722,53 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
                                     builder->device);
    }
 
+   /* For a fragment shader built without a VkRenderPass, work out which
+    * input attachment slots are mapped to a color or depth/stencil
+    * attachment. Slot 0 is depth/stencil without an InputAttachmentIndex and
+    * index n is slot n + TU_DYN_INPUT_ATT_OFFSET. All remaining slots are
+    * recorded in the shader key as read-only.
+    */
+   if ((builder->state &
+        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
+       builder->graphics_state.ial &&
+       builder->create_info->renderPass == VK_NULL_HANDLE) {
+      const struct vk_input_attachment_location_state *ial =
+         builder->graphics_state.ial;
+
+      keys[MESA_SHADER_FRAGMENT].dynamic_renderpass = true;
+
+      uint32_t attachments_referenced = 0;
+
+      if (ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN) {
+         attachments_referenced |=
+            BITFIELD_MASK(MAX_RTS) << TU_DYN_INPUT_ATT_OFFSET;
+      } else {
+         for (unsigned i = 0; i < ial->color_attachment_count; i++) {
+            if (ial->color_map[i] != MESA_VK_ATTACHMENT_UNUSED) {
+               attachments_referenced |=
+                  (1u << (ial->color_map[i] + TU_DYN_INPUT_ATT_OFFSET));
+            }
+         }
+      }
+
+      if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
+         if (ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX)
+            attachments_referenced |= 1;
+         else
+            attachments_referenced |= 1u << (ial->depth_att + TU_DYN_INPUT_ATT_OFFSET);
+      }
+
+      if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
+         if (ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX)
+            attachments_referenced |= 1;
+         else
+            attachments_referenced |= 1u << (ial->stencil_att + TU_DYN_INPUT_ATT_OFFSET);
+      }
+
+      keys[MESA_SHADER_FRAGMENT].read_only_input_attachments =
+         ~attachments_referenced;
+   }
+
    if (builder->create_flags &
        VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) {
       for (unsigned i = 0; i < builder->num_libraries; i++) {
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 39fe9578892..afbf90bea4c 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -363,7 +363,9 @@ static nir_def *
 build_bindless(struct tu_device *dev, nir_builder *b,
                nir_deref_instr *deref, bool is_sampler,
                struct tu_shader *shader,
-               const struct tu_pipeline_layout *layout)
+               const struct tu_pipeline_layout *layout,
+               uint32_t read_only_input_attachments,
+               bool dynamic_renderpass)
 {
    nir_variable *var = nir_deref_instr_get_variable(deref);
 
@@ -374,9 +376,27 @@ build_bindless(struct tu_device *dev, nir_builder *b,
 
    /* input attachments use non bindless workaround */
    if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT &&
+       (!dynamic_renderpass ||
+        !(read_only_input_attachments &
+          (var->data.index == NIR_VARIABLE_NO_INDEX ? 0x1 :
+           1u << (var->data.index + TU_DYN_INPUT_ATT_OFFSET)))) &&
        !TU_DEBUG(DYNAMIC)) {
       const struct glsl_type *glsl_type = glsl_without_array(var->type);
-      uint32_t idx = var->data.index * 2;
+      uint32_t idx;
+
+      /* With dynamic renderpasses, we reserve the first slot (textures 0 and
+       * 1) for an input attachment without an InputAttachmentIndex, which
+       * must be for depth/stencil if it is not read-only, and shift over the
+       * rest of the indices by TU_DYN_INPUT_ATT_OFFSET.
+       */
+      if (var->data.index == NIR_VARIABLE_NO_INDEX) {
+         assert(dynamic_renderpass);
+         idx = 0;
+      } else if (dynamic_renderpass) {
+         idx = (var->data.index + TU_DYN_INPUT_ATT_OFFSET) * 2;
+      } else {
+         idx = var->data.index * 2;
+      }
 
       BITSET_SET_RANGE_INSIDE_WORD(b->shader->info.textures_used,
                                    idx, (idx + bind_layout->array_size * 2) - 1);
@@ -425,7 +445,7 @@ lower_image_deref(struct tu_device *dev, nir_builder *b,
                   const struct tu_pipeline_layout *layout)
 {
    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-   nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout);
+   nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout, 0, false);
    nir_rewrite_image_intrinsic(instr, bindless, true);
 }
 
@@ -568,14 +588,17 @@ lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
 
 static bool
 lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
-          struct tu_shader *shader, const struct tu_pipeline_layout *layout)
+          struct tu_shader *shader, const struct tu_pipeline_layout *layout,
+          uint32_t read_only_input_attachments, bool dynamic_renderpass)
 {
    lower_tex_ycbcr(layout, b, tex);
 
    int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
    if (sampler_src_idx >= 0) {
       nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
-      nir_def *bindless = build_bindless(dev, b, deref, true, shader, layout);
+      nir_def *bindless = build_bindless(dev, b, deref, true, shader, layout,
+                                         read_only_input_attachments,
+                                         dynamic_renderpass);
       nir_src_rewrite(&tex->src[sampler_src_idx].src, bindless);
       tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
    }
@@ -583,7 +606,9 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
    int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
    if (tex_src_idx >= 0) {
       nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
-      nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout);
+      nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout,
+                                         read_only_input_attachments,
+                                         dynamic_renderpass);
       nir_src_rewrite(&tex->src[tex_src_idx].src, bindless);
       tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
 
@@ -599,6 +624,8 @@ struct lower_instr_params {
    struct tu_device *dev;
    struct tu_shader *shader;
    const struct tu_pipeline_layout *layout;
+   uint32_t read_only_input_attachments;
+   bool dynamic_renderpass;
 };
 
 static bool
@@ -608,7 +635,9 @@ lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
    b->cursor = nir_before_instr(instr);
    switch (instr->type) {
    case nir_instr_type_tex:
-      return lower_tex(b, nir_instr_as_tex(instr), params->dev, params->shader, params->layout);
+      return lower_tex(b, nir_instr_as_tex(instr), params->dev, params->shader, params->layout,
+                       params->read_only_input_attachments,
+                       params->dynamic_renderpass);
    case nir_instr_type_intrinsic:
       return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->dev, params->shader, params->layout);
    default:
@@ -778,6 +807,8 @@ static bool
 tu_lower_io(nir_shader *shader, struct tu_device *dev,
             struct tu_shader *tu_shader,
             const struct tu_pipeline_layout *layout,
+            uint32_t read_only_input_attachments,
+            bool dynamic_renderpass,
             unsigned *reserved_consts_vec4_out)
 {
    tu_shader->const_state.push_consts = (struct tu_push_constant_range) {
@@ -889,6 +920,8 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
       .dev = dev,
       .shader = tu_shader,
       .layout = layout,
+      .read_only_input_attachments = read_only_input_attachments,
+      .dynamic_renderpass = dynamic_renderpass,
    };
 
    bool progress = false;
@@ -2364,7 +2397,12 @@ tu_shader_create(struct tu_device *dev,
           * multiview is enabled.
           */
          .use_view_id_for_layer = key->multiview_mask != 0,
-         .unscaled_input_attachment_ir3 = key->unscaled_input_fragcoord,
+         .unscaled_depth_stencil_ir3 =
+            key->dynamic_renderpass && !(key->read_only_input_attachments & 1),
+         .unscaled_input_attachment_ir3 =
+            key->dynamic_renderpass ?
+            ~(key->read_only_input_attachments >> TU_DYN_INPUT_ATT_OFFSET) :
+            key->unscaled_input_fragcoord,
       };
       NIR_PASS_V(nir, nir_lower_input_attachments, &att_options);
    }
@@ -2469,7 +2507,9 @@ tu_shader_create(struct tu_device *dev,
    }
 
    unsigned reserved_consts_vec4 = 0;
-   NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4);
+   NIR_PASS_V(nir, tu_lower_io, dev, shader, layout,
+              key->read_only_input_attachments, key->dynamic_renderpass,
+              &reserved_consts_vec4);
 
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index c6355be68db..c6a68f296bf 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -102,8 +102,18 @@ struct tu_shader
 struct tu_shader_key
 {
    unsigned multiview_mask;
+   /* Bitmask of input attachment slots that are not mapped to any color or
+    * depth/stencil attachment. Bit 0 is the slot for an input attachment
+    * without an InputAttachmentIndex and bit (n + TU_DYN_INPUT_ATT_OFFSET) is
+    * InputAttachmentIndex n. Only consulted when dynamic_renderpass is set.
+    */
+   uint16_t read_only_input_attachments;
    bool force_sample_interp;
    bool fragment_density_map;
+   /* Set for a fragment shader built without a VkRenderPass when input
+    * attachment location state is available.
+    */
+   bool dynamic_renderpass;
    uint8_t unscaled_input_fragcoord;
    enum ir3_wavesize_option api_wavesize, real_wavesize;
 };