tu: Support dynamic input attachments

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31261>
This commit is contained in:
Connor Abbott 2024-03-05 09:41:47 -05:00 committed by Marge Bot
parent d50eef5b06
commit beb513ad78
7 changed files with 186 additions and 9 deletions

View file

@ -2612,6 +2612,14 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
tu_setup_dynamic_inheritance(cmd_buffer, rendering_info);
cmd_buffer->state.pass = &cmd_buffer->dynamic_pass;
cmd_buffer->state.subpass = &cmd_buffer->dynamic_subpass;
const VkRenderingAttachmentLocationInfoKHR *location_info =
vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext,
RENDERING_ATTACHMENT_LOCATION_INFO_KHR);
if (location_info) {
vk_common_CmdSetRenderingAttachmentLocationsKHR(commandBuffer,
location_info);
}
} else {
cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
cmd_buffer->state.subpass =
@ -4832,6 +4840,62 @@ tu_CmdSetRenderingAttachmentLocationsKHR(
}
TU_GENX(tu_CmdSetRenderingAttachmentLocationsKHR);
/* Rebuilds the dynamic subpass' input attachment table from the input
 * attachment location state held in cmd->vk.dynamic_graphics_state.ial.
 *
 * Slot 0 of the table is used for depth/stencil mapped without an index
 * (MESA_VK_ATTACHMENT_NO_INDEX); every indexed mapping lands at
 * index + TU_DYN_INPUT_ATT_OFFSET. subpass->input_count ends up as one past
 * the highest slot that was populated (0 if none).
 */
VKAPI_ATTR void VKAPI_CALL
tu_CmdSetRenderingInputAttachmentIndicesKHR(
   VkCommandBuffer commandBuffer,
   const VkRenderingInputAttachmentIndexInfoKHR *pLocationInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   /* Forward to the common implementation first; the state consumed below
    * is presumably what that call updates (not visible here).
    */
   vk_common_CmdSetRenderingInputAttachmentIndicesKHR(commandBuffer, pLocationInfo);

   struct tu_subpass *subpass = &cmd->dynamic_subpass;
   const struct vk_input_attachment_location_state *ial =
      &cmd->vk.dynamic_graphics_state.ial;

   /* Start from a clean table: every slot unused until a mapping claims it. */
   const unsigned slot_count = ARRAY_SIZE(cmd->dynamic_input_attachments);
   for (unsigned slot = 0; slot < slot_count; slot++)
      subpass->input_attachments[slot].attachment = VK_ATTACHMENT_UNUSED;

   unsigned input_count = 0;

   /* Color attachments: each mapped render target goes to its shifted slot. */
   for (unsigned rt = 0; rt < subpass->color_count; rt++) {
      if (ial->color_map[rt] != MESA_VK_ATTACHMENT_UNUSED) {
         const unsigned slot = ial->color_map[rt] + TU_DYN_INPUT_ATT_OFFSET;
         subpass->input_attachments[slot].attachment =
            subpass->color_attachments[rt].attachment;
         input_count = MAX2(input_count, slot + 1);
      }
   }

   /* Depth: slot 0 when mapped without an index, shifted slot otherwise. */
   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
      const unsigned slot =
         (ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX)
            ? 0
            : ial->depth_att + TU_DYN_INPUT_ATT_OFFSET;
      subpass->input_attachments[slot].attachment =
         subpass->depth_stencil_attachment.attachment;
      input_count = MAX2(input_count, slot + 1);
   }

   /* Stencil: same rule as depth, and it refers to the same combined
    * depth/stencil attachment of the subpass.
    */
   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
      const unsigned slot =
         (ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX)
            ? 0
            : ial->stencil_att + TU_DYN_INPUT_ATT_OFFSET;
      subpass->input_attachments[slot].attachment =
         subpass->depth_stencil_attachment.attachment;
      input_count = MAX2(input_count, slot + 1);
   }

   subpass->input_count = input_count;

   /* Hand the current subpass to tu_set_input_attachments() now that the
    * table has been rebuilt.
    */
   tu_set_input_attachments(cmd, cmd->state.subpass);
}
template <chip CHIP>
VKAPI_ATTR void VKAPI_CALL
tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,

View file

@ -571,6 +571,7 @@ struct tu_cmd_buffer
struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 1];
struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS];
struct tu_subpass_attachment dynamic_input_attachments[MAX_RTS + 1];
struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1];
const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 1];
VkClearValue dynamic_clear_values[2 * (MAX_RTS + 1)];

View file

@ -96,6 +96,12 @@
(MAX_DYNAMIC_UNIFORM_BUFFERS + 2 * MAX_DYNAMIC_STORAGE_BUFFERS) * \
A6XX_TEX_CONST_DWORDS
/* With dynamic rendering, input attachment indices are shifted by 1 and
* attachment 0 is used for input attachments without an InputAttachmentIndex
* (which can only be depth/stencil).
*/
#define TU_DYN_INPUT_ATT_OFFSET 1
#define SAMPLE_LOCATION_MIN 0.f
#define SAMPLE_LOCATION_MAX 0.9375f

View file

@ -1084,12 +1084,26 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
pass->attachments = cmd_buffer->dynamic_rp_attachments;
subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
subpass->input_count = info->colorAttachmentCount + 1;
subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
subpass->input_attachments = cmd_buffer->dynamic_input_attachments;
subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
subpass->multiview_mask = info->viewMask;
subpass->legacy_dithering_enabled = info->flags &
VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT;
/* Because we don't know with dynamic rendering when input attachments
* are used relative to color attachments, we have to always assume
* they may be written as a color or depth/stencil attachment first. This
* means we can't apply the optimization in
* tu_render_pass_patch_input_gmem(). Initialize this for all possible
* attachments now so we don't have to update it later.
*/
for (unsigned i = 0; i < ARRAY_SIZE(cmd_buffer->dynamic_input_attachments);
i++) {
subpass->input_attachments[i].patch_input_gmem = true;
}
uint32_t a = 0;
for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
struct tu_render_pass_attachment *att = &pass->attachments[a];
@ -1097,6 +1111,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
if (att_info->imageView == VK_NULL_HANDLE) {
subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
subpass->input_attachments[i + 1].attachment = VK_ATTACHMENT_UNUSED;
subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
continue;
}
@ -1109,6 +1124,9 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp,
VK_ATTACHMENT_STORE_OP_DONT_CARE);
subpass->color_attachments[i].attachment = a++;
subpass->input_attachments[i + 1].attachment =
subpass->color_attachments[i].attachment;
subpass->input_attachments[i + 1].patch_input_gmem = true;
subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;
@ -1147,6 +1165,9 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
att->gmem = true;
att->clear_views = info->viewMask;
subpass->depth_stencil_attachment.attachment = a++;
subpass->input_attachments[0].attachment =
subpass->depth_stencil_attachment.attachment;
subpass->input_attachments[0].patch_input_gmem = true;
subpass->depth_used = (bool) info->pDepthAttachment;
subpass->stencil_used = (bool) info->pStencilAttachment;
@ -1184,9 +1205,11 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
}
} else {
subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
subpass->input_attachments[0].attachment = VK_ATTACHMENT_UNUSED;
}
} else {
subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
subpass->input_attachments[0].attachment = VK_ATTACHMENT_UNUSED;
}
pass->attachment_count = a;

View file

@ -1722,6 +1722,47 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
builder->device);
}
/* Fragment shaders built without a VkRenderPass (dynamic rendering) that
 * carry input attachment location state: flag the shader key as
 * dynamic-renderpass and compute which input attachment slots alias a
 * color/depth/stencil attachment. Slot 0 is depth/stencil without an
 * InputAttachmentIndex; an indexed mapping uses bit
 * (index + TU_DYN_INPUT_ATT_OFFSET). The key stores the complement of that
 * mask in read_only_input_attachments.
 */
if ((builder->state &
     VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
    builder->graphics_state.ial &&
    builder->create_info->renderPass == VK_NULL_HANDLE) {
   const struct vk_input_attachment_location_state *ial =
      builder->graphics_state.ial;

   keys[MESA_SHADER_FRAGMENT].dynamic_renderpass = true;

   uint32_t attachments_referenced = 0;

   if (ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN) {
      /* Color attachment count not known here: conservatively treat every
       * possible color slot as referenced.
       */
      attachments_referenced |=
         BITFIELD_MASK(MAX_RTS) << TU_DYN_INPUT_ATT_OFFSET;
   } else {
      for (unsigned i = 0; i < ial->color_attachment_count; i++) {
         if (ial->color_map[i] != MESA_VK_ATTACHMENT_UNUSED) {
            attachments_referenced |=
               (1u << (ial->color_map[i] + TU_DYN_INPUT_ATT_OFFSET));
         }
      }
   }

   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
      if (ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX)
         attachments_referenced |= 1;
      else
         attachments_referenced |=
            1u << (ial->depth_att + TU_DYN_INPUT_ATT_OFFSET);
   }

   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
      if (ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX)
         attachments_referenced |= 1;
      else
         attachments_referenced |=
            1u << (ial->stencil_att + TU_DYN_INPUT_ATT_OFFSET);
   }

   /* Everything not referenced above is recorded as read-only. */
   keys[MESA_SHADER_FRAGMENT].read_only_input_attachments =
      ~attachments_referenced;
}
if (builder->create_flags &
VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) {
for (unsigned i = 0; i < builder->num_libraries; i++) {

View file

@ -363,7 +363,9 @@ static nir_def *
build_bindless(struct tu_device *dev, nir_builder *b,
nir_deref_instr *deref, bool is_sampler,
struct tu_shader *shader,
const struct tu_pipeline_layout *layout)
const struct tu_pipeline_layout *layout,
uint32_t read_only_input_attachments,
bool dynamic_renderpass)
{
nir_variable *var = nir_deref_instr_get_variable(deref);
@ -374,9 +376,27 @@ build_bindless(struct tu_device *dev, nir_builder *b,
/* input attachments use non bindless workaround */
if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT &&
(!dynamic_renderpass ||
(var->data.index == NIR_VARIABLE_NO_INDEX ?
!(read_only_input_attachments & 0x1) :
!(read_only_input_attachments & (1u << (var->data.index + 1))))) &&
!TU_DEBUG(DYNAMIC)) {
const struct glsl_type *glsl_type = glsl_without_array(var->type);
uint32_t idx = var->data.index * 2;
uint32_t idx;
/* With dynamic renderpasses, we reserve the first two attachments for
* input attachments without an InputAttachmentIndex, which must be for
* depth/stencil if they are not read-only, and shift over the rest of
* the indices.
*/
if (var->data.index == ~0u) {
assert(dynamic_renderpass);
idx = 0;
} else if (dynamic_renderpass) {
idx = (var->data.index + 1) * 2;
} else {
idx = var->data.index * 2;
}
BITSET_SET_RANGE_INSIDE_WORD(b->shader->info.textures_used, idx, (idx + bind_layout->array_size * 2) - 1);
@ -425,7 +445,7 @@ lower_image_deref(struct tu_device *dev, nir_builder *b,
const struct tu_pipeline_layout *layout)
{
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout);
nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout, 0, false);
nir_rewrite_image_intrinsic(instr, bindless, true);
}
@ -568,14 +588,17 @@ lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
struct tu_shader *shader, const struct tu_pipeline_layout *layout)
struct tu_shader *shader, const struct tu_pipeline_layout *layout,
uint32_t read_only_input_attachments, bool dynamic_renderpass)
{
lower_tex_ycbcr(layout, b, tex);
int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
if (sampler_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
nir_def *bindless = build_bindless(dev, b, deref, true, shader, layout);
nir_def *bindless = build_bindless(dev, b, deref, true, shader, layout,
read_only_input_attachments,
dynamic_renderpass);
nir_src_rewrite(&tex->src[sampler_src_idx].src, bindless);
tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
}
@ -583,7 +606,9 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
if (tex_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout);
nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout,
read_only_input_attachments,
dynamic_renderpass);
nir_src_rewrite(&tex->src[tex_src_idx].src, bindless);
tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
@ -599,6 +624,8 @@ struct lower_instr_params {
struct tu_device *dev;
struct tu_shader *shader;
const struct tu_pipeline_layout *layout;
uint32_t read_only_input_attachments;
bool dynamic_renderpass;
};
static bool
@ -608,7 +635,9 @@ lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
b->cursor = nir_before_instr(instr);
switch (instr->type) {
case nir_instr_type_tex:
return lower_tex(b, nir_instr_as_tex(instr), params->dev, params->shader, params->layout);
return lower_tex(b, nir_instr_as_tex(instr), params->dev, params->shader, params->layout,
params->read_only_input_attachments,
params->dynamic_renderpass);
case nir_instr_type_intrinsic:
return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->dev, params->shader, params->layout);
default:
@ -778,6 +807,8 @@ static bool
tu_lower_io(nir_shader *shader, struct tu_device *dev,
struct tu_shader *tu_shader,
const struct tu_pipeline_layout *layout,
uint32_t read_only_input_attachments,
bool dynamic_renderpass,
unsigned *reserved_consts_vec4_out)
{
tu_shader->const_state.push_consts = (struct tu_push_constant_range) {
@ -889,6 +920,8 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
.dev = dev,
.shader = tu_shader,
.layout = layout,
.read_only_input_attachments = read_only_input_attachments,
.dynamic_renderpass = dynamic_renderpass,
};
bool progress = false;
@ -2364,7 +2397,12 @@ tu_shader_create(struct tu_device *dev,
* multiview is enabled.
*/
.use_view_id_for_layer = key->multiview_mask != 0,
.unscaled_input_attachment_ir3 = key->unscaled_input_fragcoord,
.unscaled_depth_stencil_ir3 =
key->dynamic_renderpass && !(key->read_only_input_attachments & 1),
.unscaled_input_attachment_ir3 =
key->dynamic_renderpass ?
~(key->read_only_input_attachments >> 1) :
key->unscaled_input_fragcoord,
};
NIR_PASS_V(nir, nir_lower_input_attachments, &att_options);
}
@ -2469,7 +2507,9 @@ tu_shader_create(struct tu_device *dev,
}
unsigned reserved_consts_vec4 = 0;
NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4);
NIR_PASS_V(nir, tu_lower_io, dev, shader, layout,
key->read_only_input_attachments, key->dynamic_renderpass,
&reserved_consts_vec4);
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

View file

@ -102,8 +102,10 @@ struct tu_shader
/* Per-shader compile key; tu_shader_create() reads these fields to choose
 * lowering options.
 */
struct tu_shader_key {
/* Non-zero enables use_view_id_for_layer in tu_shader_create(). */
unsigned multiview_mask;
/* Bitmask over dynamic-rendering input attachment slots. The pipeline
 * builder stores the complement of the slots referenced by the color/depth/
 * stencil input attachment mapping: bit 0 is depth/stencil without an
 * InputAttachmentIndex, bit (index + 1) is InputAttachmentIndex `index`.
 * NOTE(review): the mask is computed as uint32_t and truncated to 16 bits on
 * assignment; assumes MAX_RTS + 1 <= 16 - confirm.
 */
uint16_t read_only_input_attachments;
bool force_sample_interp;
bool fragment_density_map;
/* Set by the pipeline builder when the fragment shader is compiled with
 * input attachment location state and no VkRenderPass; selects the shifted
 * input attachment indexing in build_bindless().
 */
bool dynamic_renderpass;
/* Used as unscaled_input_attachment_ir3 when dynamic_renderpass is false. */
uint8_t unscaled_input_fragcoord;
enum ir3_wavesize_option api_wavesize, real_wavesize;
};