From bfd5ddbf32d12ff73ee1e63014306d5177f9aa82 Mon Sep 17 00:00:00 2001
From: Boris Brezillon
Date: Fri, 24 Jan 2025 12:06:31 +0100
Subject: [PATCH] panvk: Optimize input attachment loads when we can

When we know the input attachment is also an active color attachment,
we can load the value from the tile buffer instead of going back to
the texture.

Signed-off-by: Boris Brezillon
Reviewed-by: Lars-Ivar Hesselberg Simonsen
Part-of:
---
 src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c   |  40 +++--
 src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c    |  11 +-
 src/panfrost/vulkan/panvk_cmd_draw.h          |  44 ++++++
 src/panfrost/vulkan/panvk_shader.h            |  15 ++
 src/panfrost/vulkan/panvk_vX_cmd_draw.c       |  81 ++++++++++
 .../vulkan/panvk_vX_nir_lower_descriptors.c   | 140 +++++++++++++++++-
 src/panfrost/vulkan/panvk_vX_shader.c         |  83 +++++++++--
 7 files changed, 384 insertions(+), 30 deletions(-)
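In outline: the fragment shader records at compile time which input-attachment
slots it reads (shader->fs.input_attachment_read), each slot is resolved to a
color or Z/S target either statically or through a new per-draw sysval table
(iam), and the load is emitted as a tile-buffer read with a texture fallback
for slots that are not live attachments. A minimal standalone sketch (not
driver code) of the compile-time/draw-time mask split, with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
       /* Compile time: the FS reads InputAttachmentIndex 0 -> slot bit 1
        * (bit 0 is reserved for the attachment without an index). */
       uint32_t input_attachment_read = 1u << 1;

       /* Draw time: InputAttachmentIndex 0 currently maps to color RT0, so
        * the read can be served from the tile buffer. */
       uint32_t rt_read = (input_attachment_read & (1u << 1)) ? (1u << 0) : 0;

       printf("rt_read mask: 0x%x\n", rt_read); /* 0x1 */
       return 0;
    }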
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index a6930b598b9..2960faa76e8 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -1293,7 +1293,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static VkResult
-prepare_ds(struct panvk_cmd_buffer *cmdbuf)
+prepare_ds(struct panvk_cmd_buffer *cmdbuf, struct pan_earlyzs_state earlyzs)
 {
    bool dirty = dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
@@ -1307,7 +1307,9 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
-                fs_user_dirty(cmdbuf);
+                dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
+                dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
+                fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, OQ);
 
    if (!dirty)
       return VK_SUCCESS;
@@ -1356,8 +1358,11 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
          if (rs->depth_clamp_enable)
            cfg.depth_clamp_mode = MALI_DEPTH_CLAMP_MODE_BOUNDS;
 
-         if (fs)
+         if (fs) {
+            cfg.shader_read_only_z_s = earlyzs.shader_readonly_zs;
             cfg.depth_source = pan_depth_source(&fs->info);
+         }
+
          cfg.depth_write_enable = test_z && ds->depth.write_enable;
          cfg.depth_bias_enable = rs->depth_bias.enable;
          cfg.depth_function = test_z ? translate_compare_func(ds->depth.compare_op)
@@ -1454,7 +1459,8 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static void
-prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
+prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
+            struct pan_earlyzs_state *earlyzs)
 {
    struct cs_builder *b =
       panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
@@ -1478,6 +1484,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
       dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
       /* line mode needs primitive topology */
       dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) ||
+      dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
       fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE) ||
       gfx_state_dirty(cmdbuf, OQ);
    bool dcd1_dirty = dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
@@ -1517,26 +1524,30 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
    struct mali_dcd_flags_0_packed dcd0;
    pan_pack(&dcd0, DCD_FLAGS_0, cfg) {
       if (fs) {
-         uint8_t rt_written = color_attachment_written_mask(
-            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
          uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                            MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+         uint8_t rt_written = color_attachment_written_mask(
+            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+         uint8_t rt_read =
+            color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+         bool zs_read = zs_attachment_read(fs, &dyns->ial);
 
          cfg.allow_forward_pixel_to_kill =
            fs->info.fs.can_fpk && !(rt_mask & ~rt_written) &&
-           !alpha_to_coverage && !cmdbuf->state.gfx.cb.info.any_dest_read;
+           !(rt_read & rt_written) && !alpha_to_coverage &&
+           !cmdbuf->state.gfx.cb.info.any_dest_read;
 
          bool writes_zs = writes_z || writes_s;
          bool zs_always_passes = ds_test_always_passes(cmdbuf);
         bool oq = cmdbuf->state.gfx.occlusion_query.mode !=
                    MALI_OCCLUSION_MODE_DISABLED;
 
-         struct pan_earlyzs_state earlyzs =
+         *earlyzs =
             pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                            alpha_to_coverage, zs_always_passes, false);
+                            alpha_to_coverage, zs_always_passes, zs_read);
 
-         cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill;
-         cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs.update;
+         cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs->kill;
+         cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs->update;
 
          cfg.evaluate_per_sample = fs->info.fs.sample_shading &&
                                    (dyns->ms.rasterization_samples > 1);
@@ -1748,7 +1759,11 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
 
       cs_move32_to(b, cs_sr_reg32(b, IDVS, VARY_SIZE), varying_size);
 
-      result = prepare_ds(cmdbuf);
+      struct pan_earlyzs_state earlyzs = {0};
+
+      prepare_dcd(cmdbuf, &earlyzs);
+
+      result = prepare_ds(cmdbuf, earlyzs);
       if (result != VK_SUCCESS)
          return result;
 
@@ -1756,7 +1771,6 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       if (result != VK_SUCCESS)
         return result;
 
-      prepare_dcd(cmdbuf);
       prepare_vp(cmdbuf);
       prepare_tiler_primitive_size(cmdbuf);
    }
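The strengthened allow_forward_pixel_to_kill condition above deserves a worked
example: FPK must stay off whenever the shader reads a color attachment it
also writes, presumably because an earlier fragment's color is still consumed
through the tile buffer and must not be killed early. A standalone sketch
(not driver code) with illustrative mask values:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool
    allow_fpk(bool can_fpk, uint8_t rt_mask, uint8_t rt_written,
              uint8_t rt_read, bool alpha_to_coverage, bool any_dest_read)
    {
       /* Mirrors the condition packed into DCD_FLAGS_0 above. */
       return can_fpk && !(rt_mask & ~rt_written) && !(rt_read & rt_written) &&
              !alpha_to_coverage && !any_dest_read;
    }

    int
    main(void)
    {
       /* RT0 bound, written, and read as an input attachment: FPK off. */
       printf("%d\n", allow_fpk(true, 0x1, 0x1, 0x1, false, false)); /* 0 */
       /* RT0 bound and written, no input-attachment read: FPK stays on. */
       printf("%d\n", allow_fpk(true, 0x1, 0x1, 0x0, false, false)); /* 1 */
       return 0;
    }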
diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c
index 652fc725876..222364cfe80 100644
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c
@@ -282,13 +282,16 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                  8));
       }
 
-      uint8_t rt_written = color_attachment_written_mask(
-         fs, &cmdbuf->vk.dynamic_graphics_state.cal);
       uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                         MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+      uint8_t rt_written = color_attachment_written_mask(
+         fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+      uint8_t rt_read = color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+      bool zs_read = zs_attachment_read(fs, &dyns->ial);
 
       cfg.properties.allow_forward_pixel_to_kill =
          fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
-         !alpha_to_coverage && !binfo->any_dest_read;
+         !(rt_read & rt_written) && !alpha_to_coverage &&
+         !binfo->any_dest_read;
 
       bool writes_zs = writes_z || writes_s;
       bool zs_always_passes = ds_test_always_passes(cmdbuf);
@@ -297,7 +300,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
 
       struct pan_earlyzs_state earlyzs =
         pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                         alpha_to_coverage, zs_always_passes, false);
+                         alpha_to_coverage, zs_always_passes, zs_read);
 
       cfg.properties.pixel_kill_operation = earlyzs.kill;
       cfg.properties.zs_update_operation = earlyzs.update;
diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h
index 47adfc1ba18..4f2ec63279d 100644
--- a/src/panfrost/vulkan/panvk_cmd_draw.h
+++ b/src/panfrost/vulkan/panvk_cmd_draw.h
@@ -16,6 +16,7 @@
 #include "panvk_image.h"
 #include "panvk_image_view.h"
 #include "panvk_physical_device.h"
+#include "panvk_shader.h"
 
 #include "vk_command_buffer.h"
 #include "vk_format.h"
@@ -376,4 +377,47 @@ color_attachment_written_mask(
    return catt_written_mask;
 }
 
+static inline uint32_t
+color_attachment_read_mask(const struct panvk_shader *fs,
+                           const struct vk_input_attachment_location_state *ial,
+                           uint8_t color_attachment_mask)
+{
+   uint32_t color_attachment_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? util_last_bit(color_attachment_mask)
+         : ial->color_attachment_count;
+   uint32_t catt_read_mask = 0;
+
+   for (uint32_t i = 0; i < color_attachment_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+         continue;
+
+      uint32_t catt_idx = ial->color_map[i] + 1;
+      if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
+         assert(color_attachment_mask & BITFIELD_BIT(i));
+         catt_read_mask |= BITFIELD_BIT(i);
+      }
+   }
+
+   return catt_read_mask;
+}
+
+static inline bool
+zs_attachment_read(const struct panvk_shader *fs,
+                   const struct vk_input_attachment_location_state *ial)
+{
+   uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                            ? BITFIELD_BIT(0)
+                         : ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
+                            ? BITFIELD_BIT(ial->depth_att + 1)
+                            : 0;
+   uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                              ? BITFIELD_BIT(0)
+                           : ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
+                              ? BITFIELD_BIT(ial->stencil_att + 1)
+                              : 0;
+
+   return (depth_mask | stencil_mask) & fs->fs.input_attachment_read;
+}
+
 #endif
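The two helpers above share one bit encoding with
shader->fs.input_attachment_read: bit 0 stands for an input attachment
declared without an InputAttachmentIndex, and bit i + 1 for
InputAttachmentIndex i. A standalone sketch (not driver code) of the slot
selection in zs_attachment_read(), with stand-in constants:

    #include <assert.h>
    #include <stdint.h>

    #define ATT_UNUSED   0xffu /* stand-in for MESA_VK_ATTACHMENT_UNUSED */
    #define ATT_NO_INDEX 0xfeu /* stand-in for MESA_VK_ATTACHMENT_NO_INDEX */

    static uint32_t
    zs_att_bit(uint32_t att)
    {
       if (att == ATT_NO_INDEX)
          return 1u << 0;         /* the index-less slot */
       if (att != ATT_UNUSED)
          return 1u << (att + 1); /* InputAttachmentIndex att */
       return 0;
    }

    int
    main(void)
    {
       /* FS reads the index-less attachment and InputAttachmentIndex 2. */
       uint32_t input_attachment_read = (1u << 0) | (1u << 3);

       assert(zs_att_bit(ATT_NO_INDEX) & input_attachment_read);
       assert(zs_att_bit(2) & input_attachment_read);
       assert(zs_att_bit(ATT_UNUSED) == 0);
       return 0;
    }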
diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h
index 3e232188586..06169ac8083 100644
--- a/src/panfrost/vulkan/panvk_shader.h
+++ b/src/panfrost/vulkan/panvk_shader.h
@@ -52,6 +52,18 @@ enum panvk_desc_table_id {
 };
 #endif
 
+#define PANVK_COLOR_ATTACHMENT(x) (x)
+#define PANVK_ZS_ATTACHMENT       255
+
+struct panvk_input_attachment_info {
+   uint32_t target;
+   uint32_t conversion;
+};
+
+/* One attachment per color, one for depth, one for stencil, and the last one
+ * for the attachment without an InputAttachmentIndex attribute.
+ */
+#define INPUT_ATTACHMENT_MAP_SIZE 11
+
 #define FAU_WORD_SIZE sizeof(uint64_t)
 
 #define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
@@ -79,6 +91,8 @@ struct panvk_graphics_sysvals {
    aligned_u64 push_consts;
    aligned_u64 printf_buffer_address;
 
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+
 #if PAN_ARCH <= 7
    /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
     * layer, and filter primitives at the VS level.
@@ -265,6 +279,7 @@ struct panvk_shader {
 
       struct {
          struct pan_earlyzs_lut earlyzs_lut;
+         uint32_t input_attachment_read;
       } fs;
    };
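On the map size above: slots 1..10 cover InputAttachmentIndex 0..9 (up to 8
color attachments plus depth and stencil), and slot 0 is the index-less
attachment, giving 11 entries. A standalone sketch of the slot computation
(iam_slot() is a hypothetical helper, not driver code):

    #include <assert.h>
    #include <stdint.h>

    #define INPUT_ATTACHMENT_MAP_SIZE 11
    #define NO_INDEX UINT32_MAX /* stand-in for a missing InputAttachmentIndex */

    static uint32_t
    iam_slot(uint32_t input_attachment_index)
    {
       return input_attachment_index == NO_INDEX ? 0
                                                 : input_attachment_index + 1;
    }

    int
    main(void)
    {
       assert(iam_slot(NO_INDEX) == 0);
       assert(iam_slot(0) == 1);
       /* 8 color + depth + stencil can use indices 0..9 -> slots 1..10. */
       assert(iam_slot(9) == INPUT_ATTACHMENT_MAP_SIZE - 1);
       return 0;
    }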
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
index 1fdc32ae599..5ba08b87eeb 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
@@ -539,6 +539,84 @@ panvk_per_arch(cmd_preload_render_area_border)(
    panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
 }
 
+static void
+prepare_iam_sysvals(struct panvk_cmd_buffer *cmdbuf, BITSET_WORD *dirty_sysvals)
+{
+   const struct vk_input_attachment_location_state *ial =
+      &cmdbuf->vk.dynamic_graphics_state.ial;
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+   uint32_t catt_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? MAX_RTS
+         : ial->color_attachment_count;
+
+   memset(iam, ~0, sizeof(iam));
+
+   assert(catt_count <= MAX_RTS);
+
+   for (uint32_t i = 0; i < catt_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED ||
+          !(cmdbuf->state.gfx.render.bound_attachments &
+            MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)))
+         continue;
+
+      VkFormat fmt = cmdbuf->state.gfx.render.color_attachments.fmts[i];
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+      uint32_t ia_idx = ial->color_map[i] + 1;
+      assert(ia_idx < ARRAY_SIZE(iam));
+
+      iam[ia_idx].target = PANVK_COLOR_ATTACHMENT(i);
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+#if PAN_ARCH <= 7
+         cfg.register_format =
+            vk_format_is_uint(fmt)   ? MALI_REGISTER_FILE_FORMAT_U32
+            : vk_format_is_sint(fmt) ? MALI_REGISTER_FILE_FORMAT_I32
+                                     : MALI_REGISTER_FILE_FORMAT_F32;
+#endif
+      }
+
+      iam[ia_idx].conversion = conv.opaque[0];
+   }
+
+   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx =
+         ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->depth_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+
+#if PAN_ARCH <= 7
+      /* On v7, we need to pass the depth format around. If we use a conversion
+       * of zero, like we do on v9+, the GPU reports an INVALID_INSTR_ENC. */
+      VkFormat fmt = cmdbuf->state.gfx.render.z_attachment.fmt;
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.register_format = MALI_REGISTER_FILE_FORMAT_F32;
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+      }
+
+      iam[ia_idx].conversion = conv.opaque[0];
+#endif
+   }
+
+   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                           ? 0
+                           : ial->stencil_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(iam); i++)
+      set_gfx_sysval(cmdbuf, dirty_sysvals, iam[i], iam[i]);
+}
+
 /* This value has been selected to get
  * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing.
  */
@@ -647,6 +725,9 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
       }
    }
 
+   if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP))
+      prepare_iam_sysvals(cmdbuf, dirty_sysvals);
+
    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
 
 #if PAN_ARCH <= 7
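One detail of prepare_iam_sysvals() that is easy to miss: memset(iam, ~0,
sizeof(iam)) fills every byte with 0xff, so unused entries get
target == 0xffffffff, which is exactly the sentinel the lowered shader code
compares against (nir_ine_imm(b, target, ~0)). A standalone sketch:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    struct input_attachment_info { /* mirrors panvk_input_attachment_info */
       uint32_t target;
       uint32_t conversion;
    };

    int
    main(void)
    {
       struct input_attachment_info iam[11];

       memset(iam, ~0, sizeof(iam)); /* every byte becomes 0xff */
       assert(iam[0].target == UINT32_MAX);
       assert(iam[10].conversion == UINT32_MAX);
       return 0;
    }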
diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
index 61fa9b769f9..aada6522ac0 100644
--- a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
+++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
@@ -30,6 +30,7 @@
 #include "panvk_device.h"
 #include "panvk_shader.h"
 
+#include "vk_graphics_state.h"
 #include "vk_pipeline.h"
 #include "vk_pipeline_layout.h"
 
@@ -72,6 +73,7 @@ struct lower_desc_ctx {
    bool add_bounds_checks;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
+   struct panvk_shader *shader;
 };
 
 static nir_address_format
@@ -827,6 +829,137 @@ get_img_index(nir_builder *b, nir_deref_instr *deref,
    }
 }
 
+static bool
+lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
+                            void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_image_deref_load &&
+       intr->intrinsic != nir_intrinsic_image_deref_sparse_load)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+   enum glsl_sampler_dim image_dim = glsl_get_sampler_dim(deref->type);
+   if (image_dim != GLSL_SAMPLER_DIM_SUBPASS &&
+       image_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
+      return false;
+
+   struct panvk_shader *shader = data;
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   assert(var);
+
+   const unsigned iam_idx =
+      var->data.index != NIR_VARIABLE_NO_INDEX ? var->data.index + 1 : 0;
+   nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
+
+   shader->fs.input_attachment_read |= BITFIELD_BIT(iam_idx);
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_def *target =
+      nir_load_input_attachment_target_pan(b, nir_imm_int(b, iam_idx));
+   nir_def *load_img, *load_output;
+
+   nir_push_if(b, nir_ine_imm(b, target, ~0));
+   {
+      nir_def *is_color_att = nir_ilt_imm(b, target, 8);
+      nir_def *load_color, *load_zs;
+      nir_io_semantics iosem = {0};
+
+      nir_push_if(b, is_color_att);
+      {
+         nir_def *conversion =
+            nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+
+         iosem.location = FRAG_RESULT_DATA0;
+         load_color = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+      }
+      nir_push_else(b, NULL);
+      {
+#if PAN_ARCH <= 7
+         /* On v7, we need to pass the depth format around. If we use a
+          * conversion of zero, like we do on v9+, the GPU reports an
+          * INVALID_INSTR_ENC. */
+         struct mali_internal_conversion_packed stencil_conv;
+
+         pan_pack(&stencil_conv, INTERNAL_CONVERSION, cfg) {
+            cfg.register_format = MALI_REGISTER_FILE_FORMAT_U32;
+            cfg.memory_format = GENX(panfrost_dithered_format_from_pipe_format)(
+               PIPE_FORMAT_S8_UINT, false);
+         }
+
+         nir_def *conversion =
+            dest_type == nir_type_uint32
+               ? nir_imm_int(b, stencil_conv.opaque[0])
+               : nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+#else
+         nir_def *conversion = nir_imm_int(b, 0);
+#endif
+
+         iosem.location = dest_type == nir_type_float32 ? FRAG_RESULT_DEPTH
+                                                        : FRAG_RESULT_STENCIL;
+         target = nir_imm_int(b, 0);
+         load_zs = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+
+         /* If we loaded the stencil value, the upper 24 bits might contain
+          * garbage, hence the masking done here. */
+         if (iosem.location == FRAG_RESULT_STENCIL)
+            load_zs = nir_iand_imm(b, load_zs, BITFIELD_MASK(8));
+      }
+      nir_pop_if(b, NULL);
+
+      load_output = nir_if_phi(b, load_color, load_zs);
+   }
+   nir_push_else(b, NULL);
+   {
+      load_img =
+         intr->intrinsic == nir_intrinsic_image_deref_sparse_load
+            ? nir_image_deref_sparse_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type)
+            : nir_image_deref_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type);
+   }
+   nir_pop_if(b, NULL);
+
+   nir_def_replace(&intr->def, nir_if_phi(b, load_output, load_img));
+
+   return true;
+}
+
+static bool
+lower_input_attachment_loads(nir_shader *nir, struct panvk_shader *shader)
+{
+   bool progress = false;
+
+   NIR_PASS(progress, nir, nir_shader_intrinsics_pass,
+            lower_input_attachment_load, nir_metadata_control_flow, shader);
+
+   /* Lower the remaining input attachment loads. */
+   struct nir_input_attachment_options lower_input_attach_opts = {
+      .use_fragcoord_sysval = true,
+      .use_layer_id_sysval = true,
+   };
+
+   NIR_PASS(progress, nir, nir_lower_input_attachments,
+            &lower_input_attach_opts);
+
+   return progress;
+}
+
 static bool
 lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
                     struct lower_desc_ctx *ctx)
@@ -1231,11 +1364,13 @@ upload_shader_desc_info(struct panvk_device *dev, struct panvk_shader *shader,
 void
 panvk_per_arch(nir_lower_descriptors)(
    nir_shader *nir, struct panvk_device *dev,
-   const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
+   const struct vk_pipeline_robustness_state *rs,
+   uint32_t set_layout_count,
    struct vk_descriptor_set_layout *const *set_layouts,
    struct panvk_shader *shader)
 {
    struct lower_desc_ctx ctx = {
+      .shader = shader,
       .add_bounds_checks =
          rs->storage_buffers !=
             VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
@@ -1279,6 +1414,9 @@ panvk_per_arch(nir_lower_descriptors)(
    create_copy_table(nir, &ctx);
    upload_shader_desc_info(dev, shader, &ctx.desc_info);
 
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS(progress, nir, lower_input_attachment_loads, shader);
+
    NIR_PASS(progress, nir, nir_shader_instructions_pass,
             lower_descriptors_instr, nir_metadata_control_flow, &ctx);
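Rendered as plain C, the control flow lower_input_attachment_load() builds
around each subpass load looks roughly like the sketch below. The real pass
emits NIR if/else blocks joined by phis; the read_* functions here are
illustrative stubs, not driver code:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t read_tile_buffer(uint32_t target) { return 0xab00 | target; }
    static uint32_t read_texture(void) { return 0xcd; }

    static uint32_t
    lowered_load(uint32_t target, int dest_is_float)
    {
       if (target != UINT32_MAX) { /* slot maps to a live attachment */
          if (target < 8)          /* color RT: converted tile-buffer read */
             return read_tile_buffer(target);

          /* Depth/stencil is read through target 0; a stencil read only has
           * 8 defined bits, so the rest is masked off. */
          uint32_t zs = read_tile_buffer(0);
          return dest_is_float ? zs : (zs & 0xff);
       }

       return read_texture();      /* fall back to the texture access path */
    }

    int
    main(void)
    {
       printf("0x%x\n", lowered_load(2, 1));          /* color RT2 */
       printf("0x%x\n", lowered_load(255, 0));        /* stencil, masked */
       printf("0x%x\n", lowered_load(UINT32_MAX, 1)); /* unbound: texture */
       return 0;
    }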
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index 6bd08b69a4c..209c0f00e93 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -62,12 +62,18 @@
 #include "vk_shader.h"
 #include "vk_util.h"
 
+struct panvk_lower_sysvals_context {
+   struct panvk_shader *shader;
+   const struct vk_graphics_pipeline_state *state;
+};
+
 static bool
 panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
+   const struct panvk_lower_sysvals_context *ctx = data;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    unsigned bit_size = intr->def.bit_size;
    nir_def *val = NULL;
@@ -127,6 +133,52 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
       val = load_sysval(b, graphics, bit_size, printf_buffer_address);
       break;
 
+   case nir_intrinsic_load_input_attachment_target_pan: {
+      const struct vk_input_attachment_location_state *ial =
+         ctx->state ? ctx->state->ial : NULL;
+
+      if (ial) {
+         uint32_t index = nir_src_as_uint(intr->src[0]);
+         uint32_t depth_idx = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                 ? 0
+                                 : ial->depth_att + 1;
+         uint32_t stencil_idx = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                   ? 0
+                                   : ial->stencil_att + 1;
+         uint32_t target = ~0;
+
+         if (depth_idx == index || stencil_idx == index) {
+            target = PANVK_ZS_ATTACHMENT;
+         } else {
+            for (unsigned i = 0; i < ial->color_attachment_count; i++) {
+               if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+                  continue;
+
+               if (ial->color_map[i] + 1 == index) {
+                  target = PANVK_COLOR_ATTACHMENT(i);
+                  break;
+               }
+            }
+         }
+
+         val = nir_imm_int(b, target);
+      } else {
+         nir_def *ia_info =
+            load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+         val = nir_channel(b, ia_info, 0);
+      }
+      break;
+   }
+
+   case nir_intrinsic_load_input_attachment_conv_pan: {
+      nir_def *ia_info =
+         load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+      val = nir_channel(b, ia_info, 1);
+      break;
+   }
+
    default:
       return false;
    }
@@ -326,15 +378,6 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev,
    NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
    NIR_PASS(_, nir, nir_opt_loop);
 
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      struct nir_input_attachment_options lower_input_attach_opts = {
-         .use_fragcoord_sysval = true,
-         .use_layer_id_sysval = true,
-      };
-
-      NIR_PASS(_, nir, nir_lower_input_attachments, &lower_input_attach_opts);
-   }
-
    /* Do texture lowering here. Yes, it's a duplication of the texture
    * lowering in bifrost_compile. However, we need to lower texture stuff
    * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
@@ -400,6 +443,9 @@ panvk_hash_graphics_state(struct vk_physical_device *device,
    _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask,
                        sizeof(state->rp->view_mask));
 
+   if (state->ial)
+      _mesa_blake3_update(&blake3_ctx, state->ial, sizeof(*state->ial));
+
    _mesa_blake3_final(&blake3_ctx, blake3_out);
 }
@@ -677,6 +723,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
                 struct vk_descriptor_set_layout *const *set_layouts,
                 const struct vk_pipeline_robustness_state *rs,
                 uint32_t *noperspective_varyings,
+                const struct vk_graphics_pipeline_state *state,
                 const struct panfrost_compile_inputs *compile_input,
                 struct panvk_shader *shader)
 {
@@ -813,8 +860,13 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
       NIR_PASS(_, nir, pan_nir_lower_static_noperspective,
                *noperspective_varyings);
 
+   struct panvk_lower_sysvals_context lower_sysvals_ctx = {
+      .shader = shader,
+      .state = state,
+   };
+
    NIR_PASS(_, nir, nir_shader_instructions_pass, panvk_lower_sysvals,
-            nir_metadata_control_flow, NULL);
+            nir_metadata_control_flow, &lower_sysvals_ctx);
 
    lower_load_push_consts(nir, shader);
 }
@@ -1134,7 +1186,8 @@ panvk_compile_shader(struct panvk_device *dev,
       nir->info.fs.uses_sample_shading = true;
 
    panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
-                   info->robustness, noperspective_varyings, &inputs, shader);
+                   info->robustness, noperspective_varyings, state, &inputs,
+                   shader);
 
 #if PAN_ARCH >= 9
    if (info->stage == MESA_SHADER_FRAGMENT)
@@ -1319,7 +1372,6 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
    VkResult result;
 
    blob_copy_bytes(blob, &info, sizeof(info));
-
    if (blob->overrun)
       return panvk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
 
@@ -1340,6 +1392,8 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
 
    case MESA_SHADER_FRAGMENT:
       shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info, PAN_ARCH);
+      blob_copy_bytes(blob, &shader->fs.input_attachment_read,
+                      sizeof(shader->fs.input_attachment_read));
       break;
 
    default:
@@ -1441,6 +1495,11 @@ panvk_shader_serialize(struct vk_device *vk_dev,
                       sizeof(shader->cs.local_size));
       break;
 
+   case MESA_SHADER_FRAGMENT:
+      blob_write_bytes(blob, &shader->fs.input_attachment_read,
+                       sizeof(shader->fs.input_attachment_read));
+      break;
+
    default:
       break;
    }
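Finally, a standalone sketch of the compile-time target resolution added to
panvk_lower_sysvals(): when the input-attachment map is known at compile time
(ctx->state->ial), the per-slot target folds to a constant and the sysval
load is skipped entirely. The types and constants below are simplified
stand-ins for vk_input_attachment_location_state, not driver code:

    #include <assert.h>
    #include <stdint.h>

    #define ATT_UNUSED    0xffu /* stand-in for MESA_VK_ATTACHMENT_UNUSED */
    #define ATT_NO_INDEX  0xfeu /* stand-in for MESA_VK_ATTACHMENT_NO_INDEX */
    #define ZS_ATTACHMENT 255u  /* PANVK_ZS_ATTACHMENT */
    #define TARGET_NONE   UINT32_MAX

    struct ial_state {
       uint8_t color_map[8];
       uint32_t color_attachment_count;
       uint32_t depth_att;
       uint32_t stencil_att;
    };

    static uint32_t
    resolve_target(const struct ial_state *ial, uint32_t slot)
    {
       uint32_t depth_slot =
          ial->depth_att == ATT_NO_INDEX ? 0 : ial->depth_att + 1;
       uint32_t stencil_slot =
          ial->stencil_att == ATT_NO_INDEX ? 0 : ial->stencil_att + 1;

       if (slot == depth_slot || slot == stencil_slot)
          return ZS_ATTACHMENT;

       for (uint32_t i = 0; i < ial->color_attachment_count; i++) {
          if (ial->color_map[i] != ATT_UNUSED && ial->color_map[i] + 1 == slot)
             return i; /* PANVK_COLOR_ATTACHMENT(i) */
       }

       return TARGET_NONE; /* lowered load falls back to texturing */
    }

    int
    main(void)
    {
       struct ial_state ial = {
          .color_map = {0, ATT_UNUSED},
          .color_attachment_count = 2,
          .depth_att = 1,
          .stencil_att = ATT_UNUSED,
       };

       assert(resolve_target(&ial, 1) == 0);             /* color RT0 */
       assert(resolve_target(&ial, 2) == ZS_ATTACHMENT); /* depth at index 1 */
       assert(resolve_target(&ial, 5) == TARGET_NONE);   /* nothing there */
       return 0;
    }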