diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index a6930b598b9..2960faa76e8 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -1293,7 +1293,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static VkResult
-prepare_ds(struct panvk_cmd_buffer *cmdbuf)
+prepare_ds(struct panvk_cmd_buffer *cmdbuf, struct pan_earlyzs_state earlyzs)
 {
    bool dirty = dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
@@ -1307,7 +1307,10 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
-                fs_user_dirty(cmdbuf);
+                /* Inputs of the early-ZS state computed by prepare_dcd(). */
+                dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
+                dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
+                fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, OQ);
 
    if (!dirty)
       return VK_SUCCESS;
@@ -1356,8 +1359,11 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
       if (rs->depth_clamp_enable)
          cfg.depth_clamp_mode = MALI_DEPTH_CLAMP_MODE_BOUNDS;
 
-      if (fs)
+      if (fs) {
+         cfg.shader_read_only_z_s = earlyzs.shader_readonly_zs;
          cfg.depth_source = pan_depth_source(&fs->info);
+      }
+
       cfg.depth_write_enable = test_z && ds->depth.write_enable;
       cfg.depth_bias_enable = rs->depth_bias.enable;
       cfg.depth_function = test_z ? translate_compare_func(ds->depth.compare_op)
@@ -1454,7 +1460,8 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static void
-prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
+prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
+            struct pan_earlyzs_state *earlyzs)
 {
    struct cs_builder *b =
       panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
@@ -1478,6 +1485,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
       dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
       /* line mode needs primitive topology */
       dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) ||
+      dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
       fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE) ||
       gfx_state_dirty(cmdbuf, OQ);
    bool dcd1_dirty = dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
@@ -1517,26 +1525,30 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
       struct mali_dcd_flags_0_packed dcd0;
       pan_pack(&dcd0, DCD_FLAGS_0, cfg) {
          if (fs) {
-            uint8_t rt_written = color_attachment_written_mask(
-               fs, &cmdbuf->vk.dynamic_graphics_state.cal);
             uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                               MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+            uint8_t rt_written = color_attachment_written_mask(
+               fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+            uint8_t rt_read =
+               color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+            bool zs_read = zs_attachment_read(fs, &dyns->ial);
 
             cfg.allow_forward_pixel_to_kill =
                fs->info.fs.can_fpk && !(rt_mask & ~rt_written) &&
-               !alpha_to_coverage && !cmdbuf->state.gfx.cb.info.any_dest_read;
+               !(rt_read & rt_written) && !alpha_to_coverage &&
+               !cmdbuf->state.gfx.cb.info.any_dest_read;
 
             bool writes_zs = writes_z || writes_s;
             bool zs_always_passes = ds_test_always_passes(cmdbuf);
             bool oq = cmdbuf->state.gfx.occlusion_query.mode !=
                       MALI_OCCLUSION_MODE_DISABLED;
 
-            struct pan_earlyzs_state earlyzs =
+            *earlyzs =
                pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                               alpha_to_coverage, zs_always_passes, false);
+                               alpha_to_coverage, zs_always_passes, zs_read);
 
-            cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill;
-            cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs.update;
+            cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs->kill;
+            cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs->update;
             cfg.evaluate_per_sample = fs->info.fs.sample_shading &&
                                       (dyns->ms.rasterization_samples > 1);
 
@@ -1748,7 +1760,15 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
 
       cs_move32_to(b, cs_sr_reg32(b, IDVS, VARY_SIZE), varying_size);
 
-      result = prepare_ds(cmdbuf);
+      /* prepare_dcd() computes the early-ZS state that prepare_ds() consumes,
+       * so it has to run first.
+       * NOTE(review): earlyzs stays zeroed if prepare_dcd() does not reach its
+       * pan_earlyzs_get() call; confirm prepare_ds() is never dirty then. */
+      struct pan_earlyzs_state earlyzs = {0};
+
+      prepare_dcd(cmdbuf, &earlyzs);
+
+      result = prepare_ds(cmdbuf, earlyzs);
       if (result != VK_SUCCESS)
          return result;
 
@@ -1756,7 +1776,6 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       if (result != VK_SUCCESS)
          return result;
 
-      prepare_dcd(cmdbuf);
       prepare_vp(cmdbuf);
       prepare_tiler_primitive_size(cmdbuf);
    }
diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c
index 652fc725876..222364cfe80 100644
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c
@@ -282,13 +282,16 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                                  8));
          }
 
-         uint8_t rt_written = color_attachment_written_mask(
-            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
          uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                            MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+         uint8_t rt_written = color_attachment_written_mask(
+            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+         uint8_t rt_read = color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+         bool zs_read = zs_attachment_read(fs, &dyns->ial);
          cfg.properties.allow_forward_pixel_to_kill =
             fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
-            !alpha_to_coverage && !binfo->any_dest_read;
+            !(rt_read & rt_written) && !alpha_to_coverage &&
+            !binfo->any_dest_read;
 
          bool writes_zs = writes_z || writes_s;
          bool zs_always_passes = ds_test_always_passes(cmdbuf);
@@ -297,7 +300,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
 
          struct pan_earlyzs_state earlyzs =
             pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                            alpha_to_coverage, zs_always_passes, false);
+                            alpha_to_coverage, zs_always_passes, zs_read);
 
          cfg.properties.pixel_kill_operation = earlyzs.kill;
          cfg.properties.zs_update_operation = earlyzs.update;
diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h
index 47adfc1ba18..4f2ec63279d 100644
--- a/src/panfrost/vulkan/panvk_cmd_draw.h
+++ b/src/panfrost/vulkan/panvk_cmd_draw.h
@@ -16,6 +16,7 @@
 #include "panvk_image.h"
 #include "panvk_image_view.h"
 #include "panvk_physical_device.h"
+#include "panvk_shader.h"
 
 #include "vk_command_buffer.h"
 #include "vk_format.h"
@@ -376,4 +377,57 @@ color_attachment_written_mask(
    return catt_written_mask;
 }
 
+/* Build the mask of color attachments that the fragment shader also reads as
+ * input attachments. Color attachment i contributes bit i when its
+ * ial->color_map[i] entry, shifted by one, is flagged in
+ * fs->fs.input_attachment_read. When the attachment count is
+ * MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN, util_last_bit() of
+ * color_attachment_mask bounds the walk instead. */
+static inline uint32_t
+color_attachment_read_mask(const struct panvk_shader *fs,
+                           const struct vk_input_attachment_location_state *ial,
+                           uint8_t color_attachment_mask)
+{
+   uint32_t color_attachment_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? util_last_bit(color_attachment_mask)
+         : ial->color_attachment_count;
+   uint32_t catt_read_mask = 0;
+
+   for (uint32_t i = 0; i < color_attachment_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+         continue;
+
+      uint32_t catt_idx = ial->color_map[i] + 1;
+      if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
+         assert(color_attachment_mask & BITFIELD_BIT(i));
+         catt_read_mask |= BITFIELD_BIT(i);
+      }
+   }
+
+   return catt_read_mask;
+}
+
+/* Tell whether the fragment shader reads the depth or stencil attachment as
+ * an input attachment. MESA_VK_ATTACHMENT_NO_INDEX selects bit 0 of
+ * fs->fs.input_attachment_read, any other used index selects bit index + 1,
+ * and MESA_VK_ATTACHMENT_UNUSED selects nothing. */
+static inline bool
+zs_attachment_read(const struct panvk_shader *fs,
+                   const struct vk_input_attachment_location_state *ial)
+{
+   uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                            ? BITFIELD_BIT(0)
+                         : ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
+                            ? BITFIELD_BIT(ial->depth_att + 1)
+                            : 0;
+   uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                              ? BITFIELD_BIT(0)
+                           : ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
+                              ? BITFIELD_BIT(ial->stencil_att + 1)
+                              : 0;
+
+   return (depth_mask | stencil_mask) & fs->fs.input_attachment_read;
+}
+
 #endif
diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h
index 3e232188586..06169ac8083 100644
--- a/src/panfrost/vulkan/panvk_shader.h
+++ b/src/panfrost/vulkan/panvk_shader.h
@@ -52,6 +52,18 @@ enum panvk_desc_table_id {
 };
 #endif
 
+#define PANVK_COLOR_ATTACHMENT(x) (x)
+#define PANVK_ZS_ATTACHMENT       255
+
+struct panvk_input_attachment_info {
+   uint32_t target;
+   uint32_t conversion;
+};
+
+/* One attachment per color, one for depth, one for stencil, and one more
+ * (slot 0) for the attachment without an InputAttachmentIndex attribute. */
+#define INPUT_ATTACHMENT_MAP_SIZE 11
+
 #define FAU_WORD_SIZE sizeof(uint64_t)
 
 #define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
@@ -79,6 +91,8 @@ struct panvk_graphics_sysvals {
    aligned_u64 push_consts;
    aligned_u64 printf_buffer_address;
 
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+
 #if PAN_ARCH <= 7
    /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
     * layer, and filter primitives at the VS level.
@@ -265,6 +279,8 @@ struct panvk_shader {
 
       struct {
          struct pan_earlyzs_lut earlyzs_lut;
+         /* Input attachment map slots read by the shader. */
+         uint32_t input_attachment_read;
       } fs;
    };
 
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
index 1fdc32ae599..5ba08b87eeb 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
@@ -539,6 +539,88 @@ panvk_per_arch(cmd_preload_render_area_border)(
    panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
 }
 
+/* Fill the input attachment map sysvals. Every slot starts as ~0, then each
+ * bound color attachment with a used color_map[] entry gets its target and
+ * internal conversion, and the depth/stencil mappings are tagged as
+ * PANVK_ZS_ATTACHMENT. Slot 0 is the MESA_VK_ATTACHMENT_NO_INDEX slot. */
+static void
+prepare_iam_sysvals(struct panvk_cmd_buffer *cmdbuf, BITSET_WORD *dirty_sysvals)
+{
+   const struct vk_input_attachment_location_state *ial =
+      &cmdbuf->vk.dynamic_graphics_state.ial;
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+   uint32_t catt_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? MAX_RTS
+         : ial->color_attachment_count;
+
+   memset(iam, ~0, sizeof(iam));
+
+   assert(catt_count <= MAX_RTS);
+
+   for (uint32_t i = 0; i < catt_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED ||
+          !(cmdbuf->state.gfx.render.bound_attachments &
+            MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)))
+         continue;
+
+      VkFormat fmt = cmdbuf->state.gfx.render.color_attachments.fmts[i];
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+      uint32_t ia_idx = ial->color_map[i] + 1;
+      assert(ia_idx < ARRAY_SIZE(iam));
+
+      iam[ia_idx].target = PANVK_COLOR_ATTACHMENT(i);
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+#if PAN_ARCH <= 7
+         cfg.register_format =
+            vk_format_is_uint(fmt)   ? MALI_REGISTER_FILE_FORMAT_U32
+            : vk_format_is_sint(fmt) ? MALI_REGISTER_FILE_FORMAT_I32
+                                     : MALI_REGISTER_FILE_FORMAT_F32;
+#endif
+      }
+
+      iam[ia_idx].conversion = conv.opaque[0];
+   }
+
+   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx =
+         ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->depth_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+
+#if PAN_ARCH <= 7
+      /* On v7, we need to pass the depth format around. If we use a conversion
+       * of zero, like we do on v9+, the GPU reports an INVALID_INSTR_ENC. */
+      VkFormat fmt = cmdbuf->state.gfx.render.z_attachment.fmt;
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.register_format = MALI_REGISTER_FILE_FORMAT_F32;
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+      }
+      iam[ia_idx].conversion = conv.opaque[0];
+#endif
+   }
+
+   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx =
+         ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->stencil_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(iam); i++)
+      set_gfx_sysval(cmdbuf, dirty_sysvals, iam[i], iam[i]);
+}
+
 /* This value has been selected to get
  * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing.
  */
@@ -647,6 +729,9 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
       }
    }
 
+   if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP))
+      prepare_iam_sysvals(cmdbuf, dirty_sysvals);
+
    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
 
 #if PAN_ARCH <= 7
diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
index 61fa9b769f9..aada6522ac0 100644
--- a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
+++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
@@ -30,6 +30,7 @@
 #include "panvk_device.h"
 #include "panvk_shader.h"
 
+#include "vk_graphics_state.h"
 #include "vk_pipeline.h"
 #include "vk_pipeline_layout.h"
 
@@ -72,6 +73,7 @@ struct lower_desc_ctx {
    bool add_bounds_checks;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
+   struct panvk_shader *shader;
 };
 
 static nir_address_format
@@ -827,6 +829,146 @@ get_img_index(nir_builder *b, nir_deref_instr *deref,
    }
 }
 
+/* Rewrite a subpass image load so the value is fetched from the render
+ * target when the input attachment maps to one: slot 0 is used for
+ * variables without an index, other variables use index + 1. A target of
+ * ~0 keeps the original image load, targets below 8 are color attachments,
+ * anything else is read as depth or stencil depending on dest_type. The
+ * slot is also recorded in shader->fs.input_attachment_read. */
+static bool
+lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
+                            void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_image_deref_load &&
+       intr->intrinsic != nir_intrinsic_image_deref_sparse_load)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+   enum glsl_sampler_dim image_dim = glsl_get_sampler_dim(deref->type);
+   if (image_dim != GLSL_SAMPLER_DIM_SUBPASS &&
+       image_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
+      return false;
+
+   struct panvk_shader *shader = data;
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   assert(var);
+
+   const unsigned iam_idx =
+      var->data.index != NIR_VARIABLE_NO_INDEX ? var->data.index + 1 : 0;
+   nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
+
+   shader->fs.input_attachment_read |= BITFIELD_BIT(iam_idx);
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_def *target =
+      nir_load_input_attachment_target_pan(b, nir_imm_int(b, iam_idx));
+   nir_def *load_img, *load_output;
+
+   nir_push_if(b, nir_ine_imm(b, target, ~0));
+   {
+      nir_def *is_color_att = nir_ilt_imm(b, target, 8);
+      nir_def *load_color, *load_zs;
+      nir_io_semantics iosem = {0};
+
+      nir_push_if(b, is_color_att);
+      {
+         nir_def *conversion =
+            nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+
+         iosem.location = FRAG_RESULT_DATA0;
+         load_color = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+      }
+      nir_push_else(b, NULL);
+      {
+#if PAN_ARCH <= 7
+         /* On v7, we need to pass the depth format around. If we use a
+          * conversion of zero, like we do on v9+, the GPU reports an
+          * INVALID_INSTR_ENC. */
+         struct mali_internal_conversion_packed stencil_conv;
+
+         pan_pack(&stencil_conv, INTERNAL_CONVERSION, cfg) {
+            cfg.register_format = MALI_REGISTER_FILE_FORMAT_U32;
+            cfg.memory_format = GENX(panfrost_dithered_format_from_pipe_format)(
+               PIPE_FORMAT_S8_UINT, false);
+         }
+
+         nir_def *conversion =
+            dest_type == nir_type_uint32
+               ? nir_imm_int(b, stencil_conv.opaque[0])
+               : nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+#else
+         nir_def *conversion = nir_imm_int(b, 0);
+#endif
+
+         iosem.location = dest_type == nir_type_float32 ? FRAG_RESULT_DEPTH
+                                                        : FRAG_RESULT_STENCIL;
+         target = nir_imm_int(b, 0);
+         load_zs = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+
+         /* If we loaded the stencil value, the upper 24 bits might contain
+          * garbage, hence the masking done here. */
+         if (iosem.location == FRAG_RESULT_STENCIL)
+            load_zs = nir_iand_imm(b, load_zs, BITFIELD_MASK(8));
+      }
+      nir_pop_if(b, NULL);
+
+      load_output = nir_if_phi(b, load_color, load_zs);
+   }
+   nir_push_else(b, NULL);
+   {
+      load_img =
+         intr->intrinsic == nir_intrinsic_image_deref_sparse_load
+            ? nir_image_deref_sparse_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type)
+            : nir_image_deref_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type);
+   }
+   nir_pop_if(b, NULL);
+
+   nir_def_replace(&intr->def, nir_if_phi(b, load_output, load_img));
+
+   return true;
+}
+
+/* Lower input attachment loads: first try the render-target path, then let
+ * nir_lower_input_attachments handle whatever is left. */
+static bool
+lower_input_attachment_loads(nir_shader *nir, struct panvk_shader *shader)
+{
+   bool progress = false;
+
+   /* The lowering inserts if/else blocks, so no metadata survives it. */
+   NIR_PASS(progress, nir, nir_shader_intrinsics_pass,
+            lower_input_attachment_load, nir_metadata_none, shader);
+
+   /* Lower the remaining input attachment loads. */
+   struct nir_input_attachment_options lower_input_attach_opts = {
+      .use_fragcoord_sysval = true,
+      .use_layer_id_sysval = true,
+   };
+   NIR_PASS(progress, nir, nir_lower_input_attachments,
+            &lower_input_attach_opts);
+
+   return progress;
+}
+
 static bool
 lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
                     struct lower_desc_ctx *ctx)
@@ -1231,11 +1373,13 @@ upload_shader_desc_info(struct panvk_device *dev, struct panvk_shader *shader,
 void
 panvk_per_arch(nir_lower_descriptors)(
    nir_shader *nir, struct panvk_device *dev,
-   const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
+   const struct vk_pipeline_robustness_state *rs,
+   uint32_t set_layout_count,
    struct vk_descriptor_set_layout *const *set_layouts,
    struct panvk_shader *shader)
 {
    struct lower_desc_ctx ctx = {
+      .shader = shader,
       .add_bounds_checks =
          rs->storage_buffers !=
             VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
@@ -1279,6 +1423,9 @@ panvk_per_arch(nir_lower_descriptors)(
    create_copy_table(nir, &ctx);
    upload_shader_desc_info(dev, shader, &ctx.desc_info);
 
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS(progress, nir, lower_input_attachment_loads, shader);
+
    NIR_PASS(progress, nir, nir_shader_instructions_pass,
             lower_descriptors_instr, nir_metadata_control_flow, &ctx);
 
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index 6bd08b69a4c..209c0f00e93 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -62,12 +62,18 @@
 #include "vk_shader.h"
 #include "vk_util.h"
 
+struct panvk_lower_sysvals_context {
+   struct panvk_shader *shader;
+   const struct vk_graphics_pipeline_state *state;
+};
+
 static bool
 panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
+   const struct panvk_lower_sysvals_context *ctx = data;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    unsigned bit_size = intr->def.bit_size;
    nir_def *val = NULL;
@@ -127,6 +133,62 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
       val = load_sysval(b, graphics, bit_size, printf_buffer_address);
       break;
 
+   case nir_intrinsic_load_input_attachment_target_pan: {
+      const struct vk_input_attachment_location_state *ial =
+         ctx->state ? ctx->state->ial : NULL;
+
+      /* With a static input attachment location state the target is a
+       * compile-time constant, otherwise it is read from the iam sysvals. */
+      if (ial) {
+         uint32_t index = nir_src_as_uint(intr->src[0]);
+         uint32_t depth_idx = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                 ? 0
+                                 : ial->depth_att + 1;
+         uint32_t stencil_idx = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                   ? 0
+                                   : ial->stencil_att + 1;
+         uint32_t target = ~0;
+
+         if (depth_idx == index || stencil_idx == index) {
+            target = PANVK_ZS_ATTACHMENT;
+         } else {
+            /* MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN is a sentinel, not a
+             * usable loop bound for color_map[]. */
+            uint32_t catt_count =
+               ial->color_attachment_count ==
+                     MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+                  ? ARRAY_SIZE(ial->color_map)
+                  : ial->color_attachment_count;
+
+            for (unsigned i = 0; i < catt_count; i++) {
+               if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+                  continue;
+
+               if (ial->color_map[i] + 1 == index) {
+                  target = PANVK_COLOR_ATTACHMENT(i);
+                  break;
+               }
+            }
+         }
+
+         val = nir_imm_int(b, target);
+      } else {
+         nir_def *ia_info =
+            load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+         val = nir_channel(b, ia_info, 0);
+      }
+      break;
+   }
+
+   case nir_intrinsic_load_input_attachment_conv_pan: {
+      nir_def *ia_info =
+         load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+      val = nir_channel(b, ia_info, 1);
+      break;
+   }
+
    default:
       return false;
    }
@@ -326,15 +388,6 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev,
    NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
    NIR_PASS(_, nir, nir_opt_loop);
 
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      struct nir_input_attachment_options lower_input_attach_opts = {
-         .use_fragcoord_sysval = true,
-         .use_layer_id_sysval = true,
-      };
-
-      NIR_PASS(_, nir, nir_lower_input_attachments, &lower_input_attach_opts);
-   }
-
    /* Do texture lowering here. Yes, it's a duplication of the texture
     * lowering in bifrost_compile. However, we need to lower texture stuff
     * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
@@ -400,6 +453,9 @@ panvk_hash_graphics_state(struct vk_physical_device *device,
    _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask,
                        sizeof(state->rp->view_mask));
 
+   if (state->ial)
+      _mesa_blake3_update(&blake3_ctx, state->ial, sizeof(*state->ial));
+
    _mesa_blake3_final(&blake3_ctx, blake3_out);
 }
 
@@ -677,6 +733,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
                 struct vk_descriptor_set_layout *const *set_layouts,
                 const struct vk_pipeline_robustness_state *rs,
                 uint32_t *noperspective_varyings,
+                const struct vk_graphics_pipeline_state *state,
                 const struct panfrost_compile_inputs *compile_input,
                 struct panvk_shader *shader)
 {
@@ -813,8 +870,13 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
    NIR_PASS(_, nir, pan_nir_lower_static_noperspective,
             *noperspective_varyings);
 
+   struct panvk_lower_sysvals_context lower_sysvals_ctx = {
+      .shader = shader,
+      .state = state,
+   };
+
    NIR_PASS(_, nir, nir_shader_instructions_pass, panvk_lower_sysvals,
-            nir_metadata_control_flow, NULL);
+            nir_metadata_control_flow, &lower_sysvals_ctx);
 
    lower_load_push_consts(nir, shader);
 }
@@ -1134,7 +1196,8 @@ panvk_compile_shader(struct panvk_device *dev,
       nir->info.fs.uses_sample_shading = true;
 
    panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
-                   info->robustness, noperspective_varyings, &inputs, shader);
+                   info->robustness, noperspective_varyings, state, &inputs,
+                   shader);
 
 #if PAN_ARCH >= 9
    if (info->stage == MESA_SHADER_FRAGMENT)
@@ -1319,7 +1382,6 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
    VkResult result;
 
    blob_copy_bytes(blob, &info, sizeof(info));
-
    if (blob->overrun)
       return panvk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
 
@@ -1340,6 +1402,8 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
 
    case MESA_SHADER_FRAGMENT:
       shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info, PAN_ARCH);
+      blob_copy_bytes(blob, &shader->fs.input_attachment_read,
+                      sizeof(shader->fs.input_attachment_read));
       break;
 
    default:
@@ -1441,6 +1505,11 @@ panvk_shader_serialize(struct vk_device *vk_dev,
                        sizeof(shader->cs.local_size));
       break;
 
+   case MESA_SHADER_FRAGMENT:
+      blob_write_bytes(blob, &shader->fs.input_attachment_read,
+                       sizeof(shader->fs.input_attachment_read));
+      break;
+
    default:
       break;
    }