From 7369f775900376259ff1c833b985843395f41fb8 Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Wed, 12 Mar 2025 22:12:49 +0000 Subject: [PATCH] pvr, pco: z-replicate support Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/imagination/pco/pco_data.h | 2 + src/imagination/pco/pco_nir_pvfio.c | 83 ++++++++++++++++++++++++++- src/imagination/vulkan/pvr_pipeline.c | 50 ++++++++++++---- 3 files changed, 122 insertions(+), 13 deletions(-) diff --git a/src/imagination/pco/pco_data.h b/src/imagination/pco/pco_data.h index a4ed87577b8..a82b0e2e261 100644 --- a/src/imagination/pco/pco_data.h +++ b/src/imagination/pco/pco_data.h @@ -80,6 +80,8 @@ typedef struct _pco_fs_data { /** On-chip input attachment formats. */ enum pipe_format ia_formats[4]; + gl_frag_result z_replicate; + /* Blend options. */ nir_lower_blend_options blend_opts; pco_range blend_consts; diff --git a/src/imagination/pco/pco_nir_pvfio.c b/src/imagination/pco/pco_nir_pvfio.c index 4094743d064..550d7b52b1e 100644 --- a/src/imagination/pco/pco_nir_pvfio.c +++ b/src/imagination/pco/pco_nir_pvfio.c @@ -960,6 +960,30 @@ static nir_def *lower_pfo(nir_builder *b, nir_instr *instr, void *cb_data) static bool lower_isp_fb(nir_builder *b, struct pfo_state *state) { bool has_depth_feedback = !!state->depth_feedback_src; + if (b->shader->info.writes_memory && !has_depth_feedback) { + nir_variable *var_pos = nir_get_variable_with_location(b->shader, + nir_var_shader_in, + VARYING_SLOT_POS, + glsl_vec4_type()); + var_pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE; + + b->cursor = nir_before_block( + nir_start_block(nir_shader_get_entrypoint(b->shader))); + + state->depth_feedback_src = + nir_load_input(b, + 1, + 32, + nir_imm_int(b, 0), + .component = 2, + .dest_type = nir_type_float32, + .io_semantics = (nir_io_semantics){ + .location = VARYING_SLOT_POS, + .num_slots = 1, + }); + + has_depth_feedback = true; + } if (!has_depth_feedback && !state->has_discards) return false; @@ -980,7 
+1004,7 @@ static bool lower_isp_fb(nir_builder *b, struct pfo_state *state) b, state->has_discards ? nir_i2b(b, nir_load_reg(b, state->discard_cond_reg)) : undef, - state->depth_feedback_src ? state->depth_feedback_src : undef); + has_depth_feedback ? state->depth_feedback_src : undef); state->fs->uses.discard = state->has_discards; state->fs->uses.depth_feedback = has_depth_feedback; @@ -1005,6 +1029,58 @@ static bool sink_outputs(nir_shader *shader, struct pfo_state *state) return progress; } +/** \brief Writes the depth feedback value to the z-replicate fragment output. Declares a float shader output at location fs->z_replicate and stores state->depth_feedback_src to it after the last block of the entrypoint; when no depth feedback source has been recorded, component 2 of the VARYING_SLOT_POS input is loaded at the start of the entrypoint and used instead. \param[in,out] shader NIR shader. \param[in,out] state Per-fragment output pass state. \return True if the shader was modified; false for internal shaders or when fs->z_replicate is ~0u. */ static bool z_replicate(nir_shader *shader, struct pfo_state *state) +{ + /* Nothing to do for internal shaders, or when z_replicate holds the ~0u "unset" value. */ if (shader->info.internal || state->fs->z_replicate == ~0u) + return false; + + /* The z-replicate location is expected not to be declared as a shader output yet. */ assert(!nir_find_variable_with_location(shader, + nir_var_shader_out, + state->fs->z_replicate)); + + nir_create_variable_with_location(shader, + nir_var_shader_out, + state->fs->z_replicate, + glsl_float_type()); + + /* No depth feedback source recorded: load component 2 of the position input at the top of the entrypoint and remember it in the pass state. */ if (!state->depth_feedback_src) { + nir_variable *var_pos = nir_get_variable_with_location(shader, + nir_var_shader_in, + VARYING_SLOT_POS, + glsl_vec4_type()); + var_pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE; + + nir_builder b = nir_builder_at( + nir_before_block(nir_start_block(nir_shader_get_entrypoint(shader)))); + + state->depth_feedback_src = + nir_load_input(&b, + 1, + 32, + nir_imm_int(&b, 0), + .component = 2, + .dest_type = nir_type_float32, + .io_semantics = (nir_io_semantics){ + .location = VARYING_SLOT_POS, + .num_slots = 1, + }); + } + + /* Emit the store after the last block of the entrypoint. */ nir_builder b = nir_builder_at( + nir_after_block(nir_impl_last_block(nir_shader_get_entrypoint(shader)))); + nir_store_output(&b, + state->depth_feedback_src, + nir_imm_int(&b, 0), + .write_mask = 1, + .src_type = nir_type_invalid | 32, + .io_semantics = (nir_io_semantics){ + .location = state->fs->z_replicate, + .num_slots = 1, + }); + + return true; +} + /** * \brief Per-fragment output pass.
* @@ -1029,12 +1105,13 @@ bool pco_nir_pfo(nir_shader *shader, pco_fs_data *fs) util_dynarray_init(&state.loads, NULL); util_dynarray_init(&state.stores, NULL); - bool progress = - nir_shader_lower_instructions(shader, is_pfo, lower_pfo, &state); + bool progress = false; + progress |= nir_shader_lower_instructions(shader, is_pfo, lower_pfo, &state); progress |= lower_isp_fb(&b, &state); progress |= sink_outputs(shader, &state); + progress |= z_replicate(shader, &state); util_dynarray_fini(&state.stores); util_dynarray_fini(&state.loads); diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index 1c0fc481ac2..ad7fcdfc1fa 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -1804,7 +1804,8 @@ pvr_init_fs_outputs(pco_data *data, const struct pvr_render_subpass *const subpass, const struct pvr_renderpass_hwsetup_subpass *hw_subpass) { - for (unsigned u = 0; u < subpass->color_count; ++u) { + unsigned u; + for (u = 0; u < subpass->color_count; ++u) { unsigned idx = subpass->color_attachments[u]; if (idx == VK_ATTACHMENT_UNUSED) continue; @@ -1814,7 +1815,12 @@ pvr_init_fs_outputs(pco_data *data, data->fs.output_formats[location] = vk_format_to_pipe_format(vk_format); } - /* TODO: z-replicate. 
*/ + data->fs.z_replicate = ~0u; + if (hw_subpass->z_replicate >= 0) { + gl_frag_result location = FRAG_RESULT_DATA0 + u; + data->fs.output_formats[location] = PIPE_FORMAT_R32_FLOAT; + data->fs.z_replicate = location; + } } static void @@ -1825,7 +1831,8 @@ pvr_setup_fs_outputs(pco_data *data, { uint64_t outputs_written = nir->info.outputs_written; - for (unsigned u = 0; u < subpass->color_count; ++u) { + unsigned u; + for (u = 0; u < subpass->color_count; ++u) { gl_frag_result location = FRAG_RESULT_DATA0 + u; unsigned idx = subpass->color_attachments[u]; const struct usc_mrt_resource *mrt_resource; @@ -1840,10 +1847,10 @@ pvr_setup_fs_outputs(pco_data *data, continue; mrt_resource = &hw_subpass->setup.mrt_resources[u]; - output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG; - assert(output_reg); /* TODO: tile buffer support. */ + output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG; + assert(output_reg); set_var(data->fs.outputs, mrt_resource->reg.output_reg, @@ -1854,7 +1861,29 @@ pvr_setup_fs_outputs(pco_data *data, outputs_written &= ~BITFIELD64_BIT(location); } - /* TODO: z-replicate. */ + if (hw_subpass->z_replicate >= 0) { + const struct usc_mrt_resource *mrt_resource = + &hw_subpass->setup.mrt_resources[hw_subpass->z_replicate]; + gl_frag_result location = FRAG_RESULT_DATA0 + u; + ASSERTED bool output_reg; + nir_variable *var; + + var = nir_find_variable_with_location(nir, nir_var_shader_out, location); + if (var) { + /* TODO: tile buffer support. */ + output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG; + assert(output_reg); + + set_var(data->fs.outputs, + mrt_resource->reg.output_reg, + var, + DIV_ROUND_UP(mrt_resource->intermediate_size, + sizeof(uint32_t))); + data->fs.output_reg[location] = output_reg; + + outputs_written &= ~BITFIELD64_BIT(location); + } + } assert(!outputs_written); } @@ -1875,11 +1904,12 @@ static void pvr_init_fs_input_attachments( if (!onchip) continue; - /* TODO: z-replicate. 
*/ - assert(hw_subpass->input_access[u].type != - PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE); - VkFormat vk_format = pass->attachments[idx].vk_format; + if (hw_subpass->input_access[u].type == + PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE) { + vk_format = VK_FORMAT_R32_SFLOAT; + } + data->fs.ia_formats[u] = vk_format_to_pipe_format(vk_format); unsigned mrt_idx = hw_subpass->input_access[u].on_chip_rt;