pvr, pco: z-replicate support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-03-12 22:12:49 +00:00 committed by Marge Bot
parent 1b7954fae6
commit 7369f77590
3 changed files with 122 additions and 13 deletions

View file

@ -80,6 +80,8 @@ typedef struct _pco_fs_data {
/** On-chip input attachment formats. */
enum pipe_format ia_formats[4];
gl_frag_result z_replicate;
/* Blend options. */
nir_lower_blend_options blend_opts;
pco_range blend_consts;

View file

@ -960,6 +960,30 @@ static nir_def *lower_pfo(nir_builder *b, nir_instr *instr, void *cb_data)
static bool lower_isp_fb(nir_builder *b, struct pfo_state *state)
{
bool has_depth_feedback = !!state->depth_feedback_src;
if (b->shader->info.writes_memory && !has_depth_feedback) {
nir_variable *var_pos = nir_get_variable_with_location(b->shader,
nir_var_shader_in,
VARYING_SLOT_POS,
glsl_vec4_type());
var_pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
b->cursor = nir_before_block(
nir_start_block(nir_shader_get_entrypoint(b->shader)));
state->depth_feedback_src =
nir_load_input(b,
1,
32,
nir_imm_int(b, 0),
.component = 2,
.dest_type = nir_type_float32,
.io_semantics = (nir_io_semantics){
.location = VARYING_SLOT_POS,
.num_slots = 1,
});
has_depth_feedback = true;
}
if (!has_depth_feedback && !state->has_discards)
return false;
@ -980,7 +1004,7 @@ static bool lower_isp_fb(nir_builder *b, struct pfo_state *state)
b,
state->has_discards ? nir_i2b(b, nir_load_reg(b, state->discard_cond_reg))
: undef,
state->depth_feedback_src ? state->depth_feedback_src : undef);
has_depth_feedback ? state->depth_feedback_src : undef);
state->fs->uses.discard = state->has_discards;
state->fs->uses.depth_feedback = has_depth_feedback;
@ -1005,6 +1029,58 @@ static bool sink_outputs(nir_shader *shader, struct pfo_state *state)
return progress;
}
/**
 * \brief Emits a store of the fragment depth to the z-replicate output.
 *
 * Declares a float output variable at the location held in
 * state->fs->z_replicate and appends a store of state->depth_feedback_src to
 * it at the end of the entrypoint's last block. If the state holds no depth
 * source yet, component 2 of VARYING_SLOT_POS is loaded at the top of the
 * entrypoint and recorded in the state as the depth source.
 *
 * \param[in,out] shader NIR shader.
 * \param[in,out] state Per-fragment output pass state.
 * \return True if the store was emitted, false if the shader is internal or
 *         the z-replicate location is ~0u.
 */
static bool z_replicate(nir_shader *shader, struct pfo_state *state)
{
   if (shader->info.internal)
      return false;

   const gl_frag_result z_location = state->fs->z_replicate;
   if (z_location == ~0u)
      return false;

   /* The location must not already be claimed by another output. */
   assert(!nir_find_variable_with_location(shader,
                                           nir_var_shader_out,
                                           z_location));

   nir_create_variable_with_location(shader,
                                     nir_var_shader_out,
                                     z_location,
                                     glsl_float_type());

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);

   if (!state->depth_feedback_src) {
      /* No depth source recorded; read it from the position input. */
      nir_variable *frag_pos =
         nir_get_variable_with_location(shader,
                                        nir_var_shader_in,
                                        VARYING_SLOT_POS,
                                        glsl_vec4_type());
      frag_pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE;

      nir_builder head = nir_builder_at(nir_before_block(nir_start_block(entry)));
      nir_def *pos_offset = nir_imm_int(&head, 0);
      const nir_io_semantics pos_semantics = {
         .location = VARYING_SLOT_POS,
         .num_slots = 1,
      };

      state->depth_feedback_src = nir_load_input(&head,
                                                 1,
                                                 32,
                                                 pos_offset,
                                                 .component = 2,
                                                 .dest_type = nir_type_float32,
                                                 .io_semantics = pos_semantics);
   }

   /* The store goes at the very end of the entrypoint. */
   nir_builder tail = nir_builder_at(nir_after_block(nir_impl_last_block(entry)));
   nir_def *out_offset = nir_imm_int(&tail, 0);
   const nir_io_semantics out_semantics = {
      .location = z_location,
      .num_slots = 1,
   };

   nir_store_output(&tail,
                    state->depth_feedback_src,
                    out_offset,
                    .write_mask = 1,
                    .src_type = nir_type_invalid | 32,
                    .io_semantics = out_semantics);

   return true;
}
/**
* \brief Per-fragment output pass.
*
@ -1029,12 +1105,13 @@ bool pco_nir_pfo(nir_shader *shader, pco_fs_data *fs)
util_dynarray_init(&state.loads, NULL);
util_dynarray_init(&state.stores, NULL);
bool progress =
nir_shader_lower_instructions(shader, is_pfo, lower_pfo, &state);
bool progress = false;
progress |= nir_shader_lower_instructions(shader, is_pfo, lower_pfo, &state);
progress |= lower_isp_fb(&b, &state);
progress |= sink_outputs(shader, &state);
progress |= z_replicate(shader, &state);
util_dynarray_fini(&state.stores);
util_dynarray_fini(&state.loads);

View file

@ -1804,7 +1804,8 @@ pvr_init_fs_outputs(pco_data *data,
const struct pvr_render_subpass *const subpass,
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
{
for (unsigned u = 0; u < subpass->color_count; ++u) {
unsigned u;
for (u = 0; u < subpass->color_count; ++u) {
unsigned idx = subpass->color_attachments[u];
if (idx == VK_ATTACHMENT_UNUSED)
continue;
@ -1814,7 +1815,12 @@ pvr_init_fs_outputs(pco_data *data,
data->fs.output_formats[location] = vk_format_to_pipe_format(vk_format);
}
/* TODO: z-replicate. */
data->fs.z_replicate = ~0u;
if (hw_subpass->z_replicate >= 0) {
gl_frag_result location = FRAG_RESULT_DATA0 + u;
data->fs.output_formats[location] = PIPE_FORMAT_R32_FLOAT;
data->fs.z_replicate = location;
}
}
static void
@ -1825,7 +1831,8 @@ pvr_setup_fs_outputs(pco_data *data,
{
uint64_t outputs_written = nir->info.outputs_written;
for (unsigned u = 0; u < subpass->color_count; ++u) {
unsigned u;
for (u = 0; u < subpass->color_count; ++u) {
gl_frag_result location = FRAG_RESULT_DATA0 + u;
unsigned idx = subpass->color_attachments[u];
const struct usc_mrt_resource *mrt_resource;
@ -1840,10 +1847,10 @@ pvr_setup_fs_outputs(pco_data *data,
continue;
mrt_resource = &hw_subpass->setup.mrt_resources[u];
output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
assert(output_reg);
/* TODO: tile buffer support. */
output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
assert(output_reg);
set_var(data->fs.outputs,
mrt_resource->reg.output_reg,
@ -1854,7 +1861,29 @@ pvr_setup_fs_outputs(pco_data *data,
outputs_written &= ~BITFIELD64_BIT(location);
}
/* TODO: z-replicate. */
if (hw_subpass->z_replicate >= 0) {
const struct usc_mrt_resource *mrt_resource =
&hw_subpass->setup.mrt_resources[hw_subpass->z_replicate];
gl_frag_result location = FRAG_RESULT_DATA0 + u;
ASSERTED bool output_reg;
nir_variable *var;
var = nir_find_variable_with_location(nir, nir_var_shader_out, location);
if (var) {
/* TODO: tile buffer support. */
output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
assert(output_reg);
set_var(data->fs.outputs,
mrt_resource->reg.output_reg,
var,
DIV_ROUND_UP(mrt_resource->intermediate_size,
sizeof(uint32_t)));
data->fs.output_reg[location] = output_reg;
outputs_written &= ~BITFIELD64_BIT(location);
}
}
assert(!outputs_written);
}
@ -1875,11 +1904,12 @@ static void pvr_init_fs_input_attachments(
if (!onchip)
continue;
/* TODO: z-replicate. */
assert(hw_subpass->input_access[u].type !=
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE);
VkFormat vk_format = pass->attachments[idx].vk_format;
if (hw_subpass->input_access[u].type ==
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE) {
vk_format = VK_FORMAT_R32_SFLOAT;
}
data->fs.ia_formats[u] = vk_format_to_pipe_format(vk_format);
unsigned mrt_idx = hw_subpass->input_access[u].on_chip_rt;