aco: emit p_jump_to_epilog if the main fragment shader has an epilog

MRTZ is still exported from the main shader.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17485>
This commit is contained in:
Samuel Pitoiset 2022-06-24 16:03:52 +02:00 committed by Marge Bot
parent 8bdcc20815
commit a6dff6caa1
2 changed files with 83 additions and 6 deletions

View file

@ -5160,6 +5160,18 @@ store_output_to_temps(isel_context* ctx, nir_intrinsic_instr* instr)
idx++;
}
if (ctx->stage == fragment_fs && ctx->program->info.ps.has_epilog) {
unsigned index = nir_intrinsic_base(instr) - FRAG_RESULT_DATA0;
if (nir_intrinsic_src_type(instr) == nir_type_float16) {
ctx->output_color_types |= ACO_TYPE_FLOAT16 << (index * 2);
} else if (nir_intrinsic_src_type(instr) == nir_type_int16) {
ctx->output_color_types |= ACO_TYPE_INT16 << (index * 2);
} else if (nir_intrinsic_src_type(instr) == nir_type_uint16) {
ctx->output_color_types |= ACO_TYPE_UINT16 << (index * 2);
}
}
return true;
}
@ -11115,9 +11127,60 @@ create_fs_null_export(isel_context* ctx)
/* enabled_mask */ 0, dest, /* compr */ false, /* done */ true, /* vm */ true);
}
/* Terminate the main fragment shader with a p_jump_to_epilog pseudo instruction.
 *
 * Operand 0 of the jump is the 64-bit epilog address taken from the
 * ps_epilog_pc shader argument. It is followed by four operands for every
 * color output slot (FRAG_RESULT_DATA0..7) that has a non-zero write mask:
 * written channels are fixed to physical registers, unwritten channels are
 * passed as undefined v1 operands. Slots with an empty write mask add no
 * operands at all.
 *
 * Channels recorded as 16-bit in ctx->output_color_types are widened to
 * 32 bits before being handed over.
 */
static void
create_fs_jump_to_epilog(isel_context* ctx)
{
   Builder bld(ctx->program, ctx->block);

   PhysReg first_export_reg(256); /* VGPR 0 */
   std::vector<Operand> epilog_args;

   for (unsigned slot = FRAG_RESULT_DATA0; slot <= FRAG_RESULT_DATA7; ++slot) {
      const unsigned mrt = slot - FRAG_RESULT_DATA0;

      /* Two bits of type information are stored per color output. */
      const unsigned type_shift = mrt * 2;
      const unsigned out_type = (ctx->output_color_types >> type_shift) & 0x3;

      const unsigned channels_written = ctx->outputs.mask[slot];
      if (channels_written == 0)
         continue;

      /* Each color output owns a group of four registers. */
      PhysReg mrt_base(first_export_reg.reg() + mrt * 4);

      for (unsigned comp = 0; comp < 4; ++comp) {
         const bool is_written = (channels_written & BITFIELD_BIT(comp)) != 0;

         if (is_written) {
            Operand value(ctx->outputs.temps[slot * 4u + comp]);

            switch (out_type) {
            case ACO_TYPE_FLOAT16:
               value = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), value);
               break;
            case ACO_TYPE_INT16:
            case ACO_TYPE_UINT16: {
               /* Only the signed variant is sign-extended. */
               const bool is_signed = out_type == ACO_TYPE_INT16;
               Temp widened = convert_int(ctx, bld, value.getTemp(), 16, 32, is_signed);
               value = Operand(widened);
               break;
            }
            default:
               /* Already 32-bit, nothing to convert. */
               break;
            }

            /* NOTE(review): advance() presumably takes a byte offset, so this
             * selects the comp-th register of the group - confirm against PhysReg.
             */
            value.setFixed(mrt_base.advance(comp * 4u));
            epilog_args.emplace_back(value);
         } else {
            /* Keep the operand position, but leave the channel undefined. */
            epilog_args.emplace_back(Operand(v1));
         }
      }
   }

   /* Resolved after the channel conversions so that any instructions it emits
    * follow them in the block.
    */
   Temp epilog_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ps_epilog_pc));

   aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
      aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + epilog_args.size(), 0)};

   jump->operands[0] = Operand(epilog_pc);

   unsigned operand_idx = 1;
   for (const Operand& arg : epilog_args)
      jump->operands[operand_idx++] = arg;

   ctx->block->instructions.emplace_back(std::move(jump));
}
static void
create_fs_exports(isel_context* ctx)
{
Builder bld(ctx->program, ctx->block);
bool exported = false;
/* Export depth, stencil and sample mask. */
@ -11125,13 +11188,17 @@ create_fs_exports(isel_context* ctx)
ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK])
exported |= export_fs_mrt_z(ctx);
/* Export all color render targets. */
for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i)
if (ctx->outputs.mask[i])
exported |= export_fs_mrt_color(ctx, i);
if (ctx->program->info.ps.has_epilog) {
create_fs_jump_to_epilog(ctx);
} else {
/* Export all color render targets. */
for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i)
if (ctx->outputs.mask[i])
exported |= export_fs_mrt_color(ctx, i);
if (!exported)
create_fs_null_export(ctx);
if (!exported)
create_fs_null_export(ctx);
}
ctx->block->kind |= block_kind_export_end;
}

View file

@ -35,6 +35,13 @@
namespace aco {
/* Type tag describing how a fragment color output is stored.
 *
 * One tag per color output is packed into isel_context::output_color_types
 * using two bits each, so every enumerator must fit in the range 0..3.
 */
enum aco_color_output_type {
   ACO_TYPE_ANY32 = 0,   /* No 16-bit type recorded; the value is used as is. */
   ACO_TYPE_FLOAT16 = 1, /* 16-bit float, converted with v_cvt_f32_f16. */
   ACO_TYPE_INT16 = 2,   /* 16-bit signed integer, sign-extended to 32 bits. */
   ACO_TYPE_UINT16 = 3,  /* 16-bit unsigned integer, zero-extended to 32 bits. */
};
struct shader_io_state {
uint8_t mask[VARYING_SLOT_MAX];
Temp temps[VARYING_SLOT_MAX * 4u];
@ -99,6 +106,9 @@ struct isel_context {
uint32_t tcs_num_patches;
bool tcs_in_out_eq = false;
/* Fragment color output information */
uint16_t output_color_types;
/* I/O information */
shader_io_state inputs;
shader_io_state outputs;