diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index cfb20a4a8b1..8904b21f7d5 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11310,6 +11310,8 @@ create_fs_exports(isel_context* ctx) if (ctx->program->info.ps.has_epilog) { create_fs_jump_to_epilog(ctx); } else { + unsigned compacted_mrt_index = 0; + /* Export all color render targets. */ for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) { if (!ctx->outputs.mask[i]) @@ -11317,9 +11319,9 @@ create_fs_exports(isel_context* ctx) struct mrt_color_export out = {0}; - out.slot = i - FRAG_RESULT_DATA0; + out.slot = compacted_mrt_index; out.write_mask = ctx->outputs.mask[i]; - out.col_format = (ctx->options->key.ps.col_format >> (4 * out.slot)) & 0xf; + out.col_format = (ctx->options->key.ps.col_format >> (4 * (i - FRAG_RESULT_DATA0))) & 0xf; for (unsigned c = 0; c < 4; ++c) { if (out.write_mask & (1 << c)) { @@ -11329,7 +11331,10 @@ create_fs_exports(isel_context* ctx) } } - exported |= export_fs_mrt_color(ctx, &out, false); + if (export_fs_mrt_color(ctx, &out, false)) { + compacted_mrt_index++; + exported = true; + } } if (!exported) @@ -12449,6 +12454,7 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade Builder bld(ctx.program, ctx.block); /* Export all color render targets */ + unsigned compacted_mrt_index = 0; bool exported = false; for (unsigned i = 0; i < 8; i++) { @@ -12459,7 +12465,7 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade struct mrt_color_export out; - out.slot = i; + out.slot = compacted_mrt_index; out.write_mask = 0xf; out.col_format = col_format; out.is_int8 = (key->color_is_int8 >> i) & 1; @@ -12471,7 +12477,10 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1)); } - exported |= export_fs_mrt_color(&ctx, &out, true); + if (export_fs_mrt_color(&ctx, &out, true)) { + compacted_mrt_index++; + exported = true; + } } if (!exported) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index f882d50ec19..adf01c7630b 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -578,7 +578,8 @@ scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *va /* Initialize arguments for the shader export intrinsic */ static void si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values, - unsigned enabled_channels, unsigned target, struct ac_export_args *args) + unsigned enabled_channels, unsigned target, unsigned index, + struct ac_export_args *args) { /* Specify the channels that are enabled. */ args->enabled_channels = enabled_channels; @@ -603,7 +604,6 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values, bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2; if (ctx->stage == MESA_SHADER_FRAGMENT) { - unsigned index = target - V_008DFC_SQ_EXP_MRT; unsigned col_format = (ctx->options->key.ps.col_format >> (4 * index)) & 0xf; bool is_int8 = (ctx->options->key.ps.is_int8 >> index) & 1; bool is_int10 = (ctx->options->key.ps.is_int10 >> index) & 1; @@ -743,7 +743,7 @@ radv_export_param(struct radv_shader_context *ctx, unsigned index, LLVMValueRef { struct ac_export_args args; - si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, &args); + si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, 0, &args); ac_build_export(&ctx->ac, &args); } @@ -915,7 +915,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v for (i = 0; i < noutput; i++) { switch (outputs[i].slot_name) { case VARYING_SLOT_POS: - si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]); + si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, 0, &pos_args[0]); break; case VARYING_SLOT_PSIZ: psize_value = outputs[i].values[0]; @@ -932,7 +932,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v case VARYING_SLOT_CLIP_DIST0: case VARYING_SLOT_CLIP_DIST1: index = 2 + outputs[i].slot_index; - si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index, + si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index, 0, &pos_args[index]); break; default: @@ -1064,11 +1064,11 @@ handle_vs_outputs_post(struct radv_shader_context *ctx) } static bool -si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned index, - struct ac_export_args *args) +si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned target, + unsigned index, struct ac_export_args *args) { /* Export */ - si_llvm_init_export_args(ctx, color, 0xf, V_008DFC_SQ_EXP_MRT + index, args); + si_llvm_init_export_args(ctx, color, 0xf, V_008DFC_SQ_EXP_MRT + target, index, args); if (!args->enabled_channels) return false; /* unnecessary NULL export */ @@ -1105,7 +1105,7 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) for (unsigned j = 0; j < 4; j++) values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); - bool ret = si_export_mrt_color(ctx, values, i - FRAG_RESULT_DATA0, &color_args[index]); + bool ret = si_export_mrt_color(ctx, values, index, i - FRAG_RESULT_DATA0, &color_args[index]); if (ret) index++; } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 71b3580346f..2df56bdc0f5 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -533,13 +533,41 @@ format_is_float32(VkFormat format) desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32; } +static unsigned +radv_compact_spi_shader_col_format(const struct radv_shader *ps, + const struct radv_blend_state *blend) +{ + unsigned spi_shader_col_format = blend->spi_shader_col_format; + unsigned value = 0, num_mrts = 0; + unsigned i, num_targets; + + /* Make sure to clear color attachments without exports because MRT holes are removed during + * compilation for optimal performance. + */ + spi_shader_col_format &= ps->info.ps.colors_written; + + /* Compute the number of MRTs. */ + num_targets = DIV_ROUND_UP(util_last_bit(spi_shader_col_format), 4); + + /* Remove holes in spi_shader_col_format. */ + for (i = 0; i < num_targets; i++) { + unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; + + if (spi_format) { + value |= spi_format << (num_mrts * 4); + num_mrts++; + } + } + + return value; +} + static void radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pipeline, struct radv_blend_state *blend, const struct vk_graphics_pipeline_state *state) { unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0; - unsigned num_targets; for (unsigned i = 0; i < state->rp->color_attachment_count; ++i) { unsigned cf; @@ -572,16 +600,6 @@ radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pip col_format |= V_028714_SPI_SHADER_32_AR; } - /* If the i-th target format is set, all previous target formats must - * be non-zero to avoid hangs. - */ - num_targets = (util_last_bit(col_format) + 3) / 4; - for (unsigned i = 0; i < num_targets; i++) { - if (!(col_format & (0xfu << (i * 4)))) { - col_format |= V_028714_SPI_SHADER_32_R << (i * 4); - } - } - /* The output for dual source blending should have the same format as * the first output. */ @@ -6030,6 +6048,9 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv if (device->physical_device->rad_info.gfx_level >= GFX10_3) gfx103_pipeline_init_vrs_state(pipeline, &state); + struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; + blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, &blend); + /* Ensure that some export memory is always allocated, for two reasons: * * 1) Correctness: The hardware ignores the EXEC mask if no export @@ -6045,7 +6066,6 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv * color and Z formats to SPI_SHADER_ZERO. The hw will skip export * instructions if any are present. */ - struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; if ((device->physical_device->rad_info.gfx_level <= GFX9 || ps->info.ps.can_discard) && !blend.spi_shader_col_format) { if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask)