diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index be21bc9e158..3ea8e4d9261 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -298,6 +298,7 @@ typedef struct { /* OpenGL only */ bool clamp_color; bool alpha_to_one; + bool kill_samplemask; enum pipe_compare_func alpha_func; unsigned broadcast_last_cbuf; diff --git a/src/amd/common/ac_nir_lower_ps.c b/src/amd/common/ac_nir_lower_ps.c index 13e8e63895c..90e99b22c27 100644 --- a/src/amd/common/ac_nir_lower_ps.c +++ b/src/amd/common/ac_nir_lower_ps.c @@ -348,6 +348,8 @@ emit_ps_color_clamp_and_alpha_test(nir_builder *b, lower_ps_state *s) static void emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s) { + uint64_t outputs_written = b->shader->info.outputs_written; + nir_ssa_def *mrtz_alpha = NULL; if (s->options->alpha_to_coverage_via_mrtz) { mrtz_alpha = s->outputs[FRAG_RESULT_COLOR][3] ? @@ -359,11 +361,15 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s) nir_ssa_def *stencil = s->outputs[FRAG_RESULT_STENCIL][0]; nir_ssa_def *sample_mask = s->outputs[FRAG_RESULT_SAMPLE_MASK][0]; + if (s->options->kill_samplemask) { + sample_mask = NULL; + outputs_written &= ~BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); + } + /* skip mrtz export if no one has written to any of them */ if (!depth && !stencil && !sample_mask && !mrtz_alpha) return; - uint64_t outputs_written = b->shader->info.outputs_written; /* use outputs_written to determine export format as we use it to set * R_028710_SPI_SHADER_Z_FORMAT instead of relying on the real store output, * because store output may be optimized out. diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0eeb9622c5f..f03f3413bb4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -704,7 +704,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) num_returns = num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 + shader->selector->info.writes_z + shader->selector->info.writes_stencil + - shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */; + shader->ps.writes_samplemask + 1 /* SampleMaskIn */; for (i = 0; i < num_return_sgprs; i++) ac_add_return(&args->ac, AC_ARG_SGPR); @@ -1149,7 +1149,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad num_outputs = util_bitcount(shader->selector->info.colors_written) + (shader->selector->info.writes_z || shader->selector->info.writes_stencil || - shader->selector->info.writes_samplemask); + shader->ps.writes_samplemask); } util_debug_message(debug, SHADER_INFO, @@ -1394,6 +1394,8 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) fprintf(f, " epilog.alpha_to_coverage_via_mrtz = %u\n", key->ps.part.epilog.alpha_to_coverage_via_mrtz); fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color); fprintf(f, " epilog.dual_src_blend_swizzle = %u\n", key->ps.part.epilog.dual_src_blend_swizzle); + fprintf(f, " epilog.rbplus_depth_only_opt = %u\n", key->ps.part.epilog.rbplus_depth_only_opt); + fprintf(f, " epilog.kill_samplemask = %u\n", key->ps.part.epilog.kill_samplemask); fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing); fprintf(f, " mono.point_smoothing = %u\n", key->ps.mono.point_smoothing); fprintf(f, " mono.interpolate_at_sample_force_center = %u\n", @@ -2232,6 +2234,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, .alpha_to_one = key->ps.part.epilog.alpha_to_one, .alpha_func = key->ps.part.epilog.alpha_func, .broadcast_last_cbuf = key->ps.part.epilog.last_cbuf, + .kill_samplemask = key->ps.part.epilog.kill_samplemask, .bc_optimize_for_persp = key->ps.part.prolog.bc_optimize_for_persp, .bc_optimize_for_linear = key->ps.part.prolog.bc_optimize_for_linear, @@ -3076,7 +3079,8 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke key->ps_epilog.color_types = info->output_color_types; key->ps_epilog.writes_z = info->writes_z; key->ps_epilog.writes_stencil = info->writes_stencil; - key->ps_epilog.writes_samplemask = info->writes_samplemask; + key->ps_epilog.writes_samplemask = info->writes_samplemask && + !shader->key.ps.part.epilog.kill_samplemask; key->ps_epilog.states = shader->key.ps.part.epilog; } @@ -3153,6 +3157,11 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler struct si_shader_selector *sel = shader->selector; struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key); + if (sel->stage == MESA_SHADER_FRAGMENT) { + shader->ps.writes_samplemask = sel->info.writes_samplemask && + !shader->key.ps.part.epilog.kill_samplemask; + } + /* LS, ES, VS are compiled on demand if the main part hasn't been * compiled for that stage. * diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 9341307d48f..0f9b3dad5de 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -606,6 +606,7 @@ struct si_ps_epilog_bits { unsigned clamp_color : 1; unsigned dual_src_blend_swizzle : 1; /* gfx11+ */ unsigned rbplus_depth_only_opt:1; + unsigned kill_samplemask:1; }; union si_shader_part_key { @@ -966,6 +967,7 @@ struct si_shader { unsigned cb_shader_mask; unsigned db_shader_control; unsigned num_interp; + bool writes_samplemask; } ps; }; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index d1f9fece050..1860fc48545 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1042,11 +1042,15 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * struct si_shader_selector *sel = shader->selector; struct si_shader_context ctx; enum ac_float_mode float_mode = nir->info.stage == MESA_SHADER_KERNEL ? AC_FLOAT_MODE_DEFAULT : AC_FLOAT_MODE_DEFAULT_OPENGL; + bool exports_color_null = false; + bool exports_mrtz = false; - bool exports_color_null = sel->info.colors_written; - bool exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask; - if (!exports_mrtz && !exports_color_null) - exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10; + if (sel->stage == MESA_SHADER_FRAGMENT) { + exports_color_null = sel->info.colors_written; + exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || shader->ps.writes_samplemask; + if (!exports_mrtz && !exports_color_null) + exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10; + } si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, exports_color_null, exports_mrtz, float_mode); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 9c93df00b32..19d4f794144 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1150,7 +1150,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state; if (old_rs->multisample_enable != rs->multisample_enable) { - si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); /* Update the small primitive filter workaround if necessary. */ @@ -1507,7 +1506,6 @@ void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st) static void si_emit_db_render_state(struct si_context *sctx) { - struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned db_shader_control, db_render_control, db_count_control, vrs_override_cntl = 0; /* DB_RENDER_CONTROL */ @@ -1575,10 +1573,6 @@ static void si_emit_db_render_state(struct si_context *sctx) db_shader_control = sctx->ps_db_shader_control; - /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ - if (!rs->multisample_enable) - db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; - if (sctx->screen->info.has_export_conflict_bug && sctx->queued.named.blend->blend_enable_4bit && si_get_num_coverage_samples(sctx) == 1) { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 3d11acf79b7..72bf73e050b 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1803,7 +1803,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) /* DB_SHADER_CONTROL */ shader->ps.db_shader_control = S_02880C_Z_EXPORT_ENABLE(info->writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) | - S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) | + S_02880C_MASK_EXPORT_ENABLE(shader->ps.writes_samplemask) | S_02880C_KILL_ENABLE(si_shader_uses_discard(shader)); switch (info->base.fs.depth_layout) { @@ -1887,7 +1887,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) shader->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr; shader->ps.num_interp = si_get_ps_num_interp(shader); shader->ps.spi_shader_z_format = - ac_get_spi_shader_z_format(info->writes_z, info->writes_stencil, info->writes_samplemask, + ac_get_spi_shader_z_format(info->writes_z, info->writes_stencil, shader->ps.writes_samplemask, shader->key.ps.part.epilog.alpha_to_coverage_via_mrtz); /* Ensure that some export memory is always allocated, for two reasons: @@ -1907,7 +1907,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) * * RB+ depth-only rendering requires SPI_SHADER_32_R. */ - bool has_mrtz = info->writes_z || info->writes_stencil || info->writes_samplemask; + bool has_mrtz = info->writes_z || info->writes_stencil || shader->ps.writes_samplemask; if (!shader->ps.spi_shader_col_format) { if (shader->key.ps.part.epilog.rbplus_depth_only_opt) { @@ -2217,6 +2217,13 @@ void si_ps_key_update_framebuffer_blend_rasterizer(struct si_context *sctx) sctx->gfx_level >= GFX11 && alpha_to_coverage && (sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask); + /* Remove the gl_SampleMask fragment shader output if MSAA is disabled. + * This is required for correctness and it's also an optimization. + */ + key->ps.part.epilog.kill_samplemask = sel->info.writes_samplemask && + (sctx->framebuffer.nr_samples <= 1 || + !rs->multisample_enable); + /* If alpha-to-coverage isn't exported via MRTZ, set that we need to export alpha * through MRT0. */