diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 95f83a72e22..c97eb35d0cc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2020,6 +2020,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler shader->info.writes_sample_mask &= !shader->key.ps.part.epilog.kill_samplemask; shader->info.uses_discard |= shader->key.ps.part.prolog.poly_stipple || shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS; + si_shader_update_spi_shader_formats(shader); break; default:; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 3ca87661eee..3324d7bde8d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -858,8 +858,6 @@ struct si_shader { unsigned spi_ps_input_ena; unsigned spi_ps_input_addr; unsigned spi_ps_in_control; - unsigned spi_shader_z_format; - unsigned spi_shader_col_format; unsigned cb_shader_mask; unsigned db_shader_control; unsigned num_interp; diff --git a/src/gallium/drivers/radeonsi/si_shader_info.h b/src/gallium/drivers/radeonsi/si_shader_info.h index dff54f1d1c9..829e92c3256 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.h +++ b/src/gallium/drivers/radeonsi/si_shader_info.h @@ -205,6 +205,8 @@ union si_ps_input_info { struct si_shader_variant_info { uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS]; union si_ps_input_info ps_inputs[SI_NUM_INTERP]; + uint32_t spi_shader_col_format; + uint8_t spi_shader_z_format; uint8_t num_ps_inputs; uint8_t num_ps_per_primitive_inputs; uint8_t num_ps_maybe_per_primitive_inputs; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index ce186b5e62b..356555e8c66 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -159,5 +159,6 @@ void 
si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_ nir_shader *nir); void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader); void si_fixup_spi_ps_input_config(struct si_shader *shader); +void si_shader_update_spi_shader_formats(struct si_shader *shader); #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_variant_info.c b/src/gallium/drivers/radeonsi/si_shader_variant_info.c index 0c2c89ebd5a..c5289ee8754 100644 --- a/src/gallium/drivers/radeonsi/si_shader_variant_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_variant_info.c @@ -8,6 +8,64 @@ #include "sid.h" #include "si_pipe.h" +/* The spi_shader_*_format fields depend on the framebuffer state and the + * NIR shader (monolithic or main part). + * This writes shader->info.spi_shader_z_format and shader->info.spi_shader_col_format. */ +void si_shader_update_spi_shader_formats(struct si_shader *shader) +{ + unsigned spi_shader_col_format = shader->key.ps.part.epilog.spi_shader_col_format; + unsigned value = 0, num_mrts = 0; + unsigned i, num_targets = (util_last_bit(spi_shader_col_format) + 3) / 4; + + shader->info.spi_shader_z_format = ac_get_spi_shader_z_format(shader->info.writes_z, shader->info.writes_stencil, + shader->info.writes_sample_mask, + shader->key.ps.part.epilog.alpha_to_coverage_via_mrtz); + + /* Remove holes in spi_shader_col_format: pack the non-zero 4-bit fields into consecutive nibbles of value. */ + for (i = 0; i < num_targets; i++) { + unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; + + if (spi_format) { + value |= spi_format << (num_mrts * 4); + num_mrts++; + } + } + + /* Ensure that some export memory is always allocated, for two reasons: + * + * 1) Correctness: The hardware ignores the EXEC mask if no export + * memory is allocated, so KILL and alpha test do not work correctly + * without this. + * 2) Performance: Every shader needs at least a NULL export, even when + * it writes no color/depth output. The NULL export instruction + * stalls without this setting. + * + * Don't add this to CB_SHADER_MASK. 
+ * + * GFX10 supports pixel shaders without exports by setting both + * the color and Z formats to SPI_SHADER_ZERO. The hw will skip export + * instructions if any are present. + * + * RB+ depth-only rendering requires SPI_SHADER_32_R. + * NOTE(review): the GFX10+ branch below tests info.uses_discard; the code this replaces tested G_02880C_KILL_ENABLE(shader->ps.db_shader_control) - confirm the two agree. */ + if (!value) { /* no non-zero color format remained after packing */ + bool has_mrtz = shader->info.spi_shader_z_format != V_028710_SPI_SHADER_ZERO; + + if (shader->key.ps.part.epilog.rbplus_depth_only_opt) { + value = V_028714_SPI_SHADER_32_R; + } else if (!has_mrtz) { + if (shader->selector->screen->info.gfx_level >= GFX10) { + if (shader->info.uses_discard) + value = V_028714_SPI_SHADER_32_R; + } else { + value = V_028714_SPI_SHADER_32_R; + } + } + } + + shader->info.spi_shader_col_format = value; +} + void si_get_shader_variant_info(struct si_shader *shader, struct si_temp_shader_variant_info *temp_info, nir_shader *nir) { @@ -288,6 +346,8 @@ void si_get_shader_variant_info(struct si_shader *shader, } } + si_shader_update_spi_shader_formats(shader); + /* ACO needs spi_ps_input_ena before si_init_shader_args. */ shader->config.spi_ps_input_ena = S_0286CC_PERSP_SAMPLE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE)) | diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 6252085e161..a3a9040b964 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1983,25 +1983,6 @@ static void si_shader_vs_legacy(struct si_screen *sscreen, struct si_shader *sha ac_pm4_finalize(&pm4->base); } -static unsigned si_get_spi_shader_col_format(struct si_shader *shader) -{ - unsigned spi_shader_col_format = shader->key.ps.part.epilog.spi_shader_col_format; - unsigned value = 0, num_mrts = 0; - unsigned i, num_targets = (util_last_bit(spi_shader_col_format) + 3) / 4; - - /* Remove holes in spi_shader_col_format. 
*/ - for (i = 0; i < num_targets; i++) { - unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; - - if (spi_format) { - value |= spi_format << (num_mrts * 4); - num_mrts++; - } - } - - return value; -} - static void gfx6_emit_shader_ps(struct si_context *sctx, unsigned index) { struct si_shader *shader = sctx->queued.named.ps; @@ -2011,8 +1992,8 @@ static void gfx6_emit_shader_ps(struct si_context *sctx, unsigned index) shader->ps.spi_ps_input_ena, shader->ps.spi_ps_input_addr); radeon_opt_set_context_reg2(R_028710_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT, - shader->ps.spi_shader_z_format, - shader->ps.spi_shader_col_format); + shader->info.spi_shader_z_format, + shader->info.spi_shader_col_format); radeon_opt_set_context_reg(R_02823C_CB_SHADER_MASK, AC_TRACKED_CB_SHADER_MASK, shader->ps.cb_shader_mask); radeon_end_update_context_roll(); @@ -2029,9 +2010,9 @@ static void gfx11_dgpu_emit_shader_ps(struct si_context *sctx, unsigned index) gfx11_opt_set_context_reg(R_0286D0_SPI_PS_INPUT_ADDR, AC_TRACKED_SPI_PS_INPUT_ADDR, shader->ps.spi_ps_input_addr); gfx11_opt_set_context_reg(R_028710_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT, - shader->ps.spi_shader_z_format); + shader->info.spi_shader_z_format); gfx11_opt_set_context_reg(R_028714_SPI_SHADER_COL_FORMAT, AC_TRACKED_SPI_SHADER_COL_FORMAT, - shader->ps.spi_shader_col_format); + shader->info.spi_shader_col_format); gfx11_opt_set_context_reg(R_02823C_CB_SHADER_MASK, AC_TRACKED_CB_SHADER_MASK, shader->ps.cb_shader_mask); gfx11_end_packed_context_regs(); @@ -2045,9 +2026,9 @@ static void gfx12_emit_shader_ps(struct si_context *sctx, unsigned index) radeon_begin(&sctx->gfx_cs); gfx12_begin_context_regs(); gfx12_opt_set_context_reg(R_028650_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT, - shader->ps.spi_shader_z_format); + shader->info.spi_shader_z_format); gfx12_opt_set_context_reg(R_028654_SPI_SHADER_COL_FORMAT, AC_TRACKED_SPI_SHADER_COL_FORMAT, - shader->ps.spi_shader_col_format); + 
shader->info.spi_shader_col_format); gfx12_opt_set_context_reg(R_02865C_SPI_PS_INPUT_ENA, AC_TRACKED_SPI_PS_INPUT_ENA, shader->ps.spi_ps_input_ena); gfx12_opt_set_context_reg(R_028660_SPI_PS_INPUT_ADDR, AC_TRACKED_SPI_PS_INPUT_ADDR, @@ -2180,47 +2161,10 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) if (sscreen->info.has_rbplus && !sscreen->info.rbplus_allowed) shader->ps.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); - shader->ps.spi_shader_col_format = si_get_spi_shader_col_format(shader); shader->ps.cb_shader_mask = ac_get_cb_shader_mask(shader->key.ps.part.epilog.spi_shader_col_format); shader->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena; shader->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr; shader->ps.num_interp = si_get_ps_num_interp(shader); - shader->ps.spi_shader_z_format = - ac_get_spi_shader_z_format(shader->info.writes_z, shader->info.writes_stencil, - shader->info.writes_sample_mask, - shader->key.ps.part.epilog.alpha_to_coverage_via_mrtz); - - /* Ensure that some export memory is always allocated, for two reasons: - * - * 1) Correctness: The hardware ignores the EXEC mask if no export - * memory is allocated, so KILL and alpha test do not work correctly - * without this. - * 2) Performance: Every shader needs at least a NULL export, even when - * it writes no color/depth output. The NULL export instruction - * stalls without this setting. - * - * Don't add this to CB_SHADER_MASK. - * - * GFX10 supports pixel shaders without exports by setting both - * the color and Z formats to SPI_SHADER_ZERO. The hw will skip export - * instructions if any are present. - * - * RB+ depth-only rendering requires SPI_SHADER_32_R. 
- */ - bool has_mrtz = shader->ps.spi_shader_z_format != V_028710_SPI_SHADER_ZERO; - - if (!shader->ps.spi_shader_col_format) { - if (shader->key.ps.part.epilog.rbplus_depth_only_opt) { - shader->ps.spi_shader_col_format = V_028714_SPI_SHADER_32_R; - } else if (!has_mrtz) { - if (sscreen->info.gfx_level >= GFX10) { - if (G_02880C_KILL_ENABLE(shader->ps.db_shader_control)) - shader->ps.spi_shader_col_format = V_028714_SPI_SHADER_32_R; - } else { - shader->ps.spi_shader_col_format = V_028714_SPI_SHADER_32_R; - } - } - } if (sscreen->info.gfx_level >= GFX12) { shader->ps.spi_ps_in_control = S_028640_PARAM_GEN(shader->key.ps.mono.point_smoothing) |