radeonsi: add a tweak for PS wave CU utilization for gfx10.3

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6822>
This commit is contained in:
Marek Olšák 2020-09-22 13:13:05 -04:00 committed by Marge Bot
parent b5debe180e
commit 5f27777379

View file

@@ -5137,6 +5137,18 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
S_028034_BR_X(16384) | S_028034_BR_Y(16384));
}
unsigned cu_mask_ps = 0xffffffff;
/* It's wasteful to enable all CUs for PS if shader arrays have a different
* number of CUs. The reason is that the hardware sends the same number of PS
* waves to each shader array, so the slowest shader array limits the performance.
* Disable the extra CUs for PS in other shader arrays to save power and thus
* increase clocks for busy CUs. In the future, we might disable or enable this
* tweak only for certain apps.
*/
if (sctx->chip_class >= GFX10_3)
cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);
if (sctx->chip_class >= GFX7) {
/* Compute LATE_ALLOC_VS.LIMIT. */
unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa;
@@ -5190,7 +5202,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F));
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
}
if (sctx->chip_class <= GFX8) {
@@ -5269,7 +5281,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
if (sctx->chip_class >= GFX10) {
/* Logical CUs 16 - 31 */
si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16));
si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));