mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 19:30:12 +01:00
radeonsi: apply spi_cu_en to CU_EN
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14122>
This commit is contained in:
parent
b06b481dfe
commit
384014bebe
6 changed files with 104 additions and 32 deletions
|
|
@ -581,3 +581,21 @@ unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims
|
|||
|
||||
return CLAMP(workgroup_size, 1, 256);
|
||||
}
|
||||
|
||||
void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
|
||||
unsigned value_shift, const struct radeon_info *info,
|
||||
void set_sh_reg(void*, unsigned, uint32_t))
|
||||
{
|
||||
/* Register field position and mask. */
|
||||
uint32_t cu_en_mask = ~clear_mask;
|
||||
unsigned cu_en_shift = ffs(cu_en_mask) - 1;
|
||||
/* The value being set. */
|
||||
uint32_t cu_en = (value & cu_en_mask) >> cu_en_shift;
|
||||
|
||||
/* AND the field by spi_cu_en. */
|
||||
uint32_t spi_cu_en = info->spi_cu_en >> value_shift;
|
||||
uint32_t new_value = (value & ~cu_en_mask) |
|
||||
(((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask);
|
||||
|
||||
set_sh_reg(cs, reg_offset, new_value);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -118,6 +118,10 @@ unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wav
|
|||
unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
|
||||
unsigned max_vtx_out, unsigned prim_amp_factor);
|
||||
|
||||
void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
|
||||
unsigned value_shift, const struct radeon_info *info,
|
||||
void set_sh_reg(void*, unsigned, uint32_t));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -279,6 +279,15 @@
|
|||
radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \
|
||||
} while (0)
|
||||
|
||||
/* Wrappers that are only used when they are passed as function pointers. */
|
||||
static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
|
||||
uint32_t value)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
radeon_set_sh_reg(reg_offset, value);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
/* This should be evaluated at compile time if all parameters are constants. */
|
||||
static ALWAYS_INLINE unsigned
|
||||
si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
|
||||
|
|
|
|||
|
|
@ -31,7 +31,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SI_PM4_MAX_DW 176
|
||||
/* TODO: This is high because of cs_preamble with ac_set_reg_cu_en. */
|
||||
#define SI_PM4_MAX_DW 480
|
||||
|
||||
// forward defines
|
||||
struct si_context;
|
||||
|
|
|
|||
|
|
@ -5467,8 +5467,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);
|
||||
|
||||
if (sctx->chip_class >= GFX7) {
|
||||
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
||||
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
|
||||
ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
||||
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F),
|
||||
C_00B01C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
}
|
||||
|
||||
if (sctx->chip_class <= GFX8) {
|
||||
|
|
@ -5503,11 +5504,13 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
}
|
||||
|
||||
if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
|
||||
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
|
||||
S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
|
||||
ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
|
||||
S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
|
||||
C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
|
||||
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
|
||||
S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
|
||||
ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
|
||||
S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
|
||||
C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
|
||||
/* If this is 0, Bonaire can hang even if GS isn't being used.
|
||||
* Other chips are unaffected. These are suboptimal values,
|
||||
|
|
@ -5547,8 +5550,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
}
|
||||
|
||||
if (sctx->chip_class >= GFX9) {
|
||||
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
|
||||
ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN,
|
||||
0, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
|
||||
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
|
||||
S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
|
||||
|
|
@ -5566,9 +5570,12 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
|
||||
if (sctx->chip_class >= GFX10) {
|
||||
/* Logical CUs 16 - 31 */
|
||||
si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16));
|
||||
si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));
|
||||
si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));
|
||||
ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
|
||||
C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
|
||||
C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
|
||||
si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
|
||||
|
|
|
|||
|
|
@ -919,18 +919,35 @@ static void si_emit_shader_gs(struct si_context *sctx)
|
|||
radeon_end_update_context_roll(sctx);
|
||||
|
||||
/* These don't cause any context rolls. */
|
||||
radeon_begin_again(&sctx->gfx_cs);
|
||||
if (sctx->chip_class >= GFX7) {
|
||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
|
||||
if (sctx->screen->info.spi_cu_en_has_effect) {
|
||||
if (sctx->chip_class >= GFX7) {
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs,
|
||||
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
||||
}
|
||||
if (sctx->chip_class >= GFX10) {
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs,
|
||||
C_00B204_CU_EN, 16, &sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS);
|
||||
}
|
||||
} else {
|
||||
radeon_begin_again(&sctx->gfx_cs);
|
||||
if (sctx->chip_class >= GFX7) {
|
||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
|
||||
}
|
||||
if (sctx->chip_class >= GFX10) {
|
||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
if (sctx->chip_class >= GFX10) {
|
||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
||||
|
|
@ -1129,13 +1146,27 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
|
|||
radeon_begin_again(&sctx->gfx_cs);
|
||||
radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
|
||||
shader->ctx_reg.ngg.ge_pc_alloc);
|
||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
|
||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_end();
|
||||
if (sctx->screen->info.spi_cu_en_has_effect) {
|
||||
radeon_end();
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs,
|
||||
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs,
|
||||
C_00B204_CU_EN, 16, &sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
|
||||
~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
||||
} else {
|
||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
|
||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
|
||||
|
|
@ -1599,8 +1630,10 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
|||
oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
|
||||
|
||||
if (sscreen->info.chip_class >= GFX7) {
|
||||
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
||||
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F));
|
||||
ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
||||
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
|
||||
C_00B118_CU_EN, 0, &sscreen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))si_pm4_set_reg);
|
||||
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue