mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
radeonsi: use SET_SH_REG_INDEX with index=3 for registers containing CU_EN
This matches PAL and RADV behavior. It's for preemption.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15098>
This commit is contained in:
parent
79a7ab642a
commit
c8e2c6faf6
5 changed files with 99 additions and 39 deletions
|
|
@ -117,11 +117,23 @@
|
||||||
radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
|
radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/* Begin a PKT3_SET_SH_REG_INDEX packet covering `num` registers starting at
 * `reg`. After the shadowed-register check and the SH range assert, it emits
 * the packet header followed by the offset dword: the register's offset from
 * SI_SH_REG_OFFSET shifted right by 2, OR'ed with index 3 placed at bit 28.
 * No register values are emitted here; the caller emits them afterwards.
 * Per the commit description, index 3 is used for registers containing CU_EN.
 */
#define radeon_set_sh_reg_idx3_seq(reg, num) do { \
|
||||||
|
SI_CHECK_SHADOWED_REGS(reg, num); \
|
||||||
|
assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
|
||||||
|
radeon_emit(PKT3(PKT3_SET_SH_REG_INDEX, num, 0)); \
|
||||||
|
radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | (3 << 28)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
/* Write a single SH register: start a one-register sequence with
 * radeon_set_sh_reg_seq(reg, 1), then emit `value` as its payload.
 * (Unchanged context: this view shows each line once per diff column.)
 */
#define radeon_set_sh_reg(reg, value) do { \
|
#define radeon_set_sh_reg(reg, value) do { \
|
||||||
radeon_set_sh_reg_seq(reg, 1); \
|
radeon_set_sh_reg_seq(reg, 1); \
|
||||||
radeon_emit(value); \
|
radeon_emit(value); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/* Write a single SH register through the index-3 packet variant: start a
 * one-register sequence with radeon_set_sh_reg_idx3_seq(reg, 1), then emit
 * `value` as its payload.
 */
#define radeon_set_sh_reg_idx3(reg, value) do { \
|
||||||
|
radeon_set_sh_reg_idx3_seq(reg, 1); \
|
||||||
|
radeon_emit(value); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define radeon_set_uconfig_reg_seq(reg, num, perfctr) do { \
|
#define radeon_set_uconfig_reg_seq(reg, num, perfctr) do { \
|
||||||
SI_CHECK_SHADOWED_REGS(reg, num); \
|
SI_CHECK_SHADOWED_REGS(reg, num); \
|
||||||
assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
|
assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
|
||||||
|
|
@ -247,6 +259,19 @@
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/* Emit an SH register write only when it is not redundant.
 *
 *   sctx   - context providing tracked_regs and chip_class
 *   offset - register address passed to the emit macro
 *   reg    - slot index into tracked_regs.reg_saved / tracked_regs.reg_value
 *   val    - value to write; evaluated once into a local
 *
 * The write is emitted when the slot's bit in reg_saved is clear or the
 * cached value differs from `val`. On GFX10 and newer it goes through
 * radeon_set_sh_reg_idx3; on older chips through radeon_set_sh_reg.
 * Afterwards the slot is marked saved and the cached value is updated.
 */
#define radeon_opt_set_sh_reg_idx3(sctx, offset, reg, val) do { \
|
||||||
|
unsigned __value = val; \
|
||||||
|
if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
|
||||||
|
sctx->tracked_regs.reg_value[reg] != __value) { \
|
||||||
|
if (sctx->chip_class >= GFX10) \
|
||||||
|
radeon_set_sh_reg_idx3(offset, __value); \
|
||||||
|
else \
|
||||||
|
radeon_set_sh_reg(offset, __value); \
|
||||||
|
sctx->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
|
||||||
|
sctx->tracked_regs.reg_value[reg] = __value; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
|
#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
|
||||||
unsigned __value = val; \
|
unsigned __value = val; \
|
||||||
if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
|
if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
|
||||||
|
|
@ -288,6 +313,14 @@ static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg
|
||||||
radeon_end();
|
radeon_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Function wrapper around the radeon_set_sh_reg_idx3 macro so the index-3
 * register write can be passed as a callback (elsewhere in this change it is
 * handed to ac_set_reg_cu_en). It wraps the single register write in a
 * radeon_begin(cs) / radeon_end() pair.
 *
 *   cs         - command buffer to emit into
 *   reg_offset - SH register address
 *   value      - value to write
 */
static inline void radeon_set_sh_reg_idx3_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
|
||||||
|
uint32_t value)
|
||||||
|
{
|
||||||
|
radeon_begin(cs);
|
||||||
|
radeon_set_sh_reg_idx3(reg_offset, value);
|
||||||
|
radeon_end();
|
||||||
|
}
|
||||||
|
|
||||||
/* This should be evaluated at compile time if all parameters are constants. */
|
/* This should be evaluated at compile time if all parameters are constants. */
|
||||||
static ALWAYS_INLINE unsigned
|
static ALWAYS_INLINE unsigned
|
||||||
si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
|
si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,27 @@ static void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
|
||||||
state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate);
|
state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Append one register write to a PM4 state using an explicit packet opcode
 * and index field.
 *
 *   state  - PM4 state being built
 *   reg    - register offset, already made relative by the caller; it is
 *            shifted right by 2 here (presumably bytes to dwords - confirm)
 *   val    - register value to append
 *   opcode - PKT3 opcode for the packet
 *   idx    - value OR'ed into the offset dword at bit 28
 *
 * If `opcode` matches the previous packet and `reg` directly follows the
 * previously written register, the value is appended to the open packet.
 * Otherwise a new packet is started and its offset dword is written first.
 */
static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val,
|
||||||
|
unsigned opcode, unsigned idx)
|
||||||
|
{
|
||||||
|
reg >>= 2;
|
||||||
|

||||||
|
/* max_dw == 0 means "not set yet": default to the full inline pm4 array. */
if (!state->max_dw)
|
||||||
|
state->max_dw = ARRAY_SIZE(state->pm4);
|
||||||
|

||||||
|
/* Room for the worst case: one offset dword plus one value dword. */
assert(state->ndw + 2 <= state->max_dw);
|
||||||
|

||||||
|
/* Start a new packet unless this write continues the previous one. */
if (opcode != state->last_opcode || reg != (state->last_reg + 1)) {
|
||||||
|
si_pm4_cmd_begin(state, opcode);
|
||||||
|
state->pm4[state->ndw++] = reg | (idx << 28);
|
||||||
|
}
|
||||||
|

||||||
|
/* NOTE(review): this range check runs after reg may already have been written above. */
assert(reg <= UINT16_MAX);
|
||||||
|
state->last_reg = reg;
|
||||||
|
state->pm4[state->ndw++] = val;
|
||||||
|
si_pm4_cmd_end(state, false);
|
||||||
|
}
|
||||||
|
|
||||||
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
||||||
{
|
{
|
||||||
unsigned opcode;
|
unsigned opcode;
|
||||||
|
|
@ -80,22 +101,14 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
reg >>= 2;
|
si_pm4_set_reg_custom(state, reg, val, opcode, 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (!state->max_dw)
|
/* Append an SH register write to a PM4 state using PKT3_SET_SH_REG_INDEX with
 * index 3. `reg` is the absolute register address; SI_SH_REG_OFFSET is
 * subtracted before handing it to si_pm4_set_reg_custom. Per the commit
 * description, this variant is meant for registers containing CU_EN.
 * (In this diff view the lines of this new function are interleaved with
 * removed lines from the old si_pm4_set_reg body.)
 */
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
||||||
state->max_dw = ARRAY_SIZE(state->pm4);
|
{
|
||||||
|
SI_CHECK_SHADOWED_REGS(reg, 1);
|
||||||
|

||||||
assert(state->ndw + 2 <= state->max_dw);
|
si_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3);
|
||||||
|
|
||||||
if (opcode != state->last_opcode || reg != (state->last_reg + 1)) {
|
|
||||||
si_pm4_cmd_begin(state, opcode);
|
|
||||||
state->pm4[state->ndw++] = reg;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(reg <= UINT16_MAX);
|
|
||||||
state->last_reg = reg;
|
|
||||||
state->pm4[state->ndw++] = val;
|
|
||||||
si_pm4_cmd_end(state, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_pm4_clear_state(struct si_pm4_state *state)
|
void si_pm4_clear_state(struct si_pm4_state *state)
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,7 @@ struct si_pm4_state {
|
||||||
|
|
||||||
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
|
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
|
||||||
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
||||||
|
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
||||||
|
|
||||||
void si_pm4_clear_state(struct si_pm4_state *state);
|
void si_pm4_clear_state(struct si_pm4_state *state);
|
||||||
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
|
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
|
||||||
|
|
|
||||||
|
|
@ -5476,8 +5476,10 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
||||||
|
|
||||||
if (sctx->chip_class >= GFX7) {
|
if (sctx->chip_class >= GFX7) {
|
||||||
ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
||||||
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F),
|
S_00B01C_CU_EN(cu_mask_ps) |
|
||||||
C_00B01C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
S_00B01C_WAVE_LIMIT(0x3F),
|
||||||
|
C_00B01C_CU_EN, 0, &sscreen->info,
|
||||||
|
(void*)(sctx->chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sctx->chip_class <= GFX8) {
|
if (sctx->chip_class <= GFX8) {
|
||||||
|
|
@ -5514,11 +5516,13 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
||||||
if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
|
if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
|
||||||
ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
|
ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
|
||||||
S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
|
S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
|
||||||
C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
C_00B51C_CU_EN, 0, &sscreen->info,
|
||||||
|
(void*)(sctx->chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
|
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
|
||||||
ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
|
ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
|
||||||
S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
|
S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
|
||||||
C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
C_00B31C_CU_EN, 0, &sscreen->info,
|
||||||
|
(void*)(sctx->chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
|
|
||||||
/* If this is 0, Bonaire can hang even if GS isn't being used.
|
/* If this is 0, Bonaire can hang even if GS isn't being used.
|
||||||
* Other chips are unaffected. These are suboptimal values,
|
* Other chips are unaffected. These are suboptimal values,
|
||||||
|
|
@ -5560,7 +5564,8 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
||||||
if (sctx->chip_class >= GFX9) {
|
if (sctx->chip_class >= GFX9) {
|
||||||
ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
||||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN,
|
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN,
|
||||||
0, &sscreen->info, (void*)si_pm4_set_reg);
|
0, &sscreen->info,
|
||||||
|
(void*)(sctx->chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
|
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
|
||||||
S_028B50_ACCUM_ISOLINE(12) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
|
S_028B50_ACCUM_ISOLINE(12) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
|
||||||
|
|
@ -5579,11 +5584,14 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
||||||
if (sctx->chip_class >= GFX10) {
|
if (sctx->chip_class >= GFX10) {
|
||||||
/* Logical CUs 16 - 31 */
|
/* Logical CUs 16 - 31 */
|
||||||
ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
|
ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
|
||||||
C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
|
C_00B004_CU_EN, 16, &sscreen->info,
|
||||||
|
(void*)(sctx->chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
|
ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
|
||||||
C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
|
C_00B104_CU_EN, 16, &sscreen->info,
|
||||||
|
(void*)(sctx->chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
|
ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
|
||||||
C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
|
C_00B404_CU_EN, 16, &sscreen->info,
|
||||||
|
(void*)(sctx->chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
|
si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
|
||||||
si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
|
si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
|
||||||
|
|
|
||||||
|
|
@ -965,27 +965,29 @@ static void si_emit_shader_gs(struct si_context *sctx)
|
||||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs,
|
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs,
|
||||||
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
||||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
(void (*)(void*, unsigned, uint32_t))
|
||||||
|
(sctx->chip_class >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
||||||
}
|
}
|
||||||
if (sctx->chip_class >= GFX10) {
|
if (sctx->chip_class >= GFX10) {
|
||||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs,
|
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs,
|
||||||
C_00B204_CU_EN, 16, &sctx->screen->info,
|
C_00B204_CU_EN, 16, &sctx->screen->info,
|
||||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
(void (*)(void*, unsigned, uint32_t))
|
||||||
|
(sctx->chip_class >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS);
|
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
radeon_begin_again(&sctx->gfx_cs);
|
radeon_begin_again(&sctx->gfx_cs);
|
||||||
if (sctx->chip_class >= GFX7) {
|
if (sctx->chip_class >= GFX7) {
|
||||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
|
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
|
||||||
}
|
}
|
||||||
if (sctx->chip_class >= GFX10) {
|
if (sctx->chip_class >= GFX10) {
|
||||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
|
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
|
||||||
}
|
}
|
||||||
radeon_end();
|
radeon_end();
|
||||||
}
|
}
|
||||||
|
|
@ -1192,20 +1194,22 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
|
||||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs,
|
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs,
|
||||||
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
||||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
(void (*)(void*, unsigned, uint32_t))
|
||||||
|
(sctx->chip_class >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs,
|
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs,
|
||||||
C_00B204_CU_EN, 16, &sctx->screen->info,
|
C_00B204_CU_EN, 16, &sctx->screen->info,
|
||||||
(void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
|
(void (*)(void*, unsigned, uint32_t))
|
||||||
|
(sctx->chip_class >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
|
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
|
||||||
~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
||||||
} else {
|
} else {
|
||||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
|
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
|
||||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
|
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
|
||||||
radeon_end();
|
radeon_end();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1674,7 +1678,8 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
||||||
ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
||||||
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
|
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
|
||||||
C_00B118_CU_EN, 0, &sscreen->info,
|
C_00B118_CU_EN, 0, &sscreen->info,
|
||||||
(void (*)(void*, unsigned, uint32_t))si_pm4_set_reg);
|
(void (*)(void*, unsigned, uint32_t))
|
||||||
|
(sscreen->info.chip_class >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||||
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
|
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue