radeonsi: skip setting some PGM_HI registers by switching to 32-bit addresses

The remaining PGM_HI registers are still written, because keeping them at
consecutive register offsets with their neighbouring registers gives the
smallest command buffer size.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12812>
This commit is contained in:
Marek Olšák 2021-08-18 13:37:38 -04:00 committed by Marge Bot
parent a2a7610e1f
commit 64a06f8167
4 changed files with 31 additions and 16 deletions

View file

@ -367,6 +367,9 @@ static void si_set_global_binding(struct pipe_context *ctx, unsigned first, unsi
void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs)
{
radeon_begin(cs);
radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI,
S_00B834_DATA(sctx->screen->info.address32_hi >> 8));
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
* renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
@ -535,9 +538,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
RADEON_PRIO_SHADER_BINARY);
radeon_begin(cs);
radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
radeon_emit(cs, shader_va >> 8);
radeon_emit(cs, S_00B834_DATA(shader_va >> 40));
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
radeon_emit(cs, config->rsrc1);

View file

@ -858,8 +858,8 @@ bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader
si_resource_reference(&shader->bo, NULL);
shader->bo = si_aligned_buffer_create(
&sscreen->b,
(sscreen->info.cpdma_prefetch_writes_memory ?
0 : SI_RESOURCE_FLAG_READ_ONLY) | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
(sscreen->info.cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY) |
SI_RESOURCE_FLAG_DRIVER_INTERNAL | SI_RESOURCE_FLAG_32BIT,
PIPE_USAGE_IMMUTABLE, align(binary.rx_size, SI_CPDMA_ALIGNMENT), 256);
if (!shader->bo)
return false;

View file

@ -5315,6 +5315,21 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
}
if (sscreen->info.chip_class >= GFX10) {
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8));
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
} else if (sscreen->info.chip_class == GFX9) {
si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS,
S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8));
si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES,
S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8));
} else {
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8));
}
if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));

View file

@ -546,7 +546,6 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
va = shader->bo->gpu_address;
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(va >> 40));
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
@ -571,10 +570,8 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
if (sscreen->info.chip_class >= GFX9) {
if (sscreen->info.chip_class >= GFX10) {
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(va >> 40));
} else {
si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, S_00B414_MEM_BASE(va >> 40));
}
unsigned num_user_sgprs = si_get_num_vs_user_sgprs(shader, GFX9_TCS_NUM_USER_SGPR);
@ -588,7 +585,8 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
shader->config.rsrc2 |= S_00B42C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
} else {
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, S_00B424_MEM_BASE(va >> 40));
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS,
S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8));
shader->config.rsrc2 = S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) | S_00B42C_OC_LDS_EN(1) |
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
@ -662,7 +660,8 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40));
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
@ -908,10 +907,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
if (sscreen->info.chip_class >= GFX10) {
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40));
} else {
si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(va >> 40));
}
uint32_t rsrc1 = S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B228_DX10_CLAMP(1) |
@ -960,7 +957,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
}
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, S_00B224_MEM_BASE(va >> 40));
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8));
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
@ -1197,7 +1195,6 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
&late_alloc_wave64, &cu_mask);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40));
si_pm4_set_reg(
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
@ -1494,7 +1491,8 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
}
si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(va >> 40));
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS,
S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8));
uint32_t rsrc1 =
S_00B128_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ge_wave_size == 32 ? 8 : 4)) |
@ -1710,7 +1708,8 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
va = shader->bo->gpu_address;
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, S_00B024_MEM_BASE(va >> 40));
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS,
S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8));
uint32_t rsrc1 =
S_00B028_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ps_wave_size == 32 ? 8 : 4)) |