diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index ad961529fd4..4a116a36831 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -367,6 +367,9 @@ static void si_set_global_binding(struct pipe_context *ctx, unsigned first, unsi void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs) { radeon_begin(cs); + radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, + S_00B834_DATA(sctx->screen->info.address32_hi >> 8)); + radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2); /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1, * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */ @@ -535,9 +538,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute RADEON_PRIO_SHADER_BINARY); radeon_begin(cs); - radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2); - radeon_emit(cs, shader_va >> 8); - radeon_emit(cs, S_00B834_DATA(shader_va >> 40)); + radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, shader_va >> 8); radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2); radeon_emit(cs, config->rsrc1); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 92b3cb98fcc..b3d84eec8fe 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -858,8 +858,8 @@ bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader si_resource_reference(&shader->bo, NULL); shader->bo = si_aligned_buffer_create( &sscreen->b, - (sscreen->info.cpdma_prefetch_writes_memory ? - 0 : SI_RESOURCE_FLAG_READ_ONLY) | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + (sscreen->info.cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY) | + SI_RESOURCE_FLAG_DRIVER_INTERNAL | SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_IMMUTABLE, align(binary.rx_size, SI_CPDMA_ALIGNMENT), 256); if (!shader->bo) return false; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 63b21776332..cde009fd75f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5315,6 +5315,21 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); } + if (sscreen->info.chip_class >= GFX10) { + si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, + S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); + si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, + S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); + } else if (sscreen->info.chip_class == GFX9) { + si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, + S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8)); + si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, + S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8)); + } else { + si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, + S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); + } + if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) { si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F)); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index dc1d5f795a6..567309b00a5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -546,7 +546,6 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) va = shader->bo->gpu_address; si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); - si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(va >> 40)); shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) | @@ -571,10 +570,8 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) if (sscreen->info.chip_class >= GFX9) { if (sscreen->info.chip_class >= GFX10) { si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); - si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(va >> 40)); } else { si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); - si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, S_00B414_MEM_BASE(va >> 40)); } unsigned num_user_sgprs = si_get_num_vs_user_sgprs(shader, GFX9_TCS_NUM_USER_SGPR); @@ -588,7 +585,8 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) shader->config.rsrc2 |= S_00B42C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5); } else { si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); - si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, S_00B424_MEM_BASE(va >> 40)); + si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, + S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8)); shader->config.rsrc2 = S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) | S_00B42C_OC_LDS_EN(1) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); @@ -662,7 +660,8 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0; si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); - si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40)); + si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, + S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES, S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) | @@ -908,10 +907,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) if (sscreen->info.chip_class >= GFX10) { si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); - si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40)); } else { si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); - si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(va >> 40)); } uint32_t rsrc1 = S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B228_DX10_CLAMP(1) | @@ -960,7 +957,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); } si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); - si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, S_00B224_MEM_BASE(va >> 40)); + si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, + S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8)); si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | @@ -1197,7 +1195,6 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader &late_alloc_wave64, &cu_mask); si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); - si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40)); si_pm4_set_reg( pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) | @@ -1494,7 +1491,8 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, } si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); - si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(va >> 40)); + si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, + S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8)); uint32_t rsrc1 = S_00B128_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ge_wave_size == 32 ? 8 : 4)) | @@ -1710,7 +1708,8 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) va = shader->bo->gpu_address; si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); - si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, S_00B024_MEM_BASE(va >> 40)); + si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, + S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8)); uint32_t rsrc1 = S_00B028_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ps_wave_size == 32 ? 8 : 4)) |