radv: use PM4 for setting specific graphics registers in the preamble
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34172>
This commit is contained in:
Samuel Pitoiset 2025-03-24 17:38:37 +01:00 committed by Marge Bot
parent c5d0764fce
commit ef0a6f59f3

View file

@ -737,21 +737,21 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
bool has_clear_state = pdev->info.has_clear_state;
int i;
if (!device->uses_shadow_regs) {
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
if (has_clear_state) {
radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
radeon_emit(cs, 0);
}
}
struct ac_pm4_state *pm4 = ac_pm4_create_sized(&pdev->info, false, 512, false);
if (!pm4)
return;
if (!device->uses_shadow_regs) {
ac_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));
if (has_clear_state) {
ac_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0));
ac_pm4_cmd_add(pm4, 0);
}
}
const struct ac_preamble_state preamble_state = {
.border_color_va = border_color_va,
};
@ -760,35 +760,34 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
if (!has_clear_state) {
for (i = 0; i < 16; i++) {
radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i * 8, 0);
radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i * 8, fui(1.0));
ac_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i * 8, 0);
ac_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i * 8, fui(1.0));
}
}
if (!has_clear_state) {
radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
ac_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */
radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
ac_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
}
if (pdev->info.gfx_level <= GFX8)
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
ac_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
if (pdev->info.gfx_level < GFX11)
radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(pdev->info.address32_hi >> 8));
ac_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(pdev->info.address32_hi >> 8));
unsigned cu_mask_ps = pdev->info.gfx_level >= GFX10_3 ? ac_gfx103_get_cu_mask_ps(&pdev->info) : ~0u;
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(cs, R_00B420_SPI_SHADER_PGM_RSRC4_HS,
S_00B420_WAVE_LIMIT(0x3ff) | S_00B420_GLG_FORCE_DISABLE(1));
radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC4_PS,
S_00B01C_WAVE_LIMIT_GFX12(0x3FF) | S_00B01C_LDS_GROUP_SIZE_GFX12(1));
ac_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_RSRC4_HS, S_00B420_WAVE_LIMIT(0x3ff) | S_00B420_GLG_FORCE_DISABLE(1));
ac_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC4_PS,
S_00B01C_WAVE_LIMIT_GFX12(0x3FF) | S_00B01C_LDS_GROUP_SIZE_GFX12(1));
} else if (pdev->info.gfx_level >= GFX11) {
radeon_set_sh_reg_idx(&pdev->info, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3,
ac_apply_cu_en(S_00B404_CU_EN(0xffff), C_00B404_CU_EN, 16, &pdev->info));
radeon_set_sh_reg_idx(&pdev->info, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3,
ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), C_00B004_CU_EN, 16, &pdev->info));
ac_pm4_set_reg_idx3(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
ac_apply_cu_en(S_00B404_CU_EN(0xffff), C_00B404_CU_EN, 16, &pdev->info));
ac_pm4_set_reg_idx3(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), C_00B004_CU_EN, 16, &pdev->info));
}
if (pdev->info.gfx_level >= GFX10) {
@ -797,13 +796,13 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
* decomposed primitives (such as quads) with polygon mode = lines.
*/
unsigned vertex_reuse_depth = pdev->info.gfx_level >= GFX10_3 ? 30 : 0;
radeon_set_context_reg(cs, R_028838_PA_CL_NGG_CNTL,
S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) | S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth));
ac_pm4_set_reg(pm4, R_028838_PA_CL_NGG_CNTL,
S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) | S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth));
if (pdev->info.gfx_level >= GFX10_3) {
/* This allows sample shading. */
radeon_set_context_reg(cs, R_028848_PA_CL_VRS_CNTL,
S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
ac_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL,
S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
}
}
@ -811,14 +810,13 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
/* GFX8+ only compares the bits according to the index type by default,
* so we can always leave the programmed value at the maximum.
*/
radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0xffffffff);
ac_pm4_set_reg(pm4, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0xffffffff);
}
unsigned tmp = (unsigned)(1.0 * 8.0);
radeon_set_context_reg(cs, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
radeon_set_context_reg(
cs, R_028A04_PA_SU_POINT_MINMAX,
S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) | S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2)));
ac_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
ac_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) | S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2)));
/* Enable the Polaris small primitive filter control.
* XXX: There is possibly an issue when MSAA is off (see RadeonSI
@ -830,41 +828,41 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
/* Workaround for a hw line bug. */
S_028830_LINE_FILTER_DISABLE(pdev->info.family <= CHIP_POLARIS12);
radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);
ac_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);
}
if (pdev->info.gfx_level >= GFX12) {
radeon_set_context_reg(cs, R_028644_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
ac_pm4_set_reg(pm4, R_028644_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
} else {
radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
ac_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
}
radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
S_028BE4_PIX_CENTER(1) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
ac_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
S_028BE4_PIX_CENTER(1) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
if (pdev->info.gfx_level >= GFX12) {
radeon_set_context_reg(cs, R_028814_PA_CL_VTE_CNTL,
S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
ac_pm4_set_reg(pm4, R_028814_PA_CL_VTE_CNTL,
S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) |
S_028818_VPORT_Z_OFFSET_ENA(1));
} else {
radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
ac_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) |
S_028818_VPORT_Z_OFFSET_ENA(1));
}
if (pdev->info.gfx_level == GFX8 && device->tma_bo) {
@ -877,18 +875,17 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
R_00B300_SPI_SHADER_TBA_LO_ES, R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS};
for (i = 0; i < ARRAY_SIZE(regs); ++i) {
radeon_set_sh_reg_seq(cs, regs[i], 4);
radeon_emit(cs, tba_va >> 8);
radeon_emit(cs, tba_va >> 40);
radeon_emit(cs, tma_va >> 8);
radeon_emit(cs, tma_va >> 40);
ac_pm4_set_reg(pm4, regs[i] + 0, tba_va >> 8);
ac_pm4_set_reg(pm4, regs[i] + 4, tba_va >> 40);
ac_pm4_set_reg(pm4, regs[i] + 8, tma_va >> 8);
ac_pm4_set_reg(pm4, regs[i] + 12, tma_va >> 40);
}
}
radeon_set_context_reg(cs, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0x3f800000);
ac_pm4_set_reg(pm4, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0x3f800000);
if (pdev->info.gfx_level >= GFX12) {
radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, 0);
ac_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, 0);
}
ac_pm4_finalize(pm4);