From 75be860eecd219785380b1ef721faedcd3667095 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 2 Apr 2025 17:34:51 +0200 Subject: [PATCH] radv: use paired context regs when optimal on GFX12 CP is very slow on GFX12 and parsing the packet header is the main bottleneck. Using paired context regs reduces the number of packet headers, which should be more efficient. It doesn't seem worthwhile when only one context reg is emitted (one packet header and the same number of DWORDS) or when consecutive context regs are emitted (it would increase the number of DWORDS). Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 388 +++++++++++++++++++------------ 1 file changed, 233 insertions(+), 155 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 7cf182a9d26..cbc081d468b 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2147,16 +2147,24 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e break_wave_at_eoi = true; } - radeon_begin(cmd_buffer->cs); - if (pdev->info.gfx_level >= GFX12) { - radeon_opt_set_context_reg(cmd_buffer, R_028818_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL, - shader->info.regs.pa_cl_vs_out_cntl); - radeon_opt_set_context_reg(cmd_buffer, R_028B3C_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT, - shader->info.regs.vgt_gs_instance_cnt); - radeon_opt_set_context_reg2(cmd_buffer, R_028648_SPI_SHADER_IDX_FORMAT, RADV_TRACKED_SPI_SHADER_IDX_FORMAT, - shader->info.regs.ngg.spi_shader_idx_format, shader->info.regs.spi_shader_pos_format); + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_opt_set_context_reg(cmd_buffer, R_028818_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL, + shader->info.regs.pa_cl_vs_out_cntl); + gfx12_opt_set_context_reg(cmd_buffer, R_028B3C_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT, + shader->info.regs.vgt_gs_instance_cnt); + 
gfx12_opt_set_context_reg2(cmd_buffer, R_028648_SPI_SHADER_IDX_FORMAT, RADV_TRACKED_SPI_SHADER_IDX_FORMAT, + shader->info.regs.ngg.spi_shader_idx_format, shader->info.regs.spi_shader_pos_format); + gfx12_opt_set_context_reg(cmd_buffer, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, + RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, + shader->info.regs.ngg.ge_max_output_per_subgroup); + gfx12_opt_set_context_reg(cmd_buffer, R_028B4C_GE_NGG_SUBGRP_CNTL, RADV_TRACKED_GE_NGG_SUBGRP_CNTL, + shader->info.regs.ngg.ge_ngg_subgrp_cntl); + gfx12_end_context_regs(); + radeon_end(); } else { + radeon_begin(cmd_buffer->cs); radeon_opt_set_context_reg(cmd_buffer, R_02881C_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL, shader->info.regs.pa_cl_vs_out_cntl); radeon_opt_set_context_reg(cmd_buffer, R_028B90_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT, @@ -2167,12 +2175,15 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e shader->info.regs.ngg.spi_shader_idx_format, shader->info.regs.spi_shader_pos_format); radeon_opt_set_context_reg(cmd_buffer, R_0286C4_SPI_VS_OUT_CONFIG, RADV_TRACKED_SPI_VS_OUT_CONFIG, shader->info.regs.spi_vs_out_config); + radeon_opt_set_context_reg(cmd_buffer, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, + RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, + shader->info.regs.ngg.ge_max_output_per_subgroup); + radeon_opt_set_context_reg(cmd_buffer, R_028B4C_GE_NGG_SUBGRP_CNTL, RADV_TRACKED_GE_NGG_SUBGRP_CNTL, + shader->info.regs.ngg.ge_ngg_subgrp_cntl); + radeon_end(); } - radeon_opt_set_context_reg(cmd_buffer, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, - shader->info.regs.ngg.ge_max_output_per_subgroup); - radeon_opt_set_context_reg(cmd_buffer, R_028B4C_GE_NGG_SUBGRP_CNTL, RADV_TRACKED_GE_NGG_SUBGRP_CNTL, - shader->info.regs.ngg.ge_ngg_subgrp_cntl); + radeon_begin(cmd_buffer->cs); uint32_t ge_cntl = shader->info.regs.ngg.ge_cntl; if (pdev->info.gfx_level >= GFX11) { @@ -2667,22 +2678,25 @@ 
radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct const uint32_t spi_ps_input_addr = ps ? ps->config.spi_ps_input_addr : 0; const uint32_t spi_ps_in_control = ps ? ps->info.regs.ps.spi_ps_in_control : 0; - radeon_begin(cmd_buffer->cs); - if (pdev->info.gfx_level >= GFX12) { const uint32_t pa_sc_hisz_control = ps ? ps->info.regs.ps.pa_sc_hisz_control : 0; - radeon_opt_set_context_reg2(cmd_buffer, R_02865C_SPI_PS_INPUT_ENA, RADV_TRACKED_SPI_PS_INPUT_ENA, - spi_ps_input_ena, spi_ps_input_addr); + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_opt_set_context_reg2(cmd_buffer, R_02865C_SPI_PS_INPUT_ENA, RADV_TRACKED_SPI_PS_INPUT_ENA, spi_ps_input_ena, + spi_ps_input_addr); - radeon_opt_set_context_reg(cmd_buffer, R_028640_SPI_PS_IN_CONTROL, RADV_TRACKED_SPI_PS_IN_CONTROL, - spi_ps_in_control); + gfx12_opt_set_context_reg(cmd_buffer, R_028640_SPI_PS_IN_CONTROL, RADV_TRACKED_SPI_PS_IN_CONTROL, + spi_ps_in_control); - radeon_opt_set_context_reg(cmd_buffer, R_028BBC_PA_SC_HISZ_CONTROL, RADV_TRACKED_PA_SC_HISZ_CONTROL, - pa_sc_hisz_control); + gfx12_opt_set_context_reg(cmd_buffer, R_028BBC_PA_SC_HISZ_CONTROL, RADV_TRACKED_PA_SC_HISZ_CONTROL, + pa_sc_hisz_control); + gfx12_end_context_regs(); + radeon_end(); } else { const uint32_t pa_sc_shader_control = ps ? 
ps->info.regs.ps.pa_sc_shader_control : 0; + radeon_begin(cmd_buffer->cs); radeon_opt_set_context_reg2(cmd_buffer, R_0286CC_SPI_PS_INPUT_ENA, RADV_TRACKED_SPI_PS_INPUT_ENA, spi_ps_input_ena, spi_ps_input_addr); @@ -2694,9 +2708,8 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11) radeon_opt_set_context_reg(cmd_buffer, R_028C40_PA_SC_SHADER_CONTROL, RADV_TRACKED_PA_SC_SHADER_CONTROL, pa_sc_shader_control); + radeon_end(); } - - radeon_end(); } static void @@ -3721,15 +3734,19 @@ radv_emit_rasterization_samples(struct radv_cmd_buffer *cmd_buffer) pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1); } - radeon_begin(cmd_buffer->cs); if (pdev->info.gfx_level >= GFX12) { - radeon_set_context_reg(R_028658_SPI_BARYC_CNTL, spi_baryc_cntl); + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_set_context_reg(R_028658_SPI_BARYC_CNTL, spi_baryc_cntl); + gfx12_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); + gfx12_end_context_regs(); + radeon_end(); } else { + radeon_begin(cmd_buffer->cs); radeon_set_context_reg(R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); + radeon_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); + radeon_end(); } - - radeon_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); - radeon_end(); } static void @@ -3738,15 +3755,17 @@ radv_gfx12_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, st struct radeon_cmdbuf *cs = cmd_buffer->cs; radeon_begin(cs); - radeon_set_context_reg(R_028C60_CB_COLOR0_BASE + index * 0x24, cb->ac.cb_color_base); - radeon_set_context_reg(R_028C64_CB_COLOR0_VIEW + index * 0x24, cb->ac.cb_color_view); - radeon_set_context_reg(R_028C68_CB_COLOR0_VIEW2 + index * 0x24, cb->ac.cb_color_view2); - radeon_set_context_reg(R_028C6C_CB_COLOR0_ATTRIB + index * 0x24, cb->ac.cb_color_attrib); - radeon_set_context_reg(R_028C70_CB_COLOR0_FDCC_CONTROL + index * 0x24, 
cb->ac.cb_dcc_control); - radeon_set_context_reg(R_028C78_CB_COLOR0_ATTRIB2 + index * 0x24, cb->ac.cb_color_attrib2); - radeon_set_context_reg(R_028C7C_CB_COLOR0_ATTRIB3 + index * 0x24, cb->ac.cb_color_attrib3); - radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + index * 4, S_028E40_BASE_256B(cb->ac.cb_color_base >> 32)); - radeon_set_context_reg(R_028EC0_CB_COLOR0_INFO + index * 4, cb->ac.cb_color_info); + gfx12_begin_context_regs(); + gfx12_set_context_reg(R_028C60_CB_COLOR0_BASE + index * 0x24, cb->ac.cb_color_base); + gfx12_set_context_reg(R_028C64_CB_COLOR0_VIEW + index * 0x24, cb->ac.cb_color_view); + gfx12_set_context_reg(R_028C68_CB_COLOR0_VIEW2 + index * 0x24, cb->ac.cb_color_view2); + gfx12_set_context_reg(R_028C6C_CB_COLOR0_ATTRIB + index * 0x24, cb->ac.cb_color_attrib); + gfx12_set_context_reg(R_028C70_CB_COLOR0_FDCC_CONTROL + index * 0x24, cb->ac.cb_dcc_control); + gfx12_set_context_reg(R_028C78_CB_COLOR0_ATTRIB2 + index * 0x24, cb->ac.cb_color_attrib2); + gfx12_set_context_reg(R_028C7C_CB_COLOR0_ATTRIB3 + index * 0x24, cb->ac.cb_color_attrib3); + gfx12_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + index * 4, S_028E40_BASE_256B(cb->ac.cb_color_base >> 32)); + gfx12_set_context_reg(R_028EC0_CB_COLOR0_INFO + index * 4, cb->ac.cb_color_info); + gfx12_end_context_regs(); radeon_end(); } @@ -3931,34 +3950,36 @@ radv_gfx12_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_b struct radeon_cmdbuf *cs = cmd_buffer->cs; radeon_begin(cs); - radeon_set_context_reg(R_028004_DB_DEPTH_VIEW, ds->ac.db_depth_view); - radeon_set_context_reg(R_028008_DB_DEPTH_VIEW1, ds->ac.u.gfx12.db_depth_view1); - radeon_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2); - radeon_set_context_reg(R_028014_DB_DEPTH_SIZE_XY, ds->ac.db_depth_size); - radeon_set_context_reg(R_028018_DB_Z_INFO, ds->ac.db_z_info); - radeon_set_context_reg(R_02801C_DB_STENCIL_INFO, ds->ac.db_stencil_info); - radeon_set_context_reg(R_028020_DB_Z_READ_BASE, 
ds->ac.db_depth_base); - radeon_set_context_reg(R_028024_DB_Z_READ_BASE_HI, S_028024_BASE_HI(ds->ac.db_depth_base >> 32)); - radeon_set_context_reg(R_028028_DB_Z_WRITE_BASE, ds->ac.db_depth_base); - radeon_set_context_reg(R_02802C_DB_Z_WRITE_BASE_HI, S_02802C_BASE_HI(ds->ac.db_depth_base >> 32)); - radeon_set_context_reg(R_028030_DB_STENCIL_READ_BASE, ds->ac.db_stencil_base); - radeon_set_context_reg(R_028034_DB_STENCIL_READ_BASE_HI, S_028034_BASE_HI(ds->ac.db_stencil_base >> 32)); - radeon_set_context_reg(R_028038_DB_STENCIL_WRITE_BASE, ds->ac.db_stencil_base); - radeon_set_context_reg(R_02803C_DB_STENCIL_WRITE_BASE_HI, S_02803C_BASE_HI(ds->ac.db_stencil_base >> 32)); - radeon_set_context_reg(R_028B94_PA_SC_HIZ_INFO, ds->ac.u.gfx12.hiz_info); - radeon_set_context_reg(R_028B98_PA_SC_HIS_INFO, ds->ac.u.gfx12.his_info); + gfx12_begin_context_regs(); + gfx12_set_context_reg(R_028004_DB_DEPTH_VIEW, ds->ac.db_depth_view); + gfx12_set_context_reg(R_028008_DB_DEPTH_VIEW1, ds->ac.u.gfx12.db_depth_view1); + gfx12_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2); + gfx12_set_context_reg(R_028014_DB_DEPTH_SIZE_XY, ds->ac.db_depth_size); + gfx12_set_context_reg(R_028018_DB_Z_INFO, ds->ac.db_z_info); + gfx12_set_context_reg(R_02801C_DB_STENCIL_INFO, ds->ac.db_stencil_info); + gfx12_set_context_reg(R_028020_DB_Z_READ_BASE, ds->ac.db_depth_base); + gfx12_set_context_reg(R_028024_DB_Z_READ_BASE_HI, S_028024_BASE_HI(ds->ac.db_depth_base >> 32)); + gfx12_set_context_reg(R_028028_DB_Z_WRITE_BASE, ds->ac.db_depth_base); + gfx12_set_context_reg(R_02802C_DB_Z_WRITE_BASE_HI, S_02802C_BASE_HI(ds->ac.db_depth_base >> 32)); + gfx12_set_context_reg(R_028030_DB_STENCIL_READ_BASE, ds->ac.db_stencil_base); + gfx12_set_context_reg(R_028034_DB_STENCIL_READ_BASE_HI, S_028034_BASE_HI(ds->ac.db_stencil_base >> 32)); + gfx12_set_context_reg(R_028038_DB_STENCIL_WRITE_BASE, ds->ac.db_stencil_base); + gfx12_set_context_reg(R_02803C_DB_STENCIL_WRITE_BASE_HI, 
S_02803C_BASE_HI(ds->ac.db_stencil_base >> 32)); + gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, ds->ac.u.gfx12.hiz_info); + gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, ds->ac.u.gfx12.his_info); if (ds->ac.u.gfx12.hiz_info) { - radeon_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, ds->ac.u.gfx12.hiz_base); - radeon_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, S_028BA0_BASE_256B(ds->ac.u.gfx12.hiz_base >> 32)); - radeon_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, ds->ac.u.gfx12.hiz_size_xy); + gfx12_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, ds->ac.u.gfx12.hiz_base); + gfx12_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, S_028BA0_BASE_256B(ds->ac.u.gfx12.hiz_base >> 32)); + gfx12_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, ds->ac.u.gfx12.hiz_size_xy); } if (ds->ac.u.gfx12.his_info) { - radeon_set_context_reg(R_028BA8_PA_SC_HIS_BASE, ds->ac.u.gfx12.his_base); - radeon_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, S_028BAC_BASE_256B(ds->ac.u.gfx12.his_base >> 32)); - radeon_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, ds->ac.u.gfx12.his_size_xy); + gfx12_set_context_reg(R_028BA8_PA_SC_HIS_BASE, ds->ac.u.gfx12.his_base); + gfx12_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, S_028BAC_BASE_256B(ds->ac.u.gfx12.his_base >> 32)); + gfx12_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, ds->ac.u.gfx12.his_size_xy); } + gfx12_end_context_regs(); radeon_end(); } @@ -4074,12 +4095,14 @@ radv_gfx12_emit_null_ds_state(struct radv_cmd_buffer *cmd_buffer) struct radeon_cmdbuf *cs = cmd_buffer->cs; radeon_begin(cs); - radeon_set_context_reg_seq(R_028018_DB_Z_INFO, 2); - radeon_emit(S_028018_FORMAT(V_028018_Z_INVALID) | S_028018_NUM_SAMPLES(3)); - radeon_emit(S_02801C_FORMAT(V_02801C_STENCIL_INVALID) | S_02801C_TILE_STENCIL_DISABLE(1)); - radeon_set_context_reg(R_028B94_PA_SC_HIZ_INFO, S_028B94_SURFACE_ENABLE(0)); - radeon_set_context_reg(R_028B98_PA_SC_HIS_INFO, S_028B98_SURFACE_ENABLE(0)); - radeon_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, S_028010_CENTROID_COMPUTATION_MODE(1)); + 
gfx12_begin_context_regs(); + gfx12_set_context_reg(R_028018_DB_Z_INFO, S_028018_FORMAT(V_028018_Z_INVALID) | S_028018_NUM_SAMPLES(3)); + gfx12_set_context_reg(R_02801C_DB_STENCIL_INFO, + S_02801C_FORMAT(V_02801C_STENCIL_INVALID) | S_02801C_TILE_STENCIL_DISABLE(1)); + gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, S_028B94_SURFACE_ENABLE(0)); + gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, S_028B98_SURFACE_ENABLE(0)); + gfx12_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, S_028010_CENTROID_COMPUTATION_MODE(1)); + gfx12_end_context_regs(); radeon_end(); } @@ -4604,14 +4627,13 @@ radv_gfx11_emit_vrs_surface(struct radv_cmd_buffer *cmd_buffer) const struct radv_rendering_state *render = &cmd_buffer->state.render; const bool vrs_surface_enable = render->vrs_att.iview != NULL; struct radeon_cmdbuf *cs = cmd_buffer->cs; - - radeon_begin(cs); + unsigned xmax = 0, ymax = 0; + uint8_t swizzle_mode = 0; + uint64_t va = 0; if (vrs_surface_enable) { const struct radv_image_view *vrs_iview = render->vrs_att.iview; struct radv_image *vrs_image = vrs_iview->image; - unsigned xmax = 0, ymax = 0; - uint64_t va = 0; radv_cs_add_buffer(device->ws, cmd_buffer->cs, vrs_image->bindings[0].bo); @@ -4621,19 +4643,32 @@ radv_gfx11_emit_vrs_surface(struct radv_cmd_buffer *cmd_buffer) xmax = vrs_iview->vk.extent.width - 1; ymax = vrs_iview->vk.extent.height - 1; - radeon_set_context_reg_seq(R_0283F0_PA_SC_VRS_RATE_BASE, 3); - radeon_emit(va >> 8); - radeon_emit(S_0283F4_BASE_256B(va >> 40)); - radeon_emit(S_0283F8_X_MAX(xmax) | S_0283F8_Y_MAX(ymax)); - - if (pdev->info.gfx_level >= GFX12) { - radeon_set_context_reg(R_0283E0_PA_SC_VRS_INFO, - S_0283E0_RATE_SW_MODE(vrs_image->planes[0].surface.u.gfx9.swizzle_mode)); - } + swizzle_mode = vrs_image->planes[0].surface.u.gfx9.swizzle_mode; } - radeon_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, S_0283D0_VRS_SURFACE_ENABLE(vrs_surface_enable)); - radeon_end(); + if (pdev->info.gfx_level >= GFX12) { + radeon_begin(cs); + 
gfx12_begin_context_regs(); + if (vrs_surface_enable) { + gfx12_set_context_reg(R_0283F0_PA_SC_VRS_RATE_BASE, va >> 8); + gfx12_set_context_reg(R_0283F4_PA_SC_VRS_RATE_BASE_EXT, S_0283F4_BASE_256B(va >> 40)); + gfx12_set_context_reg(R_0283F8_PA_SC_VRS_RATE_SIZE_XY, S_0283F8_X_MAX(xmax) | S_0283F8_Y_MAX(ymax)); + gfx12_set_context_reg(R_0283E0_PA_SC_VRS_INFO, S_0283E0_RATE_SW_MODE(swizzle_mode)); + } + gfx12_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, S_0283D0_VRS_SURFACE_ENABLE(vrs_surface_enable)); + gfx12_end_context_regs(); + radeon_end(); + } else { + radeon_begin(cs); + if (vrs_surface_enable) { + radeon_set_context_reg_seq(R_0283F0_PA_SC_VRS_RATE_BASE, 3); + radeon_emit(va >> 8); + radeon_emit(S_0283F4_BASE_256B(va >> 40)); + radeon_emit(S_0283F8_X_MAX(xmax) | S_0283F8_Y_MAX(ymax)); + } + radeon_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, S_0283D0_VRS_SURFACE_ENABLE(vrs_surface_enable)); + radeon_end(); + } } static void @@ -9358,15 +9393,19 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe const uint32_t maxy = miny + render->area.extent.height; radeon_check_space(device->ws, cmd_buffer->cs, 10); - radeon_begin(cmd_buffer->cs); if (pdev->info.gfx_level >= GFX12) { - radeon_set_context_reg_seq(R_028204_PA_SC_WINDOW_SCISSOR_TL, 2); - radeon_emit(S_028204_TL_X(minx) | S_028204_TL_Y_GFX12(miny)); - radeon_emit(S_028208_BR_X(maxx - 1) | S_028208_BR_Y(maxy - 1)); /* inclusive */ - radeon_set_context_reg(R_028184_PA_SC_SCREEN_SCISSOR_BR, - S_028034_BR_X(screen_scissor.width) | S_028034_BR_Y(screen_scissor.height)); + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_set_context_reg(R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_TL_X(minx) | S_028204_TL_Y_GFX12(miny)); + gfx12_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR, + S_028208_BR_X(maxx - 1) | S_028208_BR_Y(maxy - 1)); /* inclusive */ + gfx12_set_context_reg(R_028184_PA_SC_SCREEN_SCISSOR_BR, + S_028034_BR_X(screen_scissor.width) | 
S_028034_BR_Y(screen_scissor.height)); + gfx12_end_context_regs(); + radeon_end(); } else { + radeon_begin(cmd_buffer->cs); radeon_set_context_reg_seq(R_028204_PA_SC_WINDOW_SCISSOR_TL, 2); radeon_emit(S_028204_TL_X(minx) | S_028204_TL_Y_GFX6(miny)); radeon_emit(S_028208_BR_X(maxx) | S_028208_BR_Y(maxy)); @@ -9383,10 +9422,9 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) | S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); } + radeon_end(); } - radeon_end(); - radv_emit_fb_mip_change_flush(cmd_buffer); if (!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) @@ -10707,22 +10745,23 @@ radv_emit_fragment_output_state(struct radv_cmd_buffer *cmd_buffer) uint32_t col_format_compacted = radv_compact_spi_shader_col_format(cmd_buffer->state.spi_shader_col_format); - radeon_begin(cmd_buffer->cs); - if (pdev->info.gfx_level >= GFX12) { - radeon_set_context_reg(R_028854_CB_SHADER_MASK, cmd_buffer->state.cb_shader_mask); - radeon_set_context_reg_seq(R_028650_SPI_SHADER_Z_FORMAT, 2); - radeon_emit(cmd_buffer->state.spi_shader_z_format); - radeon_emit(col_format_compacted); /* SPI_SHADER_COL_FORMAT */ + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_set_context_reg(R_028854_CB_SHADER_MASK, cmd_buffer->state.cb_shader_mask); + gfx12_set_context_reg(R_028650_SPI_SHADER_Z_FORMAT, cmd_buffer->state.spi_shader_z_format); + gfx12_set_context_reg(R_028654_SPI_SHADER_COL_FORMAT, col_format_compacted); + gfx12_end_context_regs(); + radeon_end(); } else { + radeon_begin(cmd_buffer->cs); radeon_set_context_reg(R_02823C_CB_SHADER_MASK, cmd_buffer->state.cb_shader_mask); radeon_set_context_reg_seq(R_028710_SPI_SHADER_Z_FORMAT, 2); radeon_emit(cmd_buffer->state.spi_shader_z_format); radeon_emit(col_format_compacted); /* SPI_SHADER_COL_FORMAT */ + radeon_end(); } - radeon_end(); - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAGMENT_OUTPUT; } @@ -10756,39 
+10795,41 @@ radv_emit_depth_stencil_state(struct radv_cmd_buffer *cmd_buffer) const uint32_t depth_bounds_min = fui(d->vk.ds.depth.bounds_test.min); const uint32_t depth_bounds_max = fui(d->vk.ds.depth.bounds_test.max); - radeon_begin(cmd_buffer->cs); - if (pdev->info.gfx_level >= GFX12) { const bool force_s_valid = stencil_test_enable && ((d->vk.ds.stencil.front.op.pass != d->vk.ds.stencil.front.op.depth_fail) || (d->vk.ds.stencil.back.op.pass != d->vk.ds.stencil.back.op.depth_fail)); - radeon_opt_set_context_reg(cmd_buffer, R_02800C_DB_RENDER_OVERRIDE, RADV_TRACKED_DB_RENDER_OVERRIDE, - S_02800C_FORCE_STENCIL_READ(1) | S_02800C_FORCE_STENCIL_VALID(force_s_valid)); + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_opt_set_context_reg(cmd_buffer, R_02800C_DB_RENDER_OVERRIDE, RADV_TRACKED_DB_RENDER_OVERRIDE, + S_02800C_FORCE_STENCIL_READ(1) | S_02800C_FORCE_STENCIL_VALID(force_s_valid)); - radeon_opt_set_context_reg(cmd_buffer, R_028070_DB_DEPTH_CONTROL, RADV_TRACKED_DB_DEPTH_CONTROL, - db_depth_control); + gfx12_opt_set_context_reg(cmd_buffer, R_028070_DB_DEPTH_CONTROL, RADV_TRACKED_DB_DEPTH_CONTROL, db_depth_control); if (stencil_test_enable) { - radeon_opt_set_context_reg(cmd_buffer, R_028074_DB_STENCIL_CONTROL, RADV_TRACKED_DB_STENCIL_CONTROL, - db_stencil_control); + gfx12_opt_set_context_reg(cmd_buffer, R_028074_DB_STENCIL_CONTROL, RADV_TRACKED_DB_STENCIL_CONTROL, + db_stencil_control); - radeon_opt_set_context_reg( + gfx12_opt_set_context_reg( cmd_buffer, R_028088_DB_STENCIL_REF, RADV_TRACKED_DB_STENCIL_REF, S_028088_TESTVAL(d->vk.ds.stencil.front.reference) | S_028088_TESTVAL_BF(d->vk.ds.stencil.back.reference)); - radeon_opt_set_context_reg2(cmd_buffer, R_028090_DB_STENCIL_READ_MASK, RADV_TRACKED_DB_STENCIL_READ_MASK, - S_028090_TESTMASK(d->vk.ds.stencil.front.compare_mask) | - S_028090_TESTMASK_BF(d->vk.ds.stencil.back.compare_mask), - S_028094_WRITEMASK(d->vk.ds.stencil.front.write_mask) | - 
S_028094_WRITEMASK_BF(d->vk.ds.stencil.back.write_mask)); + gfx12_opt_set_context_reg2(cmd_buffer, R_028090_DB_STENCIL_READ_MASK, RADV_TRACKED_DB_STENCIL_READ_MASK, + S_028090_TESTMASK(d->vk.ds.stencil.front.compare_mask) | + S_028090_TESTMASK_BF(d->vk.ds.stencil.back.compare_mask), + S_028094_WRITEMASK(d->vk.ds.stencil.front.write_mask) | + S_028094_WRITEMASK_BF(d->vk.ds.stencil.back.write_mask)); } if (d->vk.ds.depth.bounds_test.enable) { - radeon_opt_set_context_reg2(cmd_buffer, R_028050_DB_DEPTH_BOUNDS_MIN, RADV_TRACKED_DB_DEPTH_BOUNDS_MIN, - depth_bounds_min, depth_bounds_max); + gfx12_opt_set_context_reg2(cmd_buffer, R_028050_DB_DEPTH_BOUNDS_MIN, RADV_TRACKED_DB_DEPTH_BOUNDS_MIN, + depth_bounds_min, depth_bounds_max); } + gfx12_end_context_regs(); + radeon_end(); } else { + radeon_begin(cmd_buffer->cs); radeon_opt_set_context_reg(cmd_buffer, R_028800_DB_DEPTH_CONTROL, RADV_TRACKED_DB_DEPTH_CONTROL, db_depth_control); @@ -10810,10 +10851,9 @@ radv_emit_depth_stencil_state(struct radv_cmd_buffer *cmd_buffer) radeon_opt_set_context_reg2(cmd_buffer, R_028020_DB_DEPTH_BOUNDS_MIN, RADV_TRACKED_DB_DEPTH_BOUNDS_MIN, depth_bounds_min, depth_bounds_max); } + radeon_end(); } - radeon_end(); - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DEPTH_STENCIL_STATE; } @@ -10858,36 +10898,51 @@ radv_emit_raster_state(struct radv_cmd_buffer *cmd_buffer) const uint32_t pa_sc_line_cntl = S_028BDC_PERPENDICULAR_ENDCAP_ENA(radv_get_line_mode(cmd_buffer) == VK_LINE_RASTERIZATION_MODE_RECTANGULAR); - radeon_begin(cmd_buffer->cs); - - radeon_opt_set_context_reg(cmd_buffer, R_028A08_PA_SU_LINE_CNTL, RADV_TRACKED_PA_SU_LINE_CNTL, pa_su_line_cntl); - - radeon_opt_set_context_reg(cmd_buffer, R_028A0C_PA_SC_LINE_STIPPLE, RADV_TRACKED_PA_SC_LINE_STIPPLE, - S_028A0C_LINE_PATTERN(d->vk.rs.line.stipple.pattern) | - S_028A0C_REPEAT_COUNT(d->vk.rs.line.stipple.factor - 1) | - S_028A0C_AUTO_RESET_CNTL(pdev->info.gfx_level < GFX12 ? 
auto_reset_cntl : 0)); - - radeon_opt_set_context_reg(cmd_buffer, R_028BDC_PA_SC_LINE_CNTL, RADV_TRACKED_PA_SC_LINE_CNTL, pa_sc_line_cntl); - - radeon_opt_set_context_reg( - cmd_buffer, R_028810_PA_CL_CLIP_CNTL, RADV_TRACKED_PA_CL_CLIP_CNTL, - S_028810_DX_RASTERIZATION_KILL(d->vk.rs.rasterizer_discard_enable) | - S_028810_ZCLIP_NEAR_DISABLE(!depth_clip_enable) | S_028810_ZCLIP_FAR_DISABLE(!depth_clip_enable) | - S_028810_DX_CLIP_SPACE_DEF(!d->vk.vp.depth_clip_negative_one_to_one) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); - if (pdev->info.gfx_level >= GFX12) { - radeon_opt_set_context_reg(cmd_buffer, R_028A44_PA_SC_LINE_STIPPLE_RESET, RADV_TRACKED_PA_SC_LINE_STIPPLE_RESET, - S_028A44_AUTO_RESET_CNTL(auto_reset_cntl)); + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_opt_set_context_reg(cmd_buffer, R_028A08_PA_SU_LINE_CNTL, RADV_TRACKED_PA_SU_LINE_CNTL, pa_su_line_cntl); - radeon_opt_set_context_reg(cmd_buffer, R_02881C_PA_SU_SC_MODE_CNTL, RADV_TRACKED_PA_SU_SC_MODE_CNTL, - pa_su_sc_mode_cntl); + gfx12_opt_set_context_reg(cmd_buffer, R_028A0C_PA_SC_LINE_STIPPLE, RADV_TRACKED_PA_SC_LINE_STIPPLE, + S_028A0C_LINE_PATTERN(d->vk.rs.line.stipple.pattern) | + S_028A0C_REPEAT_COUNT(d->vk.rs.line.stipple.factor - 1) | + S_028A0C_AUTO_RESET_CNTL(pdev->info.gfx_level < GFX12 ? 
auto_reset_cntl : 0)); + + gfx12_opt_set_context_reg(cmd_buffer, R_028BDC_PA_SC_LINE_CNTL, RADV_TRACKED_PA_SC_LINE_CNTL, pa_sc_line_cntl); + + gfx12_opt_set_context_reg( + cmd_buffer, R_028810_PA_CL_CLIP_CNTL, RADV_TRACKED_PA_CL_CLIP_CNTL, + S_028810_DX_RASTERIZATION_KILL(d->vk.rs.rasterizer_discard_enable) | + S_028810_ZCLIP_NEAR_DISABLE(!depth_clip_enable) | S_028810_ZCLIP_FAR_DISABLE(!depth_clip_enable) | + S_028810_DX_CLIP_SPACE_DEF(!d->vk.vp.depth_clip_negative_one_to_one) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); + gfx12_opt_set_context_reg(cmd_buffer, R_028A44_PA_SC_LINE_STIPPLE_RESET, RADV_TRACKED_PA_SC_LINE_STIPPLE_RESET, + S_028A44_AUTO_RESET_CNTL(auto_reset_cntl)); + + gfx12_opt_set_context_reg(cmd_buffer, R_02881C_PA_SU_SC_MODE_CNTL, RADV_TRACKED_PA_SU_SC_MODE_CNTL, + pa_su_sc_mode_cntl); + gfx12_end_context_regs(); + radeon_end(); } else { + radeon_begin(cmd_buffer->cs); + radeon_opt_set_context_reg(cmd_buffer, R_028A08_PA_SU_LINE_CNTL, RADV_TRACKED_PA_SU_LINE_CNTL, pa_su_line_cntl); + + radeon_opt_set_context_reg(cmd_buffer, R_028A0C_PA_SC_LINE_STIPPLE, RADV_TRACKED_PA_SC_LINE_STIPPLE, + S_028A0C_LINE_PATTERN(d->vk.rs.line.stipple.pattern) | + S_028A0C_REPEAT_COUNT(d->vk.rs.line.stipple.factor - 1) | + S_028A0C_AUTO_RESET_CNTL(pdev->info.gfx_level < GFX12 ? 
auto_reset_cntl : 0)); + + radeon_opt_set_context_reg(cmd_buffer, R_028BDC_PA_SC_LINE_CNTL, RADV_TRACKED_PA_SC_LINE_CNTL, pa_sc_line_cntl); + + radeon_opt_set_context_reg( + cmd_buffer, R_028810_PA_CL_CLIP_CNTL, RADV_TRACKED_PA_CL_CLIP_CNTL, + S_028810_DX_RASTERIZATION_KILL(d->vk.rs.rasterizer_discard_enable) | + S_028810_ZCLIP_NEAR_DISABLE(!depth_clip_enable) | S_028810_ZCLIP_FAR_DISABLE(!depth_clip_enable) | + S_028810_DX_CLIP_SPACE_DEF(!d->vk.vp.depth_clip_negative_one_to_one) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); radeon_opt_set_context_reg(cmd_buffer, R_028814_PA_SU_SC_MODE_CNTL, RADV_TRACKED_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl); + radeon_end(); } - radeon_end(); - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_RASTER_STATE; } @@ -10998,24 +11053,35 @@ radv_emit_msaa_state(struct radv_cmd_buffer *cmd_buffer) db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(d->vk.ms.alpha_to_coverage_enable); - radeon_begin(cmd_buffer->cs); - radeon_opt_set_context_reg2(cmd_buffer, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, RADV_TRACKED_PA_SC_AA_MASK_X0Y0_X1Y0, - sample_mask, sample_mask); - radeon_opt_set_context_reg(cmd_buffer, R_028BE0_PA_SC_AA_CONFIG, RADV_TRACKED_PA_SC_AA_CONFIG, pa_sc_aa_config); - radeon_opt_set_context_reg( - cmd_buffer, R_028A48_PA_SC_MODE_CNTL_0, RADV_TRACKED_PA_SC_MODE_CNTL_0, - S_028A48_ALTERNATE_RBS_PER_TILE(pdev->info.gfx_level >= GFX9) | S_028A48_VPORT_SCISSOR_ENABLE(1) | - S_028A48_LINE_STIPPLE_ENABLE(d->vk.rs.line.stipple.enable) | S_028A48_MSAA_ENABLE(rasterization_samples > 1)); - if (pdev->info.gfx_level >= GFX12) { - radeon_opt_set_context_reg(cmd_buffer, R_02807C_DB_ALPHA_TO_MASK, RADV_TRACKED_DB_ALPHA_TO_MASK, - db_alpha_to_mask); - radeon_opt_set_context_reg(cmd_buffer, R_028C5C_PA_SC_SAMPLE_PROPERTIES, RADV_TRACKED_PA_SC_SAMPLE_PROPERTIES, - S_028C5C_MAX_SAMPLE_DIST(max_sample_dist)); - radeon_opt_set_context_reg(cmd_buffer, R_028078_DB_EQAA, RADV_TRACKED_DB_EQAA, db_eqaa); - radeon_opt_set_context_reg(cmd_buffer, 
R_028C54_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, - RADV_TRACKED_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast); + radeon_begin(cmd_buffer->cs); + gfx12_begin_context_regs(); + gfx12_opt_set_context_reg2(cmd_buffer, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, RADV_TRACKED_PA_SC_AA_MASK_X0Y0_X1Y0, + sample_mask, sample_mask); + gfx12_opt_set_context_reg(cmd_buffer, R_028BE0_PA_SC_AA_CONFIG, RADV_TRACKED_PA_SC_AA_CONFIG, pa_sc_aa_config); + gfx12_opt_set_context_reg(cmd_buffer, R_028A48_PA_SC_MODE_CNTL_0, RADV_TRACKED_PA_SC_MODE_CNTL_0, + S_028A48_ALTERNATE_RBS_PER_TILE(pdev->info.gfx_level >= GFX9) | + S_028A48_VPORT_SCISSOR_ENABLE(1) | + S_028A48_LINE_STIPPLE_ENABLE(d->vk.rs.line.stipple.enable) | + S_028A48_MSAA_ENABLE(rasterization_samples > 1)); + gfx12_opt_set_context_reg(cmd_buffer, R_02807C_DB_ALPHA_TO_MASK, RADV_TRACKED_DB_ALPHA_TO_MASK, db_alpha_to_mask); + gfx12_opt_set_context_reg(cmd_buffer, R_028C5C_PA_SC_SAMPLE_PROPERTIES, RADV_TRACKED_PA_SC_SAMPLE_PROPERTIES, + S_028C5C_MAX_SAMPLE_DIST(max_sample_dist)); + gfx12_opt_set_context_reg(cmd_buffer, R_028078_DB_EQAA, RADV_TRACKED_DB_EQAA, db_eqaa); + gfx12_opt_set_context_reg(cmd_buffer, R_028C54_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + RADV_TRACKED_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast); + gfx12_end_context_regs(); + radeon_end(); } else { + radeon_begin(cmd_buffer->cs); + radeon_opt_set_context_reg2(cmd_buffer, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, RADV_TRACKED_PA_SC_AA_MASK_X0Y0_X1Y0, + sample_mask, sample_mask); + radeon_opt_set_context_reg(cmd_buffer, R_028BE0_PA_SC_AA_CONFIG, RADV_TRACKED_PA_SC_AA_CONFIG, pa_sc_aa_config); + radeon_opt_set_context_reg(cmd_buffer, R_028A48_PA_SC_MODE_CNTL_0, RADV_TRACKED_PA_SC_MODE_CNTL_0, + S_028A48_ALTERNATE_RBS_PER_TILE(pdev->info.gfx_level >= GFX9) | + S_028A48_VPORT_SCISSOR_ENABLE(1) | + S_028A48_LINE_STIPPLE_ENABLE(d->vk.rs.line.stipple.enable) | + S_028A48_MSAA_ENABLE(rasterization_samples > 1)); radeon_opt_set_context_reg(cmd_buffer, 
R_028B70_DB_ALPHA_TO_MASK, RADV_TRACKED_DB_ALPHA_TO_MASK, db_alpha_to_mask); radeon_opt_set_context_reg(cmd_buffer, R_028804_DB_EQAA, RADV_TRACKED_DB_EQAA, db_eqaa); @@ -11023,8 +11089,8 @@ radv_emit_msaa_state(struct radv_cmd_buffer *cmd_buffer) if (pdev->info.gfx_level >= GFX9) radeon_opt_set_context_reg(cmd_buffer, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, RADV_TRACKED_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast); + radeon_end(); } - radeon_end(); } static void @@ -13984,8 +14050,21 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct radeon_cmdbuf *cs = cmd_buffer->cs; + if (pdev->info.gfx_level >= GFX12) { + radeon_begin(cs); + gfx12_begin_context_regs(); + gfx12_set_context_reg(R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride); + gfx12_set_context_reg(R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, counter_offset); + gfx12_end_context_regs(); + radeon_end(); + } else { + radeon_begin(cs); + radeon_set_context_reg(R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride); + radeon_set_context_reg(R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, counter_offset); + radeon_end(); + } + radeon_begin(cs); - radeon_set_context_reg(R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride); if (gfx_level >= GFX10) { /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption @@ -14008,7 +14087,6 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_emit(0); /* unused */ } - radeon_set_context_reg(R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, counter_offset); radeon_end(); }