mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
radv: remove precomputed registers from radv_shader_binary
It is enough to compute them after upload. This saves some disk space and eliminates an unlikely bug where the shader cache is shared between two GPUs with the same chip but a different number of enabled CUs. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38970>
This commit is contained in:
parent
61287b00f3
commit
125ac1626d
5 changed files with 344 additions and 339 deletions
|
|
@ -35,9 +35,9 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
/* Shaders are allocated in the 32-bit addr space and high bits are already configured. */
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_lo, reloc->va[s] >> 8);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_lo, reloc->va[s] >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, reloc->va[s] >> 8);
|
||||
radeon_set_sh_reg(shader->regs.pgm_lo, reloc->va[s] >> 8);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
|
@ -49,9 +49,9 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
|
||||
radeon_begin(ace_cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(task_shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(task_shader->regs.pgm_lo, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(task_shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(task_shader->regs.pgm_lo, va >> 8);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2874,11 +2874,11 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
if (pgm_rsrc1)
|
||||
gfx12_push_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
|
||||
gfx12_push_sh_reg(ps_shader->regs.pgm_rsrc1, pgm_rsrc1);
|
||||
gfx12_push_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info);
|
||||
} else {
|
||||
if (pgm_rsrc1)
|
||||
radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
|
||||
radeon_set_sh_reg(ps_shader->regs.pgm_rsrc1, pgm_rsrc1);
|
||||
radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info);
|
||||
}
|
||||
radeon_end();
|
||||
|
|
@ -2892,27 +2892,27 @@ radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radv_cm
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3);
|
||||
gfx12_push_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
|
||||
gfx12_push_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X, shader->info.regs.cs.compute_num_thread_x);
|
||||
gfx12_push_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y, shader->info.regs.cs.compute_num_thread_y);
|
||||
gfx12_push_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z, shader->info.regs.cs.compute_num_thread_z);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_rsrc2, shader->config.rsrc2);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_rsrc3, shader->config.rsrc3);
|
||||
gfx12_push_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->regs.cs.compute_resource_limits);
|
||||
gfx12_push_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X, shader->regs.cs.compute_num_thread_x);
|
||||
gfx12_push_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y, shader->regs.cs.compute_num_thread_y);
|
||||
gfx12_push_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z, shader->regs.cs.compute_num_thread_z);
|
||||
} else {
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
|
||||
radeon_set_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg_seq(shader->regs.pgm_rsrc1, 2);
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
if (pdev->info.gfx_level >= GFX10)
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3);
|
||||
radeon_set_sh_reg(shader->regs.pgm_rsrc3, shader->config.rsrc3);
|
||||
|
||||
radeon_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
|
||||
radeon_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->regs.cs.compute_resource_limits);
|
||||
radeon_set_sh_reg_seq(R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
radeon_emit(shader->info.regs.cs.compute_num_thread_x);
|
||||
radeon_emit(shader->info.regs.cs.compute_num_thread_y);
|
||||
radeon_emit(shader->info.regs.cs.compute_num_thread_z);
|
||||
radeon_emit(shader->regs.cs.compute_num_thread_x);
|
||||
radeon_emit(shader->regs.cs.compute_num_thread_y);
|
||||
radeon_emit(shader->regs.cs.compute_num_thread_z);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
|
@ -2952,34 +2952,32 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
|
||||
radeon_set_sh_reg_seq(shader->regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B124_MEM_BASE(va >> 40));
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
|
||||
radeon_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, RADV_TRACKED_SPI_VS_OUT_CONFIG,
|
||||
shader->info.regs.spi_vs_out_config);
|
||||
shader->regs.spi_vs_out_config);
|
||||
radeon_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT, RADV_TRACKED_SPI_SHADER_POS_FORMAT,
|
||||
shader->info.regs.spi_shader_pos_format);
|
||||
shader->regs.spi_shader_pos_format);
|
||||
radeon_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL,
|
||||
shader->info.regs.pa_cl_vs_out_cntl);
|
||||
shader->regs.pa_cl_vs_out_cntl);
|
||||
|
||||
if (pdev->info.gfx_level <= GFX8)
|
||||
radeon_opt_set_context_reg(R_028AB4_VGT_REUSE_OFF, RADV_TRACKED_VGT_REUSE_OFF,
|
||||
shader->info.regs.vs.vgt_reuse_off);
|
||||
radeon_opt_set_context_reg(R_028AB4_VGT_REUSE_OFF, RADV_TRACKED_VGT_REUSE_OFF, shader->regs.vs.vgt_reuse_off);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX7) {
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
|
||||
shader->info.regs.vs.spi_shader_pgm_rsrc3_vs);
|
||||
radeon_set_sh_reg(R_00B11C_SPI_SHADER_LATE_ALLOC_VS, shader->info.regs.vs.spi_shader_late_alloc_vs);
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, shader->regs.vs.spi_shader_pgm_rsrc3_vs);
|
||||
radeon_set_sh_reg(R_00B11C_SPI_SHADER_LATE_ALLOC_VS, shader->regs.vs.spi_shader_late_alloc_vs);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_uconfig_reg(R_030980_GE_PC_ALLOC, shader->info.regs.ge_pc_alloc);
|
||||
radeon_set_uconfig_reg(R_030980_GE_PC_ALLOC, shader->regs.ge_pc_alloc);
|
||||
|
||||
if (shader->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, RADV_TRACKED_VGT_GS_ONCHIP_CNTL,
|
||||
shader->info.regs.vgt_gs_onchip_cntl);
|
||||
shader->regs.vgt_gs_onchip_cntl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2998,7 +2996,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
assert(pdev->info.gfx_level < GFX11);
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
|
||||
radeon_set_sh_reg_seq(shader->regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B324_MEM_BASE(va >> 40));
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
|
|
@ -3016,11 +3014,11 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
radeon_set_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
|
@ -3048,13 +3046,13 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
|
|||
if (!shader->info.merged_shader_compiled_separately) {
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
|
||||
gfx12_push_sh_reg(R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_rsrc2, shader->config.rsrc2);
|
||||
gfx12_push_sh_reg(R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->regs.spi_shader_pgm_rsrc4_gs);
|
||||
} else {
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
|
||||
radeon_set_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg_seq(shader->regs.pgm_rsrc1, 2);
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
}
|
||||
|
|
@ -3075,58 +3073,58 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
|
|||
radeon_begin(cs);
|
||||
gfx12_begin_context_regs();
|
||||
gfx12_opt_set_context_reg(R_028818_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL,
|
||||
shader->info.regs.pa_cl_vs_out_cntl);
|
||||
shader->regs.pa_cl_vs_out_cntl);
|
||||
gfx12_opt_set_context_reg(R_028B3C_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT,
|
||||
shader->info.regs.vgt_gs_instance_cnt);
|
||||
shader->regs.vgt_gs_instance_cnt);
|
||||
gfx12_opt_set_context_reg2(R_028648_SPI_SHADER_IDX_FORMAT, RADV_TRACKED_SPI_SHADER_IDX_FORMAT,
|
||||
shader->info.regs.ngg.spi_shader_idx_format, shader->info.regs.spi_shader_pos_format);
|
||||
shader->regs.ngg.spi_shader_idx_format, shader->regs.spi_shader_pos_format);
|
||||
gfx12_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
|
||||
shader->info.regs.ngg.ge_max_output_per_subgroup);
|
||||
shader->regs.ngg.ge_max_output_per_subgroup);
|
||||
gfx12_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, RADV_TRACKED_GE_NGG_SUBGRP_CNTL,
|
||||
shader->info.regs.ngg.ge_ngg_subgrp_cntl);
|
||||
shader->regs.ngg.ge_ngg_subgrp_cntl);
|
||||
gfx12_end_context_regs();
|
||||
radeon_end();
|
||||
} else if (pdev->info.has_set_context_pairs_packed) {
|
||||
radeon_begin(cs);
|
||||
gfx11_begin_packed_context_regs();
|
||||
gfx11_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL,
|
||||
shader->info.regs.pa_cl_vs_out_cntl);
|
||||
shader->regs.pa_cl_vs_out_cntl);
|
||||
gfx11_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT,
|
||||
shader->info.regs.vgt_gs_instance_cnt);
|
||||
shader->regs.vgt_gs_instance_cnt);
|
||||
gfx11_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, RADV_TRACKED_VGT_PRIMITIVEID_EN,
|
||||
shader->info.regs.ngg.vgt_primitiveid_en | S_028A84_PRIMITIVEID_EN(es_enable_prim_id));
|
||||
shader->regs.ngg.vgt_primitiveid_en | S_028A84_PRIMITIVEID_EN(es_enable_prim_id));
|
||||
gfx11_opt_set_context_reg2(R_028708_SPI_SHADER_IDX_FORMAT, RADV_TRACKED_SPI_SHADER_IDX_FORMAT,
|
||||
shader->info.regs.ngg.spi_shader_idx_format, shader->info.regs.spi_shader_pos_format);
|
||||
shader->regs.ngg.spi_shader_idx_format, shader->regs.spi_shader_pos_format);
|
||||
gfx11_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, RADV_TRACKED_SPI_VS_OUT_CONFIG,
|
||||
shader->info.regs.spi_vs_out_config);
|
||||
shader->regs.spi_vs_out_config);
|
||||
gfx11_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
|
||||
shader->info.regs.ngg.ge_max_output_per_subgroup);
|
||||
shader->regs.ngg.ge_max_output_per_subgroup);
|
||||
gfx11_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, RADV_TRACKED_GE_NGG_SUBGRP_CNTL,
|
||||
shader->info.regs.ngg.ge_ngg_subgrp_cntl);
|
||||
shader->regs.ngg.ge_ngg_subgrp_cntl);
|
||||
gfx11_end_packed_context_regs();
|
||||
radeon_end();
|
||||
} else {
|
||||
radeon_begin(cs);
|
||||
radeon_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL,
|
||||
shader->info.regs.pa_cl_vs_out_cntl);
|
||||
shader->regs.pa_cl_vs_out_cntl);
|
||||
radeon_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT,
|
||||
shader->info.regs.vgt_gs_instance_cnt);
|
||||
shader->regs.vgt_gs_instance_cnt);
|
||||
radeon_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, RADV_TRACKED_VGT_PRIMITIVEID_EN,
|
||||
shader->info.regs.ngg.vgt_primitiveid_en | S_028A84_PRIMITIVEID_EN(es_enable_prim_id));
|
||||
shader->regs.ngg.vgt_primitiveid_en | S_028A84_PRIMITIVEID_EN(es_enable_prim_id));
|
||||
radeon_opt_set_context_reg2(R_028708_SPI_SHADER_IDX_FORMAT, RADV_TRACKED_SPI_SHADER_IDX_FORMAT,
|
||||
shader->info.regs.ngg.spi_shader_idx_format, shader->info.regs.spi_shader_pos_format);
|
||||
shader->regs.ngg.spi_shader_idx_format, shader->regs.spi_shader_pos_format);
|
||||
radeon_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, RADV_TRACKED_SPI_VS_OUT_CONFIG,
|
||||
shader->info.regs.spi_vs_out_config);
|
||||
shader->regs.spi_vs_out_config);
|
||||
radeon_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, RADV_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
|
||||
shader->info.regs.ngg.ge_max_output_per_subgroup);
|
||||
shader->regs.ngg.ge_max_output_per_subgroup);
|
||||
radeon_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, RADV_TRACKED_GE_NGG_SUBGRP_CNTL,
|
||||
shader->info.regs.ngg.ge_ngg_subgrp_cntl);
|
||||
shader->regs.ngg.ge_ngg_subgrp_cntl);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
uint32_t ge_cntl = shader->info.regs.ngg.ge_cntl;
|
||||
uint32_t ge_cntl = shader->regs.ngg.ge_cntl;
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
ge_cntl |= S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi);
|
||||
} else {
|
||||
|
|
@ -3146,7 +3144,7 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
|
|||
}
|
||||
|
||||
radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, RADV_TRACKED_VGT_GS_ONCHIP_CNTL,
|
||||
shader->info.regs.vgt_gs_onchip_cntl);
|
||||
shader->regs.vgt_gs_onchip_cntl);
|
||||
}
|
||||
|
||||
radeon_set_uconfig_reg(R_03096C_GE_CNTL, ge_cntl);
|
||||
|
|
@ -3156,19 +3154,17 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
|
|||
assert(!(shader->info.ngg_info.esgs_ring_size & 0xffff0000));
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_uconfig_reg(R_030988_VGT_PRIMITIVEID_EN, shader->info.regs.ngg.vgt_primitiveid_en);
|
||||
radeon_set_uconfig_reg(R_030988_VGT_PRIMITIVEID_EN, shader->regs.ngg.vgt_primitiveid_en);
|
||||
gfx12_push_sh_reg(ngg_lds_layout_offset,
|
||||
SET_SGPR_FIELD(NGG_LDS_LAYOUT_GS_OUT_VERTEX_BASE, shader->info.ngg_info.esgs_ring_size));
|
||||
} else {
|
||||
if (pdev->info.gfx_level >= GFX7) {
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
|
||||
shader->info.regs.spi_shader_pgm_rsrc3_gs);
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, shader->regs.spi_shader_pgm_rsrc3_gs);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
|
||||
shader->info.regs.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, shader->regs.spi_shader_pgm_rsrc4_gs);
|
||||
|
||||
radeon_set_uconfig_reg(R_030980_GE_PC_ALLOC, shader->info.regs.ge_pc_alloc);
|
||||
radeon_set_uconfig_reg(R_030980_GE_PC_ALLOC, shader->regs.ge_pc_alloc);
|
||||
|
||||
radeon_set_sh_reg(ngg_lds_layout_offset,
|
||||
SET_SGPR_FIELD(NGG_LDS_LAYOUT_GS_OUT_VERTEX_BASE, shader->info.ngg_info.esgs_ring_size));
|
||||
|
|
@ -3187,14 +3183,14 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(shader->regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
} else {
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
radeon_set_sh_reg(shader->regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
|
||||
radeon_set_sh_reg_seq(shader->regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B424_MEM_BASE(va >> 40));
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
|
|
@ -3233,23 +3229,23 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
gfx12_push_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info);
|
||||
|
||||
if (!vs->info.vs.has_prolog) {
|
||||
gfx12_push_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8);
|
||||
gfx12_push_sh_reg(vs->regs.pgm_lo, vs->va >> 8);
|
||||
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
|
||||
gfx12_push_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(vs->regs.pgm_rsrc1, rsrc1);
|
||||
} else {
|
||||
gfx12_push_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(vs->info.regs.pgm_rsrc2, rsrc2);
|
||||
gfx12_push_sh_reg(vs->regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(vs->regs.pgm_rsrc2, rsrc2);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info);
|
||||
|
||||
if (!vs->info.vs.has_prolog) {
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8);
|
||||
radeon_set_sh_reg(vs->regs.pgm_lo, vs->va >> 8);
|
||||
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
radeon_set_sh_reg(vs->regs.pgm_rsrc1, rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(vs->info.regs.pgm_rsrc1, 2);
|
||||
radeon_set_sh_reg_seq(vs->regs.pgm_rsrc1, 2);
|
||||
radeon_emit(rsrc1);
|
||||
radeon_emit(rsrc2);
|
||||
}
|
||||
|
|
@ -3304,13 +3300,13 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8);
|
||||
gfx12_push_sh_reg(tes->info.regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(tes->info.regs.pgm_rsrc2, rsrc2);
|
||||
gfx12_push_sh_reg(tes->regs.pgm_lo, tes->va >> 8);
|
||||
gfx12_push_sh_reg(tes->regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(tes->regs.pgm_rsrc2, rsrc2);
|
||||
gfx12_push_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info);
|
||||
} else {
|
||||
radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8);
|
||||
radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2);
|
||||
radeon_set_sh_reg(tes->regs.pgm_lo, tes->va >> 8);
|
||||
radeon_set_sh_reg_seq(tes->regs.pgm_rsrc1, 2);
|
||||
radeon_emit(rsrc1);
|
||||
radeon_emit(rsrc2);
|
||||
radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info);
|
||||
|
|
@ -3340,38 +3336,38 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
|
|||
radeon_begin(cs);
|
||||
|
||||
radeon_opt_set_context_reg3(R_028A60_VGT_GSVS_RING_OFFSET_1, RADV_TRACKED_VGT_GSVS_RING_OFFSET_1,
|
||||
gs->info.regs.gs.vgt_gsvs_ring_offset[0], gs->info.regs.gs.vgt_gsvs_ring_offset[1],
|
||||
gs->info.regs.gs.vgt_gsvs_ring_offset[2]);
|
||||
gs->regs.gs.vgt_gsvs_ring_offset[0], gs->regs.gs.vgt_gsvs_ring_offset[1],
|
||||
gs->regs.gs.vgt_gsvs_ring_offset[2]);
|
||||
|
||||
radeon_opt_set_context_reg(R_028AB0_VGT_GSVS_RING_ITEMSIZE, RADV_TRACKED_VGT_GSVS_RING_ITEMSIZE,
|
||||
gs->info.regs.gs.vgt_gsvs_ring_itemsize);
|
||||
gs->regs.gs.vgt_gsvs_ring_itemsize);
|
||||
|
||||
radeon_opt_set_context_reg4(R_028B5C_VGT_GS_VERT_ITEMSIZE, RADV_TRACKED_VGT_GS_VERT_ITEMSIZE,
|
||||
gs->info.regs.gs.vgt_gs_vert_itemsize[0], gs->info.regs.gs.vgt_gs_vert_itemsize[1],
|
||||
gs->info.regs.gs.vgt_gs_vert_itemsize[2], gs->info.regs.gs.vgt_gs_vert_itemsize[3]);
|
||||
gs->regs.gs.vgt_gs_vert_itemsize[0], gs->regs.gs.vgt_gs_vert_itemsize[1],
|
||||
gs->regs.gs.vgt_gs_vert_itemsize[2], gs->regs.gs.vgt_gs_vert_itemsize[3]);
|
||||
|
||||
radeon_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT,
|
||||
gs->info.regs.gs.vgt_gs_instance_cnt);
|
||||
gs->regs.gs.vgt_gs_instance_cnt);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
if (!gs->info.merged_shader_compiled_separately) {
|
||||
radeon_set_sh_reg(gs->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(gs->regs.pgm_lo, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(gs->info.regs.pgm_rsrc1, 2);
|
||||
radeon_set_sh_reg_seq(gs->regs.pgm_rsrc1, 2);
|
||||
radeon_emit(gs->config.rsrc1);
|
||||
radeon_emit(gs->config.rsrc2 | S_00B22C_LDS_SIZE(ac_shader_encode_lds_size(
|
||||
gs_state->lds_size, pdev->info.gfx_level, MESA_SHADER_GEOMETRY)));
|
||||
}
|
||||
|
||||
radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, RADV_TRACKED_VGT_GS_ONCHIP_CNTL,
|
||||
gs->info.regs.vgt_gs_onchip_cntl);
|
||||
gs->regs.vgt_gs_onchip_cntl);
|
||||
|
||||
if (pdev->info.gfx_level == GFX9) {
|
||||
radeon_opt_set_context_reg(R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, RADV_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
|
||||
gs->info.regs.gs.vgt_gs_max_prims_per_subgroup);
|
||||
gs->regs.gs.vgt_gs_max_prims_per_subgroup);
|
||||
}
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(gs->info.regs.pgm_lo, 4);
|
||||
radeon_set_sh_reg_seq(gs->regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B224_MEM_BASE(va >> 40));
|
||||
radeon_emit(gs->config.rsrc1);
|
||||
|
|
@ -3381,15 +3377,15 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
|
|||
* GFX9+: Only used to set the GS input VGPRs, emulated in shaders.
|
||||
*/
|
||||
radeon_opt_set_context_reg(R_028AAC_VGT_ESGS_RING_ITEMSIZE, RADV_TRACKED_VGT_ESGS_RING_ITEMSIZE,
|
||||
gs->info.regs.gs.vgt_esgs_ring_itemsize);
|
||||
gs->regs.gs.vgt_esgs_ring_itemsize);
|
||||
}
|
||||
|
||||
if (pdev->info.gfx_level >= GFX7) {
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, gs->info.regs.spi_shader_pgm_rsrc3_gs);
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, gs->regs.spi_shader_pgm_rsrc3_gs);
|
||||
}
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, gs->info.regs.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_set_sh_reg_idx(&pdev->info, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, gs->regs.spi_shader_pgm_rsrc4_gs);
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
|
|
@ -3414,7 +3410,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_begin(cs);
|
||||
|
||||
radeon_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, RADV_TRACKED_VGT_GS_MAX_VERT_OUT,
|
||||
gs->info.regs.vgt_gs_max_vert_out);
|
||||
gs->regs.vgt_gs_max_vert_out);
|
||||
|
||||
if (gs->info.merged_shader_compiled_separately) {
|
||||
const uint32_t vgt_esgs_ring_itemsize_offset = radv_get_user_sgpr_loc(gs, AC_UD_VGT_ESGS_RING_ITEMSIZE);
|
||||
|
|
@ -3454,13 +3450,13 @@ radv_gfx11_emit_meshlet(struct radv_cmd_buffer *cmd_buffer, const struct radv_sh
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, ms->info.regs.ngg.ms.spi_shader_gs_meshlet_dim);
|
||||
gfx12_push_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC, ms->info.regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
gfx12_push_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ngg.ms.spi_shader_gs_meshlet_ctrl);
|
||||
gfx12_push_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, ms->regs.ngg.ms.spi_shader_gs_meshlet_dim);
|
||||
gfx12_push_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC, ms->regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
gfx12_push_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->regs.ngg.ms.spi_shader_gs_meshlet_ctrl);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
|
||||
radeon_emit(ms->info.regs.ngg.ms.spi_shader_gs_meshlet_dim);
|
||||
radeon_emit(ms->info.regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
radeon_emit(ms->regs.ngg.ms.spi_shader_gs_meshlet_dim);
|
||||
radeon_emit(ms->regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
|
@ -3479,7 +3475,7 @@ radv_emit_mesh_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_begin(cs);
|
||||
|
||||
radeon_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, RADV_TRACKED_VGT_GS_MAX_VERT_OUT,
|
||||
ms->info.regs.vgt_gs_max_vert_out);
|
||||
ms->regs.vgt_gs_max_vert_out);
|
||||
radeon_set_uconfig_reg_idx(&pdev->info, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);
|
||||
radeon_end();
|
||||
|
||||
|
|
@ -3628,7 +3624,7 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
|
|||
*/
|
||||
const unsigned num_per_vertex_params = ps->info.ps.num_inputs - num_per_primitive_params;
|
||||
radeon_opt_set_context_reg(R_0286D8_SPI_PS_IN_CONTROL, RADV_TRACKED_SPI_PS_IN_CONTROL,
|
||||
ps->info.regs.ps.spi_ps_in_control | S_0286D8_NUM_INTERP(num_per_vertex_params) |
|
||||
ps->regs.ps.spi_ps_in_control | S_0286D8_NUM_INTERP(num_per_vertex_params) |
|
||||
S_0286D8_NUM_PRIM_INTERP(num_per_primitive_params));
|
||||
}
|
||||
|
||||
|
|
@ -3646,12 +3642,12 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct
|
|||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint32_t spi_ps_input_ena = ps ? ps->config.spi_ps_input_ena : 0;
|
||||
const uint32_t spi_ps_input_addr = ps ? ps->config.spi_ps_input_addr : 0;
|
||||
const uint32_t spi_ps_in_control = ps ? ps->info.regs.ps.spi_ps_in_control : 0;
|
||||
const uint32_t spi_ps_in_control = ps ? ps->regs.ps.spi_ps_in_control : 0;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
const uint32_t pa_sc_hisz_control = ps ? ps->info.regs.ps.pa_sc_hisz_control : 0;
|
||||
const uint32_t pa_sc_hisz_control = ps ? ps->regs.ps.pa_sc_hisz_control : 0;
|
||||
|
||||
gfx12_begin_context_regs();
|
||||
gfx12_opt_set_context_reg2(R_02865C_SPI_PS_INPUT_ENA, RADV_TRACKED_SPI_PS_INPUT_ENA, spi_ps_input_ena,
|
||||
|
|
@ -3668,7 +3664,7 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct
|
|||
gfx11_opt_set_context_reg(R_0286D8_SPI_PS_IN_CONTROL, RADV_TRACKED_SPI_PS_IN_CONTROL, spi_ps_in_control);
|
||||
gfx11_end_packed_context_regs();
|
||||
} else {
|
||||
const uint32_t pa_sc_shader_control = ps ? ps->info.regs.ps.pa_sc_shader_control : 0;
|
||||
const uint32_t pa_sc_shader_control = ps ? ps->regs.ps.pa_sc_shader_control : 0;
|
||||
|
||||
radeon_opt_set_context_reg2(R_0286CC_SPI_PS_INPUT_ENA, RADV_TRACKED_SPI_PS_INPUT_ENA, spi_ps_input_ena,
|
||||
spi_ps_input_addr);
|
||||
|
|
@ -3709,11 +3705,11 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(ps->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(ps->info.regs.pgm_rsrc1, ps->config.rsrc1);
|
||||
gfx12_push_sh_reg(ps->info.regs.pgm_rsrc2, ps->config.rsrc2);
|
||||
gfx12_push_sh_reg(ps->regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(ps->regs.pgm_rsrc1, ps->config.rsrc1);
|
||||
gfx12_push_sh_reg(ps->regs.pgm_rsrc2, ps->config.rsrc2);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4);
|
||||
radeon_set_sh_reg_seq(ps->regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B024_MEM_BASE(va >> 40));
|
||||
radeon_emit(ps->config.rsrc1);
|
||||
|
|
@ -3956,10 +3952,10 @@ radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (pdev->info.gfx_level >= GFX12) {
|
||||
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
|
||||
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
|
||||
uint32_t gs_out_config_ps = last_vgt_shader->info.regs.spi_vs_out_config;
|
||||
uint32_t gs_out_config_ps = last_vgt_shader->regs.spi_vs_out_config;
|
||||
|
||||
if (ps) {
|
||||
gs_out_config_ps |= ps->info.regs.ps.spi_gs_out_config_ps;
|
||||
gs_out_config_ps |= ps->regs.ps.spi_gs_out_config_ps;
|
||||
} else {
|
||||
/* GFX12 seems to require a dummy FS state otherwise it might just hang. */
|
||||
radv_emit_fragment_shader_state(cmd_buffer, NULL);
|
||||
|
|
@ -6084,15 +6080,15 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8);
|
||||
gfx12_push_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(vs_shader->regs.pgm_lo, prolog->va >> 8);
|
||||
gfx12_push_sh_reg(vs_shader->regs.pgm_rsrc1, rsrc1);
|
||||
if (vs_shader->info.merged_shader_compiled_separately)
|
||||
gfx12_push_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2);
|
||||
gfx12_push_sh_reg(vs_shader->regs.pgm_rsrc2, rsrc2);
|
||||
} else {
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8);
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1);
|
||||
radeon_set_sh_reg(vs_shader->regs.pgm_lo, prolog->va >> 8);
|
||||
radeon_set_sh_reg(vs_shader->regs.pgm_rsrc1, rsrc1);
|
||||
if (vs_shader->info.merged_shader_compiled_separately)
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2);
|
||||
radeon_set_sh_reg(vs_shader->regs.pgm_rsrc2, rsrc2);
|
||||
}
|
||||
radeon_end();
|
||||
|
||||
|
|
@ -8378,7 +8374,7 @@ radv_bind_fragment_output_state(struct radv_cmd_buffer *cmd_buffer, const struct
|
|||
if (ps) {
|
||||
col_format = ps_epilog ? ps_epilog->spi_shader_col_format : ps->info.ps.spi_shader_col_format;
|
||||
z_format = ps_epilog && ps->info.ps.exports_mrtz_via_epilog ? ps_epilog->spi_shader_z_format
|
||||
: ps->info.regs.ps.spi_shader_z_format;
|
||||
: ps->regs.ps.spi_shader_z_format;
|
||||
cb_shader_mask = ps_epilog ? ps_epilog->cb_shader_mask : ps->info.ps.cb_shader_mask;
|
||||
}
|
||||
|
||||
|
|
@ -8636,7 +8632,7 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
|||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_BINNING_STATE;
|
||||
}
|
||||
|
||||
if (!previous_ps || previous_ps->info.regs.ps.db_shader_control != ps->info.regs.ps.db_shader_control ||
|
||||
if (!previous_ps || previous_ps->regs.ps.db_shader_control != ps->regs.ps.db_shader_control ||
|
||||
previous_ps->info.ps.pops_is_per_sample != ps->info.ps.pops_is_per_sample)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DB_SHADER_CONTROL;
|
||||
|
||||
|
|
@ -11582,18 +11578,18 @@ radv_emit_tcs_tes_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
|
||||
gfx12_push_sh_reg(tcs->regs.pgm_rsrc2, pgm_hs_rsrc2);
|
||||
if (tcs_offchip_layout || tes_offchip_layout) {
|
||||
gfx12_push_sh_reg(tcs_offchip_layout_offset, tcs_offchip_layout);
|
||||
gfx12_push_sh_reg(tes_offchip_layout_offset, tes_offchip_layout);
|
||||
}
|
||||
} else {
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
radeon_set_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
|
||||
radeon_set_sh_reg(tcs->regs.pgm_rsrc2, pgm_hs_rsrc2);
|
||||
} else {
|
||||
const uint32_t ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(lds_alloc);
|
||||
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_rsrc2, ls_rsrc2);
|
||||
radeon_set_sh_reg(vs->regs.pgm_rsrc2, ls_rsrc2);
|
||||
}
|
||||
|
||||
if (tcs_offchip_layout || tes_offchip_layout) {
|
||||
|
|
@ -11705,7 +11701,7 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
|
|||
uint32_t db_shader_control;
|
||||
|
||||
if (ps) {
|
||||
db_shader_control = ps->info.regs.ps.db_shader_control;
|
||||
db_shader_control = ps->regs.ps.db_shader_control;
|
||||
} else {
|
||||
db_shader_control = S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z) |
|
||||
S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
|
||||
|
|
@ -12787,10 +12783,10 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) {
|
||||
gfx10_ngg_set_esgs_ring_itemsize(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
radv_precompute_registers_hw_ngg(device, &gs->config, &gs->info);
|
||||
radv_precompute_registers_hw_ngg(device, gs);
|
||||
} else {
|
||||
radv_get_legacy_gs_info(device, &es->info, &gs->info);
|
||||
radv_precompute_registers_hw_gs(device, &es->info, &gs->info);
|
||||
radv_precompute_registers_hw_gs(device, &es->info, gs);
|
||||
|
||||
cmd_buffer->esgs_ring_size_needed =
|
||||
MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.legacy_gs_info.esgs_ring_size);
|
||||
|
|
@ -13824,9 +13820,9 @@ radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2);
|
||||
gfx12_push_sh_reg(rt_prolog->regs.pgm_rsrc2, rsrc2);
|
||||
} else {
|
||||
radeon_set_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2);
|
||||
radeon_set_sh_reg(rt_prolog->regs.pgm_rsrc2, rsrc2);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1567,7 +1567,7 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar
|
|||
#endif
|
||||
|
||||
static unsigned
|
||||
radv_get_num_pos_exports(struct radv_shader_info *info, unsigned *clip_dist_mask, unsigned *cull_dist_mask)
|
||||
radv_get_num_pos_exports(const struct radv_shader_info *info, unsigned *clip_dist_mask, unsigned *cull_dist_mask)
|
||||
{
|
||||
unsigned num = 1;
|
||||
|
||||
|
|
@ -1592,21 +1592,22 @@ radv_get_num_pos_exports(struct radv_shader_info *info, unsigned *clip_dist_mask
|
|||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
const struct radv_shader_info *info = &shader->info;
|
||||
struct radv_shader_regs *regs = &shader->regs;
|
||||
unsigned clip_dist_mask, cull_dist_mask;
|
||||
unsigned num_pos_exports = radv_get_num_pos_exports(info, &clip_dist_mask, &cull_dist_mask);
|
||||
|
||||
/* VS is required to export at least one param. */
|
||||
const uint32_t nparams = MAX2(info->outinfo.param_exports, 1);
|
||||
info->regs.spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
|
||||
regs->spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
info->regs.spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(info->outinfo.param_exports == 0);
|
||||
regs->spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(info->outinfo.param_exports == 0);
|
||||
}
|
||||
|
||||
info->regs.spi_shader_pos_format =
|
||||
regs->spi_shader_pos_format =
|
||||
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
|
||||
S_02870C_POS1_EXPORT_FORMAT(num_pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
|
||||
S_02870C_POS2_EXPORT_FORMAT(num_pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
|
||||
|
|
@ -1616,7 +1617,7 @@ radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader_b
|
|||
info->outinfo.writes_viewport_index || info->outinfo.writes_primitive_shading_rate;
|
||||
const unsigned total_mask = clip_dist_mask | cull_dist_mask;
|
||||
|
||||
info->regs.pa_cl_vs_out_cntl =
|
||||
regs->pa_cl_vs_out_cntl =
|
||||
S_02881C_USE_VTX_POINT_SIZE(info->outinfo.writes_pointsize) |
|
||||
S_02881C_USE_VTX_RENDER_TARGET_INDX(info->outinfo.writes_layer) |
|
||||
S_02881C_USE_VTX_VIEWPORT_INDX(info->outinfo.writes_viewport_index) |
|
||||
|
|
@ -1627,26 +1628,25 @@ radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader_b
|
|||
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | clip_dist_mask;
|
||||
|
||||
if (pdev->info.gfx_level <= GFX8)
|
||||
info->regs.vs.vgt_reuse_off = info->outinfo.writes_viewport_index;
|
||||
regs->vs.vgt_reuse_off = info->outinfo.writes_viewport_index;
|
||||
|
||||
unsigned late_alloc_wave64, cu_mask;
|
||||
ac_compute_late_alloc(&pdev->info, false, false, binary->config.scratch_bytes_per_wave > 0, &late_alloc_wave64,
|
||||
ac_compute_late_alloc(&pdev->info, false, false, shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64,
|
||||
&cu_mask);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX7) {
|
||||
info->regs.vs.spi_shader_pgm_rsrc3_vs =
|
||||
regs->vs.spi_shader_pgm_rsrc3_vs =
|
||||
ac_apply_cu_en(S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), C_00B118_CU_EN, 0, &pdev->info);
|
||||
info->regs.vs.spi_shader_late_alloc_vs = S_00B11C_LIMIT(late_alloc_wave64);
|
||||
regs->vs.spi_shader_late_alloc_vs = S_00B11C_LIMIT(late_alloc_wave64);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
const uint32_t oversub_pc_lines = late_alloc_wave64 ? pdev->info.pc_lines / 4 : 0;
|
||||
|
||||
info->regs.ge_pc_alloc =
|
||||
S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1);
|
||||
regs->ge_pc_alloc = S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1);
|
||||
|
||||
/* Required programming for tessellation (legacy pipeline only). */
|
||||
if (binary->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
|
||||
if (shader->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
regs->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(126);
|
||||
}
|
||||
}
|
||||
|
|
@ -1654,16 +1654,19 @@ radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader_b
|
|||
}
|
||||
|
||||
void
|
||||
radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info)
|
||||
radv_precompute_registers_hw_gs(struct radv_device *device, const struct radv_shader_info *es_info,
|
||||
struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_shader_info *gs_info = &shader->info;
|
||||
struct radv_shader_regs *regs = &shader->regs;
|
||||
|
||||
gs_info->regs.gs.vgt_esgs_ring_itemsize = es_info ? es_info->esgs_itemsize / 4 : gs_info->legacy_gs_info.esgs_itemsize /4;
|
||||
regs->gs.vgt_esgs_ring_itemsize = es_info ? es_info->esgs_itemsize / 4 : gs_info->legacy_gs_info.esgs_itemsize / 4;
|
||||
|
||||
gs_info->regs.gs.vgt_gs_max_prims_per_subgroup =
|
||||
regs->gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
|
||||
gs_info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(gs_info->legacy_gs_info.es_verts_per_subgroup) |
|
||||
regs->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(gs_info->legacy_gs_info.es_verts_per_subgroup) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(gs_info->legacy_gs_info.gs_prims_per_subgroup) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
|
||||
|
|
@ -1675,44 +1678,45 @@ radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_i
|
|||
const uint8_t *num_components = gs_info->gs.num_components_per_stream;
|
||||
|
||||
uint32_t offset = num_components[0] * gs_max_out_vertices;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[0] = offset;
|
||||
regs->gs.vgt_gsvs_ring_offset[0] = offset;
|
||||
|
||||
if (max_stream >= 1)
|
||||
offset += num_components[1] * gs_max_out_vertices;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[1] = offset;
|
||||
regs->gs.vgt_gsvs_ring_offset[1] = offset;
|
||||
|
||||
if (max_stream >= 2)
|
||||
offset += num_components[2] * gs_max_out_vertices;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[2] = offset;
|
||||
regs->gs.vgt_gsvs_ring_offset[2] = offset;
|
||||
|
||||
if (max_stream >= 3)
|
||||
offset += num_components[3] * gs_max_out_vertices;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_itemsize = offset;
|
||||
regs->gs.vgt_gsvs_ring_itemsize = offset;
|
||||
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
gs_info->regs.gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0;
|
||||
regs->gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0;
|
||||
|
||||
const uint32_t gs_num_invocations = gs_info->gs.invocations;
|
||||
gs_info->regs.gs.vgt_gs_instance_cnt =
|
||||
S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0);
|
||||
regs->gs.vgt_gs_instance_cnt = S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0);
|
||||
|
||||
gs_info->regs.spi_shader_pgm_rsrc3_gs =
|
||||
regs->spi_shader_pgm_rsrc3_gs =
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdev->info);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
gs_info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
regs->spi_shader_pgm_rsrc4_gs =
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), C_00B204_CU_EN_GFX10,
|
||||
16, &pdev->info);
|
||||
}
|
||||
|
||||
gs_info->regs.vgt_gs_max_vert_out = gs_info->gs.vertices_out;
|
||||
regs->vgt_gs_max_vert_out = gs_info->gs.vertices_out;
|
||||
}
|
||||
|
||||
void
|
||||
radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_shader_config *config,
|
||||
struct radv_shader_info *info)
|
||||
radv_precompute_registers_hw_ngg(struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_shader_info *info = &shader->info;
|
||||
const struct ac_shader_config *config = &shader->config;
|
||||
struct radv_shader_regs *regs = &shader->regs;
|
||||
|
||||
const bool no_pc_export = info->outinfo.param_exports == 0 && info->outinfo.prim_param_exports == 0;
|
||||
const unsigned num_prim_params = info->outinfo.prim_param_exports;
|
||||
|
|
@ -1720,30 +1724,30 @@ radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_sha
|
|||
if (pdev->info.gfx_level >= GFX12) {
|
||||
const unsigned num_params = MAX2(info->outinfo.param_exports, 1);
|
||||
|
||||
info->regs.spi_vs_out_config = S_00B0C4_VS_EXPORT_COUNT(num_params - 1) |
|
||||
S_00B0C4_PRIM_EXPORT_COUNT(num_prim_params) | S_00B0C4_NO_PC_EXPORT(no_pc_export);
|
||||
regs->spi_vs_out_config = S_00B0C4_VS_EXPORT_COUNT(num_params - 1) | S_00B0C4_PRIM_EXPORT_COUNT(num_prim_params) |
|
||||
S_00B0C4_NO_PC_EXPORT(no_pc_export);
|
||||
|
||||
info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
regs->spi_shader_pgm_rsrc4_gs =
|
||||
S_00B220_SPI_SHADER_LATE_ALLOC_GS(127) | S_00B220_GLG_FORCE_DISABLE(1) | S_00B220_WAVE_LIMIT(0x3ff);
|
||||
} else {
|
||||
const unsigned num_params = MAX2(info->outinfo.param_exports, 1);
|
||||
|
||||
info->regs.spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(num_params - 1) |
|
||||
S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) | S_0286C4_NO_PC_EXPORT(no_pc_export);
|
||||
regs->spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(num_params - 1) | S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) |
|
||||
S_0286C4_NO_PC_EXPORT(no_pc_export);
|
||||
|
||||
unsigned late_alloc_wave64, cu_mask;
|
||||
ac_compute_late_alloc(&pdev->info, true, info->has_ngg_culling, config->scratch_bytes_per_wave > 0,
|
||||
&late_alloc_wave64, &cu_mask);
|
||||
|
||||
info->regs.spi_shader_pgm_rsrc3_gs =
|
||||
regs->spi_shader_pgm_rsrc3_gs =
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdev->info);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
regs->spi_shader_pgm_rsrc4_gs =
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
|
||||
C_00B204_CU_EN_GFX11, 16, &pdev->info);
|
||||
} else {
|
||||
info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
regs->spi_shader_pgm_rsrc4_gs =
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
|
||||
C_00B204_CU_EN_GFX10, 16, &pdev->info);
|
||||
}
|
||||
|
|
@ -1760,7 +1764,7 @@ radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_sha
|
|||
oversub_pc_lines *= oversub_factor;
|
||||
}
|
||||
|
||||
info->regs.ge_pc_alloc = S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1);
|
||||
regs->ge_pc_alloc = S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1);
|
||||
}
|
||||
|
||||
unsigned idx_format = V_028708_SPI_SHADER_1COMP;
|
||||
|
|
@ -1770,9 +1774,9 @@ radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_sha
|
|||
unsigned clip_dist_mask, cull_dist_mask;
|
||||
unsigned num_pos_exports = radv_get_num_pos_exports(info, &clip_dist_mask, &cull_dist_mask);
|
||||
|
||||
info->regs.ngg.spi_shader_idx_format = S_028708_IDX0_EXPORT_FORMAT(idx_format);
|
||||
regs->ngg.spi_shader_idx_format = S_028708_IDX0_EXPORT_FORMAT(idx_format);
|
||||
|
||||
info->regs.spi_shader_pos_format =
|
||||
regs->spi_shader_pos_format =
|
||||
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
|
||||
S_02870C_POS1_EXPORT_FORMAT(num_pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
|
||||
S_02870C_POS2_EXPORT_FORMAT(num_pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
|
||||
|
|
@ -1782,7 +1786,7 @@ radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_sha
|
|||
info->outinfo.writes_viewport_index || info->outinfo.writes_primitive_shading_rate;
|
||||
const unsigned total_mask = clip_dist_mask | cull_dist_mask;
|
||||
|
||||
info->regs.pa_cl_vs_out_cntl =
|
||||
regs->pa_cl_vs_out_cntl =
|
||||
S_02881C_USE_VTX_POINT_SIZE(info->outinfo.writes_pointsize) |
|
||||
S_02881C_USE_VTX_RENDER_TARGET_INDX(info->outinfo.writes_layer) |
|
||||
S_02881C_USE_VTX_VIEWPORT_INDX(info->outinfo.writes_viewport_index) |
|
||||
|
|
@ -1792,71 +1796,72 @@ radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_sha
|
|||
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
|
||||
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | clip_dist_mask;
|
||||
|
||||
info->regs.ngg.vgt_primitiveid_en =
|
||||
regs->ngg.vgt_primitiveid_en =
|
||||
S_028A84_NGG_DISABLE_PROVOK_REUSE(info->stage == MESA_SHADER_VERTEX && info->outinfo.export_prim_id);
|
||||
|
||||
const uint32_t gs_num_invocations = info->stage == MESA_SHADER_GEOMETRY ? info->gs.invocations : 1;
|
||||
|
||||
info->regs.ngg.ge_max_output_per_subgroup = S_0287FC_MAX_VERTS_PER_SUBGROUP(info->ngg_info.max_out_verts);
|
||||
regs->ngg.ge_max_output_per_subgroup = S_0287FC_MAX_VERTS_PER_SUBGROUP(info->ngg_info.max_out_verts);
|
||||
|
||||
info->regs.ngg.ge_ngg_subgrp_cntl =
|
||||
regs->ngg.ge_ngg_subgrp_cntl =
|
||||
S_028B4C_PRIM_AMP_FACTOR(info->ngg_info.prim_amp_factor) | S_028B4C_THDS_PER_SUBGRP(0); /* for fast launch */
|
||||
|
||||
info->regs.vgt_gs_instance_cnt =
|
||||
S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
|
||||
regs->vgt_gs_instance_cnt = S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
|
||||
S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(info->ngg_info.max_vert_out_per_gs_instance);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
/* This should be <= 252 for performance on Gfx11. 256 works too but is slower. */
|
||||
const uint32_t max_prim_grp_size = pdev->info.gfx_level >= GFX12 ? 256 : 252;
|
||||
|
||||
info->regs.ngg.ge_cntl = S_03096C_PRIMS_PER_SUBGRP(info->ngg_info.max_gsprims) |
|
||||
regs->ngg.ge_cntl = S_03096C_PRIMS_PER_SUBGRP(info->ngg_info.max_gsprims) |
|
||||
S_03096C_VERTS_PER_SUBGRP(info->ngg_info.hw_max_esverts) |
|
||||
S_03096C_PRIM_GRP_SIZE_GFX11(max_prim_grp_size) |
|
||||
S_03096C_DIS_PG_SIZE_ADJUST_FOR_STRIP(pdev->info.gfx_level >= GFX12);
|
||||
} else {
|
||||
info->regs.ngg.ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(info->ngg_info.max_gsprims) |
|
||||
regs->ngg.ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(info->ngg_info.max_gsprims) |
|
||||
S_03096C_VERT_GRP_SIZE(info->ngg_info.hw_max_esverts);
|
||||
|
||||
info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->ngg_info.hw_max_esverts) |
|
||||
regs->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->ngg_info.hw_max_esverts) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(info->ngg_info.max_gsprims) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->ngg_info.max_gsprims * gs_num_invocations);
|
||||
}
|
||||
|
||||
info->regs.vgt_gs_max_vert_out = info->gs.vertices_out;
|
||||
regs->vgt_gs_max_vert_out = info->gs.vertices_out;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
const struct radv_shader_info *info = &shader->info;
|
||||
struct radv_shader_regs *regs = &shader->regs;
|
||||
|
||||
radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info);
|
||||
radv_precompute_registers_hw_ngg(device, shader);
|
||||
|
||||
info->regs.vgt_gs_max_vert_out = pdev->info.mesh_fast_launch_2 ? info->ngg_info.max_out_verts : info->workgroup_size;
|
||||
regs->vgt_gs_max_vert_out = pdev->info.mesh_fast_launch_2 ? info->ngg_info.max_out_verts : info->workgroup_size;
|
||||
|
||||
info->regs.ngg.ms.spi_shader_gs_meshlet_dim = S_00B2B0_MESHLET_NUM_THREAD_X(info->cs.block_size[0] - 1) |
|
||||
regs->ngg.ms.spi_shader_gs_meshlet_dim = S_00B2B0_MESHLET_NUM_THREAD_X(info->cs.block_size[0] - 1) |
|
||||
S_00B2B0_MESHLET_NUM_THREAD_Y(info->cs.block_size[1] - 1) |
|
||||
S_00B2B0_MESHLET_NUM_THREAD_Z(info->cs.block_size[2] - 1) |
|
||||
S_00B2B0_MESHLET_THREADGROUP_SIZE(info->workgroup_size - 1);
|
||||
|
||||
info->regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc =
|
||||
regs->ngg.ms.spi_shader_gs_meshlet_exp_alloc =
|
||||
S_00B2B4_MAX_EXP_VERTS(info->ngg_info.max_out_verts) | S_00B2B4_MAX_EXP_PRIMS(info->ngg_info.prim_amp_factor);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
const bool derivative_group_quads = info->cs.derivative_group == DERIVATIVE_GROUP_QUADS;
|
||||
|
||||
info->regs.ngg.ms.spi_shader_gs_meshlet_ctrl =
|
||||
regs->ngg.ms.spi_shader_gs_meshlet_ctrl =
|
||||
S_00B2B8_INTERLEAVE_BITS_X(derivative_group_quads) | S_00B2B8_INTERLEAVE_BITS_Y(derivative_group_quads);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
const struct radv_shader_info *info = &shader->info;
|
||||
struct radv_shader_regs *regs = &shader->regs;
|
||||
|
||||
unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z;
|
||||
if (info->ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
|
||||
|
|
@ -1873,7 +1878,7 @@ radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_b
|
|||
const bool mask_export_enable = info->ps.writes_sample_mask;
|
||||
const bool disable_rbplus = pdev->info.has_rbplus && !pdev->info.rbplus_allowed;
|
||||
|
||||
info->regs.ps.db_shader_control =
|
||||
regs->ps.db_shader_control =
|
||||
S_02880C_Z_EXPORT_ENABLE(info->ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->ps.writes_stencil) |
|
||||
S_02880C_KILL_ENABLE(info->ps.can_discard) | S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
|
||||
S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_Z_ORDER(z_order) |
|
||||
|
|
@ -1883,62 +1888,64 @@ radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_b
|
|||
S_02880C_DUAL_QUAD_DISABLE(disable_rbplus) | S_02880C_PRIMITIVE_ORDERED_PIXEL_SHADER(info->ps.pops);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
info->regs.ps.spi_ps_in_control = S_028640_PS_W32_EN(info->wave_size == 32);
|
||||
info->regs.ps.spi_gs_out_config_ps = S_00B0C4_NUM_INTERP(info->ps.num_inputs);
|
||||
regs->ps.spi_ps_in_control = S_028640_PS_W32_EN(info->wave_size == 32);
|
||||
regs->ps.spi_gs_out_config_ps = S_00B0C4_NUM_INTERP(info->ps.num_inputs);
|
||||
|
||||
info->regs.ps.pa_sc_hisz_control = S_028BBC_ROUND(2); /* required minimum value */
|
||||
regs->ps.pa_sc_hisz_control = S_028BBC_ROUND(2); /* required minimum value */
|
||||
if (info->ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
|
||||
info->regs.ps.pa_sc_hisz_control |= S_028BBC_CONSERVATIVE_Z_EXPORT(V_028BBC_EXPORT_GREATER_THAN_Z);
|
||||
regs->ps.pa_sc_hisz_control |= S_028BBC_CONSERVATIVE_Z_EXPORT(V_028BBC_EXPORT_GREATER_THAN_Z);
|
||||
else if (info->ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS)
|
||||
info->regs.ps.pa_sc_hisz_control |= S_028BBC_CONSERVATIVE_Z_EXPORT(V_028BBC_EXPORT_LESS_THAN_Z);
|
||||
regs->ps.pa_sc_hisz_control |= S_028BBC_CONSERVATIVE_Z_EXPORT(V_028BBC_EXPORT_LESS_THAN_Z);
|
||||
} else {
|
||||
/* GFX11 workaround when there are no PS inputs but LDS is used. */
|
||||
const bool param_gen = pdev->info.gfx_level == GFX11 && !info->ps.num_inputs && binary->config.lds_size;
|
||||
const bool param_gen = pdev->info.gfx_level == GFX11 && !info->ps.num_inputs && shader->config.lds_size;
|
||||
|
||||
info->regs.ps.spi_ps_in_control = S_0286D8_PS_W32_EN(info->wave_size == 32) | S_0286D8_PARAM_GEN(param_gen);
|
||||
regs->ps.spi_ps_in_control = S_0286D8_PS_W32_EN(info->wave_size == 32) | S_0286D8_PARAM_GEN(param_gen);
|
||||
|
||||
/* Can't precompute NUM_INTERP on GFX10.3 because per-primititve attributes
|
||||
* are tracked separately in NUM_PRIM_INTERP.
|
||||
*/
|
||||
if (pdev->info.gfx_level != GFX10_3) {
|
||||
info->regs.ps.spi_ps_in_control |= S_0286D8_NUM_INTERP(info->ps.num_inputs);
|
||||
regs->ps.spi_ps_in_control |= S_0286D8_NUM_INTERP(info->ps.num_inputs);
|
||||
}
|
||||
|
||||
if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11)
|
||||
info->regs.ps.pa_sc_shader_control = S_028C40_LOAD_COLLISION_WAVEID(info->ps.pops);
|
||||
regs->ps.pa_sc_shader_control = S_028C40_LOAD_COLLISION_WAVEID(info->ps.pops);
|
||||
}
|
||||
|
||||
info->regs.ps.spi_shader_z_format = ac_get_spi_shader_z_format(
|
||||
info->ps.writes_z, info->ps.writes_stencil, info->ps.writes_sample_mask, info->ps.writes_mrt0_alpha);
|
||||
regs->ps.spi_shader_z_format = ac_get_spi_shader_z_format(info->ps.writes_z, info->ps.writes_stencil,
|
||||
info->ps.writes_sample_mask, info->ps.writes_mrt0_alpha);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
const struct radv_shader_info *info = &shader->info;
|
||||
struct radv_shader_regs *regs = &shader->regs;
|
||||
|
||||
info->regs.cs.compute_resource_limits = radv_get_compute_resource_limits(pdev, info);
|
||||
regs->cs.compute_resource_limits = radv_get_compute_resource_limits(pdev, info);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX12(info->cs.block_size[0]);
|
||||
info->regs.cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX12(info->cs.block_size[1]);
|
||||
regs->cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX12(info->cs.block_size[0]);
|
||||
regs->cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX12(info->cs.block_size[1]);
|
||||
|
||||
if (info->cs.derivative_group == DERIVATIVE_GROUP_QUADS) {
|
||||
info->regs.cs.compute_num_thread_x |= S_00B81C_INTERLEAVE_BITS_X(1);
|
||||
info->regs.cs.compute_num_thread_y |= S_00B820_INTERLEAVE_BITS_Y(1);
|
||||
regs->cs.compute_num_thread_x |= S_00B81C_INTERLEAVE_BITS_X(1);
|
||||
regs->cs.compute_num_thread_y |= S_00B820_INTERLEAVE_BITS_Y(1);
|
||||
}
|
||||
} else {
|
||||
info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX6(info->cs.block_size[0]);
|
||||
info->regs.cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX6(info->cs.block_size[1]);
|
||||
regs->cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX6(info->cs.block_size[0]);
|
||||
regs->cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX6(info->cs.block_size[1]);
|
||||
}
|
||||
info->regs.cs.compute_num_thread_z = S_00B824_NUM_THREAD_FULL(info->cs.block_size[2]);
|
||||
regs->cs.compute_num_thread_z = S_00B824_NUM_THREAD_FULL(info->cs.block_size[2]);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_pgm(const struct radv_device *device, struct radv_shader_info *info)
|
||||
radv_precompute_registers_pgm(const struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
const struct radv_shader_info *info = &shader->info;
|
||||
enum ac_hw_stage hw_stage = radv_select_hw_stage(info, gfx_level);
|
||||
|
||||
/* Special case for merged shaders compiled separately with ESO on GFX9+. */
|
||||
|
|
@ -1951,73 +1958,74 @@ radv_precompute_registers_pgm(const struct radv_device *device, struct radv_shad
|
|||
}
|
||||
}
|
||||
|
||||
struct radv_shader_regs *regs = &shader->regs;
|
||||
switch (hw_stage) {
|
||||
case AC_HW_NEXT_GEN_GEOMETRY_SHADER:
|
||||
assert(gfx_level >= GFX10);
|
||||
if (gfx_level >= GFX12) {
|
||||
info->regs.pgm_lo = R_00B224_SPI_SHADER_PGM_LO_ES;
|
||||
regs->pgm_lo = R_00B224_SPI_SHADER_PGM_LO_ES;
|
||||
} else {
|
||||
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
regs->pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
}
|
||||
|
||||
info->regs.pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
|
||||
info->regs.pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
|
||||
regs->pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
|
||||
regs->pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
|
||||
break;
|
||||
case AC_HW_LEGACY_GEOMETRY_SHADER:
|
||||
assert(gfx_level < GFX11);
|
||||
if (gfx_level >= GFX10) {
|
||||
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
regs->pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
} else if (gfx_level >= GFX9) {
|
||||
info->regs.pgm_lo = R_00B210_SPI_SHADER_PGM_LO_ES;
|
||||
regs->pgm_lo = R_00B210_SPI_SHADER_PGM_LO_ES;
|
||||
} else {
|
||||
info->regs.pgm_lo = R_00B220_SPI_SHADER_PGM_LO_GS;
|
||||
regs->pgm_lo = R_00B220_SPI_SHADER_PGM_LO_GS;
|
||||
}
|
||||
|
||||
info->regs.pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
|
||||
info->regs.pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
|
||||
regs->pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
|
||||
regs->pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
|
||||
break;
|
||||
case AC_HW_EXPORT_SHADER:
|
||||
assert(gfx_level < GFX9);
|
||||
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
info->regs.pgm_rsrc1 = R_00B328_SPI_SHADER_PGM_RSRC1_ES;
|
||||
info->regs.pgm_rsrc2 = R_00B32C_SPI_SHADER_PGM_RSRC2_ES;
|
||||
regs->pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
regs->pgm_rsrc1 = R_00B328_SPI_SHADER_PGM_RSRC1_ES;
|
||||
regs->pgm_rsrc2 = R_00B32C_SPI_SHADER_PGM_RSRC2_ES;
|
||||
break;
|
||||
case AC_HW_LOCAL_SHADER:
|
||||
assert(gfx_level < GFX9);
|
||||
info->regs.pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
|
||||
info->regs.pgm_rsrc1 = R_00B528_SPI_SHADER_PGM_RSRC1_LS;
|
||||
info->regs.pgm_rsrc2 = R_00B52C_SPI_SHADER_PGM_RSRC2_LS;
|
||||
regs->pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
|
||||
regs->pgm_rsrc1 = R_00B528_SPI_SHADER_PGM_RSRC1_LS;
|
||||
regs->pgm_rsrc2 = R_00B52C_SPI_SHADER_PGM_RSRC2_LS;
|
||||
break;
|
||||
case AC_HW_HULL_SHADER:
|
||||
if (gfx_level >= GFX12) {
|
||||
info->regs.pgm_lo = R_00B424_SPI_SHADER_PGM_LO_LS;
|
||||
regs->pgm_lo = R_00B424_SPI_SHADER_PGM_LO_LS;
|
||||
} else if (gfx_level >= GFX10) {
|
||||
info->regs.pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
|
||||
regs->pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
|
||||
} else if (gfx_level >= GFX9) {
|
||||
info->regs.pgm_lo = R_00B410_SPI_SHADER_PGM_LO_LS;
|
||||
regs->pgm_lo = R_00B410_SPI_SHADER_PGM_LO_LS;
|
||||
} else {
|
||||
info->regs.pgm_lo = R_00B420_SPI_SHADER_PGM_LO_HS;
|
||||
regs->pgm_lo = R_00B420_SPI_SHADER_PGM_LO_HS;
|
||||
}
|
||||
|
||||
info->regs.pgm_rsrc1 = R_00B428_SPI_SHADER_PGM_RSRC1_HS;
|
||||
info->regs.pgm_rsrc2 = R_00B42C_SPI_SHADER_PGM_RSRC2_HS;
|
||||
regs->pgm_rsrc1 = R_00B428_SPI_SHADER_PGM_RSRC1_HS;
|
||||
regs->pgm_rsrc2 = R_00B42C_SPI_SHADER_PGM_RSRC2_HS;
|
||||
break;
|
||||
case AC_HW_VERTEX_SHADER:
|
||||
assert(gfx_level < GFX11);
|
||||
info->regs.pgm_lo = R_00B120_SPI_SHADER_PGM_LO_VS;
|
||||
info->regs.pgm_rsrc1 = R_00B128_SPI_SHADER_PGM_RSRC1_VS;
|
||||
info->regs.pgm_rsrc2 = R_00B12C_SPI_SHADER_PGM_RSRC2_VS;
|
||||
regs->pgm_lo = R_00B120_SPI_SHADER_PGM_LO_VS;
|
||||
regs->pgm_rsrc1 = R_00B128_SPI_SHADER_PGM_RSRC1_VS;
|
||||
regs->pgm_rsrc2 = R_00B12C_SPI_SHADER_PGM_RSRC2_VS;
|
||||
break;
|
||||
case AC_HW_PIXEL_SHADER:
|
||||
info->regs.pgm_lo = R_00B020_SPI_SHADER_PGM_LO_PS;
|
||||
info->regs.pgm_rsrc1 = R_00B028_SPI_SHADER_PGM_RSRC1_PS;
|
||||
info->regs.pgm_rsrc2 = R_00B02C_SPI_SHADER_PGM_RSRC2_PS;
|
||||
regs->pgm_lo = R_00B020_SPI_SHADER_PGM_LO_PS;
|
||||
regs->pgm_rsrc1 = R_00B028_SPI_SHADER_PGM_RSRC1_PS;
|
||||
regs->pgm_rsrc2 = R_00B02C_SPI_SHADER_PGM_RSRC2_PS;
|
||||
break;
|
||||
case AC_HW_COMPUTE_SHADER:
|
||||
info->regs.pgm_lo = R_00B830_COMPUTE_PGM_LO;
|
||||
info->regs.pgm_rsrc1 = R_00B848_COMPUTE_PGM_RSRC1;
|
||||
info->regs.pgm_rsrc2 = R_00B84C_COMPUTE_PGM_RSRC2;
|
||||
info->regs.pgm_rsrc3 = R_00B8A0_COMPUTE_PGM_RSRC3;
|
||||
regs->pgm_lo = R_00B830_COMPUTE_PGM_LO;
|
||||
regs->pgm_rsrc1 = R_00B848_COMPUTE_PGM_RSRC1;
|
||||
regs->pgm_rsrc2 = R_00B84C_COMPUTE_PGM_RSRC2;
|
||||
regs->pgm_rsrc3 = R_00B8A0_COMPUTE_PGM_RSRC3;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("invalid hw stage");
|
||||
|
|
@ -2026,47 +2034,47 @@ radv_precompute_registers_pgm(const struct radv_device *device, struct radv_shad
|
|||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
radv_precompute_registers(struct radv_device *device, struct radv_shader *shader)
|
||||
{
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
const struct radv_shader_info *info = &shader->info;
|
||||
|
||||
radv_precompute_registers_pgm(device, info);
|
||||
radv_precompute_registers_pgm(device, shader);
|
||||
|
||||
switch (info->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (!info->vs.as_ls && !info->vs.as_es) {
|
||||
if (info->is_ngg) {
|
||||
radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info);
|
||||
radv_precompute_registers_hw_ngg(device, shader);
|
||||
} else {
|
||||
radv_precompute_registers_hw_vs(device, binary);
|
||||
radv_precompute_registers_hw_vs(device, shader);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (!info->tes.as_es) {
|
||||
if (info->is_ngg) {
|
||||
radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info);
|
||||
radv_precompute_registers_hw_ngg(device, shader);
|
||||
} else {
|
||||
radv_precompute_registers_hw_vs(device, binary);
|
||||
radv_precompute_registers_hw_vs(device, shader);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
if (info->is_ngg) {
|
||||
radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info);
|
||||
radv_precompute_registers_hw_ngg(device, shader);
|
||||
} else {
|
||||
radv_precompute_registers_hw_gs(device, NULL, &binary->info);
|
||||
radv_precompute_registers_hw_gs(device, NULL, shader);
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
radv_precompute_registers_hw_ms(device, binary);
|
||||
radv_precompute_registers_hw_ms(device, shader);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
radv_precompute_registers_hw_fs(device, binary);
|
||||
radv_precompute_registers_hw_fs(device, shader);
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_TASK:
|
||||
radv_precompute_registers_hw_cs(device, binary);
|
||||
radv_precompute_registers_hw_cs(device, shader);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
@ -2420,9 +2428,6 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
|
|||
config->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
|
||||
}
|
||||
|
||||
/* Precompute register values for faster emission. */
|
||||
radv_precompute_registers(device, binary);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2916,6 +2921,9 @@ radv_shader_create_uncached(struct radv_device *device, const struct radv_shader
|
|||
goto out;
|
||||
}
|
||||
|
||||
/* Precompute register values for faster emission. */
|
||||
radv_precompute_registers(device, shader);
|
||||
|
||||
*out_shader = shader;
|
||||
|
||||
out:
|
||||
|
|
|
|||
|
|
@ -422,6 +422,7 @@ struct radv_shader {
|
|||
uint32_t code_size;
|
||||
uint32_t exec_size;
|
||||
struct radv_shader_info info;
|
||||
struct radv_shader_regs regs;
|
||||
uint32_t max_waves;
|
||||
|
||||
blake3_hash hash;
|
||||
|
|
@ -709,10 +710,10 @@ radv_shader_need_push_constants_upload(const struct radv_shader *shader)
|
|||
return loc->sgpr_idx != -1;
|
||||
}
|
||||
|
||||
void radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
|
||||
void radv_precompute_registers_hw_gs(struct radv_device *device, const struct radv_shader_info *es_info,
|
||||
struct radv_shader *shader);
|
||||
|
||||
void radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_shader_config *config,
|
||||
struct radv_shader_info *info);
|
||||
void radv_precompute_registers_hw_ngg(struct radv_device *device, struct radv_shader *shader);
|
||||
|
||||
void radv_set_stage_key_robustness(const struct vk_pipeline_robustness_state *rs, mesa_shader_stage stage,
|
||||
struct radv_shader_stage_key *key);
|
||||
|
|
|
|||
|
|
@ -259,9 +259,10 @@ struct radv_shader_info {
|
|||
struct radv_legacy_gs_info legacy_gs_info;
|
||||
struct gfx10_ngg_info ngg_info;
|
||||
};
|
||||
};
|
||||
|
||||
/* Precomputed register values. */
|
||||
struct {
|
||||
struct radv_shader_regs {
|
||||
uint32_t pgm_lo;
|
||||
uint32_t pgm_rsrc1;
|
||||
uint32_t pgm_rsrc2;
|
||||
|
|
@ -323,7 +324,6 @@ struct radv_shader_info {
|
|||
uint32_t spi_vs_out_config;
|
||||
uint32_t spi_shader_pos_format;
|
||||
uint32_t vgt_gs_instance_cnt;
|
||||
} regs;
|
||||
};
|
||||
|
||||
void radv_nir_shader_info_init(mesa_shader_stage stage, mesa_shader_stage next_stage, struct radv_shader_info *info);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue