radv: use paired shader registers for graphics on GFX12
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Loosely based on RadeonSI.

This is supposed to be faster because parsing the packet header seems
to be the main bottleneck on GFX12.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35282>
This commit is contained in:
Samuel Pitoiset 2025-04-14 09:34:13 +02:00 committed by Marge Bot
parent c8b3c92a3e
commit 098c15bfc9
2 changed files with 241 additions and 117 deletions

View file

@ -437,6 +437,12 @@ radv_reset_tracked_regs(struct radv_cmd_buffer *cmd_buffer)
memset(tracked_regs->spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
}
/* Reset the GFX12 buffered ("paired") SH register queue so the command
 * buffer starts with no pending register writes to flush. */
static void
radv_reset_buffered_regs(struct radv_cmd_buffer *cmd_buffer)
{
/* Entries in cmd_buffer->gfx12.buffered_sh_regs above this count are stale. */
cmd_buffer->num_buffered_sh_regs = 0;
}
static void
radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandBufferResetFlags flags)
{
@ -1979,11 +1985,18 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
radv_cs_add_buffer(device->ws, cmd_buffer->cs, ps_epilog->bo);
const uint32_t epilog_pc_offset = radv_get_user_sgpr_loc(ps_shader, AC_UD_EPILOG_PC);
radeon_begin(cmd_buffer->cs);
if (pgm_rsrc1)
radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
if (pgm_rsrc1)
gfx12_push_sh_reg(cmd_buffer, ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
gfx12_push_32bit_pointer(cmd_buffer, epilog_pc_offset, ps_epilog->va, &pdev->info);
} else {
radeon_begin(cmd_buffer->cs);
if (pgm_rsrc1)
radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info);
radeon_end();
}
cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, ps_epilog->upload_seq);
@ -2086,26 +2099,42 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
/* Emit the HW ES (export shader) stage registers: program address and the
 * two RSRC config dwords. On GFX12 the writes are queued via
 * gfx12_push_sh_reg() so they can later be emitted as paired-register
 * packets; older chips emit a SET_SH_REG sequence directly. */
static void
radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *shader)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t va = radv_shader_get_va(shader);
/* NOTE(review): this span comes from a diff rendering without +/- markers;
 * the unconditional block below duplicates the else-branch and appears to
 * be the REMOVED pre-GFX12 path fused into view — confirm against the
 * actual commit. As literally written the registers would be emitted twice. */
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
radeon_emit(va >> 8);
radeon_emit(S_00B324_MEM_BASE(va >> 40));
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
/* Queue the four dwords individually; pgm_lo + 4 is pgm_hi (MEM_BASE). */
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo + 4, S_00B324_MEM_BASE(va >> 40));
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
} else {
/* Pre-GFX12: one SET_SH_REG packet covering 4 consecutive registers. */
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
radeon_emit(va >> 8);
radeon_emit(S_00B324_MEM_BASE(va >> 40));
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
radeon_end();
}
}
/* Emit the HW LS (local shader, tess input) stage registers: program
 * address low dword and RSRC1. GFX12 queues the writes for paired-register
 * emission; older chips use direct SET_SH_REG packets. */
static void
radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *shader)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t va = radv_shader_get_va(shader);
/* NOTE(review): diff rendering without +/- markers — the unconditional
 * block below duplicates the else-branch and looks like the REMOVED
 * pre-change path fused into view; verify against the real commit. */
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
radeon_end();
}
}
static void
@ -2128,13 +2157,19 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
}
if (!shader->info.merged_shader_compiled_separately) {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
gfx12_push_sh_reg(cmd_buffer, R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
radeon_end();
}
}
const struct radv_vs_output_info *outinfo = &shader->info.outinfo;
@ -2211,7 +2246,6 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
radeon_set_uconfig_reg(R_03096C_GE_CNTL, ge_cntl);
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs);
radeon_set_uconfig_reg(R_030988_VGT_PRIMITIVEID_EN, shader->info.regs.ngg.vgt_primitiveid_en);
} else {
if (pdev->info.gfx_level >= GFX7) {
@ -2235,18 +2269,23 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t va = radv_shader_get_va(shader);
radeon_begin(cmd_buffer->cs);
if (pdev->info.gfx_level >= GFX9) {
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
} else {
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
radeon_emit(va >> 8);
radeon_emit(S_00B424_MEM_BASE(va >> 40));
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
radeon_begin(cmd_buffer->cs);
if (pdev->info.gfx_level >= GFX9) {
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
} else {
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
radeon_emit(va >> 8);
radeon_emit(S_00B424_MEM_BASE(va >> 40));
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
}
radeon_end();
}
radeon_end();
}
static void
@ -2272,20 +2311,34 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
const uint32_t next_stage_pc_offset = radv_get_user_sgpr_loc(vs, AC_UD_NEXT_STAGE_PC);
radeon_begin(cmd_buffer->cs);
radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_32bit_pointer(cmd_buffer, next_stage_pc_offset, next_stage->va, &pdev->info);
if (!vs->info.vs.has_prolog) {
radeon_set_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8);
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
radeon_set_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1);
} else {
radeon_set_sh_reg_seq(vs->info.regs.pgm_rsrc1, 2);
radeon_emit(rsrc1);
radeon_emit(rsrc2);
if (!vs->info.vs.has_prolog) {
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_lo, vs->va >> 8);
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc1, rsrc1);
} else {
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc1, rsrc1);
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc2, rsrc2);
}
}
} else {
radeon_begin(cmd_buffer->cs);
radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info);
if (!vs->info.vs.has_prolog) {
radeon_set_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8);
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
radeon_set_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1);
} else {
radeon_set_sh_reg_seq(vs->info.regs.pgm_rsrc1, 2);
radeon_emit(rsrc1);
radeon_emit(rsrc2);
}
}
radeon_end();
}
radeon_end();
return;
}
@ -2329,16 +2382,22 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
radv_shader_combine_cfg_tes_gs(device, tes, gs, &rsrc1, &rsrc2);
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8);
radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2);
radeon_emit(rsrc1);
radeon_emit(rsrc2);
const uint32_t next_stage_pc_offset = radv_get_user_sgpr_loc(tes, AC_UD_NEXT_STAGE_PC);
radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_lo, tes->va >> 8);
gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_rsrc1, rsrc1);
gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_rsrc2, rsrc2);
gfx12_push_32bit_pointer(cmd_buffer, next_stage_pc_offset, gs->va, &pdev->info);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8);
radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2);
radeon_emit(rsrc1);
radeon_emit(rsrc2);
radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info);
radeon_end();
}
return;
}
@ -2484,13 +2543,18 @@ radv_gfx11_emit_meshlet(struct radv_cmd_buffer *cmd_buffer, const struct radv_sh
assert(pdev->info.gfx_level >= GFX11);
radeon_begin(cs);
radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_dim);
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
if (pdev->info.gfx_level >= GFX12)
radeon_set_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ms.spi_shader_gs_meshlet_ctrl);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, ms->info.regs.ms.spi_shader_gs_meshlet_dim);
gfx12_push_sh_reg(cmd_buffer, R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC,
ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
gfx12_push_sh_reg(cmd_buffer, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ms.spi_shader_gs_meshlet_ctrl);
} else {
radeon_begin(cs);
radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_dim);
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
radeon_end();
}
}
static void
@ -2712,16 +2776,25 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct
/* Emit the fragment shader program registers (address + RSRC1/RSRC2),
 * then the remaining PS state. GFX12 queues the SH register writes for
 * paired-register emission; older chips emit a 4-dword SET_SH_REG
 * sequence directly. */
static void
radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
const uint64_t va = radv_shader_get_va(ps);
/* NOTE(review): diff rendering without +/- markers — the unconditional
 * block below duplicates the else-branch and appears to be the REMOVED
 * pre-change path fused into view; confirm against the actual commit. */
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4);
radeon_emit(va >> 8);
radeon_emit(S_00B024_MEM_BASE(va >> 40));
radeon_emit(ps->config.rsrc1);
radeon_emit(ps->config.rsrc2);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
/* pgm_lo + 4 is the high-address register (MEM_BASE field). */
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_lo, va >> 8);
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_lo + 4, S_00B024_MEM_BASE(va >> 40));
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc1, ps->config.rsrc1);
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc2, ps->config.rsrc2);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4);
radeon_emit(va >> 8);
radeon_emit(S_00B024_MEM_BASE(va >> 40));
radeon_emit(ps->config.rsrc1);
radeon_emit(ps->config.rsrc2);
radeon_end();
}
/* Non-register PS state (emitted on all gfx levels). */
radv_emit_fragment_shader_state(cmd_buffer, ps);
}
@ -2962,9 +3035,7 @@ radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
radv_emit_fragment_shader_state(cmd_buffer, NULL);
}
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS, gs_out_config_ps);
radeon_end();
gfx12_push_sh_reg(cmd_buffer, R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS, gs_out_config_ps);
}
const struct radv_vgt_shader_key vgt_shader_cfg_key =
@ -5038,17 +5109,20 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
if (G_00B848_VGPRS(prolog->rsrc1) > G_00B848_VGPRS(rsrc1))
rsrc1 = (rsrc1 & C_00B848_VGPRS) | (prolog->rsrc1 & ~C_00B848_VGPRS);
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8);
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1);
if (vs_shader->info.merged_shader_compiled_separately) {
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_lo, prolog->va >> 8);
gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_rsrc1, rsrc1);
if (vs_shader->info.merged_shader_compiled_separately)
gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_rsrc2, rsrc2);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8);
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1);
if (vs_shader->info.merged_shader_compiled_separately)
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2);
radeon_end();
}
radeon_end();
radv_cs_add_buffer(device->ws, cmd_buffer->cs, prolog->bo);
}
@ -5886,16 +5960,20 @@ radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
if (!streamout_buffers_offset)
return;
radeon_begin(cmd_buffer->cs);
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_32bit_pointer(cmd_buffer, streamout_buffers_offset, va, &pdev->info);
} else {
radeon_begin(cmd_buffer->cs);
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
if (cmd_buffer->state.gs_copy_shader) {
streamout_buffers_offset = radv_get_user_sgpr_loc(cmd_buffer->state.gs_copy_shader, AC_UD_STREAMOUT_BUFFERS);
if (streamout_buffers_offset)
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
if (cmd_buffer->state.gs_copy_shader) {
streamout_buffers_offset = radv_get_user_sgpr_loc(cmd_buffer->state.gs_copy_shader, AC_UD_STREAMOUT_BUFFERS);
if (streamout_buffers_offset)
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
}
radeon_end();
}
radeon_end();
}
static void
@ -5912,9 +5990,7 @@ radv_emit_streamout_state(struct radv_cmd_buffer *cmd_buffer)
if (!streamout_state_offset)
return;
radeon_begin(cmd_buffer->cs);
radeon_emit_32bit_pointer(streamout_state_offset, so->state_va, &pdev->info);
radeon_end();
gfx12_push_32bit_pointer(cmd_buffer, streamout_state_offset, so->state_va, &pdev->info);
}
static void
@ -6012,9 +6088,14 @@ radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer)
if (cmd_buffer->state.last_vrs_rates != vrs_rates ||
cmd_buffer->state.last_force_vrs_rates_offset != force_vrs_rates_offset) {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(force_vrs_rates_offset, vrs_rates);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, force_vrs_rates_offset, vrs_rates);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(force_vrs_rates_offset, vrs_rates);
radeon_end();
}
}
cmd_buffer->state.last_vrs_rates = vrs_rates;
@ -6808,6 +6889,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
cmd_buffer->state.last_force_vrs_rates_offset = -1;
radv_reset_tracked_regs(cmd_buffer);
radv_reset_buffered_regs(cmd_buffer);
cmd_buffer->usage_flags = pBeginInfo->flags;
@ -10395,6 +10477,8 @@ radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer)
static void
radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
if (!ps)
@ -10413,9 +10497,13 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer)
SET_SGPR_FIELD(PS_STATE_LINE_RAST_MODE, radv_get_line_mode(cmd_buffer)) |
SET_SGPR_FIELD(PS_STATE_RAST_PRIM, rast_prim);
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(ps_state_offset, ps_state);
radeon_end();
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, ps_state_offset, ps_state);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(ps_state_offset, ps_state);
radeon_end();
}
}
static uint32_t
@ -10492,16 +10580,19 @@ radv_emit_ngg_state(struct radv_cmd_buffer *cmd_buffer)
SET_SGPR_FIELD(NGG_STATE_PROVOKING_VTX, radv_get_ngg_state_provoking_vtx(cmd_buffer)) |
SET_SGPR_FIELD(NGG_STATE_QUERY, radv_get_ngg_state_query(cmd_buffer));
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(ngg_state_offset, ngg_state);
const uint32_t ngg_query_buf_va_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_QUERY_BUF_VA);
if (pdev->info.gfx_level >= GFX11) {
const uint32_t ngg_query_buf_va_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_QUERY_BUF_VA);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, ngg_state_offset, ngg_state);
if (ngg_query_buf_va_offset)
gfx12_push_sh_reg(cmd_buffer, ngg_query_buf_va_offset, cmd_buffer->state.shader_query_buf_va);
} else {
radeon_begin(cmd_buffer->cs);
radeon_set_sh_reg(ngg_state_offset, ngg_state);
if (ngg_query_buf_va_offset)
radeon_set_sh_reg(ngg_query_buf_va_offset, cmd_buffer->state.shader_query_buf_va);
radeon_end();
}
radeon_end();
}
static void
@ -10573,22 +10664,30 @@ radv_emit_tess_state(struct radv_cmd_buffer *cmd_buffer)
assert(tes_offchip_layout_offset);
}
radeon_begin(cs);
if (pdev->info.gfx_level >= GFX9) {
radeon_set_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(cmd_buffer, tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
if (tcs_offchip_layout) {
gfx12_push_sh_reg(cmd_buffer, tcs_offchip_layout_offset, tcs_offchip_layout);
gfx12_push_sh_reg(cmd_buffer, tes_offchip_layout_offset, tcs_offchip_layout);
}
} else {
const uint32_t ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
radeon_begin(cs);
radeon_set_sh_reg(vs->info.regs.pgm_rsrc2, ls_rsrc2);
if (pdev->info.gfx_level >= GFX9) {
radeon_set_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
} else {
const uint32_t ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
radeon_set_sh_reg(vs->info.regs.pgm_rsrc2, ls_rsrc2);
}
if (tcs_offchip_layout) {
radeon_set_sh_reg(tcs_offchip_layout_offset, tcs_offchip_layout);
radeon_set_sh_reg(tes_offchip_layout_offset, tcs_offchip_layout);
}
radeon_end();
}
if (tcs_offchip_layout) {
radeon_set_sh_reg(tcs_offchip_layout_offset, tcs_offchip_layout);
radeon_set_sh_reg(tes_offchip_layout_offset, tcs_offchip_layout);
}
radeon_end();
}
static void
@ -11528,6 +11627,12 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info
radv_emit_all_graphics_states(cmd_buffer, info);
}
if (pdev->info.gfx_level >= GFX12) {
radeon_begin(cmd_buffer->cs);
gfx12_emit_buffered_sh_regs(&cmd_buffer->num_buffered_sh_regs, cmd_buffer->gfx12.buffered_sh_regs);
radeon_end();
}
if (!dgc)
radv_describe_draw(cmd_buffer);
if (likely(!info->indirect_va)) {
@ -11552,6 +11657,9 @@ ALWAYS_INLINE static bool
radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount,
bool dgc)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
/* For direct draws, this makes sure we don't draw anything.
* For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100).
*/
@ -11562,7 +11670,6 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
radv_bind_graphics_shaders(cmd_buffer);
}
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs;
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
@ -11594,6 +11701,12 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
if (pc_stages)
radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
if (pdev->info.gfx_level >= GFX12) {
radeon_begin(cmd_buffer->cs);
gfx12_emit_buffered_sh_regs(&cmd_buffer->num_buffered_sh_regs, cmd_buffer->gfx12.buffered_sh_regs);
radeon_end();
}
if (!dgc)
radv_describe_draw(cmd_buffer);
if (likely(!info->indirect_va)) {

View file

@ -539,11 +539,22 @@ struct radv_cmd_buffer_upload {
struct list_head list;
};
/* A pair of values for SET_*_REG_PAIRS. */
/* One buffered (offset, value) entry queued by gfx12_push_sh_reg() and
 * flushed by gfx12_emit_buffered_sh_regs() before a draw. */
struct gfx12_reg {
uint32_t reg_offset; /* register offset, as passed to gfx12_push_sh_reg() */
uint32_t reg_value; /* dword value to write to that register */
};
struct radv_cmd_buffer {
struct vk_command_buffer vk;
struct radv_tracked_regs tracked_regs;
uint32_t num_buffered_sh_regs;
struct {
struct gfx12_reg buffered_sh_regs[64];
} gfx12;
VkCommandBufferUsageFlags usage_flags;
struct radeon_cmdbuf *cs;
struct radv_cmd_state state;