diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 2f9965cb60f..a22238b4e3d 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -47,7 +47,11 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv const uint64_t va = reloc->va[MESA_SHADER_TASK]; radeon_begin(ace_cs); - radeon_set_sh_reg(task_shader->info.regs.pgm_lo, va >> 8); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(task_shader->info.regs.pgm_lo, va >> 8); + } else { + radeon_set_sh_reg(task_shader->info.regs.pgm_lo, va >> 8); + } radeon_end(); } } diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index a33915f2f83..f9c2fcb8b99 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2030,20 +2030,29 @@ radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radv_cm uint64_t va = radv_shader_get_va(shader); radeon_begin(cs); - radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(shader->info.regs.pgm_lo, va >> 8); + gfx12_push_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1); + gfx12_push_sh_reg(shader->info.regs.pgm_rsrc2, shader->config.rsrc2); + gfx12_push_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3); + gfx12_push_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits); + gfx12_push_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X, shader->info.regs.cs.compute_num_thread_x); + gfx12_push_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y, shader->info.regs.cs.compute_num_thread_y); + gfx12_push_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z, shader->info.regs.cs.compute_num_thread_z); + } else { + radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); + radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2); + radeon_emit(shader->config.rsrc1); + radeon_emit(shader->config.rsrc2); + if (pdev->info.gfx_level >= GFX10) + radeon_set_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3); - radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2); - radeon_emit(shader->config.rsrc1); - radeon_emit(shader->config.rsrc2); - if (pdev->info.gfx_level >= GFX10) { - radeon_set_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3); + radeon_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits); + radeon_set_sh_reg_seq(R_00B81C_COMPUTE_NUM_THREAD_X, 3); + radeon_emit(shader->info.regs.cs.compute_num_thread_x); + radeon_emit(shader->info.regs.cs.compute_num_thread_y); + radeon_emit(shader->info.regs.cs.compute_num_thread_z); } - - radeon_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits); - radeon_set_sh_reg_seq(R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(shader->info.regs.cs.compute_num_thread_x); - radeon_emit(shader->info.regs.cs.compute_num_thread_y); - radeon_emit(shader->info.regs.cs.compute_num_thread_z); radeon_end(); } @@ -7270,8 +7279,13 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compu const struct radv_shader_info *cs_info = &rt_prolog->info; radeon_begin(cs); - radeon_set_sh_reg(ray_dynamic_callback_stack_base_offset, - rt_prolog->config.scratch_bytes_per_wave / cs_info->wave_size); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(ray_dynamic_callback_stack_base_offset, + rt_prolog->config.scratch_bytes_per_wave / cs_info->wave_size); + } else { + radeon_set_sh_reg(ray_dynamic_callback_stack_base_offset, + rt_prolog->config.scratch_bytes_per_wave / cs_info->wave_size); + } radeon_end(); } @@ -7279,8 +7293,13 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compu struct radv_shader *traversal_shader = cmd_buffer->state.shaders[MESA_SHADER_INTERSECTION]; if (traversal_shader_addr_offset && traversal_shader) { uint64_t traversal_va = traversal_shader->va | radv_rt_priority_traversal; + radeon_begin(cs); - radeon_emit_64bit_pointer(traversal_shader_addr_offset, traversal_va); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_64bit_pointer(traversal_shader_addr_offset, traversal_va); + } else { + radeon_emit_64bit_pointer(traversal_shader_addr_offset, traversal_va); + } radeon_end(); } } @@ -10602,7 +10621,11 @@ radv_emit_task_state(struct radv_cmd_buffer *cmd_buffer) shader_query_state |= radv_shader_query_pipeline_stat; radeon_begin(cmd_buffer->gang.cs); - radeon_set_sh_reg(task_state_offset, shader_query_state); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(task_state_offset, shader_query_state); + } else { + radeon_set_sh_reg(task_state_offset, shader_query_state); + } radeon_end(); } @@ -11800,6 +11823,9 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ if (pdev->info.gfx_level >= GFX12) { radv_gfx12_emit_buffered_regs(device, cs); + + if (task_shader) + radv_gfx12_emit_buffered_regs(device, cmd_buffer->gang.cs); } if (!dgc) @@ -12650,8 +12676,13 @@ radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer) rsrc2 |= S_00B12C_SCRATCH_EN(1); radeon_check_space(device->ws, cs->b, 3); + radeon_begin(cs); - radeon_set_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2); + } else { + radeon_set_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2); + } radeon_end(); } @@ -12726,6 +12757,9 @@ radv_before_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pip cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT; } } + + if (pdev->info.gfx_level >= GFX12) + radv_gfx12_emit_buffered_regs(device, cmd_buffer->cs); } static void diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 98013d194e1..f942bddad30 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -568,7 +568,7 @@ struct radv_cmd_stream { uint32_t num_buffered_sh_regs; struct { - struct gfx12_reg buffered_sh_regs[64]; + struct gfx12_reg buffered_sh_regs[256]; } gfx12; }; diff --git a/src/amd/vulkan/radv_cs.c b/src/amd/vulkan/radv_cs.c index e1c576e8857..a8549a4779a 100644 --- a/src/amd/vulkan/radv_cs.c +++ b/src/amd/vulkan/radv_cs.c @@ -642,7 +642,7 @@ radv_init_tracked_regs(struct radv_cmd_stream *cs) memset(tracked_regs->sx_mrt_blend_opt, 0xff, sizeof(uint32_t) * MAX_RTS); } -static void +void radv_init_cmd_stream(struct radv_cmd_stream *cs) { cs->context_roll_without_scissor_emitted = false; diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index 69ce4c79256..b39c2516035 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -322,6 +322,12 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned gfx12_push_sh_reg(sh_offset, va); \ } while (0) +#define gfx12_push_64bit_pointer(sh_offset, va) \ + do { \ + gfx12_push_sh_reg(sh_offset, va); \ + gfx12_push_sh_reg(sh_offset + 4, va >> 32); \ + } while (0) + ALWAYS_INLINE static void radv_gfx12_emit_buffered_regs(struct radv_device *device, struct radv_cmd_stream *cs) { @@ -423,6 +429,8 @@ radv_emit_pm4_commands(struct radv_cmd_stream *cs, const struct ac_pm4_state *pm VkResult radv_create_cmd_stream(const struct radv_device *device, enum radv_queue_family family, bool is_secondary, struct radv_cmd_stream **cs_out); +void radv_init_cmd_stream(struct radv_cmd_stream *cs); + void radv_reset_cmd_stream(const struct radv_device *device, struct radv_cmd_stream *cs); VkResult radv_finalize_cmd_stream(const struct radv_device *device, struct radv_cmd_stream *cs); diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index 8c23125d2b7..6410d0dd80b 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -7,6 +7,7 @@ #include "radv_dgc.h" #include "meta/radv_meta.h" #include "nir/radv_meta_nir.h" +#include "radv_cs.h" #include "radv_debug.h" #include "radv_entrypoints.h" #include "radv_pipeline_rt.h" @@ -3378,6 +3379,8 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio struct radv_compute_pipeline_metadata md; struct radv_cmd_stream cs; + radv_init_cmd_stream(&cs); + assert(shader->info.stage == MESA_SHADER_COMPUTE); radv_get_compute_shader_metadata(device, shader, &md); @@ -3386,6 +3389,7 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio return; cs.b->reserved_dw = cs.b->max_dw = 32; + cs.b->buf = malloc(cs.b->max_dw * 4); if (!cs.b->buf) { free(cs.b); @@ -3393,6 +3397,8 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio } radv_emit_compute_shader(pdev, &cs, shader); + if (pdev->info.gfx_level >= GFX12) + radv_gfx12_emit_buffered_regs(device, &cs); memcpy(ptr, &md, sizeof(md)); ptr += sizeof(md);