radv: emit compute pipeline with buffered SH regs on GFX12

This also includes RT, task shaders and DGC IES for compute.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36570>
This commit is contained in:
Samuel Pitoiset 2025-08-05 09:08:23 +02:00 committed by Marge Bot
parent bbf8338443
commit 95d2f009a9
6 changed files with 72 additions and 20 deletions

View file

@ -47,7 +47,11 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
const uint64_t va = reloc->va[MESA_SHADER_TASK];
radeon_begin(ace_cs);
radeon_set_sh_reg(task_shader->info.regs.pgm_lo, va >> 8);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(task_shader->info.regs.pgm_lo, va >> 8);
} else {
radeon_set_sh_reg(task_shader->info.regs.pgm_lo, va >> 8);
}
radeon_end();
}
}

View file

@ -2030,20 +2030,29 @@ radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radv_cm
uint64_t va = radv_shader_get_va(shader);
radeon_begin(cs);
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(shader->info.regs.pgm_lo, va >> 8);
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
gfx12_push_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3);
gfx12_push_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
gfx12_push_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X, shader->info.regs.cs.compute_num_thread_x);
gfx12_push_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y, shader->info.regs.cs.compute_num_thread_y);
gfx12_push_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z, shader->info.regs.cs.compute_num_thread_z);
} else {
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
if (pdev->info.gfx_level >= GFX10)
radeon_set_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3);
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(shader->info.regs.pgm_rsrc3, shader->config.rsrc3);
radeon_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
radeon_set_sh_reg_seq(R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(shader->info.regs.cs.compute_num_thread_x);
radeon_emit(shader->info.regs.cs.compute_num_thread_y);
radeon_emit(shader->info.regs.cs.compute_num_thread_z);
}
radeon_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
radeon_set_sh_reg_seq(R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(shader->info.regs.cs.compute_num_thread_x);
radeon_emit(shader->info.regs.cs.compute_num_thread_y);
radeon_emit(shader->info.regs.cs.compute_num_thread_z);
radeon_end();
}
@ -7270,8 +7279,13 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compu
const struct radv_shader_info *cs_info = &rt_prolog->info;
radeon_begin(cs);
radeon_set_sh_reg(ray_dynamic_callback_stack_base_offset,
rt_prolog->config.scratch_bytes_per_wave / cs_info->wave_size);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(ray_dynamic_callback_stack_base_offset,
rt_prolog->config.scratch_bytes_per_wave / cs_info->wave_size);
} else {
radeon_set_sh_reg(ray_dynamic_callback_stack_base_offset,
rt_prolog->config.scratch_bytes_per_wave / cs_info->wave_size);
}
radeon_end();
}
@ -7279,8 +7293,13 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compu
struct radv_shader *traversal_shader = cmd_buffer->state.shaders[MESA_SHADER_INTERSECTION];
if (traversal_shader_addr_offset && traversal_shader) {
uint64_t traversal_va = traversal_shader->va | radv_rt_priority_traversal;
radeon_begin(cs);
radeon_emit_64bit_pointer(traversal_shader_addr_offset, traversal_va);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_64bit_pointer(traversal_shader_addr_offset, traversal_va);
} else {
radeon_emit_64bit_pointer(traversal_shader_addr_offset, traversal_va);
}
radeon_end();
}
}
@ -10602,7 +10621,11 @@ radv_emit_task_state(struct radv_cmd_buffer *cmd_buffer)
shader_query_state |= radv_shader_query_pipeline_stat;
radeon_begin(cmd_buffer->gang.cs);
radeon_set_sh_reg(task_state_offset, shader_query_state);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(task_state_offset, shader_query_state);
} else {
radeon_set_sh_reg(task_state_offset, shader_query_state);
}
radeon_end();
}
@ -11800,6 +11823,9 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
if (pdev->info.gfx_level >= GFX12) {
radv_gfx12_emit_buffered_regs(device, cs);
if (task_shader)
radv_gfx12_emit_buffered_regs(device, cmd_buffer->gang.cs);
}
if (!dgc)
@ -12650,8 +12676,13 @@ radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer)
rsrc2 |= S_00B12C_SCRATCH_EN(1);
radeon_check_space(device->ws, cs->b, 3);
radeon_begin(cs);
radeon_set_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2);
} else {
radeon_set_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2);
}
radeon_end();
}
@ -12726,6 +12757,9 @@ radv_before_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pip
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
}
}
if (pdev->info.gfx_level >= GFX12)
radv_gfx12_emit_buffered_regs(device, cmd_buffer->cs);
}
static void

View file

@ -568,7 +568,7 @@ struct radv_cmd_stream {
uint32_t num_buffered_sh_regs;
struct {
struct gfx12_reg buffered_sh_regs[64];
struct gfx12_reg buffered_sh_regs[256];
} gfx12;
};

View file

@ -642,7 +642,7 @@ radv_init_tracked_regs(struct radv_cmd_stream *cs)
memset(tracked_regs->sx_mrt_blend_opt, 0xff, sizeof(uint32_t) * MAX_RTS);
}
static void
void
radv_init_cmd_stream(struct radv_cmd_stream *cs)
{
cs->context_roll_without_scissor_emitted = false;

View file

@ -322,6 +322,12 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned
gfx12_push_sh_reg(sh_offset, va); \
} while (0)
#define gfx12_push_64bit_pointer(sh_offset, va) \
do { \
gfx12_push_sh_reg(sh_offset, va); \
gfx12_push_sh_reg(sh_offset + 4, va >> 32); \
} while (0)
ALWAYS_INLINE static void
radv_gfx12_emit_buffered_regs(struct radv_device *device, struct radv_cmd_stream *cs)
{
@ -423,6 +429,8 @@ radv_emit_pm4_commands(struct radv_cmd_stream *cs, const struct ac_pm4_state *pm
VkResult radv_create_cmd_stream(const struct radv_device *device, enum radv_queue_family family, bool is_secondary,
struct radv_cmd_stream **cs_out);
void radv_init_cmd_stream(struct radv_cmd_stream *cs);
void radv_reset_cmd_stream(const struct radv_device *device, struct radv_cmd_stream *cs);
VkResult radv_finalize_cmd_stream(const struct radv_device *device, struct radv_cmd_stream *cs);

View file

@ -7,6 +7,7 @@
#include "radv_dgc.h"
#include "meta/radv_meta.h"
#include "nir/radv_meta_nir.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_entrypoints.h"
#include "radv_pipeline_rt.h"
@ -3378,6 +3379,8 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio
struct radv_compute_pipeline_metadata md;
struct radv_cmd_stream cs;
radv_init_cmd_stream(&cs);
assert(shader->info.stage == MESA_SHADER_COMPUTE);
radv_get_compute_shader_metadata(device, shader, &md);
@ -3386,6 +3389,7 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio
return;
cs.b->reserved_dw = cs.b->max_dw = 32;
cs.b->buf = malloc(cs.b->max_dw * 4);
if (!cs.b->buf) {
free(cs.b);
@ -3393,6 +3397,8 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio
}
radv_emit_compute_shader(pdev, &cs, shader);
if (pdev->info.gfx_level >= GFX12)
radv_gfx12_emit_buffered_regs(device, &cs);
memcpy(ptr, &md, sizeof(md));
ptr += sizeof(md);