radv: add radv_get_user_sgpr{_loc}() helpers

To simplify all the user sgpr computations which are very redundant.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29957>
This commit was authored by Samuel Pitoiset on 2024-06-28 13:14:52 +02:00 and committed by Marge Bot.
parent bf852536fc
commit f22ee282fc
5 changed files with 112 additions and 182 deletions

View file

@ -2392,21 +2392,19 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer)
gs->info.regs.vgt_gs_max_vert_out);
if (gs->info.merged_shader_compiled_separately) {
const struct radv_userdata_info *vgt_esgs_ring_itemsize =
radv_get_user_sgpr_info(gs, AC_UD_VGT_ESGS_RING_ITEMSIZE);
const uint32_t vgt_esgs_ring_itemsize_offset = radv_get_user_sgpr_loc(gs, AC_UD_VGT_ESGS_RING_ITEMSIZE);
assert(vgt_esgs_ring_itemsize->sgpr_idx != -1 && vgt_esgs_ring_itemsize->num_sgprs == 1);
assert(vgt_esgs_ring_itemsize_offset);
radeon_set_sh_reg(cmd_buffer->cs, gs->info.user_data_0 + vgt_esgs_ring_itemsize->sgpr_idx * 4,
es->info.esgs_itemsize / 4);
radeon_set_sh_reg(cmd_buffer->cs, vgt_esgs_ring_itemsize_offset, es->info.esgs_itemsize / 4);
if (gs->info.is_ngg) {
const struct radv_userdata_info *ngg_lds_layout = radv_get_user_sgpr_info(gs, AC_UD_NGG_LDS_LAYOUT);
const uint32_t ngg_lds_layout_offset = radv_get_user_sgpr_loc(gs, AC_UD_NGG_LDS_LAYOUT);
assert(ngg_lds_layout->sgpr_idx != -1 && ngg_lds_layout->num_sgprs == 1);
assert(ngg_lds_layout_offset);
assert(!(gs->info.ngg_info.esgs_ring_size & 0xffff0000) && !(gs->info.ngg_info.scratch_lds_base & 0xffff0000));
radeon_set_sh_reg(cmd_buffer->cs, gs->info.user_data_0 + ngg_lds_layout->sgpr_idx * 4,
radeon_set_sh_reg(cmd_buffer->cs, ngg_lds_layout_offset,
SET_SGPR_FIELD(NGG_LDS_LAYOUT_GS_OUT_VERTEX_BASE, gs->info.ngg_info.esgs_ring_size) |
SET_SGPR_FIELD(NGG_LDS_LAYOUT_SCRATCH_BASE, gs->info.ngg_info.scratch_lds_base));
}
@ -3349,11 +3347,10 @@ radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer)
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const unsigned stage = last_vgt_shader->info.stage;
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX);
const uint32_t ngg_provoking_vtx_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX);
unsigned provoking_vtx = 0;
uint32_t base_reg;
if (loc->sgpr_idx == -1)
if (!ngg_provoking_vtx_offset)
return;
if (d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) {
@ -3365,8 +3362,7 @@ radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer)
}
}
base_reg = last_vgt_shader->info.user_data_0;
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, provoking_vtx);
radeon_set_sh_reg(cmd_buffer->cs, ngg_provoking_vtx_offset, provoking_vtx);
}
static void
@ -3375,10 +3371,9 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM);
const uint32_t verts_per_prim_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM);
const uint32_t vgt_gs_out_prim_type = radv_get_rasterization_prim(cmd_buffer);
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
uint32_t base_reg;
assert(!cmd_buffer->state.mesh_shading);
@ -3395,11 +3390,10 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
radv_emit_vgt_gs_out(cmd_buffer, vgt_gs_out_prim_type);
if (loc->sgpr_idx == -1)
if (!verts_per_prim_offset)
return;
base_reg = last_vgt_shader->info.user_data_0;
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
radeon_set_sh_reg(cmd_buffer->cs, verts_per_prim_offset,
radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg) + 1);
}
@ -3704,7 +3698,7 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
const struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL];
const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL);
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
unsigned ls_hs_config, base_reg;
unsigned ls_hs_config;
/* Compute tessellation info that depends on the number of patch control points when this state
* is dynamic.
@ -3757,10 +3751,9 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
}
/* Emit user SGPRs for dynamic patch control points. */
const struct radv_userdata_info *offchip = radv_get_user_sgpr_info(tcs, AC_UD_TCS_OFFCHIP_LAYOUT);
if (offchip->sgpr_idx == -1)
uint32_t tcs_offchip_layout_offset = radv_get_user_sgpr_loc(tcs, AC_UD_TCS_OFFCHIP_LAYOUT);
if (!tcs_offchip_layout_offset)
return;
assert(offchip->num_sgprs == 1);
unsigned tcs_offchip_layout =
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS, d->vk.ts.patch_control_points - 1) |
@ -3771,14 +3764,12 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_TES_READS_TF, tes->info.tes.reads_tess_factors) |
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE, tes->info.tes._primitive_mode);
base_reg = tcs->info.user_data_0;
radeon_set_sh_reg(cmd_buffer->cs, base_reg + offchip->sgpr_idx * 4, tcs_offchip_layout);
radeon_set_sh_reg(cmd_buffer->cs, tcs_offchip_layout_offset, tcs_offchip_layout);
const struct radv_userdata_info *tes_offchip = radv_get_user_sgpr_info(tes, AC_UD_TCS_OFFCHIP_LAYOUT);
assert(tes_offchip->sgpr_idx != -1 && tes_offchip->num_sgprs == 1);
tcs_offchip_layout_offset = radv_get_user_sgpr_loc(tes, AC_UD_TCS_OFFCHIP_LAYOUT);
assert(tcs_offchip_layout_offset);
base_reg = tes->info.user_data_0;
radeon_set_sh_reg(cmd_buffer->cs, base_reg + tes_offchip->sgpr_idx * 4, tcs_offchip_layout);
radeon_set_sh_reg(cmd_buffer->cs, tcs_offchip_layout_offset, tcs_offchip_layout);
}
static void
@ -6340,19 +6331,20 @@ static void
radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(last_vgt_shader, AC_UD_STREAMOUT_BUFFERS);
const uint32_t streamout_buffers_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_STREAMOUT_BUFFERS);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint32_t base_reg;
if (loc->sgpr_idx == -1)
if (!streamout_buffers_offset)
return;
base_reg = last_vgt_shader->info.user_data_0;
radv_emit_shader_pointer(device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false);
radv_emit_shader_pointer(device, cmd_buffer->cs, streamout_buffers_offset, va, false);
if (cmd_buffer->state.gs_copy_shader) {
loc = &cmd_buffer->state.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
const struct radv_userdata_info *loc =
&cmd_buffer->state.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
if (loc->sgpr_idx != -1) {
base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
@ -6365,16 +6357,13 @@ static void
radv_emit_streamout_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(last_vgt_shader, AC_UD_STREAMOUT_STATE);
const uint32_t streamout_state_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_STREAMOUT_STATE);
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint32_t base_reg;
if (loc->sgpr_idx == -1)
if (!streamout_state_offset)
return;
base_reg = last_vgt_shader->info.user_data_0;
radv_emit_shader_pointer(device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false);
radv_emit_shader_pointer(device, cmd_buffer->cs, streamout_state_offset, va, false);
}
static void
@ -6472,11 +6461,10 @@ radv_flush_shader_query_state_gfx(struct radv_cmd_buffer *cmd_buffer)
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(last_vgt_shader, AC_UD_SHADER_QUERY_STATE);
const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_SHADER_QUERY_STATE);
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
uint32_t base_reg;
if (loc->sgpr_idx == -1)
if (!shader_query_state_offset)
return;
assert(last_vgt_shader->info.is_ngg || last_vgt_shader->info.stage == MESA_SHADER_GEOMETRY);
@ -6500,20 +6488,16 @@ radv_flush_shader_query_state_gfx(struct radv_cmd_buffer *cmd_buffer)
shader_query_state |= radv_shader_query_prim_xfb | radv_shader_query_prim_gen;
}
base_reg = last_vgt_shader->info.user_data_0;
assert(loc->sgpr_idx != -1);
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, shader_query_state);
radeon_set_sh_reg(cmd_buffer->cs, shader_query_state_offset, shader_query_state);
}
static void
radv_flush_shader_query_state_ace(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *task_shader)
{
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(task_shader, AC_UD_SHADER_QUERY_STATE);
const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(task_shader, AC_UD_SHADER_QUERY_STATE);
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
uint32_t base_reg;
if (loc->sgpr_idx == -1)
if (!shader_query_state_offset)
return;
/* By default shader queries are disabled but they are enabled if the command buffer has active ACE
@ -6524,10 +6508,7 @@ radv_flush_shader_query_state_ace(struct radv_cmd_buffer *cmd_buffer, struct rad
(cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT))
shader_query_state |= radv_shader_query_pipeline_stat;
base_reg = task_shader->info.user_data_0;
assert(loc->sgpr_idx != -1);
radeon_set_sh_reg(cmd_buffer->gang.cs, base_reg + loc->sgpr_idx * 4, shader_query_state);
radeon_set_sh_reg(cmd_buffer->gang.cs, shader_query_state_offset, shader_query_state);
}
static void
@ -9621,12 +9602,12 @@ static void
radv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, const struct radv_shader *shader, uint32_t base_reg,
unsigned index)
{
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, AC_UD_VIEW_INDEX);
const uint32_t view_index_offset = radv_get_user_sgpr_loc(shader, AC_UD_VIEW_INDEX);
if (loc->sgpr_idx == -1)
if (!view_index_offset)
return;
radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, index);
radeon_set_sh_reg(cs, view_index_offset, index);
}
static void
@ -9825,11 +9806,7 @@ radv_cs_emit_dispatch_taskmesh_direct_ace_packet(const struct radv_device *devic
const bool predicating = cmd_state->predicating;
const uint32_t dispatch_initiator =
device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
const struct radv_userdata_info *ring_entry_loc = radv_get_user_sgpr_info(task_shader, AC_UD_TASK_RING_ENTRY);
assert(ring_entry_loc && ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1);
uint32_t ring_entry_reg = (task_shader->info.user_data_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
const uint32_t ring_entry_reg = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
radeon_emit(ace_cs, PKT3(PKT3_DISPATCH_TASKMESH_DIRECT_ACE, 4, predicating) | PKT3_SHADER_TYPE_S(1));
radeon_emit(ace_cs, x);
@ -9850,32 +9827,18 @@ radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(const struct radv_devic
const struct radv_shader *task_shader = cmd_state->shaders[MESA_SHADER_TASK];
const uint32_t xyz_dim_enable = task_shader->info.cs.uses_grid_size;
const uint32_t draw_id_enable = task_shader->info.vs.needs_draw_id;
const uint32_t dispatch_initiator =
device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
const struct radv_userdata_info *ring_entry_loc = radv_get_user_sgpr_info(task_shader, AC_UD_TASK_RING_ENTRY);
const struct radv_userdata_info *xyz_dim_loc = radv_get_user_sgpr_info(task_shader, AC_UD_CS_GRID_SIZE);
const struct radv_userdata_info *draw_id_loc = radv_get_user_sgpr_info(task_shader, AC_UD_CS_TASK_DRAW_ID);
assert(ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1);
assert(!xyz_dim_enable || (xyz_dim_loc->sgpr_idx != -1 && xyz_dim_loc->num_sgprs == 3));
assert(!draw_id_enable || (draw_id_loc->sgpr_idx != -1 && draw_id_loc->num_sgprs == 1));
const uint32_t ring_entry_reg =
(task_shader->info.user_data_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
const uint32_t xyz_dim_reg =
!xyz_dim_enable ? 0 : (task_shader->info.user_data_0 + xyz_dim_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
const uint32_t draw_id_reg =
!draw_id_enable ? 0 : (task_shader->info.user_data_0 + draw_id_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
const uint32_t ring_entry_reg = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
const uint32_t xyz_dim_reg = radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE);
const uint32_t draw_id_reg = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID);
radeon_emit(ace_cs, PKT3(PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE, 9, 0) | PKT3_SHADER_TYPE_S(1));
radeon_emit(ace_cs, data_va);
radeon_emit(ace_cs, data_va >> 32);
radeon_emit(ace_cs, S_AD2_RING_ENTRY_REG(ring_entry_reg));
radeon_emit(ace_cs, S_AD3_COUNT_INDIRECT_ENABLE(!!count_va) | S_AD3_DRAW_INDEX_ENABLE(draw_id_enable) |
S_AD3_XYZ_DIM_ENABLE(xyz_dim_enable) | S_AD3_DRAW_INDEX_REG(draw_id_reg));
radeon_emit(ace_cs, S_AD3_COUNT_INDIRECT_ENABLE(!!count_va) | S_AD3_DRAW_INDEX_ENABLE(!!draw_id_reg) |
S_AD3_XYZ_DIM_ENABLE(!!xyz_dim_reg) | S_AD3_DRAW_INDEX_REG(draw_id_reg));
radeon_emit(ace_cs, S_AD4_XYZ_DIM_REG(xyz_dim_reg));
radeon_emit(ace_cs, draw_count);
radeon_emit(ace_cs, count_va);
@ -9892,13 +9855,10 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons
const struct radv_shader *mesh_shader = cmd_state->shaders[MESA_SHADER_MESH];
const bool predicating = cmd_state->predicating;
const struct radv_userdata_info *ring_entry_loc = radv_get_user_sgpr_info(mesh_shader, AC_UD_TASK_RING_ENTRY);
assert(ring_entry_loc->sgpr_idx != -1);
const uint32_t ring_entry_reg = radv_get_user_sgpr(mesh_shader, AC_UD_TASK_RING_ENTRY);
uint32_t xyz_dim_en = mesh_shader->info.cs.uses_grid_size;
uint32_t xyz_dim_reg = !xyz_dim_en ? 0 : (cmd_state->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
uint32_t ring_entry_reg = ((mesh_shader->info.user_data_0 - SI_SH_REG_OFFSET) >> 2) + ring_entry_loc->sgpr_idx;
uint32_t mode1_en = !pdev->mesh_fast_launch_2;
uint32_t linear_dispatch_en = cmd_state->shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
const bool sqtt_en = !!device->sqtt.bo;
@ -9994,24 +9954,18 @@ radv_emit_userdata_task(const struct radv_cmd_state *cmd_state, struct radeon_cm
{
const struct radv_shader *task_shader = cmd_state->shaders[MESA_SHADER_TASK];
const struct radv_userdata_info *xyz_loc = radv_get_user_sgpr_info(task_shader, AC_UD_CS_GRID_SIZE);
const struct radv_userdata_info *draw_id_loc = radv_get_user_sgpr_info(task_shader, AC_UD_CS_TASK_DRAW_ID);
const uint32_t xyz_offset = radv_get_user_sgpr_loc(task_shader, AC_UD_CS_GRID_SIZE);
const uint32_t draw_id_offset = radv_get_user_sgpr_loc(task_shader, AC_UD_CS_TASK_DRAW_ID);
if (xyz_loc->sgpr_idx != -1) {
assert(xyz_loc->num_sgprs == 3);
unsigned xyz_reg = task_shader->info.user_data_0 + xyz_loc->sgpr_idx * 4;
radeon_set_sh_reg_seq(ace_cs, xyz_reg, 3);
if (xyz_offset) {
radeon_set_sh_reg_seq(ace_cs, xyz_offset, 3);
radeon_emit(ace_cs, x);
radeon_emit(ace_cs, y);
radeon_emit(ace_cs, z);
}
if (draw_id_loc->sgpr_idx != -1) {
assert(draw_id_loc->num_sgprs == 1);
unsigned draw_id_reg = task_shader->info.user_data_0 + draw_id_loc->sgpr_idx * 4;
radeon_set_sh_reg_seq(ace_cs, draw_id_reg, 1);
if (draw_id_offset) {
radeon_set_sh_reg_seq(ace_cs, draw_id_offset, 1);
radeon_emit(ace_cs, 0);
}
}
@ -10539,7 +10493,6 @@ static void
radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const uint32_t base_reg = last_vgt_shader->info.user_data_0;
/* Get viewport transform. */
float vp_scale[2], vp_translate[2];
@ -10565,16 +10518,14 @@ radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer)
}
uint32_t vp_reg_values[4] = {fui(vp_scale[0]), fui(vp_scale[1]), fui(vp_translate[0]), fui(vp_translate[1])};
const int8_t vp_sgpr_idx = radv_get_user_sgpr_info(last_vgt_shader, AC_UD_NGG_VIEWPORT)->sgpr_idx;
assert(vp_sgpr_idx != -1);
radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + vp_sgpr_idx * 4, 4);
const uint32_t ngg_viewport_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_VIEWPORT);
radeon_set_sh_reg_seq(cmd_buffer->cs, ngg_viewport_offset, 4);
radeon_emit_array(cmd_buffer->cs, vp_reg_values, 4);
}
const int8_t nggc_sgpr_idx = radv_get_user_sgpr_info(last_vgt_shader, AC_UD_NGG_CULLING_SETTINGS)->sgpr_idx;
assert(nggc_sgpr_idx != -1);
const uint32_t ngg_culling_settings_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_CULLING_SETTINGS);
radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings);
radeon_set_sh_reg(cmd_buffer->cs, ngg_culling_settings_offset, nggc_settings);
}
static void
@ -10582,27 +10533,24 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
const struct radv_userdata_info *loc;
if (!ps)
return;
loc = radv_get_user_sgpr_info(ps, AC_UD_PS_STATE);
if (loc->sgpr_idx == -1)
const uint32_t ps_state_offset = radv_get_user_sgpr_loc(ps, AC_UD_PS_STATE);
if (!ps_state_offset)
return;
assert(loc->num_sgprs == 1);
const unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer);
const unsigned ps_iter_samples = radv_get_ps_iter_samples(cmd_buffer);
const uint16_t ps_iter_mask = ac_get_ps_iter_mask(ps_iter_samples);
const unsigned rast_prim = radv_get_rasterization_prim(cmd_buffer);
const uint32_t base_reg = ps->info.user_data_0;
const unsigned ps_state = SET_SGPR_FIELD(PS_STATE_NUM_SAMPLES, rasterization_samples) |
SET_SGPR_FIELD(PS_STATE_PS_ITER_MASK, ps_iter_mask) |
SET_SGPR_FIELD(PS_STATE_LINE_RAST_MODE, d->vk.rs.line.mode) |
SET_SGPR_FIELD(PS_STATE_RAST_PRIM, rast_prim);
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ps_state);
radeon_set_sh_reg(cmd_buffer->cs, ps_state_offset, ps_state);
}
static void
@ -11683,7 +11631,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
struct radeon_winsys *ws = device->ws;
bool predicating = cmd_buffer->state.predicating;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(compute_shader, AC_UD_CS_GRID_SIZE);
const uint32_t grid_size_offset = radv_get_user_sgpr_loc(compute_shader, AC_UD_CS_GRID_SIZE);
radv_describe_dispatch(cmd_buffer, info);
@ -11718,18 +11666,16 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1);
}
if (loc->sgpr_idx != -1) {
unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4;
if (grid_size_offset) {
if (device->load_grid_size_from_user_sgpr) {
assert(pdev->info.gfx_level >= GFX10_3);
radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0));
radeon_emit(cs, info->va);
radeon_emit(cs, info->va >> 32);
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (grid_size_offset - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, 3);
} else {
radv_emit_shader_pointer(device, cmd_buffer->cs, reg, info->va, true);
radv_emit_shader_pointer(device, cmd_buffer->cs, grid_size_offset, info->va, true);
}
}
@ -11829,11 +11775,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
}
if (loc->sgpr_idx != -1) {
if (grid_size_offset) {
if (device->load_grid_size_from_user_sgpr) {
assert(loc->num_sgprs == 3);
radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
radeon_set_sh_reg_seq(cs, grid_size_offset, 3);
radeon_emit(cs, blocks[0]);
radeon_emit(cs, blocks[1]);
radeon_emit(cs, blocks[2]);
@ -11843,8 +11787,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
return;
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
radv_emit_shader_pointer(device, cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, va,
true);
radv_emit_shader_pointer(device, cmd_buffer->cs, grid_size_offset, va, true);
}
}
@ -12208,7 +12151,6 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K
struct radv_compute_pipeline *pipeline = &cmd_buffer->state.rt_pipeline->base;
struct radv_shader *rt_prolog = cmd_buffer->state.rt_prolog;
uint32_t base_reg = rt_prolog->info.user_data_0;
/* Reserve scratch for stacks manually since it is not handled by the compute path. */
uint32_t scratch_bytes_per_wave = rt_prolog->config.scratch_bytes_per_wave;
@ -12263,29 +12205,29 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 15);
const struct radv_userdata_info *desc_loc = radv_get_user_sgpr_info(rt_prolog, AC_UD_CS_SBT_DESCRIPTORS);
if (desc_loc->sgpr_idx != -1) {
radv_emit_shader_pointer(device, cmd_buffer->cs, base_reg + desc_loc->sgpr_idx * 4, sbt_va, true);
const uint32_t sbt_descriptors_offset = radv_get_user_sgpr_loc(rt_prolog, AC_UD_CS_SBT_DESCRIPTORS);
if (sbt_descriptors_offset) {
radv_emit_shader_pointer(device, cmd_buffer->cs, sbt_descriptors_offset, sbt_va, true);
}
const struct radv_userdata_info *size_loc = radv_get_user_sgpr_info(rt_prolog, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
if (size_loc->sgpr_idx != -1) {
radv_emit_shader_pointer(device, cmd_buffer->cs, base_reg + size_loc->sgpr_idx * 4, launch_size_va, true);
const uint32_t ray_launch_size_addr_offset = radv_get_user_sgpr_loc(rt_prolog, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
if (ray_launch_size_addr_offset) {
radv_emit_shader_pointer(device, cmd_buffer->cs, ray_launch_size_addr_offset, launch_size_va, true);
}
const struct radv_userdata_info *base_loc =
radv_get_user_sgpr_info(rt_prolog, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
if (base_loc->sgpr_idx != -1) {
const uint32_t ray_dynamic_callback_stack_base_offset =
radv_get_user_sgpr_loc(rt_prolog, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
if (ray_dynamic_callback_stack_base_offset) {
const struct radv_shader_info *cs_info = &rt_prolog->info;
radeon_set_sh_reg(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + base_loc->sgpr_idx * 4,
radeon_set_sh_reg(cmd_buffer->cs, ray_dynamic_callback_stack_base_offset,
rt_prolog->config.scratch_bytes_per_wave / cs_info->wave_size);
}
const struct radv_userdata_info *shader_loc = radv_get_user_sgpr_info(rt_prolog, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
const uint32_t traversal_shader_addr_offset = radv_get_user_sgpr_loc(rt_prolog, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
struct radv_shader *traversal_shader = cmd_buffer->state.shaders[MESA_SHADER_INTERSECTION];
if (shader_loc->sgpr_idx != -1 && traversal_shader) {
if (traversal_shader_addr_offset && traversal_shader) {
uint64_t traversal_va = traversal_shader->va | radv_rt_priority_traversal;
radv_emit_shader_pointer(device, cmd_buffer->cs, base_reg + shader_loc->sgpr_idx * 4, traversal_va, true);
radv_emit_shader_pointer(device, cmd_buffer->cs, traversal_shader_addr_offset, traversal_va, true);
}
assert(cmd_buffer->cs->cdw <= cdw_max);
@ -12300,8 +12242,8 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K
/* Reset the ray launch size so the prolog doesn't think this is a converted dispatch */
tables->height = 1;
radv_upload_trace_rays_params(cmd_buffer, tables, mode, &launch_size_va, NULL);
if (size_loc->sgpr_idx != -1) {
radv_emit_shader_pointer(device, cmd_buffer->cs, base_reg + size_loc->sgpr_idx * 4, launch_size_va, true);
if (ray_launch_size_addr_offset) {
radv_emit_shader_pointer(device, cmd_buffer->cs, ray_launch_size_addr_offset, launch_size_va, true);
}
radv_dispatch(cmd_buffer, &info, pipeline, rt_prolog, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);

View file

@ -2408,30 +2408,14 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
vtx_base_sgpr |= DGC_USES_GRID_SIZE;
if (task_shader) {
const struct radv_userdata_info *mesh_ring_entry_loc =
radv_get_user_sgpr_info(mesh_shader, AC_UD_TASK_RING_ENTRY);
const struct radv_userdata_info *task_ring_entry_loc =
radv_get_user_sgpr_info(task_shader, AC_UD_TASK_RING_ENTRY);
const struct radv_userdata_info *xyz_loc = radv_get_user_sgpr_info(task_shader, AC_UD_CS_GRID_SIZE);
const struct radv_userdata_info *draw_id_loc = radv_get_user_sgpr_info(task_shader, AC_UD_CS_TASK_DRAW_ID);
params->has_task_shader = 1;
params->mesh_ring_entry_sgpr =
((mesh_shader->info.user_data_0 - SI_SH_REG_OFFSET) >> 2) + mesh_ring_entry_loc->sgpr_idx;
params->mesh_ring_entry_sgpr = radv_get_user_sgpr(mesh_shader, AC_UD_TASK_RING_ENTRY);
params->linear_dispatch_en = task_shader->info.cs.linear_taskmesh_dispatch;
params->task_ring_entry_sgpr =
(task_shader->info.user_data_0 + task_ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
params->task_ring_entry_sgpr = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
params->dispatch_initiator_task =
device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
if (xyz_loc->sgpr_idx != -1) {
params->task_xyz_sgpr = ((task_shader->info.user_data_0 + xyz_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2;
}
if (draw_id_loc->sgpr_idx != -1) {
params->task_draw_id_sgpr =
((task_shader->info.user_data_0 + draw_id_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2;
}
params->task_xyz_sgpr = radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE);
params->task_draw_id_sgpr = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID);
}
} else {
if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
@ -2468,9 +2452,8 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
++idx;
}
params->vbo_cnt = idx;
params->vbo_reg = ((radv_get_user_sgpr_info(vs, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 + vs->info.user_data_0) -
SI_SH_REG_OFFSET) >>
2;
params->vbo_reg = radv_get_user_sgpr(vs, AC_UD_VS_VERTEX_BUFFERS);
*upload_data = (char *)*upload_data + vb_size;
}
}
@ -2512,10 +2495,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
params->dispatch_initiator |= S_00B800_CS_W32_EN(1);
}
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(cs, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
}
params->grid_base_sgpr = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
} else {
struct radv_descriptor_state *descriptors_state =
radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);

View file

@ -74,30 +74,15 @@ radv_get_compute_shader_metadata(const struct radv_device *device, const struct
metadata->block_size_z = cs->info.cs.block_size[2];
metadata->wave32 = cs->info.wave_size == 32;
const struct radv_userdata_info *grid_size_loc = radv_get_user_sgpr_info(cs, AC_UD_CS_GRID_SIZE);
if (grid_size_loc->sgpr_idx != -1) {
metadata->grid_base_sgpr = (cs->info.user_data_0 + 4 * grid_size_loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
}
metadata->grid_base_sgpr = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
const struct radv_userdata_info *push_constant_loc = radv_get_user_sgpr_info(cs, AC_UD_PUSH_CONSTANTS);
if (push_constant_loc->sgpr_idx != -1) {
upload_sgpr = (cs->info.user_data_0 + 4 * push_constant_loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
}
const struct radv_userdata_info *inline_push_constant_loc = radv_get_user_sgpr_info(cs, AC_UD_INLINE_PUSH_CONSTANTS);
if (inline_push_constant_loc->sgpr_idx != -1) {
inline_sgpr = (cs->info.user_data_0 + 4 * inline_push_constant_loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
}
upload_sgpr = radv_get_user_sgpr(cs, AC_UD_PUSH_CONSTANTS);
inline_sgpr = radv_get_user_sgpr(cs, AC_UD_INLINE_PUSH_CONSTANTS);
metadata->push_const_sgpr = upload_sgpr | (inline_sgpr << 16);
metadata->inline_push_const_mask = cs->info.inline_push_constant_mask;
const struct radv_userdata_info *indirect_desc_sets_loc =
radv_get_user_sgpr_info(cs, AC_UD_INDIRECT_DESCRIPTOR_SETS);
if (indirect_desc_sets_loc->sgpr_idx != -1) {
metadata->indirect_desc_sets_sgpr =
(cs->info.user_data_0 + 4 * indirect_desc_sets_loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
}
metadata->indirect_desc_sets_sgpr = radv_get_user_sgpr(cs, AC_UD_INDIRECT_DESCRIPTOR_SETS);
}
void

View file

@ -3309,6 +3309,25 @@ radv_get_user_sgpr_info(const struct radv_shader *shader, int idx)
return &shader->info.user_sgprs_locs.shader_data[idx];
}
/* Return the SH register offset of the user SGPR identified by `idx` for
 * this shader (info.user_data_0 + sgpr_idx * 4), or 0 when the shader does
 * not use that user SGPR.
 *
 * NOTE(review): 0 doubles as the "not present" sentinel — callers in this
 * commit test the returned offset for truthiness before emitting it, which
 * assumes 0 is never a valid user-data register offset.
 */
uint32_t
radv_get_user_sgpr_loc(const struct radv_shader *shader, int idx)
{
const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, idx);
/* sgpr_idx == -1 means this user SGPR was not allocated for the shader. */
if (loc->sgpr_idx == -1)
return 0;
return shader->info.user_data_0 + loc->sgpr_idx * 4;
}
/* Return the user SGPR location of `idx` encoded as a packed SH register
 * index ((byte_offset - SI_SH_REG_OFFSET) >> 2), as expected by PM4 packet
 * fields such as RING_ENTRY_REG/XYZ_DIM_REG, or 0 when the shader does not
 * use that user SGPR (0 is the "not present" sentinel, mirroring
 * radv_get_user_sgpr_loc()).
 */
uint32_t
radv_get_user_sgpr(const struct radv_shader *shader, int idx)
{
const uint32_t offset = radv_get_user_sgpr_loc(shader, idx);
return offset ? ((offset - SI_SH_REG_OFFSET) >> 2) : 0;
}
static uint32_t
radv_get_tess_patch_size(uint32_t tcs_num_input_vertices, uint32_t tcs_num_output_vertices, uint32_t tcs_num_inputs,
uint32_t tcs_num_lds_outputs, uint32_t tcs_num_lds_patch_outputs)

View file

@ -710,6 +710,10 @@ void radv_shader_combine_cfg_tes_gs(const struct radv_shader *tes, const struct
const struct radv_userdata_info *radv_get_user_sgpr_info(const struct radv_shader *shader, int idx);
uint32_t radv_get_user_sgpr_loc(const struct radv_shader *shader, int idx);
uint32_t radv_get_user_sgpr(const struct radv_shader *shader, int idx);
void radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_shader_config *config,
struct radv_shader_info *info);