radv: emit all shader related user SGPR states in one place

This will allow us to use only one user SGPR for NGG shaders, and also
further optimizations.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31160>
This commit is contained in:
Authored by Samuel Pitoiset on 2024-09-12 18:10:41 +02:00; committed by Marge Bot
parent e1b74407bb
commit 16341f41e1

View file

@@ -3326,37 +3326,11 @@ radv_emit_culling(struct radv_cmd_buffer *cmd_buffer)
}
}
static void
radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const unsigned stage = last_vgt_shader->info.stage;
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
const uint32_t ngg_provoking_vtx_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX);
unsigned provoking_vtx = 0;
if (!ngg_provoking_vtx_offset)
return;
if (d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) {
if (stage == MESA_SHADER_VERTEX) {
provoking_vtx = radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg);
} else {
assert(stage == MESA_SHADER_GEOMETRY);
provoking_vtx = last_vgt_shader->info.gs.vertices_in - 1;
}
}
radeon_set_sh_reg(cmd_buffer->cs, ngg_provoking_vtx_offset, provoking_vtx);
}
static void
radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const uint32_t verts_per_prim_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM);
const uint32_t vgt_gs_out_prim_type = radv_get_rasterization_prim(cmd_buffer);
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
@@ -3374,12 +3348,6 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
}
radv_emit_vgt_gs_out(cmd_buffer, vgt_gs_out_prim_type);
if (!verts_per_prim_offset)
return;
radeon_set_sh_reg(cmd_buffer->cs, verts_per_prim_offset,
radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg) + 1);
}
static void
@@ -5745,9 +5713,6 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui
RADV_DYNAMIC_LINE_RASTERIZATION_MODE))
radv_emit_culling(cmd_buffer);
if (states & (RADV_DYNAMIC_PROVOKING_VERTEX_MODE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY))
radv_emit_provoking_vertex_mode(cmd_buffer);
if ((states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) ||
(pdev->info.gfx_level >= GFX12 && states & RADV_DYNAMIC_PATCH_CONTROL_POINTS))
radv_emit_primitive_topology(cmd_buffer);
@@ -6367,74 +6332,6 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
static void
radv_flush_shader_query_state_gfx(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_SHADER_QUERY_STATE);
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
if (!shader_query_state_offset)
return;
assert(last_vgt_shader->info.is_ngg);
/* By default shader queries are disabled but they are enabled if the command buffer has active GDS
* queries or if it's a secondary command buffer that inherits the number of generated
* primitives.
*/
if (cmd_buffer->state.active_pipeline_gds_queries ||
(cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) ||
(pdev->emulate_mesh_shader_queries && (cmd_buffer->state.inherited_pipeline_statistics &
VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT)))
shader_query_state |= radv_shader_query_pipeline_stat;
if (cmd_buffer->state.active_prims_gen_gds_queries)
shader_query_state |= radv_shader_query_prim_gen;
if (cmd_buffer->state.active_prims_xfb_gds_queries && radv_is_streamout_enabled(cmd_buffer)) {
shader_query_state |= radv_shader_query_prim_xfb | radv_shader_query_prim_gen;
}
radeon_set_sh_reg(cmd_buffer->cs, shader_query_state_offset, shader_query_state);
}
static void
radv_flush_shader_query_state_ace(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *task_shader)
{
const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(task_shader, AC_UD_SHADER_QUERY_STATE);
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
if (!shader_query_state_offset)
return;
/* By default shader queries are disabled but they are enabled if the command buffer has active ACE
* queries or if it's a secondary command buffer that inherits the number of task shader
* invocations query.
*/
if (cmd_buffer->state.active_pipeline_ace_queries ||
(cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT))
shader_query_state |= radv_shader_query_pipeline_stat;
radeon_set_sh_reg(cmd_buffer->gang.cs, shader_query_state_offset, shader_query_state);
}
static void
radv_flush_shader_query_state(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
radv_flush_shader_query_state_gfx(cmd_buffer);
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK) && pdev->emulate_mesh_shader_queries)
radv_flush_shader_query_state_ace(cmd_buffer, cmd_buffer->state.shaders[MESA_SHADER_TASK]);
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_SHADER_QUERY;
}
static void
radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer)
{
@@ -10636,6 +10533,116 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer)
radeon_set_sh_reg(cmd_buffer->cs, ps_state_offset, ps_state);
}
static void
radv_emit_ngg_state(struct radv_cmd_buffer *cmd_buffer, uint64_t dynamic_states)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
if (dynamic_states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
const uint32_t verts_per_prim_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM);
if (verts_per_prim_offset) {
radeon_set_sh_reg(cmd_buffer->cs, verts_per_prim_offset,
radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg) + 1);
}
}
if (dynamic_states & (RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_PROVOKING_VERTEX_MODE)) {
const uint32_t ngg_provoking_vtx_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX);
const unsigned stage = last_vgt_shader->info.stage;
unsigned provoking_vtx = 0;
if (ngg_provoking_vtx_offset) {
if (d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) {
if (stage == MESA_SHADER_VERTEX) {
provoking_vtx = radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg);
} else {
assert(stage == MESA_SHADER_GEOMETRY);
provoking_vtx = last_vgt_shader->info.gs.vertices_in - 1;
}
}
radeon_set_sh_reg(cmd_buffer->cs, ngg_provoking_vtx_offset, provoking_vtx);
}
}
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY) {
const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_SHADER_QUERY_STATE);
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
if (shader_query_state_offset) {
assert(last_vgt_shader->info.is_ngg);
/* By default shader queries are disabled but they are enabled if the command buffer has active GDS
* queries or if it's a secondary command buffer that inherits the number of generated
* primitives.
*/
if (cmd_buffer->state.active_pipeline_gds_queries ||
(cmd_buffer->state.inherited_pipeline_statistics &
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) ||
(pdev->emulate_mesh_shader_queries && (cmd_buffer->state.inherited_pipeline_statistics &
VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT)))
shader_query_state |= radv_shader_query_pipeline_stat;
if (cmd_buffer->state.active_prims_gen_gds_queries)
shader_query_state |= radv_shader_query_prim_gen;
if (cmd_buffer->state.active_prims_xfb_gds_queries && radv_is_streamout_enabled(cmd_buffer)) {
shader_query_state |= radv_shader_query_prim_xfb | radv_shader_query_prim_gen;
}
radeon_set_sh_reg(cmd_buffer->cs, shader_query_state_offset, shader_query_state);
}
}
}
static void
radv_emit_task_state(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
if (!task_shader || !pdev->emulate_mesh_shader_queries)
return;
const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(task_shader, AC_UD_SHADER_QUERY_STATE);
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
if (!shader_query_state_offset)
return;
/* By default shader queries are disabled but they are enabled if the command buffer has active ACE
* queries or if it's a secondary command buffer that inherits the number of task shader
* invocations query.
*/
if (cmd_buffer->state.active_pipeline_ace_queries ||
(cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT))
shader_query_state |= radv_shader_query_pipeline_stat;
radeon_set_sh_reg(cmd_buffer->gang.cs, shader_query_state_offset, shader_query_state);
}
static void
radv_emit_shaders_state(struct radv_cmd_buffer *cmd_buffer, uint64_t dynamic_states)
{
if (dynamic_states & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_LINE_RASTERIZATION_MODE |
RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_POLYGON_MODE))
radv_emit_fs_state(cmd_buffer);
if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY) ||
(dynamic_states & (RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_PROVOKING_VERTEX_MODE)))
radv_emit_ngg_state(cmd_buffer, dynamic_states);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY)
radv_emit_task_state(cmd_buffer);
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_SHADER_QUERY;
}
static void
radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
{
@@ -10819,9 +10826,6 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS)
radv_emit_rbplus_state(cmd_buffer);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY)
radv_flush_shader_query_state(cmd_buffer);
if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_OCCLUSION_QUERY) ||
(cmd_buffer->state.dirty_dynamic & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY)))
radv_flush_occlusion_query_state(cmd_buffer);
@@ -10875,12 +10879,10 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
if (dynamic_states) {
radv_cmd_buffer_flush_dynamic_state(cmd_buffer, dynamic_states);
if (dynamic_states & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_LINE_RASTERIZATION_MODE |
RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_POLYGON_MODE))
radv_emit_fs_state(cmd_buffer);
}
radv_emit_shaders_state(cmd_buffer, dynamic_states);
radv_emit_draw_registers(cmd_buffer, info);
if (late_scissor_emission) {