diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 79394a3ab3b..d803ca3e2db 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3326,37 +3326,11 @@ radv_emit_culling(struct radv_cmd_buffer *cmd_buffer) } } -static void -radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer) -{ - const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; - const unsigned stage = last_vgt_shader->info.stage; - const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - const uint32_t ngg_provoking_vtx_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX); - unsigned provoking_vtx = 0; - - if (!ngg_provoking_vtx_offset) - return; - - if (d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) { - if (stage == MESA_SHADER_VERTEX) { - provoking_vtx = radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg); - } else { - assert(stage == MESA_SHADER_GEOMETRY); - provoking_vtx = last_vgt_shader->info.gs.vertices_in - 1; - } - } - - radeon_set_sh_reg(cmd_buffer->cs, ngg_provoking_vtx_offset, provoking_vtx); -} - static void radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; - const uint32_t verts_per_prim_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM); const uint32_t vgt_gs_out_prim_type = radv_get_rasterization_prim(cmd_buffer); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; @@ -3374,12 +3348,6 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) } radv_emit_vgt_gs_out(cmd_buffer, vgt_gs_out_prim_type); - - if (!verts_per_prim_offset) - return; - - radeon_set_sh_reg(cmd_buffer->cs, verts_per_prim_offset, - radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg) + 1); } static void @@ -5745,9 +5713,6 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui RADV_DYNAMIC_LINE_RASTERIZATION_MODE)) radv_emit_culling(cmd_buffer); - if (states & (RADV_DYNAMIC_PROVOKING_VERTEX_MODE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY)) - radv_emit_provoking_vertex_mode(cmd_buffer); - if ((states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) || (pdev->info.gfx_level >= GFX12 && states & RADV_DYNAMIC_PATCH_CONTROL_POINTS)) radv_emit_primitive_topology(cmd_buffer); @@ -6367,74 +6332,6 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER; } -static void -radv_flush_shader_query_state_gfx(struct radv_cmd_buffer *cmd_buffer) -{ - struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; - const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_SHADER_QUERY_STATE); - enum radv_shader_query_state shader_query_state = radv_shader_query_none; - - if (!shader_query_state_offset) - return; - - assert(last_vgt_shader->info.is_ngg); - - /* By default shader queries are disabled but they are enabled if the command buffer has active GDS - * queries or if it's a secondary command buffer that inherits the number of generated - * primitives. - */ - if (cmd_buffer->state.active_pipeline_gds_queries || - (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) || - (pdev->emulate_mesh_shader_queries && (cmd_buffer->state.inherited_pipeline_statistics & - VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT))) - shader_query_state |= radv_shader_query_pipeline_stat; - - if (cmd_buffer->state.active_prims_gen_gds_queries) - shader_query_state |= radv_shader_query_prim_gen; - - if (cmd_buffer->state.active_prims_xfb_gds_queries && radv_is_streamout_enabled(cmd_buffer)) { - shader_query_state |= radv_shader_query_prim_xfb | radv_shader_query_prim_gen; - } - - radeon_set_sh_reg(cmd_buffer->cs, shader_query_state_offset, shader_query_state); -} - -static void -radv_flush_shader_query_state_ace(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *task_shader) -{ - const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(task_shader, AC_UD_SHADER_QUERY_STATE); - enum radv_shader_query_state shader_query_state = radv_shader_query_none; - - if (!shader_query_state_offset) - return; - - /* By default shader queries are disabled but they are enabled if the command buffer has active ACE - * queries or if it's a secondary command buffer that inherits the number of task shader - * invocations query. - */ - if (cmd_buffer->state.active_pipeline_ace_queries || - (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT)) - shader_query_state |= radv_shader_query_pipeline_stat; - - radeon_set_sh_reg(cmd_buffer->gang.cs, shader_query_state_offset, shader_query_state); -} - -static void -radv_flush_shader_query_state(struct radv_cmd_buffer *cmd_buffer) -{ - struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - const struct radv_physical_device *pdev = radv_device_physical(device); - - radv_flush_shader_query_state_gfx(cmd_buffer); - - if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK) && pdev->emulate_mesh_shader_queries) - radv_flush_shader_query_state_ace(cmd_buffer, cmd_buffer->state.shaders[MESA_SHADER_TASK]); - - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_SHADER_QUERY; -} - static void radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer) { @@ -10636,6 +10533,116 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer) radeon_set_sh_reg(cmd_buffer->cs, ps_state_offset, ps_state); } +static void +radv_emit_ngg_state(struct radv_cmd_buffer *cmd_buffer, uint64_t dynamic_states) +{ + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; + const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + + if (dynamic_states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) { + const uint32_t verts_per_prim_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM); + if (verts_per_prim_offset) { + radeon_set_sh_reg(cmd_buffer->cs, verts_per_prim_offset, + radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg) + 1); + } + } + + if (dynamic_states & (RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_PROVOKING_VERTEX_MODE)) { + const uint32_t ngg_provoking_vtx_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX); + const unsigned stage = last_vgt_shader->info.stage; + unsigned provoking_vtx = 0; + + if (ngg_provoking_vtx_offset) { + if (d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) { + if (stage == MESA_SHADER_VERTEX) { + provoking_vtx = radv_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg); + } else { + assert(stage == MESA_SHADER_GEOMETRY); + provoking_vtx = last_vgt_shader->info.gs.vertices_in - 1; + } + } + + radeon_set_sh_reg(cmd_buffer->cs, ngg_provoking_vtx_offset, provoking_vtx); + } + } + + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY) { + const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_SHADER_QUERY_STATE); + enum radv_shader_query_state shader_query_state = radv_shader_query_none; + + if (shader_query_state_offset) { + assert(last_vgt_shader->info.is_ngg); + + /* By default shader queries are disabled but they are enabled if the command buffer has active GDS + * queries or if it's a secondary command buffer that inherits the number of generated + * primitives. + */ + if (cmd_buffer->state.active_pipeline_gds_queries || + (cmd_buffer->state.inherited_pipeline_statistics & + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) || + (pdev->emulate_mesh_shader_queries && (cmd_buffer->state.inherited_pipeline_statistics & + VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT))) + shader_query_state |= radv_shader_query_pipeline_stat; + + if (cmd_buffer->state.active_prims_gen_gds_queries) + shader_query_state |= radv_shader_query_prim_gen; + + if (cmd_buffer->state.active_prims_xfb_gds_queries && radv_is_streamout_enabled(cmd_buffer)) { + shader_query_state |= radv_shader_query_prim_xfb | radv_shader_query_prim_gen; + } + + radeon_set_sh_reg(cmd_buffer->cs, shader_query_state_offset, shader_query_state); + } + } +} + +static void +radv_emit_task_state(struct radv_cmd_buffer *cmd_buffer) +{ + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK]; + + if (!task_shader || !pdev->emulate_mesh_shader_queries) + return; + + const uint32_t shader_query_state_offset = radv_get_user_sgpr_loc(task_shader, AC_UD_SHADER_QUERY_STATE); + enum radv_shader_query_state shader_query_state = radv_shader_query_none; + + if (!shader_query_state_offset) + return; + + /* By default shader queries are disabled but they are enabled if the command buffer has active ACE + * queries or if it's a secondary command buffer that inherits the number of task shader + * invocations query. + */ + if (cmd_buffer->state.active_pipeline_ace_queries || + (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT)) + shader_query_state |= radv_shader_query_pipeline_stat; + + radeon_set_sh_reg(cmd_buffer->gang.cs, shader_query_state_offset, shader_query_state); +} + +static void +radv_emit_shaders_state(struct radv_cmd_buffer *cmd_buffer, uint64_t dynamic_states) +{ + + if (dynamic_states & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_LINE_RASTERIZATION_MODE | + RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_POLYGON_MODE)) + radv_emit_fs_state(cmd_buffer); + + if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY) || + (dynamic_states & (RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_PROVOKING_VERTEX_MODE))) + radv_emit_ngg_state(cmd_buffer, dynamic_states); + + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY) + radv_emit_task_state(cmd_buffer); + + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_SHADER_QUERY; +} + static void radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer) { @@ -10819,9 +10826,6 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS) radv_emit_rbplus_state(cmd_buffer); - if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY) - radv_flush_shader_query_state(cmd_buffer); - if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_OCCLUSION_QUERY) || (cmd_buffer->state.dirty_dynamic & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY))) radv_flush_occlusion_query_state(cmd_buffer); @@ -10875,12 +10879,10 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r if (dynamic_states) { radv_cmd_buffer_flush_dynamic_state(cmd_buffer, dynamic_states); - - if (dynamic_states & (RADV_DYNAMIC_RASTERIZATION_SAMPLES | RADV_DYNAMIC_LINE_RASTERIZATION_MODE | - RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_POLYGON_MODE)) - radv_emit_fs_state(cmd_buffer); } + radv_emit_shaders_state(cmd_buffer, dynamic_states); + radv_emit_draw_registers(cmd_buffer, info); if (late_scissor_emission) {