From f8aed0793bf57e1aff9c757f283aed721d26bf3f Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 22 Apr 2026 13:33:00 +0200 Subject: [PATCH] radv: move queue related cmd buffer state to a new struct Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 103 +++++++++++++------------------ src/amd/vulkan/radv_cmd_buffer.h | 28 +++++---- src/amd/vulkan/radv_query.c | 8 +-- src/amd/vulkan/radv_queue.c | 24 +++---- 4 files changed, 76 insertions(+), 87 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index dbbf9c81d9f..c7ca6dbebb1 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1281,23 +1281,12 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB radv_rra_accel_struct_buffers_unref(device, cmd_buffer->accel_struct_buffers); cmd_buffer->push_constant_stages = 0; - cmd_buffer->scratch_size_per_wave_needed = 0; - cmd_buffer->scratch_waves_wanted = 0; - cmd_buffer->compute_scratch_size_per_wave_needed = 0; - cmd_buffer->compute_scratch_waves_wanted = 0; - cmd_buffer->esgs_ring_size_needed = 0; - cmd_buffer->gsvs_ring_size_needed = 0; - cmd_buffer->tess_rings_needed = false; - cmd_buffer->task_rings_needed = false; - cmd_buffer->mesh_scratch_ring_needed = false; - cmd_buffer->gds_needed = false; - cmd_buffer->gds_oa_needed = false; - cmd_buffer->sample_positions_needed = false; cmd_buffer->gang.sem.leader_value = 0; cmd_buffer->gang.sem.emitted_leader_value = 0; cmd_buffer->gang.sem.va = 0; cmd_buffer->shader_upload_seq = 0; memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings)); + memset(&cmd_buffer->queue_state, 0, sizeof(cmd_buffer->queue_state)); if (cmd_buffer->upload.upload_bo) radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->upload.upload_bo); @@ -8429,7 +8418,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) * we leave the IB, otherwise another process might overwrite * it while our shaders 
are busy. */ - if (cmd_buffer->gds_needed) + if (cmd_buffer->queue_state.gds_needed) cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; } @@ -8679,7 +8668,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ if (pdev->use_ngg_streamout && pdev->info.gfx_level < GFX12) { /* GFX11 needs GDS OA for streamout. */ - cmd_buffer->gds_oa_needed = true; + cmd_buffer->queue_state.gds_oa_needed = true; } } @@ -8773,7 +8762,7 @@ radv_bind_tess_ctrl_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv { radv_bind_pre_rast_shader(cmd_buffer, tcs); - cmd_buffer->tess_rings_needed = true; + cmd_buffer->queue_state.tess_rings_needed = true; /* Always re-emit patch control points/domain origin when a new pipeline with tessellation is * bound because a bunch of parameters (user SGPRs, TCS vertices out, ccw, etc) can be different. @@ -8802,8 +8791,8 @@ radv_bind_geometry_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ radv_bind_pre_rast_shader(cmd_buffer, gs); if (!gs->info.is_ngg && !gs->info.merged_shader_compiled_separately) { - cmd_buffer->esgs_ring_size_needed = MAX2(cmd_buffer->esgs_ring_size_needed, gs->regs.gs.esgs_ring_size); - cmd_buffer->gsvs_ring_size_needed = MAX2(cmd_buffer->gsvs_ring_size_needed, gs->regs.gs.gsvs_ring_size); + cmd_buffer->queue_state.esgs_ring_size_needed = MAX2(cmd_buffer->queue_state.esgs_ring_size_needed, gs->regs.gs.esgs_ring_size); + cmd_buffer->queue_state.gsvs_ring_size_needed = MAX2(cmd_buffer->queue_state.gsvs_ring_size_needed, gs->regs.gs.gsvs_ring_size); } /* Re-emit the VS prolog when the geometry shader is compiled separately because shader configs @@ -8836,7 +8825,7 @@ radv_bind_mesh_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_shad { radv_bind_pre_rast_shader(cmd_buffer, ms); - cmd_buffer->mesh_scratch_ring_needed |= ms->info.ms.needs_ms_scratch_ring; + cmd_buffer->queue_state.mesh_scratch_ring_needed |= ms->info.ms.needs_ms_scratch_ring; } static void @@ 
-8848,7 +8837,7 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ const struct radv_shader *previous_ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; if (ps->info.ps.needs_sample_positions) { - cmd_buffer->sample_positions_needed = true; + cmd_buffer->queue_state.sample_positions_needed = true; } if (ps->info.ps.has_epilog) @@ -8887,7 +8876,7 @@ radv_bind_task_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_shad if (radv_get_user_sgpr_info(ts, AC_UD_TASK_STATE)->sgpr_idx != -1) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_TASK_STATE; - cmd_buffer->task_rings_needed = true; + cmd_buffer->queue_state.task_rings_needed = true; } static void @@ -8899,7 +8888,7 @@ radv_bind_rt_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *rt_p struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const unsigned max_scratch_waves = radv_get_max_scratch_waves(device, rt_prolog); - cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted, max_scratch_waves); + cmd_buffer->queue_state.compute_scratch_waves_wanted = MAX2(cmd_buffer->queue_state.compute_scratch_waves_wanted, max_scratch_waves); cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, rt_prolog->upload_seq); @@ -8992,11 +8981,11 @@ radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, radv_bind_task_shader(cmd_buffer, shader); break; case MESA_SHADER_COMPUTE: { - cmd_buffer->compute_scratch_size_per_wave_needed = - MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, shader->config.scratch_bytes_per_wave); + cmd_buffer->queue_state.compute_scratch_size_per_wave_needed = + MAX2(cmd_buffer->queue_state.compute_scratch_size_per_wave_needed, shader->config.scratch_bytes_per_wave); const unsigned max_stage_waves = radv_get_max_scratch_waves(device, shader); - cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted, max_stage_waves); + 
cmd_buffer->queue_state.compute_scratch_waves_wanted = MAX2(cmd_buffer->queue_state.compute_scratch_waves_wanted, max_stage_waves); break; } case MESA_SHADER_INTERSECTION: @@ -9010,11 +8999,11 @@ radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, cmd_buffer->state.active_stages |= mesa_to_vk_shader_stage(stage); if (mesa_to_vk_shader_stage(stage) & RADV_GRAPHICS_STAGE_BITS) { - cmd_buffer->scratch_size_per_wave_needed = - MAX2(cmd_buffer->scratch_size_per_wave_needed, shader->config.scratch_bytes_per_wave); + cmd_buffer->queue_state.scratch_size_per_wave_needed = + MAX2(cmd_buffer->queue_state.scratch_size_per_wave_needed, shader->config.scratch_bytes_per_wave); const unsigned max_stage_waves = radv_get_max_scratch_waves(device, shader); - cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, max_stage_waves); + cmd_buffer->queue_state.scratch_waves_wanted = MAX2(cmd_buffer->queue_state.scratch_waves_wanted, max_stage_waves); } cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, shader->upload_seq); @@ -9896,6 +9885,25 @@ radv_handle_depth_fbfetch_output(struct radv_cmd_buffer *cmd_buffer, struct radv att->iview->image, &range); } +static void +radv_merge_queue_state(const struct radv_cmd_buffer_queue_state *src, struct radv_cmd_buffer_queue_state *dst) +{ /* Fold src into dst so dst covers both: sizes and wave counts take the maximum, boolean needs are OR'ed. */ + dst->scratch_size_per_wave_needed = MAX2(dst->scratch_size_per_wave_needed, src->scratch_size_per_wave_needed); + dst->scratch_waves_wanted = MAX2(dst->scratch_waves_wanted, src->scratch_waves_wanted); + dst->compute_scratch_size_per_wave_needed = + MAX2(dst->compute_scratch_size_per_wave_needed, src->compute_scratch_size_per_wave_needed); + dst->compute_scratch_waves_wanted = MAX2(dst->compute_scratch_waves_wanted, src->compute_scratch_waves_wanted); + dst->esgs_ring_size_needed = MAX2(dst->esgs_ring_size_needed, src->esgs_ring_size_needed); + dst->gsvs_ring_size_needed = MAX2(dst->gsvs_ring_size_needed, src->gsvs_ring_size_needed); + /* Flags stay set once either side requires the resource. */ + 
dst->tess_rings_needed |= src->tess_rings_needed; + dst->task_rings_needed |= src->task_rings_needed; + dst->mesh_scratch_ring_needed |= src->mesh_scratch_ring_needed; + dst->gds_needed |= src->gds_needed; + dst->gds_oa_needed |= src->gds_oa_needed; + dst->sample_positions_needed |= src->sample_positions_needed; +} + VKAPI_ATTR void VKAPI_CALL radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers) { @@ -9924,30 +9932,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou */ const bool allow_ib2 = !secondary->state.uses_draw_indirect || pdev->info.gfx_level >= GFX8; - primary->scratch_size_per_wave_needed = - MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed); - primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted); - primary->compute_scratch_size_per_wave_needed = - MAX2(primary->compute_scratch_size_per_wave_needed, secondary->compute_scratch_size_per_wave_needed); - primary->compute_scratch_waves_wanted = - MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted); - - if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed) - primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed; - if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed) - primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed; - if (secondary->tess_rings_needed) - primary->tess_rings_needed = true; - if (secondary->task_rings_needed) - primary->task_rings_needed = true; - if (secondary->mesh_scratch_ring_needed) - primary->mesh_scratch_ring_needed = true; - if (secondary->sample_positions_needed) - primary->sample_positions_needed = true; - if (secondary->gds_needed) - primary->gds_needed = true; - if (secondary->gds_oa_needed) - primary->gds_oa_needed = true; + radv_merge_queue_state(&secondary->queue_state, &primary->queue_state); 
primary->shader_upload_seq = MAX2(primary->shader_upload_seq, secondary->shader_upload_seq); @@ -13110,8 +13095,8 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) radv_get_legacy_gs_info(&device->compiler_info, &es->info, &gs->info); radv_precompute_registers_hw_gs(device, &es->info, gs); - cmd_buffer->esgs_ring_size_needed = MAX2(cmd_buffer->esgs_ring_size_needed, gs->regs.gs.esgs_ring_size); - cmd_buffer->gsvs_ring_size_needed = MAX2(cmd_buffer->gsvs_ring_size_needed, gs->regs.gs.gsvs_ring_size); + cmd_buffer->queue_state.esgs_ring_size_needed = MAX2(cmd_buffer->queue_state.esgs_ring_size_needed, gs->regs.gs.esgs_ring_size); + cmd_buffer->queue_state.gsvs_ring_size_needed = MAX2(cmd_buffer->queue_state.gsvs_ring_size_needed, gs->regs.gs.gsvs_ring_size); } } @@ -13805,10 +13790,10 @@ radv_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPr if (ies) { radv_cs_add_buffer(device->ws, cs->b, ies->bo); - cmd_buffer->compute_scratch_size_per_wave_needed = - MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, ies->compute_scratch_size_per_wave); - cmd_buffer->compute_scratch_waves_wanted = - MAX2(cmd_buffer->compute_scratch_waves_wanted, ies->compute_scratch_waves); + cmd_buffer->queue_state.compute_scratch_size_per_wave_needed = + MAX2(cmd_buffer->queue_state.compute_scratch_size_per_wave_needed, ies->compute_scratch_size_per_wave); + cmd_buffer->queue_state.compute_scratch_waves_wanted = + MAX2(cmd_buffer->queue_state.compute_scratch_waves_wanted, ies->compute_scratch_waves); } /* Secondary command buffers are banned. 
*/ @@ -14236,8 +14221,8 @@ radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer) uint32_t scratch_bytes_per_wave = align(cmd_buffer->state.rt_stack_size * wave_size, pdev->info.scratch_wavesize_granularity); - cmd_buffer->compute_scratch_size_per_wave_needed = - MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, scratch_bytes_per_wave); + cmd_buffer->queue_state.compute_scratch_size_per_wave_needed = + MAX2(cmd_buffer->queue_state.compute_scratch_size_per_wave_needed, scratch_bytes_per_wave); if (cmd_buffer->state.rt_stack_size) rsrc2 |= S_00B12C_SCRATCH_EN(1); diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index aaa467f5afe..136a8ff4cc3 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -511,6 +511,21 @@ struct radv_cmd_stream { struct ac_buffered_sh_regs buffered_sh_regs; }; +struct radv_cmd_buffer_queue_state { /* Per-command-buffer requirements accumulated while recording; zeroed in radv_reset_cmd_buffer(), merged from secondaries by radv_CmdExecuteCommands() and folded into the preamble needs by radv_update_preambles(). */ + uint32_t scratch_size_per_wave_needed; /* Max scratch_bytes_per_wave of bound graphics shaders */ + uint32_t scratch_waves_wanted; /* Max radv_get_max_scratch_waves() of bound graphics shaders */ + uint32_t compute_scratch_size_per_wave_needed; /* Same for compute; also raised by radv_emit_rt_stack_size() and radv_CmdExecuteGeneratedCommandsEXT() */ + uint32_t compute_scratch_waves_wanted; /* Max over bound compute shaders, RT prologs and generated commands */ + uint32_t esgs_ring_size_needed; /* Max gs->regs.gs.esgs_ring_size seen when binding geometry shaders */ + uint32_t gsvs_ring_size_needed; /* Max gs->regs.gs.gsvs_ring_size, tracked alongside the ESGS ring size */ + bool tess_rings_needed; /* Set when a tessellation control shader is bound */ + bool task_rings_needed; /* Set when a task shader is bound */ + bool mesh_scratch_ring_needed; /* Set when a bound mesh shader has needs_ms_scratch_ring */ + bool gds_needed; /* Emulated queries on GFX10-GFX10.3 */ + bool gds_oa_needed; /* NGG streamout on GFX11-GFX11.5 */ + bool sample_positions_needed; /* Set when a bound fragment shader has needs_sample_positions */ +}; + struct radv_cmd_buffer { struct vk_command_buffer vk; @@ -533,18 +548,7 @@ struct radv_cmd_buffer { struct radv_cmd_buffer_upload upload; - uint32_t scratch_size_per_wave_needed; - uint32_t scratch_waves_wanted; - uint32_t compute_scratch_size_per_wave_needed; - uint32_t compute_scratch_waves_wanted; - uint32_t esgs_ring_size_needed; - uint32_t gsvs_ring_size_needed; - bool tess_rings_needed; - bool task_rings_needed; - bool mesh_scratch_ring_needed; - bool gds_needed; /* Emulated queries on GFX10-GFX10.3 */ - bool gds_oa_needed; /* NGG streamout on GFX11-GFX11.5 */ - bool sample_positions_needed; + struct 
radv_cmd_buffer_queue_state queue_state; uint64_t gfx9_fence_va; uint32_t gfx9_fence_idx; diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 5308ba88b06..8b1e25b75a6 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -648,7 +648,7 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q } /* Record that the command buffer needs GDS. */ - cmd_buffer->gds_needed = true; + cmd_buffer->queue_state.gds_needed = true; if (!cmd_buffer->state.active_emulated_pipeline_queries) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY; @@ -674,7 +674,7 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q ac_emit_cp_write_data_imm(ace_cs->b, V_371_MICRO_ENGINE, va + task_invoc_offset + 4, 0x80000000); /* Record that the command buffer needs GDS. */ - cmd_buffer->gds_needed = true; + cmd_buffer->queue_state.gds_needed = true; if (!cmd_buffer->state.active_pipeline_ace_queries) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY; @@ -1366,7 +1366,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool * ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 36, 0x80000000); /* Record that the command buffer needs GDS. */ - cmd_buffer->gds_needed = true; + cmd_buffer->queue_state.gds_needed = true; if (!cmd_buffer->state.active_emulated_prims_gen_queries) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY; @@ -1593,7 +1593,7 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va) ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000); /* Record that the command buffer needs GDS. 
*/ - cmd_buffer->gds_needed = true; + cmd_buffer->queue_state.gds_needed = true; if (!cmd_buffer->state.active_emulated_prims_gen_queries) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY; diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index 1074d395865..f25b85b56b5 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -1228,19 +1228,19 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device for (uint32_t j = 0; j < cmd_buffer_count; j++) { struct radv_cmd_buffer *cmd_buffer = container_of(cmd_buffers[j], struct radv_cmd_buffer, vk); - needs.scratch_size_per_wave = MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed); - needs.scratch_waves = MAX2(needs.scratch_waves, cmd_buffer->scratch_waves_wanted); + needs.scratch_size_per_wave = MAX2(needs.scratch_size_per_wave, cmd_buffer->queue_state.scratch_size_per_wave_needed); + needs.scratch_waves = MAX2(needs.scratch_waves, cmd_buffer->queue_state.scratch_waves_wanted); needs.compute_scratch_size_per_wave = - MAX2(needs.compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed); - needs.compute_scratch_waves = MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted); - needs.esgs_ring_size = MAX2(needs.esgs_ring_size, cmd_buffer->esgs_ring_size_needed); - needs.gsvs_ring_size = MAX2(needs.gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); - needs.tess_rings |= cmd_buffer->tess_rings_needed; - needs.task_rings |= cmd_buffer->task_rings_needed; - needs.mesh_scratch_ring |= cmd_buffer->mesh_scratch_ring_needed; - needs.gds |= cmd_buffer->gds_needed; - needs.gds_oa |= cmd_buffer->gds_oa_needed; - needs.sample_positions |= cmd_buffer->sample_positions_needed; + MAX2(needs.compute_scratch_size_per_wave, cmd_buffer->queue_state.compute_scratch_size_per_wave_needed); + needs.compute_scratch_waves = MAX2(needs.compute_scratch_waves, 
cmd_buffer->queue_state.compute_scratch_waves_wanted); + needs.esgs_ring_size = MAX2(needs.esgs_ring_size, cmd_buffer->queue_state.esgs_ring_size_needed); + needs.gsvs_ring_size = MAX2(needs.gsvs_ring_size, cmd_buffer->queue_state.gsvs_ring_size_needed); + needs.tess_rings |= cmd_buffer->queue_state.tess_rings_needed; + needs.task_rings |= cmd_buffer->queue_state.task_rings_needed; + needs.mesh_scratch_ring |= cmd_buffer->queue_state.mesh_scratch_ring_needed; + needs.gds |= cmd_buffer->queue_state.gds_needed; + needs.gds_oa |= cmd_buffer->queue_state.gds_oa_needed; + needs.sample_positions |= cmd_buffer->queue_state.sample_positions_needed; *use_perf_counters |= cmd_buffer->state.uses_perf_counters; *has_follower |= !!cmd_buffer->gang.cs; }