From 3ccb48ec460de6f33e007907543111da0ad120a7 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 23 Jul 2025 12:51:16 +0200
Subject: [PATCH] radv: switch to radv_cmd_stream everywhere

Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/layers/radv_ctx_roll_layer.c |   4 +-
 src/amd/vulkan/layers/radv_sqtt_layer.c     |   4 +-
 src/amd/vulkan/meta/radv_meta.c             |   3 +-
 src/amd/vulkan/meta/radv_meta_astc_decode.c |   3 +-
 src/amd/vulkan/meta/radv_meta_buffer.c      |  19 +-
 src/amd/vulkan/meta/radv_meta_bufimage.c    |   3 +-
 src/amd/vulkan/meta/radv_meta_clear.c       |   3 +-
 src/amd/vulkan/meta/radv_meta_copy.c        |  22 +-
 src/amd/vulkan/meta/radv_meta_dcc_retile.c  |   3 +-
 src/amd/vulkan/radv_cmd_buffer.c            | 585 +++++++++++---------
 src/amd/vulkan/radv_cmd_buffer.h            |   6 +-
 src/amd/vulkan/radv_cp_dma.c                |  10 +-
 src/amd/vulkan/radv_cp_dma.h                |   4 +-
 src/amd/vulkan/radv_cp_reg_shadowing.c      |  45 +-
 src/amd/vulkan/radv_cp_reg_shadowing.h      |   3 +-
 src/amd/vulkan/radv_cs.c                    |  14 +-
 src/amd/vulkan/radv_cs.h                    |  22 +-
 src/amd/vulkan/radv_descriptors.h           |  22 +-
 src/amd/vulkan/radv_device.c                |  27 +-
 src/amd/vulkan/radv_device.h                |   5 +-
 src/amd/vulkan/radv_dgc.c                   |  26 +-
 src/amd/vulkan/radv_perfcounter.c           |  54 +-
 src/amd/vulkan/radv_perfcounter.h           |   8 +-
 src/amd/vulkan/radv_query.c                 | 120 ++--
 src/amd/vulkan/radv_queue.c                 | 247 +++++----
 src/amd/vulkan/radv_queue.h                 |  15 +-
 src/amd/vulkan/radv_sdma.c                  |  48 +-
 src/amd/vulkan/radv_sdma.h                  |  28 +-
 src/amd/vulkan/radv_shader.c                |  24 +-
 src/amd/vulkan/radv_shader.h                |   2 +-
 src/amd/vulkan/radv_spm.c                   |  16 +-
 src/amd/vulkan/radv_spm.h                   |   2 +-
 src/amd/vulkan/radv_sqtt.c                  |  95 ++--
 src/amd/vulkan/radv_sqtt.h                  |   4 +-
 src/amd/vulkan/radv_video.c                 |  94 ++--
 src/amd/vulkan/radv_video.h                 |   5 +-
 src/amd/vulkan/radv_video_enc.c             |  70 +--
 37 files changed, 900 insertions(+), 765 deletions(-)

diff --git a/src/amd/vulkan/layers/radv_ctx_roll_layer.c b/src/amd/vulkan/layers/radv_ctx_roll_layer.c
index 844070b98a6..cd789e11dd6 100644
--- a/src/amd/vulkan/layers/radv_ctx_roll_layer.c
+++ b/src/amd/vulkan/layers/radv_ctx_roll_layer.c
@@ -39,8 +39,10 @@ ctx_roll_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2
       const VkSubmitInfo2 *submit = pSubmits + submit_index;
       for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
          VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submit->pCommandBufferInfos[i].commandBuffer);
+         struct radv_cmd_stream *cs = cmd_buffer->cs;
+
          fprintf(device->ctx_roll_file, "\n%s:\n", vk_object_base_name(&cmd_buffer->vk.base));
-         device->ws->cs_dump(cmd_buffer->cs, device->ctx_roll_file, NULL, 0, RADV_CS_DUMP_TYPE_CTX_ROLLS);
+         device->ws->cs_dump(cs->b, device->ctx_roll_file, NULL, 0, RADV_CS_DUMP_TYPE_CTX_ROLLS);
       }
    }
 }
diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 0ff829fd35b..0d4075c05a9 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -23,7 +23,7 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    radv_foreach_stage (s, RADV_GRAPHICS_STAGE_BITS & ~VK_SHADER_STAGE_TASK_BIT_EXT) {
       const struct radv_shader *shader = pipeline->base.shaders[s];
@@ -306,7 +306,7 @@ radv_gfx12_write_draw_marker(struct radv_cmd_buffer *cmd_buffer, const struct ra
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    /* RGP doesn't need this marker for indirect draws. */
    if (draw_info->indirect_va)
diff --git a/src/amd/vulkan/meta/radv_meta.c b/src/amd/vulkan/meta/radv_meta.c
index bdb95fc9a22..0349dba7eb9 100644
--- a/src/amd/vulkan/meta/radv_meta.c
+++ b/src/amd/vulkan/meta/radv_meta.c
@@ -367,6 +367,7 @@ radv_meta_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoi
    VK_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    struct radv_descriptor_set_layout *set_layout = layout->set[0].layout;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t upload_offset;
    uint8_t *ptr;
 
@@ -395,7 +396,7 @@ radv_meta_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoi
          VK_FROM_HANDLE(radv_image_view, iview, image_view);
          for (uint32_t b = 0; b < ARRAY_SIZE(iview->image->bindings); b++) {
             if (iview->image->bindings[b].bo)
-               radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo);
+               radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo);
          }
       }
    }
diff --git a/src/amd/vulkan/meta/radv_meta_astc_decode.c b/src/amd/vulkan/meta/radv_meta_astc_decode.c
index c58b25a3f27..e38bc5b6160 100644
--- a/src/amd/vulkan/meta/radv_meta_astc_decode.c
+++ b/src/amd/vulkan/meta/radv_meta_astc_decode.c
@@ -17,6 +17,7 @@ decode_astc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivie
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    struct radv_meta_state *state = &device->meta_state;
    struct vk_texcompress_astc_write_descriptor_buffer desc_buffer;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    VkFormat format = src_iview->image->vk.format;
    int blk_w = vk_format_get_blockwidth(format);
    int blk_h = vk_format_get_blockheight(format);
@@ -26,7 +27,7 @@ decode_astc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivie
                                                    radv_image_view_to_handle(dst_iview), format);
 
    VK_FROM_HANDLE(radv_buffer, luts_buf, state->astc_decode->luts_buf);
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, luts_buf->bo);
+   radv_cs_add_buffer(device->ws, cs->b, luts_buf->bo);
 
    radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->astc_decode->p_layout,
                               VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT, desc_buffer.descriptors);
diff --git a/src/amd/vulkan/meta/radv_meta_buffer.c b/src/amd/vulkan/meta/radv_meta_buffer.c
index e0e4bc01101..66fd17ab937 100644
--- a/src/amd/vulkan/meta/radv_meta_buffer.c
+++ b/src/amd/vulkan/meta/radv_meta_buffer.c
@@ -319,8 +319,9 @@ radv_fill_image(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *ima
    const uint64_t va = image->bindings[0].addr + offset;
    struct radeon_winsys_bo *bo = image->bindings[0].bo;
    const enum radv_copy_flags copy_flags = radv_get_copy_flags_from_bo(bo);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+   radv_cs_add_buffer(device->ws, cs->b, bo);
 
    return radv_fill_memory_internal(cmd_buffer, image, va, size, value, copy_flags);
 }
@@ -331,8 +332,9 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const enum radv_copy_flags copy_flags = radv_get_copy_flags_from_bo(bo);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+   radv_cs_add_buffer(device->ws, cs->b, bo);
 
    return radv_fill_memory(cmd_buffer, va, size, value, copy_flags);
 }
@@ -378,14 +380,15 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop
    VK_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
    VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const enum radv_copy_flags src_copy_flags = radv_get_copy_flags_from_bo(src_buffer->bo);
    const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);
 
    radv_suspend_conditional_rendering(cmd_buffer);
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_buffer->bo);
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, src_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
 
    for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
       const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];
 
@@ -404,13 +407,14 @@ radv_update_memory_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    uint64_t words = size / 4;
    bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    assert(size < RADV_BUFFER_UPDATE_THRESHOLD);
 
    radv_emit_cache_flush(cmd_buffer);
-   radeon_check_space(device->ws, cmd_buffer->cs, words + 4);
+   radeon_check_space(device->ws, cs->b, words + 4);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + words, 0));
    radeon_emit(S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
    radeon_emit(va);
@@ -454,12 +458,13 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
    VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);
 
    radv_suspend_conditional_rendering(cmd_buffer);
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
 
    radv_update_memory(cmd_buffer, dst_va, dataSize, pData, dst_copy_flags);
 
diff --git a/src/amd/vulkan/meta/radv_meta_bufimage.c b/src/amd/vulkan/meta/radv_meta_bufimage.c
index ea7ef9223d3..9af878b0887 100644
--- a/src/amd/vulkan/meta/radv_meta_bufimage.c
+++ b/src/amd/vulkan/meta/radv_meta_bufimage.c
@@ -1067,6 +1067,7 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct ra
                                    const VkClearColorValue *clear_color)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    VkPipelineLayout layout;
    VkPipeline pipeline;
    unsigned stride;
@@ -1078,7 +1079,7 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct ra
       return;
    }
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst->image->bindings[0].bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst->image->bindings[0].bo);
 
    radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 1,
                               (VkDescriptorGetInfoEXT[]){{
diff --git a/src/amd/vulkan/meta/radv_meta_clear.c b/src/amd/vulkan/meta/radv_meta_clear.c
index 94151c5f489..3963d58f643 100644
--- a/src/amd/vulkan/meta/radv_meta_clear.c
+++ b/src/amd/vulkan/meta/radv_meta_clear.c
@@ -591,6 +591,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
                  uint64_t va, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint64_t block_count = DIV_ROUND_UP(size, 1024);
    struct radv_meta_saved_state saved_state;
    VkPipelineLayout layout;
@@ -603,7 +604,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
       return 0;
    }
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+   radv_cs_add_buffer(device->ws, cs->b, bo);
 
    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
 
diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c
index 845400d3835..bb80d7b9968 100644
--- a/src/amd/vulkan/meta/radv_meta_copy.c
+++ b/src/amd/vulkan/meta/radv_meta_copy.c
@@ -56,6 +56,7 @@ static bool
 alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (!cmd_buffer->transfer.copy_temp) {
       const VkResult r =
@@ -69,7 +70,7 @@ alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
       }
    }
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, cmd_buffer->transfer.copy_temp);
+   radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->transfer.copy_temp);
 
    return true;
 }
@@ -78,7 +79,7 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v
                            const VkBufferImageCopy2 *region, bool to_image)
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    struct radv_sdma_surf buf = radv_sdma_get_buf_surf(buffer_va, image, region);
    const struct radv_sdma_surf img =
       radv_sdma_get_surf(device, image, region->imageSubresource, region->imageOffset);
@@ -217,19 +218,20 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
    VK_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const enum radv_copy_flags src_copy_flags = radv_get_copy_flags_from_bo(src_buffer->bo);
 
    radv_suspend_conditional_rendering(cmd_buffer);
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, src_buffer->bo);
 
    for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
       const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
       const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
       const unsigned bind_idx = dst_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;
 
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_image->bindings[bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[bind_idx].bo);
 
       copy_memory_to_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, src_copy_flags, dst_image,
                            pCopyBufferToImageInfo->dstImageLayout, region);
@@ -368,19 +370,20 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf
    VK_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
    VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);
 
    radv_suspend_conditional_rendering(cmd_buffer);
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
 
    for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
       const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
       const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
       const unsigned bind_idx = src_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;
 
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_image->bindings[bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[bind_idx].bo);
 
       copy_image_to_memory(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags, src_image,
                            pCopyImageToBufferInfo->srcImageLayout, region);
@@ -394,7 +397,7 @@ transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_i
                     struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned int dst_aspect_mask_remaining = region->dstSubresource.aspectMask;
    VkImageSubresourceLayers src_subresource = region->srcSubresource;
 
@@ -650,6 +653,7 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
    VK_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    radv_suspend_conditional_rendering(cmd_buffer);
 
@@ -660,8 +664,8 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
       const VkImageAspectFlags dst_aspect_mask = region->dstSubresource.aspectMask;
       const unsigned dst_bind_idx = dst_image->disjoint ? radv_plane_from_aspect(dst_aspect_mask) : 0;
 
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_image->bindings[src_bind_idx].bo);
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_image->bindings[dst_bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[src_bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[dst_bind_idx].bo);
 
       copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout,
                  region);
diff --git a/src/amd/vulkan/meta/radv_meta_dcc_retile.c b/src/amd/vulkan/meta/radv_meta_dcc_retile.c
index 341851393d1..694eddd2361 100644
--- a/src/amd/vulkan/meta/radv_meta_dcc_retile.c
+++ b/src/amd/vulkan/meta/radv_meta_dcc_retile.c
@@ -110,6 +110,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
 {
    struct radv_meta_saved_state saved_state;
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    VkPipelineLayout layout;
    VkPipeline pipeline;
    VkResult result;
@@ -135,7 +136,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
 
    const uint64_t va = image->bindings[0].addr;
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, image->bindings[0].bo);
+   radv_cs_add_buffer(device->ws, cs->b, image->bindings[0].bo);
 
    radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 2,
                               (VkDescriptorGetInfoEXT[]){
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index fe73bc56f92..b3eefef664c 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -341,9 +341,10 @@ radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
    }
 
    if (cmd_buffer->cs)
-      device->ws->cs_destroy(cmd_buffer->cs);
+      radv_destroy_cmd_stream(device, cmd_buffer->cs);
    if (cmd_buffer->gang.cs)
-      device->ws->cs_destroy(cmd_buffer->gang.cs);
+      radv_destroy_cmd_stream(device, cmd_buffer->gang.cs);
+
    if (cmd_buffer->transfer.copy_temp)
       radv_bo_destroy(device, &cmd_buffer->vk.base, cmd_buffer->transfer.copy_temp);
 
@@ -369,7 +370,7 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, VkCommandBufferLevel level,
    struct radv_device *device = container_of(pool->base.device, struct radv_device, vk);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_cmd_buffer *cmd_buffer;
-   unsigned ring;
+
    cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (cmd_buffer == NULL)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -390,11 +391,9 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, VkCommandBufferLevel level,
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
    }
 
-   ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
-
-   cmd_buffer->cs =
-      device->ws->cs_create(device->ws, ring, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
-   if (!cmd_buffer->cs) {
+   result = radv_create_cmd_stream(device, cmd_buffer->qf, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY,
+                                   &cmd_buffer->cs);
+   if (result != VK_SUCCESS) {
       radv_destroy_cmd_buffer(&cmd_buffer->vk);
       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
    }
@@ -443,15 +442,16 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB
 {
    struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    vk_command_buffer_reset(&cmd_buffer->vk);
 
    if (cmd_buffer->qf == RADV_QUEUE_SPARSE)
       return;
 
-   device->ws->cs_reset(cmd_buffer->cs);
+   radv_reset_cmd_stream(device, cs);
    if (cmd_buffer->gang.cs)
-      device->ws->cs_reset(cmd_buffer->gang.cs);
+      radv_reset_cmd_stream(device, cmd_buffer->gang.cs);
 
    list_for_each_entry_safe (struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) {
       radv_rmv_log_command_buffer_bo_destroy(device, up->upload_bo);
@@ -483,7 +483,7 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB
    cmd_buffer->shader_upload_seq = 0;
 
    if (cmd_buffer->upload.upload_bo)
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo);
+      radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->upload.upload_bo);
    cmd_buffer->upload.offset = 0;
 
    for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
@@ -504,6 +504,7 @@ static bool
 radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint64_t new_size;
    struct radeon_winsys_bo *bo = NULL;
    struct radv_cmd_buffer_upload *upload;
@@ -521,7 +522,7 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t m
       return false;
    }
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+   radv_cs_add_buffer(device->ws, cs->b, bo);
 
    if (cmd_buffer->upload.upload_bo) {
       upload = malloc(sizeof(*upload));
@@ -607,7 +608,7 @@ void
 radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint64_t va;
 
    if (cmd_buffer->qf != RADV_QUEUE_GENERAL && cmd_buffer->qf != RADV_QUEUE_COMPUTE)
@@ -622,7 +623,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
    ++cmd_buffer->state.trace_id;
    radv_write_data(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id, false);
 
-   radeon_check_space(device->ws, cs, 2);
+   radeon_check_space(device->ws, cs->b, 2);
 
    radeon_begin(cs);
    radeon_emit(PKT3(PKT3_NOP, 0, 0));
@@ -634,8 +635,9 @@ void
 radv_cmd_buffer_annotate(struct radv_cmd_buffer *cmd_buffer, const char *annotation)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
-   device->ws->cs_annotate(cmd_buffer->cs, annotation);
+   device->ws->cs_annotate(cs->b, annotation);
 }
 
 #define RADV_TASK_SHADER_SENSITIVE_STAGES \
@@ -673,7 +675,7 @@ radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs;
+   struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
    const uint32_t flush_bits = cmd_buffer->gang.flush_bits;
    enum rgp_flush_bits sqtt_flush_bits = 0;
 
@@ -716,8 +718,8 @@ radv_gang_follower_sem_dirty(const struct radv_cmd_buffer *cmd_buffer)
 }
 
 ALWAYS_INLINE static bool
-radv_flush_gang_semaphore(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
-                          const uint32_t va_off, const uint32_t value)
+radv_flush_gang_semaphore(struct radv_cmd_buffer *cmd_buffer, struct radv_cmd_stream *cs,
+                          const enum radv_queue_family qf, const uint32_t va_off, const uint32_t value)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -725,13 +727,13 @@ radv_flush_gang_semaphore(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdb
    if (!radv_gang_sem_init(cmd_buffer))
       return false;
 
-   ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 12);
+   ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 12);
 
    radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
                                 EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va + va_off, value,
                                 cmd_buffer->gfx9_eop_bug_va);
 
-   assert(cmd_buffer->cs->cdw <= cdw_max);
+   assert(cs->b->cdw <= cdw_max);
 
    return true;
 }
@@ -759,13 +761,13 @@ radv_flush_gang_follower_semaphore(struct radv_cmd_buffer *cmd_buffer)
 }
 
 ALWAYS_INLINE static void
-radv_wait_gang_semaphore(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
-                         const uint32_t va_off, const uint32_t value)
+radv_wait_gang_semaphore(struct radv_cmd_buffer *cmd_buffer, struct radv_cmd_stream *cs,
+                         const enum radv_queue_family qf, const uint32_t va_off, const uint32_t value)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
 
    assert(cmd_buffer->gang.sem.va);
-   radeon_check_space(device->ws, cs, 7);
+   radeon_check_space(device->ws, cs->b, 7);
    radv_cp_wait_mem(cs, qf, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->gang.sem.va + va_off, value, 0xffffffff);
 }
 
@@ -787,19 +789,18 @@ bool
 radv_gang_init(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   VkResult result;
 
    if (cmd_buffer->gang.cs)
       return true;
 
-   struct radeon_cmdbuf *ace_cs =
-      device->ws->cs_create(device->ws, AMD_IP_COMPUTE, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
-
-   if (!ace_cs) {
-      vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   result = radv_create_cmd_stream(device, RADV_QUEUE_COMPUTE,
+                                   cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY, &cmd_buffer->gang.cs);
+   if (result != VK_SUCCESS) {
+      vk_command_buffer_set_error(&cmd_buffer->vk, result);
       return false;
    }
 
-   cmd_buffer->gang.cs = ace_cs;
    return true;
 }
 
@@ -809,7 +810,7 @@ radv_gang_finalize(struct radv_cmd_buffer *cmd_buffer)
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
 
    assert(cmd_buffer->gang.cs);
-   struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs;
+   struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
 
    /* Emit pending cache flush. */
    radv_gang_cache_flush(cmd_buffer);
@@ -829,7 +830,7 @@ radv_gang_finalize(struct radv_cmd_buffer *cmd_buffer)
       radv_write_data(cmd_buffer, V_370_ME, follower2leader_va, 1, &zero, false);
    }
 
-   return device->ws->cs_finalize(ace_cs);
+   return radv_finalize_cmd_stream(device, cmd_buffer->gang.cs);
 }
 
 static void
@@ -838,10 +839,11 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flu
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_instance *instance = radv_physical_device_instance(pdev);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (unlikely(device->sqtt.bo) && !dgc) {
-      radeon_check_space(device->ws, cmd_buffer->cs, 2);
-      radeon_begin(cmd_buffer->cs);
+      radeon_check_space(device->ws, cs->b, 2);
+      radeon_begin(cs);
       radeon_event_write_predicate(V_028A90_THREAD_TRACE_MARKER, cmd_buffer->state.predicating);
       radeon_end();
    }
@@ -851,7 +853,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flu
       assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
 
       /* Force wait for graphics or compute engines to be idle. */
-      radv_cs_emit_cache_flush(device->ws, cmd_buffer->cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx,
+      radv_cs_emit_cache_flush(device->ws, cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx,
                                cmd_buffer->gfx9_fence_va, cmd_buffer->qf, flags, &sqtt_flush_bits,
                                cmd_buffer->gfx9_eop_bug_va);
 
@@ -959,8 +961,8 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bi
 }
 
 static void
-radv_emit_userdata_address(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader,
-                           int idx, uint64_t va)
+radv_emit_userdata_address(const struct radv_device *device, struct radv_cmd_stream *cs,
+                           const struct radv_shader *shader, int idx, uint64_t va)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const uint32_t offset = radv_get_user_sgpr_loc(shader, idx);
@@ -989,7 +991,7 @@ radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, un
 }
 
 static void
-radv_emit_descriptors_per_stage(const struct radv_device *device, struct radeon_cmdbuf *cs,
+radv_emit_descriptors_per_stage(const struct radv_device *device, struct radv_cmd_stream *cs,
                                 const struct radv_shader *shader, const struct radv_descriptor_state *descriptors_state)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -1203,7 +1205,7 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
    uint32_t num_samples = (uint32_t)d->sample_location.per_pixel;
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t sample_locs_pixel[4][2] = {0};
    VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
    uint64_t centroid_priority;
@@ -1284,7 +1286,7 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
 }
 
 static void
-radv_emit_inline_push_consts(const struct radv_device *device, struct radeon_cmdbuf *cs,
+radv_emit_inline_push_consts(const struct radv_device *device, struct radv_cmd_stream *cs,
                              const struct radv_shader *shader, int idx, const uint32_t *values)
 {
    const struct radv_userdata_info *loc = &shader->info.user_sgprs_locs.shader_data[idx];
@@ -1293,7 +1295,7 @@ radv_emit_inline_push_consts(const struct radv_device *device, struct radeon_cmd
    if (loc->sgpr_idx == -1)
       return;
 
-   radeon_check_space(device->ws, cs, 2 + loc->num_sgprs);
+   radeon_check_space(device->ws, cs->b, 2 + loc->num_sgprs);
 
    radeon_begin(cs);
    radeon_set_sh_reg_seq(base_reg + loc->sgpr_idx * 4, loc->num_sgprs);
   radeon_emit_array(values, loc->num_sgprs);
@@ -1747,6 +1749,7 @@ radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned pa_sc_binner_cntl_0;
 
    if (pdev->info.gfx_level < GFX9)
@@ -1754,7 +1757,7 @@ radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer)
 
    pa_sc_binner_cntl_0 = radv_get_binning_state(cmd_buffer);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg(cmd_buffer, R_028C44_PA_SC_BINNER_CNTL_0, RADV_TRACKED_PA_SC_BINNER_CNTL_0,
                               pa_sc_binner_cntl_0);
    radeon_end();
@@ -1847,6 +1850,7 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    assert(pdev->info.rbplus_allowed);
 
@@ -1989,7 +1993,7 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
     * breaks dual source blending in SkQP and does not seem to improve
     * performance. */
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg3(cmd_buffer, R_028754_SX_PS_DOWNCONVERT, RADV_TRACKED_SX_PS_DOWNCONVERT,
                                sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
    radeon_end();
@@ -2003,6 +2007,7 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader *ps_shader = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t pgm_rsrc1 = 0;
 
    if (cmd_buffer->state.emitted_ps_epilog == ps_epilog)
@@ -2013,7 +2018,7 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
       pgm_rsrc1 = (ps_shader->config.rsrc1 & C_00B848_VGPRS) | (ps_epilog->rsrc1 & ~C_00B848_VGPRS);
    }
 
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, ps_epilog->bo);
+   radv_cs_add_buffer(device->ws, cs->b, ps_epilog->bo);
 
    const uint32_t epilog_pc_offset = radv_get_user_sgpr_loc(ps_shader, AC_UD_EPILOG_PC);
 
@@ -2022,7 +2027,7 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
       gfx12_push_sh_reg(cmd_buffer, ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
       gfx12_push_32bit_pointer(cmd_buffer, epilog_pc_offset, ps_epilog->va, &pdev->info);
    } else {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       if (pgm_rsrc1)
          radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
       radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info);
@@ -2035,7 +2040,7 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
 }
 
 void
-radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
+radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs,
                          const struct radv_shader *shader)
 {
    uint64_t va = radv_shader_get_va(shader);
@@ -2064,6 +2069,7 @@ radv_emit_vgt_gs_mode(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader_info *info = &cmd_buffer->state.last_vgt_shader->info;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned vgt_primitiveid_en = 0;
    uint32_t vgt_gs_mode = 0;
 
@@ -2077,7 +2083,7 @@ radv_emit_vgt_gs_mode(struct radv_cmd_buffer *cmd_buffer)
       vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
    }
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg(cmd_buffer, R_028A84_VGT_PRIMITIVEID_EN, RADV_TRACKED_VGT_PRIMITIVEID_EN,
                               vgt_primitiveid_en);
    radeon_opt_set_context_reg(cmd_buffer, R_028A40_VGT_GS_MODE, RADV_TRACKED_VGT_GS_MODE, vgt_gs_mode);
@@ -2089,9 +2095,10 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const uint64_t va = radv_shader_get_va(shader);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
    radeon_emit(va >> 8);
    radeon_emit(S_00B124_MEM_BASE(va >> 40));
@@ -2133,10 +2140,11 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const uint64_t va = radv_shader_get_va(shader);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    assert(pdev->info.gfx_level < GFX11);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
    radeon_emit(va >> 8);
    radeon_emit(S_00B324_MEM_BASE(va >> 40));
@@ -2150,13 +2158,14 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const uint64_t va = radv_shader_get_va(shader);
 
    if (pdev->info.gfx_level >= GFX12) {
       gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
       gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
    } else {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
       radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
       radeon_end();
@@ -2168,6 +2177,7 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const uint64_t va = radv_shader_get_va(shader);
    mesa_shader_stage es_type;
    const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
@@ -2189,7 +2199,7 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
       gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
       gfx12_push_sh_reg(cmd_buffer, R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs);
    } else {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
       radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
       radeon_emit(shader->config.rsrc1);
@@ -2209,7 +2219,7 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
    }
 
    if (pdev->info.gfx_level >= GFX12) {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       gfx12_begin_context_regs();
       gfx12_opt_set_context_reg(cmd_buffer, R_028818_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL,
                                 shader->info.regs.pa_cl_vs_out_cntl);
@@ -2225,7 +2235,7 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
       gfx12_end_context_regs();
       radeon_end();
    } else {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_opt_set_context_reg(cmd_buffer, R_02881C_PA_CL_VS_OUT_CNTL, RADV_TRACKED_PA_CL_VS_OUT_CNTL,
                                  shader->info.regs.pa_cl_vs_out_cntl);
       radeon_opt_set_context_reg(cmd_buffer, R_028B90_VGT_GS_INSTANCE_CNT, RADV_TRACKED_VGT_GS_INSTANCE_CNT,
@@ -2244,7 +2254,7 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
       radeon_end();
    }
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
 
    uint32_t ge_cntl = shader->info.regs.ngg.ge_cntl;
    if (pdev->info.gfx_level >= GFX11) {
@@ -2299,13 +2309,14 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const uint64_t va = radv_shader_get_va(shader);
 
    if (pdev->info.gfx_level >= GFX12) {
      gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
      gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
    } else {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
      if (pdev->info.gfx_level >= GFX9) {
         radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
         radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
@@ -2326,6 +2337,7 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *vs = cmd_buffer->state.shaders[MESA_SHADER_VERTEX];
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (vs->info.merged_shader_compiled_separately) {
       assert(vs->info.next_stage == MESA_SHADER_TESS_CTRL || vs->info.next_stage == MESA_SHADER_GEOMETRY);
@@ -2356,7 +2368,7 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
         }
      }
    } else {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
      radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info);
 
      if (!vs->info.vs.has_prolog) {
@@ -2405,6 +2417,7 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *tes = cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL];
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (tes->info.merged_shader_compiled_separately) {
       assert(tes->info.next_stage == MESA_SHADER_GEOMETRY);
@@ -2422,7 +2435,7 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
         gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_rsrc2, rsrc2);
         gfx12_push_32bit_pointer(cmd_buffer, next_stage_pc_offset, gs->va, &pdev->info);
      } else {
-        radeon_begin(cmd_buffer->cs);
+        radeon_begin(cs);
        radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8);
        radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2);
        radeon_emit(rsrc1);
@@ -2448,9 +2461,10 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const uint64_t va = radv_shader_get_va(gs);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg3(cmd_buffer, R_028A60_VGT_GSVS_RING_OFFSET_1, RADV_TRACKED_VGT_GSVS_RING_OFFSET_1,
                                gs->info.regs.gs.vgt_gsvs_ring_offset[0], gs->info.regs.gs.vgt_gsvs_ring_offset[1],
@@ -2515,6 +2529,8 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_shader *es = cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
                                      ? cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
                                      : cmd_buffer->state.shaders[MESA_SHADER_VERTEX];
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
+
    if (gs->info.is_ngg) {
       radv_emit_hw_ngg(cmd_buffer, es, gs);
    } else {
@@ -2522,7 +2538,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer)
       radv_emit_hw_vs(cmd_buffer, cmd_buffer->state.gs_copy_shader);
    }
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg(cmd_buffer, R_028B38_VGT_GS_MAX_VERT_OUT, RADV_TRACKED_VGT_GS_MAX_VERT_OUT,
                               gs->info.regs.vgt_gs_max_vert_out);
 
@@ -2542,8 +2558,9 @@ radv_emit_vgt_gs_out(struct radv_cmd_buffer *cmd_buffer, uint32_t vgt_gs_out_pri
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    if (pdev->info.gfx_level >= GFX11) {
       radeon_set_uconfig_reg(R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
    } else {
@@ -2558,7 +2575,7 @@ radv_gfx11_emit_meshlet(struct radv_cmd_buffer *cmd_buffer, const struct radv_sh
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    assert(pdev->info.gfx_level >= GFX11);
 
@@ -2583,10 +2600,11 @@ radv_emit_mesh_shader(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *ms = cmd_buffer->state.shaders[MESA_SHADER_MESH];
    const uint32_t gs_out = radv_conv_gl_prim_to_gs_out(ms->info.ms.output_prim);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    radv_emit_hw_ngg(cmd_buffer, NULL, ms);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg(cmd_buffer, R_028B38_VGT_GS_MAX_VERT_OUT, RADV_TRACKED_VGT_GS_MAX_VERT_OUT,
                               ms->info.regs.vgt_gs_max_vert_out);
 
@@ -2688,7 +2706,7 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_vs_output_info *outinfo = &last_vgt_shader->info.outinfo;
    const bool gfx11plus = pdev->info.gfx_level >= GFX11;
    const enum radv_ps_in_type per_prim = gfx11plus ? radv_ps_in_per_prim_gfx11 : radv_ps_in_per_prim_gfx103;
-
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned num_per_primitive_params = 0;
    uint32_t ps_input_cntl[32];
    unsigned ps_offset = 0;
@@ -2725,7 +2743,7 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
    /* Only GFX10.3+ support per-primitive params */
    assert(pdev->info.gfx_level >= GFX10_3 || num_per_primitive_params == 0);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
 
    if (pdev->info.gfx_level >= GFX12) {
       radeon_opt_set_context_regn(cmd_buffer, R_028664_SPI_PS_INPUT_CNTL_0, ps_input_cntl,
@@ -2757,11 +2775,12 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct
    const uint32_t spi_ps_input_ena = ps ? ps->config.spi_ps_input_ena : 0;
    const uint32_t spi_ps_input_addr = ps ? ps->config.spi_ps_input_addr : 0;
    const uint32_t spi_ps_in_control = ps ? ps->info.regs.ps.spi_ps_in_control : 0;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (pdev->info.gfx_level >= GFX12) {
       const uint32_t pa_sc_hisz_control = ps ? ps->info.regs.ps.pa_sc_hisz_control : 0;
 
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       gfx12_begin_context_regs();
       gfx12_opt_set_context_reg2(cmd_buffer, R_02865C_SPI_PS_INPUT_ENA, RADV_TRACKED_SPI_PS_INPUT_ENA,
                                  spi_ps_input_ena, spi_ps_input_addr);
@@ -2776,7 +2795,7 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct
    } else {
       const uint32_t pa_sc_shader_control = ps ? ps->info.regs.ps.pa_sc_shader_control : 0;
 
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_opt_set_context_reg2(cmd_buffer, R_0286CC_SPI_PS_INPUT_ENA, RADV_TRACKED_SPI_PS_INPUT_ENA,
                                   spi_ps_input_ena, spi_ps_input_addr);
 
@@ -2798,6 +2817,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const uint64_t va = radv_shader_get_va(ps);
 
    if (pdev->info.gfx_level >= GFX12) {
@@ -2805,7 +2825,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
       gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc1, ps->config.rsrc1);
       gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc2, ps->config.rsrc2);
    } else {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4);
       radeon_emit(va >> 8);
       radeon_emit(S_00B024_MEM_BASE(va >> 40));
@@ -2823,12 +2843,13 @@ radv_emit_vgt_reuse(struct radv_cmd_buffer *cmd_buffer, const struct radv_vgt_sh
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (pdev->info.gfx_level == GFX10_3) {
       /* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */
       const bool has_legacy_tess_gs = key->tess && key->gs && !key->ngg;
 
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_opt_set_context_reg(cmd_buffer, R_028AB4_VGT_REUSE_OFF, RADV_TRACKED_VGT_REUSE_OFF,
                                  S_028AB4_REUSE_OFF(has_legacy_tess_gs));
       radeon_end();
@@ -2840,7 +2861,7 @@ radv_emit_vgt_reuse(struct radv_cmd_buffer *cmd_buffer, const struct radv_vgt_sh
          vtx_reuse_depth = 14;
       }
 
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_opt_set_context_reg(cmd_buffer, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
                                  RADV_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
       radeon_end();
@@ -2851,6 +2872,7 @@ static void
 radv_emit_vgt_shader_config_gfx12(struct radv_cmd_buffer *cmd_buffer, const struct radv_vgt_shader_key *key)
 {
    const bool ngg_wave_id_en = key->ngg_streamout || (key->mesh && key->mesh_scratch_ring);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t stages = 0;
 
    stages |= S_028A98_GS_EN(key->gs) | S_028A98_GS_FAST_LAUNCH(key->mesh) | S_028A98_GS_W32_EN(key->gs_wave32) |
@@ -2859,7 +2881,7 @@ radv_emit_vgt_shader_config_gfx12(struct radv_cmd_buffer *cmd_buffer, const stru
    if (key->tess)
       stages |= S_028A98_HS_EN(1) | S_028A98_HS_W32_EN(key->hs_wave32);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg(cmd_buffer, R_028A98_VGT_SHADER_STAGES_EN, RADV_TRACKED_VGT_SHADER_STAGES_EN, stages);
    radeon_end();
 }
@@ -2869,6 +2891,7 @@ radv_emit_vgt_shader_config_gfx6(struct radv_cmd_buffer *cmd_buffer, const struc
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t stages = 0;
 
    if (key->tess) {
@@ -2910,7 +2933,7 @@ radv_emit_vgt_shader_config_gfx6(struct radv_cmd_buffer *cmd_buffer, const struc
       assert(!(key->gs && !key->ngg) || !key->gs_wave32);
    }
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_opt_set_context_reg(cmd_buffer, R_028B54_VGT_SHADER_STAGES_EN, RADV_TRACKED_VGT_SHADER_STAGES_EN, stages);
    radeon_end();
 }
@@ -2935,6 +2958,7 @@ gfx103_emit_vgt_draw_payload_cntl(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *mesh_shader = cmd_buffer->state.shaders[MESA_SHADER_MESH];
    const bool enable_vrs = cmd_buffer->state.uses_vrs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    bool enable_prim_payload = false;
 
    /* Enables the second channel of the primitive export instruction.
@@ -2950,7 +2974,7 @@ gfx103_emit_vgt_draw_payload_cntl(struct radv_cmd_buffer *cmd_buffer)
    const uint32_t vgt_draw_payload_cntl =
       S_028A98_EN_VRS_RATE(enable_vrs) | S_028A98_EN_PRIM_PAYLOAD(enable_prim_payload);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
 
    if (pdev->info.gfx_level >= GFX12) {
       radeon_opt_set_context_reg(cmd_buffer, R_028AA0_VGT_DRAW_PAYLOAD_CNTL, RADV_TRACKED_VGT_DRAW_PAYLOAD_CNTL,
@@ -2971,6 +2995,7 @@ gfx103_emit_vrs_state(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
    const bool force_vrs_per_vertex = cmd_buffer->state.last_vgt_shader->info.force_vrs_per_vertex;
    const bool enable_vrs_coarse_shading = cmd_buffer->state.uses_vrs_coarse_shading;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t mode = V_028064_SC_VRS_COMB_MODE_PASSTHRU;
    uint8_t rate_x = 0, rate_y = 0;
 
@@ -2985,7 +3010,7 @@ gfx103_emit_vrs_state(struct radv_cmd_buffer *cmd_buffer)
        * requested by the user. Note that vkd3d-proton always has to declare VRS as dynamic because
        * in DX12 it's fully dynamic.
        */
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_opt_set_context_reg(cmd_buffer, R_028848_PA_CL_VRS_CNTL, RADV_TRACKED_PA_CL_VRS_CNTL,
                                  S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) |
                                     S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
@@ -2998,7 +3023,7 @@ gfx103_emit_vrs_state(struct radv_cmd_buffer *cmd_buffer)
    }
 
    if (pdev->info.gfx_level < GFX11) {
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_opt_set_context_reg(cmd_buffer, R_028064_DB_VRS_OVERRIDE_CNTL, RADV_TRACKED_DB_VRS_OVERRIDE_CNTL,
                                  S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
                                     S_028064_VRS_OVERRIDE_RATE_Y(rate_y));
@@ -3077,6 +3102,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
    struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (cmd_buffer->state.emitted_graphics_pipeline == pipeline)
       return;
@@ -3119,7 +3145,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
         cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) &&
        (settings->context_states_per_bin > 1 || settings->persistent_states_per_bin > 1)) {
       /* Break the batch on PS changes. */
-      radeon_begin(cmd_buffer->cs);
+      radeon_begin(cs);
       radeon_event_write(V_028A90_BREAK_BATCH);
       radeon_end();
    }
@@ -3132,9 +3158,10 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
       struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
       if (task_shader) {
          const struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
+         struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
         const uint64_t va = reloc->va[MESA_SHADER_TASK];
 
-         radeon_begin(cmd_buffer->gang.cs);
+         radeon_begin(ace_cs);
         radeon_set_sh_reg(task_shader->info.regs.pgm_lo, va >> 8);
         radeon_end();
      }
@@ -3236,10 +3263,11 @@ radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    assert(d->vk.vp.viewport_count);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
 
    if (pdev->info.gfx_level >= GFX12) {
       radeon_set_context_reg_seq(R_02843C_PA_CL_VPORT_XSCALE, d->vk.vp.viewport_count * 8);
@@ -3319,7 +3347,7 @@ radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (!d->vk.vp.scissor_count)
       return;
@@ -3357,8 +3385,9 @@ static void
 radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer)
 {
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_set_context_reg_seq(R_028414_CB_BLEND_RED, 4);
    radeon_emit_array((uint32_t *)d->vk.cb.blend_constants, 4);
    radeon_end();
@@ -3370,6 +3399,7 @@ radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
    struct radv_rendering_state *render = &cmd_buffer->state.render;
    unsigned slope = fui(d->vk.rs.depth_bias.slope_factor * 16.0f);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned pa_su_poly_offset_db_fmt_cntl = 0;
 
    if (vk_format_has_depth(render->ds_att.format) &&
@@ -3390,7 +3420,7 @@ radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer)
       }
    }
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_set_context_reg_seq(R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
    radeon_emit(fui(d->vk.rs.depth_bias.clamp)); /* CLAMP */
    radeon_emit(slope);                          /* FRONT SCALE */
@@ -3409,10 +3439,11 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const uint32_t vgt_gs_out_prim_type = radv_get_rasterization_prim(cmd_buffer);
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    assert(!cmd_buffer->state.mesh_shading);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
 
    if (pdev->info.gfx_level >= GFX7) {
       uint32_t vgt_prim = d->vk.ia.primitive_topology;
@@ -3445,6 +3476,7 @@ radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer)
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    /* When per-vertex VRS is forced and the dynamic fragment shading rate is a no-op, ignore
    * it. This is needed for vkd3d-proton because it always declares per-draw VRS as dynamic.
@@ -3516,7 +3548,7 @@ radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer)
    */
    pa_cl_vrs_cntl |= S_028848_HTILE_RATE_COMBINER_MODE(htile_comb_mode);
 
-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
 
    /* Emit per-draw VRS rate which is the first combiner. */
    radeon_set_uconfig_reg(R_03098C_GE_VRS_RATE, S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y));
@@ -3661,7 +3693,7 @@ radv_emit_rasterization_samples(struct radv_cmd_buffer *cmd_buffer)
 static void
 radv_gfx12_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct radv_color_buffer_info *cb)
 {
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    radeon_begin(cs);
    gfx12_begin_context_regs();
@@ -3856,7 +3888,7 @@ radv_cmd_buffer_get_vrs_image(struct radv_cmd_buffer *cmd_buffer)
 static void
 radv_gfx12_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds)
 {
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    radeon_begin(cs);
    gfx12_begin_context_regs();
@@ -4001,7 +4033,7 @@ radv_gfx6_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_bu
 static void
 radv_gfx12_emit_null_ds_state(struct radv_cmd_buffer *cmd_buffer)
 {
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    radeon_begin(cs);
    gfx12_begin_context_regs();
@@ -4057,7 +4089,7 @@ radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, const struct
                                 VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
 {
    const struct radv_image *image = iview->image;
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (cmd_buffer->state.render.ds_att.iview == NULL || cmd_buffer->state.render.ds_att.iview->image != image)
       return;
@@ -4096,15 +4128,15 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image
                            VkImageAspectFlags aspects)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
 
    if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
       uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel);
 
       /* Use the fastest way when both aspects are used. */
-      ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cmd_buffer->cs, cmd_buffer->qf, V_370_PFP, va,
-                                                          2 * level_count, cmd_buffer->state.predicating);
+      ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, cmd_buffer->qf, V_370_PFP, va, 2 * level_count,
+                                                          cmd_buffer->state.predicating);
 
       radeon_begin(cs);
 
@@ -4114,7 +4146,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image
       }
       radeon_end();
 
-      assert(cmd_buffer->cs->cdw == cdw_end);
+      assert(cs->b->cdw == cdw_end);
    } else {
       /* Otherwise we need one WRITE_DATA packet per level. */
       for (uint32_t l = 0; l < level_count; l++) {
@@ -4143,7 +4175,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (!pdev->info.has_tc_compat_zrange_bug)
       return;
 
@@ -4151,8 +4183,8 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra
    uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel);
    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
 
-   ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cmd_buffer->cs, cmd_buffer->qf, V_370_PFP, va,
-                                                       level_count, cmd_buffer->state.predicating);
+   ASSERTED unsigned cdw_end =
+      radv_cs_write_data_head(device, cs, cmd_buffer->qf, V_370_PFP, va, level_count, cmd_buffer->state.predicating);
 
    radeon_begin(cs);
 
@@ -4160,7 +4192,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra
       radeon_emit(value);
    radeon_end();
 
-   assert(cmd_buffer->cs->cdw == cdw_end);
+   assert(cs->b->cdw == cdw_end);
 }
 
 static void
@@ -4207,7 +4239,7 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct rad
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const struct radv_image *image = iview->image;
    VkImageAspectFlags aspects = vk_format_aspects(image->vk.format);
    uint64_t va = radv_get_ds_clear_value_va(image, iview->vk.base_mip_level);
@@ -4260,6 +4292,7 @@ radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
                          const VkImageSubresourceRange *range, bool value)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (!image->fce_pred_offset)
       return;
@@ -4269,7 +4302,7 @@ radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
 
    ASSERTED unsigned cdw_end =
-      radv_cs_write_data_head(device, cmd_buffer->cs, cmd_buffer->qf, V_370_PFP, va, 2 * level_count, false);
+      radv_cs_write_data_head(device, cs, cmd_buffer->qf, V_370_PFP, va, 2 * level_count, false);
 
    radeon_begin(cmd_buffer->cs);
 
@@ -4279,7 +4312,7 @@ radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
    }
    radeon_end();
 
-   assert(cmd_buffer->cs->cdw == cdw_end);
+   assert(cs->b->cdw == cdw_end);
 }
 
 /**
@@ -4290,6 +4323,7 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
                          const VkImageSubresourceRange *range, bool value)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (image->dcc_pred_offset == 0)
       return;
@@ -4301,7 +4335,7 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
    assert(radv_dcc_enabled(image, range->baseMipLevel));
 
    ASSERTED unsigned cdw_end =
-      radv_cs_write_data_head(device, cmd_buffer->cs, cmd_buffer->qf, V_370_PFP, va, 2 * level_count, false);
+      radv_cs_write_data_head(device, cs, cmd_buffer->qf, V_370_PFP, va, 2 * level_count, false);
 
    radeon_begin(cmd_buffer->cs);
 
@@ -4311,7 +4345,7 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
    }
    radeon_end();
 
-   assert(cmd_buffer->cs->cdw == cdw_end);
+   assert(cs->b->cdw == cdw_end);
 }
 
 /**
@@ -4322,13 +4356,13 @@ radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct ra
                                    uint32_t color_values[2])
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    if (cb_idx >= cmd_buffer->state.render.color_att_count ||
       cmd_buffer->state.render.color_att[cb_idx].iview == NULL ||
      cmd_buffer->state.render.color_att[cb_idx].iview->image != image)
      return;
 
-   ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 4);
+   ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 4);
 
    radeon_begin(cs);
    radeon_set_context_reg_seq(R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
@@ -4336,7 +4370,7 @@ radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct ra
    radeon_emit(color_values[1]);
    radeon_end();
 
-   assert(cmd_buffer->cs->cdw <= cdw_max);
+   assert(cs->b->cdw <= cdw_max);
 
    cmd_buffer->state.context_roll_without_scissor_emitted = true;
 }
@@ -4349,7 +4383,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im
                               const VkImageSubresourceRange *range, uint32_t color_values[2])
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
 
    assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel));
@@ -4357,8 +4391,8 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im
    if (radv_image_has_clear_value(image)) {
      uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
 
-      ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cmd_buffer->cs, cmd_buffer->qf, V_370_PFP, va,
-                                                          2 * level_count, cmd_buffer->state.predicating);
+      ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, cmd_buffer->qf, V_370_PFP, va, 2 * level_count,
+                                                          cmd_buffer->state.predicating);
 
      radeon_begin(cs);
 
@@ -4368,7 +4402,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im
      }
      radeon_end();
 
-      assert(cmd_buffer->cs->cdw == cdw_end);
+      assert(cs->b->cdw == cdw_end);
    } else {
      /* Some default value we can set in the update.
*/ assert(color_values[0] == 0 && color_values[1] == 0); @@ -4406,7 +4440,7 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_image *image = iview->image; if (!radv_image_has_cmask(image) && !radv_dcc_enabled(image, iview->vk.base_mip_level)) @@ -4535,7 +4569,7 @@ radv_gfx11_emit_vrs_surface(struct radv_cmd_buffer *cmd_buffer) const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_rendering_state *render = &cmd_buffer->state.render; const bool vrs_surface_enable = render->vrs_att.iview != NULL; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned xmax = 0, ymax = 0; uint8_t swizzle_mode = 0; uint64_t va = 0; @@ -4544,7 +4578,7 @@ radv_gfx11_emit_vrs_surface(struct radv_cmd_buffer *cmd_buffer) const struct radv_image_view *vrs_iview = render->vrs_att.iview; struct radv_image *vrs_image = vrs_iview->image; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, vrs_image->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, vrs_image->bindings[0].bo); va = vrs_image->bindings[0].addr; va |= vrs_image->planes[0].surface.tile_swizzle << 8; @@ -4586,12 +4620,13 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_rendering_state *render = &cmd_buffer->state.render; + struct radv_cmd_stream *cs = cmd_buffer->cs; int i; unsigned color_invalid = pdev->info.gfx_level >= GFX12 ? S_028EC0_FORMAT(V_028EC0_COLOR_INVALID) : pdev->info.gfx_level >= GFX11 ? S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) : S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 51 + MAX_RTS * 70); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 51 + MAX_RTS * 70); for (i = 0; i < render->color_att_count; ++i) { struct radv_image_view *iview = render->color_att[i].iview; @@ -4608,18 +4643,18 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) VkImageLayout layout = render->color_att[i].layout; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[0].bo); assert(iview->vk.aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)); if (iview->image->disjoint && iview->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT) { for (uint32_t plane_id = 0; plane_id < iview->image->plane_count; plane_id++) { - radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[plane_id].bo); + radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[plane_id].bo); } } else { uint32_t plane_id = iview->image->disjoint ? 
iview->plane_id : 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[plane_id].bo); + radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[plane_id].bo); } if (pdev->info.gfx_level >= GFX12) { @@ -4644,7 +4679,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) if (render->ds_att.iview) { struct radv_image_view *iview = render->ds_att.iview; const struct radv_image *image = iview->image; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, image->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, image->bindings[0].bo); uint32_t qf_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf); bool depth_compressed = @@ -4693,7 +4728,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) radv_initialise_vrs_surface(image, htile_buffer, &ds); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, htile_buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, htile_buffer->bo); bool depth_compressed = radv_layout_is_htile_compressed( device, image, 0, layout, radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf)); @@ -4709,7 +4744,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) if (pdev->info.gfx_level >= GFX11) radv_gfx11_emit_vrs_surface(cmd_buffer); - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER; } @@ -4723,7 +4758,7 @@ radv_emit_guardband_state(struct radv_cmd_buffer *cmd_buffer) unsigned rast_prim = radv_get_rasterization_prim(cmd_buffer); const bool draw_points = radv_rast_prim_is_point(rast_prim) || radv_polygon_mode_is_point(d->vk.rs.polygon_mode); const bool draw_lines = radv_rast_prim_is_line(rast_prim) || radv_polygon_mode_is_line(d->vk.rs.polygon_mode); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; int i; float guardband_x = INFINITY, guardband_y = INFINITY; float discard_x = 1.0f, discard_y = 1.0f; @@ -4805,7 +4840,7 @@ radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_cmd_state *state = &cmd_buffer->state; uint32_t max_index_count = state->max_index_count; uint64_t index_va = state->index_va; @@ -5009,6 +5044,7 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t rsrc1, rsrc2; /* no need to re-emit anything in this case */ @@ -5052,7 +5088,7 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v radeon_end(); } - radv_cs_add_buffer(device->ws, cmd_buffer->cs, prolog->bo); + radv_cs_add_buffer(device->ws, cs->b, prolog->bo); } static void @@ -5374,7 +5410,7 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags st { struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; bool flush_indirect_descriptors; if (!descriptors_state->dirty) @@ -5385,7 +5421,7 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, 
VkShaderStageFlags st if (flush_indirect_descriptors) radv_upload_indirect_descriptor_sets(cmd_buffer, descriptors_state); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4); if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { struct radv_shader *compute_shader = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE @@ -5409,14 +5445,14 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags st descriptors_state->dirty = 0; - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); if (radv_device_fault_detection_enabled(device)) radv_save_descriptors(cmd_buffer, bind_point); } static void -radv_emit_all_inline_push_consts(const struct radv_device *device, struct radeon_cmdbuf *cs, +radv_emit_all_inline_push_consts(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_shader *shader, const uint32_t *values, bool *need_push_constants) { if (radv_get_user_sgpr_info(shader, AC_UD_PUSH_CONSTANTS)->sgpr_idx != -1) @@ -5456,7 +5492,7 @@ static void radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point); struct radv_shader *shader, *prev_shader; @@ -5517,7 +5553,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); va += offset; - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, MESA_VULKAN_SHADER_STAGES * 4); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MESA_VULKAN_SHADER_STAGES * 4); if (internal_stages & VK_SHADER_STAGE_COMPUTE_BIT) { struct radv_shader *compute_shader = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE @@ -5544,7 +5580,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag } } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } cmd_buffer->push_constant_stages &= ~stages; @@ -5713,6 +5749,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer) struct radv_shader *vs = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); bool uses_dynamic_inputs = vs->info.vs.dynamic_inputs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (!vs->info.vs.vb_desc_usage_mask) return; @@ -5738,7 +5775,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer) va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); va += vb_offset; - radv_emit_userdata_address(device, cmd_buffer->cs, vs, AC_UD_VS_VERTEX_BUFFERS, va); + radv_emit_userdata_address(device, cs, vs, AC_UD_VS_VERTEX_BUFFERS, va); cmd_buffer->state.vb_va = va; cmd_buffer->state.vb_size = vb_desc_alloc_size; @@ -6098,7 +6135,7 @@ radv_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_ const unsigned patch_control_points = state->dynamic.vk.ts.patch_control_points; const unsigned topology = state->dynamic.vk.ia.primitive_topology; const bool prim_restart_enable = 
state->dynamic.vk.ia.primitive_restart_enable; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned ia_multi_vgt_param; ia_multi_vgt_param = radv_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output, @@ -6170,7 +6207,7 @@ radv_emit_primitive_restart(struct radv_cmd_buffer *cmd_buffer, bool enable) struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radeon_begin(cs); @@ -6215,7 +6252,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d const uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer); const struct radeon_info *gpu_info = &pdev->info; struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t topology = state->dynamic.vk.ia.primitive_topology; bool disable_instance_packing = false; @@ -6782,6 +6819,7 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; const struct radv_vertex_input_state *vi_state = &cmd_buffer->state.vertex_input; + struct radv_cmd_stream *cs = cmd_buffer->cs; /* We have to defer setting up vertex buffer since we need the buffer * stride from the pipeline. */ @@ -6809,7 +6847,7 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bit = BITFIELD_BIT(idx); if (buffer) { - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); cmd_buffer->state.vbo_bound_mask |= bit; } else { cmd_buffer->state.vbo_bound_mask &= ~bit; @@ -6865,6 +6903,7 @@ radv_CmdBindIndexBuffer2(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDevic VK_FROM_HANDLE(radv_buffer, index_buffer, buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; cmd_buffer->state.index_type = vk_to_index_type(indexType); @@ -6873,7 +6912,7 @@ radv_CmdBindIndexBuffer2(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDevic int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType)); cmd_buffer->state.max_index_count = (vk_buffer_range(&index_buffer->vk, offset, size)) / index_size; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, index_buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, index_buffer->bo); } else { cmd_buffer->state.index_va = 0; cmd_buffer->state.max_index_count = 0; @@ -6890,6 +6929,7 @@ radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint struct radv_descriptor_set *set, unsigned idx) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radeon_winsys *ws = device->ws; radv_set_descriptor_set(cmd_buffer, bind_point, set, idx); @@ -6900,11 +6940,11 @@ radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint if (!device->use_global_bo_list) { for (unsigned j = 0; j < set->header.buffer_count; ++j) if (set->descriptors[j]) - radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]); + radv_cs_add_buffer(ws, cs->b, 
set->descriptors[j]); } if (set->header.bo) - radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo); + radv_cs_add_buffer(ws, cs->b, set->header.bo); } static void @@ -7097,6 +7137,8 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; + struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs; if (cmd_buffer->qf == RADV_QUEUE_SPARSE) return vk_command_buffer_end(&cmd_buffer->vk); @@ -7127,7 +7169,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) } /* Finalize the internal compute command stream, if it exists. */ - if (cmd_buffer->gang.cs) { + if (ace_cs) { VkResult result = radv_gang_finalize(cmd_buffer); if (result != VK_SUCCESS) return vk_error(cmd_buffer, result); @@ -7144,7 +7186,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) radv_describe_end_cmd_buffer(cmd_buffer); - VkResult result = device->ws->cs_finalize(cmd_buffer->cs); + VkResult result = radv_finalize_cmd_stream(device, cs); if (result != VK_SUCCESS) return vk_error(cmd_buffer, result); @@ -7156,18 +7198,19 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compu { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pipeline == cmd_buffer->state.emitted_compute_pipeline) return; - radeon_check_space(device->ws, cmd_buffer->cs, pdev->info.gfx_level >= GFX10 ? 25 : 22); + radeon_check_space(device->ws, cs->b, pdev->info.gfx_level >= GFX10 ? 25 : 22); if (pipeline->base.type == RADV_PIPELINE_COMPUTE) { - radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]); + radv_emit_compute_shader(pdev, cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]); } else { const struct radv_shader *rt_prolog = cmd_buffer->state.rt_prolog; - radv_emit_compute_shader(pdev, cmd_buffer->cs, rt_prolog); + radv_emit_compute_shader(pdev, cs, rt_prolog); const uint32_t ray_dynamic_callback_stack_base_offset = radv_get_user_sgpr_loc(rt_prolog, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE); @@ -7385,13 +7428,14 @@ static void radv_bind_gs_copy_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *gs_copy_shader) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; cmd_buffer->state.gs_copy_shader = gs_copy_shader; if (gs_copy_shader) { cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, gs_copy_shader->upload_seq); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, gs_copy_shader->bo); + radv_cs_add_buffer(device->ws, cs->b, gs_copy_shader->bo); } } @@ -7469,6 +7513,8 @@ radv_bind_task_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_shad static void radv_bind_rt_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *rt_prolog) { + struct radv_cmd_stream *cs = cmd_buffer->cs; + cmd_buffer->state.rt_prolog = rt_prolog; struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); @@ -7477,7 +7523,7 @@ radv_bind_rt_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *rt_p cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, rt_prolog->upload_seq); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, rt_prolog->bo); + radv_cs_add_buffer(device->ws, cs->b, rt_prolog->bo); } /* This function binds/unbinds a shader to the cmdbuffer state. 
*/ @@ -7485,6 +7531,7 @@ static void radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, mesa_shader_stage stage) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; if (!shader) { cmd_buffer->state.shaders[stage] = NULL; @@ -7556,7 +7603,7 @@ radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, shader->upload_seq); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, shader->bo); + radv_cs_add_buffer(device->ws, cs->b, shader->bo); } static void @@ -7630,6 +7677,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; radv_reset_shader_object_state(cmd_buffer, pipelineBindPoint); @@ -7661,7 +7709,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline for (unsigned i = 0; i < rt_pipeline->stage_count; ++i) { struct radv_shader *shader = rt_pipeline->stages[i].shader; if (shader) - radv_cs_add_buffer(device->ws, cmd_buffer->cs, shader->bo); + radv_cs_add_buffer(device->ws, cs->b, shader->bo); } cmd_buffer->state.rt_pipeline = rt_pipeline; @@ -8834,8 +8882,8 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou if (!radv_gang_init(primary)) return; - struct radeon_cmdbuf *ace_primary = primary->gang.cs; - struct radeon_cmdbuf *ace_secondary = secondary->gang.cs; + struct radv_cmd_stream *ace_primary = primary->gang.cs; + struct radv_cmd_stream *ace_secondary = secondary->gang.cs; /* Emit pending flushes on primary prior to executing secondary. */ radv_gang_cache_flush(primary); @@ -8849,7 +8897,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou /* Execute the secondary compute cmdbuf. * Don't use IB2 packets because they are not supported on compute queues. */ - device->ws->cs_execute_secondary(ace_primary, ace_secondary, false); + device->ws->cs_execute_secondary(ace_primary->b, ace_secondary->b, false); } /* Update pending ACE internal flush bits from the secondary cmdbuf */ @@ -8864,7 +8912,10 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou if (radv_gang_follower_sem_dirty(secondary)) primary->gang.sem.follower_value++; - device->ws->cs_execute_secondary(primary->cs, secondary->cs, allow_ib2); + struct radv_cmd_stream *primary_cs = primary->cs; + struct radv_cmd_stream *secondary_cs = secondary->cs; + + device->ws->cs_execute_secondary(primary_cs->b, secondary_cs->b, allow_ib2); /* When the secondary command buffer is compute only we don't * need to re-emit the current graphics pipeline. 
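The conversion rule running through these hunks: winsys entry points (cs_execute_secondary, cs_dump, cs_chain_dgc_ib) and BO tracking via radv_cs_add_buffer keep operating on the raw radeon_cmdbuf, reached through the stream's b member, while radeon_begin()/radeon_emit() and the radv_emit_* helpers now take the radv_cmd_stream itself. A minimal sketch of the wrapper these call sites imply; only the b member is evidenced by this patch, anything beyond it is an assumption:

/* Hypothetical sketch, for illustration only: 'b' is the one member
 * these call sites confirm; any extra stream-level state is assumed. */
struct radv_cmd_stream {
   struct radeon_cmdbuf *b; /* underlying winsys command buffer */
};

Keeping b a plain pointer leaves the winsys interface untouched, which is presumably what lets this rename land without changing the winsys backends.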
@@ -9006,6 +9057,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); VkExtent2D screen_scissor = {MAX_FRAMEBUFFER_WIDTH, MAX_FRAMEBUFFER_HEIGHT}; + struct radv_cmd_stream *cs = cmd_buffer->cs; bool disable_constant_encode_ac01 = false; const struct VkSampleLocationsInfoEXT *sample_locs_info = @@ -9246,7 +9298,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe const uint32_t maxx = minx + render->area.extent.width; const uint32_t maxy = miny + render->area.extent.height; - radeon_check_space(device->ws, cmd_buffer->cs, 10); + radeon_check_space(device->ws, cs->b, 10); if (pdev->info.gfx_level >= GFX12) { radeon_begin(cmd_buffer->cs); @@ -9296,7 +9348,7 @@ radv_CmdEndRendering(VkCommandBuffer commandBuffer) } static void -radv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, const struct radv_shader *shader, uint32_t base_reg, +radv_emit_view_index_per_stage(struct radv_cmd_stream *cs, const struct radv_shader *shader, uint32_t base_reg, unsigned index) { const uint32_t view_index_offset = radv_get_user_sgpr_loc(shader, AC_UD_VIEW_INDEX); @@ -9310,7 +9362,7 @@ radv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, const struct radv_shade } static void -radv_emit_view_index(const struct radv_cmd_state *cmd_state, struct radeon_cmdbuf *cs, unsigned index) +radv_emit_view_index(const struct radv_cmd_state *cmd_state, struct radv_cmd_stream *cs, unsigned index) { radv_foreach_stage (stage, cmd_state->active_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) { const struct radv_shader *shader = radv_get_shader(cmd_state->shaders, stage); @@ -9324,7 +9376,7 @@ radv_emit_view_index(const struct radv_cmd_state *cmd_state, struct radeon_cmdbu } static void -radv_emit_copy_data_imm(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, uint32_t src_imm, +radv_emit_copy_data_imm(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, uint32_t src_imm, uint64_t dst_va) { radeon_begin(cs); @@ -9348,7 +9400,7 @@ radv_emit_copy_data_imm(const struct radv_physical_device *pdev, struct radeon_c */ static void radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_cmd_state *state, - struct radeon_cmdbuf *cs, uint64_t inv_va, bool *inv_emitted, unsigned dwords) + struct radv_cmd_stream *cs, uint64_t inv_va, bool *inv_emitted, unsigned dwords) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -9380,7 +9432,7 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c ALWAYS_INLINE static void radv_gfx12_emit_hiz_his_wa(const struct radv_device *device, const struct radv_cmd_state *cmd_state, - struct radeon_cmdbuf *cs) + struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_rendering_state *render = &cmd_state->render; @@ -9407,14 +9459,15 @@ static void radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count, uint32_t use_opaque) { const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; - radeon_begin(cmd_buffer->cs); + radeon_begin(cs); radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); radeon_emit(vertex_count); radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); radeon_end(); - radv_gfx12_emit_hiz_his_wa(device, 
&cmd_buffer->state, cmd_buffer->cs); + radv_gfx12_emit_hiz_his_wa(device, &cmd_buffer->state, cs); } /** @@ -9429,8 +9482,9 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in uint32_t index_count, bool not_eop) { const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; - radeon_begin(cmd_buffer->cs); + radeon_begin(cs); radeon_emit(PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating)); radeon_emit(max_index_count); radeon_emit(index_va); @@ -9443,7 +9497,7 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in radeon_emit(V_0287F0_DI_SRC_SEL_DMA | S_0287F0_NOT_EOP(not_eop)); radeon_end(); - radv_gfx12_emit_hiz_his_wa(device, &cmd_buffer->state, cmd_buffer->cs); + radv_gfx12_emit_hiz_his_wa(device, &cmd_buffer->state, cs); } /* MUST inline this function to avoid massive perf loss in drawoverhead */ @@ -9452,7 +9506,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index uint64_t count_va, uint32_t stride) { const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; bool draw_id_enable = cmd_buffer->state.uses_drawid; uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr; @@ -9507,7 +9561,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *mesh_shader = cmd_buffer->state.shaders[MESA_SHADER_MESH]; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr; bool predicating = cmd_buffer->state.predicating; assert(base_reg || (!cmd_buffer->state.uses_drawid && !mesh_shader->info.cs.uses_grid_size)); @@ -9546,7 +9600,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 ALWAYS_INLINE static void radv_cs_emit_dispatch_taskmesh_direct_ace_packet(const struct radv_device *device, - const struct radv_cmd_state *cmd_state, struct radeon_cmdbuf *ace_cs, + const struct radv_cmd_state *cmd_state, struct radv_cmd_stream *ace_cs, const uint32_t x, const uint32_t y, const uint32_t z) { const struct radv_shader *task_shader = cmd_state->shaders[MESA_SHADER_TASK]; @@ -9568,7 +9622,7 @@ radv_cs_emit_dispatch_taskmesh_direct_ace_packet(const struct radv_device *devic ALWAYS_INLINE static void radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(const struct radv_device *device, const struct radv_cmd_state *cmd_state, - struct radeon_cmdbuf *ace_cs, uint64_t data_va, + struct radv_cmd_stream *ace_cs, uint64_t data_va, uint32_t draw_count, uint64_t count_va, uint32_t stride) { assert((data_va & 0x03) == 0); @@ -9600,7 +9654,7 @@ radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(const struct radv_devic ALWAYS_INLINE static void radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, const struct radv_cmd_state *cmd_state, - struct radeon_cmdbuf *cs) + struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *mesh_shader = cmd_state->shaders[MESA_SHADER_MESH]; @@ -9633,7 +9687,7 @@ radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer, const 
str const uint32_t vertex_offset) { struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const bool uses_baseinstance = state->uses_baseinstance; const bool uses_drawid = state->uses_drawid; @@ -9673,7 +9727,7 @@ ALWAYS_INLINE static void radv_emit_userdata_vertex_drawid(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_offset, uint32_t drawid) { struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radeon_begin(cs); radeon_set_sh_reg_seq(state->vtx_base_sgpr, 1 + !!drawid); @@ -9690,7 +9744,7 @@ radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer, const uint32_t x, co { struct radv_cmd_state *state = &cmd_buffer->state; const struct radv_shader *mesh_shader = state->shaders[MESA_SHADER_MESH]; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const bool uses_drawid = state->uses_drawid; const bool uses_grid_size = mesh_shader->info.cs.uses_grid_size; @@ -9712,7 +9766,7 @@ radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer, const uint32_t x, co } ALWAYS_INLINE static void -radv_emit_userdata_task(const struct radv_cmd_state *cmd_state, struct radeon_cmdbuf *ace_cs, uint32_t x, uint32_t y, +radv_emit_userdata_task(const struct radv_cmd_state *cmd_state, struct radv_cmd_stream *ace_cs, uint32_t x, uint32_t y, uint32_t z) { const struct radv_shader *task_shader = cmd_state->shaders[MESA_SHADER_TASK]; @@ -9745,7 +9799,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const int index_size = radv_get_vgt_index_size(state->index_type); unsigned i = 0; const bool uses_drawid = state->uses_drawid; @@ -9772,7 +9826,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } else { u_foreach_bit (view, state->render.view_mask) { - radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view); + radv_emit_view_index(&cmd_buffer->state, cs, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } @@ -9803,7 +9857,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } else { u_foreach_bit (view, state->render.view_mask) { - radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view); + radv_emit_view_index(&cmd_buffer->state, cs, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } @@ -9842,7 +9896,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct can_eop && i < drawCount - 1); } else { u_foreach_bit (view, state->render.view_mask) { - radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view); + radv_emit_view_index(&cmd_buffer->state, cs, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } @@ -9867,7 +9921,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct can_eop && 
!offset_changes && i < drawCount - 1); } else { u_foreach_bit (view, state->render.view_mask) { - radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view); + radv_emit_view_index(&cmd_buffer->state, cs, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } @@ -9887,6 +9941,7 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r unsigned i = 0; const uint32_t view_mask = cmd_buffer->state.render.view_mask; const bool uses_drawid = cmd_buffer->state.uses_drawid; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t last_start = 0; vk_foreach_multi_draw (draw, i, minfo, drawCount, stride) { @@ -9899,7 +9954,7 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque); } else { u_foreach_bit (view, view_mask) { - radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view); + radv_emit_view_index(&cmd_buffer->state, cs, view); radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque); } } @@ -9918,6 +9973,7 @@ static void radv_cs_emit_mesh_dispatch_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z) { const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; radeon_begin(cmd_buffer->cs); radeon_emit(PKT3(PKT3_DISPATCH_MESH_DIRECT, 3, cmd_buffer->state.predicating)); @@ -9927,7 +9983,7 @@ radv_cs_emit_mesh_dispatch_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x radeon_emit(S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX)); radeon_end(); - radv_gfx12_emit_hiz_his_wa(device, &cmd_buffer->state, cmd_buffer->cs); + radv_gfx12_emit_hiz_his_wa(device, &cmd_buffer->state, cs); } ALWAYS_INLINE static void @@ -9936,6 +9992,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const uint32_t view_mask = cmd_buffer->state.render.view_mask; + struct radv_cmd_stream *cs = cmd_buffer->cs; radv_emit_userdata_mesh(cmd_buffer, x, y, z); @@ -9944,7 +10001,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x radv_cs_emit_mesh_dispatch_packet(cmd_buffer, x, y, z); } else { u_foreach_bit (view, view_mask) { - radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view); + radv_emit_view_index(&cmd_buffer->state, cs, view); radv_cs_emit_mesh_dispatch_packet(cmd_buffer, x, y, z); } } @@ -9954,7 +10011,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x radv_cs_emit_draw_packet(cmd_buffer, count, 0); } else { u_foreach_bit (view, view_mask) { - radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view); + radv_emit_view_index(&cmd_buffer->state, cs, view); radv_cs_emit_draw_packet(cmd_buffer, count, 0); } } @@ -9962,7 +10019,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x } static void -radv_emit_indirect_buffer(struct radeon_cmdbuf *cs, uint64_t va, bool is_compute) +radv_emit_indirect_buffer(struct radv_cmd_stream *cs, uint64_t va, bool is_compute) { radeon_begin(cs); radeon_emit(PKT3(PKT3_SET_BASE, 2, 0) | (is_compute ? 
PKT3_SHADER_TYPE_S(1) : 0)); @@ -9976,7 +10033,7 @@ ALWAYS_INLINE static void radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { const struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radv_emit_indirect_buffer(cs, info->indirect_va, false); @@ -10001,8 +10058,8 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, const s ALWAYS_INLINE static void radv_emit_direct_taskmesh_draw_packets(const struct radv_device *device, struct radv_cmd_state *cmd_state, - struct radeon_cmdbuf *cs, struct radeon_cmdbuf *ace_cs, uint32_t x, uint32_t y, - uint32_t z) + struct radv_cmd_stream *cs, struct radv_cmd_stream *ace_cs, uint32_t x, + uint32_t y, uint32_t z) { const uint32_t view_mask = cmd_state->render.view_mask; const unsigned num_views = MAX2(1, util_bitcount(view_mask)); @@ -10027,7 +10084,7 @@ radv_emit_direct_taskmesh_draw_packets(const struct radv_device *device, struct static void radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struct radv_cmd_state *cmd_state, - struct radeon_cmdbuf *cs, struct radeon_cmdbuf *ace_cs, + struct radv_cmd_stream *cs, struct radv_cmd_stream *ace_cs, const struct radv_draw_info *info, uint64_t workaround_cond_va) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -10105,7 +10162,7 @@ static void radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { const struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radv_emit_indirect_buffer(cs, info->indirect_va, false); @@ -10448,7 +10505,7 @@ radv_emit_tess_state(struct radv_cmd_buffer *cmd_buffer) const struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]; const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t tcs_offchip_layout = 0, tes_offchip_layout = 0; uint32_t pgm_hs_rsrc2 = 0; @@ -11494,8 +11551,9 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const bool has_prefetch = pdev->info.gfx_level >= GFX7; + struct radv_cmd_stream *cs = cmd_buffer->cs; - ASSERTED const unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1)); + ASSERTED const unsigned cdw_max = radeon_check_space(device->ws, cs->b, 4096 + 128 * (drawCount - 1)); if (likely(!info->indirect_va)) { /* GFX6-GFX7 treat instance_count==0 as instance_count==1. 
There is @@ -11570,7 +11628,6 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info radv_describe_draw(cmd_buffer, info); if (likely(!info->indirect_va)) { struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; assert(state->vtx_base_sgpr); if (state->last_num_instances != info->instance_count) { radeon_begin(cs); @@ -11581,7 +11638,7 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info state->last_num_instances = info->instance_count; } } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); return true; } @@ -11592,6 +11649,7 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; /* For direct draws, this makes sure we don't draw anything. * For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100). @@ -11603,7 +11661,7 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ radv_bind_graphics_shaders(cmd_buffer); } - struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs; + struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs; struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK]; assert(!task_shader || ace_cs); @@ -11612,9 +11670,9 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT | (task_shader ? VK_SHADER_STAGE_TASK_BIT_EXT : 0); const bool need_task_semaphore = task_shader && radv_flush_gang_leader_semaphore(cmd_buffer); - ASSERTED const unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1)); + ASSERTED const unsigned cdw_max = radeon_check_space(device->ws, cs->b, 4096 + 128 * (drawCount - 1)); ASSERTED const unsigned ace_cdw_max = - !ace_cs ? 0 : radeon_check_space(device->ws, ace_cs, 4096 + 128 * (drawCount - 1)); + !ace_cs ? 
0 : radeon_check_space(device->ws, ace_cs->b, 4096 + 128 * (drawCount - 1)); radv_emit_all_graphics_states(cmd_buffer, info); @@ -11645,8 +11703,6 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ if (likely(!info->indirect_va)) { struct radv_cmd_state *state = &cmd_buffer->state; if (unlikely(state->last_num_instances != 1)) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - radeon_begin(cs); radeon_emit(PKT3(PKT3_NUM_INSTANCES, 0, false)); radeon_emit(1); @@ -11656,8 +11712,8 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ } } - assert(cmd_buffer->cs->cdw <= cdw_max); - assert(!ace_cs || ace_cs->cdw <= ace_cdw_max); + assert(cs->b->cdw <= cdw_max); + assert(!ace_cs || ace_cs->b->cdw <= ace_cdw_max); cmd_buffer->state.last_index_type = -1; @@ -11786,6 +11842,7 @@ radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSi VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.count = drawCount; @@ -11796,7 +11853,7 @@ radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSi info.indexed = false; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; @@ -11811,6 +11868,7 @@ radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkD VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.indexed = true; @@ -11821,7 +11879,7 @@ radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkD info.strmout_va = 0; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; @@ -11837,6 +11895,7 @@ radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDev VK_FROM_HANDLE(radv_buffer, buffer, _buffer); VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.count = maxDrawCount; @@ -11847,8 +11906,8 @@ radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDev info.indexed = false; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, count_buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo); if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; @@ -11865,6 +11924,7 @@ radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer VK_FROM_HANDLE(radv_buffer, buffer, _buffer); VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.indexed = true; @@ -11875,8 +11935,8 @@ radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer info.strmout_va = 0; 
info.instance_count = 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, count_buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo); if (!radv_before_draw(cmd_buffer, &info, 1, false)) return; @@ -11889,6 +11949,7 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.count = x * y * z; @@ -11904,7 +11965,7 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, return; if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { - radv_emit_direct_taskmesh_draw_packets(device, &cmd_buffer->state, cmd_buffer->cs, cmd_buffer->gang.cs, x, y, z); + radv_emit_direct_taskmesh_draw_packets(device, &cmd_buffer->state, cs, cmd_buffer->gang.cs, x, y, z); } else { radv_emit_direct_mesh_draw_packet(cmd_buffer, x, y, z); } @@ -11919,6 +11980,7 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.indirect_va = vk_buffer_address(&buffer->vk, offset); @@ -11929,14 +11991,13 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer info.indexed = false; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount, false)) return; if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { - radv_emit_indirect_taskmesh_draw_packets(device, &cmd_buffer->state, cmd_buffer->cs, cmd_buffer->gang.cs, &info, - 0); + radv_emit_indirect_taskmesh_draw_packets(device, &cmd_buffer->state, cs, cmd_buffer->gang.cs, &info, 0); } else { radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info); } @@ -11955,6 +12016,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b VK_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.indirect_va = vk_buffer_address(&buffer->vk, offset); @@ -11965,8 +12027,8 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b info.indexed = false; info.instance_count = 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, count_buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, count_buffer->bo); if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount, false)) return; @@ -11985,7 +12047,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b workaround_cond_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + workaround_cond_off; } - radv_emit_indirect_taskmesh_draw_packets(device, &cmd_buffer->state, cmd_buffer->cs, cmd_buffer->gang.cs, &info, + radv_emit_indirect_taskmesh_draw_packets(device, &cmd_buffer->state, cs, cmd_buffer->gang.cs, &info, 
workaround_cond_va); } else { radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info); @@ -12012,17 +12074,18 @@ radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommand const uint64_t ib_va = pGeneratedCommandsInfo->preprocessAddress; const uint64_t main_ib_va = ib_va + radv_get_indirect_main_cmdbuf_offset(pGeneratedCommandsInfo); const uint64_t main_trailer_va = ib_va + radv_get_indirect_main_trailer_offset(pGeneratedCommandsInfo); + struct radv_cmd_stream *cs = cmd_buffer->cs; - device->ws->cs_chain_dgc_ib(cmd_buffer->cs, main_ib_va, cmdbuf_size >> 2, main_trailer_va, - cmd_buffer->state.predicating); + device->ws->cs_chain_dgc_ib(cs->b, main_ib_va, cmdbuf_size >> 2, main_trailer_va, cmd_buffer->state.predicating); if (task_shader) { const uint32_t ace_cmdbuf_size = radv_get_indirect_ace_cmdbuf_size(pGeneratedCommandsInfo); const uint64_t ace_ib_va = ib_va + radv_get_indirect_ace_cmdbuf_offset(pGeneratedCommandsInfo); const uint64_t ace_trailer_va = ib_va + radv_get_indirect_ace_trailer_offset(pGeneratedCommandsInfo); + struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs; - assert(cmd_buffer->gang.cs); - device->ws->cs_chain_dgc_ib(cmd_buffer->gang.cs, ace_ib_va, ace_cmdbuf_size >> 2, ace_trailer_va, + assert(ace_cs); + device->ws->cs_chain_dgc_ib(ace_cs->b, ace_ib_va, ace_cmdbuf_size >> 2, ace_trailer_va, cmd_buffer->state.predicating); } } @@ -12042,9 +12105,10 @@ radv_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPr vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT); const VkGeneratedCommandsShaderInfoEXT *eso_info = vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_SHADER_INFO_EXT); + struct radv_cmd_stream *cs = cmd_buffer->cs; if (ies) { - radv_cs_add_buffer(device->ws, cmd_buffer->cs, ies->bo); + radv_cs_add_buffer(device->ws, cs->b, ies->bo); cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, ies->compute_scratch_size_per_wave); @@ -12194,9 +12258,9 @@ static void radv_save_dispatch_size(struct radv_cmd_buffer *cmd_buffer, uint64_t indirect_va) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - radeon_check_space(device->ws, cs, 18); + radeon_check_space(device->ws, cs->b, 18); uint64_t va = radv_buffer_get_va(device->trace_bo) + offsetof(struct radv_trace_data, indirect_dispatch); @@ -12226,12 +12290,12 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv unsigned dispatch_initiator = device->dispatch_initiator; struct radeon_winsys *ws = device->ws; bool predicating = cmd_buffer->state.predicating; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const uint32_t grid_size_offset = radv_get_user_sgpr_loc(compute_shader, AC_UD_CS_GRID_SIZE); radv_describe_dispatch(cmd_buffer, info); - ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 30); + ASSERTED unsigned cdw_max = radeon_check_space(ws, cs->b, 30); if (compute_shader->info.wave_size == 32) { assert(pdev->info.gfx_level >= GFX10); @@ -12445,7 +12509,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv radeon_end(); } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } static void @@ -12465,6 +12529,7 @@ radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device =
radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *rt_prolog = cmd_buffer->state.rt_prolog; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned rsrc2 = rt_prolog->config.rsrc2; /* Reserve scratch for stacks manually since it is not handled by the compute path. */ @@ -12480,7 +12545,7 @@ radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.rt_stack_size) rsrc2 |= S_00B12C_SCRATCH_EN(1); - radeon_check_space(device->ws, cmd_buffer->cs, 3); + radeon_check_space(device->ws, cs->b, 3); radeon_begin(cmd_buffer->cs); radeon_set_sh_reg(rt_prolog->info.regs.pgm_rsrc2, rsrc2); radeon_end(); @@ -12624,8 +12689,9 @@ radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDevi VK_FROM_HANDLE(radv_buffer, buffer, _buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_dispatch_info info = {.indirect_va = vk_buffer_address(&buffer->vk, offset)}; + struct radv_cmd_stream *cs = cmd_buffer->cs; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); radv_compute_dispatch(cmd_buffer, &info); } @@ -12741,6 +12807,7 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_instance *instance = radv_physical_device_instance(pdev); + struct radv_cmd_stream *cs = cmd_buffer->cs; if (instance->debug_flags & RADV_DEBUG_NO_RT) return; @@ -12793,7 +12860,7 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K } else info.indirect_va = launch_size_va; - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 15); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 15); const uint32_t sbt_descriptors_offset = radv_get_user_sgpr_loc(rt_prolog, AC_UD_CS_SBT_DESCRIPTORS); if (sbt_descriptors_offset) { @@ -12809,7 +12876,7 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K radeon_end(); } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); radv_dispatch(cmd_buffer, &info, pipeline, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); @@ -13270,6 +13337,7 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (is_compute) cmd_buffer->state.flush_bits &= @@ -13283,7 +13351,7 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) return; } - radv_cs_emit_cache_flush(device->ws, cmd_buffer->cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx, + radv_cs_emit_cache_flush(device->ws, cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, &cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); @@ -13336,6 +13404,7 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t dep_count, const VkDep enum rgp_barrier_reason reason) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; enum radv_cmd_flush_bits src_flush_bits = 0; enum radv_cmd_flush_bits dst_flush_bits = 0; VkPipelineStageFlags2 src_stage_mask = 0; 
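Nearly every emit helper converted above follows the same bounds discipline, re-rooted at cs->b: reserve a worst-case dword budget on the underlying buffer, emit packets through the stream, then assert that the write cursor stayed within the reservation. A condensed sketch of that pattern, assuming only the helpers already used in this patch; the function name, the 4-dword budget, and the register choice are illustrative:

/* Illustrative sketch; emit_example, the budget, and the register are
 * examples, not taken from a specific hunk in this patch. */
static void
emit_example(struct radv_device *device, struct radv_cmd_stream *cs)
{
   /* Reserve worst-case space on the wrapped radeon_cmdbuf. */
   ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 4);

   radeon_begin(cs); /* the emission helpers take the stream itself */
   radeon_set_context_reg_seq(R_028C8C_CB_COLOR0_CLEAR_WORD0, 2);
   radeon_emit(0);
   radeon_emit(0);
   radeon_end();

   /* The write cursor (cdw) lives on the wrapped command buffer. */
   assert(cs->b->cdw <= cdw_max);
}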
@@ -13446,7 +13515,7 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t dep_count, const VkDep * Note that GFX9+ is supposed to have RAW dependency tracking, but it's buggy * so we can't rely on it for now. */ - radv_sdma_emit_nop(device, cmd_buffer->cs); + radv_sdma_emit_nop(device, cs); } else { const bool is_gfx_or_ace = cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE; if (is_gfx_or_ace) @@ -13479,7 +13548,7 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipe { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(event->bo); if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC || cmd_buffer->qf == RADV_QUEUE_VIDEO_ENC) { @@ -13489,9 +13558,9 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipe radv_emit_cache_flush(cmd_buffer); - radv_cs_add_buffer(device->ws, cs, event->bo); + radv_cs_add_buffer(device->ws, cs->b, event->bo); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 28); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 28); if (stageMask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT)) { @@ -13544,7 +13613,7 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipe EOP_DATA_SEL_VALUE_32BIT, va, value, cmd_buffer->gfx9_eop_bug_va); } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } VKAPI_ATTR void VKAPI_CALL @@ -13579,7 +13648,7 @@ radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const Vk { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC || cmd_buffer->qf == RADV_QUEUE_VIDEO_ENC) return; @@ -13588,12 +13657,12 @@ radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const Vk VK_FROM_HANDLE(radv_event, event, pEvents[i]); uint64_t va = radv_buffer_get_va(event->bo); - radv_cs_add_buffer(device->ws, cs, event->bo); + radv_cs_add_buffer(device->ws, cs->b, event->bo); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 7); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 7); radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff); - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } radv_barrier(cmd_buffer, eventCount, pDependencyInfos, RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS); @@ -13604,9 +13673,10 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t op = 0; - radeon_check_space(device->ws, cmd_buffer->cs, 4); + radeon_check_space(device->ws, cs->b, 4); if (va) { assert(pred_op == PREDICATION_OP_BOOL32 || pred_op == PREDICATION_OP_BOOL64); @@ -13620,7 +13690,7 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi op |= draw_visible ?
PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE; } - radeon_begin(cmd_buffer->cs); + radeon_begin(cs); if (pdev->info.gfx_level >= GFX9) { radeon_emit(PKT3(PKT3_SET_PREDICATION, 2, 0)); radeon_emit(op); @@ -13639,7 +13709,7 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned pred_op = PREDICATION_OP_BOOL32; uint64_t emulated_va = 0; @@ -13682,7 +13752,7 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va emulated_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; - radeon_check_space(device->ws, cmd_buffer->cs, 8); + radeon_check_space(device->ws, cs->b, 8); radeon_begin(cs); radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); @@ -13740,12 +13810,13 @@ radv_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; bool draw_visible = true; uint64_t va; va = vk_buffer_address(&buffer->vk, pConditionalRenderingBegin->offset); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); /* By default, if the 32-bit value at offset in buffer memory is zero, * then the rendering commands are discarded, otherwise they are @@ -13776,6 +13847,7 @@ radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint8_t enabled_mask = 0; assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); @@ -13791,7 +13863,7 @@ radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t sb[idx].size = pSizes[i]; } - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); enabled_mask |= 1 << idx; } @@ -13832,10 +13904,10 @@ radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned reg_strmout_cntl; - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 14); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 14); radeon_begin(cs); @@ -13866,7 +13938,7 @@ radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) radeon_emit(4); /* poll interval */ radeon_end(); - assert(cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } static void @@ -13922,7 +13994,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf); struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; assert(firstCounterBuffer + counterBufferCount <= 
MAX_SO_BUFFERS); @@ -13931,7 +14003,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC else if (!pdev->use_ngg_streamout) radv_flush_vgt_streamout(cmd_buffer); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 10); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 10); u_foreach_bit (i, so->enabled_mask) { int32_t counter_buffer_idx = i - firstCounterBuffer; @@ -13950,7 +14022,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC va += vk_buffer_address(&buffer->vk, counter_buffer_offset); - radv_cs_add_buffer(device->ws, cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); } radeon_begin(cs); @@ -14010,7 +14082,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC radeon_end(); } - assert(cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); radv_set_streamout_enable(cmd_buffer, true); @@ -14026,7 +14098,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); @@ -14043,7 +14115,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou radv_flush_vgt_streamout(cmd_buffer); } - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 12); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 12); u_foreach_bit (i, so->enabled_mask) { int32_t counter_buffer_idx = i - firstCounterBuffer; @@ -14062,7 +14134,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou va += vk_buffer_address(&buffer->vk, counter_buffer_offset); - radv_cs_add_buffer(device->ws, cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); } radeon_begin(cs); @@ -14110,7 +14182,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou radeon_end(); } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); radv_set_streamout_enable(cmd_buffer, false); } @@ -14122,7 +14194,7 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pdev->info.gfx_level >= GFX12) { radeon_begin(cs); @@ -14173,6 +14245,7 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc VK_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_draw_info info; info.count = 0; @@ -14183,7 +14256,7 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc info.indexed = false; info.indirect_va = 0; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, counterBuffer->bo); + radv_cs_add_buffer(device->ws, cs->b, counterBuffer->bo); if 
(!radv_before_draw(cmd_buffer, &info, 1, false)) return; @@ -14212,20 +14285,20 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag VK_FROM_HANDLE(radv_buffer, buffer, dstBuffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const uint64_t va = vk_buffer_address(&buffer->vk, dstOffset); - radv_cs_add_buffer(device->ws, cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { - radeon_check_space(device->ws, cmd_buffer->cs, 4); - radv_sdma_emit_fence(cmd_buffer->cs, va, marker); + radeon_check_space(device->ws, cs->b, 4); + radv_sdma_emit_fence(cs, va, marker); return; } radv_emit_cache_flush(cmd_buffer); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 12); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 12); if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) { radeon_begin(cs); @@ -14241,7 +14314,7 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker, cmd_buffer->gfx9_eop_bug_va); } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } /* VK_EXT_descriptor_buffer */ @@ -14367,14 +14440,14 @@ radv_bind_compute_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_ struct radv_shader *shader = shader_obj ? shader_obj->shader : NULL; struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radv_bind_shader(cmd_buffer, shader, MESA_SHADER_COMPUTE); if (!shader_obj) return; - ASSERTED const unsigned cdw_max = radeon_check_space(device->ws, cmd_buffer->cs, 128); + ASSERTED const unsigned cdw_max = radeon_check_space(device->ws, cs->b, 128); radv_emit_compute_shader(pdev, cs, shader); @@ -14388,7 +14461,7 @@ radv_bind_compute_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_ pc_state->size = shader_obj->push_constant_size; pc_state->dynamic_offset_count = shader_obj->dynamic_offset_count; - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 724c2eed1b0..57bdcb1a0df 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -575,7 +575,7 @@ struct radv_cmd_buffer { } gfx12; VkCommandBufferUsageFlags usage_flags; - struct radeon_cmdbuf *cs; + struct radv_cmd_stream *cs; struct radv_cmd_state state; struct radv_vertex_binding vertex_bindings[MAX_VBS]; struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS]; @@ -620,7 +620,7 @@ struct radv_cmd_buffer { */ struct { /** Follower command stream. */ - struct radeon_cmdbuf *cs; + struct radv_cmd_stream *cs; /** Flush bits for the follower cmdbuf. 
*/ enum radv_cmd_flush_bits flush_bits; @@ -888,7 +888,7 @@ struct radv_vbo_info { void radv_get_vbo_info(const struct radv_cmd_buffer *cmd_buffer, uint32_t vbo_idx, struct radv_vbo_info *vbo_info); -void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, +void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, const struct radv_shader *shader); void radv_upload_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, diff --git a/src/amd/vulkan/radv_cp_dma.c b/src/amd/vulkan/radv_cp_dma.c index 68dafdacf59..2deee13b73a 100644 --- a/src/amd/vulkan/radv_cp_dma.c +++ b/src/amd/vulkan/radv_cp_dma.c @@ -47,7 +47,7 @@ cp_dma_max_byte_count(enum amd_gfx_level gfx_level) * clear value. */ static void -radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va, +radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool predicating, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -56,7 +56,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level)); - radeon_check_space(device->ws, cs, 9); + radeon_check_space(device->ws, cs->b, 9); if (pdev->info.gfx_level >= GFX9) command |= S_415_BYTE_COUNT_GFX9(size); else @@ -106,7 +106,7 @@ static void radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; bool predicating = cmd_buffer->state.predicating; radv_cs_emit_cp_dma(device, cs, predicating, dst_va, src_va, size, flags); @@ -133,7 +133,7 @@ radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t s } void -radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size, +radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, unsigned size, bool predicating) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -146,7 +146,7 @@ radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf * assert(size <= cp_dma_max_byte_count(gfx_level)); - radeon_check_space(ws, cs, 9); + radeon_check_space(ws, cs->b, 9); uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1); uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va; diff --git a/src/amd/vulkan/radv_cp_dma.h b/src/amd/vulkan/radv_cp_dma.h index ce5cbf05243..eff7d0aa815 100644 --- a/src/amd/vulkan/radv_cp_dma.h +++ b/src/amd/vulkan/radv_cp_dma.h @@ -15,10 +15,10 @@ #include struct radv_device; -struct radeon_cmdbuf; +struct radv_cmd_stream; struct radv_cmd_buffer; -void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size, +void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, unsigned size, bool predicating); void radv_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size); diff --git a/src/amd/vulkan/radv_cp_reg_shadowing.c b/src/amd/vulkan/radv_cp_reg_shadowing.c index f01c3a9cd48..7f7bc7f73a0 100644 --- a/src/amd/vulkan/radv_cp_reg_shadowing.c +++ 
b/src/amd/vulkan/radv_cp_reg_shadowing.c @@ -19,13 +19,14 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s struct radeon_winsys *ws = device->ws; const struct radeon_info *gpu_info = &pdev->info; struct ac_pm4_state *pm4 = NULL; + struct radv_cmd_stream *cs; VkResult result; - struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false); - if (!cs) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs); + if (result != VK_SUCCESS) + return result; - radeon_check_space(ws, cs, 256); + radeon_check_space(ws, cs->b, 256); /* allocate memory for queue_state->shadowed_regs where register states are saved */ result = radv_bo_create(device, NULL, SI_SHADOWED_REG_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM, @@ -43,10 +44,10 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s radeon_emit_array(pm4->pm4, pm4->ndw); radeon_end(); - ws->cs_pad(cs, 0); + ws->cs_pad(cs->b, 0); result = radv_bo_create( - device, NULL, cs->cdw * 4, 4096, ws->cs_domain(ws), + device, NULL, cs->b->cdw * 4, 4096, ws->cs_domain(ws), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, RADV_BO_PRIORITY_CS, 0, true, &queue_state->shadow_regs_ib); if (result != VK_SUCCESS) @@ -60,13 +61,13 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s result = VK_ERROR_MEMORY_MAP_FAILED; goto fail_map; } - memcpy(map, cs->buf, cs->cdw * 4); - queue_state->shadow_regs_ib_size_dw = cs->cdw; + memcpy(map, cs->b->buf, cs->b->cdw * 4); + queue_state->shadow_regs_ib_size_dw = cs->b->cdw; ws->buffer_unmap(ws, queue_state->shadow_regs_ib, false); ac_pm4_free_state(pm4); - ws->cs_destroy(cs); + radv_destroy_cmd_stream(device, cs); return VK_SUCCESS; fail_map: radv_bo_destroy(device, NULL, queue_state->shadow_regs_ib); @@ -77,7 +78,7 @@ fail_create: radv_bo_destroy(device, NULL, queue_state->shadowed_regs); queue_state->shadowed_regs = NULL; fail: - ws->cs_destroy(cs); + radv_destroy_cmd_stream(device, cs); return result; } @@ -92,15 +93,15 @@ radv_destroy_shadow_regs_preamble(struct radv_device *device, struct radv_queue_ } void -radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device, +radv_emit_shadow_regs_preamble(struct radv_cmd_stream *cs, const struct radv_device *device, struct radv_queue_state *queue_state) { struct radeon_winsys *ws = device->ws; - ws->cs_execute_ib(cs, queue_state->shadow_regs_ib, 0, queue_state->shadow_regs_ib_size_dw & 0xffff, false); + ws->cs_execute_ib(cs->b, queue_state->shadow_regs_ib, 0, queue_state->shadow_regs_ib_size_dw & 0xffff, false); - radv_cs_add_buffer(device->ws, cs, queue_state->shadowed_regs); - radv_cs_add_buffer(device->ws, cs, queue_state->shadow_regs_ib); + radv_cs_add_buffer(device->ws, cs->b, queue_state->shadowed_regs); + radv_cs_add_buffer(device->ws, cs->b, queue_state->shadow_regs_ib); } /* radv_init_shadowed_regs_buffer_state() will be called once from radv_queue_init(). 
This @@ -111,14 +112,14 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra const struct radv_physical_device *pdev = radv_device_physical(device); const struct radeon_info *gpu_info = &pdev->info; struct radeon_winsys *ws = device->ws; - struct radeon_cmdbuf *cs; + struct radv_cmd_stream *cs; VkResult result; - cs = ws->cs_create(ws, AMD_IP_GFX, false); - if (!cs) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs); + if (result != VK_SUCCESS) + return result; - radeon_check_space(ws, cs, 768); + radeon_check_space(ws, cs->b, 768); radv_emit_shadow_regs_preamble(cs, device, &queue->state); @@ -136,13 +137,13 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra ac_pm4_free_state(pm4); } - result = ws->cs_finalize(cs); + result = radv_finalize_cmd_stream(device, cs); if (result == VK_SUCCESS) { - if (!radv_queue_internal_submit(queue, cs)) + if (!radv_queue_internal_submit(queue, cs->b)) result = VK_ERROR_UNKNOWN; } fail: - ws->cs_destroy(cs); + radv_destroy_cmd_stream(device, cs); return result; } diff --git a/src/amd/vulkan/radv_cp_reg_shadowing.h b/src/amd/vulkan/radv_cp_reg_shadowing.h index 7582f4711c6..049d4f70084 100644 --- a/src/amd/vulkan/radv_cp_reg_shadowing.h +++ b/src/amd/vulkan/radv_cp_reg_shadowing.h @@ -13,6 +13,7 @@ #include "radv_radeon_winsys.h" +struct radv_cmd_stream; struct radv_device; struct radv_queue_state; struct radv_queue; @@ -22,7 +23,7 @@ VkResult radv_create_shadow_regs_preamble(struct radv_device *device, struct rad void radv_destroy_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state, struct radeon_winsys *ws); -void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device, +void radv_emit_shadow_regs_preamble(struct radv_cmd_stream *cs, const struct radv_device *device, struct radv_queue_state *queue_state); VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue); diff --git a/src/amd/vulkan/radv_cs.c b/src/amd/vulkan/radv_cs.c index fa183409982..bf4c2650e76 100644 --- a/src/amd/vulkan/radv_cs.c +++ b/src/amd/vulkan/radv_cs.c @@ -17,7 +17,7 @@ #include "sid.h" void -radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf, +radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va) { @@ -111,7 +111,7 @@ radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_le } static void -radv_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl) +radv_emit_acquire_mem(struct radv_cmd_stream *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl) { radeon_begin(cs); @@ -137,7 +137,7 @@ radv_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsig } static void -gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt, +gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf, enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va) { @@ -381,7 +381,7 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level } 
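Taken together, the radv_cp_reg_shadowing.c hunks above also replace the raw winsys lifecycle (ws->cs_create() / ws->cs_finalize() / ws->cs_destroy()) with stream-level helpers that report errors as VkResult. A condensed sketch of the resulting create/finalize/submit/destroy pattern, using only the calls and signatures visible in these hunks; the hypothetical example_build_and_submit() stands in for the emit logic, which is elided:

static VkResult
example_build_and_submit(struct radv_device *device, struct radv_queue *queue)
{
   struct radv_cmd_stream *cs;
   VkResult result;

   /* Replaces ws->cs_create(), which signalled failure by returning NULL. */
   result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
   if (result != VK_SUCCESS)
      return result;

   radeon_check_space(device->ws, cs->b, 768);

   /* ... emit packets through the stream ... */

   result = radv_finalize_cmd_stream(device, cs);
   if (result == VK_SUCCESS) {
      /* Internal submission still consumes the raw winsys IB. */
      if (!radv_queue_internal_submit(queue, cs->b))
         result = VK_ERROR_UNKNOWN;
   }

   radv_destroy_cmd_stream(device, cs);
   return result;
}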
void -radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, +radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf, enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va) @@ -389,7 +389,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu unsigned cp_coher_cntl = 0; uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB); - radeon_check_space(ws, cs, 128); + radeon_check_space(ws, cs->b, 128); if (gfx_level >= GFX10) { /* GFX10 cache flush handling is quite different. */ @@ -592,7 +592,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu } void -radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count) +radv_emit_cond_exec(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, uint32_t count) { const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; @@ -616,7 +616,7 @@ radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, } void -radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm) +radv_cs_write_data_imm(struct radv_cmd_stream *cs, unsigned engine_sel, uint64_t va, uint32_t imm) { radeon_begin(cs); radeon_emit(PKT3(PKT3_WRITE_DATA, 3, 0)); diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index 58efa4cb11a..d5229d6b754 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -28,7 +28,7 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned } #define radeon_begin(cs) \ - struct radeon_cmdbuf *__cs = (cs); \ + struct radeon_cmdbuf *__cs = (cs)->b; \ uint32_t __cs_num = __cs->cdw; \ UNUSED uint32_t __cs_reserved_dw = __cs->reserved_dw; \ uint32_t *__cs_buf = __cs->buf @@ -340,7 +340,7 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned } while (0) ALWAYS_INLINE static void -radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va, +radv_cp_wait_mem(struct radv_cmd_stream *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va, const uint32_t ref, const uint32_t mask) { assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL); @@ -363,11 +363,11 @@ radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, cons } ALWAYS_INLINE static unsigned -radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf, +radv_cs_write_data_head(const struct radv_device *device, struct radv_cmd_stream *cs, const enum radv_queue_family qf, const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating) { /* Return the correct cdw at the end of the packet so the caller can assert it. 
*/ - const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count); + const unsigned cdw_end = radeon_check_space(device->ws, cs->b, 4 + count); if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) { radeon_begin(cs); @@ -386,7 +386,7 @@ radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf * } ALWAYS_INLINE static void -radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf, +radv_cs_write_data(const struct radv_device *device, struct radv_cmd_stream *cs, const enum radv_queue_family qf, const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords, const bool predicating) { @@ -395,24 +395,24 @@ radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, c radeon_begin(cs); radeon_emit_array(dwords, count); radeon_end(); - assert(cs->cdw == cdw_end); + assert(cs->b->cdw == cdw_end); } -void radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf, +void radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va); -void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, +void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf, enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va); -void radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count); +void radv_emit_cond_exec(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, uint32_t count); -void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm); +void radv_cs_write_data_imm(struct radv_cmd_stream *cs, unsigned engine_sel, uint64_t va, uint32_t imm); static inline void -radv_emit_pm4_commands(struct radeon_cmdbuf *cs, const struct ac_pm4_state *pm4) +radv_emit_pm4_commands(struct radv_cmd_stream *cs, const struct ac_pm4_state *pm4) { radeon_begin(cs); radeon_emit_array(pm4->pm4, pm4->ndw); diff --git a/src/amd/vulkan/radv_descriptors.h b/src/amd/vulkan/radv_descriptors.h index f241121ae4d..9fba9551ba0 100644 --- a/src/amd/vulkan/radv_descriptors.h +++ b/src/amd/vulkan/radv_descriptors.h @@ -42,10 +42,12 @@ radv_write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_b if (device->use_global_bo_list) return; - if (cmd_buffer) - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo); - else + if (cmd_buffer) { + struct radv_cmd_stream *cs = cmd_buffer->cs; + radv_cs_add_buffer(device->ws, cs->b, buffer_view->bo); + } else { *buffer_list = buffer_view->bo; + } } static ALWAYS_INLINE void @@ -90,10 +92,12 @@ radv_write_buffer_descriptor_impl(struct radv_device *device, struct radv_cmd_bu return; } - if (cmd_buffer) - radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo); - else + if (cmd_buffer) { + struct radv_cmd_stream *cs = cmd_buffer->cs; + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + } else { *buffer_list = buffer->bo; + } } static ALWAYS_INLINE void @@ -190,8 +194,9 @@ radv_write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buf const uint32_t max_bindings = 
sizeof(iview->image->bindings) / sizeof(iview->image->bindings[0]); for (uint32_t b = 0; b < max_bindings; b++) { if (cmd_buffer) { + struct radv_cmd_stream *cs = cmd_buffer->cs; if (iview->image->bindings[b].bo) - radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo); + radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo); } else { *buffer_list = iview->image->bindings[b].bo; buffer_list++; @@ -239,8 +244,9 @@ radv_write_image_descriptor_ycbcr_impl(struct radv_device *device, struct radv_c for (uint32_t b = 0; b < ARRAY_SIZE(iview->image->bindings); b++) { if (cmd_buffer) { + struct radv_cmd_stream *cs = cmd_buffer->cs; if (iview->image->bindings[b].bo) - radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo); + radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo); } else { *buffer_list = iview->image->bindings[b].bo; buffer_list++; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 1c2b21baa28..d5dda28a6b7 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -537,7 +537,7 @@ radv_device_init_perf_counter(struct radv_device *device) if (result != VK_SUCCESS) return result; - device->perf_counter_lock_cs = calloc(sizeof(struct radeon_cmdbuf *), 2 * PERF_CTR_MAX_PASSES); + device->perf_counter_lock_cs = calloc(sizeof(struct radv_cmd_stream *), 2 * PERF_CTR_MAX_PASSES); if (!device->perf_counter_lock_cs) return VK_ERROR_OUT_OF_HOST_MEMORY; @@ -558,7 +558,7 @@ radv_device_finish_perf_counter(struct radv_device *device) for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) { if (device->perf_counter_lock_cs[i]) - device->ws->cs_destroy(device->perf_counter_lock_cs[i]); + radv_destroy_cmd_stream(device, device->perf_counter_lock_cs[i]); } free(device->perf_counter_lock_cs); @@ -894,18 +894,21 @@ radv_device_init_cache_key(struct radv_device *device) static void radv_create_gfx_preamble(struct radv_device *device) { - struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false); - if (!cs) + struct radv_cmd_stream *cs; + VkResult result; + + result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs); + if (result != VK_SUCCESS) return; - radeon_check_space(device->ws, cs, 512); + radeon_check_space(device->ws, cs->b, 512); radv_emit_graphics(device, cs); - device->ws->cs_pad(cs, 0); + device->ws->cs_pad(cs->b, 0); - VkResult result = radv_bo_create( - device, NULL, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws), + result = radv_bo_create( + device, NULL, cs->b->cdw * 4, 4096, device->ws->cs_domain(device->ws), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, RADV_BO_PRIORITY_CS, 0, true, &device->gfx_init); if (result != VK_SUCCESS) @@ -917,12 +920,12 @@ radv_create_gfx_preamble(struct radv_device *device) device->gfx_init = NULL; goto fail; } - memcpy(map, cs->buf, cs->cdw * 4); + memcpy(map, cs->b->buf, cs->b->cdw * 4); device->ws->buffer_unmap(device->ws, device->gfx_init, false); - device->gfx_init_size_dw = cs->cdw; + device->gfx_init_size_dw = cs->b->cdw; fail: - device->ws->cs_destroy(cs); + radv_destroy_cmd_stream(device, cs); } /* For MSAA sample positions. 
*/ @@ -977,7 +980,7 @@ radv_get_default_max_sample_dist(int log_samples) } void -radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, int nr_samples) +radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, int nr_samples) { uint64_t centroid_priority; diff --git a/src/amd/vulkan/radv_device.h b/src/amd/vulkan/radv_device.h index bc71e1097a8..dcf305884be 100644 --- a/src/amd/vulkan/radv_device.h +++ b/src/amd/vulkan/radv_device.h @@ -33,6 +33,7 @@ #define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1) struct radv_image_view; +struct radv_cmd_stream; enum radv_dispatch_table { RADV_DEVICE_DISPATCH_TABLE, @@ -289,7 +290,7 @@ struct radv_device { struct radeon_winsys_bo *perf_counter_bo; /* Interleaved lock/unlock commandbuffers for perfcounter passes. */ - struct radeon_cmdbuf **perf_counter_lock_cs; + struct radv_cmd_stream **perf_counter_lock_cs; bool uses_shadow_regs; @@ -340,7 +341,7 @@ VkResult radv_device_init_vrs_state(struct radv_device *device); unsigned radv_get_default_max_sample_dist(int log_samples); -void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, +void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, int nr_samples); struct radv_color_buffer_info { diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index f13307457d6..172f438d155 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -3339,38 +3339,38 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio const struct radv_physical_device *pdev = radv_device_physical(device); uint8_t *ptr = set->mapped_ptr + set->stride * index; struct radv_compute_pipeline_metadata md; - struct radeon_cmdbuf *cs; + struct radv_cmd_stream cs; assert(shader->info.stage == MESA_SHADER_COMPUTE); radv_get_compute_shader_metadata(device, shader, &md); - cs = calloc(1, sizeof(*cs)); - if (!cs) + cs.b = calloc(1, sizeof(*cs.b)); + if (!cs.b) return; - cs->reserved_dw = cs->max_dw = 32; - cs->buf = malloc(cs->max_dw * 4); - if (!cs->buf) { - free(cs); + cs.b->reserved_dw = cs.b->max_dw = 32; + cs.b->buf = malloc(cs.b->max_dw * 4); + if (!cs.b->buf) { + free(cs.b); return; } - radv_emit_compute_shader(pdev, cs, shader); + radv_emit_compute_shader(pdev, &cs, shader); memcpy(ptr, &md, sizeof(md)); ptr += sizeof(md); - memcpy(ptr, &cs->cdw, sizeof(uint32_t)); + memcpy(ptr, &cs.b->cdw, sizeof(uint32_t)); ptr += sizeof(uint32_t); - memcpy(ptr, cs->buf, cs->cdw * sizeof(uint32_t)); - ptr += cs->cdw * sizeof(uint32_t); + memcpy(ptr, cs.b->buf, cs.b->cdw * sizeof(uint32_t)); + ptr += cs.b->cdw * sizeof(uint32_t); set->compute_scratch_size_per_wave = MAX2(set->compute_scratch_size_per_wave, shader->config.scratch_bytes_per_wave); set->compute_scratch_waves = MAX2(set->compute_scratch_waves, radv_get_max_scratch_waves(device, shader)); - free(cs->buf); - free(cs); + free(cs.b->buf); + free(cs.b); } static void diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 2510e330459..d89229d4b23 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -15,7 +15,7 @@ #include "sid.h" void -radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders) +radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders) { const struct radv_physical_device 
*pdev = radv_device_physical(device); @@ -35,7 +35,7 @@ radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf * } static void -radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, bool enable) +radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *cs, int family, bool enable) { radeon_begin(cs); @@ -49,7 +49,7 @@ radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs } void -radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm) +radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm) { uint32_t cp_perfmon_cntl; @@ -66,7 +66,7 @@ radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm) } static void -radv_perfcounter_emit_start(struct radeon_cmdbuf *cs, bool is_spm) +radv_perfcounter_emit_start(struct radv_cmd_stream *cs, bool is_spm) { uint32_t cp_perfmon_cntl; @@ -83,7 +83,7 @@ radv_perfcounter_emit_start(struct radeon_cmdbuf *cs, bool is_spm) } static void -radv_perfcounter_emit_stop(struct radeon_cmdbuf *cs, bool is_spm) +radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm) { uint32_t cp_perfmon_cntl; @@ -101,7 +101,7 @@ radv_perfcounter_emit_stop(struct radeon_cmdbuf *cs, bool is_spm) } void -radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family) +radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family) { /* Start SPM counters. */ radv_perfcounter_emit_start(cs, true); @@ -110,7 +110,7 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf } void -radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family) +radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -125,7 +125,7 @@ radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf } static void -radv_perfcounter_emit_sample(struct radeon_cmdbuf *cs) +radv_perfcounter_emit_sample(struct radv_cmd_stream *cs) { radeon_begin(cs); radeon_event_write(V_028A90_PERFCOUNTER_SAMPLE); @@ -493,7 +493,7 @@ radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCrea static void radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned value = S_030800_SH_BROADCAST_WRITES(1); if (se >= 0) { @@ -521,7 +521,7 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct ac_pc_block_base *regs = block->b->b; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned idx; assert(count <= regs->num_counters); @@ -551,7 +551,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct ac_pc_block_base *regs = block->b->b; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned reg = regs->counter0_lo; unsigned reg_delta = 8; @@ -596,7 +596,7 @@ radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *blo static void 
radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radeon_begin(cs); @@ -622,7 +622,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radv_perfcounter_emit_sample(cs); radv_pc_wait_idle(cmd_buffer); @@ -642,7 +642,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query radeon_emit(0); radeon_end(); - uint32_t *skip_dwords = cs->buf + (cs->cdw - 1); + uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1); for (unsigned i = 0; i < pool->num_pc_regs;) { enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]); @@ -670,7 +670,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query radv_cs_write_data_imm(cs, V_370_ME, signal_va, 1); } - *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1; + *skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1; } radv_emit_instance(cmd_buffer, -1, -1); @@ -680,19 +680,19 @@ void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const struct radv_physical_device *pdev = radv_device_physical(device); ASSERTED unsigned cdw_max; cmd_buffer->state.uses_perf_counters = true; - cdw_max = radeon_check_space(device->ws, cs, + cdw_max = radeon_check_space(device->ws, cs->b, 256 + /* Random one time stuff */ 10 * pool->num_passes + /* COND_EXECs */ pool->b.stride / 8 * (5 + 8)); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo); + radv_cs_add_buffer(device->ws, cs->b, pool->b.bo); + radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo); uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; radv_cs_write_data_imm(cs, V_370_ME, perf_ctr_va, 0); @@ -714,7 +714,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo radeon_emit(0); radeon_end(); - uint32_t *skip_dwords = cs->buf + (cs->cdw - 1); + uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1); for (unsigned i = 0; i < pool->num_pc_regs;) { enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]); @@ -733,7 +733,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo i += cnt; } - *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1; + *skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1; } radv_emit_instance(cmd_buffer, -1, -1); @@ -744,7 +744,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo radv_perfcounter_emit_start(cs, false); radv_emit_windowed_counters(device, cs, cmd_buffer->qf, true); - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } void @@ -752,16 +752,16 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; ASSERTED unsigned cdw_max; - cdw_max = 
radeon_check_space(device->ws, cs, + cdw_max = radeon_check_space(device->ws, cs->b, 256 + /* Reserved for things that don't scale with passes/counters */ 5 * pool->num_passes + /* COND_EXECs */ pool->b.stride / 8 * 8); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo); + radv_cs_add_buffer(device->ws, cs->b, pool->b.bo); + radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo); uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, @@ -774,7 +774,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool radv_emit_spi_config_cntl(device, cs, false); radv_emit_inhibit_clockgating(device, cs, false); - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } static uint64_t diff --git a/src/amd/vulkan/radv_perfcounter.h b/src/amd/vulkan/radv_perfcounter.h index f3ddf28cbe0..b44daa5ac8b 100644 --- a/src/amd/vulkan/radv_perfcounter.h +++ b/src/amd/vulkan/radv_perfcounter.h @@ -30,13 +30,13 @@ struct radv_pc_query_pool { struct radv_perfcounter_impl *counters; }; -void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders); +void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders); -void radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm); +void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm); -void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family); +void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family); -void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family); +void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family); void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool); diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 99ec0f4336e..1edeaaba09c 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -35,7 +35,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkQueryType qu uint32_t pipeline_stats_mask, uint32_t avail_offset, bool uses_emulated_queries); static void -gfx10_copy_shader_query(struct radeon_cmdbuf *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va) +gfx10_copy_shader_query(struct radv_cmd_stream *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va) { radeon_begin(cs); radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); @@ -93,7 +93,7 @@ enum radv_event_write { }; static void -radv_emit_event_write(const struct radeon_info *info, struct radeon_cmdbuf *cs, enum radv_event_write event, +radv_emit_event_write(const struct radeon_info *info, struct radv_cmd_stream *cs, enum radv_event_write event, uint64_t va) { radeon_begin(cs); @@ -322,9 +322,9 @@ radv_begin_occlusion_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkQu { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; - radeon_check_space(device->ws, cs, 11); + radeon_check_space(device->ws, cs->b, 11); ++cmd_buffer->state.active_occlusion_queries; if 
(cmd_buffer->state.active_occlusion_queries == 1) { @@ -356,9 +356,9 @@ radv_end_occlusion_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; - radeon_check_space(device->ws, cs, 14); + radeon_check_space(device->ws, cs->b, 14); cmd_buffer->state.active_occlusion_queries--; if (cmd_buffer->state.active_occlusion_queries == 0) { @@ -379,7 +379,7 @@ radv_copy_occlusion_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); if (!radv_occlusion_query_use_l2(pdev)) { @@ -390,7 +390,7 @@ radv_copy_occlusion_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv unsigned query = first_query + i; uint64_t src_va = va + query * pool->stride + rb_avail_offset; - radeon_check_space(device->ws, cs, 7); + radeon_check_space(device->ws, cs->b, 7); /* Waits on the upper word of the last DB entry */ radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff); @@ -631,9 +631,9 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; - radeon_check_space(device->ws, cs, 4); + radeon_check_space(device->ws, cs->b, 4); ++cmd_buffer->state.active_pipeline_queries; @@ -674,18 +674,19 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q if (pool->uses_ace) { uint32_t task_invoc_offset = radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); + struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs; if (pdev->info.gfx_level >= GFX11) { va += task_invoc_offset; - radeon_check_space(device->ws, cmd_buffer->gang.cs, 4); + radeon_check_space(device->ws, ace_cs->b, 4); - radv_emit_event_write(&pdev->info, cmd_buffer->gang.cs, RADV_EVENT_WRITE_PIPELINE_STAT, va); + radv_emit_event_write(&pdev->info, ace_cs, RADV_EVENT_WRITE_PIPELINE_STAT, va); } else { - radeon_check_space(device->ws, cmd_buffer->gang.cs, 11); + radeon_check_space(device->ws, ace_cs->b, 11); gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset); - radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000); + radv_cs_write_data_imm(ace_cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000); /* Record that the command buffer needs GDS. 
*/ cmd_buffer->gds_needed = true; @@ -704,11 +705,11 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device); - radeon_check_space(device->ws, cs, 16); + radeon_check_space(device->ws, cs->b, 16); cmd_buffer->state.active_pipeline_queries--; @@ -747,18 +748,19 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que if (pool->uses_ace) { uint32_t task_invoc_offset = radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); + struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs; if (pdev->info.gfx_level >= GFX11) { va += task_invoc_offset; - radeon_check_space(device->ws, cmd_buffer->gang.cs, 4); + radeon_check_space(device->ws, ace_cs->b, 4); - radv_emit_event_write(&pdev->info, cmd_buffer->gang.cs, RADV_EVENT_WRITE_PIPELINE_STAT, va); + radv_emit_event_write(&pdev->info, ace_cs, RADV_EVENT_WRITE_PIPELINE_STAT, va); } else { - radeon_check_space(device->ws, cmd_buffer->gang.cs, 11); + radeon_check_space(device->ws, ace_cs->b, 11); gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset); - radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000); + radv_cs_write_data_imm(ace_cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000); cmd_buffer->state.active_pipeline_ace_queries--; @@ -778,7 +780,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -789,7 +791,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct for (unsigned i = 0; i < query_count; ++i) { unsigned query = first_query + i; - radeon_check_space(device->ws, cs, 7); + radeon_check_space(device->ws, cs->b, 7); uint64_t avail_va = va + pool->availability_offset + 4 * query; @@ -801,7 +803,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct const uint64_t start_va = src_va + task_invoc_offset + 4; const uint64_t stop_va = start_va + pipelinestat_block_size; - radeon_check_space(device->ws, cs, 7 * 2); + radeon_check_space(device->ws, cs->b, 7 * 2); radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, start_va, 0x80000000, 0xffffffff); radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, stop_va, 0x80000000, 0xffffffff); @@ -942,9 +944,9 @@ emit_sample_streamout(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; - radeon_check_space(device->ws, cs, 4); + radeon_check_space(device->ws, cs->b, 4); assert(index < MAX_SO_STREAMS); @@ -978,7 +980,7 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i { struct radv_device *device = 
radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pdev->use_ngg_streamout) { /* generated prim counter */ @@ -1007,7 +1009,7 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pdev->use_ngg_streamout) { /* generated prim counter */ @@ -1036,7 +1038,7 @@ radv_copy_tfb_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query uint32_t query_count, uint64_t dst_va, uint64_t stride, VkQueryResultFlags flags) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1044,7 +1046,7 @@ radv_copy_tfb_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query unsigned query = first_query + i; uint64_t src_va = va + query * pool->stride; - radeon_check_space(device->ws, cs, 7 * 4); + radeon_check_space(device->ws, cs->b, 7 * 4); /* Wait on the upper word of all results. */ for (unsigned j = 0; j < 4; j++, src_va += 8) { @@ -1169,7 +1171,7 @@ radv_copy_timestamp_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv uint32_t query_count, uint64_t dst_va, uint64_t stride, VkQueryResultFlags flags) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1177,7 +1179,7 @@ radv_copy_timestamp_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv unsigned query = first_query + i; uint64_t local_src_va = va + query * pool->stride; - radeon_check_space(device->ws, cs, 7); + radeon_check_space(device->ws, cs->b, 7); /* Wait on the high 32 bits of the timestamp in * case the low part is 0xffffffff. @@ -1350,7 +1352,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool * { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pdev->info.gfx_level >= GFX11) { /* On GFX11+, primitives generated queries are always emulated. */ @@ -1399,7 +1401,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pdev->info.gfx_level >= GFX11) { /* On GFX11+, primitives generated queries are always emulated.
*/ @@ -1446,7 +1448,7 @@ radv_copy_pg_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query_ { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1456,7 +1458,7 @@ radv_copy_pg_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query_ unsigned query = first_query + i; uint64_t src_va = va + query * pool->stride; - radeon_check_space(device->ws, cs, 7 * 4); + radeon_check_space(device->ws, cs->b, 7 * 4); /* Wait on the upper word of the PrimitiveStorageNeeded result. */ radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff); @@ -1595,10 +1597,10 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pdev->info.gfx_level >= GFX11) { - radeon_check_space(device->ws, cs, 4); + radeon_check_space(device->ws, cs->b, 4); ++cmd_buffer->state.active_pipeline_queries; @@ -1624,12 +1626,12 @@ radv_end_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (pdev->info.gfx_level >= GFX11) { unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device); - radeon_check_space(device->ws, cs, 16); + radeon_check_space(device->ws, cs->b, 16); cmd_buffer->state.active_pipeline_queries--; @@ -1658,7 +1660,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); if (pdev->info.gfx_level >= GFX11) { @@ -1666,7 +1668,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q for (unsigned i = 0; i < query_count; ++i) { unsigned query = first_query + i; - radeon_check_space(device->ws, cs, 7); + radeon_check_space(device->ws, cs->b, 7); uint64_t avail_va = va + pool->availability_offset + 4 * query; @@ -1684,7 +1686,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q unsigned query = first_query + i; uint64_t src_va = va + query * pool->stride; - radeon_check_space(device->ws, cs, 7 * 2); + radeon_check_space(device->ws, cs->b, 7 * 2); /* Wait on the upper word. 
*/ radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff); @@ -2473,14 +2475,15 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_instance *instance = radv_physical_device_instance(pdev); const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset); + struct radv_cmd_stream *cs = cmd_buffer->cs; if (!queryCount) return; radv_suspend_conditional_rendering(cmd_buffer); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->bo); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, pool->bo); + radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo); /* Workaround engines that forget to properly specify WAIT_BIT because some driver implicitly * synchronizes before query copy. @@ -2652,10 +2655,10 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_query_pool, pool, queryPool); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); - radv_cs_add_buffer(device->ws, cs, pool->bo); + radv_cs_add_buffer(device->ws, cs->b, pool->bo); emit_query_flush(cmd_buffer, pool); @@ -2665,7 +2668,8 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo if (!radv_gang_init(cmd_buffer)) return; - radv_cs_add_buffer(device->ws, cmd_buffer->gang.cs, pool->bo); + struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs; + radv_cs_add_buffer(device->ws, ace_cs->b, pool->bo); } if (pool->uses_shader_query_buf) @@ -2711,7 +2715,7 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) { radeon_begin(cs); @@ -2739,21 +2743,21 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_instance *instance = radv_physical_device_instance(pdev); const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; const uint64_t va = radv_buffer_get_va(pool->bo); uint64_t query_va = va + pool->stride * query; - radv_cs_add_buffer(device->ws, cs, pool->bo); + radv_cs_add_buffer(device->ws, cs->b, pool->bo); assert(cmd_buffer->qf != RADV_QUEUE_VIDEO_DEC && cmd_buffer->qf != RADV_QUEUE_VIDEO_ENC); if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { if (instance->drirc.flush_before_timestamp_write) { - radv_sdma_emit_nop(device, cmd_buffer->cs); + radv_sdma_emit_nop(device, cs); } for (unsigned i = 0; i < num_queries; ++i, query_va += pool->stride) { - radeon_check_space(device->ws, cmd_buffer->cs, 3); + radeon_check_space(device->ws, cs->b, 3); radv_sdma_emit_write_timestamp(cs, query_va); } return; @@ -2766,7 +2770,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta radv_emit_cache_flush(cmd_buffer); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 28 * 
num_queries); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 28 * num_queries); for (unsigned i = 0; i < num_queries; i++) { radv_write_timestamp(cmd_buffer, query_va, stage); @@ -2779,7 +2783,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } VKAPI_ATTR void VKAPI_CALL @@ -2790,15 +2794,15 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(radv_query_pool, pool, queryPool); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t pool_va = radv_buffer_get_va(pool->bo); uint64_t query_va = pool_va + pool->stride * firstQuery; - radv_cs_add_buffer(device->ws, cs, pool->bo); + radv_cs_add_buffer(device->ws, cs->b, pool->bo); radv_emit_cache_flush(cmd_buffer); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 6 * accelerationStructureCount); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 6 * accelerationStructureCount); radeon_begin(cs); @@ -2835,5 +2839,5 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, } radeon_end(); - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index c2b38f7c431..bca134aabbf 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -360,7 +360,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon } static void -radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo, +radv_emit_gs_ring_sizes(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -369,10 +369,10 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st return; if (esgs_ring_bo) - radv_cs_add_buffer(device->ws, cs, esgs_ring_bo); + radv_cs_add_buffer(device->ws, cs->b, esgs_ring_bo); if (gsvs_ring_bo) - radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo); + radv_cs_add_buffer(device->ws, cs->b, gsvs_ring_bo); radeon_begin(cs); @@ -390,7 +390,8 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st } static void -radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo) +radv_emit_tess_factor_ring(struct radv_device *device, struct radv_cmd_stream *cs, + struct radeon_winsys_bo *tess_rings_bo) { const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t tf_va; @@ -401,7 +402,7 @@ radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, tf_ring_size = pdev->info.tess_factor_ring_size / 4; tf_va = radv_buffer_get_va(tess_rings_bo) + pdev->info.tess_offchip_ring_size; - radv_cs_add_buffer(device->ws, cs, tess_rings_bo); + radv_cs_add_buffer(device->ws, cs->b, tess_rings_bo); radeon_begin(cs); @@ -465,7 +466,7 @@ radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_wi } static void 
-radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *task_rings_bo, +radv_emit_task_rings(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *task_rings_bo, bool compute) { if (!task_rings_bo) @@ -473,7 +474,7 @@ radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struc const uint64_t task_ctrlbuf_va = radv_buffer_get_va(task_rings_bo); assert(util_is_aligned(task_ctrlbuf_va, 256)); - radv_cs_add_buffer(device->ws, cs, task_rings_bo); + radv_cs_add_buffer(device->ws, cs->b, task_rings_bo); radeon_begin(cs); @@ -488,8 +489,8 @@ radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struc } static void -radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, - struct radeon_winsys_bo *scratch_bo) +radv_emit_graphics_scratch(struct radv_device *device, struct radv_cmd_stream *cs, uint32_t size_per_wave, + uint32_t waves, struct radeon_winsys_bo *scratch_bo) { const struct radv_physical_device *pdev = radv_device_physical(device); const struct radeon_info *gpu_info = &pdev->info; @@ -500,7 +501,7 @@ radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, ac_get_scratch_tmpring_size(gpu_info, waves, size_per_wave, &tmpring_size); - radv_cs_add_buffer(device->ws, cs, scratch_bo); + radv_cs_add_buffer(device->ws, cs->b, scratch_bo); radeon_begin(cs); @@ -519,8 +520,8 @@ radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, } static void -radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, - struct radeon_winsys_bo *compute_scratch_bo) +radv_emit_compute_scratch(struct radv_device *device, struct radv_cmd_stream *cs, uint32_t size_per_wave, + uint32_t waves, struct radeon_winsys_bo *compute_scratch_bo) { const struct radv_physical_device *pdev = radv_device_physical(device); const struct radeon_info *gpu_info = &pdev->info; @@ -541,7 +542,7 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, ac_get_scratch_tmpring_size(gpu_info, waves, size_per_wave, &tmpring_size); - radv_cs_add_buffer(device->ws, cs, compute_scratch_bo); + radv_cs_add_buffer(device->ws, cs->b, compute_scratch_bo); radeon_begin(cs); @@ -563,14 +564,14 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, } static void -radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, +radv_emit_compute_shader_pointers(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *descriptor_bo) { if (!descriptor_bo) return; uint64_t va = radv_buffer_get_va(descriptor_bo); - radv_cs_add_buffer(device->ws, cs, descriptor_bo); + radv_cs_add_buffer(device->ws, cs->b, descriptor_bo); /* Compute shader user data 0-1 have the scratch pointer (unlike GFX shaders), * so emit the descriptor pointer to user data 2-3 instead (task_ring_offsets arg). 
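The radv_cmd_stream type itself is never defined in this patch, but the call sites pin down its shape: the radeon_begin()/radeon_emit()/radeon_end() macros and the higher-level emit helpers now take the wrapper, while raw winsys entry points (radeon_check_space(), radv_cs_add_buffer(), cs_execute_ib(), cs_chain(), ...) keep operating on the underlying radeon_cmdbuf reached through the b member. A minimal sketch of the presumed layout and of the lifecycle helpers used throughout follows; only the b member and the argument lists visible at the call sites are taken from the patch, everything else is an assumption:

   /* Sketch only: 'b' and the helper signatures are inferred from call sites
    * in this patch; any further fields are assumptions. */
   struct radv_cmd_stream {
      struct radeon_cmdbuf *b; /* underlying winsys command buffer */
   };

   /* The bool is assumed to mirror the old cs_create() flag. */
   VkResult radv_create_cmd_stream(struct radv_device *device, enum radv_queue_family qf,
                                   bool is_secondary, struct radv_cmd_stream **cs_out);
   VkResult radv_finalize_cmd_stream(struct radv_device *device, struct radv_cmd_stream *cs);
   void radv_reset_cmd_stream(struct radv_device *device, struct radv_cmd_stream *cs);
   void radv_destroy_cmd_stream(struct radv_device *device, struct radv_cmd_stream *cs);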
@@ -581,7 +582,7 @@ radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdb } static void -radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, +radv_emit_graphics_shader_pointers(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *descriptor_bo) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -592,7 +593,7 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd va = radv_buffer_get_va(descriptor_bo); - radv_cs_add_buffer(device->ws, cs, descriptor_bo); + radv_cs_add_buffer(device->ws, cs->b, descriptor_bo); radeon_begin(cs); @@ -638,7 +639,7 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd } static void -radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *ge_rings_bo) +radv_emit_ge_rings(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *ge_rings_bo) { const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t va; @@ -651,7 +652,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct va = radv_buffer_get_va(ge_rings_bo); assert((va >> 32) == pdev->info.address32_hi); - radv_cs_add_buffer(device->ws, cs, ge_rings_bo); + radv_cs_add_buffer(device->ws, cs->b, ge_rings_bo); radeon_begin(cs); @@ -716,7 +717,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct } static void -radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs, bool is_compute_queue) +radv_emit_compute(struct radv_device *device, struct radv_cmd_stream *cs, bool is_compute_queue) { const struct radv_physical_device *pdev = radv_device_physical(device); const uint64_t border_color_va = device->border_color_data.bo ? radv_buffer_get_va(device->border_color_data.bo) : 0; @@ -768,7 +769,7 @@ radv_pack_float_12p4(float x) } void -radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) +radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs) { struct radv_physical_device *pdev = radv_device_physical(device); const uint64_t border_color_va = device->border_color_data.bo ? 
radv_buffer_get_va(device->border_color_data.bo) : 0; @@ -941,14 +942,14 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) } static void -radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_device *device) +radv_init_graphics_state(struct radv_cmd_stream *cs, struct radv_device *device) { if (device->gfx_init) { struct radeon_winsys *ws = device->ws; - ws->cs_execute_ib(cs, device->gfx_init, 0, device->gfx_init_size_dw & 0xffff, false); + ws->cs_execute_ib(cs->b, device->gfx_init, 0, device->gfx_init_size_dw & 0xffff, false); - radv_cs_add_buffer(device->ws, cs, device->gfx_init); + radv_cs_add_buffer(device->ws, cs->b, device->gfx_init); } else { radv_emit_graphics(device, cs); } @@ -971,7 +972,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi struct radeon_winsys_bo *ge_rings_bo = queue->ge_rings_bo; struct radeon_winsys_bo *gds_bo = queue->gds_bo; struct radeon_winsys_bo *gds_oa_bo = queue->gds_oa_bo; - struct radeon_cmdbuf *dest_cs[3] = {0}; + struct radv_cmd_stream *dest_cs[3] = {0}; const uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING; VkResult result = VK_SUCCESS; @@ -1134,18 +1135,17 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi for (int i = 0; i < 3; ++i) { enum rgp_flush_bits sqtt_flush_bits = 0; - struct radeon_cmdbuf *cs = NULL; - cs = ws->cs_create(ws, radv_queue_family_to_ring(pdev, queue->qf), false); - if (!cs) { - result = VK_ERROR_OUT_OF_DEVICE_MEMORY; - goto fail; - } + struct radv_cmd_stream *cs = NULL; - radeon_check_space(ws, cs, 512); + result = radv_create_cmd_stream(device, queue->qf, false, &cs); + if (result != VK_SUCCESS) + goto fail; + + radeon_check_space(ws, cs->b, 512); dest_cs[i] = cs; if (scratch_bo) - radv_cs_add_buffer(ws, cs, scratch_bo); + radv_cs_add_buffer(ws, cs->b, scratch_bo); /* Emit initial configuration. 
*/ switch (queue->qf) { @@ -1205,19 +1205,19 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi radv_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->qf, flush_bits, &sqtt_flush_bits, 0); } - result = ws->cs_finalize(cs); + result = radv_finalize_cmd_stream(device, cs); if (result != VK_SUCCESS) goto fail; } if (queue->initial_full_flush_preamble_cs) - ws->cs_destroy(queue->initial_full_flush_preamble_cs); + radv_destroy_cmd_stream(device, queue->initial_full_flush_preamble_cs); if (queue->initial_preamble_cs) - ws->cs_destroy(queue->initial_preamble_cs); + radv_destroy_cmd_stream(device, queue->initial_preamble_cs); if (queue->continue_preamble_cs) - ws->cs_destroy(queue->continue_preamble_cs); + radv_destroy_cmd_stream(device, queue->continue_preamble_cs); queue->initial_full_flush_preamble_cs = dest_cs[0]; queue->initial_preamble_cs = dest_cs[1]; @@ -1272,7 +1272,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi fail: for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i) if (dest_cs[i]) - ws->cs_destroy(dest_cs[i]); + radv_destroy_cmd_stream(device, dest_cs[i]); if (descriptor_bo && descriptor_bo != queue->descriptor_bo) radv_bo_destroy(device, NULL, descriptor_bo); if (scratch_bo && scratch_bo != queue->scratch_bo) @@ -1394,12 +1394,14 @@ radv_create_flush_postamble(struct radv_queue *queue) const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_ip_type ip = radv_queue_family_to_ring(pdev, queue->state.qf); struct radeon_winsys *ws = device->ws; + struct radv_cmd_stream *cs; + VkResult result; - struct radeon_cmdbuf *cs = ws->cs_create(ws, ip, false); - if (!cs) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + result = radv_create_cmd_stream(device, queue->state.qf, false, &cs); + if (result != VK_SUCCESS) + return result; - radeon_check_space(ws, cs, 256); + radeon_check_space(ws, cs->b, 256); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; enum radv_cmd_flush_bits flush_bits = 0; @@ -1418,10 +1420,10 @@ radv_create_flush_postamble(struct radv_queue *queue) enum rgp_flush_bits sqtt_flush_bits = 0; radv_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->state.qf, flush_bits, &sqtt_flush_bits, 0); - VkResult r = ws->cs_finalize(cs); - if (r != VK_SUCCESS) { - ws->cs_destroy(cs); - return r; + result = radv_finalize_cmd_stream(device, cs); + if (result != VK_SUCCESS) { + radv_destroy_cmd_stream(device, cs); + return result; } queue->state.flush_postamble_cs = cs; @@ -1439,7 +1441,6 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) VkResult r = VK_SUCCESS; struct radeon_winsys *ws = device->ws; - const enum amd_ip_type leader_ip = radv_queue_family_to_ring(pdev, queue->state.qf); struct radeon_winsys_bo *gang_sem_bo = NULL; /* Gang semaphores BO. 
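Besides the type change, the hunks below rework how the four gang streams are created and carry the gang handshake over to the wrapper. The handshake itself, reconstructed from the waits and writes emitted below (the matching leader-side wait on leader_wait_va is emitted outside this excerpt, so that step is an assumption):

   /* Two dwords in gang_sem_bo: ace_wait_va = base + 0, leader_wait_va = base + 4.
    *
    * leader_pre_cs:  WRITE_DATA   ace_wait_va <- 1       release the follower
    * ace_pre_cs:     WAIT_REG_MEM until ace_wait_va >= 1
    *                 WRITE_DATA   ace_wait_va <- 0       re-arm for the next submit
    * ace_post_cs:    BOTTOM_OF_PIPE_TS EOP writes leader_wait_va <- 1
    * leader_post_cs: waits until leader_wait_va >= 1 (assumed), so the leader
    *                 cannot signal the userspace fence before the follower is idle.
    */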
@@ -1452,25 +1453,34 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) if (r != VK_SUCCESS) return r; - struct radeon_cmdbuf *leader_pre_cs = ws->cs_create(ws, leader_ip, false); - struct radeon_cmdbuf *leader_post_cs = ws->cs_create(ws, leader_ip, false); - struct radeon_cmdbuf *ace_pre_cs = ws->cs_create(ws, AMD_IP_COMPUTE, false); - struct radeon_cmdbuf *ace_post_cs = ws->cs_create(ws, AMD_IP_COMPUTE, false); + struct radv_cmd_stream *leader_pre_cs = NULL, *leader_post_cs = NULL; + struct radv_cmd_stream *ace_pre_cs = NULL, *ace_post_cs = NULL; - if (!leader_pre_cs || !leader_post_cs || !ace_pre_cs || !ace_post_cs) { - r = VK_ERROR_OUT_OF_DEVICE_MEMORY; + r = radv_create_cmd_stream(device, queue->state.qf, false, &leader_pre_cs); + if (r != VK_SUCCESS) goto fail; - } - radeon_check_space(ws, leader_pre_cs, 256); - radeon_check_space(ws, leader_post_cs, 256); - radeon_check_space(ws, ace_pre_cs, 256); - radeon_check_space(ws, ace_post_cs, 256); + r = radv_create_cmd_stream(device, queue->state.qf, false, &leader_post_cs); + if (r != VK_SUCCESS) + goto fail; - radv_cs_add_buffer(ws, leader_pre_cs, gang_sem_bo); - radv_cs_add_buffer(ws, leader_post_cs, gang_sem_bo); - radv_cs_add_buffer(ws, ace_pre_cs, gang_sem_bo); - radv_cs_add_buffer(ws, ace_post_cs, gang_sem_bo); + r = radv_create_cmd_stream(device, RADV_QUEUE_COMPUTE, false, &ace_pre_cs); + if (r != VK_SUCCESS) + goto fail; + + r = radv_create_cmd_stream(device, RADV_QUEUE_COMPUTE, false, &ace_post_cs); + if (r != VK_SUCCESS) + goto fail; + + radeon_check_space(ws, leader_pre_cs->b, 256); + radeon_check_space(ws, leader_post_cs->b, 256); + radeon_check_space(ws, ace_pre_cs->b, 256); + radeon_check_space(ws, ace_post_cs->b, 256); + + radv_cs_add_buffer(ws, leader_pre_cs->b, gang_sem_bo); + radv_cs_add_buffer(ws, leader_post_cs->b, gang_sem_bo); + radv_cs_add_buffer(ws, ace_pre_cs->b, gang_sem_bo); + radv_cs_add_buffer(ws, ace_post_cs->b, gang_sem_bo); const uint64_t ace_wait_va = radv_buffer_get_va(gang_sem_bo); const uint64_t leader_wait_va = ace_wait_va + 4; @@ -1486,7 +1496,6 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) radv_cp_wait_mem(ace_pre_cs, RADV_QUEUE_COMPUTE, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff); radv_cs_write_data(device, ace_pre_cs, RADV_QUEUE_COMPUTE, V_370_ME, ace_wait_va, 1, &zero, false); radv_cs_write_data(device, leader_pre_cs, queue->state.qf, V_370_ME, ace_wait_va, 1, &one, false); - /* Create postambles for gang submission. 
* This ensures that the gang leader waits for the whole gang, * which is necessary because the kernel signals the userspace fence @@ -1498,16 +1507,16 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, RADV_QUEUE_COMPUTE, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0); - r = ws->cs_finalize(leader_pre_cs); + r = radv_finalize_cmd_stream(device, leader_pre_cs); if (r != VK_SUCCESS) goto fail; - r = ws->cs_finalize(leader_post_cs); + r = radv_finalize_cmd_stream(device, leader_post_cs); if (r != VK_SUCCESS) goto fail; - r = ws->cs_finalize(ace_pre_cs); + r = radv_finalize_cmd_stream(device, ace_pre_cs); if (r != VK_SUCCESS) goto fail; - r = ws->cs_finalize(ace_post_cs); + r = radv_finalize_cmd_stream(device, ace_post_cs); if (r != VK_SUCCESS) goto fail; @@ -1521,13 +1530,13 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) fail: if (leader_pre_cs) - ws->cs_destroy(leader_pre_cs); + radv_destroy_cmd_stream(device, leader_pre_cs); if (leader_post_cs) - ws->cs_destroy(leader_post_cs); + radv_destroy_cmd_stream(device, leader_post_cs); if (ace_pre_cs) - ws->cs_destroy(ace_pre_cs); + radv_destroy_cmd_stream(device, ace_pre_cs); if (ace_post_cs) - ws->cs_destroy(ace_post_cs); + radv_destroy_cmd_stream(device, ace_post_cs); if (gang_sem_bo) radv_bo_destroy(device, &queue->vk.base, gang_sem_bo); @@ -1585,22 +1594,23 @@ radv_update_gang_preambles(struct radv_queue *queue) return VK_SUCCESS; } -static struct radeon_cmdbuf * +static struct radv_cmd_stream * radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool unlock) { - struct radeon_cmdbuf **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)]; - struct radeon_cmdbuf *cs; + struct radv_cmd_stream **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)]; + struct radv_cmd_stream *cs; + VkResult result; if (*cs_ref) return *cs_ref; - cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false); - if (!cs) + result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs); + if (result != VK_SUCCESS) return NULL; - ASSERTED unsigned cdw = radeon_check_space(device->ws, cs, 21); + ASSERTED unsigned cdw = radeon_check_space(device->ws, cs->b, 21); - radv_cs_add_buffer(device->ws, cs, device->perf_counter_bo); + radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo); radeon_begin(cs); @@ -1650,11 +1660,11 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool } radeon_end(); - assert(cs->cdw <= cdw); + assert(cs->b->cdw <= cdw); - VkResult result = device->ws->cs_finalize(cs); + result = radv_finalize_cmd_stream(device, cs); if (result != VK_SUCCESS) { - device->ws->cs_destroy(cs); + radv_destroy_cmd_stream(device, cs); return NULL; } @@ -1662,7 +1672,7 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool * alternative. */ if (p_atomic_cmpxchg((uintptr_t *)cs_ref, 0, (uintptr_t)cs) != 0) { - device->ws->cs_destroy(cs); + radv_destroy_cmd_stream(device, cs); } return *cs_ref; @@ -1748,18 +1758,18 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi if (queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE) { initial_preambles[num_initial_preambles++] = - need_wait ? queue->state.initial_full_flush_preamble_cs : queue->state.initial_preamble_cs; + need_wait ? 
queue->state.initial_full_flush_preamble_cs->b : queue->state.initial_preamble_cs->b; - continue_preambles[num_continue_preambles++] = queue->state.continue_preamble_cs; + continue_preambles[num_continue_preambles++] = queue->state.continue_preamble_cs->b; if (use_perf_counters) { /* RADV only supports perf counters on the GFX queue currently. */ assert(queue->state.qf == RADV_QUEUE_GENERAL); /* Create the lock/unlock CS. */ - struct radeon_cmdbuf *perf_ctr_lock_cs = + struct radv_cmd_stream *perf_ctr_lock_cs = radv_create_perf_counter_lock_cs(device, submission->perf_pass_index, false); - struct radeon_cmdbuf *perf_ctr_unlock_cs = + struct radv_cmd_stream *perf_ctr_unlock_cs = radv_create_perf_counter_lock_cs(device, submission->perf_pass_index, true); if (!perf_ctr_lock_cs || !perf_ctr_unlock_cs) { @@ -1767,14 +1777,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi goto fail; } - initial_preambles[num_initial_preambles++] = perf_ctr_lock_cs; - continue_preambles[num_continue_preambles++] = perf_ctr_lock_cs; - postambles[num_postambles++] = perf_ctr_unlock_cs; + initial_preambles[num_initial_preambles++] = perf_ctr_lock_cs->b; + continue_preambles[num_continue_preambles++] = perf_ctr_lock_cs->b; + postambles[num_postambles++] = perf_ctr_unlock_cs->b; } } if (queue->state.flush_postamble_cs) { - postambles[num_postambles++] = queue->state.flush_postamble_cs; + postambles[num_postambles++] = queue->state.flush_postamble_cs->b; } const unsigned num_1q_initial_preambles = num_initial_preambles; @@ -1782,17 +1792,17 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi const unsigned num_1q_postambles = num_postambles; if (use_ace) { - initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs; - initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs; - initial_preambles[num_initial_preambles++] = - need_wait ? queue->follower_state->initial_full_flush_preamble_cs : queue->follower_state->initial_preamble_cs; + initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs->b; + initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs->b; + initial_preambles[num_initial_preambles++] = need_wait ? 
queue->follower_state->initial_full_flush_preamble_cs->b + : queue->follower_state->initial_preamble_cs->b; - continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs; - continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs; - continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs; + continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs->b; + continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs->b; + continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs->b; - postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs; - postambles[num_postambles++] = queue->state.gang_wait_postamble_cs; + postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs->b; + postambles[num_postambles++] = queue->state.gang_wait_postamble_cs->b; } struct radv_winsys_submit_info submit = { @@ -1826,12 +1836,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j + c]; assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); const bool can_chain_next = !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT); + struct radv_cmd_stream *cs = cmd_buffer->cs; + struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs; /* Follower needs to be before the gang leader because the last CS must match the queue's IP type. */ - if (cmd_buffer->gang.cs) { - device->ws->cs_unchain(cmd_buffer->gang.cs); - if (!chainable_ace || !device->ws->cs_chain(chainable_ace, cmd_buffer->gang.cs, false)) { - cs_array[num_submitted_cs++] = cmd_buffer->gang.cs; + if (ace_cs) { + device->ws->cs_unchain(ace_cs->b); + if (!chainable_ace || !device->ws->cs_chain(chainable_ace, ace_cs->b, false)) { + cs_array[num_submitted_cs++] = ace_cs->b; /* Prevent chaining the gang leader when the follower couldn't be chained. * Otherwise, they would be in the wrong order. @@ -1839,19 +1851,18 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi chainable = NULL; } - chainable_ace = can_chain_next ? cmd_buffer->gang.cs : NULL; + chainable_ace = can_chain_next ? ace_cs->b : NULL; submit_ace = true; } - device->ws->cs_unchain(cmd_buffer->cs); - if (!chainable || !device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) { + device->ws->cs_unchain(cs->b); + if (!chainable || !device->ws->cs_chain(chainable, cs->b, queue->state.uses_shadow_regs)) { /* don't submit empty command buffers to the kernel. */ - if ((radv_queue_ring(queue) != AMD_IP_VCN_ENC && radv_queue_ring(queue) != AMD_IP_UVD) || - cmd_buffer->cs->cdw != 0) - cs_array[num_submitted_cs++] = cmd_buffer->cs; + if ((radv_queue_ring(queue) != AMD_IP_VCN_ENC && radv_queue_ring(queue) != AMD_IP_UVD) || cs->b->cdw != 0) + cs_array[num_submitted_cs++] = cs->b; } - chainable = can_chain_next ? cmd_buffer->cs : NULL; + chainable = can_chain_next ? cs->b : NULL; } submit.cs_count = num_submitted_cs; @@ -1873,8 +1884,8 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi radv_check_trap_handler(queue); } - initial_preambles[0] = queue->state.initial_preamble_cs; - initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs; + initial_preambles[0] = queue->state.initial_preamble_cs ? 
queue->state.initial_preamble_cs->b : NULL; + initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs->b; } queue->last_shader_upload_seq = MAX2(queue->last_shader_upload_seq, shader_upload_seq); @@ -2047,17 +2058,17 @@ radv_queue_state_finish(struct radv_queue_state *queue, struct radv_device *devi { radv_destroy_shadow_regs_preamble(device, queue, device->ws); if (queue->initial_full_flush_preamble_cs) - device->ws->cs_destroy(queue->initial_full_flush_preamble_cs); + radv_destroy_cmd_stream(device, queue->initial_full_flush_preamble_cs); if (queue->initial_preamble_cs) - device->ws->cs_destroy(queue->initial_preamble_cs); + radv_destroy_cmd_stream(device, queue->initial_preamble_cs); if (queue->continue_preamble_cs) - device->ws->cs_destroy(queue->continue_preamble_cs); + radv_destroy_cmd_stream(device, queue->continue_preamble_cs); if (queue->gang_wait_preamble_cs) - device->ws->cs_destroy(queue->gang_wait_preamble_cs); + radv_destroy_cmd_stream(device, queue->gang_wait_preamble_cs); if (queue->gang_wait_postamble_cs) - device->ws->cs_destroy(queue->gang_wait_postamble_cs); + radv_destroy_cmd_stream(device, queue->gang_wait_postamble_cs); if (queue->flush_postamble_cs) - device->ws->cs_destroy(queue->flush_postamble_cs); + radv_destroy_cmd_stream(device, queue->flush_postamble_cs); if (queue->descriptor_bo) radv_bo_destroy(device, NULL, queue->descriptor_bo); if (queue->scratch_bo) { diff --git a/src/amd/vulkan/radv_queue.h b/src/amd/vulkan/radv_queue.h index 0dc0dea5d2e..2eb909aa843 100644 --- a/src/amd/vulkan/radv_queue.h +++ b/src/amd/vulkan/radv_queue.h @@ -16,6 +16,7 @@ #include "radv_radeon_winsys.h" struct radv_physical_device; +struct radv_cmd_stream; struct radv_device; struct radv_queue_ring_info { @@ -62,12 +63,12 @@ struct radv_queue_state { struct radeon_winsys_bo *gds_bo; struct radeon_winsys_bo *gds_oa_bo; - struct radeon_cmdbuf *initial_preamble_cs; - struct radeon_cmdbuf *initial_full_flush_preamble_cs; - struct radeon_cmdbuf *continue_preamble_cs; - struct radeon_cmdbuf *gang_wait_preamble_cs; - struct radeon_cmdbuf *gang_wait_postamble_cs; - struct radeon_cmdbuf *flush_postamble_cs; /* GFX6 only */ + struct radv_cmd_stream *initial_preamble_cs; + struct radv_cmd_stream *initial_full_flush_preamble_cs; + struct radv_cmd_stream *continue_preamble_cs; + struct radv_cmd_stream *gang_wait_preamble_cs; + struct radv_cmd_stream *gang_wait_postamble_cs; + struct radv_cmd_stream *flush_postamble_cs; /* GFX6 only */ /* the uses_shadow_regs here will be set only for general queue */ bool uses_shadow_regs; @@ -108,7 +109,7 @@ void radv_queue_finish(struct radv_queue *queue); enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfo *pObj); -void radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs); +void radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs); bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs); diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c index a346a3a1ddb..64e25ba28c9 100644 --- a/src/amd/vulkan/radv_sdma.c +++ b/src/amd/vulkan/radv_sdma.c @@ -346,17 +346,17 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima } void -radv_sdma_emit_nop(const struct radv_device *device, struct radeon_cmdbuf *cs) +radv_sdma_emit_nop(const struct radv_device *device, struct radv_cmd_stream *cs) { /* SDMA NOP acts as a fence command and causes the SDMA engine to wait for pending copy 
operations. */ - radeon_check_space(device->ws, cs, 1); + radeon_check_space(device->ws, cs->b, 1); radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); radeon_end(); } void -radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va) +radv_sdma_emit_write_timestamp(struct radv_cmd_stream *cs, uint64_t va) { radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_TIMESTAMP, SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP, 0)); @@ -366,7 +366,7 @@ radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va) } void -radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence) +radv_sdma_emit_fence(struct radv_cmd_stream *cs, uint64_t va, uint32_t fence) { radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_FENCE, 0, SDMA_FENCE_MTYPE_UC)); @@ -377,7 +377,7 @@ radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence) } void -radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask) +radv_sdma_emit_wait_mem(struct radv_cmd_stream *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask) { radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_POLL_REGMEM, 0, 0) | op << 28 | SDMA_POLL_MEM); @@ -390,7 +390,7 @@ radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint } void -radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t count) +radv_sdma_emit_write_data_head(struct radv_cmd_stream *cs, uint64_t va, uint32_t count) { radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); @@ -401,7 +401,7 @@ radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t c } void -radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va, +radv_sdma_copy_memory(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t src_va, uint64_t dst_va, uint64_t size) { if (size == 0) @@ -428,7 +428,7 @@ radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs ncopy++; } - radeon_check_space(device->ws, cs, ncopy * 7); + radeon_check_space(device->ws, cs->b, ncopy * 7); radeon_begin(cs); @@ -450,7 +450,7 @@ radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs } void -radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va, +radv_sdma_fill_memory(const struct radv_device *device, struct radv_cmd_stream *cs, const uint64_t va, const uint64_t size, const uint32_t value) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -467,7 +467,7 @@ radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs */ const uint64_t max_fill_bytes = BITFIELD64_MASK(ver >= SDMA_6_0 ? 
30 : 22) & ~0x3; const unsigned num_packets = DIV_ROUND_UP(size, max_fill_bytes); - ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, num_packets * 5); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, num_packets * 5); radeon_begin(cs); @@ -484,11 +484,11 @@ radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs } radeon_end(); - assert(cs->cdw <= cdw_max); + assert(cs->b->cdw <= cdw_max); } static void -radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs, +radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst, const VkExtent3D pix_extent) { @@ -524,7 +524,7 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r dst_off.x *= texel_scale; ext.width *= texel_scale; - ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 13); + ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 13); radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) | util_logbase2(src->bpp) @@ -543,11 +543,11 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r radeon_emit((ext.depth - 1)); radeon_end(); - assert(cs->cdw == cdw_end); + assert(cs->b->cdw == cdw_end); } static void -radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs, +radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *const tiled, const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent, const bool detile) @@ -570,7 +570,7 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra assert(util_is_power_of_two_nonzero(tiled->bpp)); radv_sdma_check_pitches(linear_pitch, linear_slice_pitch, tiled->bpp, uses_depth); - ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 14 + (dcc ? 3 : 0)); + ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 14 + (dcc ? 3 : 0)); radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) | dcc << 19 | detile << 31 | @@ -600,11 +600,11 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra } radeon_end(); - assert(cs->cdw <= cdw_end); + assert(cs->b->cdw <= cdw_end); } static void -radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs, +radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst, const VkExtent3D px_extent) { @@ -639,7 +639,7 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade assert(util_is_power_of_two_nonzero(src->bpp)); assert(util_is_power_of_two_nonzero(dst->bpp)); - ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 15 + (dcc ? 3 : 0)); + ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 15 + (dcc ? 
3 : 0)); radeon_begin(cs); radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0) | dcc << 19 | dcc_dir << 31 | @@ -678,11 +678,11 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade } radeon_end(); - assert(cs->cdw <= cdw_end); + assert(cs->b->cdw <= cdw_end); } void -radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs, +radv_sdma_copy_buffer_image(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img, const VkExtent3D extent, bool to_image) { @@ -715,7 +715,7 @@ radv_sdma_use_unaligned_buffer_image_copy(const struct radv_device *device, cons } void -radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radeon_cmdbuf *cs, +radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img_in, const VkExtent3D base_extent, struct radeon_winsys_bo *temp_bo, bool to_image) { @@ -787,7 +787,7 @@ radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct r } void -radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src, +radv_sdma_copy_image(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent) { if (src->is_linear) { @@ -864,7 +864,7 @@ radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct r } void -radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs, +radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent, struct radeon_winsys_bo *temp_bo) { diff --git a/src/amd/vulkan/radv_sdma.h b/src/amd/vulkan/radv_sdma.h index b0de4a1a6b5..7d10fb70548 100644 --- a/src/amd/vulkan/radv_sdma.h +++ b/src/amd/vulkan/radv_sdma.h @@ -9,6 +9,8 @@ #include "radv_image.h" +struct radv_cmd_stream; + #ifdef __cplusplus extern "C" { #endif @@ -57,36 +59,36 @@ struct radv_sdma_surf radv_sdma_get_buf_surf(uint64_t buffer_va, const struct ra const VkBufferImageCopy2 *const region); struct radv_sdma_surf radv_sdma_get_surf(const struct radv_device *const device, const struct radv_image *const image, const VkImageSubresourceLayers subresource, const VkOffset3D offset); -void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs, +void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img, const VkExtent3D extent, bool to_image); bool radv_sdma_use_unaligned_buffer_image_copy(const struct radv_device *device, const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img, const VkExtent3D ext); -void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radeon_cmdbuf *cs, +void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img_in, const VkExtent3D copy_extent, struct radeon_winsys_bo *temp_bo, bool to_image); -void radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src, - const struct radv_sdma_surf *dst, const VkExtent3D 
extent); +void radv_sdma_copy_image(const struct radv_device *device, struct radv_cmd_stream *cs, + const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent); bool radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent); -void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs, +void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent, struct radeon_winsys_bo *temp_bo); -void radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va, - uint64_t size); +void radv_sdma_copy_memory(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t src_va, + uint64_t dst_va, uint64_t size); -void radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va, +void radv_sdma_fill_memory(const struct radv_device *device, struct radv_cmd_stream *cs, const uint64_t va, const uint64_t size, const uint32_t value); -void radv_sdma_emit_nop(const struct radv_device *device, struct radeon_cmdbuf *cs); +void radv_sdma_emit_nop(const struct radv_device *device, struct radv_cmd_stream *cs); -void radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va); +void radv_sdma_emit_write_timestamp(struct radv_cmd_stream *cs, uint64_t va); -void radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence); +void radv_sdma_emit_fence(struct radv_cmd_stream *cs, uint64_t va, uint32_t fence); -void radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask); +void radv_sdma_emit_wait_mem(struct radv_cmd_stream *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask); -void radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t count); +void radv_sdma_emit_write_data_head(struct radv_cmd_stream *cs, uint64_t va, uint32_t count); #ifdef __cplusplus } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 9ab49e39f4c..d6a9d1345a1 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1311,11 +1311,13 @@ radv_init_shader_upload_queue(struct radv_device *device) for (unsigned i = 0; i < RADV_SHADER_UPLOAD_CS_COUNT; i++) { struct radv_shader_dma_submission *submission = calloc(1, sizeof(struct radv_shader_dma_submission)); - submission->cs = ws->cs_create(ws, AMD_IP_SDMA, false); - if (!submission->cs) { + + result = radv_create_cmd_stream(device, RADV_QUEUE_TRANSFER, false, &submission->cs); + if (result != VK_SUCCESS) { free(submission); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + return result; } + list_addtail(&submission->list, &device->shader_dma_submissions); } @@ -1350,7 +1352,7 @@ radv_destroy_shader_upload_queue(struct radv_device *device) list_for_each_entry_safe (struct radv_shader_dma_submission, submission, &device->shader_dma_submissions, list) { if (submission->cs) - ws->cs_destroy(submission->cs); + radv_destroy_cmd_stream(device, submission->cs); if (submission->bo) radv_bo_destroy(device, NULL, submission->bo); list_del(&submission->list); @@ -2506,7 +2508,7 @@ struct radv_shader_dma_submission * radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size) { struct radv_shader_dma_submission 
*submission = radv_shader_dma_pop_submission(device); - struct radeon_cmdbuf *cs = submission->cs; + struct radv_cmd_stream *cs = submission->cs; struct radeon_winsys *ws = device->ws; VkResult result; @@ -2515,7 +2517,7 @@ radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_ if (result != VK_SUCCESS) goto fail; - ws->cs_reset(cs); + radv_reset_cmd_stream(device, cs); if (submission->bo_size < size) { result = radv_shader_dma_resize_upload_buf(device, submission, size); @@ -2524,10 +2526,10 @@ radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_ } radv_sdma_copy_memory(device, cs, radv_buffer_get_va(submission->bo), va, size); - radv_cs_add_buffer(ws, cs, submission->bo); - radv_cs_add_buffer(ws, cs, bo); + radv_cs_add_buffer(ws, cs->b, submission->bo); + radv_cs_add_buffer(ws, cs->b, bo); - result = ws->cs_finalize(cs); + result = radv_finalize_cmd_stream(device, cs); if (result != VK_SUCCESS) goto fail; @@ -2547,7 +2549,7 @@ bool radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submission *submission, uint64_t *upload_seq_out) { - struct radeon_cmdbuf *cs = submission->cs; + struct radv_cmd_stream *cs = submission->cs; struct radeon_winsys *ws = device->ws; VkResult result; @@ -2566,7 +2568,7 @@ radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submis struct radv_winsys_submit_info submit = { .ip_type = AMD_IP_SDMA, .queue_index = 0, - .cs_array = &cs, + .cs_array = &cs->b, .cs_count = 1, }; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 88b671ea496..77c4b7b9e3e 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -504,7 +504,7 @@ struct radv_shader_part_cache { struct radv_shader_dma_submission { struct list_head list; - struct radeon_cmdbuf *cs; + struct radv_cmd_stream *cs; struct radeon_winsys_bo *bo; uint64_t bo_size; char *ptr; diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index ccb2fa2334c..379ebd4cfd8 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -68,7 +68,7 @@ radv_spm_resize_bo(struct radv_device *device) } static void -radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) +radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) { const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf); @@ -82,7 +82,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu if (!num_counters) continue; - radeon_check_space(device->ws, cs, 3 + num_counters * 3); + radeon_check_space(device->ws, cs->b, 3 + num_counters * 3); radeon_begin(cs); radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index); @@ -105,7 +105,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu if (!num_counters) continue; - radeon_check_space(device->ws, cs, 3 + num_counters * 3); + radeon_check_space(device->ws, cs->b, 3 + num_counters * 3); radeon_begin(cs); radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) | @@ -130,7 +130,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu for (unsigned i = 0; i < block_sel->num_instances; i++) { struct ac_spm_block_instance *block_instance = &block_sel->instances[i]; - radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6)); + 
radeon_check_space(device->ws, cs->b, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6)); radeon_begin(cs); radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index); @@ -160,7 +160,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu } static void -radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) +radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) { const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf); @@ -190,7 +190,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum pdev->info.gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA; } - radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE)); + radeon_check_space(device->ws, cs->b, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE)); radeon_begin(cs); radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index); @@ -215,7 +215,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum } void -radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) +radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) { const struct radv_physical_device *pdev = radv_device_physical(device); struct ac_spm *spm = &device->spm; @@ -227,7 +227,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1))); assert(spm->sample_interval >= 32); - radeon_check_space(device->ws, cs, 27); + radeon_check_space(device->ws, cs->b, 27); radeon_begin(cs); /* Configure the SPM ring buffer. 
*/ diff --git a/src/amd/vulkan/radv_spm.h b/src/amd/vulkan/radv_spm.h index 121aa8ff2eb..6ade0028c25 100644 --- a/src/amd/vulkan/radv_spm.h +++ b/src/amd/vulkan/radv_spm.h @@ -15,7 +15,7 @@ #include "radv_queue.h" #include "radv_radeon_winsys.h" -void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf); +void radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf); bool radv_spm_init(struct radv_device *device); diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 95455cac155..9068503b618 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -48,7 +48,7 @@ radv_ip_to_queue_family(enum amd_ip_type t) } static void -radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family) +radv_emit_wait_for_idle(const struct radv_device *device, struct radv_cmd_stream *cs, int family) { const struct radv_physical_device *pdev = radv_device_physical(device); const enum radv_queue_family qf = radv_ip_to_queue_family(family); @@ -62,7 +62,7 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf * } static void -radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) +radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) { const struct radv_physical_device *pdev = radv_device_physical(device); const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE; @@ -75,14 +75,14 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, ac_sqtt_emit_start(&pdev->info, pm4, &device->sqtt, is_compute_queue); ac_pm4_finalize(pm4); - radeon_check_space(device->ws, cs, pm4->ndw); + radeon_check_space(device->ws, cs->b, pm4->ndw); radv_emit_pm4_commands(cs, pm4); ac_pm4_free_state(pm4); } static void -radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) +radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf) { const struct radv_physical_device *pdev = radv_device_physical(device); const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE; @@ -95,7 +95,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, ac_sqtt_emit_stop(&pdev->info, pm4, is_compute_queue); ac_pm4_finalize(pm4); - radeon_check_space(device->ws, cs, pm4->ndw); + radeon_check_space(device->ws, cs->b, pm4->ndw); radv_emit_pm4_commands(cs, pm4); ac_pm4_clear_state(pm4, &pdev->info, false, is_compute_queue); @@ -108,7 +108,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, ac_sqtt_emit_wait(&pdev->info, pm4, &device->sqtt, is_compute_queue); ac_pm4_finalize(pm4); - radeon_check_space(device->ws, cs, pm4->ndw); + radeon_check_space(device->ws, cs->b, pm4->ndw); radv_emit_pm4_commands(cs, pm4); ac_pm4_free_state(pm4); @@ -121,7 +121,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da const struct radv_physical_device *pdev = radv_device_physical(device); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + const struct radv_cmd_stream *cs = cmd_buffer->cs; const uint32_t *dwords = (uint32_t *)data; /* SQTT user data packets aren't supported on SDMA queues. 
*/ @@ -131,7 +131,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da while (num_dwords > 0) { uint32_t count = MIN2(num_dwords, 2); - radeon_check_space(device->ws, cs, 2 + count); + radeon_check_space(device->ws, cs->b, 2 + count); radeon_begin(cs); /* Without the perfctr bit the CP might not always pass the @@ -150,7 +150,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da } void -radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable) +radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stream *cs, bool enable) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -177,7 +177,7 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf } void -radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit) +radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit) { const struct radv_physical_device *pdev = radv_device_physical(device); @@ -516,7 +516,7 @@ radv_begin_sqtt(struct radv_queue *queue) const struct radv_physical_device *pdev = radv_device_physical(device); enum radv_queue_family family = queue->state.qf; struct radeon_winsys *ws = device->ws; - struct radeon_cmdbuf *cs; + struct radv_cmd_stream cs; VkResult result; /* Destroy the previous start CS and create a new one. */ @@ -525,13 +525,13 @@ radv_begin_sqtt(struct radv_queue *queue) device->sqtt.start_cs[family] = NULL; } - cs = ws->cs_create(ws, radv_queue_ring(queue), false); - if (!cs) + cs.b = ws->cs_create(ws, radv_queue_ring(queue), false); + if (!cs.b) return false; - radeon_check_space(ws, cs, 512); + radeon_check_space(ws, cs.b, 512); - radeon_begin(cs); + radeon_begin(&cs); switch (family) { case RADV_QUEUE_GENERAL: @@ -551,40 +551,40 @@ radv_begin_sqtt(struct radv_queue *queue) radeon_end(); /* Make sure to wait-for-idle before starting SQTT. */ - radv_emit_wait_for_idle(device, cs, family); + radv_emit_wait_for_idle(device, &cs, family); /* Disable clock gating before starting SQTT. */ - radv_emit_inhibit_clockgating(device, cs, true); + radv_emit_inhibit_clockgating(device, &cs, true); /* Enable SQG events that collect thread trace data. */ - radv_emit_spi_config_cntl(device, cs, true); + radv_emit_spi_config_cntl(device, &cs, true); - radv_perfcounter_emit_reset(cs, true); + radv_perfcounter_emit_reset(&cs, true); if (device->spm.bo) { /* Enable all shader stages by default. */ - radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&pdev->info)); + radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info)); - radv_emit_spm_setup(device, cs, family); + radv_emit_spm_setup(device, &cs, family); } /* Start SQTT.
*/ - radv_emit_sqtt_start(device, cs, family); + radv_emit_sqtt_start(device, &cs, family); if (device->spm.bo) { - radeon_check_space(ws, cs, 8); - radv_perfcounter_emit_spm_start(device, cs, family); + radeon_check_space(ws, cs.b, 8); + radv_perfcounter_emit_spm_start(device, &cs, family); } - result = ws->cs_finalize(cs); + result = ws->cs_finalize(cs.b); if (result != VK_SUCCESS) { - ws->cs_destroy(cs); + ws->cs_destroy(cs.b); return false; } - device->sqtt.start_cs[family] = cs; + device->sqtt.start_cs[family] = cs.b; - return radv_queue_internal_submit(queue, cs); + return radv_queue_internal_submit(queue, cs.b); } static bool @@ -593,7 +593,7 @@ radv_end_sqtt(struct radv_queue *queue) struct radv_device *device = radv_queue_device(queue); enum radv_queue_family family = queue->state.qf; struct radeon_winsys *ws = device->ws; - struct radeon_cmdbuf *cs; + struct radv_cmd_stream cs; VkResult result; /* Destroy the previous stop CS and create a new one. */ @@ -602,13 +602,13 @@ radv_end_sqtt(struct radv_queue *queue) device->sqtt.stop_cs[family] = NULL; } - cs = ws->cs_create(ws, radv_queue_ring(queue), false); - if (!cs) + cs.b = ws->cs_create(ws, radv_queue_ring(queue), false); + if (!cs.b) return false; - radeon_check_space(ws, cs, 512); + radeon_check_space(ws, cs.b, 512); - radeon_begin(cs); + radeon_begin(&cs); switch (family) { case RADV_QUEUE_GENERAL: @@ -628,33 +628,33 @@ radv_end_sqtt(struct radv_queue *queue) radeon_end(); /* Make sure to wait-for-idle before stopping SQTT. */ - radv_emit_wait_for_idle(device, cs, family); + radv_emit_wait_for_idle(device, &cs, family); if (device->spm.bo) { - radeon_check_space(ws, cs, 8); - radv_perfcounter_emit_spm_stop(device, cs, family); + radeon_check_space(ws, cs.b, 8); + radv_perfcounter_emit_spm_stop(device, &cs, family); } /* Stop SQTT. */ - radv_emit_sqtt_stop(device, cs, family); + radv_emit_sqtt_stop(device, &cs, family); - radv_perfcounter_emit_reset(cs, true); + radv_perfcounter_emit_reset(&cs, true); /* Restore previous state by disabling SQG events. */ - radv_emit_spi_config_cntl(device, cs, false); + radv_emit_spi_config_cntl(device, &cs, false); /* Restore previous state by re-enabling clock gating. 
*/ - radv_emit_inhibit_clockgating(device, cs, false); + radv_emit_inhibit_clockgating(device, &cs, false); - result = ws->cs_finalize(cs); + result = ws->cs_finalize(cs.b); if (result != VK_SUCCESS) { - ws->cs_destroy(cs); + ws->cs_destroy(cs.b); return false; } - device->sqtt.stop_cs[family] = cs; + device->sqtt.stop_cs[family] = cs.b; - return radv_queue_internal_submit(queue, cs); + return radv_queue_internal_submit(queue, cs.b); } void @@ -837,11 +837,14 @@ radv_sqtt_get_timed_cmdbuf(struct radv_queue *queue, struct radeon_winsys_bo *ti if (result != VK_SUCCESS) goto fail; - radeon_check_space(device->ws, radv_cmd_buffer_from_handle(cmdbuf)->cs, 28); + struct radv_cmd_buffer *cmd_buffer = radv_cmd_buffer_from_handle(cmdbuf); + struct radv_cmd_stream *cs = cmd_buffer->cs; + + radeon_check_space(device->ws, cs->b, 28); timestamp_va = radv_buffer_get_va(timestamp_bo) + timestamp_offset; - radv_cs_add_buffer(device->ws, radv_cmd_buffer_from_handle(cmdbuf)->cs, timestamp_bo); + radv_cs_add_buffer(device->ws, cs->b, timestamp_bo); radv_write_timestamp(radv_cmd_buffer_from_handle(cmdbuf), timestamp_va, timestamp_stage); diff --git a/src/amd/vulkan/radv_sqtt.h b/src/amd/vulkan/radv_sqtt.h index 9c89255b9ec..49087ad4fb5 100644 --- a/src/amd/vulkan/radv_sqtt.h +++ b/src/amd/vulkan/radv_sqtt.h @@ -65,9 +65,9 @@ bool radv_sqtt_queue_events_enabled(void); void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords); -void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable); +void radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stream *cs, bool enable); -void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit); +void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit); VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo, uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr); diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index 4690d5a759c..1266213a2ea 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -86,7 +86,7 @@ radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, /* vcn unified queue (sq) ib header */ void -radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature) +radv_vcn_sq_header(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature) { if (!skip_signature) { /* vcn ib signature */ @@ -97,8 +97,8 @@ radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned ty radeon_emit(0); radeon_end(); - sq->signature_ib_checksum = &cs->buf[cs->cdw - 2]; - sq->signature_ib_total_size_in_dw = &cs->buf[cs->cdw - 1]; + sq->signature_ib_checksum = &cs->b->buf[cs->b->cdw - 2]; + sq->signature_ib_total_size_in_dw = &cs->b->buf[cs->b->cdw - 1]; } else { sq->signature_ib_checksum = NULL; sq->signature_ib_total_size_in_dw = NULL; @@ -112,17 +112,17 @@ radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned ty radeon_emit(0); radeon_end(); - sq->engine_ib_size_of_packages = &cs->buf[cs->cdw - 1]; + sq->engine_ib_size_of_packages = &cs->b->buf[cs->b->cdw - 1]; } void -radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq) +radv_vcn_sq_tail(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq) { uint32_t *end; uint32_t 
size_in_dw; uint32_t checksum = 0; - end = &cs->buf[cs->cdw]; + end = &cs->b->buf[cs->b->cdw]; if (sq->signature_ib_checksum == NULL && sq->signature_ib_total_size_in_dw == NULL) { if (sq->engine_ib_size_of_packages == NULL) @@ -148,18 +148,18 @@ radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *even struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_physical_device *pdev = radv_device_physical(device); struct rvcn_sq_var sq; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; /* UVD doesn't support events, and probably never will */ if (pdev->vid_decode_ip == AMD_IP_UVD) return; - radv_cs_add_buffer(device->ws, cs, event->bo); + radv_cs_add_buffer(device->ws, cs->b, event->bo); uint64_t va = radv_buffer_get_va(event->bo); bool separate_queue = pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED; if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC && separate_queue && pdev->vid_dec_reg.data2) { - radeon_check_space(device->ws, cmd_buffer->cs, 8); + radeon_check_space(device->ws, cs->b, 8); set_reg(cmd_buffer, pdev->vid_dec_reg.data0, va & 0xffffffff); set_reg(cmd_buffer, pdev->vid_dec_reg.data1, va >> 32); set_reg(cmd_buffer, pdev->vid_dec_reg.data2, value); @@ -167,20 +167,21 @@ radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *even return; } - radeon_check_space(device->ws, cs, 256); + radeon_check_space(device->ws, cs->b, 256); radv_vcn_sq_header(cs, &sq, RADEON_VCN_ENGINE_TYPE_COMMON, separate_queue); - struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->buf[cs->cdw]); + struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->b->buf[cs->b->cdw]); ib_header->package_size = sizeof(struct rvcn_cmn_engine_ib_package) + sizeof(struct rvcn_cmn_engine_op_writememory); - cs->cdw++; + cs->b->cdw++; ib_header->package_type = RADEON_VCN_IB_COMMON_OP_WRITEMEMORY; - cs->cdw++; + cs->b->cdw++; - struct rvcn_cmn_engine_op_writememory *write_memory = (struct rvcn_cmn_engine_op_writememory *)&(cs->buf[cs->cdw]); + struct rvcn_cmn_engine_op_writememory *write_memory = + (struct rvcn_cmn_engine_op_writememory *)&(cs->b->buf[cs->b->cdw]); write_memory->dest_addr_lo = va & 0xffffffff; write_memory->dest_addr_hi = va >> 32; write_memory->data = value; - cs->cdw += sizeof(*write_memory) / 4; + cs->b->cdw += sizeof(*write_memory) / 4; radv_vcn_sq_tail(cs, &sq); } @@ -188,16 +189,17 @@ static void radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_cmd_stream *cs = cmd_buffer->cs; - radeon_check_space(device->ws, cmd_buffer->cs, 512); - radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false); - rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + radeon_check_space(device->ws, cs->b, 512); + radv_vcn_sq_header(cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false); + rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cs->b->buf[cs->b->cdw]); ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s); - cmd_buffer->cs->cdw++; + cs->b->cdw++; ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER); - cmd_buffer->cs->cdw++; - cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); - cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4; 
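The VCN paths above build IB packages by overlaying packed structs directly on the command stream and bumping the dword count by hand. Below is a minimal standalone sketch of that idiom with toy stand-in types; the real struct radv_cmd_stream definition is not part of this section, so the 'b' member wrapping the winsys radeon_cmdbuf is inferred from the call sites, and the toy names are illustrative only.

/* Sketch of the "overlay a packed struct on the command stream" idiom used
 * by radv_vcn_write_event() and radv_vcn_sq_start() above. Toy types only;
 * not the real RADV structures. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_cmdbuf {             /* stands in for struct radeon_cmdbuf */
   uint32_t buf[64];
   unsigned cdw;                /* current dword write position */
};

struct toy_cmd_stream {         /* stands in for struct radv_cmd_stream */
   struct toy_cmdbuf *b;        /* underlying winsys command buffer (assumed) */
};

struct toy_write_memory {       /* stands in for rvcn_cmn_engine_op_writememory */
   uint32_t dest_addr_lo;
   uint32_t dest_addr_hi;
   uint32_t data;
};

static void
toy_emit_write_memory(struct toy_cmd_stream *cs, uint64_t va, uint32_t value)
{
   /* Overlay the struct on the next free dwords, fill it in place, then
    * advance cdw by the struct size in dwords, mirroring the code above. */
   struct toy_write_memory *wm = (struct toy_write_memory *)&cs->b->buf[cs->b->cdw];
   wm->dest_addr_lo = va & 0xffffffff;
   wm->dest_addr_hi = va >> 32;
   wm->data = value;
   cs->b->cdw += sizeof(*wm) / 4;
   assert(cs->b->cdw <= 64); /* the real code reserves space via radeon_check_space() */
}

int
main(void)
{
   struct toy_cmdbuf ib;
   struct toy_cmd_stream cs = {.b = &ib};
   memset(&ib, 0, sizeof(ib));
   toy_emit_write_memory(&cs, 0x100001000ull, 0xdeadbeef);
   printf("emitted %u dwords\n", ib.cdw); /* prints 3 */
   return 0;
}

This is also why the mechanical rewrite in this hunk replaces every cmd_buffer->cs->buf[...] and cmd_buffer->cs->cdw with cs->b->buf[...] and cs->b->cdw: the wrapper adds one indirection but leaves the overlay pattern unchanged.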
+ cs->b->cdw++; + cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cs->b->buf[cs->b->cdw]); + cs->b->cdw += sizeof(struct rvcn_decode_buffer_s) / 4; memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); } @@ -1389,7 +1391,7 @@ radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, static void set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; radeon_begin(cs); radeon_emit(RDECODE_PKT0(reg >> 2, 0)); @@ -1402,11 +1404,12 @@ send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_ { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_cmd_stream *cs = cmd_buffer->cs; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo); + radv_cs_add_buffer(device->ws, cs->b, bo); if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) { - radeon_check_space(device->ws, cmd_buffer->cs, 6); + radeon_check_space(device->ws, cs->b, 6); set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr); set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32); set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1); @@ -2385,6 +2388,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se struct radv_image_plane *luma = &img->planes[0]; struct radv_image_plane *chroma = &img->planes[1]; bool use_intra_only_allocation_for_dpb = false; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (vid->dpb_type == DPB_DYNAMIC_TIER_3) { VkImageUsageFlags coincide = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; @@ -2565,10 +2569,10 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se uint64_t addr; if (use_intra_only_allocation_for_dpb) { addr = radv_buffer_get_va(vid->intra_only_dpb.mem->bo) + vid->intra_only_dpb.offset; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, vid->intra_only_dpb.mem->bo); + radv_cs_add_buffer(device->ws, cs->b, vid->intra_only_dpb.mem->bo); } else { addr = dpb->bindings[0].addr; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, dpb->bindings[0].bo); addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size); } @@ -2610,7 +2614,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se int f_dpb_array_idx = frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo); addr = dpb_img->bindings[0].addr; addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size + dpb_img->planes[1].surface.u.gfx9.surf_slice_size); @@ -2645,16 +2649,15 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se } uint32_t size = sizeof(rvcn_dec_ref_buffers_header_t) + sizeof(rvcn_dec_ref_buffer_t) * num_bufs; - rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cs->b->buf[cs->b->cdw]); ib_header->package_size = size + sizeof(struct rvcn_decode_ib_package_s); - cmd_buffer->cs->cdw++; + cs->b->cdw++; ib_header->package_type = RDECODE_IB_PARAM_DYNAMIC_REFLIST_BUFFER; - 
cmd_buffer->cs->cdw++; + cs->b->cdw++; - rvcn_dec_ref_buffers_header_t *refs = - (rvcn_dec_ref_buffers_header_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); - cmd_buffer->cs->cdw += size / 4; + rvcn_dec_ref_buffers_header_t *refs = (rvcn_dec_ref_buffers_header_t *)&(cs->b->buf[cs->b->cdw]); + cs->b->cdw += size / 4; refs->size = size; refs->num_bufs = 0; @@ -2669,7 +2672,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer; fill_ref_buffer(&refs->pBufs[refs->num_bufs++], dpb_img, f_dpb_array_idx, frame_info->pReferenceSlots[i].slotIndex); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo); used_slots |= 1 << frame_info->pReferenceSlots[i].slotIndex; } @@ -3106,6 +3109,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_video_session *vid = cmd_buffer->video.vid; uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t); + struct radv_cmd_stream *cs = cmd_buffer->cs; void *ptr; uint32_t out_offset; @@ -3135,13 +3139,13 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */ if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) { - radeon_check_space(device->ws, cmd_buffer->cs, 8); - radeon_begin(cmd_buffer->cs); + radeon_check_space(device->ws, cs->b, 8); + radeon_begin(cs); for (unsigned i = 0; i < 8; i++) radeon_emit(0x81ff); radeon_end(); } else - radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); + radv_vcn_sq_tail(cs, &cmd_buffer->video.sq); } static void @@ -3149,9 +3153,11 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_video_session *vid = cmd_buffer->video.vid; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t size = sizeof(struct ruvd_msg); void *ptr; uint32_t out_offset; + radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); ruvd_dec_message_create(vid, ptr); @@ -3163,8 +3169,8 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer) /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */ int padsize = vid->sessionctx.mem ? 4 : 6; - radeon_check_space(device->ws, cmd_buffer->cs, padsize); - radeon_begin(cmd_buffer->cs); + radeon_check_space(device->ws, cs->b, padsize); + radeon_begin(cs); for (unsigned i = 0; i < padsize; i++) radeon_emit(PKT2_NOP_PAD); radeon_end(); @@ -3207,6 +3213,7 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf uint32_t out_offset, fb_offset, it_probs_offset = 0; struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL; unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? 
FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; + struct radv_cmd_stream *cs = cmd_buffer->cs; radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr); fb_bo = cmd_buffer->upload.upload_bo; @@ -3248,7 +3255,7 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, radv_buffer_get_va(it_probs_bo) + it_probs_offset); - radeon_check_space(device->ws, cmd_buffer->cs, 2); + radeon_check_space(device->ws, cs->b, 2); set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1); } @@ -3264,6 +3271,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf void *ptr, *fb_ptr, *it_probs_ptr = NULL; uint32_t out_offset, fb_offset, it_probs_offset = 0; struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL; + struct radv_cmd_stream *cs = cmd_buffer->cs; size += sizeof(rvcn_dec_message_header_t); /* header */ size += sizeof(rvcn_dec_message_index_t); /* codec */ @@ -3352,10 +3360,10 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, radv_buffer_get_va(it_probs_bo) + it_probs_offset); if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) { - radeon_check_space(device->ws, cmd_buffer->cs, 2); + radeon_check_space(device->ws, cs->b, 2); set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1); } else - radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); + radv_vcn_sq_tail(cs, &cmd_buffer->video.sq); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/amd/vulkan/radv_video.h b/src/amd/vulkan/radv_video.h index 93091ea1221..709c00c0211 100644 --- a/src/amd/vulkan/radv_video.h +++ b/src/amd/vulkan/radv_video.h @@ -23,6 +23,7 @@ struct radv_physical_device; struct rvcn_sq_var; struct radv_cmd_buffer; struct radv_image_create_info; +struct radv_cmd_stream; #define RADV_ENC_MAX_RATE_LAYER 4 @@ -82,8 +83,8 @@ void radv_init_physical_device_decoder(struct radv_physical_device *pdev); void radv_video_get_profile_alignments(struct radv_physical_device *pdev, const VkVideoProfileListInfoKHR *profile_list, uint32_t *width_align_out, uint32_t *height_align_out); -void radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature); -void radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq); +void radv_vcn_sq_header(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature); +void radv_vcn_sq_tail(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq); void radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, unsigned value); void radv_init_physical_device_encoder(struct radv_physical_device *pdevice); diff --git a/src/amd/vulkan/radv_video_enc.c b/src/amd/vulkan/radv_video_enc.c index c21760af518..7448a7b4532 100644 --- a/src/amd/vulkan/radv_video_enc.c +++ b/src/amd/vulkan/radv_video_enc.c @@ -196,16 +196,16 @@ static const unsigned index_to_shifts[4] = {24, 16, 8, 0}; static void radv_enc_output_one_byte(struct radv_cmd_buffer *cmd_buffer, unsigned char byte) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_enc_state *enc = &cmd_buffer->video.enc; if (enc->byte_index == 0) - cs->buf[cs->cdw] = 0; - cs->buf[cs->cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]); + cs->b->buf[cs->b->cdw] = 0; + cs->b->buf[cs->b->cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]); enc->byte_index++; if (enc->byte_index >= 4) { 
enc->byte_index = 0; - cs->cdw++; + cs->b->cdw++; } } @@ -279,7 +279,7 @@ static void radv_enc_flush_headers(struct radv_cmd_buffer *cmd_buffer) { struct radv_enc_state *enc = &cmd_buffer->video.enc; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (enc->bits_in_shifter != 0) { unsigned char output_byte = (unsigned char)(enc->shifter >> 24); radv_enc_emulation_prevention(cmd_buffer, output_byte); @@ -291,7 +291,7 @@ radv_enc_flush_headers(struct radv_cmd_buffer *cmd_buffer) } if (enc->byte_index > 0) { - cs->cdw++; + cs->b->cdw++; enc->byte_index = 0; } } @@ -377,15 +377,15 @@ radv_enc_h265_pic_type(enum StdVideoH265PictureType type) } } -#define RADEON_ENC_CS(value) (cmd_buffer->cs->buf[cmd_buffer->cs->cdw++] = (value)) +#define RADEON_ENC_CS(value) (cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw++] = (value)) #define RADEON_ENC_BEGIN(cmd) \ { \ - uint32_t *begin = &cmd_buffer->cs->buf[cmd_buffer->cs->cdw++]; \ + uint32_t *begin = &cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw++]; \ RADEON_ENC_CS(cmd) #define RADEON_ENC_END() \ - *begin = (&cmd_buffer->cs->buf[cmd_buffer->cs->cdw] - begin) * 4; \ + *begin = (&cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw] - begin) * 4; \ cmd_buffer->video.enc.total_task_size += *begin; \ } @@ -404,7 +404,7 @@ radv_enc_av1_bs_copy_end(struct radv_cmd_buffer *cmd_buffer, uint32_t bits) static void radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, uint32_t inst, uint32_t obu_type) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_enc_state *enc = &cmd_buffer->video.enc; radv_enc_flush_headers(cmd_buffer); @@ -412,7 +412,7 @@ radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, uint32_t in if (enc->bits_output) radv_enc_av1_bs_copy_end(cmd_buffer, enc->bits_output); - enc->copy_start = &cs->buf[cs->cdw++]; + enc->copy_start = &cs->b->buf[cs->b->cdw++]; RADEON_ENC_CS(inst); if (inst != RENCODE_HEADER_INSTRUCTION_COPY) { @@ -432,9 +432,9 @@ radv_enc_session_info(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; - radv_cs_add_buffer(device->ws, cs, cmd_buffer->video.vid->sessionctx.mem->bo); + radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->video.vid->sessionctx.mem->bo); uint64_t va = radv_buffer_get_va(cmd_buffer->video.vid->sessionctx.mem->bo); va += cmd_buffer->video.vid->sessionctx.offset; @@ -455,12 +455,12 @@ radv_enc_task_info(struct radv_cmd_buffer *cmd_buffer, bool feedback) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_enc_state *enc = &cmd_buffer->video.enc; enc->task_id++; RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.task_info); - enc->p_task_size = &cs->buf[cs->cdw++]; + enc->p_task_size = &cs->b->buf[cs->b->cdw++]; RADEON_ENC_CS(enc->task_id); RADEON_ENC_CS(feedback ? 
1 : 0); RADEON_ENC_END(); @@ -919,13 +919,13 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.slice_header); radv_enc_reset(cmd_buffer); radv_enc_set_emulation_prevention(cmd_buffer, false); - cdw_start = cs->cdw; + cdw_start = cs->b->cdw; if (pic->flags.IdrPicFlag) radv_enc_code_fixed_bits(cmd_buffer, 0x65, 8); @@ -1073,7 +1073,7 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; - cdw_filled = cs->cdw - cdw_start; + cdw_filled = cs->b->cdw - cdw_start; for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++) RADEON_ENC_CS(0x00000000); for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { @@ -1158,14 +1158,14 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco unsigned int num_pic_total_curr = 0; struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; unsigned nal_unit_type = vk_video_get_h265_nal_unit(pic); RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.slice_header); radv_enc_reset(cmd_buffer); radv_enc_set_emulation_prevention(cmd_buffer, false); - cdw_start = cs->cdw; + cdw_start = cs->b->cdw; radv_enc_code_fixed_bits(cmd_buffer, 0x0, 1); radv_enc_code_fixed_bits(cmd_buffer, nal_unit_type, 6); radv_enc_code_fixed_bits(cmd_buffer, 0x0, 6); @@ -1354,7 +1354,7 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; - cdw_filled = cs->cdw - cdw_start; + cdw_filled = cs->b->cdw - cdw_start; for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++) RADEON_ENC_CS(0x00000000); for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { @@ -1392,7 +1392,7 @@ radv_enc_ctx(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *inf struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_video_session *vid = cmd_buffer->video.vid; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_image_view *dpb_iv = NULL; struct radv_image *dpb = NULL; uint64_t va = 0; @@ -1420,7 +1420,7 @@ radv_enc_ctx(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *inf dpb_image_sizes(dpb, &luma_pitch, &luma_size, &chroma_size, &colloc_bytes); - radv_cs_add_buffer(device->ws, cs, dpb->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, dpb->bindings[0].bo); va = dpb->bindings[0].addr; } @@ -1533,6 +1533,7 @@ radv_enc_ctx2(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *in uint32_t luma_pitch = 0, luma_size = 0, chroma_size = 0, colloc_bytes = 0; int max_ref_slot_idx = 0; const VkVideoPictureResourceInfoKHR *slots[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES] = {NULL}; + struct radv_cmd_stream *cs = cmd_buffer->cs; if (info->pSetupReferenceSlot) { max_ref_slot_idx = info->pSetupReferenceSlot->slotIndex; @@ -1569,7 +1570,7 @@ radv_enc_ctx2(struct radv_cmd_buffer 
*cmd_buffer, const VkVideoEncodeInfoKHR *in struct radv_image_view *dpb_iv = radv_image_view_from_handle(res->imageViewBinding); assert(dpb_iv != NULL); struct radv_image *dpb_img = dpb_iv->image; - radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo); dpb_image_sizes(dpb_iv->image, &luma_pitch, &luma_size, &chroma_size, &colloc_bytes); uint32_t metadata_size = RENCODE_MAX_METADATA_BUFFER_SIZE_PER_FRAME; @@ -1621,9 +1622,9 @@ radv_enc_bitstream(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffe { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t va = vk_buffer_address(&buffer->vk, offset); - radv_cs_add_buffer(device->ws, cs, buffer->bo); + radv_cs_add_buffer(device->ws, cs->b, buffer->bo); RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.bitstream); RADEON_ENC_CS(RENCODE_REC_SWIZZLE_MODE_LINEAR); @@ -1739,7 +1740,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR * struct radv_image *src_img = src_iv->image; struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint32_t array_idx = enc_info->srcPictureResource.baseArrayLayer + src_iv->vk.base_array_layer; uint64_t va = src_img->bindings[0].addr; uint64_t luma_va = va + src_img->planes[0].surface.u.gfx9.surf_offset + @@ -1750,7 +1751,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR * unsigned int slot_idx = 0xffffffff; unsigned int max_layers = cmd_buffer->video.vid->rc_layer_control.max_num_temporal_layers; - radv_cs_add_buffer(device->ws, cs, src_img->bindings[0].bo); + radv_cs_add_buffer(device->ws, cs->b, src_img->bindings[0].bo); if (h264_pic) { switch (h264_pic->primary_pic_type) { case STD_VIDEO_H264_PICTURE_TYPE_P: @@ -2094,14 +2095,14 @@ radv_enc_headers_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf static void radv_enc_cdf_default_table(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *enc_info) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_stream *cs = cmd_buffer->cs; struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); const struct VkVideoEncodeAV1PictureInfoKHR *av1_picture_info = vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_AV1_PICTURE_INFO_KHR); const StdVideoEncodeAV1PictureInfo *av1_pic = av1_picture_info->pStdPictureInfo; - radv_cs_add_buffer(device->ws, cs, cmd_buffer->video.vid->ctx.mem->bo); + radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->video.vid->ctx.mem->bo); uint64_t va = radv_buffer_get_va(cmd_buffer->video.vid->ctx.mem->bo); va += cmd_buffer->video.vid->ctx.offset; uint32_t use_cdf_default = (av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_KEY || @@ -2639,6 +2640,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_enc_state *enc = &cmd_buffer->video.enc; + struct radv_cmd_stream *cs = cmd_buffer->cs; uint64_t feedback_query_va; switch (vid->vk.op) { case 
VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: @@ -2650,10 +2652,10 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf return; } - radeon_check_space(device->ws, cmd_buffer->cs, 1600); + radeon_check_space(device->ws, cs->b, 1600); if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4) - radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false); + radv_vcn_sq_header(cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false); const struct VkVideoInlineQueryInfoKHR *inline_queries = NULL; if (vid->vk.flags & VK_VIDEO_SESSION_CREATE_INLINE_QUERIES_BIT_KHR) { @@ -2662,7 +2664,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf if (inline_queries) { VK_FROM_HANDLE(radv_query_pool, pool, inline_queries->queryPool); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->bo); + radv_cs_add_buffer(device->ws, cs->b, pool->bo); feedback_query_va = radv_buffer_get_va(pool->bo); feedback_query_va += pool->stride * inline_queries->firstQuery; @@ -2745,7 +2747,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf *enc->p_task_size = enc->total_task_size; if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4) - radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); + radv_vcn_sq_tail(cs, &cmd_buffer->video.sq); } static void
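The encoder helpers touched above (the RADEON_ENC_BEGIN/RADEON_ENC_END macros and enc->p_task_size) rely on back-patching: a size dword is reserved first, the payload is emitted, and the placeholder is patched once the final write position is known. A minimal sketch of that idiom under toy types follows; the command id is hypothetical, not a real VCN encode opcode, and the flat static buffer stands in for the winsys IB.

/* Sketch of the size back-patching idiom behind RADEON_ENC_BEGIN/END:
 *   *begin = (&cs->b->buf[cs->b->cdw] - begin) * 4;
 * i.e. the patched value is the package size in bytes, counting the size
 * dword itself. */
#include <stdint.h>
#include <stdio.h>

static uint32_t buf[64];
static unsigned cdw;

static uint32_t *
begin_package(uint32_t cmd)
{
   uint32_t *size_slot = &buf[cdw++]; /* placeholder, patched in end_package() */
   buf[cdw++] = cmd;
   return size_slot;
}

static void
end_package(uint32_t *size_slot)
{
   /* Distance from the placeholder to the current write pointer, in bytes. */
   *size_slot = (uint32_t)((&buf[cdw] - size_slot) * 4);
}

int
main(void)
{
   uint32_t *pkg = begin_package(0x0000000a); /* hypothetical command id */
   buf[cdw++] = 0x1;                          /* payload dwords */
   buf[cdw++] = 0x2;
   end_package(pkg);
   printf("package size = %u bytes\n", buf[0]); /* prints 16: 4 dwords */
   return 0;
}

The same deferred-patch scheme shows up twice more in this file: enc->p_task_size is filled in with enc->total_task_size at the end of radv_vcn_encode_video(), and radv_vcn_sq_tail() patches the checksum and total-size dwords reserved by radv_vcn_sq_header().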