radv: switch to radv_cmd_stream everywhere

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36314>
Samuel Pitoiset 2025-07-23 12:51:16 +02:00 committed by Marge Bot
parent 5982e8f331
commit 3ccb48ec46
37 changed files with 900 additions and 765 deletions
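
The change is mechanical but wide: every command-buffer path that previously held a raw winsys struct radeon_cmdbuf * now holds a struct radv_cmd_stream and reaches the underlying command buffer through its b member. A minimal sketch of the shape, inferred from the hunks below (the defining file's diff is suppressed on this page, so anything beyond b is an assumption):

/* Sketch (assumption): only `b` and the radeon_cmdbuf fields the hunks
 * below actually touch (buf, cdw, max_dw, reserved_dw) are attested. */
struct radeon_cmdbuf;

struct radv_cmd_stream {
   struct radeon_cmdbuf *b; /* underlying winsys command stream */
};

The pattern repeats across all 37 files: call sites cache struct radv_cmd_stream *cs = cmd_buffer->cs; winsys-level entry points (radv_cs_add_buffer, radeon_check_space, cs_pad, cs_dump, cs_execute_ib) receive cs->b; PM4 emission helpers (radeon_begin/radeon_emit, radv_cs_write_data_imm, radv_cs_emit_cache_flush, ...) take the stream itself. Stream lifecycle moves from ws->cs_create/cs_finalize/cs_destroy to radv_create_cmd_stream/radv_finalize_cmd_stream/radv_destroy_cmd_stream, which take a queue family (e.g. RADV_QUEUE_GENERAL) instead of a raw IP type.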


@ -39,8 +39,10 @@ ctx_roll_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2
const VkSubmitInfo2 *submit = pSubmits + submit_index;
for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submit->pCommandBufferInfos[i].commandBuffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
fprintf(device->ctx_roll_file, "\n%s:\n", vk_object_base_name(&cmd_buffer->vk.base));
device->ws->cs_dump(cmd_buffer->cs, device->ctx_roll_file, NULL, 0, RADV_CS_DUMP_TYPE_CTX_ROLLS);
device->ws->cs_dump(cs->b, device->ctx_roll_file, NULL, 0, RADV_CS_DUMP_TYPE_CTX_ROLLS);
}
}
}


@ -23,7 +23,7 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_foreach_stage (s, RADV_GRAPHICS_STAGE_BITS & ~VK_SHADER_STAGE_TASK_BIT_EXT) {
const struct radv_shader *shader = pipeline->base.shaders[s];
@ -306,7 +306,7 @@ radv_gfx12_write_draw_marker(struct radv_cmd_buffer *cmd_buffer, const struct ra
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
/* RGP doesn't need this marker for indirect draws. */
if (draw_info->indirect_va)


@ -367,6 +367,7 @@ radv_meta_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoi
VK_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_descriptor_set_layout *set_layout = layout->set[0].layout;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint32_t upload_offset;
uint8_t *ptr;
@ -395,7 +396,7 @@ radv_meta_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoi
VK_FROM_HANDLE(radv_image_view, iview, image_view);
for (uint32_t b = 0; b < ARRAY_SIZE(iview->image->bindings); b++) {
if (iview->image->bindings[b].bo)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo);
radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo);
}
}
}


@ -17,6 +17,7 @@ decode_astc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivie
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_meta_state *state = &device->meta_state;
struct vk_texcompress_astc_write_descriptor_buffer desc_buffer;
struct radv_cmd_stream *cs = cmd_buffer->cs;
VkFormat format = src_iview->image->vk.format;
int blk_w = vk_format_get_blockwidth(format);
int blk_h = vk_format_get_blockheight(format);
@ -26,7 +27,7 @@ decode_astc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivie
radv_image_view_to_handle(dst_iview), format);
VK_FROM_HANDLE(radv_buffer, luts_buf, state->astc_decode->luts_buf);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, luts_buf->bo);
radv_cs_add_buffer(device->ws, cs->b, luts_buf->bo);
radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->astc_decode->p_layout,
VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT, desc_buffer.descriptors);


@ -319,8 +319,9 @@ radv_fill_image(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *ima
const uint64_t va = image->bindings[0].addr + offset;
struct radeon_winsys_bo *bo = image->bindings[0].bo;
const enum radv_copy_flags copy_flags = radv_get_copy_flags_from_bo(bo);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
radv_cs_add_buffer(device->ws, cs->b, bo);
return radv_fill_memory_internal(cmd_buffer, image, va, size, value, copy_flags);
}
@ -331,8 +332,9 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const enum radv_copy_flags copy_flags = radv_get_copy_flags_from_bo(bo);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
radv_cs_add_buffer(device->ws, cs->b, bo);
return radv_fill_memory(cmd_buffer, va, size, value, copy_flags);
}
@ -378,14 +380,15 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop
VK_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const enum radv_copy_flags src_copy_flags = radv_get_copy_flags_from_bo(src_buffer->bo);
const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);
radv_suspend_conditional_rendering(cmd_buffer);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_buffer->bo);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, src_buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];
@ -404,13 +407,14 @@ radv_update_memory_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint64_t words = size / 4;
bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
assert(size < RADV_BUFFER_UPDATE_THRESHOLD);
radv_emit_cache_flush(cmd_buffer);
radeon_check_space(device->ws, cmd_buffer->cs, words + 4);
radeon_check_space(device->ws, cs->b, words + 4);
radeon_begin(cmd_buffer->cs);
radeon_begin(cs);
radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + words, 0));
radeon_emit(S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(va);
@ -454,12 +458,13 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);
radv_suspend_conditional_rendering(cmd_buffer);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
radv_update_memory(cmd_buffer, dst_va, dataSize, pData, dst_copy_flags);


@ -1067,6 +1067,7 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct ra
const VkClearColorValue *clear_color)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
VkPipelineLayout layout;
VkPipeline pipeline;
unsigned stride;
@ -1078,7 +1079,7 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct ra
return;
}
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst->image->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dst->image->bindings[0].bo);
radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 1,
(VkDescriptorGetInfoEXT[]){{


@ -591,6 +591,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
uint64_t va, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t block_count = DIV_ROUND_UP(size, 1024);
struct radv_meta_saved_state saved_state;
VkPipelineLayout layout;
@ -603,7 +604,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
return 0;
}
radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
radv_cs_add_buffer(device->ws, cs->b, bo);
radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);


@ -56,6 +56,7 @@ static bool
alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (!cmd_buffer->transfer.copy_temp) {
const VkResult r =
@ -69,7 +70,7 @@ alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
}
}
radv_cs_add_buffer(device->ws, cmd_buffer->cs, cmd_buffer->transfer.copy_temp);
radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->transfer.copy_temp);
return true;
}
@ -78,7 +79,7 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v
const VkBufferImageCopy2 *region, bool to_image)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_sdma_surf buf = radv_sdma_get_buf_surf(buffer_va, image, region);
const struct radv_sdma_surf img = radv_sdma_get_surf(device, image, region->imageSubresource, region->imageOffset);
@ -217,19 +218,20 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
VK_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const enum radv_copy_flags src_copy_flags = radv_get_copy_flags_from_bo(src_buffer->bo);
radv_suspend_conditional_rendering(cmd_buffer);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, src_buffer->bo);
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
const unsigned bind_idx = dst_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_image->bindings[bind_idx].bo);
radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[bind_idx].bo);
copy_memory_to_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, src_copy_flags, dst_image,
pCopyBufferToImageInfo->dstImageLayout, region);
@ -368,19 +370,20 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf
VK_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);
radv_suspend_conditional_rendering(cmd_buffer);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
const unsigned bind_idx = src_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_image->bindings[bind_idx].bo);
radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[bind_idx].bo);
copy_image_to_memory(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags, src_image,
pCopyImageToBufferInfo->srcImageLayout, region);
@ -394,7 +397,7 @@ transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_i
struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned int dst_aspect_mask_remaining = region->dstSubresource.aspectMask;
VkImageSubresourceLayers src_subresource = region->srcSubresource;
@ -650,6 +653,7 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
VK_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_suspend_conditional_rendering(cmd_buffer);
@ -660,8 +664,8 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
const VkImageAspectFlags dst_aspect_mask = region->dstSubresource.aspectMask;
const unsigned dst_bind_idx = dst_image->disjoint ? radv_plane_from_aspect(dst_aspect_mask) : 0;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_image->bindings[src_bind_idx].bo);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_image->bindings[dst_bind_idx].bo);
radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[src_bind_idx].bo);
radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[dst_bind_idx].bo);
copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout,
region);


@ -110,6 +110,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
{
struct radv_meta_saved_state saved_state;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
VkPipelineLayout layout;
VkPipeline pipeline;
VkResult result;
@ -135,7 +136,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
const uint64_t va = image->bindings[0].addr;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, image->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, image->bindings[0].bo);
radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 2,
(VkDescriptorGetInfoEXT[]){

File diff suppressed because it is too large


@ -575,7 +575,7 @@ struct radv_cmd_buffer {
} gfx12;
VkCommandBufferUsageFlags usage_flags;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream *cs;
struct radv_cmd_state state;
struct radv_vertex_binding vertex_bindings[MAX_VBS];
struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
@ -620,7 +620,7 @@ struct radv_cmd_buffer {
*/
struct {
/** Follower command stream. */
struct radeon_cmdbuf *cs;
struct radv_cmd_stream *cs;
/** Flush bits for the follower cmdbuf. */
enum radv_cmd_flush_bits flush_bits;
@ -888,7 +888,7 @@ struct radv_vbo_info {
void radv_get_vbo_info(const struct radv_cmd_buffer *cmd_buffer, uint32_t vbo_idx, struct radv_vbo_info *vbo_info);
void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs,
const struct radv_shader *shader);
void radv_upload_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,


@ -47,7 +47,7 @@ cp_dma_max_byte_count(enum amd_gfx_level gfx_level)
* clear value.
*/
static void
radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va,
radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool predicating, uint64_t dst_va,
uint64_t src_va, unsigned size, unsigned flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -56,7 +56,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level));
radeon_check_space(device->ws, cs, 9);
radeon_check_space(device->ws, cs->b, 9);
if (pdev->info.gfx_level >= GFX9)
command |= S_415_BYTE_COUNT_GFX9(size);
else
@ -106,7 +106,7 @@ static void
radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
bool predicating = cmd_buffer->state.predicating;
radv_cs_emit_cp_dma(device, cs, predicating, dst_va, src_va, size, flags);
@ -133,7 +133,7 @@ radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t s
}
void
radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, unsigned size,
bool predicating)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -146,7 +146,7 @@ radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *
assert(size <= cp_dma_max_byte_count(gfx_level));
radeon_check_space(ws, cs, 9);
radeon_check_space(ws, cs->b, 9);
uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;


@ -15,10 +15,10 @@
#include <stdbool.h>
struct radv_device;
struct radeon_cmdbuf;
struct radv_cmd_stream;
struct radv_cmd_buffer;
void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, unsigned size,
bool predicating);
void radv_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);


@ -19,13 +19,14 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
struct radeon_winsys *ws = device->ws;
const struct radeon_info *gpu_info = &pdev->info;
struct ac_pm4_state *pm4 = NULL;
struct radv_cmd_stream *cs;
VkResult result;
struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false);
if (!cs)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
if (result != VK_SUCCESS)
return result;
radeon_check_space(ws, cs, 256);
radeon_check_space(ws, cs->b, 256);
/* allocate memory for queue_state->shadowed_regs where register states are saved */
result = radv_bo_create(device, NULL, SI_SHADOWED_REG_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
@ -43,10 +44,10 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
radeon_emit_array(pm4->pm4, pm4->ndw);
radeon_end();
ws->cs_pad(cs, 0);
ws->cs_pad(cs->b, 0);
result = radv_bo_create(
device, NULL, cs->cdw * 4, 4096, ws->cs_domain(ws),
device, NULL, cs->b->cdw * 4, 4096, ws->cs_domain(ws),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
RADV_BO_PRIORITY_CS, 0, true, &queue_state->shadow_regs_ib);
if (result != VK_SUCCESS)
@ -60,13 +61,13 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
result = VK_ERROR_MEMORY_MAP_FAILED;
goto fail_map;
}
memcpy(map, cs->buf, cs->cdw * 4);
queue_state->shadow_regs_ib_size_dw = cs->cdw;
memcpy(map, cs->b->buf, cs->b->cdw * 4);
queue_state->shadow_regs_ib_size_dw = cs->b->cdw;
ws->buffer_unmap(ws, queue_state->shadow_regs_ib, false);
ac_pm4_free_state(pm4);
ws->cs_destroy(cs);
radv_destroy_cmd_stream(device, cs);
return VK_SUCCESS;
fail_map:
radv_bo_destroy(device, NULL, queue_state->shadow_regs_ib);
@ -77,7 +78,7 @@ fail_create:
radv_bo_destroy(device, NULL, queue_state->shadowed_regs);
queue_state->shadowed_regs = NULL;
fail:
ws->cs_destroy(cs);
radv_destroy_cmd_stream(device, cs);
return result;
}
@ -92,15 +93,15 @@ radv_destroy_shadow_regs_preamble(struct radv_device *device, struct radv_queue_
}
void
radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
radv_emit_shadow_regs_preamble(struct radv_cmd_stream *cs, const struct radv_device *device,
struct radv_queue_state *queue_state)
{
struct radeon_winsys *ws = device->ws;
ws->cs_execute_ib(cs, queue_state->shadow_regs_ib, 0, queue_state->shadow_regs_ib_size_dw & 0xffff, false);
ws->cs_execute_ib(cs->b, queue_state->shadow_regs_ib, 0, queue_state->shadow_regs_ib_size_dw & 0xffff, false);
radv_cs_add_buffer(device->ws, cs, queue_state->shadowed_regs);
radv_cs_add_buffer(device->ws, cs, queue_state->shadow_regs_ib);
radv_cs_add_buffer(device->ws, cs->b, queue_state->shadowed_regs);
radv_cs_add_buffer(device->ws, cs->b, queue_state->shadow_regs_ib);
}
/* radv_init_shadowed_regs_buffer_state() will be called once from radv_queue_init(). This
@ -111,14 +112,14 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream *cs;
VkResult result;
cs = ws->cs_create(ws, AMD_IP_GFX, false);
if (!cs)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
if (result != VK_SUCCESS)
return result;
radeon_check_space(ws, cs, 768);
radeon_check_space(ws, cs->b, 768);
radv_emit_shadow_regs_preamble(cs, device, &queue->state);
@ -136,13 +137,13 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra
ac_pm4_free_state(pm4);
}
result = ws->cs_finalize(cs);
result = radv_finalize_cmd_stream(device, cs);
if (result == VK_SUCCESS) {
if (!radv_queue_internal_submit(queue, cs))
if (!radv_queue_internal_submit(queue, cs->b))
result = VK_ERROR_UNKNOWN;
}
fail:
ws->cs_destroy(cs);
radv_destroy_cmd_stream(device, cs);
return result;
}


@ -13,6 +13,7 @@
#include "radv_radeon_winsys.h"
struct radv_cmd_stream;
struct radv_device;
struct radv_queue_state;
struct radv_queue;
@ -22,7 +23,7 @@ VkResult radv_create_shadow_regs_preamble(struct radv_device *device, struct rad
void radv_destroy_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state,
struct radeon_winsys *ws);
void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
void radv_emit_shadow_regs_preamble(struct radv_cmd_stream *cs, const struct radv_device *device,
struct radv_queue_state *queue_state);
VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue);


@ -17,7 +17,7 @@
#include "sid.h"
void
radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va,
uint32_t new_fence, uint64_t gfx9_eop_bug_va)
{
@ -111,7 +111,7 @@ radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_le
}
static void
radv_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
radv_emit_acquire_mem(struct radv_cmd_stream *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
{
radeon_begin(cs);
@ -137,7 +137,7 @@ radv_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsig
}
static void
gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
uint64_t flush_va, enum radv_queue_family qf, enum radv_cmd_flush_bits flush_bits,
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
{
@ -381,7 +381,7 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level
}
void
radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level,
uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
uint64_t gfx9_eop_bug_va)
@ -389,7 +389,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu
unsigned cp_coher_cntl = 0;
uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);
radeon_check_space(ws, cs, 128);
radeon_check_space(ws, cs->b, 128);
if (gfx_level >= GFX10) {
/* GFX10 cache flush handling is quite different. */
@ -592,7 +592,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu
}
void
radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count)
radv_emit_cond_exec(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, uint32_t count)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
@ -616,7 +616,7 @@ radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs,
}
void
radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm)
radv_cs_write_data_imm(struct radv_cmd_stream *cs, unsigned engine_sel, uint64_t va, uint32_t imm)
{
radeon_begin(cs);
radeon_emit(PKT3(PKT3_WRITE_DATA, 3, 0));


@ -28,7 +28,7 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned
}
#define radeon_begin(cs) \
struct radeon_cmdbuf *__cs = (cs); \
struct radeon_cmdbuf *__cs = (cs)->b; \
uint32_t __cs_num = __cs->cdw; \
UNUSED uint32_t __cs_reserved_dw = __cs->reserved_dw; \
uint32_t *__cs_buf = __cs->buf
@ -340,7 +340,7 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned
} while (0)
ALWAYS_INLINE static void
radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
radv_cp_wait_mem(struct radv_cmd_stream *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
const uint32_t ref, const uint32_t mask)
{
assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);
@ -363,11 +363,11 @@ radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, cons
}
ALWAYS_INLINE static unsigned
radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
radv_cs_write_data_head(const struct radv_device *device, struct radv_cmd_stream *cs, const enum radv_queue_family qf,
const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating)
{
/* Return the correct cdw at the end of the packet so the caller can assert it. */
const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count);
const unsigned cdw_end = radeon_check_space(device->ws, cs->b, 4 + count);
if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
radeon_begin(cs);
@ -386,7 +386,7 @@ radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *
}
ALWAYS_INLINE static void
radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
radv_cs_write_data(const struct radv_device *device, struct radv_cmd_stream *cs, const enum radv_queue_family qf,
const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords,
const bool predicating)
{
@ -395,24 +395,24 @@ radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, c
radeon_begin(cs);
radeon_emit_array(dwords, count);
radeon_end();
assert(cs->cdw == cdw_end);
assert(cs->b->cdw == cdw_end);
}
void radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
void radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel,
uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va);
void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level,
uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
uint64_t gfx9_eop_bug_va);
void radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count);
void radv_emit_cond_exec(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, uint32_t count);
void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
void radv_cs_write_data_imm(struct radv_cmd_stream *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
static inline void
radv_emit_pm4_commands(struct radeon_cmdbuf *cs, const struct ac_pm4_state *pm4)
radv_emit_pm4_commands(struct radv_cmd_stream *cs, const struct ac_pm4_state *pm4)
{
radeon_begin(cs);
radeon_emit_array(pm4->pm4, pm4->ndw);


@ -42,10 +42,12 @@ radv_write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_b
if (device->use_global_bo_list)
return;
if (cmd_buffer)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo);
else
if (cmd_buffer) {
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, buffer_view->bo);
} else {
*buffer_list = buffer_view->bo;
}
}
static ALWAYS_INLINE void
@ -90,10 +92,12 @@ radv_write_buffer_descriptor_impl(struct radv_device *device, struct radv_cmd_bu
return;
}
if (cmd_buffer)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo);
else
if (cmd_buffer) {
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
} else {
*buffer_list = buffer->bo;
}
}
static ALWAYS_INLINE void
@ -190,8 +194,9 @@ radv_write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buf
const uint32_t max_bindings = sizeof(iview->image->bindings) / sizeof(iview->image->bindings[0]);
for (uint32_t b = 0; b < max_bindings; b++) {
if (cmd_buffer) {
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (iview->image->bindings[b].bo)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo);
radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo);
} else {
*buffer_list = iview->image->bindings[b].bo;
buffer_list++;
@ -239,8 +244,9 @@ radv_write_image_descriptor_ycbcr_impl(struct radv_device *device, struct radv_c
for (uint32_t b = 0; b < ARRAY_SIZE(iview->image->bindings); b++) {
if (cmd_buffer) {
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (iview->image->bindings[b].bo)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo);
radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo);
} else {
*buffer_list = iview->image->bindings[b].bo;
buffer_list++;


@ -537,7 +537,7 @@ radv_device_init_perf_counter(struct radv_device *device)
if (result != VK_SUCCESS)
return result;
device->perf_counter_lock_cs = calloc(sizeof(struct radeon_cmdbuf *), 2 * PERF_CTR_MAX_PASSES);
device->perf_counter_lock_cs = calloc(sizeof(struct radv_cmd_stream *), 2 * PERF_CTR_MAX_PASSES);
if (!device->perf_counter_lock_cs)
return VK_ERROR_OUT_OF_HOST_MEMORY;
@ -558,7 +558,7 @@ radv_device_finish_perf_counter(struct radv_device *device)
for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) {
if (device->perf_counter_lock_cs[i])
device->ws->cs_destroy(device->perf_counter_lock_cs[i]);
radv_destroy_cmd_stream(device, device->perf_counter_lock_cs[i]);
}
free(device->perf_counter_lock_cs);
@ -894,18 +894,21 @@ radv_device_init_cache_key(struct radv_device *device)
static void
radv_create_gfx_preamble(struct radv_device *device)
{
struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false);
if (!cs)
struct radv_cmd_stream *cs;
VkResult result;
result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
if (result != VK_SUCCESS)
return;
radeon_check_space(device->ws, cs, 512);
radeon_check_space(device->ws, cs->b, 512);
radv_emit_graphics(device, cs);
device->ws->cs_pad(cs, 0);
device->ws->cs_pad(cs->b, 0);
VkResult result = radv_bo_create(
device, NULL, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws),
result = radv_bo_create(
device, NULL, cs->b->cdw * 4, 4096, device->ws->cs_domain(device->ws),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
RADV_BO_PRIORITY_CS, 0, true, &device->gfx_init);
if (result != VK_SUCCESS)
@ -917,12 +920,12 @@ radv_create_gfx_preamble(struct radv_device *device)
device->gfx_init = NULL;
goto fail;
}
memcpy(map, cs->buf, cs->cdw * 4);
memcpy(map, cs->b->buf, cs->b->cdw * 4);
device->ws->buffer_unmap(device->ws, device->gfx_init, false);
device->gfx_init_size_dw = cs->cdw;
device->gfx_init_size_dw = cs->b->cdw;
fail:
device->ws->cs_destroy(cs);
radv_destroy_cmd_stream(device, cs);
}
/* For MSAA sample positions. */
@ -977,7 +980,7 @@ radv_get_default_max_sample_dist(int log_samples)
}
void
radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, int nr_samples)
radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, int nr_samples)
{
uint64_t centroid_priority;


@ -33,6 +33,7 @@
#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
struct radv_image_view;
struct radv_cmd_stream;
enum radv_dispatch_table {
RADV_DEVICE_DISPATCH_TABLE,
@ -289,7 +290,7 @@ struct radv_device {
struct radeon_winsys_bo *perf_counter_bo;
/* Interleaved lock/unlock commandbuffers for perfcounter passes. */
struct radeon_cmdbuf **perf_counter_lock_cs;
struct radv_cmd_stream **perf_counter_lock_cs;
bool uses_shadow_regs;
@ -340,7 +341,7 @@ VkResult radv_device_init_vrs_state(struct radv_device *device);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs,
int nr_samples);
struct radv_color_buffer_info {


@ -3339,38 +3339,38 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio
const struct radv_physical_device *pdev = radv_device_physical(device);
uint8_t *ptr = set->mapped_ptr + set->stride * index;
struct radv_compute_pipeline_metadata md;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream cs;
assert(shader->info.stage == MESA_SHADER_COMPUTE);
radv_get_compute_shader_metadata(device, shader, &md);
cs = calloc(1, sizeof(*cs));
if (!cs)
cs.b = calloc(1, sizeof(*cs.b));
if (!cs.b)
return;
cs->reserved_dw = cs->max_dw = 32;
cs->buf = malloc(cs->max_dw * 4);
if (!cs->buf) {
free(cs);
cs.b->reserved_dw = cs.b->max_dw = 32;
cs.b->buf = malloc(cs.b->max_dw * 4);
if (!cs.b->buf) {
free(cs.b);
return;
}
radv_emit_compute_shader(pdev, cs, shader);
radv_emit_compute_shader(pdev, &cs, shader);
memcpy(ptr, &md, sizeof(md));
ptr += sizeof(md);
memcpy(ptr, &cs->cdw, sizeof(uint32_t));
memcpy(ptr, &cs.b->cdw, sizeof(uint32_t));
ptr += sizeof(uint32_t);
memcpy(ptr, cs->buf, cs->cdw * sizeof(uint32_t));
ptr += cs->cdw * sizeof(uint32_t);
memcpy(ptr, cs.b->buf, cs.b->cdw * sizeof(uint32_t));
ptr += cs.b->cdw * sizeof(uint32_t);
set->compute_scratch_size_per_wave = MAX2(set->compute_scratch_size_per_wave, shader->config.scratch_bytes_per_wave);
set->compute_scratch_waves = MAX2(set->compute_scratch_waves, radv_get_max_scratch_waves(device, shader));
free(cs->buf);
free(cs);
free(cs.b->buf);
free(cs.b);
}
static void


@ -15,7 +15,7 @@
#include "sid.h"
void
radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders)
radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -35,7 +35,7 @@ radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *
}
static void
radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, bool enable)
radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *cs, int family, bool enable)
{
radeon_begin(cs);
@ -49,7 +49,7 @@ radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs
}
void
radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm)
radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm)
{
uint32_t cp_perfmon_cntl;
@ -66,7 +66,7 @@ radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm)
}
static void
radv_perfcounter_emit_start(struct radeon_cmdbuf *cs, bool is_spm)
radv_perfcounter_emit_start(struct radv_cmd_stream *cs, bool is_spm)
{
uint32_t cp_perfmon_cntl;
@ -83,7 +83,7 @@ radv_perfcounter_emit_start(struct radeon_cmdbuf *cs, bool is_spm)
}
static void
radv_perfcounter_emit_stop(struct radeon_cmdbuf *cs, bool is_spm)
radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm)
{
uint32_t cp_perfmon_cntl;
@ -101,7 +101,7 @@ radv_perfcounter_emit_stop(struct radeon_cmdbuf *cs, bool is_spm)
}
void
radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family)
{
/* Start SPM counters. */
radv_perfcounter_emit_start(cs, true);
@ -110,7 +110,7 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf
}
void
radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -125,7 +125,7 @@ radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf
}
static void
radv_perfcounter_emit_sample(struct radeon_cmdbuf *cs)
radv_perfcounter_emit_sample(struct radv_cmd_stream *cs)
{
radeon_begin(cs);
radeon_event_write(V_028A90_PERFCOUNTER_SAMPLE);
@ -493,7 +493,7 @@ radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCrea
static void
radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned value = S_030800_SH_BROADCAST_WRITES(1);
if (se >= 0) {
@ -521,7 +521,7 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block,
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned idx;
assert(count <= regs->num_counters);
@ -551,7 +551,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8;
@ -596,7 +596,7 @@ radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *blo
static void
radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_begin(cs);
@ -622,7 +622,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_perfcounter_emit_sample(cs);
radv_pc_wait_idle(cmd_buffer);
@ -642,7 +642,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
radeon_emit(0);
radeon_end();
uint32_t *skip_dwords = cs->buf + (cs->cdw - 1);
uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);
for (unsigned i = 0; i < pool->num_pc_regs;) {
enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
@ -670,7 +670,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
radv_cs_write_data_imm(cs, V_370_ME, signal_va, 1);
}
*skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
*skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1;
}
radv_emit_instance(cmd_buffer, -1, -1);
@ -680,19 +680,19 @@ void
radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
const struct radv_physical_device *pdev = radv_device_physical(device);
ASSERTED unsigned cdw_max;
cmd_buffer->state.uses_perf_counters = true;
cdw_max = radeon_check_space(device->ws, cs,
cdw_max = radeon_check_space(device->ws, cs->b,
256 + /* Random one time stuff */
10 * pool->num_passes + /* COND_EXECs */
pool->b.stride / 8 * (5 + 8));
radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo);
radv_cs_add_buffer(device->ws, cs->b, pool->b.bo);
radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);
uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
radv_cs_write_data_imm(cs, V_370_ME, perf_ctr_va, 0);
@ -714,7 +714,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
radeon_emit(0);
radeon_end();
uint32_t *skip_dwords = cs->buf + (cs->cdw - 1);
uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);
for (unsigned i = 0; i < pool->num_pc_regs;) {
enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
@ -733,7 +733,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
i += cnt;
}
*skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
*skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1;
}
radv_emit_instance(cmd_buffer, -1, -1);
@ -744,7 +744,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
radv_perfcounter_emit_start(cs, false);
radv_emit_windowed_counters(device, cs, cmd_buffer->qf, true);
assert(cmd_buffer->cs->cdw <= cdw_max);
assert(cs->b->cdw <= cdw_max);
}
void
@ -752,16 +752,16 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
ASSERTED unsigned cdw_max;
cdw_max = radeon_check_space(device->ws, cs,
cdw_max = radeon_check_space(device->ws, cs->b,
256 + /* Reserved for things that don't scale with passes/counters */
5 * pool->num_passes + /* COND_EXECs */
pool->b.stride / 8 * 8);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo);
radv_cs_add_buffer(device->ws, cs->b, pool->b.bo);
radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);
uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
@ -774,7 +774,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
radv_emit_spi_config_cntl(device, cs, false);
radv_emit_inhibit_clockgating(device, cs, false);
assert(cmd_buffer->cs->cdw <= cdw_max);
assert(cs->b->cdw <= cdw_max);
}
static uint64_t


@ -30,13 +30,13 @@ struct radv_pc_query_pool {
struct radv_perfcounter_impl *counters;
};
void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders);
void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders);
void radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm);
void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family);
void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);


@ -35,7 +35,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkQueryType qu
uint32_t pipeline_stats_mask, uint32_t avail_offset, bool uses_emulated_queries);
static void
gfx10_copy_shader_query(struct radeon_cmdbuf *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va)
gfx10_copy_shader_query(struct radv_cmd_stream *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va)
{
radeon_begin(cs);
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
@ -93,7 +93,7 @@ enum radv_event_write {
};
static void
radv_emit_event_write(const struct radeon_info *info, struct radeon_cmdbuf *cs, enum radv_event_write event,
radv_emit_event_write(const struct radeon_info *info, struct radv_cmd_stream *cs, enum radv_event_write event,
uint64_t va)
{
radeon_begin(cs);
@ -322,9 +322,9 @@ radv_begin_occlusion_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkQu
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_check_space(device->ws, cs, 11);
radeon_check_space(device->ws, cs->b, 11);
++cmd_buffer->state.active_occlusion_queries;
if (cmd_buffer->state.active_occlusion_queries == 1) {
@ -356,9 +356,9 @@ radv_end_occlusion_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_check_space(device->ws, cs, 14);
radeon_check_space(device->ws, cs->b, 14);
cmd_buffer->state.active_occlusion_queries--;
if (cmd_buffer->state.active_occlusion_queries == 0) {
@ -379,7 +379,7 @@ radv_copy_occlusion_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
if (!radv_occlusion_query_use_l2(pdev)) {
@ -390,7 +390,7 @@ radv_copy_occlusion_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
unsigned query = first_query + i;
uint64_t src_va = va + query * pool->stride + rb_avail_offset;
radeon_check_space(device->ws, cs, 7);
radeon_check_space(device->ws, cs->b, 7);
/* Waits on the upper word of the last DB entry */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
@ -631,9 +631,9 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_check_space(device->ws, cs, 4);
radeon_check_space(device->ws, cs->b, 4);
++cmd_buffer->state.active_pipeline_queries;
@ -674,18 +674,19 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q
if (pool->uses_ace) {
uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset;
radeon_check_space(device->ws, cmd_buffer->gang.cs, 4);
radeon_check_space(device->ws, ace_cs->b, 4);
radv_emit_event_write(&pdev->info, cmd_buffer->gang.cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
radv_emit_event_write(&pdev->info, ace_cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
} else {
radeon_check_space(device->ws, cmd_buffer->gang.cs, 11);
radeon_check_space(device->ws, ace_cs->b, 11);
gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
radv_cs_write_data_imm(ace_cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
@ -704,11 +705,11 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
radeon_check_space(device->ws, cs, 16);
radeon_check_space(device->ws, cs->b, 16);
cmd_buffer->state.active_pipeline_queries--;
@ -747,18 +748,19 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que
if (pool->uses_ace) {
uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset;
radeon_check_space(device->ws, cmd_buffer->gang.cs, 4);
radeon_check_space(device->ws, ace_cs->b, 4);
radv_emit_event_write(&pdev->info, cmd_buffer->gang.cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
radv_emit_event_write(&pdev->info, ace_cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
} else {
radeon_check_space(device->ws, cmd_buffer->gang.cs, 11);
radeon_check_space(device->ws, ace_cs->b, 11);
gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
radv_cs_write_data_imm(ace_cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
cmd_buffer->state.active_pipeline_ace_queries--;
@ -778,7 +780,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@ -789,7 +791,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct
for (unsigned i = 0; i < query_count; ++i) {
unsigned query = first_query + i;
radeon_check_space(device->ws, cs, 7);
radeon_check_space(device->ws, cs->b, 7);
uint64_t avail_va = va + pool->availability_offset + 4 * query;
@ -801,7 +803,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct
const uint64_t start_va = src_va + task_invoc_offset + 4;
const uint64_t stop_va = start_va + pipelinestat_block_size;
radeon_check_space(device->ws, cs, 7 * 2);
radeon_check_space(device->ws, cs->b, 7 * 2);
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, start_va, 0x80000000, 0xffffffff);
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, stop_va, 0x80000000, 0xffffffff);
@ -942,9 +944,9 @@ emit_sample_streamout(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_check_space(device->ws, cs, 4);
radeon_check_space(device->ws, cs->b, 4);
assert(index < MAX_SO_STREAMS);
@ -978,7 +980,7 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->use_ngg_streamout) {
/* generated prim counter */
@ -1007,7 +1009,7 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->use_ngg_streamout) {
/* generated prim counter */
@ -1036,7 +1038,7 @@ radv_copy_tfb_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query
uint32_t query_count, uint64_t dst_va, uint64_t stride, VkQueryResultFlags flags)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@ -1044,7 +1046,7 @@ radv_copy_tfb_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query
unsigned query = first_query + i;
uint64_t src_va = va + query * pool->stride;
radeon_check_space(device->ws, cs, 7 * 4);
radeon_check_space(device->ws, cs->b, 7 * 4);
/* Wait on the upper word of all results. */
for (unsigned j = 0; j < 4; j++, src_va += 8) {
@ -1169,7 +1171,7 @@ radv_copy_timestamp_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
uint32_t query_count, uint64_t dst_va, uint64_t stride, VkQueryResultFlags flags)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@ -1177,7 +1179,7 @@ radv_copy_timestamp_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
unsigned query = first_query + i;
uint64_t local_src_va = va + query * pool->stride;
radeon_check_space(device->ws, cs, 7);
radeon_check_space(device->ws, cs->b, 7);
/* Wait on the high 32 bits of the timestamp in
* case the low part is 0xffffffff.
@ -1350,7 +1352,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query are always emulated. */
@ -1399,7 +1401,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query are always emulated. */
@ -1446,7 +1448,7 @@ radv_copy_pg_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query_
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@ -1456,7 +1458,7 @@ radv_copy_pg_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query_
unsigned query = first_query + i;
uint64_t src_va = va + query * pool->stride;
radeon_check_space(device->ws, cs, 7 * 4);
radeon_check_space(device->ws, cs->b, 7 * 4);
/* Wait on the upper word of the PrimitiveStorageNeeded result. */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
@ -1595,10 +1597,10 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->info.gfx_level >= GFX11) {
radeon_check_space(device->ws, cs, 4);
radeon_check_space(device->ws, cs->b, 4);
++cmd_buffer->state.active_pipeline_queries;
@ -1624,12 +1626,12 @@ radv_end_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->info.gfx_level >= GFX11) {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
radeon_check_space(device->ws, cs, 16);
radeon_check_space(device->ws, cs->b, 16);
cmd_buffer->state.active_pipeline_queries--;
@ -1658,7 +1660,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
if (pdev->info.gfx_level >= GFX11) {
@ -1666,7 +1668,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q
for (unsigned i = 0; i < query_count; ++i) {
unsigned query = first_query + i;
radeon_check_space(device->ws, cs, 7);
radeon_check_space(device->ws, cs->b, 7);
uint64_t avail_va = va + pool->availability_offset + 4 * query;
@ -1684,7 +1686,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q
unsigned query = first_query + i;
uint64_t src_va = va + query * pool->stride;
radeon_check_space(device->ws, cs, 7 * 2);
radeon_check_space(device->ws, cs->b, 7 * 2);
/* Wait on the upper word. */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
@ -2473,14 +2475,15 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset);
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (!queryCount)
return;
radv_suspend_conditional_rendering(cmd_buffer);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->bo);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
/* Work around engines that forget to properly specify WAIT_BIT because some
* drivers implicitly synchronize before the query copy.
@ -2652,10 +2655,10 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_query_pool, pool, queryPool);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
radv_cs_add_buffer(device->ws, cs, pool->bo);
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
emit_query_flush(cmd_buffer, pool);
@ -2665,7 +2668,8 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
if (!radv_gang_init(cmd_buffer))
return;
radv_cs_add_buffer(device->ws, cmd_buffer->gang.cs, pool->bo);
struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
radv_cs_add_buffer(device->ws, ace_cs->b, pool->bo);
}
if (pool->uses_shader_query_buf)
@ -2711,7 +2715,7 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
radeon_begin(cs);
@ -2739,21 +2743,21 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t query_va = va + pool->stride * query;
radv_cs_add_buffer(device->ws, cs, pool->bo);
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
assert(cmd_buffer->qf != RADV_QUEUE_VIDEO_DEC && cmd_buffer->qf != RADV_QUEUE_VIDEO_ENC);
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
if (instance->drirc.flush_before_timestamp_write) {
radv_sdma_emit_nop(device, cmd_buffer->cs);
radv_sdma_emit_nop(device, cs);
}
for (unsigned i = 0; i < num_queries; ++i, query_va += pool->stride) {
radeon_check_space(device->ws, cmd_buffer->cs, 3);
radeon_check_space(device->ws, cs->b, 3);
radv_sdma_emit_write_timestamp(cs, query_va);
}
return;
@ -2766,7 +2770,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
radv_emit_cache_flush(cmd_buffer);
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 28 * num_queries);
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 28 * num_queries);
for (unsigned i = 0; i < num_queries; i++) {
radv_write_timestamp(cmd_buffer, query_va, stage);
@ -2779,7 +2783,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
}
assert(cmd_buffer->cs->cdw <= cdw_max);
assert(cs->b->cdw <= cdw_max);
}
VKAPI_ATTR void VKAPI_CALL
@ -2790,15 +2794,15 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_query_pool, pool, queryPool);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t pool_va = radv_buffer_get_va(pool->bo);
uint64_t query_va = pool_va + pool->stride * firstQuery;
radv_cs_add_buffer(device->ws, cs, pool->bo);
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
radv_emit_cache_flush(cmd_buffer);
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 6 * accelerationStructureCount);
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 6 * accelerationStructureCount);
radeon_begin(cs);
@ -2835,5 +2839,5 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
}
radeon_end();
assert(cmd_buffer->cs->cdw <= cdw_max);
assert(cs->b->cdw <= cdw_max);
}
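Editor's note: across this patch, winsys-facing calls (radeon_check_space, radv_cs_add_buffer, cs_dump, cs_chain, the cdw asserts) now take cs->b, while the RADV emit helpers (radeon_begin, radv_cp_wait_mem, ...) take the radv_cmd_stream itself. A minimal sketch of the relationship those call sites imply; only the b member is actually visible in this diff, anything further is an assumption:

struct radv_cmd_stream {
   struct radeon_cmdbuf *b; /* underlying winsys command buffer: cs->b->cdw, &cs->b, ... */
   /* driver-side emission state presumably lives here (not shown in this diff) */
};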

View file

@ -360,7 +360,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
}
static void
radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo,
radv_emit_gs_ring_sizes(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *esgs_ring_bo,
uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -369,10 +369,10 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
return;
if (esgs_ring_bo)
radv_cs_add_buffer(device->ws, cs, esgs_ring_bo);
radv_cs_add_buffer(device->ws, cs->b, esgs_ring_bo);
if (gsvs_ring_bo)
radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo);
radv_cs_add_buffer(device->ws, cs->b, gsvs_ring_bo);
radeon_begin(cs);
@ -390,7 +390,8 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
}
static void
radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo)
radv_emit_tess_factor_ring(struct radv_device *device, struct radv_cmd_stream *cs,
struct radeon_winsys_bo *tess_rings_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t tf_va;
@ -401,7 +402,7 @@ radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
tf_ring_size = pdev->info.tess_factor_ring_size / 4;
tf_va = radv_buffer_get_va(tess_rings_bo) + pdev->info.tess_offchip_ring_size;
radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
radv_cs_add_buffer(device->ws, cs->b, tess_rings_bo);
radeon_begin(cs);
@ -465,7 +466,7 @@ radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_wi
}
static void
radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *task_rings_bo,
radv_emit_task_rings(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *task_rings_bo,
bool compute)
{
if (!task_rings_bo)
@ -473,7 +474,7 @@ radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struc
const uint64_t task_ctrlbuf_va = radv_buffer_get_va(task_rings_bo);
assert(util_is_aligned(task_ctrlbuf_va, 256));
radv_cs_add_buffer(device->ws, cs, task_rings_bo);
radv_cs_add_buffer(device->ws, cs->b, task_rings_bo);
radeon_begin(cs);
@ -488,8 +489,8 @@ radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struc
}
static void
radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *scratch_bo)
radv_emit_graphics_scratch(struct radv_device *device, struct radv_cmd_stream *cs, uint32_t size_per_wave,
uint32_t waves, struct radeon_winsys_bo *scratch_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
@ -500,7 +501,7 @@ radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
ac_get_scratch_tmpring_size(gpu_info, waves, size_per_wave, &tmpring_size);
radv_cs_add_buffer(device->ws, cs, scratch_bo);
radv_cs_add_buffer(device->ws, cs->b, scratch_bo);
radeon_begin(cs);
@ -519,8 +520,8 @@ radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
}
static void
radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *compute_scratch_bo)
radv_emit_compute_scratch(struct radv_device *device, struct radv_cmd_stream *cs, uint32_t size_per_wave,
uint32_t waves, struct radeon_winsys_bo *compute_scratch_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
@ -541,7 +542,7 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
ac_get_scratch_tmpring_size(gpu_info, waves, size_per_wave, &tmpring_size);
radv_cs_add_buffer(device->ws, cs, compute_scratch_bo);
radv_cs_add_buffer(device->ws, cs->b, compute_scratch_bo);
radeon_begin(cs);
@ -563,14 +564,14 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
}
static void
radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
radv_emit_compute_shader_pointers(struct radv_device *device, struct radv_cmd_stream *cs,
struct radeon_winsys_bo *descriptor_bo)
{
if (!descriptor_bo)
return;
uint64_t va = radv_buffer_get_va(descriptor_bo);
radv_cs_add_buffer(device->ws, cs, descriptor_bo);
radv_cs_add_buffer(device->ws, cs->b, descriptor_bo);
/* Compute shader user data 0-1 have the scratch pointer (unlike GFX shaders),
* so emit the descriptor pointer to user data 2-3 instead (task_ring_offsets arg).
@ -581,7 +582,7 @@ radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdb
}
static void
radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
radv_emit_graphics_shader_pointers(struct radv_device *device, struct radv_cmd_stream *cs,
struct radeon_winsys_bo *descriptor_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -592,7 +593,7 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd
va = radv_buffer_get_va(descriptor_bo);
radv_cs_add_buffer(device->ws, cs, descriptor_bo);
radv_cs_add_buffer(device->ws, cs->b, descriptor_bo);
radeon_begin(cs);
@ -638,7 +639,7 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd
}
static void
radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *ge_rings_bo)
radv_emit_ge_rings(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *ge_rings_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va;
@ -651,7 +652,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct
va = radv_buffer_get_va(ge_rings_bo);
assert((va >> 32) == pdev->info.address32_hi);
radv_cs_add_buffer(device->ws, cs, ge_rings_bo);
radv_cs_add_buffer(device->ws, cs->b, ge_rings_bo);
radeon_begin(cs);
@ -716,7 +717,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct
}
static void
radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs, bool is_compute_queue)
radv_emit_compute(struct radv_device *device, struct radv_cmd_stream *cs, bool is_compute_queue)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t border_color_va = device->border_color_data.bo ? radv_buffer_get_va(device->border_color_data.bo) : 0;
@ -768,7 +769,7 @@ radv_pack_float_12p4(float x)
}
void
radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs)
{
struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t border_color_va = device->border_color_data.bo ? radv_buffer_get_va(device->border_color_data.bo) : 0;
@ -941,14 +942,14 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
}
static void
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_device *device)
radv_init_graphics_state(struct radv_cmd_stream *cs, struct radv_device *device)
{
if (device->gfx_init) {
struct radeon_winsys *ws = device->ws;
ws->cs_execute_ib(cs, device->gfx_init, 0, device->gfx_init_size_dw & 0xffff, false);
ws->cs_execute_ib(cs->b, device->gfx_init, 0, device->gfx_init_size_dw & 0xffff, false);
radv_cs_add_buffer(device->ws, cs, device->gfx_init);
radv_cs_add_buffer(device->ws, cs->b, device->gfx_init);
} else {
radv_emit_graphics(device, cs);
}
@ -971,7 +972,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
struct radeon_winsys_bo *ge_rings_bo = queue->ge_rings_bo;
struct radeon_winsys_bo *gds_bo = queue->gds_bo;
struct radeon_winsys_bo *gds_oa_bo = queue->gds_oa_bo;
struct radeon_cmdbuf *dest_cs[3] = {0};
struct radv_cmd_stream *dest_cs[3] = {0};
const uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
VkResult result = VK_SUCCESS;
@ -1134,18 +1135,17 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
for (int i = 0; i < 3; ++i) {
enum rgp_flush_bits sqtt_flush_bits = 0;
struct radeon_cmdbuf *cs = NULL;
cs = ws->cs_create(ws, radv_queue_family_to_ring(pdev, queue->qf), false);
if (!cs) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail;
}
struct radv_cmd_stream *cs = NULL;
radeon_check_space(ws, cs, 512);
result = radv_create_cmd_stream(device, queue->qf, false, &cs);
if (result != VK_SUCCESS)
goto fail;
radeon_check_space(ws, cs->b, 512);
dest_cs[i] = cs;
if (scratch_bo)
radv_cs_add_buffer(ws, cs, scratch_bo);
radv_cs_add_buffer(ws, cs->b, scratch_bo);
/* Emit initial configuration. */
switch (queue->qf) {
@ -1205,19 +1205,19 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
radv_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->qf, flush_bits, &sqtt_flush_bits, 0);
}
result = ws->cs_finalize(cs);
result = radv_finalize_cmd_stream(device, cs);
if (result != VK_SUCCESS)
goto fail;
}
if (queue->initial_full_flush_preamble_cs)
ws->cs_destroy(queue->initial_full_flush_preamble_cs);
radv_destroy_cmd_stream(device, queue->initial_full_flush_preamble_cs);
if (queue->initial_preamble_cs)
ws->cs_destroy(queue->initial_preamble_cs);
radv_destroy_cmd_stream(device, queue->initial_preamble_cs);
if (queue->continue_preamble_cs)
ws->cs_destroy(queue->continue_preamble_cs);
radv_destroy_cmd_stream(device, queue->continue_preamble_cs);
queue->initial_full_flush_preamble_cs = dest_cs[0];
queue->initial_preamble_cs = dest_cs[1];
@ -1272,7 +1272,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
fail:
for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
if (dest_cs[i])
ws->cs_destroy(dest_cs[i]);
radv_destroy_cmd_stream(device, dest_cs[i]);
if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
radv_bo_destroy(device, NULL, descriptor_bo);
if (scratch_bo && scratch_bo != queue->scratch_bo)
@ -1394,12 +1394,14 @@ radv_create_flush_postamble(struct radv_queue *queue)
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ip = radv_queue_family_to_ring(pdev, queue->state.qf);
struct radeon_winsys *ws = device->ws;
struct radv_cmd_stream *cs;
VkResult result;
struct radeon_cmdbuf *cs = ws->cs_create(ws, ip, false);
if (!cs)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
result = radv_create_cmd_stream(device, queue->state.qf, false, &cs);
if (result != VK_SUCCESS)
return result;
radeon_check_space(ws, cs, 256);
radeon_check_space(ws, cs->b, 256);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radv_cmd_flush_bits flush_bits = 0;
@ -1418,10 +1420,10 @@ radv_create_flush_postamble(struct radv_queue *queue)
enum rgp_flush_bits sqtt_flush_bits = 0;
radv_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->state.qf, flush_bits, &sqtt_flush_bits, 0);
VkResult r = ws->cs_finalize(cs);
if (r != VK_SUCCESS) {
ws->cs_destroy(cs);
return r;
result = radv_finalize_cmd_stream(device, cs);
if (result != VK_SUCCESS) {
radv_destroy_cmd_stream(device, cs);
return result;
}
queue->state.flush_postamble_cs = cs;
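Editor's note: the flush postamble above exercises the command-stream lifecycle this patch standardizes on. A condensed sketch of the pattern, assuming the new helpers are 1:1 replacements for the removed winsys calls:

   struct radv_cmd_stream *cs = NULL;
   VkResult result = radv_create_cmd_stream(device, qf, false, &cs); /* was ws->cs_create() */
   if (result != VK_SUCCESS)
      return result;

   radeon_check_space(ws, cs->b, 256); /* space checks still target the winsys CS */
   /* ... radeon_begin(cs); radeon_emit(...); radeon_end(); ... */

   result = radv_finalize_cmd_stream(device, cs); /* was ws->cs_finalize() */
   if (result != VK_SUCCESS) {
      radv_destroy_cmd_stream(device, cs); /* was ws->cs_destroy() */
      return result;
   }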
@ -1439,7 +1441,6 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
VkResult r = VK_SUCCESS;
struct radeon_winsys *ws = device->ws;
const enum amd_ip_type leader_ip = radv_queue_family_to_ring(pdev, queue->state.qf);
struct radeon_winsys_bo *gang_sem_bo = NULL;
/* Gang semaphores BO.
@ -1452,25 +1453,34 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
if (r != VK_SUCCESS)
return r;
struct radeon_cmdbuf *leader_pre_cs = ws->cs_create(ws, leader_ip, false);
struct radeon_cmdbuf *leader_post_cs = ws->cs_create(ws, leader_ip, false);
struct radeon_cmdbuf *ace_pre_cs = ws->cs_create(ws, AMD_IP_COMPUTE, false);
struct radeon_cmdbuf *ace_post_cs = ws->cs_create(ws, AMD_IP_COMPUTE, false);
struct radv_cmd_stream *leader_pre_cs = NULL, *leader_post_cs = NULL;
struct radv_cmd_stream *ace_pre_cs = NULL, *ace_post_cs = NULL;
if (!leader_pre_cs || !leader_post_cs || !ace_pre_cs || !ace_post_cs) {
r = VK_ERROR_OUT_OF_DEVICE_MEMORY;
r = radv_create_cmd_stream(device, queue->state.qf, false, &leader_pre_cs);
if (r != VK_SUCCESS)
goto fail;
}
radeon_check_space(ws, leader_pre_cs, 256);
radeon_check_space(ws, leader_post_cs, 256);
radeon_check_space(ws, ace_pre_cs, 256);
radeon_check_space(ws, ace_post_cs, 256);
r = radv_create_cmd_stream(device, queue->state.qf, false, &leader_post_cs);
if (r != VK_SUCCESS)
goto fail;
radv_cs_add_buffer(ws, leader_pre_cs, gang_sem_bo);
radv_cs_add_buffer(ws, leader_post_cs, gang_sem_bo);
radv_cs_add_buffer(ws, ace_pre_cs, gang_sem_bo);
radv_cs_add_buffer(ws, ace_post_cs, gang_sem_bo);
r = radv_create_cmd_stream(device, RADV_QUEUE_COMPUTE, false, &ace_pre_cs);
if (r != VK_SUCCESS)
goto fail;
r = radv_create_cmd_stream(device, RADV_QUEUE_COMPUTE, false, &ace_post_cs);
if (r != VK_SUCCESS)
goto fail;
radeon_check_space(ws, leader_pre_cs->b, 256);
radeon_check_space(ws, leader_post_cs->b, 256);
radeon_check_space(ws, ace_pre_cs->b, 256);
radeon_check_space(ws, ace_post_cs->b, 256);
radv_cs_add_buffer(ws, leader_pre_cs->b, gang_sem_bo);
radv_cs_add_buffer(ws, leader_post_cs->b, gang_sem_bo);
radv_cs_add_buffer(ws, ace_pre_cs->b, gang_sem_bo);
radv_cs_add_buffer(ws, ace_post_cs->b, gang_sem_bo);
const uint64_t ace_wait_va = radv_buffer_get_va(gang_sem_bo);
const uint64_t leader_wait_va = ace_wait_va + 4;
@ -1486,7 +1496,6 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
radv_cp_wait_mem(ace_pre_cs, RADV_QUEUE_COMPUTE, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff);
radv_cs_write_data(device, ace_pre_cs, RADV_QUEUE_COMPUTE, V_370_ME, ace_wait_va, 1, &zero, false);
radv_cs_write_data(device, leader_pre_cs, queue->state.qf, V_370_ME, ace_wait_va, 1, &one, false);
/* Create postambles for gang submission.
* This ensures that the gang leader waits for the whole gang,
* which is necessary because the kernel signals the userspace fence
@ -1498,16 +1507,16 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, RADV_QUEUE_COMPUTE, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0);
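/* Editor's note on the semaphore layout (first half visible above, the rest
 * inferred): gang_sem_bo holds two 32-bit semaphores, ace_wait_va at offset 0
 * and leader_wait_va at offset 4. The leader's preamble writes 1 to
 * ace_wait_va, the ACE preamble waits on it and clears it, and the ACE
 * postamble signals leader_wait_va with a bottom-of-pipe EOP event; the
 * matching wait on the leader side is outside the hunks shown here. */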
r = ws->cs_finalize(leader_pre_cs);
r = radv_finalize_cmd_stream(device, leader_pre_cs);
if (r != VK_SUCCESS)
goto fail;
r = ws->cs_finalize(leader_post_cs);
r = radv_finalize_cmd_stream(device, leader_post_cs);
if (r != VK_SUCCESS)
goto fail;
r = ws->cs_finalize(ace_pre_cs);
r = radv_finalize_cmd_stream(device, ace_pre_cs);
if (r != VK_SUCCESS)
goto fail;
r = ws->cs_finalize(ace_post_cs);
r = radv_finalize_cmd_stream(device, ace_post_cs);
if (r != VK_SUCCESS)
goto fail;
@ -1521,13 +1530,13 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
fail:
if (leader_pre_cs)
ws->cs_destroy(leader_pre_cs);
radv_destroy_cmd_stream(device, leader_pre_cs);
if (leader_post_cs)
ws->cs_destroy(leader_post_cs);
radv_destroy_cmd_stream(device, leader_post_cs);
if (ace_pre_cs)
ws->cs_destroy(ace_pre_cs);
radv_destroy_cmd_stream(device, ace_pre_cs);
if (ace_post_cs)
ws->cs_destroy(ace_post_cs);
radv_destroy_cmd_stream(device, ace_post_cs);
if (gang_sem_bo)
radv_bo_destroy(device, &queue->vk.base, gang_sem_bo);
@ -1585,22 +1594,23 @@ radv_update_gang_preambles(struct radv_queue *queue)
return VK_SUCCESS;
}
static struct radeon_cmdbuf *
static struct radv_cmd_stream *
radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool unlock)
{
struct radeon_cmdbuf **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)];
struct radeon_cmdbuf *cs;
struct radv_cmd_stream **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)];
struct radv_cmd_stream *cs;
VkResult result;
if (*cs_ref)
return *cs_ref;
cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false);
if (!cs)
result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
if (result != VK_SUCCESS)
return NULL;
ASSERTED unsigned cdw = radeon_check_space(device->ws, cs, 21);
ASSERTED unsigned cdw = radeon_check_space(device->ws, cs->b, 21);
radv_cs_add_buffer(device->ws, cs, device->perf_counter_bo);
radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);
radeon_begin(cs);
@ -1650,11 +1660,11 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool
}
radeon_end();
assert(cs->cdw <= cdw);
assert(cs->b->cdw <= cdw);
VkResult result = device->ws->cs_finalize(cs);
result = radv_finalize_cmd_stream(device, cs);
if (result != VK_SUCCESS) {
device->ws->cs_destroy(cs);
radv_destroy_cmd_stream(device, cs);
return NULL;
}
@ -1662,7 +1672,7 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool
* alternative.
*/
if (p_atomic_cmpxchg((uintptr_t *)cs_ref, 0, (uintptr_t)cs) != 0) {
device->ws->cs_destroy(cs);
radv_destroy_cmd_stream(device, cs);
}
return *cs_ref;
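Editor's note: the p_atomic_cmpxchg above is a lock-free publish of the lazily created lock/unlock CS: the first thread to install the pointer wins, and any loser destroys its copy. The idiom in isolation, with hypothetical build()/destroy() stand-ins rather than RADV API:

   void *obj = build();
   /* Publish only if still unset; p_atomic_cmpxchg returns the previous value. */
   if (p_atomic_cmpxchg((uintptr_t *)ref, 0, (uintptr_t)obj) != 0)
      destroy(obj); /* lost the race, keep the winner's object */
   return *ref;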
@ -1748,18 +1758,18 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
if (queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE) {
initial_preambles[num_initial_preambles++] =
need_wait ? queue->state.initial_full_flush_preamble_cs : queue->state.initial_preamble_cs;
need_wait ? queue->state.initial_full_flush_preamble_cs->b : queue->state.initial_preamble_cs->b;
continue_preambles[num_continue_preambles++] = queue->state.continue_preamble_cs;
continue_preambles[num_continue_preambles++] = queue->state.continue_preamble_cs->b;
if (use_perf_counters) {
/* RADV only supports perf counters on the GFX queue currently. */
assert(queue->state.qf == RADV_QUEUE_GENERAL);
/* Create the lock/unlock CS. */
struct radeon_cmdbuf *perf_ctr_lock_cs =
struct radv_cmd_stream *perf_ctr_lock_cs =
radv_create_perf_counter_lock_cs(device, submission->perf_pass_index, false);
struct radeon_cmdbuf *perf_ctr_unlock_cs =
struct radv_cmd_stream *perf_ctr_unlock_cs =
radv_create_perf_counter_lock_cs(device, submission->perf_pass_index, true);
if (!perf_ctr_lock_cs || !perf_ctr_unlock_cs) {
@ -1767,14 +1777,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
goto fail;
}
initial_preambles[num_initial_preambles++] = perf_ctr_lock_cs;
continue_preambles[num_continue_preambles++] = perf_ctr_lock_cs;
postambles[num_postambles++] = perf_ctr_unlock_cs;
initial_preambles[num_initial_preambles++] = perf_ctr_lock_cs->b;
continue_preambles[num_continue_preambles++] = perf_ctr_lock_cs->b;
postambles[num_postambles++] = perf_ctr_unlock_cs->b;
}
}
if (queue->state.flush_postamble_cs) {
postambles[num_postambles++] = queue->state.flush_postamble_cs;
postambles[num_postambles++] = queue->state.flush_postamble_cs->b;
}
const unsigned num_1q_initial_preambles = num_initial_preambles;
@ -1782,17 +1792,17 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
const unsigned num_1q_postambles = num_postambles;
if (use_ace) {
initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs;
initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs;
initial_preambles[num_initial_preambles++] =
need_wait ? queue->follower_state->initial_full_flush_preamble_cs : queue->follower_state->initial_preamble_cs;
initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs->b;
initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs->b;
initial_preambles[num_initial_preambles++] = need_wait ? queue->follower_state->initial_full_flush_preamble_cs->b
: queue->follower_state->initial_preamble_cs->b;
continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs;
continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs;
continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs;
continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs->b;
continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs->b;
continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs->b;
postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs;
postambles[num_postambles++] = queue->state.gang_wait_postamble_cs;
postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs->b;
postambles[num_postambles++] = queue->state.gang_wait_postamble_cs->b;
}
struct radv_winsys_submit_info submit = {
@ -1826,12 +1836,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j + c];
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
const bool can_chain_next = !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
/* Follower needs to be before the gang leader because the last CS must match the queue's IP type. */
if (cmd_buffer->gang.cs) {
device->ws->cs_unchain(cmd_buffer->gang.cs);
if (!chainable_ace || !device->ws->cs_chain(chainable_ace, cmd_buffer->gang.cs, false)) {
cs_array[num_submitted_cs++] = cmd_buffer->gang.cs;
if (ace_cs) {
device->ws->cs_unchain(ace_cs->b);
if (!chainable_ace || !device->ws->cs_chain(chainable_ace, ace_cs->b, false)) {
cs_array[num_submitted_cs++] = ace_cs->b;
/* Prevent chaining the gang leader when the follower couldn't be chained.
* Otherwise, they would be in the wrong order.
@ -1839,19 +1851,18 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
chainable = NULL;
}
chainable_ace = can_chain_next ? cmd_buffer->gang.cs : NULL;
chainable_ace = can_chain_next ? ace_cs->b : NULL;
submit_ace = true;
}
device->ws->cs_unchain(cmd_buffer->cs);
if (!chainable || !device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) {
device->ws->cs_unchain(cs->b);
if (!chainable || !device->ws->cs_chain(chainable, cs->b, queue->state.uses_shadow_regs)) {
/* don't submit empty command buffers to the kernel. */
if ((radv_queue_ring(queue) != AMD_IP_VCN_ENC && radv_queue_ring(queue) != AMD_IP_UVD) ||
cmd_buffer->cs->cdw != 0)
cs_array[num_submitted_cs++] = cmd_buffer->cs;
if ((radv_queue_ring(queue) != AMD_IP_VCN_ENC && radv_queue_ring(queue) != AMD_IP_UVD) || cs->b->cdw != 0)
cs_array[num_submitted_cs++] = cs->b;
}
chainable = can_chain_next ? cmd_buffer->cs : NULL;
chainable = can_chain_next ? cs->b : NULL;
}
submit.cs_count = num_submitted_cs;
@ -1873,8 +1884,8 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
radv_check_trap_handler(queue);
}
initial_preambles[0] = queue->state.initial_preamble_cs;
initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs;
initial_preambles[0] = queue->state.initial_preamble_cs ? queue->state.initial_preamble_cs->b : NULL;
initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs->b;
}
queue->last_shader_upload_seq = MAX2(queue->last_shader_upload_seq, shader_upload_seq);
@ -2047,17 +2058,17 @@ radv_queue_state_finish(struct radv_queue_state *queue, struct radv_device *devi
{
radv_destroy_shadow_regs_preamble(device, queue, device->ws);
if (queue->initial_full_flush_preamble_cs)
device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
radv_destroy_cmd_stream(device, queue->initial_full_flush_preamble_cs);
if (queue->initial_preamble_cs)
device->ws->cs_destroy(queue->initial_preamble_cs);
radv_destroy_cmd_stream(device, queue->initial_preamble_cs);
if (queue->continue_preamble_cs)
device->ws->cs_destroy(queue->continue_preamble_cs);
radv_destroy_cmd_stream(device, queue->continue_preamble_cs);
if (queue->gang_wait_preamble_cs)
device->ws->cs_destroy(queue->gang_wait_preamble_cs);
radv_destroy_cmd_stream(device, queue->gang_wait_preamble_cs);
if (queue->gang_wait_postamble_cs)
device->ws->cs_destroy(queue->gang_wait_postamble_cs);
radv_destroy_cmd_stream(device, queue->gang_wait_postamble_cs);
if (queue->flush_postamble_cs)
device->ws->cs_destroy(queue->flush_postamble_cs);
radv_destroy_cmd_stream(device, queue->flush_postamble_cs);
if (queue->descriptor_bo)
radv_bo_destroy(device, NULL, queue->descriptor_bo);
if (queue->scratch_bo) {

View file

@ -16,6 +16,7 @@
#include "radv_radeon_winsys.h"
struct radv_physical_device;
struct radv_cmd_stream;
struct radv_device;
struct radv_queue_ring_info {
@ -62,12 +63,12 @@ struct radv_queue_state {
struct radeon_winsys_bo *gds_bo;
struct radeon_winsys_bo *gds_oa_bo;
struct radeon_cmdbuf *initial_preamble_cs;
struct radeon_cmdbuf *initial_full_flush_preamble_cs;
struct radeon_cmdbuf *continue_preamble_cs;
struct radeon_cmdbuf *gang_wait_preamble_cs;
struct radeon_cmdbuf *gang_wait_postamble_cs;
struct radeon_cmdbuf *flush_postamble_cs; /* GFX6 only */
struct radv_cmd_stream *initial_preamble_cs;
struct radv_cmd_stream *initial_full_flush_preamble_cs;
struct radv_cmd_stream *continue_preamble_cs;
struct radv_cmd_stream *gang_wait_preamble_cs;
struct radv_cmd_stream *gang_wait_postamble_cs;
struct radv_cmd_stream *flush_postamble_cs; /* GFX6 only */
/* uses_shadow_regs is set only for the general queue */
bool uses_shadow_regs;
@ -108,7 +109,7 @@ void radv_queue_finish(struct radv_queue *queue);
enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfo *pObj);
void radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
void radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs);
bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);

View file

@ -346,17 +346,17 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
}
void
radv_sdma_emit_nop(const struct radv_device *device, struct radeon_cmdbuf *cs)
radv_sdma_emit_nop(const struct radv_device *device, struct radv_cmd_stream *cs)
{
/* SDMA NOP acts as a fence command and causes the SDMA engine to wait for pending copy operations. */
radeon_check_space(device->ws, cs, 1);
radeon_check_space(device->ws, cs->b, 1);
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
radeon_end();
}
void
radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va)
radv_sdma_emit_write_timestamp(struct radv_cmd_stream *cs, uint64_t va)
{
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_TIMESTAMP, SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP, 0));
@ -366,7 +366,7 @@ radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va)
}
void
radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence)
radv_sdma_emit_fence(struct radv_cmd_stream *cs, uint64_t va, uint32_t fence)
{
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_FENCE, 0, SDMA_FENCE_MTYPE_UC));
@ -377,7 +377,7 @@ radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence)
}
void
radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
radv_sdma_emit_wait_mem(struct radv_cmd_stream *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
{
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_POLL_REGMEM, 0, 0) | op << 28 | SDMA_POLL_MEM);
@ -390,7 +390,7 @@ radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint
}
void
radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t count)
radv_sdma_emit_write_data_head(struct radv_cmd_stream *cs, uint64_t va, uint32_t count)
{
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
@ -401,7 +401,7 @@ radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t c
}
void
radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
radv_sdma_copy_memory(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t src_va, uint64_t dst_va,
uint64_t size)
{
if (size == 0)
@ -428,7 +428,7 @@ radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
ncopy++;
}
radeon_check_space(device->ws, cs, ncopy * 7);
radeon_check_space(device->ws, cs->b, ncopy * 7);
radeon_begin(cs);
@ -450,7 +450,7 @@ radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
}
void
radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,
radv_sdma_fill_memory(const struct radv_device *device, struct radv_cmd_stream *cs, const uint64_t va,
const uint64_t size, const uint32_t value)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -467,7 +467,7 @@ radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
*/
const uint64_t max_fill_bytes = BITFIELD64_MASK(ver >= SDMA_6_0 ? 30 : 22) & ~0x3;
const unsigned num_packets = DIV_ROUND_UP(size, max_fill_bytes);
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, num_packets * 5);
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, num_packets * 5);
radeon_begin(cs);
@ -484,11 +484,11 @@ radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
}
radeon_end();
assert(cs->cdw <= cdw_max);
assert(cs->b->cdw <= cdw_max);
}
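/* Worked example (editor): with ver >= SDMA_6_0, max_fill_bytes =
 * BITFIELD64_MASK(30) & ~0x3 = 0x3ffffffc bytes (just under 1 GiB), so a
 * 4 GiB fill needs DIV_ROUND_UP(1ull << 32, 0x3ffffffc) = 5 packets, and
 * the check above reserves 5 * 5 = 25 dwords. */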
static void
radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
const VkExtent3D pix_extent)
{
@ -524,7 +524,7 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
dst_off.x *= texel_scale;
ext.width *= texel_scale;
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 13);
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 13);
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) | util_logbase2(src->bpp)
@ -543,11 +543,11 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
radeon_emit((ext.depth - 1));
radeon_end();
assert(cs->cdw == cdw_end);
assert(cs->b->cdw == cdw_end);
}
static void
radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *const tiled,
const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent,
const bool detile)
@ -570,7 +570,7 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra
assert(util_is_power_of_two_nonzero(tiled->bpp));
radv_sdma_check_pitches(linear_pitch, linear_slice_pitch, tiled->bpp, uses_depth);
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 14 + (dcc ? 3 : 0));
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 14 + (dcc ? 3 : 0));
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) | dcc << 19 | detile << 31 |
@ -600,11 +600,11 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra
}
radeon_end();
assert(cs->cdw <= cdw_end);
assert(cs->b->cdw <= cdw_end);
}
static void
radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
const VkExtent3D px_extent)
{
@ -639,7 +639,7 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade
assert(util_is_power_of_two_nonzero(src->bpp));
assert(util_is_power_of_two_nonzero(dst->bpp));
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 15 + (dcc ? 3 : 0));
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 15 + (dcc ? 3 : 0));
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0) | dcc << 19 | dcc_dir << 31 |
@ -678,11 +678,11 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade
}
radeon_end();
assert(cs->cdw <= cdw_end);
assert(cs->b->cdw <= cdw_end);
}
void
radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_copy_buffer_image(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img, const VkExtent3D extent,
bool to_image)
{
@ -715,7 +715,7 @@ radv_sdma_use_unaligned_buffer_image_copy(const struct radv_device *device, cons
}
void
radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img_in,
const VkExtent3D base_extent, struct radeon_winsys_bo *temp_bo, bool to_image)
{
@ -787,7 +787,7 @@ radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct r
}
void
radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src,
radv_sdma_copy_image(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent)
{
if (src->is_linear) {
@ -864,7 +864,7 @@ radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct r
}
void
radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst,
const VkExtent3D extent, struct radeon_winsys_bo *temp_bo)
{

View file

@ -9,6 +9,8 @@
#include "radv_image.h"
struct radv_cmd_stream;
#ifdef __cplusplus
extern "C" {
#endif
@ -57,36 +59,36 @@ struct radv_sdma_surf radv_sdma_get_buf_surf(uint64_t buffer_va, const struct ra
const VkBufferImageCopy2 *const region);
struct radv_sdma_surf radv_sdma_get_surf(const struct radv_device *const device, const struct radv_image *const image,
const VkImageSubresourceLayers subresource, const VkOffset3D offset);
void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs,
void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img,
const VkExtent3D extent, bool to_image);
bool radv_sdma_use_unaligned_buffer_image_copy(const struct radv_device *device, const struct radv_sdma_surf *buf,
const struct radv_sdma_surf *img, const VkExtent3D ext);
void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radeon_cmdbuf *cs,
void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img_in,
const VkExtent3D copy_extent, struct radeon_winsys_bo *temp_bo,
bool to_image);
void radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent);
void radv_sdma_copy_image(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent);
bool radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent);
void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs,
void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst,
const VkExtent3D extent, struct radeon_winsys_bo *temp_bo);
void radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
uint64_t size);
void radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,
void radv_sdma_copy_memory(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t src_va,
uint64_t dst_va, uint64_t size);
void radv_sdma_fill_memory(const struct radv_device *device, struct radv_cmd_stream *cs, const uint64_t va,
const uint64_t size, const uint32_t value);
void radv_sdma_emit_nop(const struct radv_device *device, struct radeon_cmdbuf *cs);
void radv_sdma_emit_nop(const struct radv_device *device, struct radv_cmd_stream *cs);
void radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va);
void radv_sdma_emit_write_timestamp(struct radv_cmd_stream *cs, uint64_t va);
void radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence);
void radv_sdma_emit_fence(struct radv_cmd_stream *cs, uint64_t va, uint32_t fence);
void radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask);
void radv_sdma_emit_wait_mem(struct radv_cmd_stream *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask);
void radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t count);
void radv_sdma_emit_write_data_head(struct radv_cmd_stream *cs, uint64_t va, uint32_t count);
#ifdef __cplusplus
}

View file

@ -1311,11 +1311,13 @@ radv_init_shader_upload_queue(struct radv_device *device)
for (unsigned i = 0; i < RADV_SHADER_UPLOAD_CS_COUNT; i++) {
struct radv_shader_dma_submission *submission = calloc(1, sizeof(struct radv_shader_dma_submission));
submission->cs = ws->cs_create(ws, AMD_IP_SDMA, false);
if (!submission->cs) {
result = radv_create_cmd_stream(device, RADV_QUEUE_TRANSFER, false, &submission->cs);
if (result != VK_SUCCESS) {
free(submission);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
return result;
}
list_addtail(&submission->list, &device->shader_dma_submissions);
}
@ -1350,7 +1352,7 @@ radv_destroy_shader_upload_queue(struct radv_device *device)
list_for_each_entry_safe (struct radv_shader_dma_submission, submission, &device->shader_dma_submissions, list) {
if (submission->cs)
ws->cs_destroy(submission->cs);
radv_destroy_cmd_stream(device, submission->cs);
if (submission->bo)
radv_bo_destroy(device, NULL, submission->bo);
list_del(&submission->list);
@ -2506,7 +2508,7 @@ struct radv_shader_dma_submission *
radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size)
{
struct radv_shader_dma_submission *submission = radv_shader_dma_pop_submission(device);
struct radeon_cmdbuf *cs = submission->cs;
struct radv_cmd_stream *cs = submission->cs;
struct radeon_winsys *ws = device->ws;
VkResult result;
@ -2515,7 +2517,7 @@ radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_
if (result != VK_SUCCESS)
goto fail;
ws->cs_reset(cs);
radv_reset_cmd_stream(device, cs);
if (submission->bo_size < size) {
result = radv_shader_dma_resize_upload_buf(device, submission, size);
@ -2524,10 +2526,10 @@ radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_
}
radv_sdma_copy_memory(device, cs, radv_buffer_get_va(submission->bo), va, size);
radv_cs_add_buffer(ws, cs, submission->bo);
radv_cs_add_buffer(ws, cs, bo);
radv_cs_add_buffer(ws, cs->b, submission->bo);
radv_cs_add_buffer(ws, cs->b, bo);
result = ws->cs_finalize(cs);
result = radv_finalize_cmd_stream(device, cs);
if (result != VK_SUCCESS)
goto fail;
@ -2547,7 +2549,7 @@ bool
radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submission *submission,
uint64_t *upload_seq_out)
{
struct radeon_cmdbuf *cs = submission->cs;
struct radv_cmd_stream *cs = submission->cs;
struct radeon_winsys *ws = device->ws;
VkResult result;
@ -2566,7 +2568,7 @@ radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submis
struct radv_winsys_submit_info submit = {
.ip_type = AMD_IP_SDMA,
.queue_index = 0,
.cs_array = &cs,
.cs_array = &cs->b,
.cs_count = 1,
};

View file

@ -504,7 +504,7 @@ struct radv_shader_part_cache {
struct radv_shader_dma_submission {
struct list_head list;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream *cs;
struct radeon_winsys_bo *bo;
uint64_t bo_size;
char *ptr;

View file

@ -68,7 +68,7 @@ radv_spm_resize_bo(struct radv_device *device)
}
static void
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf);
@ -82,7 +82,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
if (!num_counters)
continue;
radeon_check_space(device->ws, cs, 3 + num_counters * 3);
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
@ -105,7 +105,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
if (!num_counters)
continue;
radeon_check_space(device->ws, cs, 3 + num_counters * 3);
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |
@ -130,7 +130,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
for (unsigned i = 0; i < block_sel->num_instances; i++) {
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
radeon_check_space(device->ws, cs->b, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
@ -160,7 +160,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
}
static void
radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf);
@ -190,7 +190,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum
pdev->info.gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
}
radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
radeon_check_space(device->ws, cs->b, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
radeon_begin(cs);
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
@ -215,7 +215,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum
}
void
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_spm *spm = &device->spm;
@ -227,7 +227,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
assert(spm->sample_interval >= 32);
radeon_check_space(device->ws, cs, 27);
radeon_check_space(device->ws, cs->b, 27);
radeon_begin(cs);
/* Configure the SPM ring buffer. */

View file

@ -15,7 +15,7 @@
#include "radv_queue.h"
#include "radv_radeon_winsys.h"
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf);
void radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf);
bool radv_spm_init(struct radv_device *device);

View file

@ -48,7 +48,7 @@ radv_ip_to_queue_family(enum amd_ip_type t)
}
static void
radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family)
radv_emit_wait_for_idle(const struct radv_device *device, struct radv_cmd_stream *cs, int family)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum radv_queue_family qf = radv_ip_to_queue_family(family);
@ -62,7 +62,7 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *
}
static void
radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE;
@ -75,14 +75,14 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
ac_sqtt_emit_start(&pdev->info, pm4, &device->sqtt, is_compute_queue);
ac_pm4_finalize(pm4);
radeon_check_space(device->ws, cs, pm4->ndw);
radeon_check_space(device->ws, cs->b, pm4->ndw);
radv_emit_pm4_commands(cs, pm4);
ac_pm4_free_state(pm4);
}
static void
radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE;
@ -95,7 +95,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
ac_sqtt_emit_stop(&pdev->info, pm4, is_compute_queue);
ac_pm4_finalize(pm4);
radeon_check_space(device->ws, cs, pm4->ndw);
radeon_check_space(device->ws, cs->b, pm4->ndw);
radv_emit_pm4_commands(cs, pm4);
ac_pm4_clear_state(pm4, &pdev->info, false, is_compute_queue);
@ -108,7 +108,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
ac_sqtt_emit_wait(&pdev->info, pm4, &device->sqtt, is_compute_queue);
ac_pm4_finalize(pm4);
radeon_check_space(device->ws, cs, pm4->ndw);
radeon_check_space(device->ws, cs->b, pm4->ndw);
radv_emit_pm4_commands(cs, pm4);
ac_pm4_free_state(pm4);
@ -121,7 +121,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
const struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint32_t *dwords = (uint32_t *)data;
/* SQTT user data packets aren't supported on SDMA queues. */
@ -131,7 +131,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
while (num_dwords > 0) {
uint32_t count = MIN2(num_dwords, 2);
radeon_check_space(device->ws, cs, 2 + count);
radeon_check_space(device->ws, cs->b, 2 + count);
radeon_begin(cs);
/* Without the perfctr bit the CP might not always pass the
@ -150,7 +150,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
}
void
radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stream *cs, bool enable)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -177,7 +177,7 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf
}
void
radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
@ -516,7 +516,7 @@ radv_begin_sqtt(struct radv_queue *queue)
const struct radv_physical_device *pdev = radv_device_physical(device);
enum radv_queue_family family = queue->state.qf;
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream cs;
VkResult result;
/* Destroy the previous start CS and create a new one. */
@ -525,13 +525,13 @@ radv_begin_sqtt(struct radv_queue *queue)
device->sqtt.start_cs[family] = NULL;
}
cs = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs)
cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs.b)
return false;
radeon_check_space(ws, cs, 512);
radeon_check_space(ws, cs.b, 512);
radeon_begin(cs);
radeon_begin(&cs);
switch (family) {
case RADV_QUEUE_GENERAL:
@ -551,40 +551,40 @@ radv_begin_sqtt(struct radv_queue *queue)
radeon_end();
/* Make sure to wait-for-idle before starting SQTT. */
radv_emit_wait_for_idle(device, cs, family);
radv_emit_wait_for_idle(device, &cs, family);
/* Disable clock gating before starting SQTT. */
radv_emit_inhibit_clockgating(device, cs, true);
radv_emit_inhibit_clockgating(device, &cs, true);
/* Enable SQG events that collects thread trace data. */
radv_emit_spi_config_cntl(device, cs, true);
radv_emit_spi_config_cntl(device, &cs, true);
radv_perfcounter_emit_reset(cs, true);
radv_perfcounter_emit_reset(&cs, true);
if (device->spm.bo) {
/* Enable all shader stages by default. */
radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&pdev->info));
radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info));
radv_emit_spm_setup(device, cs, family);
radv_emit_spm_setup(device, &cs, family);
}
/* Start SQTT. */
radv_emit_sqtt_start(device, cs, family);
radv_emit_sqtt_start(device, &cs, family);
if (device->spm.bo) {
radeon_check_space(ws, cs, 8);
radv_perfcounter_emit_spm_start(device, cs, family);
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_start(device, &cs, family);
}
result = ws->cs_finalize(cs);
result = ws->cs_finalize(cs.b);
if (result != VK_SUCCESS) {
ws->cs_destroy(cs);
ws->cs_destroy(cs.b);
return false;
}
device->sqtt.start_cs[family] = cs;
device->sqtt.start_cs[family] = cs.b;
return radv_queue_internal_submit(queue, cs);
return radv_queue_internal_submit(queue, cs.b);
}
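Net effect of the hunks above: the internal start CS is now built through a stack wrapper in which only `b` is initialized; record-time helpers take `&cs`, while the winsys only ever sees the raw `cs.b`. Condensed into a sketch (error paths as in the source):

struct radv_cmd_stream cs;

cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs.b)
   return false;

radeon_check_space(ws, cs.b, 512);            /* raw buffer: space/finalize */
radv_emit_wait_for_idle(device, &cs, family); /* wrapper: emission helpers */

if (ws->cs_finalize(cs.b) != VK_SUCCESS) {
   ws->cs_destroy(cs.b);
   return false;
}
device->sqtt.start_cs[family] = cs.b;
return radv_queue_internal_submit(queue, cs.b);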
static bool
@ -593,7 +593,7 @@ radv_end_sqtt(struct radv_queue *queue)
struct radv_device *device = radv_queue_device(queue);
enum radv_queue_family family = queue->state.qf;
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream cs;
VkResult result;
/* Destroy the previous stop CS and create a new one. */
@ -602,13 +602,13 @@ radv_end_sqtt(struct radv_queue *queue)
device->sqtt.stop_cs[family] = NULL;
}
cs = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs)
cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs.b)
return false;
radeon_check_space(ws, cs, 512);
radeon_check_space(ws, cs.b, 512);
radeon_begin(cs);
radeon_begin(&cs);
switch (family) {
case RADV_QUEUE_GENERAL:
@ -628,33 +628,33 @@ radv_end_sqtt(struct radv_queue *queue)
radeon_end();
/* Make sure to wait-for-idle before stopping SQTT. */
radv_emit_wait_for_idle(device, cs, family);
radv_emit_wait_for_idle(device, &cs, family);
if (device->spm.bo) {
radeon_check_space(ws, cs, 8);
radv_perfcounter_emit_spm_stop(device, cs, family);
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_stop(device, &cs, family);
}
/* Stop SQTT. */
radv_emit_sqtt_stop(device, cs, family);
radv_emit_sqtt_stop(device, &cs, family);
radv_perfcounter_emit_reset(cs, true);
radv_perfcounter_emit_reset(&cs, true);
/* Restore previous state by disabling SQG events. */
radv_emit_spi_config_cntl(device, cs, false);
radv_emit_spi_config_cntl(device, &cs, false);
/* Restore previous state by re-enabling clock gating. */
radv_emit_inhibit_clockgating(device, cs, false);
radv_emit_inhibit_clockgating(device, &cs, false);
result = ws->cs_finalize(cs);
result = ws->cs_finalize(cs.b);
if (result != VK_SUCCESS) {
ws->cs_destroy(cs);
ws->cs_destroy(cs.b);
return false;
}
device->sqtt.stop_cs[family] = cs;
device->sqtt.stop_cs[family] = cs.b;
return radv_queue_internal_submit(queue, cs);
return radv_queue_internal_submit(queue, cs.b);
}
void
@ -837,11 +837,14 @@ radv_sqtt_get_timed_cmdbuf(struct radv_queue *queue, struct radeon_winsys_bo *ti
if (result != VK_SUCCESS)
goto fail;
radeon_check_space(device->ws, radv_cmd_buffer_from_handle(cmdbuf)->cs, 28);
struct radv_cmd_buffer *cmd_buffer = radv_cmd_buffer_from_handle(cmdbuf);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_check_space(device->ws, cs->b, 28);
timestamp_va = radv_buffer_get_va(timestamp_bo) + timestamp_offset;
radv_cs_add_buffer(device->ws, radv_cmd_buffer_from_handle(cmdbuf)->cs, timestamp_bo);
radv_cs_add_buffer(device->ws, cs->b, timestamp_bo);
radv_write_timestamp(radv_cmd_buffer_from_handle(cmdbuf), timestamp_va, timestamp_stage);

@ -65,9 +65,9 @@ bool radv_sqtt_queue_events_enabled(void);
void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords);
void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);
void radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stream *cs, bool enable);
void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit);
void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit);
VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);

@ -86,7 +86,7 @@ radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
/* vcn unified queue (sq) ib header */
void
radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature)
radv_vcn_sq_header(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature)
{
if (!skip_signature) {
/* vcn ib signature */
@ -97,8 +97,8 @@ radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned ty
radeon_emit(0);
radeon_end();
sq->signature_ib_checksum = &cs->buf[cs->cdw - 2];
sq->signature_ib_total_size_in_dw = &cs->buf[cs->cdw - 1];
sq->signature_ib_checksum = &cs->b->buf[cs->b->cdw - 2];
sq->signature_ib_total_size_in_dw = &cs->b->buf[cs->b->cdw - 1];
} else {
sq->signature_ib_checksum = NULL;
sq->signature_ib_total_size_in_dw = NULL;
@ -112,17 +112,17 @@ radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned ty
radeon_emit(0);
radeon_end();
sq->engine_ib_size_of_packages = &cs->buf[cs->cdw - 1];
sq->engine_ib_size_of_packages = &cs->b->buf[cs->b->cdw - 1];
}
void
radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
radv_vcn_sq_tail(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq)
{
uint32_t *end;
uint32_t size_in_dw;
uint32_t checksum = 0;
end = &cs->buf[cs->cdw];
end = &cs->b->buf[cs->b->cdw];
if (sq->signature_ib_checksum == NULL && sq->signature_ib_total_size_in_dw == NULL) {
if (sq->engine_ib_size_of_packages == NULL)
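The header/tail pair relies on raw pointers captured into the CS buffer (`signature_ib_checksum`, `signature_ib_total_size_in_dw`, `engine_ib_size_of_packages`) and back-patches them once the packet body is complete. A sketch of what the tail plausibly computes for the signed case; the exact checksum coverage is an assumption inferred from the pointer layout above, not copied from the source:

uint32_t *end = &cs->b->buf[cs->b->cdw];
uint32_t size_in_dw = end - sq->signature_ib_total_size_in_dw - 1;
uint32_t checksum = 0;

*sq->signature_ib_total_size_in_dw = size_in_dw;

/* Sum every dword emitted after the two signature dwords (assumed). */
for (uint32_t i = 0; i < size_in_dw; i++)
   checksum += *(sq->signature_ib_checksum + 2 + i);
*sq->signature_ib_checksum = checksum;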
@ -148,18 +148,18 @@ radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *even
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_physical_device *pdev = radv_device_physical(device);
struct rvcn_sq_var sq;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
/* UVD doesn't support events, and probably never will */
if (pdev->vid_decode_ip == AMD_IP_UVD)
return;
radv_cs_add_buffer(device->ws, cs, event->bo);
radv_cs_add_buffer(device->ws, cs->b, event->bo);
uint64_t va = radv_buffer_get_va(event->bo);
bool separate_queue = pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED;
if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC && separate_queue && pdev->vid_dec_reg.data2) {
radeon_check_space(device->ws, cmd_buffer->cs, 8);
radeon_check_space(device->ws, cs->b, 8);
set_reg(cmd_buffer, pdev->vid_dec_reg.data0, va & 0xffffffff);
set_reg(cmd_buffer, pdev->vid_dec_reg.data1, va >> 32);
set_reg(cmd_buffer, pdev->vid_dec_reg.data2, value);
@ -167,20 +167,21 @@ radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *even
return;
}
radeon_check_space(device->ws, cs, 256);
radeon_check_space(device->ws, cs->b, 256);
radv_vcn_sq_header(cs, &sq, RADEON_VCN_ENGINE_TYPE_COMMON, separate_queue);
struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->buf[cs->cdw]);
struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->b->buf[cs->b->cdw]);
ib_header->package_size = sizeof(struct rvcn_cmn_engine_ib_package) + sizeof(struct rvcn_cmn_engine_op_writememory);
cs->cdw++;
cs->b->cdw++;
ib_header->package_type = RADEON_VCN_IB_COMMON_OP_WRITEMEMORY;
cs->cdw++;
cs->b->cdw++;
struct rvcn_cmn_engine_op_writememory *write_memory = (struct rvcn_cmn_engine_op_writememory *)&(cs->buf[cs->cdw]);
struct rvcn_cmn_engine_op_writememory *write_memory =
(struct rvcn_cmn_engine_op_writememory *)&(cs->b->buf[cs->b->cdw]);
write_memory->dest_addr_lo = va & 0xffffffff;
write_memory->dest_addr_hi = va >> 32;
write_memory->data = value;
cs->cdw += sizeof(*write_memory) / 4;
cs->b->cdw += sizeof(*write_memory) / 4;
radv_vcn_sq_tail(cs, &sq);
}
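The write-memory packet above overlays packed structs directly on the CS buffer and bumps `cdw` by hand, a pattern the decode path below repeats several times. The same idea as a small hypothetical helper (not part of the source; assumes `size_bytes` is a multiple of four and space was already reserved via radeon_check_space()):

static void *
cs_reserve_bytes(struct radv_cmd_stream *cs, unsigned size_bytes)
{
   void *ptr = &cs->b->buf[cs->b->cdw]; /* next free dword */
   cs->b->cdw += size_bytes / 4;        /* commit the reservation */
   return ptr;
}

With it, the package header and the write-memory payload above would each collapse to a single cs_reserve_bytes() call followed by field assignments.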
@ -188,16 +189,17 @@ static void
radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_check_space(device->ws, cmd_buffer->cs, 512);
radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false);
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
radeon_check_space(device->ws, cs->b, 512);
radv_vcn_sq_header(cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false);
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cs->b->buf[cs->b->cdw]);
ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
cmd_buffer->cs->cdw++;
cs->b->cdw++;
ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
cmd_buffer->cs->cdw++;
cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
cs->b->cdw++;
cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cs->b->buf[cs->b->cdw]);
cs->b->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
}
@ -1389,7 +1391,7 @@ radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession,
static void
set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_begin(cs);
radeon_emit(RDECODE_PKT0(reg >> 2, 0));
@ -1402,11 +1404,12 @@ send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
radv_cs_add_buffer(device->ws, cs->b, bo);
if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(device->ws, cmd_buffer->cs, 6);
radeon_check_space(device->ws, cs->b, 6);
set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
@ -2385,6 +2388,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
struct radv_image_plane *luma = &img->planes[0];
struct radv_image_plane *chroma = &img->planes[1];
bool use_intra_only_allocation_for_dpb = false;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (vid->dpb_type == DPB_DYNAMIC_TIER_3) {
VkImageUsageFlags coincide = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;
@ -2565,10 +2569,10 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
uint64_t addr;
if (use_intra_only_allocation_for_dpb) {
addr = radv_buffer_get_va(vid->intra_only_dpb.mem->bo) + vid->intra_only_dpb.offset;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, vid->intra_only_dpb.mem->bo);
radv_cs_add_buffer(device->ws, cs->b, vid->intra_only_dpb.mem->bo);
} else {
addr = dpb->bindings[0].addr;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb->bindings[0].bo);
addr += dpb_array_idx *
(dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
}
@ -2610,7 +2614,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
int f_dpb_array_idx =
frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo);
addr = dpb_img->bindings[0].addr;
addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size +
dpb_img->planes[1].surface.u.gfx9.surf_slice_size);
@ -2645,16 +2649,15 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
}
uint32_t size = sizeof(rvcn_dec_ref_buffers_header_t) + sizeof(rvcn_dec_ref_buffer_t) * num_bufs;
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cs->b->buf[cs->b->cdw]);
ib_header->package_size = size + sizeof(struct rvcn_decode_ib_package_s);
cmd_buffer->cs->cdw++;
cs->b->cdw++;
ib_header->package_type = RDECODE_IB_PARAM_DYNAMIC_REFLIST_BUFFER;
cmd_buffer->cs->cdw++;
cs->b->cdw++;
rvcn_dec_ref_buffers_header_t *refs =
(rvcn_dec_ref_buffers_header_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
cmd_buffer->cs->cdw += size / 4;
rvcn_dec_ref_buffers_header_t *refs = (rvcn_dec_ref_buffers_header_t *)&(cs->b->buf[cs->b->cdw]);
cs->b->cdw += size / 4;
refs->size = size;
refs->num_bufs = 0;
@ -2669,7 +2672,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer;
fill_ref_buffer(&refs->pBufs[refs->num_bufs++], dpb_img, f_dpb_array_idx,
frame_info->pReferenceSlots[i].slotIndex);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo);
used_slots |= 1 << frame_info->pReferenceSlots[i].slotIndex;
}
@ -3106,6 +3109,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_video_session *vid = cmd_buffer->video.vid;
uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
struct radv_cmd_stream *cs = cmd_buffer->cs;
void *ptr;
uint32_t out_offset;
@ -3135,13 +3139,13 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(device->ws, cmd_buffer->cs, 8);
radeon_begin(cmd_buffer->cs);
radeon_check_space(device->ws, cs->b, 8);
radeon_begin(cs);
for (unsigned i = 0; i < 8; i++)
radeon_emit(0x81ff);
radeon_end();
} else
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
}
static void
@ -3149,9 +3153,11 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint32_t size = sizeof(struct ruvd_msg);
void *ptr;
uint32_t out_offset;
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
ruvd_dec_message_create(vid, ptr);
@ -3163,8 +3169,8 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
int padsize = vid->sessionctx.mem ? 4 : 6;
radeon_check_space(device->ws, cmd_buffer->cs, padsize);
radeon_begin(cmd_buffer->cs);
radeon_check_space(device->ws, cs->b, padsize);
radeon_begin(cs);
for (unsigned i = 0; i < padsize; i++)
radeon_emit(PKT2_NOP_PAD);
radeon_end();
@ -3207,6 +3213,7 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
uint32_t out_offset, fb_offset, it_probs_offset = 0;
struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
fb_bo = cmd_buffer->upload.upload_bo;
@ -3248,7 +3255,7 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo,
radv_buffer_get_va(it_probs_bo) + it_probs_offset);
radeon_check_space(device->ws, cmd_buffer->cs, 2);
radeon_check_space(device->ws, cs->b, 2);
set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
}
@ -3264,6 +3271,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
void *ptr, *fb_ptr, *it_probs_ptr = NULL;
uint32_t out_offset, fb_offset, it_probs_offset = 0;
struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
struct radv_cmd_stream *cs = cmd_buffer->cs;
size += sizeof(rvcn_dec_message_header_t); /* header */
size += sizeof(rvcn_dec_message_index_t); /* codec */
@ -3352,10 +3360,10 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, radv_buffer_get_va(it_probs_bo) + it_probs_offset);
if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(device->ws, cmd_buffer->cs, 2);
radeon_check_space(device->ws, cs->b, 2);
set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
} else
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
}
VKAPI_ATTR void VKAPI_CALL

@ -23,6 +23,7 @@ struct radv_physical_device;
struct rvcn_sq_var;
struct radv_cmd_buffer;
struct radv_image_create_info;
struct radv_cmd_stream;
#define RADV_ENC_MAX_RATE_LAYER 4
@ -82,8 +83,8 @@ void radv_init_physical_device_decoder(struct radv_physical_device *pdev);
void radv_video_get_profile_alignments(struct radv_physical_device *pdev, const VkVideoProfileListInfoKHR *profile_list,
uint32_t *width_align_out, uint32_t *height_align_out);
void radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature);
void radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq);
void radv_vcn_sq_header(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature);
void radv_vcn_sq_tail(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq);
void radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, unsigned value);
void radv_init_physical_device_encoder(struct radv_physical_device *pdevice);

@ -196,16 +196,16 @@ static const unsigned index_to_shifts[4] = {24, 16, 8, 0};
static void
radv_enc_output_one_byte(struct radv_cmd_buffer *cmd_buffer, unsigned char byte)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_enc_state *enc = &cmd_buffer->video.enc;
if (enc->byte_index == 0)
cs->buf[cs->cdw] = 0;
cs->buf[cs->cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]);
cs->b->buf[cs->b->cdw] = 0;
cs->b->buf[cs->b->cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]);
enc->byte_index++;
if (enc->byte_index >= 4) {
enc->byte_index = 0;
cs->cdw++;
cs->b->cdw++;
}
}
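With index_to_shifts = {24, 16, 8, 0}, bytes fill each bitstream dword most-significant-byte first, and the dword is only committed (`cdw++`) once all four byte slots are used. A worked example:

/* Emitting the bytes 0xAA, 0xBB, 0xCC, 0xDD in order:
 *   byte_index 0: buf[cdw]  = 0xAA000000
 *   byte_index 1: buf[cdw] |= 0x00BB0000  ->  0xAABB0000
 *   byte_index 2: buf[cdw] |= 0x0000CC00  ->  0xAABBCC00
 *   byte_index 3: buf[cdw] |= 0x000000DD  ->  0xAABBCCDD, then cdw++
 */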
@ -279,7 +279,7 @@ static void
radv_enc_flush_headers(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_enc_state *enc = &cmd_buffer->video.enc;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (enc->bits_in_shifter != 0) {
unsigned char output_byte = (unsigned char)(enc->shifter >> 24);
radv_enc_emulation_prevention(cmd_buffer, output_byte);
@ -291,7 +291,7 @@ radv_enc_flush_headers(struct radv_cmd_buffer *cmd_buffer)
}
if (enc->byte_index > 0) {
cs->cdw++;
cs->b->cdw++;
enc->byte_index = 0;
}
}
@ -377,15 +377,15 @@ radv_enc_h265_pic_type(enum StdVideoH265PictureType type)
}
}
#define RADEON_ENC_CS(value) (cmd_buffer->cs->buf[cmd_buffer->cs->cdw++] = (value))
#define RADEON_ENC_CS(value) (cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw++] = (value))
#define RADEON_ENC_BEGIN(cmd) \
{ \
uint32_t *begin = &cmd_buffer->cs->buf[cmd_buffer->cs->cdw++]; \
uint32_t *begin = &cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw++]; \
RADEON_ENC_CS(cmd)
#define RADEON_ENC_END() \
*begin = (&cmd_buffer->cs->buf[cmd_buffer->cs->cdw] - begin) * 4; \
*begin = (&cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw] - begin) * 4; \
cmd_buffer->video.enc.total_task_size += *begin; \
}
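RADEON_ENC_BEGIN reserves one dword as a size placeholder and RADEON_ENC_END back-patches it with the byte distance from that placeholder to the current write position, accumulating the result into total_task_size (itself back-patched through enc->p_task_size, see radv_enc_task_info below). The same mechanism written out as functions, with illustrative names that are not in the source:

static inline uint32_t *
enc_begin_package(struct radv_cmd_stream *cs, uint32_t cmd)
{
   uint32_t *begin = &cs->b->buf[cs->b->cdw++]; /* size placeholder */
   cs->b->buf[cs->b->cdw++] = cmd;              /* package opcode */
   return begin;
}

static inline void
enc_end_package(struct radv_cmd_stream *cs, uint32_t *begin)
{
   /* Distance from placeholder to write head, in bytes. */
   *begin = (&cs->b->buf[cs->b->cdw] - begin) * 4;
}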
@ -404,7 +404,7 @@ radv_enc_av1_bs_copy_end(struct radv_cmd_buffer *cmd_buffer, uint32_t bits)
static void
radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, uint32_t inst, uint32_t obu_type)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_enc_state *enc = &cmd_buffer->video.enc;
radv_enc_flush_headers(cmd_buffer);
@ -412,7 +412,7 @@ radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, uint32_t in
if (enc->bits_output)
radv_enc_av1_bs_copy_end(cmd_buffer, enc->bits_output);
enc->copy_start = &cs->buf[cs->cdw++];
enc->copy_start = &cs->b->buf[cs->b->cdw++];
RADEON_ENC_CS(inst);
if (inst != RENCODE_HEADER_INSTRUCTION_COPY) {
@ -432,9 +432,9 @@ radv_enc_session_info(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs, cmd_buffer->video.vid->sessionctx.mem->bo);
radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->video.vid->sessionctx.mem->bo);
uint64_t va = radv_buffer_get_va(cmd_buffer->video.vid->sessionctx.mem->bo);
va += cmd_buffer->video.vid->sessionctx.offset;
@ -455,12 +455,12 @@ radv_enc_task_info(struct radv_cmd_buffer *cmd_buffer, bool feedback)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_enc_state *enc = &cmd_buffer->video.enc;
enc->task_id++;
RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.task_info);
enc->p_task_size = &cs->buf[cs->cdw++];
enc->p_task_size = &cs->b->buf[cs->b->cdw++];
RADEON_ENC_CS(enc->task_id);
RADEON_ENC_CS(feedback ? 1 : 0);
RADEON_ENC_END();
@ -919,13 +919,13 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.slice_header);
radv_enc_reset(cmd_buffer);
radv_enc_set_emulation_prevention(cmd_buffer, false);
cdw_start = cs->cdw;
cdw_start = cs->b->cdw;
if (pic->flags.IdrPicFlag)
radv_enc_code_fixed_bits(cmd_buffer, 0x65, 8);
@ -1073,7 +1073,7 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;
cdw_filled = cs->cdw - cdw_start;
cdw_filled = cs->b->cdw - cdw_start;
for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++)
RADEON_ENC_CS(0x00000000);
for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {
@ -1158,14 +1158,14 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco
unsigned int num_pic_total_curr = 0;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned nal_unit_type = vk_video_get_h265_nal_unit(pic);
RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.slice_header);
radv_enc_reset(cmd_buffer);
radv_enc_set_emulation_prevention(cmd_buffer, false);
cdw_start = cs->cdw;
cdw_start = cs->b->cdw;
radv_enc_code_fixed_bits(cmd_buffer, 0x0, 1);
radv_enc_code_fixed_bits(cmd_buffer, nal_unit_type, 6);
radv_enc_code_fixed_bits(cmd_buffer, 0x0, 6);
@ -1354,7 +1354,7 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco
instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;
cdw_filled = cs->cdw - cdw_start;
cdw_filled = cs->b->cdw - cdw_start;
for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++)
RADEON_ENC_CS(0x00000000);
for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {
@ -1392,7 +1392,7 @@ radv_enc_ctx(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *inf
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_video_session *vid = cmd_buffer->video.vid;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_image_view *dpb_iv = NULL;
struct radv_image *dpb = NULL;
uint64_t va = 0;
@ -1420,7 +1420,7 @@ radv_enc_ctx(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *inf
dpb_image_sizes(dpb, &luma_pitch, &luma_size, &chroma_size, &colloc_bytes);
radv_cs_add_buffer(device->ws, cs, dpb->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb->bindings[0].bo);
va = dpb->bindings[0].addr;
}
@ -1533,6 +1533,7 @@ radv_enc_ctx2(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *in
uint32_t luma_pitch = 0, luma_size = 0, chroma_size = 0, colloc_bytes = 0;
int max_ref_slot_idx = 0;
const VkVideoPictureResourceInfoKHR *slots[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES] = {NULL};
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (info->pSetupReferenceSlot) {
max_ref_slot_idx = info->pSetupReferenceSlot->slotIndex;
@ -1569,7 +1570,7 @@ radv_enc_ctx2(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *in
struct radv_image_view *dpb_iv = radv_image_view_from_handle(res->imageViewBinding);
assert(dpb_iv != NULL);
struct radv_image *dpb_img = dpb_iv->image;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo);
dpb_image_sizes(dpb_iv->image, &luma_pitch, &luma_size, &chroma_size, &colloc_bytes);
uint32_t metadata_size = RENCODE_MAX_METADATA_BUFFER_SIZE_PER_FRAME;
@ -1621,9 +1622,9 @@ radv_enc_bitstream(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffe
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = vk_buffer_address(&buffer->vk, offset);
radv_cs_add_buffer(device->ws, cs, buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.bitstream);
RADEON_ENC_CS(RENCODE_REC_SWIZZLE_MODE_LINEAR);
@ -1739,7 +1740,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *
struct radv_image *src_img = src_iv->image;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint32_t array_idx = enc_info->srcPictureResource.baseArrayLayer + src_iv->vk.base_array_layer;
uint64_t va = src_img->bindings[0].addr;
uint64_t luma_va = va + src_img->planes[0].surface.u.gfx9.surf_offset +
@ -1750,7 +1751,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *
unsigned int slot_idx = 0xffffffff;
unsigned int max_layers = cmd_buffer->video.vid->rc_layer_control.max_num_temporal_layers;
radv_cs_add_buffer(device->ws, cs, src_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, src_img->bindings[0].bo);
if (h264_pic) {
switch (h264_pic->primary_pic_type) {
case STD_VIDEO_H264_PICTURE_TYPE_P:
@ -2094,14 +2095,14 @@ radv_enc_headers_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
static void
radv_enc_cdf_default_table(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *enc_info)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct VkVideoEncodeAV1PictureInfoKHR *av1_picture_info =
vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_AV1_PICTURE_INFO_KHR);
const StdVideoEncodeAV1PictureInfo *av1_pic = av1_picture_info->pStdPictureInfo;
radv_cs_add_buffer(device->ws, cs, cmd_buffer->video.vid->ctx.mem->bo);
radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->video.vid->ctx.mem->bo);
uint64_t va = radv_buffer_get_va(cmd_buffer->video.vid->ctx.mem->bo);
va += cmd_buffer->video.vid->ctx.offset;
uint32_t use_cdf_default = (av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_KEY ||
@ -2639,6 +2640,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_enc_state *enc = &cmd_buffer->video.enc;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t feedback_query_va;
switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
@ -2650,10 +2652,10 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
return;
}
radeon_check_space(device->ws, cmd_buffer->cs, 1600);
radeon_check_space(device->ws, cs->b, 1600);
if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false);
radv_vcn_sq_header(cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false);
const struct VkVideoInlineQueryInfoKHR *inline_queries = NULL;
if (vid->vk.flags & VK_VIDEO_SESSION_CREATE_INLINE_QUERIES_BIT_KHR) {
@ -2662,7 +2664,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
if (inline_queries) {
VK_FROM_HANDLE(radv_query_pool, pool, inline_queries->queryPool);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->bo);
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
feedback_query_va = radv_buffer_get_va(pool->bo);
feedback_query_va += pool->stride * inline_queries->firstQuery;
@ -2745,7 +2747,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
*enc->p_task_size = enc->total_task_size;
if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
}
static void