radv: switch to radv_cmd_stream everywhere
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36314>
This commit is contained in:
parent 5982e8f331
commit 3ccb48ec46

37 changed files with 900 additions and 765 deletions
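The definition of the new wrapper type is never shown in this diff, but its usage below (cs->b is handed to every helper that still takes a raw winsys command buffer) implies roughly the following shape. A minimal sketch, not the actual definition:

/* Hypothetical sketch inferred from usage in the hunks below: the wrapper
 * holds the underlying winsys command buffer as a pointer member "b".
 * The real struct likely carries extra state (queue family, etc.). */
struct radv_cmd_stream {
   struct radeon_cmdbuf *b; /* underlying winsys command buffer */
};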
@@ -39,8 +39,10 @@ ctx_roll_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2
       const VkSubmitInfo2 *submit = pSubmits + submit_index;
       for (uint32_t i = 0; i < submit->commandBufferInfoCount; i++) {
          VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submit->pCommandBufferInfos[i].commandBuffer);
+         struct radv_cmd_stream *cs = cmd_buffer->cs;
+
          fprintf(device->ctx_roll_file, "\n%s:\n", vk_object_base_name(&cmd_buffer->vk.base));
-         device->ws->cs_dump(cmd_buffer->cs, device->ctx_roll_file, NULL, 0, RADV_CS_DUMP_TYPE_CTX_ROLLS);
+         device->ws->cs_dump(cs->b, device->ctx_roll_file, NULL, 0, RADV_CS_DUMP_TYPE_CTX_ROLLS);
       }
    }
 }

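Nearly every hunk below repeats the mechanical pattern shown above: hoist the stream into a local, then pass the embedded radeon_cmdbuf explicitly. In sketch form:

/* Before: cmd_buffer->cs was the raw winsys command buffer. */
radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);

/* After: cmd_buffer->cs is a radv_cmd_stream; helpers that still take a
 * raw radeon_cmdbuf get cs->b, while converted helpers take cs directly. */
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_cs_add_buffer(device->ws, cs->b, bo);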
@@ -23,7 +23,7 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    radv_foreach_stage (s, RADV_GRAPHICS_STAGE_BITS & ~VK_SHADER_STAGE_TASK_BIT_EXT) {
       const struct radv_shader *shader = pipeline->base.shaders[s];
@@ -306,7 +306,7 @@ radv_gfx12_write_draw_marker(struct radv_cmd_buffer *cmd_buffer, const struct ra
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    /* RGP doesn't need this marker for indirect draws. */
    if (draw_info->indirect_va)

@@ -367,6 +367,7 @@ radv_meta_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoi
    VK_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    struct radv_descriptor_set_layout *set_layout = layout->set[0].layout;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint32_t upload_offset;
    uint8_t *ptr;

@@ -395,7 +396,7 @@ radv_meta_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoi
          VK_FROM_HANDLE(radv_image_view, iview, image_view);
          for (uint32_t b = 0; b < ARRAY_SIZE(iview->image->bindings); b++) {
             if (iview->image->bindings[b].bo)
-               radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo);
+               radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo);
          }
       }
    }

@@ -17,6 +17,7 @@ decode_astc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivie
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    struct radv_meta_state *state = &device->meta_state;
    struct vk_texcompress_astc_write_descriptor_buffer desc_buffer;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    VkFormat format = src_iview->image->vk.format;
    int blk_w = vk_format_get_blockwidth(format);
    int blk_h = vk_format_get_blockheight(format);
@@ -26,7 +27,7 @@ decode_astc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivie
                                radv_image_view_to_handle(dst_iview), format);

    VK_FROM_HANDLE(radv_buffer, luts_buf, state->astc_decode->luts_buf);
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, luts_buf->bo);
+   radv_cs_add_buffer(device->ws, cs->b, luts_buf->bo);

    radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->astc_decode->p_layout,
                               VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT, desc_buffer.descriptors);

@@ -319,8 +319,9 @@ radv_fill_image(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *ima
    const uint64_t va = image->bindings[0].addr + offset;
    struct radeon_winsys_bo *bo = image->bindings[0].bo;
    const enum radv_copy_flags copy_flags = radv_get_copy_flags_from_bo(bo);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+   radv_cs_add_buffer(device->ws, cs->b, bo);

    return radv_fill_memory_internal(cmd_buffer, image, va, size, value, copy_flags);
 }
@@ -331,8 +332,9 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const enum radv_copy_flags copy_flags = radv_get_copy_flags_from_bo(bo);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+   radv_cs_add_buffer(device->ws, cs->b, bo);

    return radv_fill_memory(cmd_buffer, va, size, value, copy_flags);
 }
@@ -378,14 +380,15 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop
    VK_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
    VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    const enum radv_copy_flags src_copy_flags = radv_get_copy_flags_from_bo(src_buffer->bo);
    const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);

    radv_suspend_conditional_rendering(cmd_buffer);

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_buffer->bo);
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, src_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);

    for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
       const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];
@@ -404,13 +407,14 @@ radv_update_memory_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    uint64_t words = size / 4;
    bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    assert(size < RADV_BUFFER_UPDATE_THRESHOLD);

    radv_emit_cache_flush(cmd_buffer);
-   radeon_check_space(device->ws, cmd_buffer->cs, words + 4);
+   radeon_check_space(device->ws, cs->b, words + 4);

-   radeon_begin(cmd_buffer->cs);
+   radeon_begin(cs);
    radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + words, 0));
    radeon_emit(S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
    radeon_emit(va);
@@ -454,12 +458,13 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
    VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);

    radv_suspend_conditional_rendering(cmd_buffer);

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);

    radv_update_memory(cmd_buffer, dst_va, dataSize, pData, dst_copy_flags);

@@ -1067,6 +1067,7 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct ra
                                    const VkClearColorValue *clear_color)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    VkPipelineLayout layout;
    VkPipeline pipeline;
    unsigned stride;
@@ -1078,7 +1079,7 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct ra
       return;
    }

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst->image->bindings[0].bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst->image->bindings[0].bo);

    radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 1,
                               (VkDescriptorGetInfoEXT[]){{

@@ -591,6 +591,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
                  uint64_t va, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint64_t block_count = DIV_ROUND_UP(size, 1024);
    struct radv_meta_saved_state saved_state;
    VkPipelineLayout layout;
@@ -603,7 +604,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
       return 0;
    }

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+   radv_cs_add_buffer(device->ws, cs->b, bo);

    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);

@@ -56,6 +56,7 @@ static bool
 alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    if (!cmd_buffer->transfer.copy_temp) {
       const VkResult r =
@@ -69,7 +70,7 @@ alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
       }
    }

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, cmd_buffer->transfer.copy_temp);
+   radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->transfer.copy_temp);
    return true;
 }

@@ -78,7 +79,7 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_v
                            const VkBufferImageCopy2 *region, bool to_image)
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    struct radv_sdma_surf buf = radv_sdma_get_buf_surf(buffer_va, image, region);
    const struct radv_sdma_surf img = radv_sdma_get_surf(device, image, region->imageSubresource, region->imageOffset);
@@ -217,19 +218,20 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
    VK_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    const enum radv_copy_flags src_copy_flags = radv_get_copy_flags_from_bo(src_buffer->bo);

    radv_suspend_conditional_rendering(cmd_buffer);

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, src_buffer->bo);

    for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
       const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
       const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
       const unsigned bind_idx = dst_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;

-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_image->bindings[bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[bind_idx].bo);

       copy_memory_to_image(cmd_buffer, src_buffer->vk.device_address, src_buffer->vk.size, src_copy_flags, dst_image,
                            pCopyBufferToImageInfo->dstImageLayout, region);
@@ -368,19 +370,20 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBuf
    VK_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
    VK_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    const enum radv_copy_flags dst_copy_flags = radv_get_copy_flags_from_bo(dst_buffer->bo);

    radv_suspend_conditional_rendering(cmd_buffer);

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
+   radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);

    for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
       const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
       const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
       const unsigned bind_idx = src_image->disjoint ? radv_plane_from_aspect(aspect_mask) : 0;

-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_image->bindings[bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[bind_idx].bo);

       copy_image_to_memory(cmd_buffer, dst_buffer->vk.device_address, dst_buffer->vk.size, dst_copy_flags, src_image,
                            pCopyImageToBufferInfo->srcImageLayout, region);
@@ -394,7 +397,7 @@ transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_i
                     struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned int dst_aspect_mask_remaining = region->dstSubresource.aspectMask;

    VkImageSubresourceLayers src_subresource = region->srcSubresource;
@@ -650,6 +653,7 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
    VK_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    radv_suspend_conditional_rendering(cmd_buffer);

@@ -660,8 +664,8 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
       const VkImageAspectFlags dst_aspect_mask = region->dstSubresource.aspectMask;
       const unsigned dst_bind_idx = dst_image->disjoint ? radv_plane_from_aspect(dst_aspect_mask) : 0;

-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, src_image->bindings[src_bind_idx].bo);
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_image->bindings[dst_bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, src_image->bindings[src_bind_idx].bo);
+      radv_cs_add_buffer(device->ws, cs->b, dst_image->bindings[dst_bind_idx].bo);

       copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout,
                  region);

@@ -110,6 +110,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
 {
    struct radv_meta_saved_state saved_state;
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    VkPipelineLayout layout;
    VkPipeline pipeline;
    VkResult result;
@@ -135,7 +136,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)

    const uint64_t va = image->bindings[0].addr;

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, image->bindings[0].bo);
+   radv_cs_add_buffer(device->ws, cs->b, image->bindings[0].bo);

    radv_meta_bind_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 2,
                               (VkDescriptorGetInfoEXT[]){

(File diff suppressed because it is too large.)

@@ -575,7 +575,7 @@ struct radv_cmd_buffer {
    } gfx12;

    VkCommandBufferUsageFlags usage_flags;
-   struct radeon_cmdbuf *cs;
+   struct radv_cmd_stream *cs;
    struct radv_cmd_state state;
    struct radv_vertex_binding vertex_bindings[MAX_VBS];
    struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
@@ -620,7 +620,7 @@ struct radv_cmd_buffer {
     */
    struct {
       /** Follower command stream. */
-      struct radeon_cmdbuf *cs;
+      struct radv_cmd_stream *cs;

       /** Flush bits for the follower cmdbuf. */
       enum radv_cmd_flush_bits flush_bits;
@@ -888,7 +888,7 @@ struct radv_vbo_info {

 void radv_get_vbo_info(const struct radv_cmd_buffer *cmd_buffer, uint32_t vbo_idx, struct radv_vbo_info *vbo_info);

-void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
+void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs,
                               const struct radv_shader *shader);

 void radv_upload_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,

@@ -47,7 +47,7 @@ cp_dma_max_byte_count(enum amd_gfx_level gfx_level)
  * clear value.
  */
 static void
-radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va,
+radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool predicating, uint64_t dst_va,
                     uint64_t src_va, unsigned size, unsigned flags)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -56,7 +56,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p

    assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level));

-   radeon_check_space(device->ws, cs, 9);
+   radeon_check_space(device->ws, cs->b, 9);
    if (pdev->info.gfx_level >= GFX9)
       command |= S_415_BYTE_COUNT_GFX9(size);
    else
@@ -106,7 +106,7 @@ static void
 radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    bool predicating = cmd_buffer->state.predicating;

    radv_cs_emit_cp_dma(device, cs, predicating, dst_va, src_va, size, flags);
@@ -133,7 +133,7 @@ radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t s
 }

 void
-radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
+radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, unsigned size,
                         bool predicating)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -146,7 +146,7 @@ radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *

    assert(size <= cp_dma_max_byte_count(gfx_level));

-   radeon_check_space(ws, cs, 9);
+   radeon_check_space(ws, cs->b, 9);

    uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
    uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;

@@ -15,10 +15,10 @@
 #include <stdbool.h>

 struct radv_device;
-struct radeon_cmdbuf;
+struct radv_cmd_stream;
 struct radv_cmd_buffer;

-void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
+void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, unsigned size,
                              bool predicating);

 void radv_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);

@@ -19,13 +19,14 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
    struct radeon_winsys *ws = device->ws;
    const struct radeon_info *gpu_info = &pdev->info;
    struct ac_pm4_state *pm4 = NULL;
+   struct radv_cmd_stream *cs;
    VkResult result;

-   struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false);
-   if (!cs)
-      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+   result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
+   if (result != VK_SUCCESS)
+      return result;

-   radeon_check_space(ws, cs, 256);
+   radeon_check_space(ws, cs->b, 256);

    /* allocate memory for queue_state->shadowed_regs where register states are saved */
    result = radv_bo_create(device, NULL, SI_SHADOWED_REG_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
@@ -43,10 +44,10 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
    radeon_emit_array(pm4->pm4, pm4->ndw);
    radeon_end();

-   ws->cs_pad(cs, 0);
+   ws->cs_pad(cs->b, 0);

    result = radv_bo_create(
-      device, NULL, cs->cdw * 4, 4096, ws->cs_domain(ws),
+      device, NULL, cs->b->cdw * 4, 4096, ws->cs_domain(ws),
       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
       RADV_BO_PRIORITY_CS, 0, true, &queue_state->shadow_regs_ib);
    if (result != VK_SUCCESS)
@@ -60,13 +61,13 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
       result = VK_ERROR_MEMORY_MAP_FAILED;
       goto fail_map;
    }
-   memcpy(map, cs->buf, cs->cdw * 4);
-   queue_state->shadow_regs_ib_size_dw = cs->cdw;
+   memcpy(map, cs->b->buf, cs->b->cdw * 4);
+   queue_state->shadow_regs_ib_size_dw = cs->b->cdw;

    ws->buffer_unmap(ws, queue_state->shadow_regs_ib, false);

    ac_pm4_free_state(pm4);
-   ws->cs_destroy(cs);
+   radv_destroy_cmd_stream(device, cs);
    return VK_SUCCESS;
 fail_map:
    radv_bo_destroy(device, NULL, queue_state->shadow_regs_ib);
@@ -77,7 +78,7 @@ fail_create:
    radv_bo_destroy(device, NULL, queue_state->shadowed_regs);
    queue_state->shadowed_regs = NULL;
 fail:
-   ws->cs_destroy(cs);
+   radv_destroy_cmd_stream(device, cs);
    return result;
 }

@@ -92,15 +93,15 @@ radv_destroy_shadow_regs_preamble(struct radv_device *device, struct radv_queue_
 }

 void
-radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
+radv_emit_shadow_regs_preamble(struct radv_cmd_stream *cs, const struct radv_device *device,
                                struct radv_queue_state *queue_state)
 {
    struct radeon_winsys *ws = device->ws;

-   ws->cs_execute_ib(cs, queue_state->shadow_regs_ib, 0, queue_state->shadow_regs_ib_size_dw & 0xffff, false);
+   ws->cs_execute_ib(cs->b, queue_state->shadow_regs_ib, 0, queue_state->shadow_regs_ib_size_dw & 0xffff, false);

-   radv_cs_add_buffer(device->ws, cs, queue_state->shadowed_regs);
-   radv_cs_add_buffer(device->ws, cs, queue_state->shadow_regs_ib);
+   radv_cs_add_buffer(device->ws, cs->b, queue_state->shadowed_regs);
+   radv_cs_add_buffer(device->ws, cs->b, queue_state->shadow_regs_ib);
 }

 /* radv_init_shadowed_regs_buffer_state() will be called once from radv_queue_init(). This
@@ -111,14 +112,14 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radeon_info *gpu_info = &pdev->info;
    struct radeon_winsys *ws = device->ws;
-   struct radeon_cmdbuf *cs;
+   struct radv_cmd_stream *cs;
    VkResult result;

-   cs = ws->cs_create(ws, AMD_IP_GFX, false);
-   if (!cs)
-      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+   result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
+   if (result != VK_SUCCESS)
+      return result;

-   radeon_check_space(ws, cs, 768);
+   radeon_check_space(ws, cs->b, 768);

    radv_emit_shadow_regs_preamble(cs, device, &queue->state);

@@ -136,13 +137,13 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra
       ac_pm4_free_state(pm4);
    }

-   result = ws->cs_finalize(cs);
+   result = radv_finalize_cmd_stream(device, cs);
    if (result == VK_SUCCESS) {
-      if (!radv_queue_internal_submit(queue, cs))
+      if (!radv_queue_internal_submit(queue, cs->b))
         result = VK_ERROR_UNKNOWN;
    }

 fail:
-   ws->cs_destroy(cs);
+   radv_destroy_cmd_stream(device, cs);
    return result;
 }

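The shadow-register code above is the first place this diff shows the full lifecycle of a standalone stream: radv_create_cmd_stream() replaces ws->cs_create(), radv_finalize_cmd_stream() replaces ws->cs_finalize(), and radv_destroy_cmd_stream() replaces ws->cs_destroy(). Their declarations are not part of this diff; inferred from the call sites, they would look roughly like this (the bool argument mirrors the old cs_create() flag, whose exact meaning is not shown here):

/* Hypothetical declarations inferred from the call sites above. */
VkResult radv_create_cmd_stream(struct radv_device *device, enum radv_queue_family qf,
                                bool flag, struct radv_cmd_stream **cs_out);
VkResult radv_finalize_cmd_stream(struct radv_device *device, struct radv_cmd_stream *cs);
void radv_destroy_cmd_stream(struct radv_device *device, struct radv_cmd_stream *cs);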
@@ -13,6 +13,7 @@

 #include "radv_radeon_winsys.h"

+struct radv_cmd_stream;
 struct radv_device;
 struct radv_queue_state;
 struct radv_queue;
@@ -22,7 +23,7 @@ VkResult radv_create_shadow_regs_preamble(struct radv_device *device, struct rad
 void radv_destroy_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state,
                                        struct radeon_winsys *ws);

-void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
+void radv_emit_shadow_regs_preamble(struct radv_cmd_stream *cs, const struct radv_device *device,
                                     struct radv_queue_state *queue_state);

 VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue);

@@ -17,7 +17,7 @@
 #include "sid.h"

 void
-radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
+radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
                              unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va,
                              uint32_t new_fence, uint64_t gfx9_eop_bug_va)
 {
@@ -111,7 +111,7 @@ radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_le
 }

 static void
-radv_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
+radv_emit_acquire_mem(struct radv_cmd_stream *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
 {
    radeon_begin(cs);

@@ -137,7 +137,7 @@ radv_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsig
 }

 static void
-gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
+gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
                           uint64_t flush_va, enum radv_queue_family qf, enum radv_cmd_flush_bits flush_bits,
                           enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
 {
@@ -381,7 +381,7 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level
 }

 void
-radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
+radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level,
                          uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
                          enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
                          uint64_t gfx9_eop_bug_va)
@@ -389,7 +389,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu
    unsigned cp_coher_cntl = 0;
    uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);

-   radeon_check_space(ws, cs, 128);
+   radeon_check_space(ws, cs->b, 128);

    if (gfx_level >= GFX10) {
       /* GFX10 cache flush handling is quite different. */
@@ -592,7 +592,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu
 }

 void
-radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count)
+radv_emit_cond_exec(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, uint32_t count)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
@@ -616,7 +616,7 @@ radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs,
 }

 void
-radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm)
+radv_cs_write_data_imm(struct radv_cmd_stream *cs, unsigned engine_sel, uint64_t va, uint32_t imm)
 {
    radeon_begin(cs);
    radeon_emit(PKT3(PKT3_WRITE_DATA, 3, 0));

@@ -28,7 +28,7 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned
 }

 #define radeon_begin(cs) \
-   struct radeon_cmdbuf *__cs = (cs); \
+   struct radeon_cmdbuf *__cs = (cs)->b; \
    uint32_t __cs_num = __cs->cdw; \
    UNUSED uint32_t __cs_reserved_dw = __cs->reserved_dw; \
    uint32_t *__cs_buf = __cs->buf
@@ -340,7 +340,7 @@ radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned
 } while (0)

 ALWAYS_INLINE static void
-radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
+radv_cp_wait_mem(struct radv_cmd_stream *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
                  const uint32_t ref, const uint32_t mask)
 {
    assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);
@@ -363,11 +363,11 @@ radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, cons
 }

 ALWAYS_INLINE static unsigned
-radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
+radv_cs_write_data_head(const struct radv_device *device, struct radv_cmd_stream *cs, const enum radv_queue_family qf,
                         const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating)
 {
    /* Return the correct cdw at the end of the packet so the caller can assert it. */
-   const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count);
+   const unsigned cdw_end = radeon_check_space(device->ws, cs->b, 4 + count);

    if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
       radeon_begin(cs);
@@ -386,7 +386,7 @@ radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *
 }

 ALWAYS_INLINE static void
-radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
+radv_cs_write_data(const struct radv_device *device, struct radv_cmd_stream *cs, const enum radv_queue_family qf,
                    const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords,
                    const bool predicating)
 {
@@ -395,24 +395,24 @@ radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, c
    radeon_begin(cs);
    radeon_emit_array(dwords, count);
    radeon_end();
-   assert(cs->cdw == cdw_end);
+   assert(cs->b->cdw == cdw_end);
 }

-void radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
+void radv_cs_emit_write_event_eop(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
                                   unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel,
                                   uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va);

-void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
+void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, enum amd_gfx_level gfx_level,
                               uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
                               enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
                               uint64_t gfx9_eop_bug_va);

-void radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count);
+void radv_emit_cond_exec(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t va, uint32_t count);

-void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
+void radv_cs_write_data_imm(struct radv_cmd_stream *cs, unsigned engine_sel, uint64_t va, uint32_t imm);

 static inline void
-radv_emit_pm4_commands(struct radeon_cmdbuf *cs, const struct ac_pm4_state *pm4)
+radv_emit_pm4_commands(struct radv_cmd_stream *cs, const struct ac_pm4_state *pm4)
 {
    radeon_begin(cs);
    radeon_emit_array(pm4->pm4, pm4->ndw);

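The one-line radeon_begin() change above is what keeps the hundreds of existing emit sites compiling: the macro now unwraps the stream once at the top of the emit block, so any function converted to take a radv_cmd_stream can keep its body untouched. A minimal usage sketch under that assumption (emit_example is hypothetical; PKT3_NOP serves purely as a placeholder packet):

static inline void
emit_example(struct radv_cmd_stream *cs, uint32_t imm)
{
   radeon_begin(cs); /* expands to: struct radeon_cmdbuf *__cs = (cs)->b; ... */
   radeon_emit(PKT3(PKT3_NOP, 0, 0));
   radeon_emit(imm);
   radeon_end();
}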
@@ -42,10 +42,12 @@ radv_write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_b
    if (device->use_global_bo_list)
       return;

-   if (cmd_buffer)
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo);
-   else
+   if (cmd_buffer) {
+      struct radv_cmd_stream *cs = cmd_buffer->cs;
+      radv_cs_add_buffer(device->ws, cs->b, buffer_view->bo);
+   } else {
       *buffer_list = buffer_view->bo;
+   }
 }

 static ALWAYS_INLINE void
@@ -90,10 +92,12 @@ radv_write_buffer_descriptor_impl(struct radv_device *device, struct radv_cmd_bu
       return;
    }

-   if (cmd_buffer)
-      radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo);
-   else
+   if (cmd_buffer) {
+      struct radv_cmd_stream *cs = cmd_buffer->cs;
+      radv_cs_add_buffer(device->ws, cs->b, buffer->bo);
+   } else {
       *buffer_list = buffer->bo;
+   }
 }

 static ALWAYS_INLINE void
@@ -190,8 +194,9 @@ radv_write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buf
    const uint32_t max_bindings = sizeof(iview->image->bindings) / sizeof(iview->image->bindings[0]);
    for (uint32_t b = 0; b < max_bindings; b++) {
       if (cmd_buffer) {
+         struct radv_cmd_stream *cs = cmd_buffer->cs;
          if (iview->image->bindings[b].bo)
-            radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo);
+            radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo);
       } else {
          *buffer_list = iview->image->bindings[b].bo;
          buffer_list++;
@@ -239,8 +244,9 @@ radv_write_image_descriptor_ycbcr_impl(struct radv_device *device, struct radv_c

    for (uint32_t b = 0; b < ARRAY_SIZE(iview->image->bindings); b++) {
       if (cmd_buffer) {
+         struct radv_cmd_stream *cs = cmd_buffer->cs;
          if (iview->image->bindings[b].bo)
-            radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->image->bindings[b].bo);
+            radv_cs_add_buffer(device->ws, cs->b, iview->image->bindings[b].bo);
       } else {
          *buffer_list = iview->image->bindings[b].bo;
          buffer_list++;

@@ -537,7 +537,7 @@ radv_device_init_perf_counter(struct radv_device *device)
    if (result != VK_SUCCESS)
       return result;

-   device->perf_counter_lock_cs = calloc(sizeof(struct radeon_cmdbuf *), 2 * PERF_CTR_MAX_PASSES);
+   device->perf_counter_lock_cs = calloc(sizeof(struct radv_cmd_stream *), 2 * PERF_CTR_MAX_PASSES);
    if (!device->perf_counter_lock_cs)
       return VK_ERROR_OUT_OF_HOST_MEMORY;

@@ -558,7 +558,7 @@ radv_device_finish_perf_counter(struct radv_device *device)

    for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) {
       if (device->perf_counter_lock_cs[i])
-         device->ws->cs_destroy(device->perf_counter_lock_cs[i]);
+         radv_destroy_cmd_stream(device, device->perf_counter_lock_cs[i]);
    }

    free(device->perf_counter_lock_cs);
@@ -894,18 +894,21 @@ radv_device_init_cache_key(struct radv_device *device)
 static void
 radv_create_gfx_preamble(struct radv_device *device)
 {
-   struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false);
-   if (!cs)
+   struct radv_cmd_stream *cs;
+   VkResult result;
+
+   result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
+   if (result != VK_SUCCESS)
       return;

-   radeon_check_space(device->ws, cs, 512);
+   radeon_check_space(device->ws, cs->b, 512);

    radv_emit_graphics(device, cs);

-   device->ws->cs_pad(cs, 0);
+   device->ws->cs_pad(cs->b, 0);

-   VkResult result = radv_bo_create(
-      device, NULL, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws),
+   result = radv_bo_create(
+      device, NULL, cs->b->cdw * 4, 4096, device->ws->cs_domain(device->ws),
       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
       RADV_BO_PRIORITY_CS, 0, true, &device->gfx_init);
    if (result != VK_SUCCESS)
@@ -917,12 +920,12 @@ radv_create_gfx_preamble(struct radv_device *device)
       device->gfx_init = NULL;
       goto fail;
    }
-   memcpy(map, cs->buf, cs->cdw * 4);
+   memcpy(map, cs->b->buf, cs->b->cdw * 4);

    device->ws->buffer_unmap(device->ws, device->gfx_init, false);
-   device->gfx_init_size_dw = cs->cdw;
+   device->gfx_init_size_dw = cs->b->cdw;
 fail:
-   device->ws->cs_destroy(cs);
+   radv_destroy_cmd_stream(device, cs);
 }

 /* For MSAA sample positions. */
@@ -977,7 +980,7 @@ radv_get_default_max_sample_dist(int log_samples)
 }

 void
-radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, int nr_samples)
+radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs, int nr_samples)
 {
    uint64_t centroid_priority;

@@ -33,6 +33,7 @@
 #define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)

 struct radv_image_view;
+struct radv_cmd_stream;

 enum radv_dispatch_table {
    RADV_DEVICE_DISPATCH_TABLE,
@@ -289,7 +290,7 @@ struct radv_device {
    struct radeon_winsys_bo *perf_counter_bo;

    /* Interleaved lock/unlock commandbuffers for perfcounter passes. */
-   struct radeon_cmdbuf **perf_counter_lock_cs;
+   struct radv_cmd_stream **perf_counter_lock_cs;

    bool uses_shadow_regs;

@@ -340,7 +341,7 @@ VkResult radv_device_init_vrs_state(struct radv_device *device);

 unsigned radv_get_default_max_sample_dist(int log_samples);

-void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
+void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radv_cmd_stream *cs,
                                         int nr_samples);

 struct radv_color_buffer_info {

@@ -3339,38 +3339,38 @@ radv_update_ies_shader(struct radv_device *device, struct radv_indirect_executio
    const struct radv_physical_device *pdev = radv_device_physical(device);
    uint8_t *ptr = set->mapped_ptr + set->stride * index;
    struct radv_compute_pipeline_metadata md;
-   struct radeon_cmdbuf *cs;
+   struct radv_cmd_stream cs;

    assert(shader->info.stage == MESA_SHADER_COMPUTE);
    radv_get_compute_shader_metadata(device, shader, &md);

-   cs = calloc(1, sizeof(*cs));
-   if (!cs)
+   cs.b = calloc(1, sizeof(*cs.b));
+   if (!cs.b)
       return;

-   cs->reserved_dw = cs->max_dw = 32;
-   cs->buf = malloc(cs->max_dw * 4);
-   if (!cs->buf) {
-      free(cs);
+   cs.b->reserved_dw = cs.b->max_dw = 32;
+   cs.b->buf = malloc(cs.b->max_dw * 4);
+   if (!cs.b->buf) {
+      free(cs.b);
       return;
    }

-   radv_emit_compute_shader(pdev, cs, shader);
+   radv_emit_compute_shader(pdev, &cs, shader);

    memcpy(ptr, &md, sizeof(md));
    ptr += sizeof(md);

-   memcpy(ptr, &cs->cdw, sizeof(uint32_t));
+   memcpy(ptr, &cs.b->cdw, sizeof(uint32_t));
    ptr += sizeof(uint32_t);

-   memcpy(ptr, cs->buf, cs->cdw * sizeof(uint32_t));
-   ptr += cs->cdw * sizeof(uint32_t);
+   memcpy(ptr, cs.b->buf, cs.b->cdw * sizeof(uint32_t));
+   ptr += cs.b->cdw * sizeof(uint32_t);

    set->compute_scratch_size_per_wave = MAX2(set->compute_scratch_size_per_wave, shader->config.scratch_bytes_per_wave);
    set->compute_scratch_waves = MAX2(set->compute_scratch_waves, radv_get_max_scratch_waves(device, shader));

-   free(cs->buf);
-   free(cs);
+   free(cs.b->buf);
+   free(cs.b);
 }

 static void

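radv_update_ies_shader() above is the one place that never touches the winsys: it wraps a heap-allocated radeon_cmdbuf in a stack radv_cmd_stream purely to capture the PM4 packets radv_emit_compute_shader() produces, then copies them out. Condensed from the hunk above (allocation-failure checks omitted):

/* Record into a throwaway, heap-backed command buffer. */
struct radv_cmd_stream cs;
cs.b = calloc(1, sizeof(*cs.b));
cs.b->reserved_dw = cs.b->max_dw = 32;  /* small fixed-size scratch */
cs.b->buf = malloc(cs.b->max_dw * 4);

radv_emit_compute_shader(pdev, &cs, shader);          /* fills cs.b->buf */
memcpy(ptr, cs.b->buf, cs.b->cdw * sizeof(uint32_t)); /* copy packets out */

free(cs.b->buf);
free(cs.b);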
@@ -15,7 +15,7 @@
 #include "sid.h"

 void
-radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders)
+radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);

@@ -35,7 +35,7 @@ radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *
 }

 static void
-radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, bool enable)
+radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *cs, int family, bool enable)
 {
    radeon_begin(cs);

@@ -49,7 +49,7 @@ radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs
 }

 void
-radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm)
+radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm)
 {
    uint32_t cp_perfmon_cntl;

@@ -66,7 +66,7 @@ radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm)
 }

 static void
-radv_perfcounter_emit_start(struct radeon_cmdbuf *cs, bool is_spm)
+radv_perfcounter_emit_start(struct radv_cmd_stream *cs, bool is_spm)
 {
    uint32_t cp_perfmon_cntl;

@@ -83,7 +83,7 @@ radv_perfcounter_emit_start(struct radeon_cmdbuf *cs, bool is_spm)
 }

 static void
-radv_perfcounter_emit_stop(struct radeon_cmdbuf *cs, bool is_spm)
+radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm)
 {
    uint32_t cp_perfmon_cntl;

@@ -101,7 +101,7 @@ radv_perfcounter_emit_stop(struct radeon_cmdbuf *cs, bool is_spm)
 }

 void
-radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
+radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family)
 {
    /* Start SPM counters. */
    radv_perfcounter_emit_start(cs, true);
@@ -110,7 +110,7 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf
 }

 void
-radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
+radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);

@@ -125,7 +125,7 @@ radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf
 }

 static void
-radv_perfcounter_emit_sample(struct radeon_cmdbuf *cs)
+radv_perfcounter_emit_sample(struct radv_cmd_stream *cs)
 {
    radeon_begin(cs);
    radeon_event_write(V_028A90_PERFCOUNTER_SAMPLE);
@@ -493,7 +493,7 @@ radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCrea
 static void
 radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
 {
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned value = S_030800_SH_BROADCAST_WRITES(1);

    if (se >= 0) {
@@ -521,7 +521,7 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block,
    const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    struct ac_pc_block_base *regs = block->b->b;
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned idx;

    assert(count <= regs->num_counters);
@@ -551,7 +551,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    struct ac_pc_block_base *regs = block->b->b;
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    unsigned reg = regs->counter0_lo;
    unsigned reg_delta = 8;

@@ -596,7 +596,7 @@ radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *blo
 static void
 radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
 {
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    radeon_begin(cs);

@@ -622,7 +622,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    radv_perfcounter_emit_sample(cs);
    radv_pc_wait_idle(cmd_buffer);
@@ -642,7 +642,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
       radeon_emit(0);
      radeon_end();

-      uint32_t *skip_dwords = cs->buf + (cs->cdw - 1);
+      uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);

       for (unsigned i = 0; i < pool->num_pc_regs;) {
          enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
@@ -670,7 +670,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
          radv_cs_write_data_imm(cs, V_370_ME, signal_va, 1);
       }

-      *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
+      *skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1;
    }

    radv_emit_instance(cmd_buffer, -1, -1);
@@ -680,19 +680,19 @@ void
 radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    const struct radv_physical_device *pdev = radv_device_physical(device);
    ASSERTED unsigned cdw_max;

    cmd_buffer->state.uses_perf_counters = true;

-   cdw_max = radeon_check_space(device->ws, cs,
+   cdw_max = radeon_check_space(device->ws, cs->b,
                                 256 + /* Random one time stuff */
                                 10 * pool->num_passes + /* COND_EXECs */
                                 pool->b.stride / 8 * (5 + 8));

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo);
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo);
+   radv_cs_add_buffer(device->ws, cs->b, pool->b.bo);
+   radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);

    uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
    radv_cs_write_data_imm(cs, V_370_ME, perf_ctr_va, 0);
@@ -714,7 +714,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
       radeon_emit(0);
       radeon_end();

-      uint32_t *skip_dwords = cs->buf + (cs->cdw - 1);
+      uint32_t *skip_dwords = cs->b->buf + (cs->b->cdw - 1);

       for (unsigned i = 0; i < pool->num_pc_regs;) {
          enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
@@ -733,7 +733,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
          i += cnt;
       }

-      *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
+      *skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1;
    }

    radv_emit_instance(cmd_buffer, -1, -1);
@@ -744,7 +744,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    radv_perfcounter_emit_start(cs, false);
    radv_emit_windowed_counters(device, cs, cmd_buffer->qf, true);

-   assert(cmd_buffer->cs->cdw <= cdw_max);
+   assert(cs->b->cdw <= cdw_max);
 }

 void
@@ -752,16 +752,16 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    ASSERTED unsigned cdw_max;

-   cdw_max = radeon_check_space(device->ws, cs,
+   cdw_max = radeon_check_space(device->ws, cs->b,
                                 256 + /* Reserved for things that don't scale with passes/counters */
                                 5 * pool->num_passes + /* COND_EXECs */
                                 pool->b.stride / 8 * 8);

-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo);
-   radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo);
+   radv_cs_add_buffer(device->ws, cs->b, pool->b.bo);
+   radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);

    uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
    radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
@@ -774,7 +774,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
    radv_emit_spi_config_cntl(device, cs, false);
    radv_emit_inhibit_clockgating(device, cs, false);

-   assert(cmd_buffer->cs->cdw <= cdw_max);
+   assert(cs->b->cdw <= cdw_max);
 }

 static uint64_t

@@ -30,13 +30,13 @@ struct radv_pc_query_pool {
    struct radv_perfcounter_impl *counters;
 };

-void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders);
+void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders);

-void radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs, bool is_spm);
+void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm);

-void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
+void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs, int family);

-void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
+void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs, int family);

 void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);

@@ -35,7 +35,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkQueryType qu
                               uint32_t pipeline_stats_mask, uint32_t avail_offset, bool uses_emulated_queries);

 static void
-gfx10_copy_shader_query(struct radeon_cmdbuf *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va)
+gfx10_copy_shader_query(struct radv_cmd_stream *cs, uint32_t src_sel, uint64_t src_va, uint64_t dst_va)
 {
    radeon_begin(cs);
    radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
@@ -93,7 +93,7 @@ enum radv_event_write {
 };

 static void
-radv_emit_event_write(const struct radeon_info *info, struct radeon_cmdbuf *cs, enum radv_event_write event,
+radv_emit_event_write(const struct radeon_info *info, struct radv_cmd_stream *cs, enum radv_event_write event,
                       uint64_t va)
 {
    radeon_begin(cs);
@@ -322,9 +322,9 @@ radv_begin_occlusion_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkQu
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

-   radeon_check_space(device->ws, cs, 11);
+   radeon_check_space(device->ws, cs->b, 11);

    ++cmd_buffer->state.active_occlusion_queries;
    if (cmd_buffer->state.active_occlusion_queries == 1) {
@@ -356,9 +356,9 @@ radv_end_occlusion_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

-   radeon_check_space(device->ws, cs, 14);
+   radeon_check_space(device->ws, cs->b, 14);

    cmd_buffer->state.active_occlusion_queries--;
    if (cmd_buffer->state.active_occlusion_queries == 0) {
@@ -379,7 +379,7 @@ radv_copy_occlusion_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint64_t va = radv_buffer_get_va(pool->bo);

    if (!radv_occlusion_query_use_l2(pdev)) {
@@ -390,7 +390,7 @@ radv_copy_occlusion_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
       unsigned query = first_query + i;
       uint64_t src_va = va + query * pool->stride + rb_avail_offset;

-      radeon_check_space(device->ws, cs, 7);
+      radeon_check_space(device->ws, cs->b, 7);

       /* Waits on the upper word of the last DB entry */
       radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
@@ -631,9 +631,9 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

-   radeon_check_space(device->ws, cs, 4);
+   radeon_check_space(device->ws, cs->b, 4);

    ++cmd_buffer->state.active_pipeline_queries;

@@ -674,18 +674,19 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q
    if (pool->uses_ace) {
       uint32_t task_invoc_offset =
         radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
+      struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;

       if (pdev->info.gfx_level >= GFX11) {
          va += task_invoc_offset;

-         radeon_check_space(device->ws, cmd_buffer->gang.cs, 4);
+         radeon_check_space(device->ws, ace_cs->b, 4);

-         radv_emit_event_write(&pdev->info, cmd_buffer->gang.cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
+         radv_emit_event_write(&pdev->info, ace_cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
       } else {
-         radeon_check_space(device->ws, cmd_buffer->gang.cs, 11);
+         radeon_check_space(device->ws, ace_cs->b, 11);

          gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
-         radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
+         radv_cs_write_data_imm(ace_cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);

          /* Record that the command buffer needs GDS. */
          cmd_buffer->gds_needed = true;
@@ -704,11 +705,11 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;

    unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);

-   radeon_check_space(device->ws, cs, 16);
+   radeon_check_space(device->ws, cs->b, 16);

    cmd_buffer->state.active_pipeline_queries--;

@@ -747,18 +748,19 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que
    if (pool->uses_ace) {
       uint32_t task_invoc_offset =
         radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
+      struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;

       if (pdev->info.gfx_level >= GFX11) {
          va += task_invoc_offset;

-         radeon_check_space(device->ws, cmd_buffer->gang.cs, 4);
+         radeon_check_space(device->ws, ace_cs->b, 4);

-         radv_emit_event_write(&pdev->info, cmd_buffer->gang.cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
+         radv_emit_event_write(&pdev->info, ace_cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
       } else {
-         radeon_check_space(device->ws, cmd_buffer->gang.cs, 11);
+         radeon_check_space(device->ws, ace_cs->b, 11);

          gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
-         radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
+         radv_cs_write_data_imm(ace_cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);

          cmd_buffer->state.active_pipeline_ace_queries--;

@@ -778,7 +780,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = cmd_buffer->cs;
    uint64_t va = radv_buffer_get_va(pool->bo);

    if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@@ -789,7 +791,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct
    for (unsigned i = 0; i < query_count; ++i) {
       unsigned query = first_query + i;

-      radeon_check_space(device->ws, cs, 7);
+      radeon_check_space(device->ws, cs->b, 7);

       uint64_t avail_va = va + pool->availability_offset + 4 * query;
|
||||
|
||||
|
|
@ -801,7 +803,7 @@ radv_copy_pipeline_stat_query_result(struct radv_cmd_buffer *cmd_buffer, struct
|
|||
const uint64_t start_va = src_va + task_invoc_offset + 4;
|
||||
const uint64_t stop_va = start_va + pipelinestat_block_size;
|
||||
|
||||
radeon_check_space(device->ws, cs, 7 * 2);
|
||||
radeon_check_space(device->ws, cs->b, 7 * 2);
|
||||
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, start_va, 0x80000000, 0xffffffff);
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, stop_va, 0x80000000, 0xffffffff);
|
||||
|
|
@ -942,9 +944,9 @@ emit_sample_streamout(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
radeon_check_space(device->ws, cs, 4);
|
||||
radeon_check_space(device->ws, cs->b, 4);
|
||||
|
||||
assert(index < MAX_SO_STREAMS);
|
||||
|
||||
|
|
@ -978,7 +980,7 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (pdev->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
|
|
@ -1007,7 +1009,7 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (pdev->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
|
|
@ -1036,7 +1038,7 @@ radv_copy_tfb_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query
|
|||
uint32_t query_count, uint64_t dst_va, uint64_t stride, VkQueryResultFlags flags)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
uint64_t va = radv_buffer_get_va(pool->bo);
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
|
|
@ -1044,7 +1046,7 @@ radv_copy_tfb_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query
|
|||
unsigned query = first_query + i;
|
||||
uint64_t src_va = va + query * pool->stride;
|
||||
|
||||
radeon_check_space(device->ws, cs, 7 * 4);
|
||||
radeon_check_space(device->ws, cs->b, 7 * 4);
|
||||
|
||||
/* Wait on the upper word of all results. */
|
||||
for (unsigned j = 0; j < 4; j++, src_va += 8) {
|
||||
|
|
@ -1169,7 +1171,7 @@ radv_copy_timestamp_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
uint32_t query_count, uint64_t dst_va, uint64_t stride, VkQueryResultFlags flags)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
uint64_t va = radv_buffer_get_va(pool->bo);
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
|
|
@ -1177,7 +1179,7 @@ radv_copy_timestamp_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
unsigned query = first_query + i;
|
||||
uint64_t local_src_va = va + query * pool->stride;
|
||||
|
||||
radeon_check_space(device->ws, cs, 7);
|
||||
radeon_check_space(device->ws, cs->b, 7);
|
||||
|
||||
/* Wait on the high 32 bits of the timestamp in
|
||||
* case the low part is 0xffffffff.
|
||||
|
|
@ -1350,7 +1352,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
/* On GFX11+, primitives generated query are always emulated. */
|
||||
|
|
@ -1399,7 +1401,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
/* On GFX11+, primitives generated query are always emulated. */
|
||||
|
|
@ -1446,7 +1448,7 @@ radv_copy_pg_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query_
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
uint64_t va = radv_buffer_get_va(pool->bo);
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
|
|
@ -1456,7 +1458,7 @@ radv_copy_pg_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_query_
|
|||
unsigned query = first_query + i;
|
||||
uint64_t src_va = va + query * pool->stride;
|
||||
|
||||
radeon_check_space(device->ws, cs, 7 * 4);
|
||||
radeon_check_space(device->ws, cs->b, 7 * 4);
|
||||
|
||||
/* Wait on the upper word of the PrimitiveStorageNeeded result. */
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
||||
|
|
@ -1595,10 +1597,10 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
radeon_check_space(device->ws, cs, 4);
|
||||
radeon_check_space(device->ws, cs->b, 4);
|
||||
|
||||
++cmd_buffer->state.active_pipeline_queries;
|
||||
|
||||
|
|
@ -1624,12 +1626,12 @@ radv_end_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
|
||||
|
||||
radeon_check_space(device->ws, cs, 16);
|
||||
radeon_check_space(device->ws, cs->b, 16);
|
||||
|
||||
cmd_buffer->state.active_pipeline_queries--;
|
||||
|
||||
|
|
@ -1658,7 +1660,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
uint64_t va = radv_buffer_get_va(pool->bo);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
|
|
@ -1666,7 +1668,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q
|
|||
for (unsigned i = 0; i < query_count; ++i) {
|
||||
unsigned query = first_query + i;
|
||||
|
||||
radeon_check_space(device->ws, cs, 7);
|
||||
radeon_check_space(device->ws, cs->b, 7);
|
||||
|
||||
uint64_t avail_va = va + pool->availability_offset + 4 * query;
|
||||
|
||||
|
|
@ -1684,7 +1686,7 @@ radv_copy_ms_prim_query_result(struct radv_cmd_buffer *cmd_buffer, struct radv_q
|
|||
unsigned query = first_query + i;
|
||||
uint64_t src_va = va + query * pool->stride;
|
||||
|
||||
radeon_check_space(device->ws, cs, 7 * 2);
|
||||
radeon_check_space(device->ws, cs->b, 7 * 2);
|
||||
|
||||
/* Wait on the upper word. */
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
||||
|
|
@ -2473,14 +2475,15 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
const uint64_t dst_va = vk_buffer_address(&dst_buffer->vk, dstOffset);
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (!queryCount)
|
||||
return;
|
||||
|
||||
radv_suspend_conditional_rendering(cmd_buffer);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->bo);
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, dst_buffer->bo);
|
||||
|
||||
/* Workaround engines that forget to properly specify WAIT_BIT because some driver implicitly
|
||||
* synchronizes before query copy.
|
||||
|
|
@ -2652,10 +2655,10 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
VK_FROM_HANDLE(radv_query_pool, pool, queryPool);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
uint64_t va = radv_buffer_get_va(pool->bo);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, pool->bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
|
||||
|
||||
emit_query_flush(cmd_buffer, pool);
|
||||
|
||||
|
|
@ -2665,7 +2668,8 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||
if (!radv_gang_init(cmd_buffer))
|
||||
return;
|
||||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->gang.cs, pool->bo);
|
||||
struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
|
||||
radv_cs_add_buffer(device->ws, ace_cs->b, pool->bo);
|
||||
}
|
||||
|
||||
if (pool->uses_shader_query_buf)
|
||||
|
|
@ -2711,7 +2715,7 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline
|
|||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
|
||||
radeon_begin(cs);
|
||||
|
|
@ -2739,21 +2743,21 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
|
|||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
const uint64_t va = radv_buffer_get_va(pool->bo);
|
||||
uint64_t query_va = va + pool->stride * query;
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, pool->bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
|
||||
|
||||
assert(cmd_buffer->qf != RADV_QUEUE_VIDEO_DEC && cmd_buffer->qf != RADV_QUEUE_VIDEO_ENC);
|
||||
|
||||
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
|
||||
if (instance->drirc.flush_before_timestamp_write) {
|
||||
radv_sdma_emit_nop(device, cmd_buffer->cs);
|
||||
radv_sdma_emit_nop(device, cs);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_queries; ++i, query_va += pool->stride) {
|
||||
radeon_check_space(device->ws, cmd_buffer->cs, 3);
|
||||
radeon_check_space(device->ws, cs->b, 3);
|
||||
radv_sdma_emit_write_timestamp(cs, query_va);
|
||||
}
|
||||
return;
|
||||
|
|
@ -2766,7 +2770,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
|
|||
|
||||
radv_emit_cache_flush(cmd_buffer);
|
||||
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 28 * num_queries);
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 28 * num_queries);
|
||||
|
||||
for (unsigned i = 0; i < num_queries; i++) {
|
||||
radv_write_timestamp(cmd_buffer, query_va, stage);
|
||||
|
|
@ -2779,7 +2783,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
|
|||
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
|
||||
}
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
assert(cs->b->cdw <= cdw_max);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -2790,15 +2794,15 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
|
|||
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
VK_FROM_HANDLE(radv_query_pool, pool, queryPool);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
uint64_t pool_va = radv_buffer_get_va(pool->bo);
|
||||
uint64_t query_va = pool_va + pool->stride * firstQuery;
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, pool->bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, pool->bo);
|
||||
|
||||
radv_emit_cache_flush(cmd_buffer);
|
||||
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 6 * accelerationStructureCount);
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, 6 * accelerationStructureCount);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -2835,5 +2839,5 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
|
||||
radeon_end();
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
assert(cs->b->cdw <= cdw_max);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -360,7 +360,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo,
|
||||
radv_emit_gs_ring_sizes(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *esgs_ring_bo,
|
||||
uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
|
@ -369,10 +369,10 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
|
|||
return;
|
||||
|
||||
if (esgs_ring_bo)
|
||||
radv_cs_add_buffer(device->ws, cs, esgs_ring_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, esgs_ring_bo);
|
||||
|
||||
if (gsvs_ring_bo)
|
||||
radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, gsvs_ring_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -390,7 +390,8 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo)
|
||||
radv_emit_tess_factor_ring(struct radv_device *device, struct radv_cmd_stream *cs,
|
||||
struct radeon_winsys_bo *tess_rings_bo)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
uint64_t tf_va;
|
||||
|
|
@ -401,7 +402,7 @@ radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
tf_ring_size = pdev->info.tess_factor_ring_size / 4;
|
||||
tf_va = radv_buffer_get_va(tess_rings_bo) + pdev->info.tess_offchip_ring_size;
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, tess_rings_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -465,7 +466,7 @@ radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_wi
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *task_rings_bo,
|
||||
radv_emit_task_rings(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *task_rings_bo,
|
||||
bool compute)
|
||||
{
|
||||
if (!task_rings_bo)
|
||||
|
|
@ -473,7 +474,7 @@ radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struc
|
|||
|
||||
const uint64_t task_ctrlbuf_va = radv_buffer_get_va(task_rings_bo);
|
||||
assert(util_is_aligned(task_ctrlbuf_va, 256));
|
||||
radv_cs_add_buffer(device->ws, cs, task_rings_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, task_rings_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -488,8 +489,8 @@ radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struc
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
|
||||
struct radeon_winsys_bo *scratch_bo)
|
||||
radv_emit_graphics_scratch(struct radv_device *device, struct radv_cmd_stream *cs, uint32_t size_per_wave,
|
||||
uint32_t waves, struct radeon_winsys_bo *scratch_bo)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radeon_info *gpu_info = &pdev->info;
|
||||
|
|
@ -500,7 +501,7 @@ radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
|
||||
ac_get_scratch_tmpring_size(gpu_info, waves, size_per_wave, &tmpring_size);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, scratch_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, scratch_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -519,8 +520,8 @@ radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
|
||||
struct radeon_winsys_bo *compute_scratch_bo)
|
||||
radv_emit_compute_scratch(struct radv_device *device, struct radv_cmd_stream *cs, uint32_t size_per_wave,
|
||||
uint32_t waves, struct radeon_winsys_bo *compute_scratch_bo)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radeon_info *gpu_info = &pdev->info;
|
||||
|
|
@ -541,7 +542,7 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
|
||||
ac_get_scratch_tmpring_size(gpu_info, waves, size_per_wave, &tmpring_size);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, compute_scratch_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, compute_scratch_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -563,14 +564,14 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
|
||||
radv_emit_compute_shader_pointers(struct radv_device *device, struct radv_cmd_stream *cs,
|
||||
struct radeon_winsys_bo *descriptor_bo)
|
||||
{
|
||||
if (!descriptor_bo)
|
||||
return;
|
||||
|
||||
uint64_t va = radv_buffer_get_va(descriptor_bo);
|
||||
radv_cs_add_buffer(device->ws, cs, descriptor_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, descriptor_bo);
|
||||
|
||||
/* Compute shader user data 0-1 have the scratch pointer (unlike GFX shaders),
|
||||
* so emit the descriptor pointer to user data 2-3 instead (task_ring_offsets arg).
|
||||
|
|
@ -581,7 +582,7 @@ radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdb
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
|
||||
radv_emit_graphics_shader_pointers(struct radv_device *device, struct radv_cmd_stream *cs,
|
||||
struct radeon_winsys_bo *descriptor_bo)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
|
@ -592,7 +593,7 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd
|
|||
|
||||
va = radv_buffer_get_va(descriptor_bo);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, descriptor_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, descriptor_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -638,7 +639,7 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *ge_rings_bo)
|
||||
radv_emit_ge_rings(struct radv_device *device, struct radv_cmd_stream *cs, struct radeon_winsys_bo *ge_rings_bo)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
uint64_t va;
|
||||
|
|
@ -651,7 +652,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct
|
|||
va = radv_buffer_get_va(ge_rings_bo);
|
||||
assert((va >> 32) == pdev->info.address32_hi);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, ge_rings_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, ge_rings_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -716,7 +717,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct
|
|||
}
|
||||
|
||||
static void
|
||||
radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs, bool is_compute_queue)
|
||||
radv_emit_compute(struct radv_device *device, struct radv_cmd_stream *cs, bool is_compute_queue)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint64_t border_color_va = device->border_color_data.bo ? radv_buffer_get_va(device->border_color_data.bo) : 0;
|
||||
|
|
@ -768,7 +769,7 @@ radv_pack_float_12p4(float x)
|
|||
}
|
||||
|
||||
void
|
||||
radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint64_t border_color_va = device->border_color_data.bo ? radv_buffer_get_va(device->border_color_data.bo) : 0;
|
||||
|
|
@ -941,14 +942,14 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
}
|
||||
|
||||
static void
|
||||
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_device *device)
|
||||
radv_init_graphics_state(struct radv_cmd_stream *cs, struct radv_device *device)
|
||||
{
|
||||
if (device->gfx_init) {
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
|
||||
ws->cs_execute_ib(cs, device->gfx_init, 0, device->gfx_init_size_dw & 0xffff, false);
|
||||
ws->cs_execute_ib(cs->b, device->gfx_init, 0, device->gfx_init_size_dw & 0xffff, false);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, device->gfx_init);
|
||||
radv_cs_add_buffer(device->ws, cs->b, device->gfx_init);
|
||||
} else {
|
||||
radv_emit_graphics(device, cs);
|
||||
}
|
||||
|
|
@ -971,7 +972,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
|||
struct radeon_winsys_bo *ge_rings_bo = queue->ge_rings_bo;
|
||||
struct radeon_winsys_bo *gds_bo = queue->gds_bo;
|
||||
struct radeon_winsys_bo *gds_oa_bo = queue->gds_oa_bo;
|
||||
struct radeon_cmdbuf *dest_cs[3] = {0};
|
||||
struct radv_cmd_stream *dest_cs[3] = {0};
|
||||
const uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
|
|
@ -1134,18 +1135,17 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
|||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
enum rgp_flush_bits sqtt_flush_bits = 0;
|
||||
struct radeon_cmdbuf *cs = NULL;
|
||||
cs = ws->cs_create(ws, radv_queue_family_to_ring(pdev, queue->qf), false);
|
||||
if (!cs) {
|
||||
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
struct radv_cmd_stream *cs = NULL;
|
||||
|
||||
radeon_check_space(ws, cs, 512);
|
||||
result = radv_create_cmd_stream(device, queue->qf, false, &cs);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
radeon_check_space(ws, cs->b, 512);
|
||||
dest_cs[i] = cs;
|
||||
|
||||
if (scratch_bo)
|
||||
radv_cs_add_buffer(ws, cs, scratch_bo);
|
||||
radv_cs_add_buffer(ws, cs->b, scratch_bo);
|
||||
|
||||
/* Emit initial configuration. */
|
||||
switch (queue->qf) {
|
||||
|
|
@ -1205,19 +1205,19 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
|||
radv_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->qf, flush_bits, &sqtt_flush_bits, 0);
|
||||
}
|
||||
|
||||
result = ws->cs_finalize(cs);
|
||||
result = radv_finalize_cmd_stream(device, cs);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (queue->initial_full_flush_preamble_cs)
|
||||
ws->cs_destroy(queue->initial_full_flush_preamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->initial_full_flush_preamble_cs);
|
||||
|
||||
if (queue->initial_preamble_cs)
|
||||
ws->cs_destroy(queue->initial_preamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->initial_preamble_cs);
|
||||
|
||||
if (queue->continue_preamble_cs)
|
||||
ws->cs_destroy(queue->continue_preamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->continue_preamble_cs);
|
||||
|
||||
queue->initial_full_flush_preamble_cs = dest_cs[0];
|
||||
queue->initial_preamble_cs = dest_cs[1];
|
||||
|
|
@ -1272,7 +1272,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
|||
fail:
|
||||
for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
|
||||
if (dest_cs[i])
|
||||
ws->cs_destroy(dest_cs[i]);
|
||||
radv_destroy_cmd_stream(device, dest_cs[i]);
|
||||
if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
|
||||
radv_bo_destroy(device, NULL, descriptor_bo);
|
||||
if (scratch_bo && scratch_bo != queue->scratch_bo)
|
||||
|
|
@ -1394,12 +1394,14 @@ radv_create_flush_postamble(struct radv_queue *queue)
|
|||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum amd_ip_type ip = radv_queue_family_to_ring(pdev, queue->state.qf);
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
struct radv_cmd_stream *cs;
|
||||
VkResult result;
|
||||
|
||||
struct radeon_cmdbuf *cs = ws->cs_create(ws, ip, false);
|
||||
if (!cs)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
result = radv_create_cmd_stream(device, queue->state.qf, false, &cs);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
radeon_check_space(ws, cs, 256);
|
||||
radeon_check_space(ws, cs->b, 256);
|
||||
|
||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
enum radv_cmd_flush_bits flush_bits = 0;
|
||||
|
|
@ -1418,10 +1420,10 @@ radv_create_flush_postamble(struct radv_queue *queue)
|
|||
enum rgp_flush_bits sqtt_flush_bits = 0;
|
||||
radv_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, queue->state.qf, flush_bits, &sqtt_flush_bits, 0);
|
||||
|
||||
VkResult r = ws->cs_finalize(cs);
|
||||
if (r != VK_SUCCESS) {
|
||||
ws->cs_destroy(cs);
|
||||
return r;
|
||||
result = radv_finalize_cmd_stream(device, cs);
|
||||
if (result != VK_SUCCESS) {
|
||||
radv_destroy_cmd_stream(device, cs);
|
||||
return result;
|
||||
}
|
||||
|
||||
queue->state.flush_postamble_cs = cs;
|
||||
|
|
@ -1439,7 +1441,6 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||
|
||||
VkResult r = VK_SUCCESS;
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
const enum amd_ip_type leader_ip = radv_queue_family_to_ring(pdev, queue->state.qf);
|
||||
struct radeon_winsys_bo *gang_sem_bo = NULL;
|
||||
|
||||
/* Gang semaphores BO.
|
||||
|
|
@ -1452,25 +1453,34 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||
if (r != VK_SUCCESS)
|
||||
return r;
|
||||
|
||||
struct radeon_cmdbuf *leader_pre_cs = ws->cs_create(ws, leader_ip, false);
|
||||
struct radeon_cmdbuf *leader_post_cs = ws->cs_create(ws, leader_ip, false);
|
||||
struct radeon_cmdbuf *ace_pre_cs = ws->cs_create(ws, AMD_IP_COMPUTE, false);
|
||||
struct radeon_cmdbuf *ace_post_cs = ws->cs_create(ws, AMD_IP_COMPUTE, false);
|
||||
struct radv_cmd_stream *leader_pre_cs = NULL, *leader_post_cs = NULL;
|
||||
struct radv_cmd_stream *ace_pre_cs = NULL, *ace_post_cs = NULL;
|
||||
|
||||
if (!leader_pre_cs || !leader_post_cs || !ace_pre_cs || !ace_post_cs) {
|
||||
r = VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
r = radv_create_cmd_stream(device, queue->state.qf, false, &leader_pre_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
radeon_check_space(ws, leader_pre_cs, 256);
|
||||
radeon_check_space(ws, leader_post_cs, 256);
|
||||
radeon_check_space(ws, ace_pre_cs, 256);
|
||||
radeon_check_space(ws, ace_post_cs, 256);
|
||||
radv_create_cmd_stream(device, queue->state.qf, false, &leader_post_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
radv_cs_add_buffer(ws, leader_pre_cs, gang_sem_bo);
|
||||
radv_cs_add_buffer(ws, leader_post_cs, gang_sem_bo);
|
||||
radv_cs_add_buffer(ws, ace_pre_cs, gang_sem_bo);
|
||||
radv_cs_add_buffer(ws, ace_post_cs, gang_sem_bo);
|
||||
radv_create_cmd_stream(device, RADV_QUEUE_COMPUTE, false, &ace_pre_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
radv_create_cmd_stream(device, RADV_QUEUE_COMPUTE, false, &ace_post_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
radeon_check_space(ws, leader_pre_cs->b, 256);
|
||||
radeon_check_space(ws, leader_post_cs->b, 256);
|
||||
radeon_check_space(ws, ace_pre_cs->b, 256);
|
||||
radeon_check_space(ws, ace_post_cs->b, 256);
|
||||
|
||||
radv_cs_add_buffer(ws, leader_pre_cs->b, gang_sem_bo);
|
||||
radv_cs_add_buffer(ws, leader_post_cs->b, gang_sem_bo);
|
||||
radv_cs_add_buffer(ws, ace_pre_cs->b, gang_sem_bo);
|
||||
radv_cs_add_buffer(ws, ace_post_cs->b, gang_sem_bo);
|
||||
|
||||
const uint64_t ace_wait_va = radv_buffer_get_va(gang_sem_bo);
|
||||
const uint64_t leader_wait_va = ace_wait_va + 4;
|
||||
|
|
@ -1486,7 +1496,6 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||
radv_cp_wait_mem(ace_pre_cs, RADV_QUEUE_COMPUTE, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff);
|
||||
radv_cs_write_data(device, ace_pre_cs, RADV_QUEUE_COMPUTE, V_370_ME, ace_wait_va, 1, &zero, false);
|
||||
radv_cs_write_data(device, leader_pre_cs, queue->state.qf, V_370_ME, ace_wait_va, 1, &one, false);
|
||||
|
||||
/* Create postambles for gang submission.
|
||||
* This ensures that the gang leader waits for the whole gang,
|
||||
* which is necessary because the kernel signals the userspace fence
|
||||
|
|
@ -1498,16 +1507,16 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||
radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, RADV_QUEUE_COMPUTE, V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0);
|
||||
|
||||
r = ws->cs_finalize(leader_pre_cs);
|
||||
r = radv_finalize_cmd_stream(device, leader_pre_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
r = ws->cs_finalize(leader_post_cs);
|
||||
r = radv_finalize_cmd_stream(device, leader_post_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
r = ws->cs_finalize(ace_pre_cs);
|
||||
r = radv_finalize_cmd_stream(device, ace_pre_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
r = ws->cs_finalize(ace_post_cs);
|
||||
r = radv_finalize_cmd_stream(device, ace_post_cs);
|
||||
if (r != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
|
|
@ -1521,13 +1530,13 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||
|
||||
fail:
|
||||
if (leader_pre_cs)
|
||||
ws->cs_destroy(leader_pre_cs);
|
||||
radv_destroy_cmd_stream(device, leader_pre_cs);
|
||||
if (leader_post_cs)
|
||||
ws->cs_destroy(leader_post_cs);
|
||||
radv_destroy_cmd_stream(device, leader_post_cs);
|
||||
if (ace_pre_cs)
|
||||
ws->cs_destroy(ace_pre_cs);
|
||||
radv_destroy_cmd_stream(device, ace_pre_cs);
|
||||
if (ace_post_cs)
|
||||
ws->cs_destroy(ace_post_cs);
|
||||
radv_destroy_cmd_stream(device, ace_post_cs);
|
||||
if (gang_sem_bo)
|
||||
radv_bo_destroy(device, &queue->vk.base, gang_sem_bo);
|
||||
|
||||
|
|
@ -1585,22 +1594,23 @@ radv_update_gang_preambles(struct radv_queue *queue)
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static struct radeon_cmdbuf *
|
||||
static struct radv_cmd_stream *
|
||||
radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool unlock)
|
||||
{
|
||||
struct radeon_cmdbuf **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)];
|
||||
struct radeon_cmdbuf *cs;
|
||||
struct radv_cmd_stream **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)];
|
||||
struct radv_cmd_stream *cs;
|
||||
VkResult result;
|
||||
|
||||
if (*cs_ref)
|
||||
return *cs_ref;
|
||||
|
||||
cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false);
|
||||
if (!cs)
|
||||
result = radv_create_cmd_stream(device, RADV_QUEUE_GENERAL, false, &cs);
|
||||
if (result != VK_SUCCESS)
|
||||
return NULL;
|
||||
|
||||
ASSERTED unsigned cdw = radeon_check_space(device->ws, cs, 21);
|
||||
ASSERTED unsigned cdw = radeon_check_space(device->ws, cs->b, 21);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, device->perf_counter_bo);
|
||||
radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -1650,11 +1660,11 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool
|
|||
}
|
||||
|
||||
radeon_end();
|
||||
assert(cs->cdw <= cdw);
|
||||
assert(cs->b->cdw <= cdw);
|
||||
|
||||
VkResult result = device->ws->cs_finalize(cs);
|
||||
result = radv_finalize_cmd_stream(device, cs);
|
||||
if (result != VK_SUCCESS) {
|
||||
device->ws->cs_destroy(cs);
|
||||
radv_destroy_cmd_stream(device, cs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -1662,7 +1672,7 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool
|
|||
* alternative.
|
||||
*/
|
||||
if (p_atomic_cmpxchg((uintptr_t *)cs_ref, 0, (uintptr_t)cs) != 0) {
|
||||
device->ws->cs_destroy(cs);
|
||||
radv_destroy_cmd_stream(device, cs);
|
||||
}
|
||||
|
||||
return *cs_ref;
|
||||
|
|
@ -1748,18 +1758,18 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
|
|||
|
||||
if (queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE) {
|
||||
initial_preambles[num_initial_preambles++] =
|
||||
need_wait ? queue->state.initial_full_flush_preamble_cs : queue->state.initial_preamble_cs;
|
||||
need_wait ? queue->state.initial_full_flush_preamble_cs->b : queue->state.initial_preamble_cs->b;
|
||||
|
||||
continue_preambles[num_continue_preambles++] = queue->state.continue_preamble_cs;
|
||||
continue_preambles[num_continue_preambles++] = queue->state.continue_preamble_cs->b;
|
||||
|
||||
if (use_perf_counters) {
|
||||
/* RADV only supports perf counters on the GFX queue currently. */
|
||||
assert(queue->state.qf == RADV_QUEUE_GENERAL);
|
||||
|
||||
/* Create the lock/unlock CS. */
|
||||
struct radeon_cmdbuf *perf_ctr_lock_cs =
|
||||
struct radv_cmd_stream *perf_ctr_lock_cs =
|
||||
radv_create_perf_counter_lock_cs(device, submission->perf_pass_index, false);
|
||||
struct radeon_cmdbuf *perf_ctr_unlock_cs =
|
||||
struct radv_cmd_stream *perf_ctr_unlock_cs =
|
||||
radv_create_perf_counter_lock_cs(device, submission->perf_pass_index, true);
|
||||
|
||||
if (!perf_ctr_lock_cs || !perf_ctr_unlock_cs) {
|
||||
|
|
@ -1767,14 +1777,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
|
|||
goto fail;
|
||||
}
|
||||
|
||||
initial_preambles[num_initial_preambles++] = perf_ctr_lock_cs;
|
||||
continue_preambles[num_continue_preambles++] = perf_ctr_lock_cs;
|
||||
postambles[num_postambles++] = perf_ctr_unlock_cs;
|
||||
initial_preambles[num_initial_preambles++] = perf_ctr_lock_cs->b;
|
||||
continue_preambles[num_continue_preambles++] = perf_ctr_lock_cs->b;
|
||||
postambles[num_postambles++] = perf_ctr_unlock_cs->b;
|
||||
}
|
||||
}
|
||||
|
||||
if (queue->state.flush_postamble_cs) {
|
||||
postambles[num_postambles++] = queue->state.flush_postamble_cs;
|
||||
postambles[num_postambles++] = queue->state.flush_postamble_cs->b;
|
||||
}
|
||||
|
||||
const unsigned num_1q_initial_preambles = num_initial_preambles;
|
||||
|
|
@ -1782,17 +1792,17 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
|
|||
const unsigned num_1q_postambles = num_postambles;
|
||||
|
||||
if (use_ace) {
|
||||
initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs;
|
||||
initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs;
|
||||
initial_preambles[num_initial_preambles++] =
|
||||
need_wait ? queue->follower_state->initial_full_flush_preamble_cs : queue->follower_state->initial_preamble_cs;
|
||||
initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs->b;
|
||||
initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs->b;
|
||||
initial_preambles[num_initial_preambles++] = need_wait ? queue->follower_state->initial_full_flush_preamble_cs->b
|
||||
: queue->follower_state->initial_preamble_cs->b;
|
||||
|
||||
continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs;
|
||||
continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs;
|
||||
continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs;
|
||||
continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs->b;
|
||||
continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs->b;
|
||||
continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs->b;
|
||||
|
||||
postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs;
|
||||
postambles[num_postambles++] = queue->state.gang_wait_postamble_cs;
|
||||
postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs->b;
|
||||
postambles[num_postambles++] = queue->state.gang_wait_postamble_cs->b;
|
||||
}
|
||||
|
||||
struct radv_winsys_submit_info submit = {
|
||||
|
|
@ -1826,12 +1836,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
|
|||
struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j + c];
|
||||
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
||||
const bool can_chain_next = !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
struct radv_cmd_stream *ace_cs = cmd_buffer->gang.cs;
|
||||
|
||||
/* Follower needs to be before the gang leader because the last CS must match the queue's IP type. */
|
||||
if (cmd_buffer->gang.cs) {
|
||||
device->ws->cs_unchain(cmd_buffer->gang.cs);
|
||||
if (!chainable_ace || !device->ws->cs_chain(chainable_ace, cmd_buffer->gang.cs, false)) {
|
||||
cs_array[num_submitted_cs++] = cmd_buffer->gang.cs;
|
||||
if (ace_cs) {
|
||||
device->ws->cs_unchain(ace_cs->b);
|
||||
if (!chainable_ace || !device->ws->cs_chain(chainable_ace, ace_cs->b, false)) {
|
||||
cs_array[num_submitted_cs++] = ace_cs->b;
|
||||
|
||||
/* Prevent chaining the gang leader when the follower couldn't be chained.
|
||||
* Otherwise, they would be in the wrong order.
|
||||
|
|
@ -1839,19 +1851,18 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
|
|||
chainable = NULL;
|
||||
}
|
||||
|
||||
chainable_ace = can_chain_next ? cmd_buffer->gang.cs : NULL;
|
||||
chainable_ace = can_chain_next ? ace_cs->b : NULL;
|
||||
submit_ace = true;
|
||||
}
|
||||
|
||||
device->ws->cs_unchain(cmd_buffer->cs);
|
||||
if (!chainable || !device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) {
|
||||
device->ws->cs_unchain(cs->b);
|
||||
if (!chainable || !device->ws->cs_chain(chainable, cs->b, queue->state.uses_shadow_regs)) {
|
||||
/* don't submit empty command buffers to the kernel. */
|
||||
if ((radv_queue_ring(queue) != AMD_IP_VCN_ENC && radv_queue_ring(queue) != AMD_IP_UVD) ||
|
||||
cmd_buffer->cs->cdw != 0)
|
||||
cs_array[num_submitted_cs++] = cmd_buffer->cs;
|
||||
if ((radv_queue_ring(queue) != AMD_IP_VCN_ENC && radv_queue_ring(queue) != AMD_IP_UVD) || cs->b->cdw != 0)
|
||||
cs_array[num_submitted_cs++] = cs->b;
|
||||
}
|
||||
|
||||
chainable = can_chain_next ? cmd_buffer->cs : NULL;
|
||||
chainable = can_chain_next ? cs->b : NULL;
|
||||
}
|
||||
|
||||
submit.cs_count = num_submitted_cs;
|
||||
|
|
@ -1873,8 +1884,8 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
|
|||
radv_check_trap_handler(queue);
|
||||
}
|
||||
|
||||
initial_preambles[0] = queue->state.initial_preamble_cs;
|
||||
initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs;
|
||||
initial_preambles[0] = queue->state.initial_preamble_cs ? queue->state.initial_preamble_cs->b : NULL;
|
||||
initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs->b;
|
||||
}
|
||||
|
||||
queue->last_shader_upload_seq = MAX2(queue->last_shader_upload_seq, shader_upload_seq);
|
||||
|
|
@ -2047,17 +2058,17 @@ radv_queue_state_finish(struct radv_queue_state *queue, struct radv_device *devi
|
|||
{
|
||||
radv_destroy_shadow_regs_preamble(device, queue, device->ws);
|
||||
if (queue->initial_full_flush_preamble_cs)
|
||||
device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->initial_full_flush_preamble_cs);
|
||||
if (queue->initial_preamble_cs)
|
||||
device->ws->cs_destroy(queue->initial_preamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->initial_preamble_cs);
|
||||
if (queue->continue_preamble_cs)
|
||||
device->ws->cs_destroy(queue->continue_preamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->continue_preamble_cs);
|
||||
if (queue->gang_wait_preamble_cs)
|
||||
device->ws->cs_destroy(queue->gang_wait_preamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->gang_wait_preamble_cs);
|
||||
if (queue->gang_wait_postamble_cs)
|
||||
device->ws->cs_destroy(queue->gang_wait_postamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->gang_wait_postamble_cs);
|
||||
if (queue->flush_postamble_cs)
|
||||
device->ws->cs_destroy(queue->flush_postamble_cs);
|
||||
radv_destroy_cmd_stream(device, queue->flush_postamble_cs);
|
||||
if (queue->descriptor_bo)
|
||||
radv_bo_destroy(device, NULL, queue->descriptor_bo);
|
||||
if (queue->scratch_bo) {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include "radv_radeon_winsys.h"
|
||||
|
||||
struct radv_physical_device;
|
||||
struct radv_cmd_stream;
|
||||
struct radv_device;
|
||||
|
||||
struct radv_queue_ring_info {
|
||||
|
|
@ -62,12 +63,12 @@ struct radv_queue_state {
|
|||
struct radeon_winsys_bo *gds_bo;
|
||||
struct radeon_winsys_bo *gds_oa_bo;
|
||||
|
||||
struct radeon_cmdbuf *initial_preamble_cs;
|
||||
struct radeon_cmdbuf *initial_full_flush_preamble_cs;
|
||||
struct radeon_cmdbuf *continue_preamble_cs;
|
||||
struct radeon_cmdbuf *gang_wait_preamble_cs;
|
||||
struct radeon_cmdbuf *gang_wait_postamble_cs;
|
||||
struct radeon_cmdbuf *flush_postamble_cs; /* GFX6 only */
|
||||
struct radv_cmd_stream *initial_preamble_cs;
|
||||
struct radv_cmd_stream *initial_full_flush_preamble_cs;
|
||||
struct radv_cmd_stream *continue_preamble_cs;
|
||||
struct radv_cmd_stream *gang_wait_preamble_cs;
|
||||
struct radv_cmd_stream *gang_wait_postamble_cs;
|
||||
struct radv_cmd_stream *flush_postamble_cs; /* GFX6 only */
|
||||
|
||||
/* the uses_shadow_regs here will be set only for general queue */
|
||||
bool uses_shadow_regs;
|
||||
|
|
@ -108,7 +109,7 @@ void radv_queue_finish(struct radv_queue *queue);
|
|||
|
||||
enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfo *pObj);
|
||||
|
||||
void radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
|
||||
void radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs);
|
||||
|
||||
bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
|
||||
|
||||
|
|
|
|||
|
|
@ -346,17 +346,17 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
|
|||
}
|
||||
|
||||
void
|
||||
radv_sdma_emit_nop(const struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
radv_sdma_emit_nop(const struct radv_device *device, struct radv_cmd_stream *cs)
|
||||
{
|
||||
/* SDMA NOP acts as a fence command and causes the SDMA engine to wait for pending copy operations. */
|
||||
radeon_check_space(device->ws, cs, 1);
|
||||
radeon_check_space(device->ws, cs->b, 1);
|
||||
radeon_begin(cs);
|
||||
radeon_emit(SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
void
|
||||
radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va)
|
||||
radv_sdma_emit_write_timestamp(struct radv_cmd_stream *cs, uint64_t va)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
radeon_emit(SDMA_PACKET(SDMA_OPCODE_TIMESTAMP, SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP, 0));
|
||||
|
|
@ -366,7 +366,7 @@ radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va)
|
|||
}
|
||||
|
||||
void
|
||||
radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence)
|
||||
radv_sdma_emit_fence(struct radv_cmd_stream *cs, uint64_t va, uint32_t fence)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
radeon_emit(SDMA_PACKET(SDMA_OPCODE_FENCE, 0, SDMA_FENCE_MTYPE_UC));
|
||||
|
|
@ -377,7 +377,7 @@ radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence)
|
|||
}
|
||||
|
||||
void
|
||||
radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
|
||||
radv_sdma_emit_wait_mem(struct radv_cmd_stream *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
radeon_emit(SDMA_PACKET(SDMA_OPCODE_POLL_REGMEM, 0, 0) | op << 28 | SDMA_POLL_MEM);
|
||||
|
|
@ -390,7 +390,7 @@ radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint
|
|||
}
|
||||
|
||||
void
|
||||
radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t count)
|
||||
radv_sdma_emit_write_data_head(struct radv_cmd_stream *cs, uint64_t va, uint32_t count)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
radeon_emit(SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
|
||||
|
|
@ -401,7 +401,7 @@ radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t c
|
|||
}
|
||||
|
||||
void
|
||||
radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
|
||||
radv_sdma_copy_memory(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t src_va, uint64_t dst_va,
|
||||
uint64_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
|
|
@ -428,7 +428,7 @@ radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
|
|||
ncopy++;
|
||||
}
|
||||
|
||||
radeon_check_space(device->ws, cs, ncopy * 7);
|
||||
radeon_check_space(device->ws, cs->b, ncopy * 7);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -450,7 +450,7 @@ radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
|
|||
}
|
||||
|
||||
void
|
||||
radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,
|
||||
radv_sdma_fill_memory(const struct radv_device *device, struct radv_cmd_stream *cs, const uint64_t va,
|
||||
const uint64_t size, const uint32_t value)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
|
@ -467,7 +467,7 @@ radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
|
|||
*/
|
||||
const uint64_t max_fill_bytes = BITFIELD64_MASK(ver >= SDMA_6_0 ? 30 : 22) & ~0x3;
|
||||
const unsigned num_packets = DIV_ROUND_UP(size, max_fill_bytes);
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, num_packets * 5);
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, num_packets * 5);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -484,11 +484,11 @@ radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs
|
|||
}
|
||||
|
||||
radeon_end();
|
||||
assert(cs->cdw <= cdw_max);
|
||||
assert(cs->b->cdw <= cdw_max);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
|
||||
radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs,
|
||||
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
|
||||
const VkExtent3D pix_extent)
|
||||
{
|
||||
|
|
@ -524,7 +524,7 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
|
|||
dst_off.x *= texel_scale;
|
||||
ext.width *= texel_scale;
|
||||
|
||||
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 13);
|
||||
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 13);
|
radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) | util_logbase2(src->bpp)

@ -543,11 +543,11 @@ radv_sdma_emit_copy_linear_sub_window(const struct radv_device *device, struct r
radeon_emit((ext.depth - 1));
radeon_end();

assert(cs->cdw == cdw_end);
assert(cs->b->cdw == cdw_end);
}

static void
radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *const tiled,
const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent,
const bool detile)

@ -570,7 +570,7 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra
assert(util_is_power_of_two_nonzero(tiled->bpp));
radv_sdma_check_pitches(linear_pitch, linear_slice_pitch, tiled->bpp, uses_depth);

ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 14 + (dcc ? 3 : 0));
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 14 + (dcc ? 3 : 0));

radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) | dcc << 19 | detile << 31 |

@ -600,11 +600,11 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra
}

radeon_end();
assert(cs->cdw <= cdw_end);
assert(cs->b->cdw <= cdw_end);
}

static void
radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
const VkExtent3D px_extent)
{

@ -639,7 +639,7 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade
assert(util_is_power_of_two_nonzero(src->bpp));
assert(util_is_power_of_two_nonzero(dst->bpp));

ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs, 15 + (dcc ? 3 : 0));
ASSERTED unsigned cdw_end = radeon_check_space(device->ws, cs->b, 15 + (dcc ? 3 : 0));

radeon_begin(cs);
radeon_emit(SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0) | dcc << 19 | dcc_dir << 31 |

@ -678,11 +678,11 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade
}

radeon_end();
assert(cs->cdw <= cdw_end);
assert(cs->b->cdw <= cdw_end);
}

void
radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_copy_buffer_image(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img, const VkExtent3D extent,
bool to_image)
{

@ -715,7 +715,7 @@ radv_sdma_use_unaligned_buffer_image_copy(const struct radv_device *device, cons
}

void
radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img_in,
const VkExtent3D base_extent, struct radeon_winsys_bo *temp_bo, bool to_image)
{

@ -787,7 +787,7 @@ radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct r
}

void
radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src,
radv_sdma_copy_image(const struct radv_device *device, struct radv_cmd_stream *cs, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent)
{
if (src->is_linear) {

@ -864,7 +864,7 @@ radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct r
}

void
radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs,
radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst,
const VkExtent3D extent, struct radeon_winsys_bo *temp_bo)
{
@ -9,6 +9,8 @@
#include "radv_image.h"

struct radv_cmd_stream;

#ifdef __cplusplus
extern "C" {
#endif

@ -57,36 +59,36 @@ struct radv_sdma_surf radv_sdma_get_buf_surf(uint64_t buffer_va, const struct ra
const VkBufferImageCopy2 *const region);
struct radv_sdma_surf radv_sdma_get_surf(const struct radv_device *const device, const struct radv_image *const image,
const VkImageSubresourceLayers subresource, const VkOffset3D offset);
void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radeon_cmdbuf *cs,
void radv_sdma_copy_buffer_image(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img,
const VkExtent3D extent, bool to_image);
bool radv_sdma_use_unaligned_buffer_image_copy(const struct radv_device *device, const struct radv_sdma_surf *buf,
const struct radv_sdma_surf *img, const VkExtent3D ext);
void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radeon_cmdbuf *cs,
void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *buf, const struct radv_sdma_surf *img_in,
const VkExtent3D copy_extent, struct radeon_winsys_bo *temp_bo,
bool to_image);
void radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent);
void radv_sdma_copy_image(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent);
bool radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent);
void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs,
void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radv_cmd_stream *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst,
const VkExtent3D extent, struct radeon_winsys_bo *temp_bo);
void radv_sdma_copy_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
uint64_t size);
void radv_sdma_fill_memory(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,
void radv_sdma_copy_memory(const struct radv_device *device, struct radv_cmd_stream *cs, uint64_t src_va,
uint64_t dst_va, uint64_t size);
void radv_sdma_fill_memory(const struct radv_device *device, struct radv_cmd_stream *cs, const uint64_t va,
const uint64_t size, const uint32_t value);

void radv_sdma_emit_nop(const struct radv_device *device, struct radeon_cmdbuf *cs);
void radv_sdma_emit_nop(const struct radv_device *device, struct radv_cmd_stream *cs);

void radv_sdma_emit_write_timestamp(struct radeon_cmdbuf *cs, uint64_t va);
void radv_sdma_emit_write_timestamp(struct radv_cmd_stream *cs, uint64_t va);

void radv_sdma_emit_fence(struct radeon_cmdbuf *cs, uint64_t va, uint32_t fence);
void radv_sdma_emit_fence(struct radv_cmd_stream *cs, uint64_t va, uint32_t fence);

void radv_sdma_emit_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask);
void radv_sdma_emit_wait_mem(struct radv_cmd_stream *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask);

void radv_sdma_emit_write_data_head(struct radeon_cmdbuf *cs, uint64_t va, uint32_t count);
void radv_sdma_emit_write_data_head(struct radv_cmd_stream *cs, uint64_t va, uint32_t count);

#ifdef __cplusplus
}
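All of the SDMA hunks above follow the same split: helpers that emit packets now take the radv_cmd_stream wrapper, while winsys-facing calls such as radeon_check_space() and radv_cs_add_buffer() keep operating on the raw radeon_cmdbuf, reached through the stream's b member. A minimal sketch of the relationship, assuming a wrapper layout along these lines (the real definition lives elsewhere in the tree and likely carries extra emission state):

/* Sketch only: what this diff guarantees is a 'b' member pointing at the
 * underlying winsys command buffer; any other fields are assumptions. */
struct radv_cmd_stream {
   struct radeon_cmdbuf *b; /* raw buffer consumed by the winsys */
};

static void
emit_nop_example(const struct radv_device *device, struct radv_cmd_stream *cs)
{
   /* Space checks and BO tracking talk to the winsys through cs->b... */
   radeon_check_space(device->ws, cs->b, 1);

   /* ...while packet emission goes through the stream itself. */
   radeon_begin(cs);
   radeon_emit(0); /* placeholder dword */
   radeon_end();
}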
@ -1311,11 +1311,13 @@ radv_init_shader_upload_queue(struct radv_device *device)

for (unsigned i = 0; i < RADV_SHADER_UPLOAD_CS_COUNT; i++) {
struct radv_shader_dma_submission *submission = calloc(1, sizeof(struct radv_shader_dma_submission));
submission->cs = ws->cs_create(ws, AMD_IP_SDMA, false);
if (!submission->cs) {
result = radv_create_cmd_stream(device, RADV_QUEUE_TRANSFER, false, &submission->cs);
if (result != VK_SUCCESS) {
free(submission);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
return result;
}

list_addtail(&submission->list, &device->shader_dma_submissions);
}

@ -1350,7 +1352,7 @@ radv_destroy_shader_upload_queue(struct radv_device *device)

list_for_each_entry_safe (struct radv_shader_dma_submission, submission, &device->shader_dma_submissions, list) {
if (submission->cs)
ws->cs_destroy(submission->cs);
radv_finalize_cmd_stream(device, submission->cs);
if (submission->bo)
radv_bo_destroy(device, NULL, submission->bo);
list_del(&submission->list);

@ -2506,7 +2508,7 @@ struct radv_shader_dma_submission *
radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size)
{
struct radv_shader_dma_submission *submission = radv_shader_dma_pop_submission(device);
struct radeon_cmdbuf *cs = submission->cs;
struct radv_cmd_stream *cs = submission->cs;
struct radeon_winsys *ws = device->ws;
VkResult result;

@ -2515,7 +2517,7 @@ radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_
if (result != VK_SUCCESS)
goto fail;

ws->cs_reset(cs);
radv_reset_cmd_stream(device, cs);

if (submission->bo_size < size) {
result = radv_shader_dma_resize_upload_buf(device, submission, size);

@ -2524,10 +2526,10 @@ radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_
}

radv_sdma_copy_memory(device, cs, radv_buffer_get_va(submission->bo), va, size);
radv_cs_add_buffer(ws, cs, submission->bo);
radv_cs_add_buffer(ws, cs, bo);
radv_cs_add_buffer(ws, cs->b, submission->bo);
radv_cs_add_buffer(ws, cs->b, bo);

result = ws->cs_finalize(cs);
result = radv_finalize_cmd_stream(device, cs);
if (result != VK_SUCCESS)
goto fail;

@ -2547,7 +2549,7 @@ bool
radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submission *submission,
uint64_t *upload_seq_out)
{
struct radeon_cmdbuf *cs = submission->cs;
struct radv_cmd_stream *cs = submission->cs;
struct radeon_winsys *ws = device->ws;
VkResult result;

@ -2566,7 +2568,7 @@ radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submis
struct radv_winsys_submit_info submit = {
.ip_type = AMD_IP_SDMA,
.queue_index = 0,
.cs_array = &cs,
.cs_array = &cs->b,
.cs_count = 1,
};

@ -504,7 +504,7 @@ struct radv_shader_part_cache {
struct radv_shader_dma_submission {
struct list_head list;

struct radeon_cmdbuf *cs;
struct radv_cmd_stream *cs;
struct radeon_winsys_bo *bo;
uint64_t bo_size;
char *ptr;
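The shader-DMA hunks show the full lifecycle of the new helpers: radv_create_cmd_stream() replaces the bare ws->cs_create(), radv_reset_cmd_stream() replaces ws->cs_reset(), and radv_finalize_cmd_stream() replaces ws->cs_finalize(), while submission still hands the winsys the raw buffer through &cs->b. A condensed sketch of one upload, with the error paths and submission pooling of the real code elided; upload_once() and its parameters are illustrative names only:

static VkResult
upload_once(struct radv_device *device, struct radeon_winsys_bo *src_bo,
            struct radeon_winsys_bo *dst_bo, uint64_t dst_va, uint64_t size)
{
   struct radv_cmd_stream *cs;
   VkResult result = radv_create_cmd_stream(device, RADV_QUEUE_TRANSFER, false, &cs);
   if (result != VK_SUCCESS)
      return result;

   /* The real code resets a pooled stream before reusing it. */
   radv_reset_cmd_stream(device, cs);

   radv_sdma_copy_memory(device, cs, radv_buffer_get_va(src_bo), dst_va, size);
   radv_cs_add_buffer(device->ws, cs->b, src_bo);
   radv_cs_add_buffer(device->ws, cs->b, dst_bo);

   return radv_finalize_cmd_stream(device, cs);
}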
@ -68,7 +68,7 @@ radv_spm_resize_bo(struct radv_device *device)
}

static void
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_spm_counters(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf);

@ -82,7 +82,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
if (!num_counters)
continue;

radeon_check_space(device->ws, cs, 3 + num_counters * 3);
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
radeon_begin(cs);

radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);

@ -105,7 +105,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
if (!num_counters)
continue;

radeon_check_space(device->ws, cs, 3 + num_counters * 3);
radeon_check_space(device->ws, cs->b, 3 + num_counters * 3);
radeon_begin(cs);

radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, S_030800_SH_BROADCAST_WRITES(1) |

@ -130,7 +130,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
for (unsigned i = 0; i < block_sel->num_instances; i++) {
struct ac_spm_block_instance *block_instance = &block_sel->instances[i];

radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
radeon_check_space(device->ws, cs->b, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
radeon_begin(cs);

radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);

@ -160,7 +160,7 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
}

static void
radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_spm_muxsel(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, qf);

@ -190,7 +190,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum
pdev->info.gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
}

radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
radeon_check_space(device->ws, cs->b, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
radeon_begin(cs);

radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);

@ -215,7 +215,7 @@ radv_emit_spm_muxsel(struct radv_device *device, struct radeon_cmdbuf *cs, enum
}

void
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_spm *spm = &device->spm;

@ -227,7 +227,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
assert(spm->sample_interval >= 32);

radeon_check_space(device->ws, cs, 27);
radeon_check_space(device->ws, cs->b, 27);
radeon_begin(cs);

/* Configure the SPM ring buffer. */

@ -15,7 +15,7 @@
#include "radv_queue.h"
#include "radv_radeon_winsys.h"

void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf);
void radv_emit_spm_setup(struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf);

bool radv_spm_init(struct radv_device *device);

@ -48,7 +48,7 @@ radv_ip_to_queue_family(enum amd_ip_type t)
}

static void
radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family)
radv_emit_wait_for_idle(const struct radv_device *device, struct radv_cmd_stream *cs, int family)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum radv_queue_family qf = radv_ip_to_queue_family(family);

@ -62,7 +62,7 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *
}

static void
radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_sqtt_start(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE;

@ -75,14 +75,14 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
ac_sqtt_emit_start(&pdev->info, pm4, &device->sqtt, is_compute_queue);
ac_pm4_finalize(pm4);

radeon_check_space(device->ws, cs, pm4->ndw);
radeon_check_space(device->ws, cs->b, pm4->ndw);
radv_emit_pm4_commands(cs, pm4);

ac_pm4_free_state(pm4);
}

static void
radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs, enum radv_queue_family qf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool is_compute_queue = qf == RADV_QUEUE_COMPUTE;

@ -95,7 +95,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
ac_sqtt_emit_stop(&pdev->info, pm4, is_compute_queue);
ac_pm4_finalize(pm4);

radeon_check_space(device->ws, cs, pm4->ndw);
radeon_check_space(device->ws, cs->b, pm4->ndw);
radv_emit_pm4_commands(cs, pm4);

ac_pm4_clear_state(pm4, &pdev->info, false, is_compute_queue);

@ -108,7 +108,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
ac_sqtt_emit_wait(&pdev->info, pm4, &device->sqtt, is_compute_queue);
ac_pm4_finalize(pm4);

radeon_check_space(device->ws, cs, pm4->ndw);
radeon_check_space(device->ws, cs->b, pm4->ndw);
radv_emit_pm4_commands(cs, pm4);

ac_pm4_free_state(pm4);

@ -121,7 +121,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum amd_ip_type ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
const struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint32_t *dwords = (uint32_t *)data;

/* SQTT user data packets aren't supported on SDMA queues. */

@ -131,7 +131,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
while (num_dwords > 0) {
uint32_t count = MIN2(num_dwords, 2);

radeon_check_space(device->ws, cs, 2 + count);
radeon_check_space(device->ws, cs->b, 2 + count);
radeon_begin(cs);

/* Without the perfctr bit the CP might not always pass the

@ -150,7 +150,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
}

void
radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stream *cs, bool enable)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

@ -177,7 +177,7 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf
}

void
radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

@ -516,7 +516,7 @@ radv_begin_sqtt(struct radv_queue *queue)
const struct radv_physical_device *pdev = radv_device_physical(device);
enum radv_queue_family family = queue->state.qf;
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream cs;
VkResult result;

/* Destroy the previous start CS and create a new one. */

@ -525,13 +525,13 @@ radv_begin_sqtt(struct radv_queue *queue)
device->sqtt.start_cs[family] = NULL;
}

cs = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs)
cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs.b)
return false;

radeon_check_space(ws, cs, 512);
radeon_check_space(ws, cs.b, 512);

radeon_begin(cs);
radeon_begin(&cs);

switch (family) {
case RADV_QUEUE_GENERAL:

@ -551,40 +551,40 @@ radv_begin_sqtt(struct radv_queue *queue)
radeon_end();

/* Make sure to wait-for-idle before starting SQTT. */
radv_emit_wait_for_idle(device, cs, family);
radv_emit_wait_for_idle(device, &cs, family);

/* Disable clock gating before starting SQTT. */
radv_emit_inhibit_clockgating(device, cs, true);
radv_emit_inhibit_clockgating(device, &cs, true);

/* Enable SQG events that collects thread trace data. */
radv_emit_spi_config_cntl(device, cs, true);
radv_emit_spi_config_cntl(device, &cs, true);

radv_perfcounter_emit_reset(cs, true);
radv_perfcounter_emit_reset(&cs, true);

if (device->spm.bo) {
/* Enable all shader stages by default. */
radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&pdev->info));
radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info));

radv_emit_spm_setup(device, cs, family);
radv_emit_spm_setup(device, &cs, family);
}

/* Start SQTT. */
radv_emit_sqtt_start(device, cs, family);
radv_emit_sqtt_start(device, &cs, family);

if (device->spm.bo) {
radeon_check_space(ws, cs, 8);
radv_perfcounter_emit_spm_start(device, cs, family);
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_start(device, &cs, family);
}

result = ws->cs_finalize(cs);
result = ws->cs_finalize(cs.b);
if (result != VK_SUCCESS) {
ws->cs_destroy(cs);
ws->cs_destroy(cs.b);
return false;
}

device->sqtt.start_cs[family] = cs;
device->sqtt.start_cs[family] = cs.b;

return radv_queue_internal_submit(queue, cs);
return radv_queue_internal_submit(queue, cs.b);
}

static bool

@ -593,7 +593,7 @@ radv_end_sqtt(struct radv_queue *queue)
struct radv_device *device = radv_queue_device(queue);
enum radv_queue_family family = queue->state.qf;
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
struct radv_cmd_stream cs;
VkResult result;

/* Destroy the previous stop CS and create a new one. */

@ -602,13 +602,13 @@ radv_end_sqtt(struct radv_queue *queue)
device->sqtt.stop_cs[family] = NULL;
}

cs = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs)
cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
if (!cs.b)
return false;

radeon_check_space(ws, cs, 512);
radeon_check_space(ws, cs.b, 512);

radeon_begin(cs);
radeon_begin(&cs);

switch (family) {
case RADV_QUEUE_GENERAL:

@ -628,33 +628,33 @@ radv_end_sqtt(struct radv_queue *queue)
radeon_end();

/* Make sure to wait-for-idle before stopping SQTT. */
radv_emit_wait_for_idle(device, cs, family);
radv_emit_wait_for_idle(device, &cs, family);

if (device->spm.bo) {
radeon_check_space(ws, cs, 8);
radv_perfcounter_emit_spm_stop(device, cs, family);
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_stop(device, &cs, family);
}

/* Stop SQTT. */
radv_emit_sqtt_stop(device, cs, family);
radv_emit_sqtt_stop(device, &cs, family);

radv_perfcounter_emit_reset(cs, true);
radv_perfcounter_emit_reset(&cs, true);

/* Restore previous state by disabling SQG events. */
radv_emit_spi_config_cntl(device, cs, false);
radv_emit_spi_config_cntl(device, &cs, false);

/* Restore previous state by re-enabling clock gating. */
radv_emit_inhibit_clockgating(device, cs, false);
radv_emit_inhibit_clockgating(device, &cs, false);

result = ws->cs_finalize(cs);
result = ws->cs_finalize(cs.b);
if (result != VK_SUCCESS) {
ws->cs_destroy(cs);
ws->cs_destroy(cs.b);
return false;
}

device->sqtt.stop_cs[family] = cs;
device->sqtt.stop_cs[family] = cs.b;

return radv_queue_internal_submit(queue, cs);
return radv_queue_internal_submit(queue, cs.b);
}

void

@ -837,11 +837,14 @@ radv_sqtt_get_timed_cmdbuf(struct radv_queue *queue, struct radeon_winsys_bo *ti
if (result != VK_SUCCESS)
goto fail;

radeon_check_space(device->ws, radv_cmd_buffer_from_handle(cmdbuf)->cs, 28);
struct radv_cmd_buffer *cmd_buffer = radv_cmd_buffer_from_handle(cmdbuf);
struct radv_cmd_stream *cs = cmd_buffer->cs;

radeon_check_space(device->ws, cs->b, 28);

timestamp_va = radv_buffer_get_va(timestamp_bo) + timestamp_offset;

radv_cs_add_buffer(device->ws, radv_cmd_buffer_from_handle(cmdbuf)->cs, timestamp_bo);
radv_cs_add_buffer(device->ws, cs->b, timestamp_bo);

radv_write_timestamp(radv_cmd_buffer_from_handle(cmdbuf), timestamp_va, timestamp_stage);

@ -65,9 +65,9 @@ bool radv_sqtt_queue_events_enabled(void);

void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords);

void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);
void radv_emit_spi_config_cntl(const struct radv_device *device, struct radv_cmd_stream *cs, bool enable);

void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit);
void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radv_cmd_stream *cs, bool inhibit);

VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);
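radv_begin_sqtt() and radv_end_sqtt() use a third variant of the pattern: a stack-allocated wrapper whose b member is a winsys CS that outlives the function. A trimmed sketch of that bridging idiom; the queue-family switch and the actual SQTT programming are elided, and submit_oneshot() is an illustrative name:

static bool
submit_oneshot(struct radv_queue *queue, struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;
   struct radv_cmd_stream cs; /* the wrapper itself lives on the stack */

   cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
   if (!cs.b)
      return false;

   radeon_check_space(ws, cs.b, 512);
   radeon_begin(&cs);
   /* ...emit start/stop packets through &cs... */
   radeon_end();

   if (ws->cs_finalize(cs.b) != VK_SUCCESS) {
      ws->cs_destroy(cs.b);
      return false;
   }

   /* Only the raw winsys buffer needs to outlive this function. */
   return radv_queue_internal_submit(queue, cs.b);
}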
@ -86,7 +86,7 @@ radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,

/* vcn unified queue (sq) ib header */
void
radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature)
radv_vcn_sq_header(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature)
{
if (!skip_signature) {
/* vcn ib signature */

@ -97,8 +97,8 @@ radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned ty
radeon_emit(0);
radeon_end();

sq->signature_ib_checksum = &cs->buf[cs->cdw - 2];
sq->signature_ib_total_size_in_dw = &cs->buf[cs->cdw - 1];
sq->signature_ib_checksum = &cs->b->buf[cs->b->cdw - 2];
sq->signature_ib_total_size_in_dw = &cs->b->buf[cs->b->cdw - 1];
} else {
sq->signature_ib_checksum = NULL;
sq->signature_ib_total_size_in_dw = NULL;

@ -112,17 +112,17 @@ radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned ty
radeon_emit(0);
radeon_end();

sq->engine_ib_size_of_packages = &cs->buf[cs->cdw - 1];
sq->engine_ib_size_of_packages = &cs->b->buf[cs->b->cdw - 1];
}

void
radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
radv_vcn_sq_tail(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq)
{
uint32_t *end;
uint32_t size_in_dw;
uint32_t checksum = 0;

end = &cs->buf[cs->cdw];
end = &cs->b->buf[cs->b->cdw];

if (sq->signature_ib_checksum == NULL && sq->signature_ib_total_size_in_dw == NULL) {
if (sq->engine_ib_size_of_packages == NULL)

@ -148,18 +148,18 @@ radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *even
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_physical_device *pdev = radv_device_physical(device);
struct rvcn_sq_var sq;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;

/* UVD doesn't support events, and probably never will */
if (pdev->vid_decode_ip == AMD_IP_UVD)
return;

radv_cs_add_buffer(device->ws, cs, event->bo);
radv_cs_add_buffer(device->ws, cs->b, event->bo);
uint64_t va = radv_buffer_get_va(event->bo);

bool separate_queue = pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED;
if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC && separate_queue && pdev->vid_dec_reg.data2) {
radeon_check_space(device->ws, cmd_buffer->cs, 8);
radeon_check_space(device->ws, cs->b, 8);
set_reg(cmd_buffer, pdev->vid_dec_reg.data0, va & 0xffffffff);
set_reg(cmd_buffer, pdev->vid_dec_reg.data1, va >> 32);
set_reg(cmd_buffer, pdev->vid_dec_reg.data2, value);

@ -167,20 +167,21 @@ radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *even
return;
}

radeon_check_space(device->ws, cs, 256);
radeon_check_space(device->ws, cs->b, 256);
radv_vcn_sq_header(cs, &sq, RADEON_VCN_ENGINE_TYPE_COMMON, separate_queue);
struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->buf[cs->cdw]);
struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->b->buf[cs->b->cdw]);
ib_header->package_size = sizeof(struct rvcn_cmn_engine_ib_package) + sizeof(struct rvcn_cmn_engine_op_writememory);
cs->cdw++;
cs->b->cdw++;
ib_header->package_type = RADEON_VCN_IB_COMMON_OP_WRITEMEMORY;
cs->cdw++;
cs->b->cdw++;

struct rvcn_cmn_engine_op_writememory *write_memory = (struct rvcn_cmn_engine_op_writememory *)&(cs->buf[cs->cdw]);
struct rvcn_cmn_engine_op_writememory *write_memory =
(struct rvcn_cmn_engine_op_writememory *)&(cs->b->buf[cs->b->cdw]);
write_memory->dest_addr_lo = va & 0xffffffff;
write_memory->dest_addr_hi = va >> 32;
write_memory->data = value;

cs->cdw += sizeof(*write_memory) / 4;
cs->b->cdw += sizeof(*write_memory) / 4;
radv_vcn_sq_tail(cs, &sq);
}

@ -188,16 +189,17 @@ static void
radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;

radeon_check_space(device->ws, cmd_buffer->cs, 512);
radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false);
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
radeon_check_space(device->ws, cs->b, 512);
radv_vcn_sq_header(cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false);
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cs->b->buf[cs->b->cdw]);
ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
cmd_buffer->cs->cdw++;
cs->b->cdw++;
ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
cmd_buffer->cs->cdw++;
cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
cs->b->cdw++;
cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cs->b->buf[cs->b->cdw]);
cs->b->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
}

@ -1389,7 +1391,7 @@ radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession,
static void
set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;

radeon_begin(cs);
radeon_emit(RDECODE_PKT0(reg >> 2, 0));

@ -1402,11 +1404,12 @@ send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;

radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
radv_cs_add_buffer(device->ws, cs->b, bo);

if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(device->ws, cmd_buffer->cs, 6);
radeon_check_space(device->ws, cs->b, 6);
set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);

@ -2385,6 +2388,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
struct radv_image_plane *luma = &img->planes[0];
struct radv_image_plane *chroma = &img->planes[1];
bool use_intra_only_allocation_for_dpb = false;
struct radv_cmd_stream *cs = cmd_buffer->cs;

if (vid->dpb_type == DPB_DYNAMIC_TIER_3) {
VkImageUsageFlags coincide = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;

@ -2565,10 +2569,10 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
uint64_t addr;
if (use_intra_only_allocation_for_dpb) {
addr = radv_buffer_get_va(vid->intra_only_dpb.mem->bo) + vid->intra_only_dpb.offset;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, vid->intra_only_dpb.mem->bo);
radv_cs_add_buffer(device->ws, cs->b, vid->intra_only_dpb.mem->bo);
} else {
addr = dpb->bindings[0].addr;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb->bindings[0].bo);
addr += dpb_array_idx *
(dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
}

@ -2610,7 +2614,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
int f_dpb_array_idx =
frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer;

radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo);
addr = dpb_img->bindings[0].addr;
addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size +
dpb_img->planes[1].surface.u.gfx9.surf_slice_size);

@ -2645,16 +2649,15 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
}

uint32_t size = sizeof(rvcn_dec_ref_buffers_header_t) + sizeof(rvcn_dec_ref_buffer_t) * num_bufs;
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cs->b->buf[cs->b->cdw]);

ib_header->package_size = size + sizeof(struct rvcn_decode_ib_package_s);
cmd_buffer->cs->cdw++;
cs->b->cdw++;
ib_header->package_type = RDECODE_IB_PARAM_DYNAMIC_REFLIST_BUFFER;
cmd_buffer->cs->cdw++;
cs->b->cdw++;

rvcn_dec_ref_buffers_header_t *refs =
(rvcn_dec_ref_buffers_header_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
cmd_buffer->cs->cdw += size / 4;
rvcn_dec_ref_buffers_header_t *refs = (rvcn_dec_ref_buffers_header_t *)&(cs->b->buf[cs->b->cdw]);
cs->b->cdw += size / 4;
refs->size = size;
refs->num_bufs = 0;

@ -2669,7 +2672,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer;
fill_ref_buffer(&refs->pBufs[refs->num_bufs++], dpb_img, f_dpb_array_idx,
frame_info->pReferenceSlots[i].slotIndex);
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo);
used_slots |= 1 << frame_info->pReferenceSlots[i].slotIndex;
}

@ -3106,6 +3109,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_video_session *vid = cmd_buffer->video.vid;
uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
struct radv_cmd_stream *cs = cmd_buffer->cs;

void *ptr;
uint32_t out_offset;

@ -3135,13 +3139,13 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */

if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(device->ws, cmd_buffer->cs, 8);
radeon_begin(cmd_buffer->cs);
radeon_check_space(device->ws, cs->b, 8);
radeon_begin(cs);
for (unsigned i = 0; i < 8; i++)
radeon_emit(0x81ff);
radeon_end();
} else
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
}

static void

@ -3149,9 +3153,11 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint32_t size = sizeof(struct ruvd_msg);
void *ptr;
uint32_t out_offset;

radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);

ruvd_dec_message_create(vid, ptr);

@ -3163,8 +3169,8 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)

/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
int padsize = vid->sessionctx.mem ? 4 : 6;
radeon_check_space(device->ws, cmd_buffer->cs, padsize);
radeon_begin(cmd_buffer->cs);
radeon_check_space(device->ws, cs->b, padsize);
radeon_begin(cs);
for (unsigned i = 0; i < padsize; i++)
radeon_emit(PKT2_NOP_PAD);
radeon_end();

@ -3207,6 +3213,7 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
uint32_t out_offset, fb_offset, it_probs_offset = 0;
struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
struct radv_cmd_stream *cs = cmd_buffer->cs;

radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
fb_bo = cmd_buffer->upload.upload_bo;

@ -3248,7 +3255,7 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo,
radv_buffer_get_va(it_probs_bo) + it_probs_offset);

radeon_check_space(device->ws, cmd_buffer->cs, 2);
radeon_check_space(device->ws, cs->b, 2);
set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
}

@ -3264,6 +3271,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
void *ptr, *fb_ptr, *it_probs_ptr = NULL;
uint32_t out_offset, fb_offset, it_probs_offset = 0;
struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
struct radv_cmd_stream *cs = cmd_buffer->cs;

size += sizeof(rvcn_dec_message_header_t); /* header */
size += sizeof(rvcn_dec_message_index_t); /* codec */

@ -3352,10 +3360,10 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, radv_buffer_get_va(it_probs_bo) + it_probs_offset);

if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(device->ws, cmd_buffer->cs, 2);
radeon_check_space(device->ws, cs->b, 2);
set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
} else
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
}

VKAPI_ATTR void VKAPI_CALL

@ -23,6 +23,7 @@ struct radv_physical_device;
struct rvcn_sq_var;
struct radv_cmd_buffer;
struct radv_image_create_info;
struct radv_cmd_stream;

#define RADV_ENC_MAX_RATE_LAYER 4

@ -82,8 +83,8 @@ void radv_init_physical_device_decoder(struct radv_physical_device *pdev);
void radv_video_get_profile_alignments(struct radv_physical_device *pdev, const VkVideoProfileListInfoKHR *profile_list,
uint32_t *width_align_out, uint32_t *height_align_out);

void radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature);
void radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq);
void radv_vcn_sq_header(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature);
void radv_vcn_sq_tail(struct radv_cmd_stream *cs, struct rvcn_sq_var *sq);
void radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, unsigned value);

void radv_init_physical_device_encoder(struct radv_physical_device *pdevice);
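The VCN paths above rely on a reserve-and-patch idiom: radv_vcn_sq_header() emits placeholder dwords and stashes their addresses inside cs->b->buf, and radv_vcn_sq_tail() patches them once the final IB size and checksum are known. A sketch of the idiom in isolation; reserve_size_dword()/patch_size_dword() are illustrative names only:

static uint32_t *
reserve_size_dword(struct radv_cmd_stream *cs)
{
   radeon_begin(cs);
   radeon_emit(0); /* placeholder, patched later */
   radeon_end();
   return &cs->b->buf[cs->b->cdw - 1];
}

static void
patch_size_dword(struct radv_cmd_stream *cs, uint32_t *slot)
{
   /* Dwords emitted since (and including) the placeholder. */
   *slot = &cs->b->buf[cs->b->cdw] - slot;
}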
@ -196,16 +196,16 @@ static const unsigned index_to_shifts[4] = {24, 16, 8, 0};
static void
radv_enc_output_one_byte(struct radv_cmd_buffer *cmd_buffer, unsigned char byte)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_enc_state *enc = &cmd_buffer->video.enc;
if (enc->byte_index == 0)
cs->buf[cs->cdw] = 0;
cs->buf[cs->cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]);
cs->b->buf[cs->b->cdw] = 0;
cs->b->buf[cs->b->cdw] |= ((unsigned int)(byte) << index_to_shifts[enc->byte_index]);
enc->byte_index++;

if (enc->byte_index >= 4) {
enc->byte_index = 0;
cs->cdw++;
cs->b->cdw++;
}
}

@ -279,7 +279,7 @@ static void
radv_enc_flush_headers(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_enc_state *enc = &cmd_buffer->video.enc;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (enc->bits_in_shifter != 0) {
unsigned char output_byte = (unsigned char)(enc->shifter >> 24);
radv_enc_emulation_prevention(cmd_buffer, output_byte);

@ -291,7 +291,7 @@ radv_enc_flush_headers(struct radv_cmd_buffer *cmd_buffer)
}

if (enc->byte_index > 0) {
cs->cdw++;
cs->b->cdw++;
enc->byte_index = 0;
}
}

@ -377,15 +377,15 @@ radv_enc_h265_pic_type(enum StdVideoH265PictureType type)
}
}

#define RADEON_ENC_CS(value) (cmd_buffer->cs->buf[cmd_buffer->cs->cdw++] = (value))
#define RADEON_ENC_CS(value) (cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw++] = (value))

#define RADEON_ENC_BEGIN(cmd) \
{ \
uint32_t *begin = &cmd_buffer->cs->buf[cmd_buffer->cs->cdw++]; \
uint32_t *begin = &cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw++]; \
RADEON_ENC_CS(cmd)

#define RADEON_ENC_END() \
*begin = (&cmd_buffer->cs->buf[cmd_buffer->cs->cdw] - begin) * 4; \
*begin = (&cmd_buffer->cs->b->buf[cmd_buffer->cs->b->cdw] - begin) * 4; \
cmd_buffer->video.enc.total_task_size += *begin; \
}

@ -404,7 +404,7 @@ radv_enc_av1_bs_copy_end(struct radv_cmd_buffer *cmd_buffer, uint32_t bits)
static void
radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, uint32_t inst, uint32_t obu_type)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_enc_state *enc = &cmd_buffer->video.enc;

radv_enc_flush_headers(cmd_buffer);

@ -412,7 +412,7 @@ radv_enc_av1_bs_instruction_type(struct radv_cmd_buffer *cmd_buffer, uint32_t in
if (enc->bits_output)
radv_enc_av1_bs_copy_end(cmd_buffer, enc->bits_output);

enc->copy_start = &cs->buf[cs->cdw++];
enc->copy_start = &cs->b->buf[cs->b->cdw++];
RADEON_ENC_CS(inst);

if (inst != RENCODE_HEADER_INSTRUCTION_COPY) {

@ -432,9 +432,9 @@ radv_enc_session_info(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;

radv_cs_add_buffer(device->ws, cs, cmd_buffer->video.vid->sessionctx.mem->bo);
radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->video.vid->sessionctx.mem->bo);

uint64_t va = radv_buffer_get_va(cmd_buffer->video.vid->sessionctx.mem->bo);
va += cmd_buffer->video.vid->sessionctx.offset;

@ -455,12 +455,12 @@ radv_enc_task_info(struct radv_cmd_buffer *cmd_buffer, bool feedback)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_enc_state *enc = &cmd_buffer->video.enc;

enc->task_id++;
RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.task_info);
enc->p_task_size = &cs->buf[cs->cdw++];
enc->p_task_size = &cs->b->buf[cs->b->cdw++];
RADEON_ENC_CS(enc->task_id);
RADEON_ENC_CS(feedback ? 1 : 0);
RADEON_ENC_END();

@ -919,13 +919,13 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf

struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;

RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.slice_header);
radv_enc_reset(cmd_buffer);
radv_enc_set_emulation_prevention(cmd_buffer, false);

cdw_start = cs->cdw;
cdw_start = cs->b->cdw;

if (pic->flags.IdrPicFlag)
radv_enc_code_fixed_bits(cmd_buffer, 0x65, 8);

@ -1073,7 +1073,7 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf

instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;

cdw_filled = cs->cdw - cdw_start;
cdw_filled = cs->b->cdw - cdw_start;
for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++)
RADEON_ENC_CS(0x00000000);
for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {

@ -1158,14 +1158,14 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco
unsigned int num_pic_total_curr = 0;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned nal_unit_type = vk_video_get_h265_nal_unit(pic);

RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.slice_header);
radv_enc_reset(cmd_buffer);
radv_enc_set_emulation_prevention(cmd_buffer, false);

cdw_start = cs->cdw;
cdw_start = cs->b->cdw;
radv_enc_code_fixed_bits(cmd_buffer, 0x0, 1);
radv_enc_code_fixed_bits(cmd_buffer, nal_unit_type, 6);
radv_enc_code_fixed_bits(cmd_buffer, 0x0, 6);

@ -1354,7 +1354,7 @@ radv_enc_slice_header_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEnco

instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;

cdw_filled = cs->cdw - cdw_start;
cdw_filled = cs->b->cdw - cdw_start;
for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++)
RADEON_ENC_CS(0x00000000);
for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {

@ -1392,7 +1392,7 @@ radv_enc_ctx(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *inf
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_video_session *vid = cmd_buffer->video.vid;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_image_view *dpb_iv = NULL;
struct radv_image *dpb = NULL;
uint64_t va = 0;

@ -1420,7 +1420,7 @@ radv_enc_ctx(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *inf

dpb_image_sizes(dpb, &luma_pitch, &luma_size, &chroma_size, &colloc_bytes);

radv_cs_add_buffer(device->ws, cs, dpb->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb->bindings[0].bo);
va = dpb->bindings[0].addr;
}

@ -1533,6 +1533,7 @@ radv_enc_ctx2(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *in
uint32_t luma_pitch = 0, luma_size = 0, chroma_size = 0, colloc_bytes = 0;
int max_ref_slot_idx = 0;
const VkVideoPictureResourceInfoKHR *slots[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES] = {NULL};
struct radv_cmd_stream *cs = cmd_buffer->cs;

if (info->pSetupReferenceSlot) {
max_ref_slot_idx = info->pSetupReferenceSlot->slotIndex;

@ -1569,7 +1570,7 @@ radv_enc_ctx2(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *in
struct radv_image_view *dpb_iv = radv_image_view_from_handle(res->imageViewBinding);
assert(dpb_iv != NULL);
struct radv_image *dpb_img = dpb_iv->image;
radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, dpb_img->bindings[0].bo);
dpb_image_sizes(dpb_iv->image, &luma_pitch, &luma_size, &chroma_size, &colloc_bytes);

uint32_t metadata_size = RENCODE_MAX_METADATA_BUFFER_SIZE_PER_FRAME;

@ -1621,9 +1622,9 @@ radv_enc_bitstream(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffe
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t va = vk_buffer_address(&buffer->vk, offset);
radv_cs_add_buffer(device->ws, cs, buffer->bo);
radv_cs_add_buffer(device->ws, cs->b, buffer->bo);

RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.bitstream);
RADEON_ENC_CS(RENCODE_REC_SWIZZLE_MODE_LINEAR);

@ -1739,7 +1740,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *
struct radv_image *src_img = src_iv->image;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint32_t array_idx = enc_info->srcPictureResource.baseArrayLayer + src_iv->vk.base_array_layer;
uint64_t va = src_img->bindings[0].addr;
uint64_t luma_va = va + src_img->planes[0].surface.u.gfx9.surf_offset +

@ -1750,7 +1751,7 @@ radv_enc_params(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *
unsigned int slot_idx = 0xffffffff;
unsigned int max_layers = cmd_buffer->video.vid->rc_layer_control.max_num_temporal_layers;

radv_cs_add_buffer(device->ws, cs, src_img->bindings[0].bo);
radv_cs_add_buffer(device->ws, cs->b, src_img->bindings[0].bo);
if (h264_pic) {
switch (h264_pic->primary_pic_type) {
case STD_VIDEO_H264_PICTURE_TYPE_P:

@ -2094,14 +2095,14 @@ radv_enc_headers_hevc(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
static void
radv_enc_cdf_default_table(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKHR *enc_info)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_stream *cs = cmd_buffer->cs;
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct VkVideoEncodeAV1PictureInfoKHR *av1_picture_info =
vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_AV1_PICTURE_INFO_KHR);
const StdVideoEncodeAV1PictureInfo *av1_pic = av1_picture_info->pStdPictureInfo;

radv_cs_add_buffer(device->ws, cs, cmd_buffer->video.vid->ctx.mem->bo);
radv_cs_add_buffer(device->ws, cs->b, cmd_buffer->video.vid->ctx.mem->bo);
uint64_t va = radv_buffer_get_va(cmd_buffer->video.vid->ctx.mem->bo);
va += cmd_buffer->video.vid->ctx.offset;
uint32_t use_cdf_default = (av1_pic->frame_type == STD_VIDEO_AV1_FRAME_TYPE_KEY ||

@ -2639,6 +2640,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_enc_state *enc = &cmd_buffer->video.enc;
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint64_t feedback_query_va;
switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:

@ -2650,10 +2652,10 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
return;
}

radeon_check_space(device->ws, cmd_buffer->cs, 1600);
radeon_check_space(device->ws, cs->b, 1600);

if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false);
radv_vcn_sq_header(cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false);

const struct VkVideoInlineQueryInfoKHR *inline_queries = NULL;
if (vid->vk.flags & VK_VIDEO_SESSION_CREATE_INLINE_QUERIES_BIT_KHR) {

@ -2662,7 +2664,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
if (inline_queries) {
VK_FROM_HANDLE(radv_query_pool, pool, inline_queries->queryPool);

radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->bo);
radv_cs_add_buffer(device->ws, cs->b, pool->bo);

feedback_query_va = radv_buffer_get_va(pool->bo);
feedback_query_va += pool->stride * inline_queries->firstQuery;

@ -2745,7 +2747,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
*enc->p_task_size = enc->total_task_size;

if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
}

static void
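The encoder builds its headers dword-by-dword through the RADEON_ENC_* macros rather than radeon_emit(), which is why these hunks reach through cs->b explicitly. Typical use, following the task_info hunk above (the payload value here is a placeholder):

RADEON_ENC_BEGIN(pdev->vcn_enc_cmds.task_info);
RADEON_ENC_CS(0x0); /* payload dword(s) */
RADEON_ENC_END();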