diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index fe59de4c855..2cdb7b05000 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -429,22 +429,40 @@ radv_amdgpu_winsys_cs_pad(struct radeon_cmdbuf *_cs, unsigned leave_dw_space) const uint32_t pad_dw_mask = cs->ws->info.ip[ip_type].ib_pad_dw_mask; const uint32_t unaligned_dw = (cs->base.cdw + leave_dw_space) & pad_dw_mask; - assert(ip_type == AMD_IP_GFX || ip_type == AMD_IP_COMPUTE); + if (ip_type == AMD_IP_GFX || ip_type == AMD_IP_COMPUTE) { + if (unaligned_dw) { + const int remaining = pad_dw_mask + 1 - unaligned_dw; - if (unaligned_dw) { - const int remaining = pad_dw_mask + 1 - unaligned_dw; + /* Only pad by 1 dword with the type-2 NOP if necessary. */ + if (remaining == 1 && cs->ws->info.gfx_ib_pad_with_type2) { + radeon_emit_unchecked(&cs->base, PKT2_NOP_PAD); + } else { + /* Pad with a single NOP packet to minimize CP overhead because NOP is a variable-sized + * packet. The size of the packet body after the header is always count + 1. + * If count == -1, there is no packet body. NOP is the only packet that can have + * count == -1, which is the definition of PKT3_NOP_PAD (count == 0x3fff means -1). + */ + radeon_emit_unchecked(&cs->base, PKT3(PKT3_NOP, remaining - 2, 0)); + cs->base.cdw += remaining - 1; + } + } + } else { + /* Pad the CS with NOP packets. */ + bool pad = true; - /* Only pad by 1 dword with the type-2 NOP if necessary. */ - if (remaining == 1 && cs->ws->info.gfx_ib_pad_with_type2) { - radeon_emit_unchecked(&cs->base, PKT2_NOP_PAD); - } else { - /* Pad with a single NOP packet to minimize CP overhead because NOP is a variable-sized - * packet. The size of the packet body after the header is always count + 1. - * If count == -1, there is no packet body. NOP is the only packet that can have - * count == -1, which is the definition of PKT3_NOP_PAD (count == 0x3fff means -1). - */ - radeon_emit_unchecked(&cs->base, PKT3(PKT3_NOP, remaining - 2, 0)); - cs->base.cdw += remaining - 1; + /* Don't pad on VCN encode/unified as no NOPs */ + if (ip_type == AMDGPU_HW_IP_VCN_ENC) + pad = false; + + /* Don't add padding to 0 length UVD due to kernel */ + if (ip_type == AMDGPU_HW_IP_UVD && cs->base.cdw == 0) + pad = false; + + if (pad) { + const uint32_t nop_packet = get_nop_packet(cs); + + while (!cs->base.cdw || (cs->base.cdw & pad_dw_mask)) + radeon_emit_unchecked(&cs->base, nop_packet); } } @@ -455,7 +473,6 @@ static VkResult radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs) { struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs); - enum amd_ip_type ip_type = cs->hw_ip; assert(cs->base.cdw <= cs->base.reserved_dw); @@ -472,20 +489,7 @@ radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs) *cs->ib_size_ptr |= cs->base.cdw; } else { - /* Pad the CS with NOP packets. */ - bool pad = true; - - /* Don't pad on VCN encode/unified as no NOPs */ - if (ip_type == AMDGPU_HW_IP_VCN_ENC) - pad = false; - - /* Don't add padding to 0 length UVD due to kernel */ - if (ip_type == AMDGPU_HW_IP_UVD && cs->base.cdw == 0) - pad = false; - - if (pad) { - radv_amdgpu_winsys_cs_pad(_cs, 0); - } + radv_amdgpu_winsys_cs_pad(_cs, 0); } /* Append the current (last) IB to the array of IB buffers. */