mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-16 14:08:07 +02:00
I missed there's an off by one, and found the kernel defined
the correct bits, so just use that.
Fixes: 9477f117f4 ("radv/video: add initial frameworking.")
Reviewed-by: Lynne <dev@lynne.ee>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23757>
1904 lines
82 KiB
C
1904 lines
82 KiB
C
/**************************************************************************
|
|
*
|
|
* Copyright 2017 Advanced Micro Devices, Inc.
|
|
* Copyright 2021 Red Hat Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
**************************************************************************/
|
|
#include "radv_private.h"
|
|
|
|
#ifndef _WIN32
|
|
#include "drm-uapi/amdgpu_drm.h"
|
|
#endif
|
|
|
|
#include "vk_video/vulkan_video_codecs_common.h"
|
|
#include "ac_uvd_dec.h"
|
|
#include "ac_vcn_dec.h"
|
|
|
|
#include "radv_cs.h"
|
|
#include "radv_debug.h"
|
|
|
|
#define NUM_H264_REFS 17
|
|
#define NUM_H265_REFS 8
|
|
#define FB_BUFFER_OFFSET 0x1000
|
|
#define FB_BUFFER_SIZE 2048
|
|
#define FB_BUFFER_SIZE_TONGA (2048 * 64)
|
|
#define IT_SCALING_TABLE_SIZE 992
|
|
#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)
|
|
|
|
/* Not 100% sure this isn't too much but works */
|
|
#define VID_DEFAULT_ALIGNMENT 256
|
|
|
|
/* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3.
 * Entry i gives the raster position of the i-th coefficient in scan order. */
const int vl_zscan_h265_up_right_diagonal_16[] = {
   0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15,
};
|
|
|
|
/* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3.
 * Entry i gives the raster position of the i-th coefficient in scan order. */
const int vl_zscan_h265_up_right_diagonal[] = {
   0,  8,  1,  16, 9,  2,  24, 17, 10, 3,  32, 25, 18, 11, 4,  40, 33, 26, 19, 12, 5,  48,
   41, 34, 27, 20, 13, 6,  56, 49, 42, 35, 28, 21, 14, 7,  57, 50, 43, 36, 29, 22, 15, 58,
   51, 44, 37, 30, 23, 59, 52, 45, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63,
};
|
|
|
|
static bool
|
|
radv_enable_tier2(struct radv_physical_device *pdevice)
|
|
{
|
|
if (pdevice->rad_info.family >= CHIP_NAVI21 && !(pdevice->instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
|
|
{
|
|
return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr);
|
|
}
|
|
|
|
/* vcn unified queue (sq) ib header.
 *
 * Emits the two leading metadata blocks of a unified-queue IB:
 *   - signature block: size, magic, checksum slot, total-size slot
 *   - engine-info block: size, magic, engine type, reserved
 * The checksum and total-size dwords are emitted as 0 placeholders; their
 * positions are saved in *sq so radv_vcn_sq_tail() can patch them once the
 * full IB has been written. */
static void
radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, bool enc)
{
   /* vcn ib signature */
   radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
   radeon_emit(cs, RADEON_VCN_SIGNATURE);
   /* Remember where the checksum dword lives; filled in by the tail. */
   sq->ib_checksum = &cs->buf[cs->cdw];
   radeon_emit(cs, 0);
   /* Remember where the total-size dword lives; filled in by the tail. */
   sq->ib_total_size_in_dw = &cs->buf[cs->cdw];
   radeon_emit(cs, 0);

   /* vcn ib engine info */
   radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
   radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
   radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE : RADEON_VCN_ENGINE_TYPE_DECODE);
   radeon_emit(cs, 0);
}
|
|
|
|
/* Finalize a unified-queue IB started by radv_vcn_sq_header(): compute the
 * IB size and a simple additive checksum over everything after the signature
 * block, and patch them into the placeholder dwords saved in *sq.
 * No-op if the header was never emitted. */
static void
radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
{
   uint32_t *end;
   uint32_t size_in_dw;
   uint32_t checksum = 0;

   if (sq->ib_checksum == NULL || sq->ib_total_size_in_dw == NULL)
      return;

   end = &cs->buf[cs->cdw];
   /* Dwords after the total-size slot itself. */
   size_in_dw = end - sq->ib_total_size_in_dw - 1;
   *sq->ib_total_size_in_dw = size_in_dw;
   /* Four dwords past the size slot sits a byte-count field (engine-info
    * block); write the same size expressed in bytes there. */
   *(sq->ib_total_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);

   /* Sum everything following the two signature placeholder dwords. */
   for (int i = 0; i < size_in_dw; i++)
      checksum += *(sq->ib_checksum + 2 + i);

   *sq->ib_checksum = checksum;
}
|
|
|
|
/* generate an stream handle */
|
|
static unsigned
|
|
si_vid_alloc_stream_handle(struct radv_physical_device *pdevice)
|
|
{
|
|
unsigned stream_handle = pdevice->stream_handle_base;
|
|
|
|
stream_handle ^= ++pdevice->stream_handle_counter;
|
|
return stream_handle;
|
|
}
|
|
|
|
/* One-time per-physical-device video decode setup: pick the decode IP
 * (unified VCN queue, UVD ring, or VCN decode ring), seed the stream-handle
 * generator, and record the per-generation decoder doorbell registers and
 * DPB addressing mode. */
void
radv_init_physical_device_decoder(struct radv_physical_device *pdevice)
{
   /* GFX11+ and GFX940 only expose the unified VCN queue; older hardware uses
    * the dedicated UVD ring if present, otherwise the VCN decode ring. */
   if (pdevice->rad_info.family >= CHIP_GFX1100 || pdevice->rad_info.family == CHIP_GFX940)
      pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED;
   else if (radv_has_uvd(pdevice))
      pdevice->vid_decode_ip = AMD_IP_UVD;
   else
      pdevice->vid_decode_ip = AMD_IP_VCN_DEC;

   pdevice->stream_handle_counter = 0;
   pdevice->stream_handle_base = 0;

   /* Bit-reversed pid as the handle base keeps handles from different
    * processes well separated. */
   pdevice->stream_handle_base = util_bitreverse(getpid());

   pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;

   /* Per-ASIC decoder register addresses / DPB tiling mode. Families not
    * listed fall through to the UVD defaults (if UVD exists). */
   switch (pdevice->rad_info.family) {
   case CHIP_VEGA10:
   case CHIP_VEGA12:
   case CHIP_VEGA20:
      pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
      pdevice->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
      pdevice->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
      pdevice->vid_dec_reg.cntl = RUVD_ENGINE_CNTL_SOC15;
      break;
   case CHIP_RAVEN:
   case CHIP_RAVEN2:
      pdevice->vid_dec_reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
      pdevice->vid_dec_reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
      pdevice->vid_dec_reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
      pdevice->vid_dec_reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
      break;
   case CHIP_NAVI10:
   case CHIP_NAVI12:
   case CHIP_NAVI14:
   case CHIP_RENOIR:
      pdevice->vid_dec_reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
      pdevice->vid_dec_reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
      pdevice->vid_dec_reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
      pdevice->vid_dec_reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
      break;
   case CHIP_MI100:
   case CHIP_MI200:
   case CHIP_NAVI21:
   case CHIP_NAVI22:
   case CHIP_NAVI23:
   case CHIP_NAVI24:
   case CHIP_VANGOGH:
   case CHIP_REMBRANDT:
   case CHIP_RAPHAEL_MENDOCINO:
      pdevice->vid_dec_reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
      pdevice->vid_dec_reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
      pdevice->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
      pdevice->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
      break;
   case CHIP_GFX940:
      /* Unified queue: no doorbell registers, only the DPB tiling mode. */
      pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
      break;
   case CHIP_GFX1100:
   case CHIP_GFX1101:
   case CHIP_GFX1102:
   case CHIP_GFX1103_R1:
   case CHIP_GFX1103_R2:
      /* Unified queue: no doorbell registers, only the DPB tiling mode. */
      pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
      break;
   default:
      if (radv_has_uvd(pdevice)) {
         pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
         pdevice->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1;
         pdevice->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD;
         pdevice->vid_dec_reg.cntl = RUVD_ENGINE_CNTL;
      }
      break;
   }
}
|
|
|
|
static bool
|
|
have_it(struct radv_video_session *vid)
|
|
{
|
|
return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265;
|
|
}
|
|
|
|
static unsigned
|
|
calc_ctx_size_h264_perf(struct radv_video_session *vid)
|
|
{
|
|
unsigned width_in_mb, height_in_mb, ctx_size;
|
|
unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
|
|
unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
|
|
|
|
unsigned max_references = vid->vk.max_dpb_slots + 1;
|
|
|
|
// picture width & height in 16 pixel units
|
|
width_in_mb = width / VL_MACROBLOCK_WIDTH;
|
|
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
|
|
|
|
ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
|
|
|
|
return ctx_size;
|
|
}
|
|
|
|
static unsigned
|
|
calc_ctx_size_h265_main(struct radv_video_session *vid)
|
|
{
|
|
unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
|
|
unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
|
|
|
|
unsigned max_references = vid->vk.max_dpb_slots + 1;
|
|
|
|
if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
|
|
max_references = MAX2(max_references, 8);
|
|
else
|
|
max_references = MAX2(max_references, 17);
|
|
|
|
width = align(width, 16);
|
|
height = align(height, 16);
|
|
return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
|
|
}
|
|
|
|
static unsigned
|
|
calc_ctx_size_h265_main10(struct radv_video_session *vid)
|
|
{
|
|
unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
|
|
unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
|
|
unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
|
|
|
|
unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
|
|
unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
|
|
unsigned coeff_10bit = 2;
|
|
|
|
unsigned max_references = vid->vk.max_dpb_slots + 1;
|
|
|
|
if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
|
|
max_references = MAX2(max_references, 8);
|
|
else
|
|
max_references = MAX2(max_references, 17);
|
|
|
|
/* 64x64 is the maximum ctb size. */
|
|
log2_ctb_size = 6;
|
|
|
|
width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
|
|
height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
|
|
|
|
num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
|
|
context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
|
|
max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
|
|
|
|
cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
|
|
db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
|
|
|
|
return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
|
|
}
|
|
|
|
VkResult
|
|
radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo,
|
|
const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
|
|
{
|
|
RADV_FROM_HANDLE(radv_device, device, _device);
|
|
|
|
struct radv_video_session *vid =
|
|
vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
if (!vid)
|
|
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
memset(vid, 0, sizeof(struct radv_video_session));
|
|
|
|
VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free2(&device->vk.alloc, pAllocator, vid);
|
|
return result;
|
|
}
|
|
|
|
vid->interlaced = false;
|
|
vid->dpb_type = DPB_MAX_RES;
|
|
|
|
switch (vid->vk.op) {
|
|
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
|
|
vid->stream_type = RDECODE_CODEC_H264_PERF;
|
|
if (radv_enable_tier2(device->physical_device))
|
|
vid->dpb_type = DPB_DYNAMIC_TIER_2;
|
|
break;
|
|
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
|
|
vid->stream_type = RDECODE_CODEC_H265;
|
|
if (radv_enable_tier2(device->physical_device))
|
|
vid->dpb_type = DPB_DYNAMIC_TIER_2;
|
|
break;
|
|
default:
|
|
return VK_ERROR_FEATURE_NOT_PRESENT;
|
|
}
|
|
|
|
vid->stream_handle = si_vid_alloc_stream_handle(device->physical_device);
|
|
vid->dbg_frame_cnt = 0;
|
|
vid->db_alignment =
|
|
(device->physical_device->rad_info.family >= CHIP_RENOIR && vid->vk.max_coded.width > 32 &&
|
|
(vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10))
|
|
? 64
|
|
: 32;
|
|
|
|
*pVideoSession = radv_video_session_to_handle(vid);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator)
|
|
{
|
|
RADV_FROM_HANDLE(radv_device, device, _device);
|
|
RADV_FROM_HANDLE(radv_video_session, vid, _session);
|
|
if (!_session)
|
|
return;
|
|
|
|
vk_object_base_finish(&vid->vk.base);
|
|
vk_free2(&device->vk.alloc, pAllocator, vid);
|
|
}
|
|
|
|
VkResult
|
|
radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
|
|
const VkAllocationCallbacks *pAllocator,
|
|
VkVideoSessionParametersKHR *pVideoSessionParameters)
|
|
{
|
|
RADV_FROM_HANDLE(radv_device, device, _device);
|
|
RADV_FROM_HANDLE(radv_video_session, vid, pCreateInfo->videoSession);
|
|
RADV_FROM_HANDLE(radv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate);
|
|
struct radv_video_session_params *params =
|
|
vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
if (!params)
|
|
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
VkResult result =
|
|
vk_video_session_parameters_init(&device->vk, ¶ms->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free2(&device->vk.alloc, pAllocator, params);
|
|
return result;
|
|
}
|
|
|
|
*pVideoSessionParameters = radv_video_session_params_to_handle(params);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params,
|
|
const VkAllocationCallbacks *pAllocator)
|
|
{
|
|
RADV_FROM_HANDLE(radv_device, device, _device);
|
|
RADV_FROM_HANDLE(radv_video_session_params, params, _params);
|
|
|
|
vk_video_session_parameters_finish(&device->vk, ¶ms->vk);
|
|
vk_free2(&device->vk.alloc, pAllocator, params);
|
|
}
|
|
|
|
/* Report decode capabilities for a given video profile: common limits,
 * codec-specific limits (H.264 / H.265), and the maximum coded extent taken
 * from the kernel's decode caps when available, falling back to
 * per-generation defaults otherwise. */
VkResult
radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile,
                                           VkVideoCapabilitiesKHR *pCapabilities)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   const struct video_codec_cap *cap = NULL;

   /* Kernel-reported codec caps (not available on Windows builds). */
   switch (pVideoProfile->videoCodecOperation) {
#ifndef _WIN32
   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
      cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
      break;
   case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
      cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
      break;
#endif
   default:
      unreachable("unsupported operation");
   }

   if (cap && !cap->valid)
      cap = NULL;

   pCapabilities->flags = 0;
   pCapabilities->minBitstreamBufferOffsetAlignment = 128;
   pCapabilities->minBitstreamBufferSizeAlignment = 128;
   pCapabilities->pictureAccessGranularity.width = VL_MACROBLOCK_WIDTH;
   pCapabilities->pictureAccessGranularity.height = VL_MACROBLOCK_HEIGHT;
   pCapabilities->minCodedExtent.width = VL_MACROBLOCK_WIDTH;
   pCapabilities->minCodedExtent.height = VL_MACROBLOCK_HEIGHT;

   struct VkVideoDecodeCapabilitiesKHR *dec_caps =
      (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
   if (dec_caps)
      dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;

   /* Although H.264 permits different luma/chroma bit depths, this
    * implementation only supports equal depths and 4:2:0 subsampling. */
   if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
      return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;

   if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR)
      return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;

   switch (pVideoProfile->videoCodecOperation) {
   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
      /* NOTE(review): ext is dereferenced without a NULL check — assumes the
       * caller always chains VkVideoDecodeH264CapabilitiesKHR; confirm. */
      struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct(
         pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);

      const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile =
         vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR);

      /* Only Baseline, Main and High profiles are handled. */
      if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
          h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
          h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
         return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;

      if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
         return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;

      pCapabilities->maxDpbSlots = NUM_H264_REFS;
      pCapabilities->maxActiveReferencePictures = NUM_H264_REFS;

      /* for h264 on navi21+ separate dpb images should work */
      if (radv_enable_tier2(pdevice))
         pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
      ext->fieldOffsetGranularity.x = 0;
      ext->fieldOffsetGranularity.y = 0;
      ext->maxLevelIdc = 51;
      strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
      pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
      break;
   }
   case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
      /* NOTE(review): ext is dereferenced without a NULL check — assumes the
       * caller always chains VkVideoDecodeH265CapabilitiesKHR; confirm. */
      struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct(
         pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);

      const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
         vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);

      /* Only Main, Main 10 and Main Still Picture profiles are handled. */
      if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
          h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 &&
          h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_STILL_PICTURE)
         return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;

      if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
          pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
         return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;

      /* NOTE(review): maxDpbSlots uses NUM_H264_REFS (17) while the active
       * reference limit uses NUM_H265_REFS (8) — looks intentional (slots vs
       * simultaneously-active refs) but worth confirming. */
      pCapabilities->maxDpbSlots = NUM_H264_REFS;
      pCapabilities->maxActiveReferencePictures = NUM_H265_REFS;
      /* for h265 on navi21+ separate dpb images should work */
      if (radv_enable_tier2(pdevice))
         pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
      ext->maxLevelIdc = 51;
      strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME);
      pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION;
      break;
   }
   default:
      break;
   }
   /* Prefer the kernel-reported maximums; otherwise use per-generation
    * defaults (pre-TONGA, TONGA..pre-RENOIR, RENOIR+ for HEVC). */
   if (cap) {
      pCapabilities->maxCodedExtent.width = cap->max_width;
      pCapabilities->maxCodedExtent.height = cap->max_height;
   } else {
      switch (pVideoProfile->videoCodecOperation) {
      case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
         pCapabilities->maxCodedExtent.width = (pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096;
         pCapabilities->maxCodedExtent.height = (pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096;
         break;
      case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
         pCapabilities->maxCodedExtent.width =
            (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
         pCapabilities->maxCodedExtent.height =
            (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
         break;
      default:
         break;
      }
   }

   return VK_SUCCESS;
}
|
|
|
|
VkResult
|
|
radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,
|
|
const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
|
|
uint32_t *pVideoFormatPropertyCount,
|
|
VkVideoFormatPropertiesKHR *pVideoFormatProperties)
|
|
{
|
|
/* radv requires separate allocates for DPB and decode video. */
|
|
if ((pVideoFormatInfo->imageUsage &
|
|
(VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
|
|
(VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
|
|
return VK_ERROR_FORMAT_NOT_SUPPORTED;
|
|
|
|
VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount);
|
|
|
|
bool need_8bit = true;
|
|
bool need_10bit = false;
|
|
const struct VkVideoProfileListInfoKHR *prof_list =
|
|
(struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
|
|
if (prof_list) {
|
|
for (unsigned i = 0; i < prof_list->profileCount; i++) {
|
|
const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i];
|
|
if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
|
|
need_10bit = true;
|
|
}
|
|
}
|
|
|
|
if (need_10bit) {
|
|
vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
|
|
{
|
|
p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16;
|
|
p->imageType = VK_IMAGE_TYPE_2D;
|
|
p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
|
|
p->imageUsageFlags = pVideoFormatInfo->imageUsage;
|
|
}
|
|
|
|
if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
|
|
need_8bit = false;
|
|
}
|
|
|
|
if (need_8bit) {
|
|
vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
|
|
{
|
|
p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
|
|
p->imageType = VK_IMAGE_TYPE_2D;
|
|
p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
|
|
p->imageUsageFlags = pVideoFormatInfo->imageUsage;
|
|
}
|
|
}
|
|
|
|
return vk_outarray_status(&out);
|
|
}
|
|
|
|
#define RADV_BIND_SESSION_CTX 0
|
|
#define RADV_BIND_DECODER_CTX 1
|
|
|
|
/* Report the memory bindings a session needs: a fixed-size session-context
 * buffer (POLARIS10+) and a codec-dependent decoder-context buffer (H.264
 * perf on POLARIS10+, or H.265 always). Any device memory type is allowed. */
VkResult
radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession,
                                          uint32_t *pMemoryRequirementsCount,
                                          VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
   /* Mask with every memory type set: no restrictions. */
   uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

   VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);

   /* 1 buffer for session context */
   if (device->physical_device->rad_info.family >= CHIP_POLARIS10) {
      vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
      {
         m->memoryBindIndex = RADV_BIND_SESSION_CTX;
         m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE;
         m->memoryRequirements.alignment = 0;
         m->memoryRequirements.memoryTypeBits = memory_type_bits;
      }
   }

   /* Decoder context buffer for the H.264 perf path, page-aligned. */
   if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10) {
      vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
      {
         m->memoryBindIndex = RADV_BIND_DECODER_CTX;
         m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096);
         m->memoryRequirements.alignment = 0;
         m->memoryRequirements.memoryTypeBits = memory_type_bits;
      }
   }
   /* Decoder context buffer for H.265; Main 10 needs a larger layout. */
   if (vid->stream_type == RDECODE_CODEC_H265) {
      uint32_t ctx_size;

      if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
         ctx_size = calc_ctx_size_h265_main10(vid);
      else
         ctx_size = calc_ctx_size_h265_main(vid);
      vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
      {
         m->memoryBindIndex = RADV_BIND_DECODER_CTX;
         m->memoryRequirements.size = align(ctx_size, 4096);
         m->memoryRequirements.alignment = 0;
         m->memoryRequirements.memoryTypeBits = memory_type_bits;
      }
   }
   return vk_outarray_status(&out);
}
|
|
|
|
VkResult
|
|
radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters,
|
|
const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
|
|
{
|
|
RADV_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters);
|
|
|
|
return vk_video_session_parameters_update(¶ms->vk, pUpdateInfo);
|
|
}
|
|
|
|
/* Record one memory binding (memory object, offset, size) into the session's
 * bookkeeping struct. */
static void
copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src)
{
   dst->mem = radv_device_memory_from_handle(src->memory);
   dst->offset = src->memoryOffset;
   dst->size = src->memorySize;
}
|
|
|
|
VkResult
|
|
radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount,
|
|
const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
|
|
{
|
|
RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
|
|
|
|
for (unsigned i = 0; i < videoSessionBindMemoryCount; i++) {
|
|
switch (pBindSessionMemoryInfos[i].memoryBindIndex) {
|
|
case RADV_BIND_SESSION_CTX:
|
|
copy_bind(&vid->sessionctx, &pBindSessionMemoryInfos[i]);
|
|
break;
|
|
case RADV_BIND_DECODER_CTX:
|
|
copy_bind(&vid->ctx, &pBindSessionMemoryInfos[i]);
|
|
break;
|
|
default:
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
/* add a new set register command to the IB */
|
|
static void
|
|
set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
|
|
{
|
|
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
|
radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0));
|
|
radeon_emit(cs, val);
|
|
}
|
|
|
|
/* Pass one decoder buffer address to the hardware. On the legacy rings this
 * is three register writes (address lo/hi + command); on the unified VCN
 * queue it instead records the address and valid flag in the shared
 * decode_buffer structure that is submitted later. */
static void
send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
{
   struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
   uint64_t addr;

   /* Make sure the BO is referenced by the submission, then compute its GPU VA. */
   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
   addr = radv_buffer_get_va(bo);
   addr += offset;

   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
      /* Legacy path: 3 register writes = 6 dwords. */
      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6);
      set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
      set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
      set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
      return;
   }
   /* Unified-queue path: stash the address in the decode buffer struct. */
   switch (cmd) {
   case RDECODE_CMD_MSG_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
      cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_DPB_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
      cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_DECODING_TARGET_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
      cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_FEEDBACK_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
      cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_PROB_TBL_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
      cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
      cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_BITSTREAM_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
      cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
      cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr);
      break;
   case RDECODE_CMD_CONTEXT_BUFFER:
      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
      cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32);
      cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
      break;
   default:
      assert(0);
   }
}
|
|
|
|
static void
|
|
rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size)
|
|
{
|
|
rvcn_dec_message_header_t *header = ptr;
|
|
rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t));
|
|
|
|
memset(ptr, 0, size);
|
|
header->header_size = sizeof(rvcn_dec_message_header_t);
|
|
header->total_size = size;
|
|
header->num_buffers = 1;
|
|
header->msg_type = RDECODE_MSG_CREATE;
|
|
header->stream_handle = vid->stream_handle;
|
|
header->status_report_feedback_number = 0;
|
|
|
|
header->index[0].message_id = RDECODE_MESSAGE_CREATE;
|
|
header->index[0].offset = sizeof(rvcn_dec_message_header_t);
|
|
header->index[0].size = sizeof(rvcn_dec_message_create_t);
|
|
header->index[0].filled = 0;
|
|
|
|
create->stream_type = vid->stream_type;
|
|
create->session_flags = 0;
|
|
create->width_in_samples = vid->vk.max_coded.width;
|
|
create->height_in_samples = vid->vk.max_coded.height;
|
|
}
|
|
|
|
static void
|
|
rvcn_dec_message_feedback(void *ptr)
|
|
{
|
|
rvcn_dec_feedback_header_t *header = (void *)ptr;
|
|
|
|
header->header_size = sizeof(rvcn_dec_feedback_header_t);
|
|
header->total_size = sizeof(rvcn_dec_feedback_header_t);
|
|
header->num_buffers = 0;
|
|
}
|
|
|
|
/* Map a StdVideoH264LevelIdc enum value (densely packed from 0) to the
 * numeric level_idc (level * 10, e.g. 5.1 -> 51) that the firmware expects. */
static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62};
static uint8_t
get_h264_level(StdVideoH264LevelIdc level)
{
   /* The table covers exactly the enum range up to 6.2. */
   assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
   return h264_levels[level];
}
|
|
|
|
static rvcn_dec_message_avc_t
|
|
get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
|
|
const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
|
|
uint32_t *height_in_samples, void *it_ptr)
|
|
{
|
|
rvcn_dec_message_avc_t result;
|
|
const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
|
|
vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
|
|
|
|
*slice_offset = h264_pic_info->pSliceOffsets[0];
|
|
|
|
memset(&result, 0, sizeof(result));
|
|
|
|
assert(params->vk.h264_dec.std_sps_count > 0);
|
|
const StdVideoH264SequenceParameterSet *sps =
|
|
vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
|
|
switch (sps->profile_idc) {
|
|
case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
|
|
result.profile = RDECODE_H264_PROFILE_BASELINE;
|
|
break;
|
|
case STD_VIDEO_H264_PROFILE_IDC_MAIN:
|
|
result.profile = RDECODE_H264_PROFILE_MAIN;
|
|
break;
|
|
case STD_VIDEO_H264_PROFILE_IDC_HIGH:
|
|
result.profile = RDECODE_H264_PROFILE_HIGH;
|
|
break;
|
|
default:
|
|
fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
|
|
result.profile = RDECODE_H264_PROFILE_MAIN;
|
|
break;
|
|
}
|
|
|
|
*width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
|
|
*height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
|
|
if (!sps->flags.frame_mbs_only_flag)
|
|
*height_in_samples *= 2;
|
|
result.level = get_h264_level(sps->level_idc);
|
|
|
|
result.sps_info_flags = 0;
|
|
|
|
result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
|
|
result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
|
|
result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
|
|
result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
|
|
if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
|
|
result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
|
|
|
|
result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
|
|
result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
|
|
result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
|
|
result.pic_order_cnt_type = sps->pic_order_cnt_type;
|
|
result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
|
|
|
|
result.chroma_format = sps->chroma_format_idc;
|
|
|
|
const StdVideoH264PictureParameterSet *pps =
|
|
vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
|
|
result.pps_info_flags = 0;
|
|
result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
|
|
result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
|
|
result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
|
|
result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
|
|
result.pps_info_flags |= pps->weighted_bipred_idc << 4;
|
|
result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
|
|
result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
|
|
result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
|
|
|
|
result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
|
|
result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
|
|
result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
|
|
|
|
if (pps->flags.pic_scaling_matrix_present_flag) {
|
|
memcpy(result.scaling_list_4x4, pps->pScalingLists->ScalingList4x4, 6 * 16);
|
|
memcpy(result.scaling_list_8x8[0], pps->pScalingLists->ScalingList8x8[0], 64);
|
|
memcpy(result.scaling_list_8x8[1], pps->pScalingLists->ScalingList8x8[3], 64);
|
|
} else if (sps->flags.seq_scaling_matrix_present_flag) {
|
|
memcpy(result.scaling_list_4x4, sps->pScalingLists->ScalingList4x4, 6 * 16);
|
|
memcpy(result.scaling_list_8x8[0], sps->pScalingLists->ScalingList8x8[0], 64);
|
|
memcpy(result.scaling_list_8x8[1], sps->pScalingLists->ScalingList8x8[3], 64);
|
|
} else {
|
|
memset(result.scaling_list_4x4, 0x10, 6 * 16);
|
|
memset(result.scaling_list_8x8, 0x10, 2 * 64);
|
|
}
|
|
|
|
memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
|
|
memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
|
|
memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
|
|
|
|
result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
|
|
result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
|
|
|
|
result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
|
|
result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
|
|
|
|
result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
|
|
|
|
result.num_ref_frames = sps->max_num_ref_frames;
|
|
result.non_existing_frame_flags = 0;
|
|
result.used_for_reference_flags = 0;
|
|
|
|
memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
|
|
memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
|
|
for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
|
|
int idx = frame_info->pReferenceSlots[i].slotIndex;
|
|
const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
|
|
vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
|
|
|
|
result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
|
|
result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
|
|
result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
|
|
|
|
result.ref_frame_list[i] = idx;
|
|
|
|
if (dpb_slot->pStdReferenceInfo->flags.top_field_flag)
|
|
result.used_for_reference_flags |= (1 << (2 * i));
|
|
if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
|
|
result.used_for_reference_flags |= (1 << (2 * i + 1));
|
|
|
|
if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
|
|
result.used_for_reference_flags |= (3 << (2 * i));
|
|
|
|
if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
|
|
result.ref_frame_list[i] |= 0x80;
|
|
if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
|
|
result.non_existing_frame_flags |= 1 << i;
|
|
}
|
|
result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
|
|
result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
|
|
|
|
return result;
|
|
}
|
|
|
|
/* Stage the H.265 scaling lists (quantization matrices) into the IT buffer
 * in the layout the decoder firmware expects.
 *
 * it_ptr        - destination staging buffer.
 * scaling_lists - Vulkan-provided lists, or NULL to upload all-zero tables.
 *
 * Fixed destination layout (bytes): 4x4 lists at 0, 8x8 at 96, 16x16 at 480,
 * 32x32 at 864. */
static void
update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists)
{
   uint8_t ScalingList4x4[STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS] = {
      0};
   uint8_t ScalingList8x8[STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS] = {
      0};
   uint8_t ScalingList16x16[STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS]
                           [STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS] = {0};
   uint8_t ScalingList32x32[STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS]
                           [STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS] = {0};
   int i, j;

   if (scaling_lists) {
      /* Reorder each list through the vl_zscan up-right-diagonal tables
       * (diagonal-scan <-> raster element order remap). */
      for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) {
         for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
            ScalingList4x4[i][j] = scaling_lists->ScalingList4x4[i][vl_zscan_h265_up_right_diagonal_16[j]];
         /* 8x8 and 16x16 share the 6-list count of the outer loop; 32x32
          * only has 2 lists, hence the extra bound check. */
         for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS; j++) {
            ScalingList8x8[i][j] = scaling_lists->ScalingList8x8[i][vl_zscan_h265_up_right_diagonal[j]];
            ScalingList16x16[i][j] = scaling_lists->ScalingList16x16[i][vl_zscan_h265_up_right_diagonal[j]];
            if (i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS)
               ScalingList32x32[i][j] = scaling_lists->ScalingList32x32[i][vl_zscan_h265_up_right_diagonal[j]];
         }
      }
   }

   /* Copy the (possibly zeroed) raster-order tables to their fixed offsets. */
   memcpy(it_ptr, ScalingList4x4,
          STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
   memcpy((char *)it_ptr + 96, ScalingList8x8,
          STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
   memcpy((char *)it_ptr + 480, ScalingList16x16,
          STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
   memcpy((char *)it_ptr + 864, ScalingList32x32,
          STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
}
|
|
|
|
/* Build the VCN HEVC (H.265) per-frame decode message from the Vulkan decode
 * parameters.
 *
 * device     - used to apply a Carrizo-specific flag.
 * vid        - video session (profile/format checks for 10-bit handling).
 * params     - session parameters object holding the stored SPS/PPS sets.
 * frame_info - Vulkan decode info; its pNext chain must carry
 *              VkVideoDecodeH265PictureInfoKHR.
 * it_ptr     - out: scaling-table staging buffer, filled via
 *              update_h265_scaling().
 *
 * Returns the fully populated rvcn_dec_message_hevc_t by value. */
static rvcn_dec_message_hevc_t
get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
             const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
{
   rvcn_dec_message_hevc_t result;
   int i, j;
   const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
      vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
   memset(&result, 0, sizeof(result));

   const StdVideoH265SequenceParameterSet *sps =
      vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id);
   const StdVideoH265PictureParameterSet *pps =
      vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);

   result.sps_info_flags = 0;
   result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
   result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
   result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
   result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
   result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
   result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
   result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
   result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
   result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;

   /* NOTE(review): bit 9 is set only on Carrizo — presumably a hardware
    * workaround flag; exact firmware meaning not visible here. */
   if (device->physical_device->rad_info.family == CHIP_CARRIZO)
      result.sps_info_flags |= 1 << 9;

   /* Bit 11 signals that the short-term RPS is coded in the slice header
    * rather than taken from the SPS. */
   if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
      result.sps_info_flags |= 1 << 11;
   }
   result.st_rps_bits = h265_pic_info->pStdPictureInfo->NumBitsForSTRefPicSetInSlice;

   result.chroma_format = sps->chroma_format_idc;
   result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
   result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
   result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
   /* DPB size is taken for the highest sub-layer. */
   result.sps_max_dec_pic_buffering_minus1 =
      sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
   result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
   result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
   result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
   result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
   result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
   result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
   if (sps->flags.pcm_enabled_flag) {
      result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
      result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
      result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
      result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
   }
   result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;

   result.pps_info_flags = 0;
   result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
   result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
   result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
   result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
   result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
   result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
   result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
   result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
   result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
   result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
   result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
   result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
   result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
   result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
   result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
   result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
   result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
   result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
   result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
   result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;

   result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
   result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
   result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
   result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
   result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
   result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
   result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
   result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
   result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
   result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
   result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
   result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
   result.init_qp_minus26 = pps->init_qp_minus26;

   /* Tile geometry: loop bounds 19/21 match the message's fixed array sizes. */
   for (i = 0; i < 19; ++i)
      result.column_width_minus1[i] = pps->column_width_minus1[i];

   for (i = 0; i < 21; ++i)
      result.row_height_minus1[i] = pps->row_height_minus1[i];

   result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
   result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;

   /* Build slot-index -> reference-list-position mapping (idxs) while
    * filling the POC and reference lists; 0x7f/0xff mark unused entries. */
   uint8_t idxs[16];
   memset(result.poc_list, 0, 16 * sizeof(int));
   memset(result.ref_pic_list, 0x7f, 16);
   memset(idxs, 0xff, 16);
   for (i = 0; i < frame_info->referenceSlotCount; i++) {
      const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
         vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
      int idx = frame_info->pReferenceSlots[i].slotIndex;
      result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
      result.ref_pic_list[i] = idx;
      idxs[idx] = i;
   }
   result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;

/* Remap a DPB slot index to its position in ref_pic_list; 0xff stays 0xff. */
#define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
   for (i = 0; i < 8; ++i)
      result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);

   for (i = 0; i < 8; ++i)
      result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);

   for (i = 0; i < 8; ++i)
      result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);

   /* Scaling lists: PPS overrides SPS; NULL uploads zeroed tables. */
   const StdVideoH265ScalingLists *scaling_lists = NULL;
   if (pps->flags.pps_scaling_list_data_present_flag)
      scaling_lists = pps->pScalingLists;
   else if (sps->flags.sps_scaling_list_data_present_flag)
      scaling_lists = sps->pScalingLists;

   update_h265_scaling(it_ptr, scaling_lists);

   if (scaling_lists) {
      for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
         result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];

      for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
         result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
   }

   /* Direct reference lists are not provided by the Vulkan API here; fill
    * with the "unused" sentinel. */
   for (i = 0; i < 2; i++) {
      for (j = 0; j < 15; j++)
         result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j];
   }

   /* 10-bit (Main 10) output handling: native P010/P016 when the target
    * format is 10-bit 2-plane, otherwise configure the 10-to-8 downscale. */
   if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
      if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
         result.p010_mode = 1;
         result.msb_mode = 1;
      } else {
         result.p010_mode = 0;
         result.luma_10to8 = 5;
         result.chroma_10to8 = 5;
         result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
         result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
      }
   }

   return result;
}
|
|
|
|
/* Assemble the complete VCN decode message blob for one frame: a header,
 * per-section index entries, the common decode section, the optional
 * dynamic-DPB (tier 2) section, and the codec-specific section.
 *
 * cmd_buffer   - command buffer the decode is recorded into (buffer lists).
 * vid          - video session (codec, DPB tier, session context).
 * params       - session parameters (SPS/PPS store).
 * ptr          - destination message buffer.
 * it_ptr       - scaling-table staging buffer, filled by the codec helper.
 * slice_offset - out: bitstream offset of the first slice (0 for HEVC).
 * frame_info   - Vulkan decode info for this frame.
 *
 * Returns false if the destination surface uses DCC (unsupported), true
 * otherwise. */
static bool
rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
                        struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
                        const struct VkVideoDecodeInfoKHR *frame_info)
{
   struct radv_device *device = cmd_buffer->device;
   rvcn_dec_message_header_t *header;
   rvcn_dec_message_index_t *index_codec;
   rvcn_dec_message_decode_t *decode;
   rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
   rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
   void *codec;
   unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb;
   struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
   struct radv_image *img = dst_iv->image;
   struct radv_image_plane *luma = &img->planes[0];
   /* NOTE(review): assumes the destination image always has a second
    * (chroma) plane — verify for monochrome/single-plane formats. */
   struct radv_image_plane *chroma = &img->planes[1];
   struct radv_image_view *dpb_iv =
      radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
   struct radv_image *dpb = dpb_iv->image;

   /* Lay out the message sections back to back, recording each offset. */
   header = ptr;
   sizes += sizeof(rvcn_dec_message_header_t);

   index_codec = (void *)((char *)header + sizes);
   sizes += sizeof(rvcn_dec_message_index_t);

   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
      index_dynamic_dpb = (void *)((char *)header + sizes);
      sizes += sizeof(rvcn_dec_message_index_t);
   }

   offset_decode = sizes;
   decode = (void *)((char *)header + sizes);
   sizes += sizeof(rvcn_dec_message_decode_t);

   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
      offset_dynamic_dpb = sizes;
      dynamic_dpb_t2 = (void *)((char *)header + sizes);
      sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
   }

   /* The codec payload sits past the indexed sections; it is fully
    * overwritten by the memcpy below, so it is not zeroed here. */
   offset_codec = sizes;
   codec = (void *)((char *)header + sizes);

   memset(ptr, 0, sizes);

   header->header_size = sizeof(rvcn_dec_message_header_t);
   header->total_size = sizes;
   header->msg_type = RDECODE_MSG_DECODE;
   header->stream_handle = vid->stream_handle;
   header->status_report_feedback_number = vid->dbg_frame_cnt++;

   header->index[0].message_id = RDECODE_MESSAGE_DECODE;
   header->index[0].offset = offset_decode;
   header->index[0].size = sizeof(rvcn_dec_message_decode_t);
   header->index[0].filled = 0;
   header->num_buffers = 1;

   /* NOTE(review): the codec index size is always the AVC message size even
    * on the HEVC path (only message_id is switched below) — confirm the
    * firmware tolerates this or whether the HEVC size should be used. */
   index_codec->offset = offset_codec;
   index_codec->size = sizeof(rvcn_dec_message_avc_t);
   index_codec->filled = 0;
   ++header->num_buffers;

   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
      index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
      index_dynamic_dpb->offset = offset_dynamic_dpb;
      index_dynamic_dpb->filled = 0;
      ++header->num_buffers;
      index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
   }

   decode->stream_type = vid->stream_type;
   decode->decode_flags = 0;
   decode->width_in_samples = frame_info->dstPictureResource.codedExtent.width;
   decode->height_in_samples = frame_info->dstPictureResource.codedExtent.height;

   decode->bsd_size = frame_info->srcBufferRange;

   /* Tier-2 dynamic DPB manages per-picture buffers; no monolithic size. */
   decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;

   decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
   decode->sct_size = 0;
   decode->sc_coeff_size = 0;

   decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;

   /* DPB surface layout parameters. */
   decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
   decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
   decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
   decode->db_array_mode = device->physical_device->vid_addr_gfx_mode;

   /* Destination (decode target) pitches, in bytes (pitch * block width). */
   decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
   decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;

   /* A non-zero meta offset indicates DCC, which the decoder can't write. */
   if (luma->surface.meta_offset) {
      fprintf(stderr, "DCC SURFACES NOT SUPPORTED.\n");
      return false;
   }

   decode->dt_tiling_mode = 0;
   decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
   decode->dt_array_mode = device->physical_device->vid_addr_gfx_mode;
   decode->dt_field_mode = vid->interlaced ? 1 : 0;
   decode->dt_surf_tile_config = 0;
   decode->dt_uv_surf_tile_config = 0;

   decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
   decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;

   /* Interlaced output stores the bottom field one slice after the top. */
   if (decode->dt_field_mode) {
      decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
      decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
   } else {
      decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
      decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
   }

   /* Fill the codec-specific section; H.264 also sets the slice offset and
    * may override the coded dimensions from its SPS. */
   *slice_offset = 0;
   switch (vid->vk.op) {
   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
      rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples,
                                                &decode->height_in_samples, it_ptr);
      memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
      index_codec->message_id = RDECODE_MESSAGE_AVC;
      break;
   }
   case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
      rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info, it_ptr);
      memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
      index_codec->message_id = RDECODE_MESSAGE_HEVC;
      break;
   }
   default:
      unreachable("unknown operation");
   }

   decode->hw_ctxt_size = vid->ctx.size;

   /* Everything below is only needed for the tier-2 dynamic DPB. */
   if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
      return true;

   /* Register every reference picture's buffer and record its GPU address. */
   uint64_t addr;
   for (int i = 0; i < frame_info->referenceSlotCount; i++) {
      struct radv_image_view *f_dpb_iv =
         radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
      struct radv_image *dpb_img = f_dpb_iv->image;

      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
      addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset;

      dynamic_dpb_t2->dpbAddrLo[i] = addr;
      dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
      ++dynamic_dpb_t2->dpbArraySize;
   }

   /* Current (setup) reconstructed picture address. */
   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
   addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;

   dynamic_dpb_t2->dpbCurrLo = addr;
   dynamic_dpb_t2->dpbCurrHi = addr >> 32;

   decode->decode_flags = 1;
   dynamic_dpb_t2->dpbConfigFlags = 0;
   dynamic_dpb_t2->dpbLumaPitch = luma->surface.u.gfx9.surf_pitch;
   dynamic_dpb_t2->dpbLumaAlignedHeight = luma->surface.u.gfx9.surf_height;
   dynamic_dpb_t2->dpbLumaAlignedSize = luma->surface.u.gfx9.surf_slice_size;

   dynamic_dpb_t2->dpbChromaPitch = chroma->surface.u.gfx9.surf_pitch;
   dynamic_dpb_t2->dpbChromaAlignedHeight = chroma->surface.u.gfx9.surf_height;
   dynamic_dpb_t2->dpbChromaAlignedSize = chroma->surface.u.gfx9.surf_slice_size;

   return true;
}
|
|
|
|
/* Build the legacy UVD H.264 per-frame decode message. Mirrors the VCN
 * get_h264_msg() path but targets the older ruvd_h264 message layout and
 * RUVD_* profile enums, and does not encode per-field reference usage.
 *
 * vid/params        - video session and its SPS/PPS parameter store.
 * frame_info        - Vulkan decode info; must carry VkVideoDecodeH264PictureInfoKHR.
 * slice_offset      - out: bitstream offset of the first slice.
 * width_in_samples  - out: coded width derived from the SPS.
 * height_in_samples - out: coded height from the SPS (doubled for field coding).
 * it_ptr            - out: IT scaling-table staging buffer.
 *
 * Returns the populated struct ruvd_h264 by value. */
static struct ruvd_h264
get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
                 const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
                 uint32_t *height_in_samples, void *it_ptr)
{
   struct ruvd_h264 result;
   const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
      vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);

   *slice_offset = h264_pic_info->pSliceOffsets[0];

   memset(&result, 0, sizeof(result));

   const StdVideoH264SequenceParameterSet *sps =
      vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
   switch (sps->profile_idc) {
   case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
      result.profile = RUVD_H264_PROFILE_BASELINE;
      break;
   case STD_VIDEO_H264_PROFILE_IDC_MAIN:
      result.profile = RUVD_H264_PROFILE_MAIN;
      break;
   case STD_VIDEO_H264_PROFILE_IDC_HIGH:
      result.profile = RUVD_H264_PROFILE_HIGH;
      break;
   default:
      /* Unknown profile: warn and fall back to Main rather than failing. */
      fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
      result.profile = RUVD_H264_PROFILE_MAIN;
      break;
   }

   /* Coded dimensions from the SPS macroblock counts (16x16 MBs). */
   *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
   *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
   if (!sps->flags.frame_mbs_only_flag)
      *height_in_samples *= 2;
   result.level = get_h264_level(sps->level_idc);

   result.sps_info_flags = 0;

   result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
   result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
   result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
   result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
   /* Unlike the VCN path, the extension flag is set unconditionally here. */
   result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;

   result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
   result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
   result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
   result.pic_order_cnt_type = sps->pic_order_cnt_type;
   result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;

   result.chroma_format = sps->chroma_format_idc;

   const StdVideoH264PictureParameterSet *pps =
      vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
   result.pps_info_flags = 0;
   result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
   result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
   result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
   result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
   result.pps_info_flags |= pps->weighted_bipred_idc << 4; /* 2-bit field at bits 4-5 */
   result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
   result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
   result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;

   result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
   result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
   result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;

   /* Scaling matrices: PPS overrides SPS; flat (all 0x10) if neither is
    * present. Only 8x8 lists 0 and 3 (intra/inter Y) are consumed. */
   if (pps->flags.pic_scaling_matrix_present_flag) {
      memcpy(result.scaling_list_4x4, pps->pScalingLists->ScalingList4x4, 6 * 16);
      memcpy(result.scaling_list_8x8[0], pps->pScalingLists->ScalingList8x8[0], 64);
      memcpy(result.scaling_list_8x8[1], pps->pScalingLists->ScalingList8x8[3], 64);
   } else if (sps->flags.seq_scaling_matrix_present_flag) {
      memcpy(result.scaling_list_4x4, sps->pScalingLists->ScalingList4x4, 6 * 16);
      memcpy(result.scaling_list_8x8[0], sps->pScalingLists->ScalingList8x8[0], 64);
      memcpy(result.scaling_list_8x8[1], sps->pScalingLists->ScalingList8x8[3], 64);
   } else {
      memset(result.scaling_list_4x4, 0x10, 6 * 16);
      memset(result.scaling_list_8x8, 0x10, 2 * 64);
   }

   /* Stage the same lists into the IT buffer: 4x4 at offset 0, 8x8 at 96. */
   memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
   memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
   memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);

   result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
   result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;

   result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
   result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];

   result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;

   /* Populate the reference list from the bound DPB slots; 0xff marks an
    * unused entry and bit 7 flags a long-term reference. */
   result.num_ref_frames = sps->max_num_ref_frames;
   memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
   memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
   for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
      int idx = frame_info->pReferenceSlots[i].slotIndex;
      const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
         vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);

      result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
      result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
      result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];

      result.ref_frame_list[i] = idx;

      if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
         result.ref_frame_list[i] |= 0x80;
   }
   result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
   result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;

   return result;
}
|
|
|
|
/* Build the UVD firmware's H.265 picture-parameter message (ruvd_h265) for
 * the current frame from the Vulkan decode info, and upload the active
 * scaling lists into the IT buffer (it_ptr) via update_h265_scaling(). */
static struct ruvd_h265
get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
                 const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
{
   struct ruvd_h265 result;
   int i, j;
   const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
      vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);

   memset(&result, 0, sizeof(result));

   /* Resolve the active SPS/PPS from the bound session-parameters object. */
   const StdVideoH265SequenceParameterSet *sps =
      vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id);
   const StdVideoH265PictureParameterSet *pps =
      vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);

   /* SPS flags bit-packed in the layout the firmware expects. */
   result.sps_info_flags = 0;
   result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
   result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
   result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
   result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
   result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
   result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
   result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
   result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
   result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;

   /* NOTE(review): bit 9 appears to be a Carrizo-specific quirk bit —
    * confirm against the UVD firmware interface. */
   if (device->physical_device->rad_info.family == CHIP_CARRIZO)
      result.sps_info_flags |= 1 << 9;

   result.chroma_format = sps->chroma_format_idc;
   result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
   result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
   result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
   /* DPB size requirement of the highest temporal sub-layer. */
   result.sps_max_dec_pic_buffering_minus1 =
      sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
   result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
   result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
   result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
   result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
   result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
   result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
   if (sps->flags.pcm_enabled_flag) {
      result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
      result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
      result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
      result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
   }
   result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;

   /* PPS flags, same bit-packing scheme. */
   result.pps_info_flags = 0;
   result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
   result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
   result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
   result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
   result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
   result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
   result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
   result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
   result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
   result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
   result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
   result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
   result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
   result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
   result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
   result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
   result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
   result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
   result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
   result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;

   result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
   result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
   result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
   result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
   result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
   result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
   result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
   result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
   result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
   result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
   result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
   result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
   result.init_qp_minus26 = pps->init_qp_minus26;

   /* Tile geometry.  NOTE(review): the 19/21 bounds presumably match the
    * Vulkan std-header array sizes for column_width/row_height — verify. */
   for (i = 0; i < 19; ++i)
      result.column_width_minus1[i] = pps->column_width_minus1[i];

   for (i = 0; i < 21; ++i)
      result.row_height_minus1[i] = pps->row_height_minus1[i];

   result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
   result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;

   /* Build the reference lists: poc_list/ref_pic_list are indexed by the
    * position within pReferenceSlots; idxs[] maps a DPB slot index back to
    * that position (0xff = slot unused). */
   uint8_t idxs[16];
   memset(result.poc_list, 0, 16 * sizeof(int));
   memset(result.ref_pic_list, 0x7f, 16);
   memset(idxs, 0xff, 16);
   for (i = 0; i < frame_info->referenceSlotCount; i++) {
      const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
         vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
      int idx = frame_info->pReferenceSlots[i].slotIndex;
      result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
      result.ref_pic_list[i] = idx;
      idxs[idx] = i;
   }
   result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;

/* Translate a DPB slot index into a reference-list position (0xff = none). */
#define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
   for (i = 0; i < 8; ++i)
      result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);

   for (i = 0; i < 8; ++i)
      result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);

   for (i = 0; i < 8; ++i)
      result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);

   /* PPS scaling lists take precedence over SPS ones. */
   const StdVideoH265ScalingLists *scaling_lists = NULL;
   if (pps->flags.pps_scaling_list_data_present_flag)
      scaling_lists = pps->pScalingLists;
   else if (sps->flags.sps_scaling_list_data_present_flag)
      scaling_lists = sps->pScalingLists;

   /* Upload the (possibly NULL) scaling lists into the IT buffer. */
   update_h265_scaling(it_ptr, scaling_lists);
   if (scaling_lists) {
      for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
         result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];

      for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
         result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
   }

   /* Direct reference lists are left unset (0xff == invalid entry). */
   for (i = 0; i < 2; i++) {
      for (j = 0; j < 15; j++)
         result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j];
   }

   if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
      if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
         /* 10-bit output requested: emit P010 with samples in the MSBs. */
         result.p010_mode = 1;
         result.msb_mode = 1;
      } else {
         /* NOTE(review): these constants presumably configure the hardware
          * 10-to-8-bit down-conversion path — confirm against firmware docs. */
         result.p010_mode = 0;
         result.luma_10to8 = 5;
         result.chroma_10to8 = 5;
         result.sclr_luma10to8 = 4;
         result.sclr_chroma10to8 = 4;
      }
   }

   return result;
}
|
|
|
|
static unsigned
|
|
texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
|
|
{
|
|
return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
|
|
layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
|
|
}
|
|
|
|
/* Fill a UVD decode message (RUVD_MSG_DECODE) for the current frame and
 * return the bitstream slice offset via *slice_offset.  Returns false for
 * codec operations the UVD path does not handle. */
static bool
ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid,
                        struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
                        const struct VkVideoDecodeInfoKHR *frame_info)
{
   struct ruvd_msg *msg = ptr;
   struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
   struct radv_image *img = dst_iv->image;
   struct radv_image_plane *luma = &img->planes[0];
   /* NOTE(review): chroma is the address of planes[1] and therefore never
    * NULL, so the "if (chroma)" guards below are always taken — presumably
    * they were meant to test for the presence of a second plane; verify. */
   struct radv_image_plane *chroma = &img->planes[1];
   struct radv_image_view *dpb_iv =
      radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
   struct radv_image *dpb = dpb_iv->image;

   memset(msg, 0, sizeof(struct ruvd_msg));
   msg->size = sizeof(*msg);
   msg->msg_type = RUVD_MSG_DECODE;
   msg->stream_handle = vid->stream_handle;
   msg->status_report_feedback_number = vid->dbg_frame_cnt++;

   msg->body.decode.stream_type = vid->stream_type;
   msg->body.decode.decode_flags = 0x1;
   msg->body.decode.width_in_samples = frame_info->dstPictureResource.codedExtent.width;
   msg->body.decode.height_in_samples = frame_info->dstPictureResource.codedExtent.height;

   /* With a dynamic tier-2 DPB there is no monolithic DPB image to size. */
   msg->body.decode.dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
   msg->body.decode.bsd_size = frame_info->srcBufferRange;
   msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);

   if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10)
      msg->body.decode.dpb_reserved = vid->ctx.size;

   *slice_offset = 0;
   /* Codec-specific portion of the message. */
   switch (vid->vk.op) {
   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
      msg->body.decode.codec.h264 =
         get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples,
                          &msg->body.decode.height_in_samples, it_ptr);
      break;
   }
   case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
      msg->body.decode.codec.h265 = get_uvd_h265_msg(device, vid, params, frame_info, it_ptr);

      if (vid->ctx.mem)
         msg->body.decode.dpb_reserved = vid->ctx.size;
      break;
   }
   default:
      return false;
   }

   /* Field (interlaced) output is not used; both field offsets end up equal. */
   msg->body.decode.dt_field_mode = false;

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
      msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
      msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
      msg->body.decode.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
      msg->body.decode.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
      if (msg->body.decode.dt_field_mode) {
         msg->body.decode.dt_luma_bottom_offset =
            luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
         msg->body.decode.dt_chroma_bottom_offset =
            chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
      } else {
         msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
         msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
      }
      msg->body.decode.dt_surf_tile_config = 0;
   } else {
      /* Legacy (pre-GFX9) surface layout: translate the surface mode into
       * the UVD tiling/array enums. */
      msg->body.decode.dt_pitch = luma->surface.u.legacy.level[0].nblk_x * luma->surface.blk_w;
      switch (luma->surface.u.legacy.level[0].mode) {
      case RADEON_SURF_MODE_LINEAR_ALIGNED:
         msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
         msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
         break;
      case RADEON_SURF_MODE_1D:
         msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
         msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
         break;
      case RADEON_SURF_MODE_2D:
         msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
         msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
         break;
      default:
         assert(0);
         break;
      }

      msg->body.decode.dt_luma_top_offset = texture_offset_legacy(&luma->surface, 0);
      if (chroma)
         msg->body.decode.dt_chroma_top_offset = texture_offset_legacy(&chroma->surface, 0);
      if (msg->body.decode.dt_field_mode) {
         msg->body.decode.dt_luma_bottom_offset = texture_offset_legacy(&luma->surface, 1);
         if (chroma)
            msg->body.decode.dt_chroma_bottom_offset = texture_offset_legacy(&chroma->surface, 1);
      } else {
         msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
         msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
      }

      /* The tile config is taken from the luma plane only; both planes must
       * therefore agree on the bank/macro-tile parameters. */
      if (chroma) {
         assert(luma->surface.u.legacy.bankw == chroma->surface.u.legacy.bankw);
         assert(luma->surface.u.legacy.bankh == chroma->surface.u.legacy.bankh);
         assert(luma->surface.u.legacy.mtilea == chroma->surface.u.legacy.mtilea);
      }

      msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(util_logbase2(luma->surface.u.legacy.bankw));
      msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(util_logbase2(luma->surface.u.legacy.bankh));
      msg->body.decode.dt_surf_tile_config |=
         RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
   }

   /* NOTE(review): looks like a Stoney-specific chroma workaround — confirm. */
   if (device->physical_device->rad_info.family >= CHIP_STONEY)
      msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;

   msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
   msg->body.decode.extension_support = 0x1;

   return true;
}
|
|
|
|
static void
|
|
ruvd_dec_message_create(struct radv_video_session *vid, void *ptr)
|
|
{
|
|
struct ruvd_msg *msg = ptr;
|
|
|
|
memset(ptr, 0, sizeof(*msg));
|
|
msg->size = sizeof(*msg);
|
|
msg->msg_type = RUVD_MSG_CREATE;
|
|
msg->stream_handle = vid->stream_handle;
|
|
msg->body.create.stream_type = vid->stream_type;
|
|
msg->body.create.width_in_samples = vid->vk.max_coded.width;
|
|
msg->body.create.height_in_samples = vid->vk.max_coded.height;
|
|
}
|
|
|
|
/* Bind the video session (and optional session parameters) to the command
 * buffer.  On unified VCN queues this also emits the decode IB package
 * header and reserves CS space for the decode buffer, which later decode
 * commands fill in through cmd_buffer->video.decode_buffer. */
void
radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession);
   RADV_FROM_HANDLE(radv_video_session_params, params, pBeginInfo->videoSessionParameters);

   cmd_buffer->video.vid = vid;
   cmd_buffer->video.params = params;

   if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) {
      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256);
      radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
      /* The IB package header is written directly into the CS buffer; cdw is
       * advanced manually for each dword written. */
      rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
      ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
      cmd_buffer->cs->cdw++;
      ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
      cmd_buffer->cs->cdw++;
      /* Remember where the zero-initialized decode buffer lives in the CS so
       * subsequent commands can patch it in place. */
      cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
      cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
      memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
   }
}
|
|
|
|
static void
|
|
radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
|
|
{
|
|
struct radv_video_session *vid = cmd_buffer->video.vid;
|
|
uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
|
|
|
|
void *ptr;
|
|
uint32_t out_offset;
|
|
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
|
|
|
|
rvcn_dec_message_create(vid, ptr, size);
|
|
send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
|
|
send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
|
|
/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
|
|
|
|
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
|
|
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
|
|
for (unsigned i = 0; i < 8; i++)
|
|
radeon_emit(cmd_buffer->cs, 0x81ff);
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
|
|
{
|
|
struct radv_video_session *vid = cmd_buffer->video.vid;
|
|
uint32_t size = sizeof(struct ruvd_msg);
|
|
void *ptr;
|
|
uint32_t out_offset;
|
|
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
|
|
|
|
ruvd_dec_message_create(vid, ptr);
|
|
if (vid->sessionctx.mem)
|
|
send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
|
|
send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
|
|
/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
|
|
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
|
|
for (unsigned i = 0; i < 8; i++)
|
|
radeon_emit(cmd_buffer->cs, 0x81ff);
|
|
}
|
|
|
|
void
|
|
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
|
|
{
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
|
|
if (radv_has_uvd(cmd_buffer->device->physical_device))
|
|
radv_uvd_cmd_reset(cmd_buffer);
|
|
else
|
|
radv_vcn_cmd_reset(cmd_buffer);
|
|
}
|
|
}
|
|
|
|
void
|
|
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
|
|
{
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED)
|
|
return;
|
|
|
|
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
|
|
}
|
|
|
|
/* Record a UVD decode of one frame: upload the feedback, optional IT and
 * message buffers, emit all buffer-binding commands, then kick the engine
 * via the decode control register. */
static void
radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{
   RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
   struct radv_video_session *vid = cmd_buffer->video.vid;
   struct radv_video_session_params *params = cmd_buffer->video.params;
   unsigned size = sizeof(struct ruvd_msg);
   void *ptr, *fb_ptr, *it_ptr = NULL;
   uint32_t out_offset, fb_offset, it_offset = 0;
   struct radeon_winsys_bo *msg_bo, *fb_bo, *it_bo = NULL;
   /* Tonga needs a larger feedback buffer than other UVD parts. */
   unsigned fb_size =
      (cmd_buffer->device->physical_device->rad_info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;

   radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
   fb_bo = cmd_buffer->upload.upload_bo;
   /* IT (inverse-transform scaling table) buffer is only used when the
    * session requires it (have_it()). */
   if (have_it(vid)) {
      radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, &it_ptr);
      it_bo = cmd_buffer->upload.upload_bo;
   }

   radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
   msg_bo = cmd_buffer->upload.upload_bo;

   /* slice_offset is produced by the codec-specific message builder —
    * presumably the byte offset of the first slice within the bitstream
    * buffer; confirm against ruvd_dec_message_decode(). */
   uint32_t slice_offset;
   ruvd_dec_message_decode(cmd_buffer->device, vid, params, ptr, it_ptr, &slice_offset, frame_info);
   rvcn_dec_message_feedback(fb_ptr);
   if (vid->sessionctx.mem)
      send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
   send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);

   /* A monolithic DPB image is only bound below tier-2 dynamic DPB. */
   if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
      struct radv_image_view *dpb_iv =
         radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
      struct radv_image *dpb = dpb_iv->image;
      send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
   }

   if (vid->ctx.mem)
      send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);

   send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
            src_buffer->offset + frame_info->srcBufferOffset + slice_offset);

   struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
   struct radv_image *img = dst_iv->image;
   send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
   send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
   if (have_it(vid))
      send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_bo, it_offset);

   /* Kick off the decode. */
   radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
   set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
}
|
|
|
|
/* Record a VCN decode of one frame: compute the decode message size, upload
 * the message/feedback/IT buffers, emit the buffer-binding commands and —
 * on non-unified queues — kick the engine via the control register. */
static void
radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{
   RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
   struct radv_video_session *vid = cmd_buffer->video.vid;
   struct radv_video_session_params *params = cmd_buffer->video.params;
   unsigned size = 0;
   void *ptr, *fb_ptr, *it_ptr = NULL;
   uint32_t out_offset, fb_offset, it_offset = 0;
   struct radeon_winsys_bo *msg_bo, *fb_bo, *it_bo = NULL;

   /* Total size of the variable-length decode message: header + index
    * entries + decode body + the codec-specific body. */
   size += sizeof(rvcn_dec_message_header_t); /* header */
   size += sizeof(rvcn_dec_message_index_t);  /* codec */
   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
      size += sizeof(rvcn_dec_message_index_t);
      size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
   }
   size += sizeof(rvcn_dec_message_decode_t); /* decode */
   switch (vid->vk.op) {
   case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
      size += sizeof(rvcn_dec_message_avc_t);
      break;
   case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
      size += sizeof(rvcn_dec_message_hevc_t);
      break;
   default:
      unreachable("unsupported codec.");
   }

   radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr);
   fb_bo = cmd_buffer->upload.upload_bo;
   /* IT (inverse-transform scaling table) buffer is only used when the
    * session requires it (have_it()). */
   if (have_it(vid)) {
      radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, &it_ptr);
      it_bo = cmd_buffer->upload.upload_bo;
   }

   radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
   msg_bo = cmd_buffer->upload.upload_bo;

   /* slice_offset is produced by the codec-specific message builder —
    * presumably the byte offset of the first slice within the bitstream
    * buffer; confirm against rvcn_dec_message_decode(). */
   uint32_t slice_offset;
   rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_ptr, &slice_offset, frame_info);
   rvcn_dec_message_feedback(fb_ptr);
   send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
   send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);

   /* A monolithic DPB image is only bound below tier-2 dynamic DPB. */
   if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
      struct radv_image_view *dpb_iv =
         radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
      struct radv_image *dpb = dpb_iv->image;
      send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
   }

   if (vid->ctx.mem)
      send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);

   send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
            src_buffer->offset + frame_info->srcBufferOffset + slice_offset);

   struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
   struct radv_image *img = dst_iv->image;
   send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
   send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
   if (have_it(vid))
      send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_bo, it_offset);

   /* Unified queues kick via the SQ packets instead of the control reg. */
   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
      set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
   }
}
|
|
|
|
void
|
|
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
|
|
{
|
|
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
if (radv_has_uvd(cmd_buffer->device->physical_device))
|
|
radv_uvd_decode_video(cmd_buffer, frame_info);
|
|
else
|
|
radv_vcn_decode_video(cmd_buffer, frame_info);
|
|
}
|