From d103b76ad68ceb21c309ab2de62c59e4027824a5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 16 Jan 2024 12:33:37 +1000 Subject: [PATCH] radv/video: add VK_KHR_video_decode_av1 support. This adds support for AV1 decode to radv; some cleanups are likely still necessary. Co-authored-by: Charlie Turner (I wrote the initial Mesa extension; Charlie ported it to the KHR extension, kept it up to date, and fixed CTS issues.) Acked-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_physical_device.c | 6 + src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_video.c | 1099 ++++++++++++++++++++++++- 3 files changed, 1082 insertions(+), 24 deletions(-) diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index ea83ba04bca..5a63b512ac9 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -553,6 +553,9 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .KHR_variable_pointers = true, .KHR_vertex_attribute_divisor = true, .KHR_video_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), + .KHR_video_decode_av1 = + (device->rad_info.vcn_ip_version >= VCN_3_0_0 && device->rad_info.vcn_ip_version != VCN_3_0_33 && + VIDEO_CODEC_AV1DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE)), .KHR_video_decode_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), @@ -2328,6 +2331,9 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR; if (VIDEO_CODEC_H265DEC) prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR; + if (VIDEO_CODEC_AV1DEC && pdevice->rad_info.vcn_ip_version >= VCN_3_0_0 && + pdevice->rad_info.vcn_ip_version != VCN_3_0_33) + prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR; } break; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 3a435e7b0ae..b2995c15dc5 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -366,6 +366,7 @@ struct radv_physical_device { uint32_t vid_addr_gfx_mode; uint32_t stream_handle_base; uint32_t stream_handle_counter; + uint32_t av1_version; struct radv_physical_device_cache_key cache_key; }; diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index e5ff39b5842..cf0cbbe5193 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -34,6 +34,7 @@ #include "util/vl_zscan_data.h" #include "vk_video/vulkan_video_codecs_common.h" #include "ac_uvd_dec.h" +#include "ac_vcn_av1_default.h" #include "ac_vcn_dec.h" #include "radv_cs.h" @@ -59,9 +60,9 @@ radv_enable_tier2(struct radv_physical_device *pdevice) } static uint32_t -radv_video_get_db_alignment(struct radv_physical_device *pdevice, int width, bool is_h265_main_10) +radv_video_get_db_alignment(struct radv_physical_device *pdevice, int width, bool is_h265_main_10_or_av1) { - if (pdevice->rad_info.vcn_ip_version >= VCN_2_0_0 && width > 32 && is_h265_main_10) + if (pdevice->rad_info.vcn_ip_version >= VCN_2_0_0 && width > 32 && is_h265_main_10_or_av1) return 64; return 32; } @@ -187,12 +188,14 @@ init_vcn_decoder(struct radv_physical_device *pdevice) break; case VCN_4_0_3:
pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9; + pdevice->av1_version = RDECODE_AV1_VER_1; break; case VCN_4_0_0: case VCN_4_0_2: case VCN_4_0_4: case VCN_4_0_5: pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11; + pdevice->av1_version = RDECODE_AV1_VER_1; break; default: break; @@ -208,6 +211,7 @@ radv_init_physical_device_decoder(struct radv_physical_device *pdevice) pdevice->vid_decode_ip = AMD_IP_UVD; else pdevice->vid_decode_ip = AMD_IP_VCN_DEC; + pdevice->av1_version = RDECODE_AV1_VER_0; pdevice->stream_handle_counter = 0; pdevice->stream_handle_base = 0; @@ -228,6 +232,12 @@ have_it(struct radv_video_session *vid) return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265; } +static bool +have_probs(struct radv_video_session *vid) +{ + return vid->stream_type == RDECODE_CODEC_AV1; +} + static unsigned calc_ctx_size_h264_perf(struct radv_video_session *vid) { @@ -237,7 +247,7 @@ calc_ctx_size_h264_perf(struct radv_video_session *vid) unsigned max_references = vid->vk.max_dpb_slots + 1; - // picture width & height in 16 pixel units + /* picture width & height in 16 pixel units */ width_in_mb = width / VL_MACROBLOCK_WIDTH; height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); @@ -298,6 +308,38 @@ calc_ctx_size_h265_main10(struct radv_video_session *vid) return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; } +static unsigned +calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid) +{ + struct radv_physical_device *pdev = device->physical_device; + unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0 + ? align(sizeof(rvcn_av1_frame_context_t), 2048) + : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048); + unsigned ctx_size = (9 + 4) * frame_ctxt_size + 9 * 64 * 34 * 512 + 9 * 64 * 34 * 256 * 5; + + int num_64x64_CTB_8k = 68; + int num_128x128_CTB_8k = 34; + int sdb_pitch_64x64 = align(32 * num_64x64_CTB_8k, 256) * 2; + int sdb_pitch_128x128 = align(32 * num_128x128_CTB_8k, 256) * 2; + int sdb_lf_size_ctb_64x64 = sdb_pitch_64x64 * (align(1728, 64) / 64); + int sdb_lf_size_ctb_128x128 = sdb_pitch_128x128 * (align(3008, 64) / 64); + int sdb_superres_size_ctb_64x64 = sdb_pitch_64x64 * (align(3232, 64) / 64); + int sdb_superres_size_ctb_128x128 = sdb_pitch_128x128 * (align(6208, 64) / 64); + int sdb_output_size_ctb_64x64 = sdb_pitch_64x64 * (align(1312, 64) / 64); + int sdb_output_size_ctb_128x128 = sdb_pitch_128x128 * (align(2336, 64) / 64); + int sdb_fg_avg_luma_size_ctb_64x64 = sdb_pitch_64x64 * (align(384, 64) / 64); + int sdb_fg_avg_luma_size_ctb_128x128 = sdb_pitch_128x128 * (align(640, 64) / 64); + + ctx_size += (MAX2(sdb_lf_size_ctb_64x64, sdb_lf_size_ctb_128x128) + + MAX2(sdb_superres_size_ctb_64x64, sdb_superres_size_ctb_128x128) + + MAX2(sdb_output_size_ctb_64x64, sdb_output_size_ctb_128x128) + + MAX2(sdb_fg_avg_luma_size_ctb_64x64, sdb_fg_avg_luma_size_ctb_128x128)) * + 2 + + 68 * 512; + + return ctx_size; +} + VKAPI_ATTR VkResult VKAPI_CALL radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession) @@ -331,6 +373,10 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR * if (radv_enable_tier2(device->physical_device)) vid->dpb_type = DPB_DYNAMIC_TIER_2; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: + vid->stream_type = RDECODE_CODEC_AV1; + vid->dpb_type = DPB_DYNAMIC_TIER_2; + break; default: return 
VK_ERROR_FEATURE_NOT_PRESENT; } @@ -339,7 +385,8 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR * vid->dbg_frame_cnt = 0; vid->db_alignment = radv_video_get_db_alignment( device->physical_device, vid->vk.max_coded.width, - vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10); + (vid->stream_type == RDECODE_CODEC_AV1 || + (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10))); *pVideoSession = radv_video_session_to_handle(vid); return VK_SUCCESS; @@ -407,6 +454,9 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC]; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: + cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1]; + break; #endif default: unreachable("unsupported operation"); @@ -428,15 +478,12 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons if (dec_caps) dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR; - /* H264 allows different luma and chroma bit depths */ - if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth) - return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR; - - if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR) - return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR; - switch (pVideoProfile->videoCodecOperation) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { + /* H264 allows different luma and chroma bit depths */ + if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth) + return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR; + struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct( pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR); @@ -465,6 +512,10 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons break; } case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { + /* H265 allows different luma and chroma bit depths */ + if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth) + return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR; + struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct( pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR); @@ -490,9 +541,25 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION; break; } + case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: { + /* Monochrome sampling implies an undefined chroma bit depth, and is supported in profile MAIN for AV1. 
*/ + if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR && + pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth) + return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR; + struct VkVideoDecodeAV1CapabilitiesKHR *ext = + vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_AV1_CAPABILITIES_KHR); + pCapabilities->maxDpbSlots = 9; + pCapabilities->maxActiveReferencePictures = STD_VIDEO_AV1_NUM_REF_FRAMES; + pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR; + ext->maxLevel = STD_VIDEO_AV1_LEVEL_6_1; /* For VCN3/4, the only h/w currently with AV1 decode support */ + strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME); + pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION; + break; + } default: break; } + if (cap) { pCapabilities->maxCodedExtent.width = cap->max_width; pCapabilities->maxCodedExtent.height = cap->max_height; @@ -581,7 +648,6 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount); - /* 1 buffer for session context */ if (device->physical_device->rad_info.family >= CHIP_POLARIS10) { vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) @@ -617,6 +683,19 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi m->memoryRequirements.memoryTypeBits = memory_type_bits; } } + if (vid->stream_type == RDECODE_CODEC_AV1) { + vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) + { + m->memoryBindIndex = RADV_BIND_DECODER_CTX; + m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096); + m->memoryRequirements.alignment = 0; + m->memoryRequirements.memoryTypeBits = 0; + for (unsigned i = 0; i < device->physical_device->memory_properties.memoryTypeCount; i++) + if (device->physical_device->memory_properties.memoryTypes[i].propertyFlags & + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + m->memoryRequirements.memoryTypeBits |= (1 << i); + } + } return vk_outarray_status(&out); } @@ -1081,7 +1160,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct for (i = 0; i < 2; i++) { for (j = 0; j < 15; j++) - result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j]; + result.direct_reflist[i][j] = 0xff; } if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) { @@ -1100,6 +1179,902 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct return result; } +enum { + AV1_RESTORE_NONE = 0, + AV1_RESTORE_WIENER = 1, + AV1_RESTORE_SGRPROJ = 2, + AV1_RESTORE_SWITCHABLE = 3, +}; + +#define AV1_SUPERRES_NUM 8 +#define AV1_SUPERRES_DENOM_MIN 9 + +#define LUMA_BLOCK_SIZE_Y 73 +#define LUMA_BLOCK_SIZE_X 82 +#define CHROMA_BLOCK_SIZE_Y 38 +#define CHROMA_BLOCK_SIZE_X 44 + +static int32_t +radv_vcn_av1_film_grain_random_number(unsigned short *seed, int32_t bits) +{ + unsigned short bit; + unsigned short value = *seed; + + bit = ((value >> 0) ^ (value >> 1) ^ (value >> 3) ^ (value >> 12)) & 1; + value = (value >> 1) | (bit << 15); + *seed = value; + + return (value >> (16 - bits)) & ((1 << bits) - 1); +} + +static void +radv_vcn_av1_film_grain_init_scaling(uint8_t scaling_points[][2], uint8_t num, short scaling_lut[]) +{ + int32_t i, x, delta_x, delta_y; + int64_t delta; 
+ + if (num == 0) + return; + + for (i = 0; i < scaling_points[0][0]; i++) + scaling_lut[i] = scaling_points[0][1]; + + for (i = 0; i < num - 1; i++) { + delta_y = scaling_points[i + 1][1] - scaling_points[i][1]; + delta_x = scaling_points[i + 1][0] - scaling_points[i][0]; + + delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x); + + for (x = 0; x < delta_x; x++) + scaling_lut[scaling_points[i][0] + x] = (short)(scaling_points[i][1] + (int32_t)((x * delta + 32768) >> 16)); + } + + for (i = scaling_points[num - 1][0]; i < 256; i++) + scaling_lut[i] = scaling_points[num - 1][1]; +} + +static void +radv_vcn_av1_init_film_grain_buffer(rvcn_dec_film_grain_params_t *fg_params, rvcn_dec_av1_fg_init_buf_t *fg_buf) +{ + const int32_t luma_block_size_y = LUMA_BLOCK_SIZE_Y; + const int32_t luma_block_size_x = LUMA_BLOCK_SIZE_X; + const int32_t chroma_block_size_y = CHROMA_BLOCK_SIZE_Y; + const int32_t chroma_block_size_x = CHROMA_BLOCK_SIZE_X; + const int32_t gauss_bits = 11; + int32_t filt_luma_grain_block[LUMA_BLOCK_SIZE_Y][LUMA_BLOCK_SIZE_X]; + int32_t filt_cb_grain_block[CHROMA_BLOCK_SIZE_Y][CHROMA_BLOCK_SIZE_X]; + int32_t filt_cr_grain_block[CHROMA_BLOCK_SIZE_Y][CHROMA_BLOCK_SIZE_X]; + int32_t chroma_subsamp_y = 1; + int32_t chroma_subsamp_x = 1; + unsigned short seed = fg_params->random_seed; + int32_t ar_coeff_lag = fg_params->ar_coeff_lag; + int32_t bit_depth = fg_params->bit_depth_minus_8 + 8; + short grain_center = 128 << (bit_depth - 8); + short grain_min = 0 - grain_center; + short grain_max = (256 << (bit_depth - 8)) - 1 - grain_center; + int32_t shift = 12 - bit_depth + fg_params->grain_scale_shift; + short luma_grain_block_tmp[64][80]; + short cb_grain_block_tmp[32][40]; + short cr_grain_block_tmp[32][40]; + short *align_ptr, *align_ptr0, *align_ptr1; + int32_t x, y, g, i, j, c, c0, c1, delta_row, delta_col; + int32_t s, s0, s1, pos, r; + + /* generate luma grain block */ + memset(filt_luma_grain_block, 0, sizeof(filt_luma_grain_block)); + for (y = 0; y < luma_block_size_y; y++) { + for (x = 0; x < luma_block_size_x; x++) { + g = 0; + if (fg_params->num_y_points > 0) { + r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits); + g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)]; + } + filt_luma_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift); + } + } + + for (y = 3; y < luma_block_size_y; y++) { + for (x = 3; x < luma_block_size_x - 3; x++) { + s = 0; + pos = 0; + for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) { + for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) { + if (delta_row == 0 && delta_col == 0) + break; + c = fg_params->ar_coeffs_y[pos]; + s += filt_luma_grain_block[y + delta_row][x + delta_col] * c; + pos++; + } + } + filt_luma_grain_block[y][x] = AV1_CLAMP( + filt_luma_grain_block[y][x] + ROUND_POWER_OF_TWO(s, fg_params->ar_coeff_shift), grain_min, grain_max); + } + } + + /* generate chroma grain block */ + memset(filt_cb_grain_block, 0, sizeof(filt_cb_grain_block)); + shift = 12 - bit_depth + fg_params->grain_scale_shift; + seed = fg_params->random_seed ^ 0xb524; + for (y = 0; y < chroma_block_size_y; y++) { + for (x = 0; x < chroma_block_size_x; x++) { + g = 0; + if (fg_params->num_cb_points || fg_params->chroma_scaling_from_luma) { + r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits); + g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)]; + } + filt_cb_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift); + } + } + + memset(filt_cr_grain_block, 0, sizeof(filt_cr_grain_block)); + seed = fg_params->random_seed ^ 0x49d8; + for (y = 0; y 
< chroma_block_size_y; y++) { + for (x = 0; x < chroma_block_size_x; x++) { + g = 0; + if (fg_params->num_cr_points || fg_params->chroma_scaling_from_luma) { + r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits); + g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)]; + } + filt_cr_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift); + } + } + + for (y = 3; y < chroma_block_size_y; y++) { + for (x = 3; x < chroma_block_size_x - 3; x++) { + s0 = 0, s1 = 0, pos = 0; + for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) { + for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) { + c0 = fg_params->ar_coeffs_cb[pos]; + c1 = fg_params->ar_coeffs_cr[pos]; + if (delta_row == 0 && delta_col == 0) { + if (fg_params->num_y_points > 0) { + int luma = 0; + int luma_x = ((x - 3) << chroma_subsamp_x) + 3; + int luma_y = ((y - 3) << chroma_subsamp_y) + 3; + for (i = 0; i <= chroma_subsamp_y; i++) + for (j = 0; j <= chroma_subsamp_x; j++) + luma += filt_luma_grain_block[luma_y + i][luma_x + j]; + + luma = ROUND_POWER_OF_TWO(luma, chroma_subsamp_x + chroma_subsamp_y); + s0 += luma * c0; + s1 += luma * c1; + } + break; + } + s0 += filt_cb_grain_block[y + delta_row][x + delta_col] * c0; + s1 += filt_cr_grain_block[y + delta_row][x + delta_col] * c1; + pos++; + } + } + filt_cb_grain_block[y][x] = AV1_CLAMP( + filt_cb_grain_block[y][x] + ROUND_POWER_OF_TWO(s0, fg_params->ar_coeff_shift), grain_min, grain_max); + filt_cr_grain_block[y][x] = AV1_CLAMP( + filt_cr_grain_block[y][x] + ROUND_POWER_OF_TWO(s1, fg_params->ar_coeff_shift), grain_min, grain_max); + } + } + + for (i = 9; i < luma_block_size_y; i++) + for (j = 9; j < luma_block_size_x; j++) + luma_grain_block_tmp[i - 9][j - 9] = filt_luma_grain_block[i][j]; + + for (i = 6; i < chroma_block_size_y; i++) + for (j = 6; j < chroma_block_size_x; j++) { + cb_grain_block_tmp[i - 6][j - 6] = filt_cb_grain_block[i][j]; + cr_grain_block_tmp[i - 6][j - 6] = filt_cr_grain_block[i][j]; + } + + align_ptr = &fg_buf->luma_grain_block[0][0]; + for (i = 0; i < 64; i++) { + for (j = 0; j < 80; j++) + *align_ptr++ = luma_grain_block_tmp[i][j]; + + if (((i + 1) % 4) == 0) + align_ptr += 64; + } + + align_ptr0 = &fg_buf->cb_grain_block[0][0]; + align_ptr1 = &fg_buf->cr_grain_block[0][0]; + for (i = 0; i < 32; i++) { + for (j = 0; j < 40; j++) { + *align_ptr0++ = cb_grain_block_tmp[i][j]; + *align_ptr1++ = cr_grain_block_tmp[i][j]; + } + if (((i + 1) % 8) == 0) { + align_ptr0 += 64; + align_ptr1 += 64; + } + } + + memset(fg_buf->scaling_lut_y, 0, sizeof(fg_buf->scaling_lut_y)); + radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_y, fg_params->num_y_points, fg_buf->scaling_lut_y); + if (fg_params->chroma_scaling_from_luma) { + memcpy(fg_buf->scaling_lut_cb, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y)); + memcpy(fg_buf->scaling_lut_cr, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y)); + } else { + memset(fg_buf->scaling_lut_cb, 0, sizeof(fg_buf->scaling_lut_cb)); + memset(fg_buf->scaling_lut_cr, 0, sizeof(fg_buf->scaling_lut_cr)); + radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_cb, fg_params->num_cb_points, + fg_buf->scaling_lut_cb); + radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_cr, fg_params->num_cr_points, + fg_buf->scaling_lut_cr); + } +} + +static rvcn_dec_message_av1_t +get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params, + const struct VkVideoDecodeInfoKHR *frame_info, void *probs_ptr, int *update_reference_slot) +{ + 
rvcn_dec_message_av1_t result; + unsigned i, j; + const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info = + vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR); + const StdVideoDecodeAV1PictureInfo *pi = av1_pic_info->pStdPictureInfo; + const StdVideoAV1SequenceHeader *seq_hdr = ¶ms->vk.av1_dec.seq_hdr.base; + memset(&result, 0, sizeof(result)); + + const int intra_only_decoding = vid->vk.max_dpb_slots == 0; + if (intra_only_decoding) + assert(frame_info->pSetupReferenceSlot == NULL); + + *update_reference_slot = !(intra_only_decoding || pi->refresh_frame_flags == 0); + + result.frame_header_flags = (1 /*av1_pic_info->frame_header->flags.show_frame*/ + << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK; + + result.frame_header_flags |= (pi->flags.disable_cdf_update << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK; + + result.frame_header_flags |= + ((!pi->flags.disable_frame_end_update_cdf) << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK; + + result.frame_header_flags |= + ((pi->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK; + + result.frame_header_flags |= (pi->flags.allow_intrabc << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK; + + result.frame_header_flags |= + (pi->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK; + + result.frame_header_flags |= + (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK; + + result.frame_header_flags |= (pi->flags.skip_mode_present << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK; + + result.frame_header_flags |= + (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK; + + result.frame_header_flags |= + (seq_hdr->flags.enable_filter_intra << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK; + + result.frame_header_flags |= + (seq_hdr->flags.enable_intra_edge_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK; + + result.frame_header_flags |= + (seq_hdr->flags.enable_interintra_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK; + + result.frame_header_flags |= + (seq_hdr->flags.enable_masked_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK; + + result.frame_header_flags |= + (pi->flags.allow_warped_motion << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK; + + result.frame_header_flags |= + (seq_hdr->flags.enable_dual_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK; + + result.frame_header_flags |= + (seq_hdr->flags.enable_order_hint << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK; + + 
result.frame_header_flags |= (seq_hdr->flags.enable_jnt_comp << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK; + + result.frame_header_flags |= (pi->flags.use_ref_frame_mvs << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK; + + result.frame_header_flags |= + (pi->flags.allow_screen_content_tools << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK; + + result.frame_header_flags |= + (pi->flags.force_integer_mv << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK; + + result.frame_header_flags |= + (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK; + + result.frame_header_flags |= + (pi->pLoopFilter->flags.loop_filter_delta_update << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK; + + result.frame_header_flags |= (pi->flags.delta_q_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK; + + result.frame_header_flags |= (pi->flags.delta_lf_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK; + + result.frame_header_flags |= (pi->flags.reduced_tx_set << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK; + + result.frame_header_flags |= + (pi->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK; + + result.frame_header_flags |= + (pi->flags.segmentation_update_map << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK; + + result.frame_header_flags |= + (pi->flags.segmentation_temporal_update << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK; + + result.frame_header_flags |= (pi->flags.delta_lf_multi << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK; + + result.frame_header_flags |= + (pi->flags.is_motion_mode_switchable << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK; + + result.frame_header_flags |= ((!intra_only_decoding ? !(pi->refresh_frame_flags) : 1) + << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK; + + result.frame_header_flags |= + ((!seq_hdr->flags.enable_ref_frame_mvs) << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) & + RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK; + + result.current_frame_id = pi->current_frame_id; + result.frame_offset = pi->OrderHint; + result.profile = seq_hdr->seq_profile; + result.is_annexb = 0; + + result.frame_type = pi->frame_type; + result.primary_ref_frame = pi->primary_ref_frame; + + const struct VkVideoDecodeAV1DpbSlotInfoKHR *setup_dpb_slot = + intra_only_decoding + ? 
NULL + : vk_find_struct_const(frame_info->pSetupReferenceSlot->pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR); + + /* The AMD FW interface does not need this information, since it's + * redundant with the information derivable from the current frame header, + * which the FW is parsing and tracking. + */ + (void)setup_dpb_slot; + result.curr_pic_idx = intra_only_decoding ? 0 : frame_info->pSetupReferenceSlot->slotIndex; + + result.sb_size = seq_hdr->flags.use_128x128_superblock; + result.interp_filter = pi->interpolation_filter; + for (i = 0; i < 2; ++i) + result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i]; + result.filter_level_u = pi->pLoopFilter->loop_filter_level[2]; + result.filter_level_v = pi->pLoopFilter->loop_filter_level[3]; + result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness; + for (i = 0; i < 8; ++i) + result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i]; + for (i = 0; i < 2; ++i) + result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i]; + result.base_qindex = pi->pQuantization->base_q_idx; + result.y_dc_delta_q = pi->pQuantization->DeltaQYDc; + result.u_dc_delta_q = pi->pQuantization->DeltaQUDc; + result.v_dc_delta_q = pi->pQuantization->DeltaQVDc; + result.u_ac_delta_q = pi->pQuantization->DeltaQUAc; + result.v_ac_delta_q = pi->pQuantization->DeltaQVAc; + + if (pi->pQuantization->flags.using_qmatrix) { + result.qm_y = pi->pQuantization->qm_y | 0xf0; + result.qm_u = pi->pQuantization->qm_u | 0xf0; + result.qm_v = pi->pQuantization->qm_v | 0xf0; + } else { + result.qm_y = 0xff; + result.qm_u = 0xff; + result.qm_v = 0xff; + } + result.delta_q_res = (1 << pi->delta_q_res); + result.delta_lf_res = (1 << pi->delta_lf_res); + result.tile_cols = pi->pTileInfo->TileCols; + result.tile_rows = pi->pTileInfo->TileRows; + + result.tx_mode = pi->TxMode; + result.reference_mode = (pi->flags.reference_select == 1) ? 2 : 0; + result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1; + result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1; + result.context_update_tile_id = pi->pTileInfo->context_update_tile_id; + + for (i = 0; i < result.tile_cols; i++) + result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i]; + result.tile_col_start_sb[result.tile_cols] = + result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1; + for (i = 0; i < pi->pTileInfo->TileRows; i++) + result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i]; + result.tile_row_start_sb[result.tile_rows] = + result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1; + + result.max_width = seq_hdr->max_frame_width_minus_1 + 1; + result.max_height = seq_hdr->max_frame_height_minus_1 + 1; + VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent; + result.superres_scale_denominator = + pi->flags.use_superres ? pi->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM; + if (pi->flags.use_superres) { + result.width = + (frameExtent.width * 8 + result.superres_scale_denominator / 2) / result.superres_scale_denominator; + } else { + result.width = frameExtent.width; + } + result.height = frameExtent.height; + + result.superres_upscaled_width = frameExtent.width; + + result.order_hint_bits = seq_hdr->order_hint_bits_minus_1 + 1; + + /* The VCN FW will evict references that aren't specified in + * ref_frame_map, even if they are still valid. To prevent this we will + * specify every possible reference in ref_frame_map. 
+ */ + uint16_t used_slots = (1 << result.curr_pic_idx); + for (i = 0; i < frame_info->referenceSlotCount; i++) { + const struct VkVideoDecodeAV1DpbSlotInfoKHR *ref_dpb_slot = + vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR); + (void)ref_dpb_slot; /* Again, the FW is tracking this information for us, so no need for it. */ + int32_t slotIndex = frame_info->pReferenceSlots[i].slotIndex; + result.ref_frame_map[i] = slotIndex; + used_slots |= 1 << slotIndex; + } + /* Go through all the slots and fill in the ones that haven't been used. */ + for (j = 0; j < STD_VIDEO_AV1_NUM_REF_FRAMES + 1; j++) { + if ((used_slots & (1 << j)) == 0) { + result.ref_frame_map[i] = j; + used_slots |= 1 << j; + i++; + } + } + + assert(used_slots == 0x1ff && i == STD_VIDEO_AV1_NUM_REF_FRAMES); + + for (i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; ++i) { + result.frame_refs[i] = + av1_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : av1_pic_info->referenceNameSlotIndices[i]; + } + + result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8; + + int16_t *feature_data = (int16_t *)probs_ptr; + int fd_idx = 0; + for (i = 0; i < 8; ++i) { + result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i]; + for (j = 0; j < 8; ++j) { + result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j]; + feature_data[fd_idx++] = result.feature_data[i][j]; + } + } + + memcpy(((char *)probs_ptr + 128), result.feature_mask, 8); + result.cdef_damping = pi->pCDEF->cdef_damping_minus_3 + 3; + result.cdef_bits = pi->pCDEF->cdef_bits; + for (i = 0; i < 8; ++i) { + result.cdef_strengths[i] = (pi->pCDEF->cdef_y_pri_strength[i] << 2) + pi->pCDEF->cdef_y_sec_strength[i]; + result.cdef_uv_strengths[i] = (pi->pCDEF->cdef_uv_pri_strength[i] << 2) + pi->pCDEF->cdef_uv_sec_strength[i]; + } + + if (pi->flags.UsesLr) { + for (int plane = 0; plane < STD_VIDEO_AV1_MAX_NUM_PLANES; plane++) { + result.frame_restoration_type[plane] = pi->pLoopRestoration->FrameRestorationType[plane]; + result.log2_restoration_unit_size_minus5[plane] = pi->pLoopRestoration->LoopRestorationSize[plane]; + } + } + + if (seq_hdr->pColorConfig->BitDepth > 8) { + if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 || + vid->vk.picture_format == VK_FORMAT_G16_B16R16_2PLANE_420_UNORM) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.luma_10to8 = 1; + result.chroma_10to8 = 1; + } + } + + result.preskip_segid = 0; + result.last_active_segid = 0; + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + if (result.feature_mask[i] & (1 << j)) { + result.last_active_segid = i; + if (j >= 5) + result.preskip_segid = 1; + } + } + } + result.seg_lossless_flag = 0; + for (i = 0; i < 8; ++i) { + int av1_get_qindex, qindex; + int segfeature_active = result.feature_mask[i] & (1 << 0); + if (segfeature_active) { + int seg_qindex = result.base_qindex + result.feature_data[i][0]; + av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex); + } else { + av1_get_qindex = result.base_qindex; + } + qindex = pi->flags.segmentation_enabled ?
av1_get_qindex : result.base_qindex; + result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 && result.u_dc_delta_q == 0 && + result.v_dc_delta_q == 0 && result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0) + << i); + } + + rvcn_dec_film_grain_params_t *fg_params = &result.film_grain; + fg_params->apply_grain = pi->flags.apply_grain; + if (fg_params->apply_grain) { + rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)((char *)probs_ptr + 256); + fg_params->random_seed = pi->pFilmGrain->grain_seed; + fg_params->grain_scale_shift = pi->pFilmGrain->grain_scale_shift; + fg_params->scaling_shift = pi->pFilmGrain->grain_scaling_minus_8 + 8; + fg_params->chroma_scaling_from_luma = pi->pFilmGrain->flags.chroma_scaling_from_luma; + fg_params->num_y_points = pi->pFilmGrain->num_y_points; + fg_params->num_cb_points = pi->pFilmGrain->num_cb_points; + fg_params->num_cr_points = pi->pFilmGrain->num_cr_points; + fg_params->cb_mult = pi->pFilmGrain->cb_mult; + fg_params->cb_luma_mult = pi->pFilmGrain->cb_luma_mult; + fg_params->cb_offset = pi->pFilmGrain->cb_offset; + fg_params->cr_mult = pi->pFilmGrain->cr_mult; + fg_params->cr_luma_mult = pi->pFilmGrain->cr_luma_mult; + fg_params->cr_offset = pi->pFilmGrain->cr_offset; + fg_params->bit_depth_minus_8 = result.bit_depth_luma_minus8; + for (i = 0; i < fg_params->num_y_points; ++i) { + fg_params->scaling_points_y[i][0] = pi->pFilmGrain->point_y_value[i]; + fg_params->scaling_points_y[i][1] = pi->pFilmGrain->point_y_scaling[i]; + } + for (i = 0; i < fg_params->num_cb_points; ++i) { + fg_params->scaling_points_cb[i][0] = pi->pFilmGrain->point_cb_value[i]; + fg_params->scaling_points_cb[i][1] = pi->pFilmGrain->point_cb_scaling[i]; + } + for (i = 0; i < fg_params->num_cr_points; ++i) { + fg_params->scaling_points_cr[i][0] = pi->pFilmGrain->point_cr_value[i]; + fg_params->scaling_points_cr[i][1] = pi->pFilmGrain->point_cr_scaling[i]; + } + + fg_params->ar_coeff_lag = pi->pFilmGrain->ar_coeff_lag; + fg_params->ar_coeff_shift = pi->pFilmGrain->ar_coeff_shift_minus_6 + 6; + + for (i = 0; i < 24; ++i) + fg_params->ar_coeffs_y[i] = pi->pFilmGrain->ar_coeffs_y_plus_128[i] - 128; + + for (i = 0; i < 25; ++i) { + fg_params->ar_coeffs_cb[i] = pi->pFilmGrain->ar_coeffs_cb_plus_128[i] - 128; + fg_params->ar_coeffs_cr[i] = pi->pFilmGrain->ar_coeffs_cr_plus_128[i] - 128; + } + + fg_params->overlap_flag = pi->pFilmGrain->flags.overlap_flag; + fg_params->clip_to_restricted_range = pi->pFilmGrain->flags.clip_to_restricted_range; + radv_vcn_av1_init_film_grain_buffer(fg_params, fg_buf); + } + + result.uncompressed_header_size = 0; + for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) { + result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i]; + for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j) + result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j]; + } + for (i = 0; i < av1_pic_info->tileCount && i < 256; ++i) { + result.tile_info[i].offset = av1_pic_info->pTileOffsets[i]; + result.tile_info[i].size = av1_pic_info->pTileSizes[i]; + } + + return result; +} + +static void +rvcn_av1_init_mode_probs(void *prob) +{ + rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob; + int i; + + memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf)); + memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf)); + memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf)); + 
memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf, + sizeof(default_palette_uv_color_index_cdf)); + memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf)); + memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf)); + memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf)); + memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf, sizeof(default_comp_ref_type_cdf)); + memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf)); + memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf)); + memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf)); + memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf)); + memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf)); + memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf)); + memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf)); + memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs)); + memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs)); + memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf)); + memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf)); + memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf)); + memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf)); + memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf)); + memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf)); + memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf)); + memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf)); + memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf)); + memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf)); + memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf)); + memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf)); + memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf)); + memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf)); + memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf)); + memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf)); + memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf)); + memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf)); + memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf)); + memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf)); + memcpy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf, sizeof(default_intra_ext_tx_cdf)); + memcpy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf, sizeof(default_inter_ext_tx_cdf)); + memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs)); + memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf)); + memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf)); + for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) + 
memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i], + sizeof(default_spatial_pred_seg_tree_cdf[i])); + memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf)); + memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf)); + memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs)); + memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf)); + memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf)); + memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf)); + memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf)); + memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs)); + memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf)); + memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf)); +} + +static void +rvcn_av1_init_mv_probs(void *prob) +{ + rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob; + + memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf)); + memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf)); + memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, + sizeof(default_nmv_context.comps[0].class0_cdf)); + memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, + sizeof(default_nmv_context.comps[0].class0_fp_cdf)); + memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, + sizeof(default_nmv_context.comps[0].class0_hp_cdf)); + memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, + sizeof(default_nmv_context.comps[0].classes_cdf)); + memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf)); + memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf)); + memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf)); + memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf)); + memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, + sizeof(default_nmv_context.comps[1].class0_cdf)); + memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, + sizeof(default_nmv_context.comps[1].class0_fp_cdf)); + memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, + sizeof(default_nmv_context.comps[1].class0_hp_cdf)); + memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, + sizeof(default_nmv_context.comps[1].classes_cdf)); + memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf)); + memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf)); + memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf)); + memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf)); + memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf)); + memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, + sizeof(default_nmv_context.comps[0].class0_cdf)); + memcpy(fc->ndvc_0_class0_fp_cdf, 
default_nmv_context.comps[0].class0_fp_cdf, + sizeof(default_nmv_context.comps[0].class0_fp_cdf)); + memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, + sizeof(default_nmv_context.comps[0].class0_hp_cdf)); + memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, + sizeof(default_nmv_context.comps[0].classes_cdf)); + memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf)); + memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf)); + memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf)); + memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf)); + memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, + sizeof(default_nmv_context.comps[1].class0_cdf)); + memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, + sizeof(default_nmv_context.comps[1].class0_fp_cdf)); + memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, + sizeof(default_nmv_context.comps[1].class0_hp_cdf)); + memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, + sizeof(default_nmv_context.comps[1].classes_cdf)); + memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf)); + memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf)); + memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf)); +} + +static void +rvcn_av1_default_coef_probs(void *prob, int index) +{ + rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob; + + memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index])); + memcpy(fc->eob_extra_cdf, av1_default_eob_extra_cdfs[index], sizeof(av1_default_eob_extra_cdfs[index])); + memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index])); + memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index])); + memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index], + sizeof(av1_default_coeff_base_multi_cdfs[index])); + memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index], + sizeof(av1_default_coeff_base_eob_multi_cdfs[index])); + memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index])); + memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index])); + memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index])); + memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index])); + memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index])); + memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index])); + memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index])); +} + +static void +rvcn_vcn4_init_mode_probs(void *prob) +{ + rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob; + int i; + + memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf)); 
+ memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf)); + memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf)); + memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf, + sizeof(default_palette_uv_color_index_cdf)); + memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf)); + memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf)); + memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf)); + memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf, sizeof(default_comp_ref_type_cdf)); + memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf)); + memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf)); + memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf)); + memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf)); + memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf)); + memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf)); + memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf)); + memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs)); + memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs)); + memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf)); + memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf)); + memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf)); + memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf)); + memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf)); + memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf)); + memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf)); + memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf)); + memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf)); + memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf)); + memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf)); + memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf)); + memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf)); + memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf)); + memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf)); + memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf)); + memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf)); + memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf)); + memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf)); + memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf)); + memcpy(fc->intra_ext_tx_cdf, &default_intra_ext_tx_cdf[1], sizeof(default_intra_ext_tx_cdf[1]) * 2); + memcpy(fc->inter_ext_tx_cdf, &default_inter_ext_tx_cdf[1], sizeof(default_inter_ext_tx_cdf[1]) * 3); + memcpy(fc->skip_cdfs, default_skip_cdfs, 
sizeof(default_skip_cdfs)); + memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf)); + memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf)); + for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) + memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i], + sizeof(default_spatial_pred_seg_tree_cdf[i])); + memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf)); + memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf)); + memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs)); + memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf)); + memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf)); + memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf)); + memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf)); + memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs)); + memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf)); + memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf)); +} + +static void +rvcn_vcn4_av1_init_mv_probs(void *prob) +{ + rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob; + + memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf)); + memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf)); + memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, + sizeof(default_nmv_context.comps[0].class0_cdf)); + memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, + sizeof(default_nmv_context.comps[0].class0_fp_cdf)); + memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, + sizeof(default_nmv_context.comps[0].class0_hp_cdf)); + memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, + sizeof(default_nmv_context.comps[0].classes_cdf)); + memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf)); + memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf)); + memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf)); + memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf)); + memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, + sizeof(default_nmv_context.comps[1].class0_cdf)); + memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, + sizeof(default_nmv_context.comps[1].class0_fp_cdf)); + memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, + sizeof(default_nmv_context.comps[1].class0_hp_cdf)); + memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, + sizeof(default_nmv_context.comps[1].classes_cdf)); + memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf)); + memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf)); + memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf)); + memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf)); + 
memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf)); + memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, + sizeof(default_nmv_context.comps[0].class0_cdf)); + memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, + sizeof(default_nmv_context.comps[0].class0_fp_cdf)); + memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, + sizeof(default_nmv_context.comps[0].class0_hp_cdf)); + memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, + sizeof(default_nmv_context.comps[0].classes_cdf)); + memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf)); + memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf)); + memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf)); + memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf)); + memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, + sizeof(default_nmv_context.comps[1].class0_cdf)); + memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, + sizeof(default_nmv_context.comps[1].class0_fp_cdf)); + memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, + sizeof(default_nmv_context.comps[1].class0_hp_cdf)); + memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, + sizeof(default_nmv_context.comps[1].classes_cdf)); + memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf)); + memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf)); + memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf)); +} + +static void +rvcn_vcn4_av1_default_coef_probs(void *prob, int index) +{ + rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob; + char *p; + int i, j; + unsigned size; + + memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index])); + + p = (char *)fc->eob_extra_cdf; + size = sizeof(av1_default_eob_extra_cdfs[0][0][0][0]) * EOB_COEF_CONTEXTS_VCN4; + for (i = 0; i < AV1_TX_SIZES; i++) { + for (j = 0; j < AV1_PLANE_TYPES; j++) { + memcpy(p, &av1_default_eob_extra_cdfs[index][i][j][3], size); + p += size; + } + } + + memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index])); + memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index])); + memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index], + sizeof(av1_default_coeff_base_multi_cdfs[index])); + memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index], + sizeof(av1_default_coeff_base_eob_multi_cdfs[index])); + memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index])); + memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index])); + memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index])); + memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index])); + memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], 
+   memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
+   memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
+   memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
+   memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
+   memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
+   memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
+   memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
+}
+
 static bool
 rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
                         struct radv_video_session_params *params, void *ptr, void *it_probs_ptr, uint32_t *slice_offset,
@@ -1117,9 +2092,6 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
    struct radv_image *img = dst_iv->image;
    struct radv_image_plane *luma = &img->planes[0];
    struct radv_image_plane *chroma = &img->planes[1];
-   struct radv_image_view *dpb_iv =
-      radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
-   struct radv_image *dpb = dpb_iv->image;
 
    header = ptr;
    sizes += sizeof(rvcn_dec_message_header_t);
@@ -1178,19 +2150,12 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
    decode->bsd_size = frame_info->srcBufferRange;
-   decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
-
    decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
    decode->sct_size = 0;
    decode->sc_coeff_size = 0;
 
    decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
-   decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
-   decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
-   decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
-   decode->db_array_mode = device->physical_device->vid_addr_gfx_mode;
-
    decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
    decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
@@ -1216,8 +2181,19 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
       decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
       decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
    }
+   if (vid->stream_type == RDECODE_CODEC_AV1)
+      decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
 
    *slice_offset = 0;
+
+   /* Currently, intra-only decoding works without a setup slot only for AV1.
+    * Other codecs require the application to pass a setup slot for this
+    * use-case, since the FW is not able to skip the write-out for H26X. To
+    * fix that properly, additional scratch space would be needed in the
+    * video session just for intra-only DPB targets.
+    */
+   int dpb_update_required = 1;
+
    switch (vid->vk.op) {
    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
       index_codec->size = sizeof(rvcn_dec_message_avc_t);
@@ -1234,19 +2210,62 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
       index_codec->message_id = RDECODE_MESSAGE_HEVC;
       break;
    }
+   case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
+      index_codec->size = sizeof(rvcn_dec_message_av1_t);
+      rvcn_dec_message_av1_t av1 = get_av1_msg(device, vid, params, frame_info, it_probs_ptr, &dpb_update_required);
+      memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
+      index_codec->message_id = RDECODE_MESSAGE_AV1;
+      assert(frame_info->referenceSlotCount < 9);
+      break;
+   }
    default:
       unreachable("unknown operation");
    }
+   if (dpb_update_required)
+      assert(frame_info->pSetupReferenceSlot != NULL);
+
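+   /* If no setup slot was provided (the AV1 intra-only case noted above),
+    * fall back to the decode target image so the DPB size/pitch programming
+    * below still describes a valid surface.
+    */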
+   struct radv_image_view *dpb_iv =
+      dpb_update_required
+         ? radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding)
+         : NULL;
+   struct radv_image *dpb = dpb_update_required ? dpb_iv->image : img;
+
+   decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
+   decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
+   decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
+   decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
+   decode->db_array_mode = device->physical_device->vid_addr_gfx_mode;
+
    decode->hw_ctxt_size = vid->ctx.size;
 
    if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
       return true;
 
    uint64_t addr;
+   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
+   addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
+   dynamic_dpb_t2->dpbCurrLo = addr;
+   dynamic_dpb_t2->dpbCurrHi = addr >> 32;
+
+   if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
+      /* The following loop fills in the reference slots for the current frame,
+       * so that all DPB addresses are "valid" (they point at the current
+       * decode target) and the firmware does not evict anything it should not.
+       * It will not perform any actual writes to these dummy slots.
+       */
+      for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
+         dynamic_dpb_t2->dpbAddrLo[i] = addr;
+         dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
+      }
+   }
+
    for (int i = 0; i < frame_info->referenceSlotCount; i++) {
+      int32_t slot_idx = frame_info->pReferenceSlots[i].slotIndex;
+      assert(slot_idx >= 0 && slot_idx < 16);
       struct radv_image_view *f_dpb_iv =
          radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
+      assert(f_dpb_iv != NULL);
       struct radv_image *dpb_img = f_dpb_iv->image;
 
       radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
@@ -1520,7 +2539,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
 
    for (i = 0; i < 2; i++) {
       for (j = 0; j < 15; j++)
-         result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j];
+         result.direct_reflist[i][j] = 0xff;
    }
 
    if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
@@ -1697,10 +2716,34 @@
 static void
 radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_video_session *vid = cmd_buffer->video.vid;
+   struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
    uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
    void *ptr;
    uint32_t out_offset;
+
+   if (vid->stream_type == RDECODE_CODEC_AV1) {
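+      /* The ctx buffer starts with four frame-context slots of frame_ctxt_size
+       * bytes each; session reset seeds them with the default mode, MV and
+       * coefficient probabilities for the matching context index.
+       */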
+      unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
+                                    ? align(sizeof(rvcn_av1_frame_context_t), 2048)
+                                    : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
+
+      uint8_t *ctxptr = cmd_buffer->device->ws->buffer_map(vid->ctx.mem->bo);
+      ctxptr += vid->ctx.offset;
+      if (pdev->av1_version == RDECODE_AV1_VER_0) {
+         for (unsigned i = 0; i < 4; ++i) {
+            rvcn_av1_init_mode_probs((void *)(ctxptr + i * frame_ctxt_size));
+            rvcn_av1_init_mv_probs((void *)(ctxptr + i * frame_ctxt_size));
+            rvcn_av1_default_coef_probs((void *)(ctxptr + i * frame_ctxt_size), i);
+         }
+      } else {
+         for (unsigned i = 0; i < 4; ++i) {
+            rvcn_vcn4_init_mode_probs((void *)(ctxptr + i * frame_ctxt_size));
+            rvcn_vcn4_av1_init_mv_probs((void *)(ctxptr + i * frame_ctxt_size));
+            rvcn_vcn4_av1_default_coef_probs((void *)(ctxptr + i * frame_ctxt_size), i);
+         }
+      }
+      cmd_buffer->device->ws->buffer_unmap(vid->ctx.mem->bo);
+   }
 
    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
    if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
@@ -1836,6 +2879,9 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
       size += sizeof(rvcn_dec_message_hevc_t);
       break;
+   case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
+      size += sizeof(rvcn_dec_message_av1_t);
+      break;
    default:
      unreachable("unsupported codec.");
    }
@@ -1845,6 +2891,9 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
    if (have_it(vid)) {
       radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
       it_probs_bo = cmd_buffer->upload.upload_bo;
+   } else if (have_probs(vid)) {
+      radv_vid_buffer_upload_alloc(cmd_buffer, sizeof(rvcn_dec_av1_segment_fg_t), &it_probs_offset, &it_probs_ptr);
+      it_probs_bo = cmd_buffer->upload.upload_bo;
    }
 
    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
@@ -1878,6 +2927,8 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
    send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
    if (have_it(vid))
       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
+   else if (have_probs(vid))
+      send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
 
    if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
       radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);