From bb5d84c9d2f1e978c3eddfb5ccbe138036682a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C4=83n=20Anh?= <65241526+demon071@users.noreply.github.com> Date: Sun, 16 Jul 2023 04:03:23 +0700 Subject: [PATCH] [ie/facebook:reel] Fix extraction (#7564) Closes #7469 Authored by: demon071, bashonly --- yt_dlp/extractor/facebook.py | 83 +++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 9f4d3fb789..574f8e8c95 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -24,6 +24,7 @@ from ..utils import ( parse_count, parse_qs, qualities, + str_or_none, traverse_obj, try_get, url_or_none, @@ -90,16 +91,16 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Asif Nawab Butt', - 'description': 'Asif Nawab Butt', + 'title': 'Asif', + 'description': '', 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', 'timestamp': 1399398998, 'thumbnail': r're:^https?://.*', + 'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl', + 'duration': 131.03, + 'concurrent_view_count': int, }, - 'expected_warnings': [ - 'title' - ] }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', @@ -151,7 +152,7 @@ class FacebookIE(InfoExtractor): # have 1080P, but only up to 720p in swf params # data.video.story.attachments[].media 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', - 'md5': '3f3798adb2b73423263e59376f1f5eb7', + 'md5': 'ca63897a90c9452efee5f8c40d080e25', 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', @@ -162,6 +163,9 @@ class FacebookIE(InfoExtractor): 'uploader': 'CNN', 'thumbnail': r're:^https?://.*', 'view_count': int, + 'uploader_id': '100059479812265', + 'concurrent_view_count': int, + 'duration': 44.478, }, }, { # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall @@ -170,12 +174,16 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '1417995061575415', 'ext': 'mp4', - 'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео', + 'title': 'Довгоочікуване відео | By Yaroslav - Facebook', 'description': 'Довгоочікуване відео', - 'timestamp': 1486648771, + 'timestamp': 1486648217, 'upload_date': '20170209', 'uploader': 'Yaroslav Korpan', - 'uploader_id': '100000948048708', + 'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl', + 'concurrent_view_count': int, + 'thumbnail': r're:^https?://.*', + 'view_count': int, + 'duration': 11736.446, }, 'params': { 'skip_download': True, @@ -192,9 +200,7 @@ class FacebookIE(InfoExtractor): 'uploader': 'La Guía Del Varón', 'thumbnail': r're:^https?://.*', }, - 'params': { - 'skip_download': True, - }, + 'skip': 'Requires logging in', }, { # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', @@ -208,9 +214,7 @@ class FacebookIE(InfoExtractor): 'uploader': 'Elisabeth Ahtn', 'uploader_id': '100013949973717', }, - 'params': { - 'skip_download': True, - }, + 'skip': 'Requires logging in', }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -252,7 +256,11 @@ class FacebookIE(InfoExtractor): 'timestamp': 1527084179, 'upload_date': '20180523', 'uploader': 'ESL One Dota 2', - 'uploader_id': '234218833769558', + 'uploader_id': '100066514874195', + 'duration': 4524.212, + 'view_count': int, + 'thumbnail': r're:^https?://.*', + 'concurrent_view_count': int, }, 'params': { 'skip_download': True, @@ -262,8 +270,17 @@ class FacebookIE(InfoExtractor): 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'info_dict': { 'id': '106560053808006', + 'ext': 'mp4', + 'title': 'Josef', + 'thumbnail': r're:^https?://.*', + 'concurrent_view_count': int, + 'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl', + 'timestamp': 1549275572, + 'duration': 3.413, + 'uploader': 'Josef Novak', + 'description': '', + 'upload_date': '20190204', }, - 'playlist_count': 2, }, { # data.video.story.attachments[].media 'url': 'https://www.facebook.com/watch/?v=647537299265662', @@ -276,6 +293,7 @@ class FacebookIE(InfoExtractor): 'id': '10157667649866271', }, 'playlist_count': 3, + 'skip': 'Requires logging in', }, { # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media 'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330', @@ -497,6 +515,13 @@ class FacebookIE(InfoExtractor): entries = [] def parse_graphql_video(video): + v_id = video.get('videoId') or video.get('id') or video_id + reel_info = traverse_obj( + video, ('creation_story', 'short_form_video_context', 'playback_video', {dict})) + if reel_info: + video = video['creation_story'] + video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner')) + video.update(reel_info) formats = [] q = qualities(['sd', 'hd']) for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), @@ -513,15 +538,15 @@ class FacebookIE(InfoExtractor): 'url': playable_url, }) extract_dash_manifest(video, formats) - v_id = video.get('videoId') or video.get('id') or video_id info = { 'id': v_id, 'formats': formats, 'thumbnail': traverse_obj( video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')), - 'uploader_id': try_get(video, lambda x: x['owner']['id']), - 'timestamp': int_or_none(video.get('publish_time')), - 'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), + 'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})), + 'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none), + 'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000) + or float_or_none(video.get('length_in_second'))), } process_formats(info) description = try_get(video, lambda x: x['savable_description']['text']) @@ -782,18 +807,18 @@ class FacebookReelIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.facebook.com/reel/1195289147628387', - 'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831', + 'md5': 'f13dd37f2633595982db5ed8765474d3', 'info_dict': { 'id': '1195289147628387', 'ext': 'mp4', - 'title': 'md5:9f5b142921b2dc57004fa13f76005f87', - 'description': 'md5:24ea7ef062215d295bdde64e778f5474', - 'uploader': 'Beast Camp Training', - 'uploader_id': '1738535909799870', - 'duration': 9.536, - 'thumbnail': r're:^https?://.*', + 'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e', + 'description': 'md5:22f03309b216ac84720183961441d8db', + 'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1', + 'uploader_id': '100040874179269', + 'duration': 9.579, + 'timestamp': 1637502609, 'upload_date': '20211121', - 'timestamp': 1637502604, + 'thumbnail': r're:^https?://.*', } }]