diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index f30dcb6bfb..5303efd0d4 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -59,6 +59,17 @@ class YoutubeLiveChatFD(FragmentFD): self._append_fragment(ctx, processed_fragment) return continuation_id, offset + def try_refresh_replay_beginning(live_chat_continuation): + # choose the second option that contains the unfiltered live chat replay + refresh_continuation_id = try_get( + live_chat_continuation, + lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData']['continuation'], str) + if refresh_continuation_id: + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + return refresh_continuation_id, 0 + return parse_actions_replay(live_chat_continuation) + live_offset = 0 def parse_actions_live(live_chat_continuation): @@ -90,23 +101,29 @@ class YoutubeLiveChatFD(FragmentFD): self._append_fragment(ctx, processed_fragment) return continuation_id, live_offset - if info_dict['protocol'] == 'youtube_live_chat_replay': - parse_actions = parse_actions_replay - elif info_dict['protocol'] == 'youtube_live_chat': - parse_actions = parse_actions_live - - def download_and_parse_fragment(url, frag_index, request_data, headers): + def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): count = 0 while count <= fragment_retries: try: success, raw_fragment = dl_fragment(url, request_data, headers) if not success: return False, None, None - data = json.loads(raw_fragment) + try: + data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + data = None + if not data: + data = json.loads(raw_fragment) live_chat_continuation = try_get( data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} - continuation_id, offset = parse_actions(live_chat_continuation) + if info_dict['protocol'] == 'youtube_live_chat_replay': + if frag_index == 1: + continuation_id, offset = try_refresh_replay_beginning(live_chat_continuation) + else: + continuation_id, offset = parse_actions_replay(live_chat_continuation) + elif info_dict['protocol'] == 'youtube_live_chat': + continuation_id, offset = parse_actions_live(live_chat_continuation) return True, continuation_id, offset except compat_urllib_error.HTTPError as err: count += 1 @@ -142,8 +159,10 @@ class YoutubeLiveChatFD(FragmentFD): visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) if info_dict['protocol'] == 'youtube_live_chat_replay': url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id elif info_dict['protocol'] == 'youtube_live_chat': url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id frag_index = offset = 0 while continuation_id is not None: @@ -154,11 +173,13 @@ class YoutubeLiveChatFD(FragmentFD): } if frag_index > 1: request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} - headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data) - headers.update({'content-type': 'application/json'}) - fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' - success, continuation_id, offset = download_and_parse_fragment( - url, frag_index, fragment_request_data, headers) + headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data) + headers.update({'content-type': 'application/json'}) + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' + success, continuation_id, offset = download_and_parse_fragment( + url, frag_index, fragment_request_data, headers) + else: + success, continuation_id, offset = download_and_parse_fragment(chat_page_url, frag_index) if not success: return False if test: