From 15eae44d74c80cca29cd5b24129585ad2d1e535f Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 11 Aug 2020 00:13:43 +0300 Subject: [PATCH] harden regex with lookbehind --- youtube_dl/downloader/youtube_live_chat.py | 4 ++-- youtube_dl/extractor/youtube.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index 697e525500..4932dd9c52 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -28,8 +28,8 @@ class YoutubeLiveChatReplayFD(FragmentFD): return self._download_fragment(ctx, url, info_dict, headers) def parse_yt_initial_data(data): - window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?);' - var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?);' + window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});' + var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});' for patt in window_patt, var_patt: try: raw_json = re.search(patt, data).group(1) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e143bbee7b..9fff8bdf4c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,8 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_yt_initial_data(self, video_id, webpage): config = self._search_regex( - (r'window\["ytInitialData"\]\s*=\s*(.*);', - r'var\s+ytInitialData\s*=\s*(.*?);'), + (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});', + r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'), webpage, 'ytInitialData', default=None) if config: return self._parse_json(