From ebfab36fca0901f99076158f9eb4f7fc9d87589b Mon Sep 17 00:00:00 2001
From: Lauren Liberda
Date: Sun, 31 Oct 2021 11:03:04 +0530
Subject: [PATCH] [tvp] Add TVPStreamIE (#1401)

Authored by: selfisekai
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/tvp.py        | 64 ++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index d47c066476..4f9de71e27 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1571,6 +1571,7 @@ from .tvnow import (
 from .tvp import (
     TVPEmbedIE,
     TVPIE,
+    TVPStreamIE,
     TVPWebsiteIE,
 )
 from .tvplay import (
diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
index 22cfbd25e0..48e2c6e764 100644
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -251,6 +251,70 @@ class TVPIE(InfoExtractor):
         }
 
 
+class TVPStreamIE(InfoExtractor):
+    """Extractor for TVP live channels on tvpstream.vod.tvp.pl.
+
+    Resolves a channel id (or the page's default channel when none is given)
+    to the id of the video in the page's player box and delegates the actual
+    extraction to TVPEmbedIE through a ``url_transparent`` result.
+    """
+    IE_NAME = 'tvp:stream'
+    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
+    _TESTS = [{
+        # untestable as "video" id changes many times across a day
+        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
+        'only_matching': True,
+    }, {
+        'url': 'tvpstream:39821455',
+        'only_matching': True,
+    }, {
+        # the default stream when you provide no channel_id, most probably TVP Info
+        'url': 'tvpstream:',
+        'only_matching': True,
+    }, {
+        'url': 'https://tvpstream.vod.tvp.pl/',
+        'only_matching': True,
+    }]
+
+    # %s is 'channel' or 'video'; captures the numeric data-<kind>-id attribute
+    # of the tvp_player_box element
+    # NOTE(review): the `<div\s[^>` prefix of the two patterns below was
+    # reconstructed - confirm the tag name against the live page markup
+    _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
+    # %s is the regex-escaped channel id; groups 1/2 hold the double-/single-
+    # quoted data-title, groups 3/4 the double-/single-quoted data-stationname
+    _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'
+
+    def _real_extract(self, url):
+        """Build a url_transparent result for the channel's current video."""
+        channel_id = self._match_id(url)
+        # The id is optional in _VALID_URL. Resolve the fallback first: in
+        # `fmt % channel_id or 'default'` the `%` binds tighter than `or`,
+        # so 'default' would never be used
+        display_id = channel_id or 'default'
+        channel_url = self._proto_relative_url('//tvpstream.vod.tvp.pl/?channel_id=%s' % display_id)
+        webpage = self._download_webpage(channel_url, display_id, 'Downloading channel webpage')
+        if not channel_id:
+            channel_id = self._search_regex(
+                self._PLAYER_BOX_RE % 'channel', webpage, 'default channel id')
+        video_id = self._search_regex(
+            self._PLAYER_BOX_RE % 'video', webpage, 'video id')
+        # _BUTTON_RE has one capture group per quote style, so fetch all four
+        # and keep whichever alternative actually matched
+        title_dq, title_sq, station_dq, station_sq = self._search_regex(
+            self._BUTTON_RE % re.escape(channel_id), webpage,
+            'audition title and station name', group=(1, 2, 3, 4))
+        return {
+            '_type': 'url_transparent',
+            'id': channel_id,
+            'url': 'tvp:%s' % video_id,
+            'title': title_dq if title_dq is not None else title_sq,
+            'alt_title': station_dq if station_dq is not None else station_sq,
+            'is_live': True,
+            'ie_key': 'TVPEmbed',
+        }
+
+
 class TVPEmbedIE(InfoExtractor):
     IE_NAME = 'tvp:embed'
     IE_DESC = 'Telewizja Polska'