mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-10 15:14:57 +00:00
[rutube:playlist] Add extractor (closes #13534)
This commit is contained in:
parent
51aee72d16
commit
debed8d759
2 changed files with 85 additions and 0 deletions
|
@ -899,6 +899,7 @@ from .rutube import (
|
|||
RutubeEmbedIE,
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
RutubePlaylistIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
|
|
|
@ -7,10 +7,14 @@ import itertools
|
|||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
try_get,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
@ -42,8 +46,24 @@ class RutubeIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    The base-class check must accept the URL.  When the URL carries a
    query string, it is additionally required that ``pl_id`` is the one
    and only query parameter present.
    """
    query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)

    # Result of the regular base-class check on the URL as given.
    matches = super(RutubeIE, cls).suitable(url)

    # No query parameters at all: the base-class verdict stands.
    if not query:
        return matches

    # With parameters present, only a lone pl_id is tolerated.
    return matches and 'pl_id' in query and len(query) == 1
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [mobj.group('url') for mobj in re.finditer(
|
||||
|
@ -193,3 +213,67 @@ class RutubePersonIE(RutubeChannelIE):
|
|||
}]
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||
|
||||
|
||||
class RutubePlaylistIE(InfoExtractor):
    """Extractor for Rutube playlists referenced from a video URL.

    A URL is treated as a playlist when it matches ``_VALID_URL`` and its
    query string carries a non-zero integer ``pl_id`` together with a
    ``pl_type`` parameter.  The playlist contents are read page by page
    from the JSON API addressed by ``_PAGE_TEMPLATE``.
    """
    IE_NAME = 'rutube:playlist'
    IE_DESC = 'Rutube playlists'
    _TESTS = [{
        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
        'info_dict': {
            'id': '4252',
        },
        'playlist_count': 25,
    }]

    # The lazy prefix picks the first ``pl_id`` in the query string and
    # ``\b`` stops look-alike parameters (e.g. ``xpl_id``) from matching.
    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
    # NOTE(review): the playlist type is hard-coded to "source" here although
    # suitable() accepts any ``pl_type`` value -- confirm other types work.
    _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s'

    @classmethod
    def suitable(cls, url):
        """Return True if ``url`` is a Rutube playlist URL.

        The URL has to match ``_VALID_URL`` (checked by the base class) and
        its query string must contain a non-zero integer ``pl_id`` as well
        as a ``pl_type`` parameter.
        """
        # Without this check any URL carrying pl_id/pl_type, on any site,
        # would be claimed by this extractor.
        if not super(RutubePlaylistIE, cls).suitable(url):
            return False
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        playlist_id = int_or_none(params.get('pl_id', [None])[0])
        return bool(playlist_id and params.get('pl_type'))

    def _real_extract(self, url):
        """Extract the playlist whose id is embedded in ``url``."""
        return self._extract_playlist(self._match_id(url))

    def _extract_playlist(self, playlist_id):
        """Collect all entries of playlist ``playlist_id``.

        Pages are requested starting from 1 until a page has no results or
        does not announce a following page.  Returns a playlist result
        holding one ``url_result`` entry per video, titled with the first
        ``name`` value reported by the API.
        """
        entries = []
        playlist_title = None

        for pagenum in itertools.count(1):
            # Ask for JSON explicitly; per the original author's note the
            # request would otherwise advertise application/xml.
            page = self._download_json(
                self._PAGE_TEMPLATE % (playlist_id, pagenum), playlist_id,
                'Downloading metadata for page %s' % pagenum,
                headers={'Accept': 'application/json'})

            # Remember the title from the first page that provides one, so
            # that a trailing empty page cannot break the final result.
            playlist_title = playlist_title or page.get('name')

            results = page.get('results')
            if not results:
                break

            for result in results:
                video_url = result.get('video_url')
                if not video_url:
                    # An entry without a URL cannot be resolved later on.
                    continue
                entry = self.url_result(video_url, 'Rutube')
                category = try_get(result, lambda x: x['category']['name'])
                entry.update({
                    'id': result.get('id'),
                    'uploader': try_get(result, lambda x: x['author']['name']),
                    'uploader_id': try_get(result, lambda x: x['author']['id']),
                    'upload_date': unified_strdate(result.get('created_ts')),
                    'title': result.get('title'),
                    'description': result.get('description'),
                    'thumbnail': result.get('thumbnail_url'),
                    'duration': int_or_none(result.get('duration')),
                    # "categories" is the standard info-dict field name.
                    'categories': [category] if category else None,
                    'age_limit': 18 if result.get('is_adult') else 0,
                    'view_count': int_or_none(result.get('hits')),
                    'is_live': result.get('is_livestream'),
                    'webpage_url': video_url,
                })
                entries.append(entry)

            # Stop unless the API explicitly announces another page; a
            # missing flag must not lead to requesting pages forever.
            if not page.get('has_next'):
                break

        return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
|
Loading…
Reference in a new issue