mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-14 07:12:41 +00:00
Merge branch 'bleacherreport' of github.com:remitamine/youtube-dl into remitamine-bleacherreport
This commit is contained in:
commit
2c28da8e05
5 changed files with 218 additions and 118 deletions
|
@ -61,6 +61,10 @@ from .beatportpro import BeatportProIE
|
|||
from .bet import BetIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
)
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bpb import BpbIE
|
||||
|
|
84
youtube_dl/extractor/amp.py
Normal file
84
youtube_dl/extractor/amp.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class AMPIE(InfoExtractor):
|
||||
# parse Akamai Adaptive Media Player feed
|
||||
def _extract_feed_info(self, url):
|
||||
item = self._download_json(
|
||||
url, None, 'Downloading Akamai AMP feed',
|
||||
'Unable to download Akamai AMP feed')['channel']['item']
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
def get_media_node(name, default=None):
|
||||
media_name = 'media-%s' % name
|
||||
media_group = item.get('media-group') or item
|
||||
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
|
||||
|
||||
thumbnails = []
|
||||
media_thumbnail = get_media_node('thumbnail')
|
||||
if media_thumbnail:
|
||||
if isinstance(media_thumbnail, dict):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data['@attributes']
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
media_subtitle = get_media_node('subTitle')
|
||||
if media_subtitle:
|
||||
if isinstance(media_subtitle, dict):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data['@attributes']
|
||||
lang = subtitle.get('lang') or 'en'
|
||||
subtitles[lang] = [{'url': subtitle['href']}]
|
||||
|
||||
formats = []
|
||||
media_content = get_media_node('content')
|
||||
if isinstance(media_content, dict):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data['@attributes']
|
||||
media_type = media['type']
|
||||
if media_type == 'video/f4m':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False)
|
||||
if f4m_formats:
|
||||
formats.extend(f4m_formats)
|
||||
elif media_type == 'application/x-mpegURL':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data['media-category']['@attributes']['label'],
|
||||
'url': media['url'],
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': get_media_node('title'),
|
||||
'description': get_media_node('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
|
||||
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
|
||||
'formats': formats,
|
||||
}
|
106
youtube_dl/extractor/bleacherreport.py
Normal file
106
youtube_dl/extractor/bleacherreport.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class BleacherReportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
|
||||
'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
|
||||
'info_dict': {
|
||||
'id': '2496438',
|
||||
'ext': 'mp4',
|
||||
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
|
||||
'uploader_id': 3992341,
|
||||
'description': 'CFB, ACC, Florida State',
|
||||
'timestamp': 1434380212,
|
||||
'upload_date': '20150615',
|
||||
'uploader': 'Team Stream Now ',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
|
||||
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
|
||||
'info_dict': {
|
||||
'id': '2586817',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
|
||||
'timestamp': 1446839961,
|
||||
'uploader': 'Sean Fay',
|
||||
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
|
||||
'uploader_id': 6466954,
|
||||
'upload_date': '20151011',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
|
||||
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
|
||||
|
||||
thumbnails = []
|
||||
primary_photo = article_data.get('primaryPhoto')
|
||||
if primary_photo:
|
||||
thumbnails = [{
|
||||
'url': primary_photo['url'],
|
||||
'width': primary_photo.get('width'),
|
||||
'height': primary_photo.get('height'),
|
||||
}]
|
||||
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'id': article_id,
|
||||
'title': article_data['title'],
|
||||
'uploader': article_data.get('author', {}).get('name'),
|
||||
'uploader_id': article_data.get('authorId'),
|
||||
'timestamp': parse_iso8601(article_data.get('createdAt')),
|
||||
'thumbnails': thumbnails,
|
||||
'comment_count': int_or_none(article_data.get('commentsCount')),
|
||||
'view_count': int_or_none(article_data.get('hitCount')),
|
||||
}
|
||||
|
||||
video = article_data.get('video')
|
||||
if video:
|
||||
video_type = video['type']
|
||||
if video_type == 'cms.bleacherreport.com':
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||
elif video_type == 'ooyala.com':
|
||||
info['url'] = 'ooyala:%s' % video['id']
|
||||
elif video_type == 'youtube.com':
|
||||
info['url'] = video['id']
|
||||
elif video_type == 'vine.co':
|
||||
info['url'] = 'https://vine.co/v/%s' % video['id']
|
||||
else:
|
||||
info['url'] = video_type + video['id']
|
||||
return info
|
||||
else:
|
||||
raise ExtractorError('no video in the article', expected=True)
|
||||
|
||||
|
||||
class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'md5': 'f0ca220af012d4df857b54f792c586bb',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'ext': 'flv',
|
||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
|
||||
info['id'] = video_id
|
||||
return info
|
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
|
@ -12,14 +12,11 @@ from ..compat import (
|
|||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class DramaFeverBaseIE(InfoExtractor):
|
||||
class DramaFeverBaseIE(AMPIE):
|
||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||
_NETRC_MACHINE = 'dramafever'
|
||||
|
||||
|
@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||
'timestamp': 1404336058,
|
||||
'upload_date': '20140702',
|
||||
'duration': 343,
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).replace('/', '.')
|
||||
|
||||
try:
|
||||
feed = self._download_json(
|
||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
|
||||
video_id, 'Downloading episode JSON')['channel']['item']
|
||||
info = self._extract_feed_info(
|
||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
raise ExtractorError(
|
||||
'Currently unavailable in your country.', expected=True)
|
||||
raise
|
||||
|
||||
media_group = feed.get('media-group', {})
|
||||
|
||||
formats = []
|
||||
for media_content in media_group['media-content']:
|
||||
src = media_content.get('@attributes', {}).get('url')
|
||||
if not src:
|
||||
continue
|
||||
ext = determine_ext(src)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src, video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': src,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = media_group.get('media-title')
|
||||
description = media_group.get('media-description')
|
||||
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
|
||||
thumbnail = self._proto_relative_url(
|
||||
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
|
||||
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
|
||||
|
||||
subtitles = {}
|
||||
for media_subtitle in media_group.get('media-subTitle', []):
|
||||
lang = media_subtitle.get('@attributes', {}).get('lang')
|
||||
href = media_subtitle.get('@attributes', {}).get('href')
|
||||
if not lang or not href:
|
||||
continue
|
||||
subtitles[lang] = [{
|
||||
'ext': 'ttml',
|
||||
'url': href,
|
||||
}]
|
||||
|
||||
series_id, episode_number = video_id.split('.')
|
||||
episode_info = self._download_json(
|
||||
# We only need a single episode info, so restricting page size to one episode
|
||||
|
@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||
if value:
|
||||
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||
subtitles.setdefault('English', []).append({
|
||||
info['subtitiles'].setdefault('English', []).append({
|
||||
'ext': 'srt',
|
||||
'url': subfile,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
return info
|
||||
|
||||
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
|
|
|
@ -2,14 +2,10 @@ from __future__ import unicode_literals
|
|||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
from .amp import AMPIE
|
||||
|
||||
|
||||
class FoxNewsIE(InfoExtractor):
|
||||
class FoxNewsIE(AMPIE):
|
||||
IE_DESC = 'Fox News and Fox Business Video'
|
||||
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
|
@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
|
|||
'id': '3937480',
|
||||
'ext': 'flv',
|
||||
'title': 'Frozen in Time',
|
||||
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
|
||||
'description': '16-year-old girl is size of toddler',
|
||||
'duration': 265,
|
||||
'timestamp': 1304411491,
|
||||
'upload_date': '20110503',
|
||||
# 'timestamp': 1304411491,
|
||||
# 'upload_date': '20110503',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
|
@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
|
|||
'id': '3922535568001',
|
||||
'ext': 'mp4',
|
||||
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
|
||||
'description': "Congressman discusses the president's executive action",
|
||||
'description': "Congressman discusses president's plan",
|
||||
'duration': 292,
|
||||
'timestamp': 1417662047,
|
||||
'upload_date': '20141204',
|
||||
# 'timestamp': 1417662047,
|
||||
# 'upload_date': '20141204',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
|
@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
|
|||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
video = self._download_json(
|
||||
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
|
||||
|
||||
item = video['channel']['item']
|
||||
title = item['title']
|
||||
description = item['description']
|
||||
timestamp = parse_iso8601(item['dc-date'])
|
||||
|
||||
media_group = item['media-group']
|
||||
duration = None
|
||||
formats = []
|
||||
for media in media_group['media-content']:
|
||||
attributes = media['@attributes']
|
||||
video_url = attributes['url']
|
||||
if video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
|
||||
elif video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
|
||||
elif not video_url.endswith('.smil'):
|
||||
duration = int_or_none(attributes.get('duration'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': media['media-category']['@attributes']['label'],
|
||||
'preference': 1,
|
||||
'vbr': int_or_none(attributes.get('bitrate')),
|
||||
'filesize': int_or_none(attributes.get('fileSize'))
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
media_thumbnail = media_group['media-thumbnail']['@attributes']
|
||||
thumbnails = [{
|
||||
'url': media_thumbnail['url'],
|
||||
'width': int_or_none(media_thumbnail.get('width')),
|
||||
'height': int_or_none(media_thumbnail.get('height')),
|
||||
}] if media_thumbnail else []
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
info = self._extract_feed_info(
|
||||
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
|
||||
info['id'] = video_id
|
||||
return info
|
||||
|
|
Loading…
Reference in a new issue