[extractor/JWPlatform] Fix extractor (#5112)

Fix bitrate and filesize extraction and support embeds with unquoted urls.

Related: #5106 

Authored by: coletdjnz
This commit is contained in:
coletdjnz 2022-10-04 08:37:48 +13:00 committed by GitHub
parent 8671f995cc
commit d3a3d7f0cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 14 deletions

View file

@ -3587,7 +3587,8 @@ class InfoExtractor:
'url': source_url,
'width': int_or_none(source.get('width')),
'height': height,
'tbr': int_or_none(source.get('bitrate')),
'tbr': int_or_none(source.get('bitrate'), scale=1000),
'filesize': int_or_none(source.get('filesize')),
'ext': ext,
}
if source_url.startswith('rtmp'):

View file

@ -1071,18 +1071,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
{
# JWPlatform iframe
'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
'info_dict': {
'id': 'AG26UQXM',
'ext': 'mp4',
'upload_date': '20160719',
'timestamp': 468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
},
'add_ie': ['JWPlatform'],
},
{
# Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',

View file

@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# JWPlatform iframe
'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
'info_dict': {
'id': 'AG26UQXM',
'ext': 'mp4',
'upload_date': '20160719',
'timestamp': 1468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
'description': '',
'duration': 294.0,
},
}, {
# Player url not surrounded by quotes
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
'info_dict': {
'id': 'R10NQdhY',
'title': 'Playgirl',
'ext': 'mp4',
'upload_date': '20220624',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
'timestamp': 1656064800,
'description': 'BRD 1966, Will Tremper',
'duration': 5146.0,
},
'params': {'allowed_extractors': ['generic', 'jwplatform']},
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
# <input value=URL> is used by hyland.com
# if we find <iframe>, dont look for <input>
ret = re.findall(
r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
webpage)
if ret:
return ret