[extractor/kompas] Add extractor (#4562)

Authored by: HobbyistDev
This commit is contained in:
HobbyistDev 2022-08-05 23:49:45 +09:00 committed by GitHub
parent ad26f15a06
commit d380fc1614
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 69 additions and 0 deletions

View file

@ -765,6 +765,7 @@ from .kicker import KickerIE
from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kompas import KompasVideoIE
from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE
from .kth import KTHIE

View file

@ -0,0 +1,68 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
float_or_none,
traverse_obj,
try_call,
)
# Video from www.kompas.tv and video.kompas.com seems use jixie player
# see [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
# [2] https://scripts.jixie.media/jxvideo.3.1.min.js for more info
class KompasVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
_TESTS = [{
'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
'info_dict': {
'id': '164474',
'ext': 'mp4',
'title': 'Kim Jong Un Siap Kirim Nuklir Lawan AS dan Korsel',
'description': 'md5:262530c4fb7462398235f9a5dba92456',
'uploader_id': '9262bf2590d558736cac4fff7978fcb1',
'display_id': 'kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
'duration': 85.066667,
'categories': ['news'],
'thumbnail': 'https://video.jixie.media/1001/164474/164474_1280x720.jpg',
'tags': 'count:9',
}
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id)
json_data = self._download_json(
'https://apidam.jixie.io/api/public/stream', display_id,
query={'metadata': 'full', 'video_id': video_id})['data']
formats, subtitles = [], {}
for stream in json_data['streams']:
if stream.get('type') == 'HLS':
fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
formats.extend(fmt)
self._merge_subtitles(sub, target=subtitles)
else:
formats.append({
'url': stream.get('url'),
'width': stream.get('width'),
'height': stream.get('height'),
'ext': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
'tags': try_call(lambda: json_data['metadata']['keywords'].split(',')),
'categories': try_call(lambda: json_data['metadata']['categories'].split(',')),
'uploader_id': json_data.get('owner_id'),
}