From 3ef1d0c73373000bf65851c7afe9c68e707e4a97 Mon Sep 17 00:00:00 2001 From: Vukkk Date: Wed, 31 Aug 2016 10:56:11 +0200 Subject: [PATCH] [tv2hu] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tv2hu.py | 78 ++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 youtube_dl/extractor/tv2hu.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 68e1a5cfc2..7484099f6e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1031,6 +1031,7 @@ from .tv2 import ( TV2IE, TV2ArticleIE, ) +from .tv2hu import TV2HUIE from .tv3 import TV3IE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py new file mode 100644 index 0000000000..cb1ce822a8 --- /dev/null +++ b/youtube_dl/extractor/tv2hu.py @@ -0,0 +1,78 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class TV2HUIE(InfoExtractor): + IE_NAME = 'tv2.hu' + _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:musoraink/)?(?P[^/]+)/(?:teljes_adasok/)?(?P[0-9]+)_(.+?)\.html' + _JSON_URL = r'(?Phttps?://.+?\.tv2\.hu/vod/(?P\d+)/id_(?P\d+).+?&type=json)' + + _TESTS = [{ + 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html', + 'info_dict': { + 'id': '217679', + 'ext': 'mp4', + 'title': 'Ezek megőrültek! - 1. adás 1. rész', + 'upload_id': '220289', + 'upload_date': '20160826', + 'uploader': 'ezek_megorultek', + 'thumbnail': 're:^https?://.*\.jpg$' + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html', + 'only_matching': True + }, { + 'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, 'Downloading info page') + + json_url = re.search(self._JSON_URL, webpage) + + json_data = self._download_json( + json_url.group('json_url'), video_id, 'Downloading video info') + + manifest_url = json_data['bitrates']['hls'] + + formats = self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') + + for i in range(len(json_data['bitrates']['mp4'])): + quality = json_data.get('mp4Labels')[i] + + if quality.lower() == 'auto': + continue + + formats.append({ + 'protocol': 'http', + 'url': json_data['bitrates']['mp4'][i], + 'height': int(quality[:-1]), + 'width': int(quality[:-1])/9*16, + 'ext': 'mp4', + 'format_id': quality, + 'format_note': 'HTTP', + 'preference': int(quality[:-1]) + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage).strip(), + 'thumbnail': self._og_search_property('image', webpage), + 'uploader': self._search_regex(self._VALID_URL, url, 'uploader'), + 'upload_id': json_url.group('upload_id'), + 'upload_date': json_url.group('upload_date'), + 'formats': formats + }