From 3af1fac7b0f43778e44b3b86e0c74bf25fb6f489 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 09:51:59 +0100 Subject: [PATCH 1/4] [dcn] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dcn.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/dcn.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06f21064b6..cc0da81d16 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -110,6 +110,7 @@ from .dailymotion import ( ) from .daum import DaumIE from .dbtv import DBTVIE +from .dcn import DcnIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py new file mode 100644 index 0000000000..5263def4c6 --- /dev/null +++ b/youtube_dl/extractor/dcn.py @@ -0,0 +1,46 @@ +from .common import InfoExtractor + +class DcnIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' 
+ _TEST = { + 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', + 'info_dict': + { + 'id': '17375', + 'ext': 'm3u8', + 'title': 'رحلة العمر : الحلقة 1', + 'description': '"في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة1"', + 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', + 'duration': '2041' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id='+video_id, + video_id + ) + title = json_data['title_ar']; + thumbnail = 'http://admin.mangomolo.com/analytics/'+json_data['img']; + duration = json_data['duration']; + description = json_data['description_ar']; + webpage = self._download_webpage( + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id='+json_data['id']+'&user_id='+json_data['user_id']+'&countries=Q0M=&w=100%&h=100%&filter=DENY&signature='+json_data['signature'], + video_id + ) + m3u8_url = self._html_search_regex( + r'file: "(?P<m3u8_url>.*?)"', + webpage, + 'm3u8_url', + group='m3u8_url' + ) + formats = self._extract_m3u8_formats(m3u8_url, video_id) + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'description': description, + 'formats': formats, + } From 9d681c2bb3b75a666b76d8e346ffab66b65f9132 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 10:00:24 +0100 Subject: [PATCH 2/4] remove unnecessary group name --- youtube_dl/extractor/dcn.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 5263def4c6..f76ebda9e5 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -30,10 
+30,9 @@ class DcnIE(InfoExtractor): video_id ) m3u8_url = self._html_search_regex( - r'file: "(?P<m3u8_url>.*?)"', + r'file:\s*"([^"]+)', webpage, - 'm3u8_url', - group='m3u8_url' + 'm3u8_url' ) formats = self._extract_m3u8_formats(m3u8_url, video_id) return { From cd6b555e19c601d575679dd29da0080eda7f8890 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 6 Aug 2015 19:17:50 +0100 Subject: [PATCH 3/4] [dcn] add origin to api request and fix the test and check with flake8 --- youtube_dl/extractor/dcn.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index f76ebda9e5..d44e8cef03 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -1,4 +1,9 @@ +# coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor +from ..compat import compat_urllib_request + class DcnIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' 
@@ -9,24 +14,29 @@ class DcnIE(InfoExtractor): 'id': '17375', 'ext': 'm3u8', 'title': 'رحلة العمر : الحلقة 1', - 'description': '"في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة1"', + 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', 'duration': '2041' - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id='+video_id, - video_id + request = compat_urllib_request.Request( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=' + video_id, + headers={'Origin': 'http://www.dcndigital.ae'} ) - title = json_data['title_ar']; - thumbnail = 'http://admin.mangomolo.com/analytics/'+json_data['img']; - duration = json_data['duration']; - description = json_data['description_ar']; + json_data = self._download_json(request, video_id) + title = json_data['title_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] + duration = json_data['duration'] + description = json_data['description_ar'] webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id='+json_data['id']+'&user_id='+json_data['user_id']+'&countries=Q0M=&w=100%&h=100%&filter=DENY&signature='+json_data['signature'], + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id ) m3u8_url = self._html_search_regex( From 
6d30cf04db9c9662dbb30c2490e24eb5c6dca4c3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 7 Aug 2015 10:01:18 +0100 Subject: [PATCH 4/4] [dcn] fix type and key errors --- youtube_dl/extractor/dcn.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index d44e8cef03..22ff35b56c 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_request +from ..utils import int_or_none class DcnIE(InfoExtractor): @@ -16,7 +17,7 @@ class DcnIE(InfoExtractor): 'title': 'رحلة العمر : الحلقة 1', 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', - 'duration': '2041' + 'duration': 2041 }, 'params': { # m3u8 download @@ -32,9 +33,9 @@ class DcnIE(InfoExtractor): ) json_data = self._download_json(request, video_id) title = json_data['title_ar'] - thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] - duration = json_data['duration'] - description = json_data['description_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data.get('img') + duration = int_or_none(json_data.get('duration')) + description = json_data.get('description_ar') webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id