From bccdbd22d559cc22b23bbd2ff96075ea5d88c944 Mon Sep 17 00:00:00 2001
From: coletdjnz <colethedj@protonmail.com>
Date: Mon, 6 Sep 2021 06:52:38 +0000
Subject: [PATCH] [Mediaklikk] Add Extractor (#867)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/17453, https://github.com/ytdl-org/youtube-dl/pull/25098
Fixes: https://github.com/ytdl-org/youtube-dl/issues/21431
Authored-by: tmarki, mrx23dot, coletdjnz
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/mediaklikk.py | 104 +++++++++++++++++++++++++++++++++
 yt_dlp/utils.py                |   1 +
 3 files changed, 106 insertions(+)
 create mode 100644 yt_dlp/extractor/mediaklikk.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4ef581b076..c745fd0793 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -725,6 +725,7 @@ from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
+from .mediaklikk import MediaKlikkIE
 from .mediaset import MediasetIE
 from .mediasite import (
     MediasiteIE,
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
new file mode 100644
index 0000000000..b9b6d739f5
--- /dev/null
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ..utils import (
+    unified_strdate
+)
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_str
+)
+
+
+class MediaKlikkIE(InfoExtractor):
+    _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)?
+                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/
+                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
+                        (?P<id>[^/#?_]+)'''
+
+    _TESTS = [{
+        # mediaklikk. date in html.
+        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
+        'info_dict': {
+            'id': '4754129',
+            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
+            'ext': 'mp4',
+            'upload_date': '20210901',
+            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
+        }
+    }, {
+        # m4sport
+        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
+        'info_dict': {
+            'id': '4754999',
+            'title': 'Gyémánt Liga, Párizs',
+            'ext': 'mp4',
+            'upload_date': '20210830',
+            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
+        }
+    }, {
+        # m4sport with *video/ url and no date
+        'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
+        'info_dict': {
+            'id': '4492099',
+            'title': 'Real Madrid - Chelsea 1-1',
+            'ext': 'mp4',
+            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
+        }
+    }, {
+        # hirado
+        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
+        'info_dict': {
+            'id': '4760120',
+            'title': 'Feltételeket szabott a főváros',
+            'ext': 'mp4',
+            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
+        }
+    }, {
+        # petofilive
+        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
+        'info_dict': {
+            'id': '4571948',
+            'title': 'Tha Shudras az Akusztikban',
+            'ext': 'mp4',
+            'upload_date': '20210607',
+            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        display_id = mobj.group('id')
+        webpage = self._download_webpage(url, display_id)
+
+        player_data_str = self._html_search_regex(
+            r'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data')
+        player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote)
+        video_id = compat_str(player_data['contentId'])
+        title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \
+            self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title')
+
+        upload_date = unified_strdate(
+            '%s-%s-%s' % (mobj.group('year'), mobj.group('month'), mobj.group('day')))
+        if not upload_date:
+            upload_date = unified_strdate(self._html_search_regex(
+                r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
+
+        player_data['video'] = player_data.pop('token')
+        player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
+        playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
+            self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
+
+        formats = self._extract_wowza_formats(
+            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'display_id': display_id,
+            'formats': formats,
+            'upload_date': upload_date,
+            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage)
+        }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index ce84f74166..983ca6cede 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1740,6 +1740,7 @@ DATE_FORMATS = (
     '%b %dth %Y %I:%M',
     '%Y %m %d',
     '%Y-%m-%d',
+    '%Y.%m.%d.',
     '%Y/%m/%d',
     '%Y/%m/%d %H:%M',
     '%Y/%m/%d %H:%M:%S',