[tiktok] Detect embeds

Closes #3799
This commit is contained in:
pukkandan 2022-05-20 06:01:08 +05:30
parent 0b9c08b47b
commit b801cd7179
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
2 changed files with 24 additions and 10 deletions

View file

@ -74,6 +74,7 @@ from .teachable import TeachableIE
from .ted import TedEmbedIE
from .theplatform import ThePlatformIE
from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .tube8 import Tube8IE
from .tunein import TuneInBaseIE
@ -3756,6 +3757,11 @@ class GenericIE(InfoExtractor):
if ruutu_urls:
return self.playlist_from_matches(ruutu_urls, video_id, video_title)
# Look for TikTok embeds
tiktok_urls = TikTokIE._extract_urls(webpage)
if tiktok_urls:
return self.playlist_from_matches(tiktok_urls, video_id, video_title)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:

View file

@ -1,28 +1,26 @@
import itertools
import json
import random
import re
import string
import time
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse
)
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
HEADRequest,
LazyList,
UnsupportedError,
get_first,
int_or_none,
join_nonempty,
LazyList,
qualities,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_get,
url_or_none,
qualities,
)
@ -36,6 +34,10 @@ class TikTokBaseIE(InfoExtractor):
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
@staticmethod
def _create_url(user_id, video_id):
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
@ -361,7 +363,7 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
_VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'
_VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
@ -466,7 +468,7 @@ class TikTokIE(TikTokBaseIE):
'info_dict': {
'id': '7059698374567611694',
'ext': 'mp4',
'title': 'tiktok video #7059698374567611694',
'title': 'TikTok video #7059698374567611694',
'description': '',
'uploader': 'pokemonlife22',
'creator': 'Pokemon',
@ -490,6 +492,11 @@ class TikTokIE(TikTokBaseIE):
'only_matching': True
}]
@classmethod
def _extract_urls(cls, webpage):
return [mobj.group('url') for mobj in re.finditer(
rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})', webpage)]
def _extract_aweme_app(self, aweme_id):
try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
@ -506,7 +513,8 @@ class TikTokIE(TikTokBaseIE):
return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url):
video_id = self._match_id(url)
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
url = self._create_url(user_id, video_id)
try:
return self._extract_aweme_app(video_id)