[khanacademy] Add support (Fixes #2066)

This commit is contained in:
Philipp Hagemeister 2014-01-07 09:35:34 +01:00
parent 0cdad20c75
commit 3d3538e422
4 changed files with 93 additions and 1 deletions

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# encoding: utf-8 # encoding: utf-8
# Allow direct execution # Allow direct execution
import os import os
import sys import sys
@ -30,6 +29,7 @@ from youtube_dl.extractor import (
SmotriUserIE, SmotriUserIE,
IviCompilationIE, IviCompilationIE,
ImdbListIE, ImdbListIE,
KhanAcademyIE,
) )
@ -198,6 +198,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Animated and Family Films') self.assertEqual(result['title'], u'Animated and Family Films')
self.assertTrue(len(result['entries']) >= 48) self.assertTrue(len(result['entries']) >= 48)
def test_khanacademy_topic(self):
dl = FakeYDL()
ie = KhanAcademyIE(dl)
result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'cryptography')
self.assertEqual(result['title'], u'Journey into cryptography')
self.assertEqual(result['description'], u'How have humans protected their secret messages through history? What has changed today?')
self.assertTrue(len(result['entries']) >= 3)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -98,6 +98,7 @@ from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE from .jpopsukitv import JpopsukiIE
from .kankan import KankanIE from .kankan import KankanIE
from .keezmovies import KeezMoviesIE from .keezmovies import KeezMoviesIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .keek import KeekIE from .keek import KeekIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE

View file

@ -1,4 +1,5 @@
import base64 import base64
import json
import os import os
import re import re
import socket import socket
@ -260,6 +261,15 @@ class InfoExtractor(object):
xml_string = transform_source(xml_string) xml_string = transform_source(xml_string)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def _download_json(self, url_or_request, video_id,
note=u'Downloading JSON metadata',
errnote=u'Unable to download JSON metadata'):
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
try:
return json.loads(json_string)
except ValueError as ve:
raise ExtractorError('Failed to download JSON', cause=ve)
def report_warning(self, msg, video_id=None): def report_warning(self, msg, video_id=None):
idstr = u'' if video_id is None else u'%s: ' % video_id idstr = u'' if video_id is None else u'%s: ' % video_id
self._downloader.report_warning( self._downloader.report_warning(

View file

@ -0,0 +1,71 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
unified_strdate,
)
class KhanAcademyIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
IE_NAME = 'KhanAcademy'
_TEST = {
'url': 'http://www.khanacademy.org/video/one-time-pad',
'file': 'one-time-pad.mp4',
'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
'info_dict': {
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,
'uploader': 'Brit Cruise',
'upload_date': '20120411',
}
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
if m.group('key') == 'video':
data = self._download_json(
'http://api.khanacademy.org/api/v1/videos/' + video_id,
video_id, 'Downloading video info')
upload_date = unified_strdate(data['date_added'])
uploader = ', '.join(data['author_names'])
return {
'_type': 'url_transparent',
'url': data['url'],
'id': video_id,
'title': data['title'],
'thumbnail': data['image_url'],
'duration': data['duration'],
'description': data['description'],
'uploader': uploader,
'upload_date': upload_date,
}
else:
# topic
data = self._download_json(
'http://api.khanacademy.org/api/v1/topic/' + video_id,
video_id, 'Downloading topic info')
entries = [
{
'_type': 'url',
'url': c['url'],
'id': c['id'],
'title': c['title'],
}
for c in data['children'] if c['kind'] in ('Video', 'Topic')]
return {
'_type': 'playlist',
'id': video_id,
'title': data['title'],
'description': data['description'],
'entries': entries,
}