[thisav] Simplify and use unicode literals

This commit is contained in:
Sergey M. 2014-02-05 19:13:06 +07:00
parent 8c82077619
commit fa7df757a7

View file

@@ -1,22 +1,23 @@
#coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
from ..utils import determine_ext
class ThisAVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
_TEST = {
u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
u"file": u"47734.flv",
u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
u"info_dict": {
u"title": u"高樹マリア - Just fit",
u"uploader": u"dj7970",
u"uploader_id": u"dj7970"
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
'md5': '0480f1ef3932d901f0e0e719f188f19b',
'info_dict': {
'id': '47734',
'ext': 'flv',
'title': '高樹マリア - Just fit',
'uploader': 'dj7970',
'uploader_id': 'dj7970'
}
}
@@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
video_url = self._html_search_regex(
r"addVariable\('file','([^']+)'\);", webpage, u'video url')
r"addVariable\('file','([^']+)'\);", webpage, 'video url')
uploader = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
webpage, u'uploader name', fatal=False)
webpage, 'uploader name', fatal=False)
uploader_id = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
webpage, u'uploader id', fatal=False)
webpage, 'uploader id', fatal=False)
ext = determine_ext(video_url)
return {
'_type': 'video',
'id': video_id,
'url': video_url,
'uploader': uploader,