1 | from __future__ import unicode_literals |
---|
2 | |
---|
3 | import re |
---|
4 | |
---|
5 | from .common import InfoExtractor |
---|
6 | from ..aes import aes_decrypt_text |
---|
7 | from ..compat import ( |
---|
8 | compat_str, |
---|
9 | compat_urllib_parse_unquote, |
---|
10 | ) |
---|
11 | from ..utils import ( |
---|
12 | determine_ext, |
---|
13 | ExtractorError, |
---|
14 | int_or_none, |
---|
15 | str_to_int, |
---|
16 | strip_or_none, |
---|
17 | ) |
---|
18 | |
---|
19 | |
---|
class KeezMoviesIE(InfoExtractor):
    # Extractor for keezmovies.com video pages. The URL slug in front of the
    # numeric id is optional, see the second entry of _TESTS.
    _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
        'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
        'info_dict': {
            'id': '1214711',
            'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
            'ext': 'mp4',
            'title': 'Petite Asian Lady Mai Playing In Bathtub',
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
            'age_limit': 18,
        }
    }, {
        'url': 'http://www.keezmovies.com/video/1214711',
        'only_matching': True,
    }]

    def _extract_info(self, url):
        """Download the video page and build the info dict from it.

        Formats are collected from the page's ``flashvars`` JSON object
        (``quality_<N>p`` keys and ``video_url``) and from a standalone
        ``flashvars.video_url = '...'`` assignment.

        Returns a ``(webpage, info)`` tuple so that callers can scrape
        additional fields from the already downloaded page.

        Raises ExtractorError (expected) when no format was found and the
        page carries the "no longer available" marker.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # _VALID_URL is looked up on self, so the pattern in use is not
        # guaranteed to define a display_id group; fall back to the id.
        display_id = (mobj.group('display_id')
                      if 'display_id' in mobj.groupdict()
                      else None) or mobj.group('id')

        webpage = self._download_webpage(
            url, display_id, headers={'Cookie': 'age_verified=1'})

        formats = []
        format_urls = set()

        title = None
        thumbnail = None
        duration = None
        encrypted = False

        def extract_format(format_url, height=None):
            # Append one format entry for format_url, skipping non-string,
            # non-http and already seen URLs.
            if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
                return
            if format_url in format_urls:
                return
            format_urls.add(format_url)
            # Bitrate and height are parsed from path fragments such as
            # "/500k/" or "_480p_".
            tbr = int_or_none(self._search_regex(
                r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
            if not height:
                height = int_or_none(self._search_regex(
                    r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
            if encrypted:
                # Decrypt the URL handed to this call. Reading the enclosing
                # scope's video_url here raised NameError for quality_*
                # entries (it is assigned only after that loop) and otherwise
                # ignored the URL actually being processed.
                format_url = aes_decrypt_text(
                    format_url, title, 32).decode('utf-8')
            formats.append({
                'url': format_url,
                'format_id': '%dp' % height if height else None,
                'height': height,
                'tbr': tbr,
            })

        flashvars = self._parse_json(
            self._search_regex(
                r'flashvars\s*=\s*({.+?});', webpage,
                'flashvars', default='{}'),
            display_id, fatal=False)

        if flashvars:
            title = flashvars.get('video_title')
            thumbnail = flashvars.get('image_url')
            duration = int_or_none(flashvars.get('video_duration'))
            encrypted = flashvars.get('encrypted') is True
            for key, value in flashvars.items():
                quality_mobj = re.search(r'quality_(\d+)[pP]', key)
                if quality_mobj:
                    extract_format(value, int(quality_mobj.group(1)))
            video_url = flashvars.get('video_url')
            # Only accept video_url when an extension can be determined.
            if video_url and determine_ext(video_url, None):
                extract_format(video_url)

        # Second source: a plain JavaScript assignment of the video URL.
        video_url = self._html_search_regex(
            r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
            webpage, 'video url', default=None, group='url')
        if video_url:
            extract_format(compat_urllib_parse_unquote(video_url))

        if not formats:
            if 'title="This video is no longer available"' in webpage:
                raise ExtractorError(
                    'Video %s is no longer available' % video_id, expected=True)

        self._sort_formats(formats)

        if not title:
            title = self._html_search_regex(
                r'<h1[^>]*>([^<]+)', webpage, 'title')

        return webpage, {
            'id': video_id,
            'display_id': display_id,
            'title': strip_or_none(title),
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': 18,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Return the info dict for url, extended with the view count."""
        webpage, info = self._extract_info(url)
        info['view_count'] = str_to_int(self._search_regex(
            r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
        return info