1 | # coding: utf-8 |
---|
2 | |
---|
3 | from __future__ import unicode_literals |
---|
4 | |
---|
5 | import base64 |
---|
6 | |
---|
7 | from ..compat import ( |
---|
8 | compat_urllib_parse_unquote, |
---|
9 | compat_urlparse, |
---|
10 | ) |
---|
11 | from ..utils import determine_ext |
---|
12 | from .bokecc import BokeCCBaseIE |
---|
13 | |
---|
14 | |
---|
class InfoQIE(BokeCCBaseIE):
    """Extractor for presentation pages on infoq.com.

    For URLs containing ``/cn/`` the formats come from
    ``_extract_bokecc_formats``; for every other URL the RTMP video, HTTP
    video and HTTP audio formats scraped from the page are combined.
    """

    _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
        'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
        'info_dict': {
            'id': 'A-Few-of-My-Favorite-Python-Things',
            'ext': 'mp4',
            'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
            'title': 'A Few of My Favorite [Python] Things',
        },
    }, {
        'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
        'only_matching': True,
    }, {
        'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
        'md5': '4918d0cca1497f2244572caf626687ef',
        'info_dict': {
            'id': 'openstack-continued-delivery',
            'title': 'OpenStack持续交付之路',
            'ext': 'flv',
            'description': 'md5:308d981fb28fa42f49f9568322c683ff',
        },
    }, {
        'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
        'md5': '0e34642d4d9ef44bf86f66f6399672db',
        'info_dict': {
            'id': 'Simple-Made-Easy',
            'title': 'Simple Made Easy',
            'ext': 'mp3',
            'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
        },
        'params': {
            'format': 'bestaudio',
        },
    }]

    def _extract_rtmp_video(self, webpage):
        """Build the RTMP format from the page's ``jsclassref`` value.

        The value is base64-decoded, interpreted as UTF-8 and URL-unquoted
        to obtain the play path.

        Returns a one-element list with the format dict, or an empty list
        when the page has no (or an empty) ``jsclassref`` assignment.
        """
        # The server URL is hardcoded
        video_url = 'rtmpe://video.infoq.com/cfx/st/'

        # The lookup is non-fatal (default=None), so a missing value has to
        # be handled here rather than crashing on None.encode() below.
        encoded_id = self._search_regex(
            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
        if not encoded_id:
            return []

        real_id = compat_urllib_parse_unquote(
            base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
        playpath = 'mp4:' + real_id

        return [{
            'format_id': 'rtmp_video',
            'url': video_url,
            'ext': determine_ext(playpath),
            'play_path': playpath,
        }]

    def _extract_cookies(self, webpage):
        """Return the ``Cookie`` header value built from the page constants.

        Reads ``InfoQConstants.scp``, ``.scs`` and ``.sck`` from the page and
        formats them as the CloudFront-Policy, CloudFront-Signature and
        CloudFront-Key-Pair-Id cookies. Each lookup is done without a
        default, so a missing constant is reported by ``_search_regex``.
        """
        # Dots are escaped so that only the literal attribute access matches.
        policy = self._search_regex(
            r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
        signature = self._search_regex(
            r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
        key_pair_id = self._search_regex(
            r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
        return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
            policy, signature, key_pair_id)

    def _extract_http_video(self, webpage):
        """Return a one-element list with the HTTP video format.

        The URL is taken from the ``P.s = '...'`` assignment in the page and
        the format carries the cookies from ``_extract_cookies`` as HTTP
        headers.
        """
        http_video_url = self._search_regex(
            r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
        return [{
            'format_id': 'http_video',
            'url': http_video_url,
            'http_headers': {
                'Cookie': self._extract_cookies(webpage)
            },
        }]

    def _extract_http_audio(self, webpage, video_id):
        """Return the HTTP audio format, or an empty list if unavailable.

        The file name comes from the ``filename`` entry of the page's hidden
        inputs. An empty list is returned when that entry is absent or empty,
        or when probing the resulting URL fails.
        """
        fields = self._hidden_inputs(webpage)
        # .get() instead of [] so that a page without the input yields no
        # audio format instead of a KeyError.
        http_audio_url = fields.get('filename')
        if not http_audio_url:
            return []

        cookies_header = {'Cookie': self._extract_cookies(webpage)}

        # base URL is found in the Location header in the response returned by
        # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
        http_audio_url = compat_urlparse.urljoin(
            'http://res.infoq.com/downloads/mp3downloads/', http_audio_url)

        # The audio file seems to be missing sometimes even if there is a
        # download link, so probe the URL to make sure.
        if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
            return []

        return [{
            'format_id': 'http_audio',
            'url': http_audio_url,
            'vcodec': 'none',
            'http_headers': cookies_header,
        }]

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        The result holds ``id``, ``title``, ``description`` and ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
        video_description = self._html_search_meta('description', webpage, 'description')

        if '/cn/' in url:
            # for China videos, HTTP video URL exists but always fails with 403
            formats = self._extract_bokecc_formats(webpage, video_id)
        else:
            formats = (
                self._extract_rtmp_video(webpage) +
                self._extract_http_video(webpage) +
                self._extract_http_audio(webpage, video_id))

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'formats': formats,
        }