source: titan/mediathek/localhoster/lib/youtube_dl/extractor/infoq.py @ 40094

Last change on this file since 40094 was 40094, checked in by obi, 7 years ago

tithek add yoztube-dl support

File size: 4.9 KB
Line 
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import base64
6
7from ..compat import (
8    compat_urllib_parse_unquote,
9    compat_urlparse,
10)
11from ..utils import determine_ext
12from .bokecc import BokeCCBaseIE
13
14
class InfoQIE(BokeCCBaseIE):
    """Extractor for presentation pages on infoq.com.

    For pages whose URL contains ``/cn/`` the formats come from the BokeCC
    helper inherited from ``BokeCCBaseIE``; for every other page the formats
    are assembled from an RTMP stream, an HTTP video URL and an optional
    HTTP (mp3) audio download.
    """

    _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
        'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
        'info_dict': {
            'id': 'A-Few-of-My-Favorite-Python-Things',
            'ext': 'mp4',
            'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
            'title': 'A Few of My Favorite [Python] Things',
        },
    }, {
        'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
        'only_matching': True,
    }, {
        'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
        'md5': '4918d0cca1497f2244572caf626687ef',
        'info_dict': {
            'id': 'openstack-continued-delivery',
            'title': 'OpenStack持续交付之路',
            'ext': 'flv',
            'description': 'md5:308d981fb28fa42f49f9568322c683ff',
        },
    }, {
        'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
        'md5': '0e34642d4d9ef44bf86f66f6399672db',
        'info_dict': {
            'id': 'Simple-Made-Easy',
            'title': 'Simple Made Easy',
            'ext': 'mp3',
            'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
        },
        'params': {
            'format': 'bestaudio',
        },
    }]

    def _extract_rtmp_video(self, webpage):
        """Build the RTMP format entry from the page's ``jsclassref`` value.

        The value is base64-decoded, URL-unquoted and prefixed with ``mp4:``
        to form the RTMP play path.

        Returns a one-element list of format dicts, or an empty list when
        the page has no (or an empty) ``jsclassref`` assignment.
        """
        # The server URL is hardcoded
        video_url = 'rtmpe://video.infoq.com/cfx/st/'

        # Extract video URL
        encoded_id = self._search_regex(
            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
        if not encoded_id:
            # The lookup is optional (default=None); without an id there is
            # no play path to build, so contribute no RTMP format instead of
            # failing on None.
            return []

        real_id = compat_urllib_parse_unquote(
            base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
        playpath = 'mp4:' + real_id

        return [{
            'format_id': 'rtmp_video',
            'url': video_url,
            'ext': determine_ext(playpath),
            'play_path': playpath,
        }]

    def _extract_cookies(self, webpage):
        """Return a ``Cookie`` header value built from the page's constants.

        Reads ``InfoQConstants.scp``, ``.scs`` and ``.sck`` from the page and
        formats them as the CloudFront-Policy, CloudFront-Signature and
        CloudFront-Key-Pair-Id cookies.
        """
        policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
        signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
        key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
        return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
            policy, signature, key_pair_id)

    def _extract_http_video(self, webpage):
        """Return a one-element list with the HTTP video format.

        The URL is taken from the page's ``P.s`` assignment and the format
        carries the cookies from ``_extract_cookies`` as HTTP headers.
        """
        http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
        return [{
            'format_id': 'http_video',
            'url': http_video_url,
            'http_headers': {
                'Cookie': self._extract_cookies(webpage)
            },
        }]

    def _extract_http_audio(self, webpage, video_id):
        """Return the HTTP audio format, if the page offers a usable one.

        The file name comes from the hidden ``filename`` input on the page.
        Returns an empty list when that input is missing or empty, or when
        probing the resulting URL fails; otherwise a one-element list of
        format dicts.
        """
        fields = self._hidden_inputs(webpage)
        # Use .get(): pages without a download form have no 'filename' input,
        # and that simply means "no audio format", not an error.
        http_audio_url = fields.get('filename')
        if not http_audio_url:
            return []

        cookies_header = {'Cookie': self._extract_cookies(webpage)}

        # base URL is found in the Location header in the response returned by
        # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
        http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)

        # audio file seem to be missing some times even if there is a download link
        # so probe URL to make sure
        if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
            return []

        return [{
            'format_id': 'http_audio',
            'url': http_audio_url,
            'vcodec': 'none',
            'http_headers': cookies_header,
        }]

    def _real_extract(self, url):
        """Download the page at ``url`` and return its info dict.

        The result has the keys ``id``, ``title``, ``description`` and
        ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
        video_description = self._html_search_meta('description', webpage, 'description')

        if '/cn/' in url:
            # for China videos, HTTP video URL exists but always fails with 403
            formats = self._extract_bokecc_formats(webpage, video_id)
        else:
            formats = (
                self._extract_rtmp_video(webpage) +
                self._extract_http_video(webpage) +
                self._extract_http_audio(webpage, video_id))

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'formats': formats,
        }
Note: See TracBrowser for help on using the repository browser.