1 | # coding: utf-8 |
---|
2 | from __future__ import unicode_literals |
---|
3 | |
---|
4 | from .common import InfoExtractor |
---|
5 | |
---|
6 | |
---|
class CriterionIE(InfoExtractor):
    """Extractor for film pages on criterion.com.

    Matches URLs of the form ``criterion.com/films/<numeric id>-<slug>`` and
    reads the media and thumbnail URLs from the ``so.addVariable(...)``
    script calls embedded in the page.
    """

    # The numeric part of the film slug is captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
    _TEST = {
        'url': 'http://www.criterion.com/films/184-le-samourai',
        'md5': 'bc51beba55685509883a9a7830919ec3',
        'info_dict': {
            'id': '184',
            'ext': 'mp4',
            'title': 'Le Samouraï',
            'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single Criterion film page.

        ``id``, ``url`` and ``title`` are required: a failure to find the
        media URL or the title is left fatal. ``description`` and
        ``thumbnail`` are optional metadata and come back as ``None`` when
        the page does not provide them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Mandatory: without the media URL there is nothing to download,
        # so this lookup keeps the default fatal behaviour.
        final_url = self._search_regex(
            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
        title = self._og_search_title(webpage)
        description = self._html_search_meta('description', webpage)
        # Optional: a missing thumbnail must not abort an otherwise
        # successful extraction, hence fatal=False.
        thumbnail = self._search_regex(
            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
            webpage, 'thumbnail url', fatal=False)

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }