1 | # coding: utf-8 |
---|
2 | from __future__ import unicode_literals |
---|
3 | |
---|
4 | from .common import InfoExtractor |
---|
5 | from .cbs import CBSIE |
---|
6 | from ..utils import ( |
---|
7 | parse_duration, |
---|
8 | ) |
---|
9 | |
---|
10 | |
---|
class CBSNewsIE(CBSIE):
    """Extractor for cbsnews.com pages living under ``/news/`` or ``/videos/``.

    The page itself only supplies a media reference id; building the actual
    result is delegated to ``_extract_video_info`` (not defined in this
    class -- presumably inherited from ``CBSIE``).
    """

    IE_NAME = 'cbsnews'
    IE_DESC = 'CBS News'
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'

    _TESTS = [{
        'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
        'info_dict': {
            'id': 'tesla-and-spacex-elon-musks-industrial-empire',
            'ext': 'flv',
            'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
            'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
            'duration': 791,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Subscribers only',
    }, {
        'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
        'info_dict': {
            'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
            'ext': 'mp4',
            'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
            'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
            'upload_date': '20140404',
            'timestamp': 1396650660,
            'uploader': 'CBSI-NEW',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 205,
            'subtitles': {
                'en': [{
                    'ext': 'ttml',
                }],
            },
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Resolve a CBS News page URL to its media reference and extract it.

        The page HTML carries a single-quoted JSON blob in either a
        ``data-video-info`` or a ``data-video-player-options`` attribute.
        Its ``mpxRefId`` value is handed to ``_extract_video_info``.
        """
        page_slug = self._match_id(url)
        html = self._download_webpage(url, page_slug)

        embedded_json = self._html_search_regex(
            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
            html, 'video JSON info')
        player_data = self._parse_json(embedded_json, page_slug)

        # The payload is either the media item itself or a wrapper that
        # keeps the media item under an 'item' key.
        if 'item' in player_data:
            media_item = player_data['item']
        else:
            media_item = player_data

        return self._extract_video_info(media_item['mpxRefId'])
69 | |
---|
70 | |
---|
class CBSNewsLiveVideoIE(InfoExtractor):
    """Extractor for cbsnews.com ``/live/video/<slug>`` pages.

    Metadata comes from the CBSN rundown feed, queried by the URL slug; the
    page HTML itself is never downloaded.
    """

    IE_NAME = 'cbsnews:livevideo'
    IE_DESC = 'CBS News Live Videos'
    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'

    # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
    _TEST = {
        'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
        'info_dict': {
            'id': 'clinton-sanders-prepare-to-face-off-in-nh',
            'ext': 'mp4',
            'title': 'Clinton, Sanders Prepare To Face Off In NH',
            'duration': 334,
        },
        'skip': 'Video gone',
    }

    def _real_extract(self, url):
        """Build the info dict for a live-video story.

        The URL slug is used both as the result ``id`` and as the
        ``dvr_slug`` query parameter of the rundown feed. ``url`` and
        ``headline`` are read as required keys of the feed response; the
        thumbnail and duration fields are optional.
        """
        slug = self._match_id(url)

        feed_query = {
            'device': 'desktop',
            'dvr_slug': slug,
        }
        story = self._download_json(
            'http://feeds.cbsn.cbsnews.com/rundown/story', slug,
            query=feed_query)

        stream_formats = self._extract_akamai_formats(story['url'], slug)
        self._sort_formats(stream_formats)

        headline = story['headline']
        # Use the HD thumbnail when the feed has one, otherwise the SD one.
        thumbnail = story.get('thumbnail_url_hd') or story.get('thumbnail_url_sd')
        duration = parse_duration(story.get('segmentDur'))

        return {
            'id': slug,
            'display_id': slug,
            'title': headline,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': stream_formats,
        }