1 | # coding: utf-8 |
---|
2 | from __future__ import unicode_literals |
---|
3 | |
---|
4 | import re |
---|
5 | |
---|
6 | from .common import InfoExtractor |
---|
7 | from ..utils import ( |
---|
8 | ExtractorError, |
---|
9 | int_or_none, |
---|
10 | ) |
---|
11 | |
---|
12 | |
---|
class AolIE(InfoExtractor):
    """Information extractor for videos served through on.aol.com / aol.com.

    Accepts regular ``aol.com`` page URLs as well as the internal
    ``aol-video:<id>`` pseudo URL. The video id is the trailing path
    component, with any ``<slug>-`` prefix stripped by ``_VALID_URL``.
    """

    IE_NAME = 'on.aol.com'
    _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'

    _TESTS = [{
        # video with 5min ID
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
        'md5': '18ef68f48740e86ae94b98da815eec42',
        'info_dict': {
            'id': '518167793',
            'ext': 'mp4',
            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
            'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
            'timestamp': 1395405060,
            'upload_date': '20140321',
            'uploader': 'Newsy Studio',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # video with vidible ID
        'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
        'info_dict': {
            'id': '5707d6b8e4b090497b04f706',
            'ext': 'mp4',
            'title': 'Netflix is Raising Rates',
            'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
            'upload_date': '20160408',
            'timestamp': 1460123280,
            'uploader': 'Veuer',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
        'only_matching': True,
    }, {
        'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
        'only_matching': True,
    }, {
        'url': 'http://on.aol.com/video/519442220',
        'only_matching': True,
    }, {
        'url': 'aol-video:5707d6b8e4b090497b04f706',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video details JSON for ``url`` and build the info dict.

        Raises ExtractorError (expected=True) when the feed API answers with
        a ``statusText`` other than ``'Ok'``.
        """
        video_id = self._match_id(url)

        response = self._download_json(
            'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
            video_id)['response']
        if response['statusText'] != 'Ok':
            # Surface the API's own message to the user instead of a traceback.
            raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)

        video_data = response['data']
        formats = []

        # HLS master playlist, when the feed provides one.
        m3u8_url = video_data.get('videoMasterPlaylist')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))

        # `renditions` is optional metadata: tolerate both a missing key and
        # an explicit null so a usable HLS format list is not thrown away.
        for rendition in video_data.get('renditions') or []:
            video_url = rendition.get('url')
            if not video_url:
                continue
            ext = rendition.get('format')
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                f = {
                    'url': video_url,
                    'format_id': rendition.get('quality'),
                }
                # Progressive URLs may embed the frame size as <width>x<height>.
                mobj = re.search(r'(\d+)x(\d+)', video_url)
                if mobj:
                    f.update({
                        'width': int(mobj.group(1)),
                        'height': int(mobj.group(2)),
                    })
                formats.append(f)
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        return {
            'id': video_id,
            'title': video_data['title'],
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('publishDate')),
            'view_count': int_or_none(video_data.get('views')),
            'description': video_data.get('description'),
            'uploader': video_data.get('videoOwner'),
            'formats': formats,
        }