1 | from __future__ import unicode_literals |
---|
2 | |
---|
3 | from .common import InfoExtractor |
---|
4 | from ..compat import compat_urllib_parse_urlencode |
---|
5 | from ..utils import ( |
---|
6 | int_or_none, |
---|
7 | qualities, |
---|
8 | ) |
---|
9 | |
---|
10 | |
---|
class NprIE(InfoExtractor):
    """Extractor for NPR media-player pages.

    The numeric ``id`` query parameter of the player URL is used as the
    playlist id; every audio item of the matching story becomes one
    playlist entry.
    """

    _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
        'info_dict': {
            'id': '449974205',
            'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
        },
        'playlist_count': 7,
    }, {
        'url': 'http://www.npr.org/player/v2/mediaPlayer.html?action=1&t=1&islist=false&id=446928052&m=446929930&live=1',
        'info_dict': {
            'id': '446928052',
            'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
        },
        'playlist': [{
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '446929930',
                'ext': 'mp3',
                'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
                'duration': 402,
            },
        }],
    }]

    @staticmethod
    def _npr_formats(track, known_formats, rank):
        """Build the format dicts for one audio item.

        ``track`` is one element of the story's ``audio`` list,
        ``known_formats`` the tuple of accepted format ids and ``rank`` the
        callable that maps a format id to its ``quality`` value.  Entries
        that are empty, have no ``$text`` URL, or whose id is not in
        ``known_formats`` are left out.
        """
        collected = []
        for name, entry in track.get('format', {}).items():
            if not entry:
                continue
            # An entry may be a list of variants; only the first one is used.
            candidate = entry[0] if isinstance(entry, list) else entry
            media_url = candidate.get('$text')
            if media_url and name in known_formats:
                collected.append({
                    'url': media_url,
                    'format_id': name,
                    'ext': candidate.get('type'),
                    'quality': rank(name),
                })
        return collected

    def _real_extract(self, url):
        """Query the NPR API for the story behind ``url`` and return a playlist.

        Only the first story of the response is considered.  Each of its
        audio items yields an entry with ``id``, ``title``, ``duration`` and
        ``formats``; the story title becomes the playlist title.
        """
        playlist_id = self._match_id(url)

        query_string = compat_urllib_parse_urlencode({
            'id': playlist_id,
            'fields': 'titles,audio,show',
            'format': 'json',
            'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
        })
        response = self._download_json(
            'http://api.npr.org/query?%s' % query_string, playlist_id)
        story = response['list']['story'][0]

        known_formats = ('threegp', 'mp4', 'mp3')
        rank = qualities(known_formats)

        entries = []
        for track in story.get('audio', []):
            # Text values in the API response are wrapped as {'$text': ...}.
            track_title = track.get('title', {}).get('$text')
            track_duration = int_or_none(
                track.get('duration', {}).get('$text'))
            track_formats = self._npr_formats(track, known_formats, rank)
            self._sort_formats(track_formats)
            entries.append({
                'id': track['id'],
                'title': track_title,
                'duration': track_duration,
                'formats': track_formats,
            })

        story_title = story.get('title', {}).get('$text')
        return self.playlist_result(entries, playlist_id, story_title)
---|