1 | from __future__ import unicode_literals |
---|
2 | |
---|
3 | import re |
---|
4 | |
---|
5 | from .common import InfoExtractor |
---|
6 | from ..compat import compat_str |
---|
7 | from ..utils import ( |
---|
8 | clean_html, |
---|
9 | int_or_none, |
---|
10 | unified_timestamp, |
---|
11 | update_url_query, |
---|
12 | ) |
---|
13 | |
---|
14 | |
---|
class RBMARadioIE(InfoExtractor):
    """Extractor for single episodes hosted on rbmaradio.com.

    The URL must match ``/shows/<show_id>/episodes/<id>``; both path
    components are used as keys into the JSON state embedded in the page.
    """

    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
        'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
        'info_dict': {
            'id': 'ford-lopatin-live-at-primavera-sound-2011',
            'ext': 'mp3',
            'title': 'Main Stage - Ford & Lopatin',
            'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2452,
            'timestamp': 1307103164,
            'upload_date': '20110603',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        Downloads the episode page, parses the ``__INITIAL_STATE__`` JSON
        object assigned inside a ``<script>`` element and reads the episode
        entry stored under ``['episodes'][show_id][episode_id]``.

        ``title`` and ``audioURL`` are mandatory (a missing key raises
        ``KeyError``); all remaining fields are optional and are looked up
        with ``.get`` so that their absence does not abort the extraction.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``timestamp`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('show_id')
        episode_id = mobj.group('id')

        webpage = self._download_webpage(url, episode_id)

        episode = self._parse_json(
            self._search_regex(
                r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>',
                webpage, 'json data'),
            episode_id)['episodes'][show_id][episode_id]

        title = episode['title']

        # Prefix the show name when the page provides one.
        show_title = episode.get('showTitle')
        if show_title:
            title = '%s - %s' % (show_title, title)

        # One format per bitrate: the same audio URL is requested with a
        # different value of the `cbr` query parameter each time.
        formats = [{
            'url': update_url_query(episode['audioURL'], query={'cbr': abr}),
            'format_id': compat_str(abr),
            'abr': abr,
            'vcodec': 'none',
        } for abr in (96, 128, 256)]

        description = clean_html(episode.get('longTeaser'))

        # `imageURL` is optional metadata.  A plain
        # `episode.get('imageURL', {})` only covers a missing key; a JSON
        # `null` (or any other non-dict value) would make the chained
        # `.get` raise AttributeError, so check the type explicitly and
        # fall back to no thumbnail.
        images = episode.get('imageURL')
        landscape = images.get('landscape') if isinstance(images, dict) else None
        thumbnail = self._proto_relative_url(landscape)

        duration = int_or_none(episode.get('duration'))
        timestamp = unified_timestamp(episode.get('publishedAt'))

        return {
            'id': episode_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }