1 | # coding: utf-8 |
---|
2 | |
---|
3 | from __future__ import unicode_literals |
---|
4 | |
---|
5 | import re |
---|
6 | import json |
---|
7 | |
---|
8 | from .common import InfoExtractor |
---|
9 | from ..compat import compat_urlparse |
---|
10 | from ..utils import ( |
---|
11 | clean_html, |
---|
12 | ExtractorError, |
---|
13 | int_or_none, |
---|
14 | parse_duration, |
---|
15 | determine_ext, |
---|
16 | ) |
---|
17 | from .dailymotion import ( |
---|
18 | DailymotionIE, |
---|
19 | DailymotionCloudIE, |
---|
20 | ) |
---|
21 | |
---|
22 | |
---|
class FranceTVBaseInfoExtractor(InfoExtractor):
    """Shared extraction logic for the France Televisions extractors.

    Subclasses resolve a page URL to a (video id, catalogue) pair and pass
    it to `_extract_video`, which queries the getInfosOeuvre web service.
    """

    def _extract_video(self, video_id, catalogue):
        """Return the info dict for one video.

        video_id  -- value sent as the `idDiffusion` query parameter
        catalogue -- value sent as the `catalogue` query parameter

        Raises ExtractorError (expected=True) when the service answers with
        status 'NOK', or when the video lists allowed countries and the
        country reported by the geo web service is not among them.
        """
        info = self._download_json(
            'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
            % (video_id, catalogue),
            video_id, 'Downloading video JSON')

        if info.get('status') == 'NOK':
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)

        # Tolerate a missing/empty video list instead of failing with
        # IndexError/KeyError; the format loop below then collects nothing.
        videos = info.get('videos') or []

        # The geo restriction is read from the first video entry only.
        allowed_countries = videos[0].get('geoblocage') if videos else None
        georestricted = bool(allowed_countries)
        if georestricted:
            geo_info = self._download_json(
                'http://geo.francetv.fr/ws/edgescape.json', video_id,
                'Downloading geo restriction info')
            country = geo_info['reponse']['geo_info']['country_code']
            if country not in allowed_countries:
                raise ExtractorError(
                    'The video is not available from your location',
                    expected=True)

        formats = []
        for video in videos:
            # Entries that are not online or carry no URL are skipped.
            if video.get('statut') != 'ONLINE':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            format_id = video['format']
            ext = determine_ext(video_url)
            if ext == 'f4m':
                if georestricted:
                    # See https://github.com/rg3/youtube-dl/issues/3963
                    # m3u8 urls work fine
                    continue
                # The manifest URL has to be exchanged for a tokenized one.
                f4m_url = self._download_webpage(
                    'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
                    video_id, 'Downloading f4m manifest token', fatal=False)
                if f4m_url:
                    formats.extend(self._extract_f4m_formats(
                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
                        video_id, f4m_id=format_id, fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False))
            elif video_url.startswith('rtmp'):
                formats.append({
                    'url': video_url,
                    'format_id': 'rtmp-%s' % format_id,
                    'ext': 'flv',
                })
            else:
                # Plain URL: keep it only if it passes the validity check.
                if self._is_valid_url(video_url, video_id, format_id):
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                    })
        self._sort_formats(formats)

        # The title is mandatory; the sub-title is appended when present.
        title = info['titre']
        subtitle = info.get('sous_titre')
        if subtitle:
            title += ' - %s' % subtitle
        title = title.strip()

        # All subtitle tracks are filed under 'fr'.
        subtitles = {}
        subtitles_list = [{
            'url': subformat['url'],
            'ext': subformat.get('format'),
        } for subformat in (info.get('subtitles') or []) if subformat.get('url')]
        if subtitles_list:
            subtitles['fr'] = subtitles_list

        # Optional metadata: a missing field yields None rather than an error.
        image = info.get('image')
        diffusion = info.get('diffusion') or {}

        return {
            'id': video_id,
            'title': title,
            'description': clean_html(info.get('synopsis')),
            'thumbnail': compat_urlparse.urljoin(
                'http://pluzz.francetv.fr', image) if image else None,
            'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
            'timestamp': int_or_none(diffusion.get('timestamp')),
            'formats': formats,
            'subtitles': subtitles,
        }
---|
110 | |
---|
111 | |
---|
class PluzzIE(FranceTVBaseInfoExtractor):
    """Extractor for video pages on pluzz.francetv.fr."""

    IE_NAME = 'pluzz.francetv.fr'
    _VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'

    # Can't use tests, videos expire in 7 days

    def _real_extract(self, url):
        """Find the video id in the page and extract it from the 'Pluzz' catalogue."""
        page_name = self._match_id(url)
        page = self._download_webpage(url, page_name)

        # Prefer the `id_video` meta tag; fall back to the numeric
        # `data-diffusion` attribute when the meta tag yields nothing.
        diffusion_id = (
            self._html_search_meta(
                'id_video', page, 'video id', default=None)
            or self._search_regex(
                r'data-diffusion=["\'](\d+)', page, 'video id'))

        return self._extract_video(diffusion_id, 'Pluzz')
---|
130 | |
---|
131 | |
---|
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    """Extractor for article pages on the francetvinfo.fr sites."""

    IE_NAME = 'francetvinfo.fr'
    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'

    _TESTS = [{
        'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        'info_dict': {
            'id': '84981923',
            'ext': 'mp4',
            'title': 'Soir 3',
            'upload_date': '20130826',
            'timestamp': 1377548400,
            'subtitles': {
                'fr': 'mincount:2',
            },
        },
        'params': {
            # m3u8 downloads
            'skip_download': True,
        },
    }, {
        'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
        'info_dict': {
            'id': 'EV_20019',
            'ext': 'mp4',
            'title': 'Débat des candidats à la Commission européenne',
            'description': 'Débat des candidats à la Commission européenne',
        },
        'params': {
            'skip_download': 'HLS (reqires ffmpeg)'
        },
        'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
    }, {
        'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
        'info_dict': {
            'id': 'NI_173343',
            'ext': 'mp4',
            'title': 'Les entreprises familiales : le secret de la réussite',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'timestamp': 1433273139,
            'upload_date': '20150602',
        },
        'params': {
            # m3u8 downloads
            'skip_download': True,
        },
    }, {
        'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
        'info_dict': {
            'id': 'NI_657393',
            'ext': 'mp4',
            'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
            'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'timestamp': 1458300695,
            'upload_date': '20160318',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Dailymotion embed
        'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
        'md5': 'ee7f1828f25a648addc90cb2687b1f12',
        'info_dict': {
            'id': 'x4iiko0',
            'ext': 'mp4',
            'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
            'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
            'timestamp': 1467011958,
            'upload_date': '20160627',
            'uploader': 'France Inter',
            'uploader_id': 'x2q2ez',
        },
        'add_ie': ['Dailymotion'],
    }, {
        'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve an article page to its video.

        Tried in order: a Dailymotion Cloud embed, regular Dailymotion
        embeds (returned as a playlist), then a native `id@catalogue`
        reference handed to `_extract_video`.
        """
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        # 1. Dailymotion Cloud embed: delegate to its extractor.
        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
        if dmcloud_url:
            return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key())

        # 2. Dailymotion embeds: one playlist entry per embedded URL.
        dailymotion_urls = DailymotionIE._extract_urls(webpage)
        if dailymotion_urls:
            entries = []
            for embed_url in dailymotion_urls:
                entries.append(
                    self.url_result(embed_url, DailymotionIE.ie_key()))
            return self.playlist_result(entries)

        # 3. Native player: the page references the video as "<id>@<catalogue>".
        id_patterns = (
            r'id-video=([^@]+@[^"]+)',
            r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
        )
        video_ref = self._search_regex(id_patterns, webpage, 'video id')
        video_id, catalogue = video_ref.split('@')
        return self._extract_video(video_id, catalogue)
---|
234 | |
---|
235 | |
---|
class FranceTVIE(FranceTVBaseInfoExtractor):
    """Extractor for the france2/3/4/5/o.fr channel sites and embed.francetv.fr."""

    IE_NAME = 'francetv'
    IE_DESC = 'France 2, 3, 4, 5 and Ô'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?france[2345o]\.fr/
                                (?:
                                    emissions/[^/]+/(?:videos|diffusions)|
                                    emission/[^/]+|
                                    videos|
                                    jt
                                )
                            /|
                            embed\.francetv\.fr/\?ue=
                        )
                        (?P<id>[^/?]+)
                    '''

    _TESTS = [
        # france2
        {
            'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
            'md5': 'c03fc87cb85429ffd55df32b9fc05523',
            'info_dict': {
                'id': '109169362',
                'ext': 'flv',
                'title': '13h15, le dimanche...',
                'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
                'upload_date': '20140914',
                'timestamp': 1410693600,
            },
        },
        # france3
        {
            'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
            'md5': '679bb8f8921f8623bd658fa2f8364da0',
            'info_dict': {
                'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
                'ext': 'mp4',
                'title': 'Le scandale du prix des médicaments',
                'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
                'upload_date': '20131113',
                'timestamp': 1384380000,
            },
        },
        # france4
        {
            'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
            'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
            'info_dict': {
                'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
                'ext': 'mp4',
                'title': 'Hero Corp Making of - Extrait 1',
                'description': 'md5:c87d54871b1790679aec1197e73d650a',
                'upload_date': '20131106',
                'timestamp': 1383766500,
            },
        },
        # france5
        {
            'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
            'md5': 'f6c577df3806e26471b3d21631241fd0',
            'info_dict': {
                'id': '123327454',
                'ext': 'flv',
                'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
                'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
                'upload_date': '20150831',
                'timestamp': 1441035120,
            },
        },
        # franceo
        {
            'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
            'md5': '47d5816d3b24351cdce512ad7ab31da8',
            'info_dict': {
                'id': '125377621',
                'ext': 'flv',
                'title': 'Infô soir',
                'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
                'upload_date': '20150718',
                'timestamp': 1437241200,
                'duration': 414,
            },
        },
        {
            # francetv embed
            'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
            'info_dict': {
                'id': 'EV_30231',
                'ext': 'flv',
                'title': 'Alcaline, le concert avec Calogero',
                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
                'upload_date': '20150226',
                'timestamp': 1424989860,
                'duration': 5400,
            },
        },
        {
            'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
            'only_matching': True,
        },
        {
            'url': 'http://www.franceo.fr/videos/125377617',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Locate the `id@catalogue` player reference in the page and extract it."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The reference appears either in an href or in a player.setVideo(...) call.
        video_ref = self._html_search_regex(
            r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
            webpage, 'video ID')
        video_id, catalogue = video_ref.split('@')

        return self._extract_video(video_id, catalogue)
---|
352 | |
---|
353 | |
---|
class GenerationQuoiIE(InfoExtractor):
    """Extractor for portrait pages on generation-quoi.france2.fr.

    The page's JSON descriptor supplies an id that is used to build a
    Dailymotion video URL; extraction is delegated to the Dailymotion
    extractor.
    """

    IE_NAME = 'france2.fr:generation-quoi'
    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'

    _TEST = {
        'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
        'info_dict': {
            'id': 'k7FJX8VBcvvLmX4wA5Q',
            'ext': 'mp4',
            'title': 'Génération Quoi - Garde à Vous',
            'uploader': 'Génération Quoi',
        },
        'params': {
            # It uses Dailymotion
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the portrait's JSON descriptor and hand over to Dailymotion."""
        display_id = self._match_id(url)
        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
        # Use the framework's JSON helper rather than downloading the text and
        # calling json.loads by hand, as the other extractors in this file do.
        info = self._download_json(info_url, display_id)
        return self.url_result(
            'http://www.dailymotion.com/video/%s' % info['id'],
            ie='Dailymotion')
---|
379 | |
---|
380 | |
---|
class CultureboxIE(FranceTVBaseInfoExtractor):
    """Extractor for pages on culturebox.francetvinfo.fr."""

    IE_NAME = 'culturebox.francetvinfo.fr'
    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'

    _TEST = {
        'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
        'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
        'info_dict': {
            'id': 'EV_50111',
            'ext': 'flv',
            'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
            'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
            'upload_date': '20150320',
            'timestamp': 1426892400,
            'duration': 2760.9,
        },
    }

    def _real_extract(self, url):
        """Locate the `id@catalogue` reference in the page and extract it.

        Raises ExtractorError (expected=True) when the page carries the
        "no longer available in replay" notice.
        """
        name = re.match(self._VALID_URL, url).group('name')
        webpage = self._download_webpage(url, name)

        # Bail out early on the site's "replay no longer available" banner.
        if ">Ce live n'est plus disponible en replay<" in webpage:
            raise ExtractorError('Video %s is not available' % name, expected=True)

        video_ref = self._search_regex(
            r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id')
        video_id, catalogue = video_ref.split('@')

        return self._extract_video(video_id, catalogue)
---|