1 | # coding: utf-8 |
---|
2 | from __future__ import unicode_literals |
---|
3 | |
---|
4 | import re |
---|
5 | import time |
---|
6 | |
---|
7 | from .common import InfoExtractor |
---|
8 | from ..utils import ( |
---|
9 | determine_ext, |
---|
10 | js_to_json, |
---|
11 | ) |
---|
12 | |
---|
13 | |
---|
class IPrimaIE(InfoExtractor):
    """Extractor for videos published on play.iprima.cz.

    The video page supplies the product id and the Open Graph metadata;
    the stream manifests are read from the response of the
    ``/prehravac/init`` player endpoint.
    """

    _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'http://play.iprima.cz/gondici-s-r-o-33',
        'info_dict': {
            'id': 'p136534',
            'ext': 'mp4',
            'title': 'Gondíci s. r. o. (34)',
            'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
        },
        'params': {
            'skip_download': True,  # m3u8 download
        },
    }, {
        'url': 'http://play.iprima.cz/particka/particka-92',
        'only_matching': True,
    }, {
        # geo restricted
        'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Steps: download the page, read the product id from its
        ``data-product`` attribute, request the player page for that id and
        collect formats from the sources the player page references.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``formats``
        and ``description``.  Calls ``raise_geo_restricted`` when no format
        was found and the player page contains the
        ``GEO_IP_NOT_ALLOWED`` marker.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The URL slug is only used for the first download; from here on the
        # product id embedded in the page is the video id.
        video_id = self._search_regex(
            r'data-product="([^"]+)">', webpage, 'real id')

        playerpage = self._download_webpage(
            'http://play.iprima.cz/prehravac/init',
            video_id, note='Downloading player', query={
                '_infuse': 1,
                # round() yields a float on Python 2, which would be sent as
                # e.g. "1500000000.0"; always send an integer timestamp.
                '_ts': int(round(time.time())),
                'productId': video_id,
            }, headers={'Referer': url})

        formats = []

        def extract_formats(format_url, format_key=None, lang=None):
            """Append the formats found behind *format_url* to ``formats``.

            *format_key* is the lower-cased track group name (if known) and
            *lang* an optional language used for formats lacking one.
            """
            ext = determine_ext(format_url)
            if format_key != 'hls' and ext != 'm3u8':
                # Only HLS manifests are extracted.  DASH sources ('dash'
                # key or .mpd extension) and any unrecognised kind
                # contribute no formats.
                # NOTE(review): DASH extraction was unreachable (hidden
                # behind an early return) in the code this replaces, so it
                # stays disabled here; re-enable deliberately if wanted.
                return
            new_formats = self._extract_m3u8_formats(
                format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False)
            if lang:
                for fmt in new_formats:
                    if not fmt.get('language'):
                        fmt['language'] = lang
            formats.extend(new_formats)

        # Parsing is non-fatal and the regex defaults to '{}', so `options`
        # may be empty or falsy rather than a populated dict.
        options = self._parse_json(
            self._search_regex(
                r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
                playerpage, 'player options', default='{}'),
            video_id, transform_source=js_to_json, fatal=False)
        tracks_by_key = (
            options.get('tracks') if isinstance(options, dict) else None)
        # Guard every level of the scraped structure: a malformed value
        # should fall through to the regex fallback below, not raise.
        if isinstance(tracks_by_key, dict):
            for key, tracks in tracks_by_key.items():
                if not isinstance(tracks, list):
                    continue
                for track in tracks:
                    if not isinstance(track, dict):
                        continue
                    src = track.get('src')
                    if src:
                        extract_formats(src, key.lower(), track.get('lang'))

        if not formats:
            # Fallback: scan the raw player page for any quoted `src: "..."`
            # value and let the URL extension decide how it is handled.
            for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                extract_formats(src)

        if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
            self.raise_geo_restricted(countries=['CZ'])

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }