1 | from __future__ import unicode_literals |
---|
2 | |
---|
3 | import re |
---|
4 | |
---|
5 | from .common import InfoExtractor |
---|
6 | from ..compat import compat_urllib_parse_unquote |
---|
7 | from ..utils import ( |
---|
8 | clean_html, |
---|
9 | ExtractorError, |
---|
10 | determine_ext, |
---|
11 | ) |
---|
12 | |
---|
13 | |
---|
class XVideosIE(InfoExtractor):
    """Extractor for single video pages on xvideos.com."""

    # The numeric part following "/video" in the path is captured as the id;
    # anything after it (e.g. a title slug) is accepted and ignored.
    _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
    _TEST = {
        'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
        'md5': '14cea69fcb84db54293b1e971466c2e1',
        'info_dict': {
            'id': '4588838',
            'ext': 'mp4',
            'title': 'Biker Takes his Girl',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        Returns a dict with the keys 'id', 'formats', 'title', 'thumbnail'
        and 'age_limit'.

        Raises ExtractorError (flagged as expected) when the page contains
        an ``<h1 class="inlineError">`` heading; the heading text is relayed
        in the error message.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The site reports problems inline in the page; surface them as-is.
        error_match = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
        if error_match is not None:
            site_message = clean_html(error_match.group(1))
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, site_message), expected=True)

        title = self._html_search_regex(
            r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
        # The thumbnail is optional, hence the non-fatal lookup.
        thumbnail = self._search_regex(
            r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)

        formats = []

        # Percent-encoded FLV link; default='' means a missing link yields
        # an empty string and no 'flv' format is added.
        encoded_flv_url = self._search_regex(
            r'flv_url=(.+?)&', webpage, 'video URL', default='')
        flv_url = compat_urllib_parse_unquote(encoded_flv_url)
        if flv_url:
            formats.append({
                'url': flv_url,
                'format_id': 'flv',
            })

        # Scan for setVideo<Kind>("<url>") calls; group 2 is only the quote
        # character used for the back-reference and carries no data.
        for call in re.finditer(
                r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage):
            format_id = call.group(1).lower()
            format_url = call.group(3)

            if format_id == 'hls':
                hls_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
                formats.extend(hls_formats)
                continue

            if format_id not in ('urllow', 'urlhigh'):
                # Any other setVideo* call is not treated as a media URL.
                continue

            # format_id[3:] drops the "url" prefix, leaving "low" / "high".
            # The low variant gets an explicit quality of -2; the high
            # variant leaves quality unset (None).
            if format_id.endswith('low'):
                quality = -2
            else:
                quality = None
            formats.append({
                'url': format_url,
                'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]),
                'quality': quality,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'age_limit': 18,
        }