source: titan/mediathek/localhoster/lib/requests/chardet/sbcsgroupprober.py @ 44466

Last change on this file since 44466 was 44466, checked in by obi, 4 years ago

fix cloudflare

File size: 3.5 KB
Line 
1######################## BEGIN LICENSE BLOCK ########################
2# The Original Code is Mozilla Universal charset detector code.
3#
4# The Initial Developer of the Original Code is
5# Netscape Communications Corporation.
6# Portions created by the Initial Developer are Copyright (C) 2001
7# the Initial Developer. All Rights Reserved.
8#
9# Contributor(s):
10#   Mark Pilgrim - port to Python
11#   Shy Shalom - original C code
12#
13# This library is free software; you can redistribute it and/or
14# modify it under the terms of the GNU Lesser General Public
15# License as published by the Free Software Foundation; either
16# version 2.1 of the License, or (at your option) any later version.
17#
18# This library is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21# Lesser General Public License for more details.
22#
23# You should have received a copy of the GNU Lesser General Public
24# License along with this library; if not, write to the Free Software
25# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26# 02110-1301  USA
27######################### END LICENSE BLOCK #########################
28
29from .charsetgroupprober import CharSetGroupProber
30from .sbcharsetprober import SingleByteCharSetProber
31from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32                                Latin5CyrillicModel, MacCyrillicModel,
33                                Ibm866Model, Ibm855Model)
34from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37from .langthaimodel import TIS620ThaiModel
38from .langhebrewmodel import Win1255HebrewModel
39from .hebrewprober import HebrewProber
40from .langturkishmodel import Latin5TurkishModel
41
42
class SBCSGroupProber(CharSetGroupProber):
    """Group prober made up of the single-byte charset probers.

    The group holds one ``SingleByteCharSetProber`` per Cyrillic, Greek,
    Bulgarian, Thai and Turkish language model, followed by a
    ``HebrewProber`` and the two Win1255 Hebrew model probers it is
    wired to.
    """

    def __init__(self):
        """Create every member prober, then reset the group."""
        super(SBCSGroupProber, self).__init__()

        # Language models that each get a plain single-byte prober; the
        # order here is the order of the resulting ``self.probers`` list.
        # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
        #       after we retrain model (Latin2HungarianModel and
        #       Win1250HungarianModel are disabled until then).
        language_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            TIS620ThaiModel,
            Latin5TurkishModel,
        )
        self.probers = [SingleByteCharSetProber(model)
                        for model in language_models]

        # Hebrew uses a dedicated name prober shared by two model probers
        # built from the same Win1255 model: the "logical" one is created
        # with False as its second argument, the "visual" one with True.
        name_prober = HebrewProber()
        logical_prober = SingleByteCharSetProber(Win1255HebrewModel, False,
                                                 name_prober)
        visual_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
                                                name_prober)
        name_prober.set_model_probers(logical_prober, visual_prober)
        self.probers.extend((name_prober, logical_prober, visual_prober))

        self.reset()
Note: See TracBrowser for help on using the repository browser.