1 | ######################## BEGIN LICENSE BLOCK ######################## |
---|
2 | # The Original Code is Mozilla Communicator client code. |
---|
3 | # |
---|
4 | # The Initial Developer of the Original Code is |
---|
5 | # Netscape Communications Corporation. |
---|
6 | # Portions created by the Initial Developer are Copyright (C) 1998 |
---|
7 | # the Initial Developer. All Rights Reserved. |
---|
8 | # |
---|
9 | # Contributor(s): |
---|
10 | # Mark Pilgrim - port to Python |
---|
11 | # |
---|
12 | # This library is free software; you can redistribute it and/or |
---|
13 | # modify it under the terms of the GNU Lesser General Public |
---|
14 | # License as published by the Free Software Foundation; either |
---|
15 | # version 2.1 of the License, or (at your option) any later version. |
---|
16 | # |
---|
17 | # This library is distributed in the hope that it will be useful, |
---|
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
20 | # Lesser General Public License for more details. |
---|
21 | # |
---|
22 | # You should have received a copy of the GNU Lesser General Public |
---|
23 | # License along with this library; if not, write to the Free Software |
---|
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
---|
25 | # 02110-1301 USA |
---|
26 | ######################### END LICENSE BLOCK ######################### |
---|
27 | |
---|
28 | from .enums import ProbingState |
---|
29 | from .charsetprober import CharSetProber |
---|
30 | |
---|
31 | |
---|
class CharSetGroupProber(CharSetProber):
    """Prober that delegates to a list of child probers.

    Every chunk of input is forwarded to each still-active child in
    ``self.probers``.  The group reports the charset and language of the
    child that either signalled ``ProbingState.FOUND_IT`` or currently has
    the highest confidence.

    ``self.probers`` is created empty here; it is presumably filled in by
    subclasses before ``reset()`` is called (not visible in this file).
    """

    def __init__(self, lang_filter=None):
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        # Number of child probers that have not been ruled out yet.
        self._active_num = 0
        # Child probers; falsy entries are tolerated and skipped everywhere.
        self.probers = []
        # Child that currently represents the group's answer, if any.
        self._best_guess_prober = None

    def reset(self):
        """Reset the group and every child prober, re-activating all of them."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        """Charset name of the best-guess child prober, or ``None``."""
        if not self._best_guess_prober:
            # No winner recorded yet: get_confidence() selects one as a
            # side effect when at least one active child has confidence > 0.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        """Language of the best-guess child prober, or ``None``."""
        if not self._best_guess_prober:
            # Same lazy selection as in charset_name.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Feed ``byte_str`` to every active child prober.

        Stops early when a child reports ``FOUND_IT`` (that child becomes
        the best guess and the group switches to ``FOUND_IT``) or when the
        last active child reports ``NOT_ME`` (the group switches to
        ``NOT_ME``).

        Returns the group's probing state after processing the chunk.
        """
        for prober in self.probers:
            if not prober or not prober.active:
                continue
            state = prober.feed(byte_str)
            if not state:
                # Falsy state: nothing conclusive from this child
                # (presumably ProbingState.DETECTING -- confirm in enums).
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                # Record the positive result on the group itself.  Without
                # this the returned state never becomes FOUND_IT and a later
                # get_confidence() call would discard the winner and re-poll.
                self._state = ProbingState.FOUND_IT
                return self.state
            elif state == ProbingState.NOT_ME:
                # Rule this child out; give up once none are left.
                prober.active = False
                self._active_num -= 1
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the group's confidence and refresh the best-guess prober.

        Returns 0.99 if the group is in ``FOUND_IT`` and 0.01 if it is in
        ``NOT_ME``; in both cases the best-guess prober is left untouched.
        Otherwise every active child is polled, the one with the highest
        confidence strictly greater than 0.0 becomes the best guess, and
        that confidence is returned (0.0 when no child qualifies).
        """
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        elif state == ProbingState.NOT_ME:
            return 0.01
        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
            # Strict comparison: on ties the earliest prober in the list wins.
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf