source: titan/mediathek/localhoster/lib/python2.7/gettext.py @ 40114

Last change on this file since 40114 was 40094, checked in by obi, 7 years ago

tithek add youtube-dl support

File size: 24.6 KB
Line 
1"""Internationalization and localization support.
2
3This module provides internationalization (I18N) and localization (L10N)
4support for your Python programs by providing an interface to the GNU gettext
5message catalog library.
6
7I18N refers to the operation by which a program is made aware of multiple
8languages.  L10N refers to the adaptation of your program, once
9internationalized, to the local language and cultural habits.
10
11"""
12
13# This module represents the integration of work, contributions, feedback, and
14# suggestions from the following people:
15#
16# Martin von Loewis, who wrote the initial implementation of the underlying
17# C-based libintlmodule (later renamed _gettext), along with a skeletal
18# gettext.py implementation.
19#
20# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
21# which also included a pure-Python implementation to read .mo files if
22# intlmodule wasn't available.
23#
24# James Henstridge, who also wrote a gettext.py module, which has some
25# interesting, but currently unsupported experimental features: the notion of
26# a Catalog class and instances, and the ability to add to a catalog file via
27# a Python API.
28#
29# Barry Warsaw integrated these modules, wrote the .install() API and code,
30# and conformed all C and Python code to Python's coding standards.
31#
32# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
33# module.
34#
35# J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.
36#
37# TODO:
38# - Lazy loading of .mo files.  Currently the entire catalog is loaded into
39#   memory, but that's probably bad for large translated programs.  Instead,
40#   the lexical sort of original strings in GNU .mo files should be exploited
41#   to do binary searches and lazy initializations.  Or you might want to use
42#   the undocumented double-hash algorithm for .mo files with hash tables, but
43#   you'll need to study the GNU gettext code to do this.
44#
45# - Support Solaris .mo file formats.  Unfortunately, we've been unable to
46#   find this format documented anywhere.
47
48
49import locale, copy, os, re, struct, sys
50from errno import ENOENT
51
52
# Public API of the module.  Every domain-specific context function defined
# in this file is listed (dpgettext, ldpgettext and dnpgettext alongside
# ldnpgettext) so that ``from gettext import *`` exposes the whole family
# instead of only part of it.
__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
           'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
           'bind_textdomain_codeset',
           'dgettext', 'dngettext', 'gettext', 'lgettext', 'ldgettext',
           'ldngettext', 'lngettext', 'ngettext',
           'pgettext', 'lpgettext', 'npgettext', 'lnpgettext',
           'dpgettext', 'ldpgettext', 'dnpgettext', 'ldnpgettext',
           ]

# Directory searched for catalogs when the caller does not name one.
_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
62
63
64def test(condition, true, false):
65    """
66    Implements the C expression:
67
68      condition ? true : false
69
70    Required to correctly interpret plural forms.
71    """
72    if condition:
73        return true
74    else:
75        return false
76
77
def c2py(plural):
    """Gets a C expression as used in PO files for plural forms and returns a
    Python lambda function that implements an equivalent expression.

    *plural* is the text following ``plural=`` in a Plural-Forms header.
    The returned callable takes the count ``n`` and returns the plural index
    as an int.

    Raises ValueError when the expression uses any identifier other than
    ``n`` or when its parentheses do not balance.

    NOTE(review): the translated expression is handed to eval().  The
    identifier check below is the only safeguard; it does not restrict
    operators or the size of numeric literals, so catalogs from untrusted
    sources should not be loaded.
    """
    # Security check, allow only the "n" identifier
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            # Neither Python 2 module exists; use the io implementation.
            from io import StringIO
    import token, tokenize
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    try:
        # The token stream is lazy: tokenizer errors surface while this
        # list is being built, hence the surrounding try.
        danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
    except tokenize.TokenError:
        raise ValueError(
            'plural forms expression error, maybe unbalanced parenthesis')
    if danger:
        raise ValueError('plural forms expression could be dangerous')

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')

    # "!x" becomes " not x"; the "[^=]" guard leaves "!=" untouched.
    expr = re.compile(r'\!([^=])')
    plural = expr.sub(' not \\1', plural)

    # Regular expression and replacement function used to transform
    # "a?b:c" to "test(a,b,c)".
    expr = re.compile(r'(.*?)\?(.*?):(.*)')
    def repl(x):
        # The else-branch may itself contain a ternary, so recurse into it.
        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
                                     expr.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses:
    # each "(" opens a new buffer and each ")" rewrites the ternaries of the
    # buffer it closes before appending it, parenthesised, to its parent.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            if len(stack) == 1:
                # A ")" with no matching "(".  Unclosed "(" are normally
                # reported by the tokenizer pass at the beginning.
                raise ValueError('unbalanced parenthesis in plural form')
            s = expr.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    plural = expr.sub(repl, stack.pop())

    # The generated lambda resolves test() from this module's globals.
    return eval('lambda n: int(%s)' % plural)
130
131
132
133def _expand_lang(locale):
134    from locale import normalize
135    locale = normalize(locale)
136    COMPONENT_CODESET   = 1 << 0
137    COMPONENT_TERRITORY = 1 << 1
138    COMPONENT_MODIFIER  = 1 << 2
139    # split up the locale into its base components
140    mask = 0
141    pos = locale.find('@')
142    if pos >= 0:
143        modifier = locale[pos:]
144        locale = locale[:pos]
145        mask |= COMPONENT_MODIFIER
146    else:
147        modifier = ''
148    pos = locale.find('.')
149    if pos >= 0:
150        codeset = locale[pos:]
151        locale = locale[:pos]
152        mask |= COMPONENT_CODESET
153    else:
154        codeset = ''
155    pos = locale.find('_')
156    if pos >= 0:
157        territory = locale[pos:]
158        locale = locale[:pos]
159        mask |= COMPONENT_TERRITORY
160    else:
161        territory = ''
162    language = locale
163    ret = []
164    for i in range(mask+1):
165        if not (i & ~mask):  # if all components for this combo exist ...
166            val = language
167            if i & COMPONENT_TERRITORY: val += territory
168            if i & COMPONENT_CODESET:   val += codeset
169            if i & COMPONENT_MODIFIER:  val += modifier
170            ret.append(val)
171    ret.reverse()
172    return ret
173
174
175
class NullTranslations:
    """Base translation class that hands messages back untranslated.

    Every lookup method first defers to the fallback chained with
    add_fallback(), if there is one.  Without a fallback the message itself
    is returned; the plural variants return msgid1 when ``n == 1`` and
    msgid2 otherwise.  Subclasses override _parse() to load a real catalog.
    """

    def __init__(self, fp=None):
        """Initialise empty state and parse *fp* when one is given."""
        self._info = {}
        self._charset = None
        self._output_charset = None
        self._fallback = None
        if fp is not None:
            self._parse(fp)

    def _parse(self, fp):
        """Hook for subclasses; the base class reads nothing from *fp*."""
        pass

    def add_fallback(self, fallback):
        """Append *fallback* to the end of the fallback chain."""
        if self._fallback:
            self._fallback.add_fallback(fallback)
        else:
            self._fallback = fallback

    def gettext(self, message):
        """Return the fallback's translation of *message*, or *message*."""
        if self._fallback:
            return self._fallback.gettext(message)
        return message

    def pgettext(self, context, message):
        """Like gettext(), for a message qualified by *context*."""
        if self._fallback:
            return self._fallback.pgettext(context, message)
        return message

    def lgettext(self, message):
        """Return the fallback's lgettext() result, or *message*."""
        if self._fallback:
            return self._fallback.lgettext(message)
        return message

    def lpgettext(self, context, message):
        """Like lgettext(), for a message qualified by *context*."""
        if self._fallback:
            return self._fallback.lpgettext(context, message)
        return message

    def ngettext(self, msgid1, msgid2, n):
        """Plural lookup: msgid1 when ``n == 1``, otherwise msgid2."""
        if self._fallback:
            return self._fallback.ngettext(msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def npgettext(self, context, msgid1, msgid2, n):
        """Like ngettext(), for messages qualified by *context*."""
        if self._fallback:
            return self._fallback.npgettext(context, msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def lngettext(self, msgid1, msgid2, n):
        """Return the fallback's lngettext() result, or the n == 1 choice."""
        if self._fallback:
            return self._fallback.lngettext(msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def ugettext(self, message):
        """Like gettext(), but the untranslated result is a unicode object."""
        if self._fallback:
            return self._fallback.ugettext(message)
        return unicode(message)

    def ungettext(self, msgid1, msgid2, n):
        """Like ngettext(), but the untranslated result is a unicode object."""
        if self._fallback:
            return self._fallback.ungettext(msgid1, msgid2, n)
        if n == 1:
            return unicode(msgid1)
        else:
            return unicode(msgid2)

    def lnpgettext(self, context, msgid1, msgid2, n):
        """Like lngettext(), for messages qualified by *context*."""
        if self._fallback:
            return self._fallback.lnpgettext(context, msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def info(self):
        """Return the catalog metadata dictionary."""
        return self._info

    def charset(self):
        """Return the catalog's character set, or None if none was set."""
        return self._charset

    def output_charset(self):
        """Return the charset set by set_output_charset(), or None."""
        return self._output_charset

    def set_output_charset(self, charset):
        """Record *charset* as the output character set."""
        self._output_charset = charset

    def install(self, unicode=False, names=None):
        """Bind ``_`` (and optionally more functions) in the builtins.

        ``_`` becomes ugettext() when *unicode* is true and gettext()
        otherwise.  *names* may be any object supporting ``in``; each of
        'gettext', 'ngettext', 'lgettext', 'lngettext', 'pgettext',
        'lpgettext', 'npgettext' and 'lnpgettext' found in it is installed
        under its own name.  With *unicode* true, 'ngettext' is bound to
        ungettext().  Other names are ignored.
        """
        try:
            import __builtin__ as builtins_module
        except ImportError:
            # The module carries this name where __builtin__ does not exist.
            import builtins as builtins_module
        namespace = builtins_module.__dict__
        namespace['_'] = self.ugettext if unicode else self.gettext
        if hasattr(names, "__contains__"):
            if "gettext" in names:
                namespace['gettext'] = namespace['_']
            if "ngettext" in names:
                namespace['ngettext'] = (self.ungettext if unicode
                                         else self.ngettext)
            # These have no unicode variant and are installed as they are.
            for name in ('lgettext', 'lngettext', 'pgettext', 'lpgettext',
                         'npgettext', 'lnpgettext'):
                if name in names:
                    namespace[name] = getattr(self, name)
290
class GNUTranslations(NullTranslations):
    """Translations read from a GNU gettext binary ``.mo`` catalog.

    _parse() loads the whole file into the ``_catalog`` dictionary.
    Singular entries are keyed by msgid and plural entries by
    ``(msgid1, plural_index)``.  Lookups that miss the catalog go to the
    fallback chain and finally return the untranslated message.
    """

    # Value of the leading 32-bit word, read as little endian, for
    # little-endian and big-endian .mo files respectively.
    LE_MAGIC = 0x950412de
    BE_MAGIC = 0xde120495

    # The encoding of a msgctxt and a msgid in a .mo file is
    # msgctxt + "\x04" + msgid (gettext version >= 0.15)
    CONTEXT = "%s\x04%s"

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of *fp*, fills ``self._catalog`` and, from the metadata
        entry (the one with an empty msgid), ``self._info``,
        ``self._charset`` and ``self.plural``.

        Raises IOError for a bad magic number or for an entry whose offsets
        point outside the file.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        # Parse the .mo file header, which consists of 5 little endian 32
        # bit words.
        self._catalog = catalog = {}
        self.plural = lambda n: int(n != 1) # germanic plural by default
        buf = fp.read()
        buflen = len(buf)
        # Are we big endian or little endian?
        magic = unpack('<I', buf[:4])[0]
        if magic == self.LE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
            ii = '<II'
        elif magic == self.BE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
            ii = '>II'
        else:
            raise IOError(0, 'Bad magic number', filename)
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        for _ in xrange(0, msgcount):
            mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
            mend = moff + mlen
            tlen, toff = unpack(ii, buf[transidx:transidx+8])
            tend = toff + tlen
            if mend < buflen and tend < buflen:
                msg = buf[moff:mend]
                tmsg = buf[toff:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
            # See if we're looking at GNU .mo conventions for metadata
            if mlen == 0:
                # Catalog description
                lastk = None
                for item in tmsg.splitlines():
                    item = item.strip()
                    if not item:
                        continue
                    k = v = None
                    if ':' in item:
                        k, v = item.split(':', 1)
                        k = k.strip().lower()
                        v = v.strip()
                        self._info[k] = v
                        lastk = k
                    elif lastk:
                        # A line without a colon continues the previous
                        # header value.
                        self._info[lastk] += '\n' + item
                    if k == 'content-type':
                        self._charset = v.split('charset=')[1]
                    elif k == 'plural-forms':
                        v = v.split(';')
                        plural = v[1].split('plural=')[1]
                        self.plural = c2py(plural)
            # Note: we unconditionally convert both msgids and msgstrs to
            # Unicode using the character encoding specified in the charset
            # parameter of the Content-Type header.  The gettext documentation
            # strongly encourages msgids to be us-ascii, but some applications
            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
            # traditional gettext applications, the msgid conversion will
            # cause no problems since us-ascii should always be a subset of
            # the charset encoding.  We may want to fall back to 8-bit msgids
            # if the Unicode conversion fails.
            if '\x00' in msg:
                # Plural forms.  Only msgid1 is used as a key; unpacking
                # into two names also requires exactly one NUL in the msgid.
                msgid1, msgid2 = msg.split('\x00')
                tmsg = tmsg.split('\x00')
                if self._charset:
                    msgid1 = unicode(msgid1, self._charset)
                    tmsg = [unicode(x, self._charset) for x in tmsg]
                for form, text in enumerate(tmsg):
                    catalog[(msgid1, form)] = text
            else:
                if self._charset:
                    msg = unicode(msg, self._charset)
                    tmsg = unicode(tmsg, self._charset)
                catalog[msg] = tmsg
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8

    def _encode_default(self, tmsg):
        """Encode *tmsg* to the output charset, else the catalog charset.

        When neither charset is set, *tmsg* is returned unchanged.
        """
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        elif self._charset:
            return tmsg.encode(self._charset)
        return tmsg

    def _encode_locale(self, tmsg):
        """Encode *tmsg* to the output charset, else the locale's encoding."""
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def gettext(self, message):
        """Return the translation of *message* as an encoded string.

        A miss is passed to the fallback; without one *message* is returned.
        """
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.gettext(message)
            return message
        return self._encode_default(tmsg)

    def pgettext(self, context, message):
        """Like gettext(), looking *message* up under *context*."""
        ctxt_msg_id = self.CONTEXT % (context, message)
        missing = object()
        tmsg = self._catalog.get(ctxt_msg_id, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.pgettext(context, message)
            return message
        return self._encode_default(tmsg)

    def lgettext(self, message):
        """Like gettext(), but encode with the locale's preferred encoding
        when no output charset is set."""
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lgettext(message)
            return message
        return self._encode_locale(tmsg)

    def lpgettext(self, context, message):
        """Like lgettext(), looking *message* up under *context*."""
        ctxt_msg_id = self.CONTEXT % (context, message)
        missing = object()
        tmsg = self._catalog.get(ctxt_msg_id, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lpgettext(context, message)
            return message
        return self._encode_locale(tmsg)

    def ngettext(self, msgid1, msgid2, n):
        """Return the plural form of *msgid1* that self.plural(n) selects.

        A miss is passed to the fallback; without one msgid1 is returned
        when ``n == 1`` and msgid2 otherwise.
        """
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
            return msgid1 if n == 1 else msgid2
        return self._encode_default(tmsg)

    def npgettext(self, context, msgid1, msgid2, n):
        """Like ngettext(), looking *msgid1* up under *context*."""
        ctxt_msg_id = self.CONTEXT % (context, msgid1)
        try:
            tmsg = self._catalog[(ctxt_msg_id, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.npgettext(context, msgid1, msgid2, n)
            return msgid1 if n == 1 else msgid2
        return self._encode_default(tmsg)

    def lngettext(self, msgid1, msgid2, n):
        """Like ngettext(), but encode with the locale's preferred encoding
        when no output charset is set."""
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.lngettext(msgid1, msgid2, n)
            return msgid1 if n == 1 else msgid2
        return self._encode_locale(tmsg)

    def ugettext(self, message):
        """Return the catalog entry for *message* without encoding it.

        A miss is passed to the fallback; without one ``unicode(message)``
        is returned.
        """
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.ugettext(message)
            return unicode(message)
        return tmsg

    def ungettext(self, msgid1, msgid2, n):
        """Plural counterpart of ugettext()."""
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ungettext(msgid1, msgid2, n)
            if n == 1:
                tmsg = unicode(msgid1)
            else:
                tmsg = unicode(msgid2)
        return tmsg

    def lnpgettext(self, context, msgid1, msgid2, n):
        """Like lngettext(), looking *msgid1* up under *context*."""
        ctxt_msg_id = self.CONTEXT % (context, msgid1)
        try:
            tmsg = self._catalog[(ctxt_msg_id, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.lnpgettext(context, msgid1, msgid2, n)
            return msgid1 if n == 1 else msgid2
        return self._encode_locale(tmsg)
516
517
518# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
    """Locate the .mo file(s) for *domain*.

    Candidates have the form
    ``<localedir>/<lang>/LC_MESSAGES/<domain>.mo``.  *localedir* defaults to
    _default_localedir.  When *languages* is None it is taken from the first
    non-empty variable among LANGUAGE, LC_ALL, LC_MESSAGES and LANG (split
    on ':'), with 'C' appended if absent.  Each language is expanded with
    _expand_lang(), and the search stops at the first 'C' entry.

    Returns the first existing path, or None, when *all* is false; returns
    the list of all existing paths (possibly empty) when *all* is true.
    """
    # Get some reasonable defaults for arguments that were not supplied
    search_root = _default_localedir if localedir is None else localedir
    if languages is None:
        env_values = (os.environ.get(envar)
                      for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'))
        languages = next((val.split(':') for val in env_values if val), [])
        if 'C' not in languages:
            languages.append('C')
    # now normalize and expand the languages, keeping first-seen order
    candidates = []
    for lang in languages:
        for variant in _expand_lang(lang):
            if variant not in candidates:
                candidates.append(variant)
    # select a language
    matches = []
    for lang in candidates:
        if lang == 'C':
            break
        mofile = os.path.join(search_root, lang, 'LC_MESSAGES',
                              '%s.mo' % domain)
        if not os.path.exists(mofile):
            continue
        if not all:
            return mofile
        matches.append(mofile)
    return matches if all else None
553
554
555
# Cache of parsed catalogs, keyed by (translation class, absolute .mo path).
_translations = {}

def translation(domain, localedir=None, languages=None,
                class_=None, fallback=False, codeset=None):
    """Return a translation object for *domain*.

    Every .mo file reported by find() is loaded with *class_*
    (GNUTranslations when None).  The first one becomes the result and the
    others are chained onto it as fallbacks.  *codeset*, when given, is set
    as the output charset of each of them.

    When no file is found, a NullTranslations instance is returned if
    *fallback* is true; otherwise IOError(ENOENT) is raised.
    """
    catalog_class = GNUTranslations if class_ is None else class_
    mofiles = find(domain, localedir, languages, all=1)
    if not mofiles:
        if not fallback:
            raise IOError(ENOENT, 'No translation file found for domain',
                          domain)
        return NullTranslations()
    # Avoid opening, reading, and parsing the .mo file after it's been done
    # once.
    head = None
    for mofile in mofiles:
        cache_key = (catalog_class, os.path.abspath(mofile))
        cached = _translations.get(cache_key)
        if cached is None:
            with open(mofile, 'rb') as fp:
                cached = _translations.setdefault(cache_key,
                                                  catalog_class(fp))
        # Hand out a shallow copy so that fallbacks and the output charset
        # can be set per caller; all other instance data stays shared with
        # the cached object.
        instance = copy.copy(cached)
        if codeset:
            instance.set_output_charset(codeset)
        if head is None:
            head = instance
        else:
            head.add_fallback(instance)
    return head
588
589
def install(domain, localedir=None, unicode=False, codeset=None, names=None):
    """Look up the translation for *domain* and call its install() method.

    The lookup uses ``fallback=True``, so no IOError is raised when no
    catalog is found.  *unicode* and *names* are passed on to install().
    """
    translation(domain, localedir, fallback=True,
                codeset=codeset).install(unicode, names)
593
594
595
# Mapping between domains and locale directories; written by
# bindtextdomain().
_localedirs = {}
# Mapping between domains and output codesets; written by
# bind_textdomain_codeset().
_localecodesets = {}
# Current global domain; `messages' is used for compatibility with GNU
# gettext.  Changed by textdomain().
_current_domain = 'messages'
602
603
def textdomain(domain=None):
    """Return the current global domain, first setting it to *domain* when
    *domain* is not None."""
    global _current_domain
    if domain is None:
        return _current_domain
    _current_domain = domain
    return domain
609
610
def bindtextdomain(domain, localedir=None):
    """Return the locale directory bound to *domain*.

    When *localedir* is not None it is first recorded as the binding.  A
    domain with no binding reports _default_localedir.
    """
    if localedir is None:
        return _localedirs.get(domain, _default_localedir)
    _localedirs[domain] = localedir
    return localedir
616
617
def bind_textdomain_codeset(domain, codeset=None):
    """Return the codeset bound to *domain*, or None if there is none.

    When *codeset* is not None it is first recorded as the binding.
    """
    if codeset is None:
        return _localecodesets.get(domain)
    _localecodesets[domain] = codeset
    return codeset
623
624
def _domain_translation(domain):
    """Return the translation object for *domain*, or None.

    The lookup uses the locale directory and codeset bound to *domain* in
    _localedirs and _localecodesets.  None is returned when translation()
    raises IOError.
    """
    try:
        return translation(domain, _localedirs.get(domain, None),
                           codeset=_localecodesets.get(domain))
    except IOError:
        return None


def _untranslated_plural(msgid1, msgid2, n):
    """Return msgid1 when ``n == 1`` and msgid2 otherwise."""
    return msgid1 if n == 1 else msgid2


def dgettext(domain, message):
    """Translate *message* in *domain*; return it unchanged when the
    domain's translation cannot be loaded."""
    t = _domain_translation(domain)
    if t is None:
        return message
    return t.gettext(message)

def dpgettext(domain, context, message):
    """Like dgettext(), for a message qualified by *context*."""
    t = _domain_translation(domain)
    if t is None:
        return message
    return t.pgettext(context, message)

def ldgettext(domain, message):
    """Like dgettext(), but uses the translation's lgettext() method."""
    t = _domain_translation(domain)
    if t is None:
        return message
    return t.lgettext(message)

def ldpgettext(domain, context, message):
    """Like ldgettext(), for a message qualified by *context*."""
    t = _domain_translation(domain)
    if t is None:
        return message
    return t.lpgettext(context, message)

def dngettext(domain, msgid1, msgid2, n):
    """Plural lookup in *domain*.

    When the domain's translation cannot be loaded, msgid1 is returned for
    ``n == 1`` and msgid2 otherwise.
    """
    t = _domain_translation(domain)
    if t is None:
        return _untranslated_plural(msgid1, msgid2, n)
    return t.ngettext(msgid1, msgid2, n)

def dnpgettext(domain, context, msgid1, msgid2, n):
    """Like dngettext(), for messages qualified by *context*."""
    t = _domain_translation(domain)
    if t is None:
        return _untranslated_plural(msgid1, msgid2, n)
    return t.npgettext(context, msgid1, msgid2, n)

def ldngettext(domain, msgid1, msgid2, n):
    """Like dngettext(), but uses the translation's lngettext() method."""
    t = _domain_translation(domain)
    if t is None:
        return _untranslated_plural(msgid1, msgid2, n)
    return t.lngettext(msgid1, msgid2, n)

def ldnpgettext(domain, context, msgid1, msgid2, n):
    """Like ldngettext(), for messages qualified by *context*."""
    t = _domain_translation(domain)
    if t is None:
        return _untranslated_plural(msgid1, msgid2, n)
    return t.lnpgettext(context, msgid1, msgid2, n)
700
def gettext(message):
    """Call dgettext() with the current global domain (_current_domain)."""
    return dgettext(_current_domain, message)
703
def pgettext(context, message):
    """Call dpgettext() with the current global domain (_current_domain)."""
    return dpgettext(_current_domain, context, message)
706
def lgettext(message):
    """Call ldgettext() with the current global domain (_current_domain)."""
    return ldgettext(_current_domain, message)
709
def lpgettext(context, message):
    """Call ldpgettext() with the current global domain (_current_domain)."""
    return ldpgettext(_current_domain, context, message)
712
def ngettext(msgid1, msgid2, n):
    """Call dngettext() with the current global domain (_current_domain)."""
    return dngettext(_current_domain, msgid1, msgid2, n)
715
def npgettext(context, msgid1, msgid2, n):
    """Call dnpgettext() with the current global domain (_current_domain)."""
    return dnpgettext(_current_domain, context, msgid1, msgid2, n)
718
def lngettext(msgid1, msgid2, n):
    """Call ldngettext() with the current global domain (_current_domain)."""
    return ldngettext(_current_domain, msgid1, msgid2, n)
721
def lnpgettext(context, msgid1, msgid2, n):
    """Call ldnpgettext() with the current global domain (_current_domain)."""
    return ldnpgettext(_current_domain, context, msgid1, msgid2, n)
724
725# dcgettext() has been deemed unnecessary and is not implemented.
726
# James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
# was:
#
#    import gettext
#    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
#    _ = cat.gettext
#    print _('Hello World')

# The resulting catalog object currently doesn't support access through a
# dictionary API, which was supported (but apparently unused) in GNOME
# gettext.

# Catalog is kept as an alias of translation().
Catalog = translation
Note: See TracBrowser for help on using the repository browser.