"""Create portable serialized representations of Python objects.

See module cPickle for a (much) faster implementation.
See module copy_reg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(string) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""
---|
26 | |
---|
27 | __version__ = "$Revision: 72223 $" # Code version |
---|
28 | |
---|
29 | from types import * |
---|
30 | from copy_reg import dispatch_table |
---|
31 | from copy_reg import _extension_registry, _inverted_registry, _extension_cache |
---|
32 | import marshal |
---|
33 | import sys |
---|
34 | import struct |
---|
35 | import re |
---|
36 | |
---|
37 | __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", |
---|
38 | "Unpickler", "dump", "dumps", "load", "loads"] |
---|
39 | |
---|
# These are purely informational; no code uses these.
format_version = "2.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      ]                 # Old format versions we can read

# Keep in synch with cPickle.  This is the highest protocol number we
# know how to read.
HIGHEST_PROTOCOL = 2

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
---|
57 | |
---|
class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass
---|
61 | |
---|
class PicklingError(PickleError):
    """Raised when an unpicklable object is passed to the dump() method."""
    pass
---|
68 | |
---|
class UnpicklingError(PickleError):
    """Raised when there is a problem unpickling an object, such as a
    security violation.

    Note that other exceptions may also be raised during unpickling,
    including (but not necessarily limited to) AttributeError,
    EOFError, ImportError, and IndexError.
    """
    pass
---|
79 | |
---|
80 | # An instance of _Stop is raised by Unpickler.load_stop() in response to |
---|
81 | # the STOP opcode, passing the object that is the result of unpickling. |
---|
82 | class _Stop(Exception): |
---|
83 | def __init__(self, value): |
---|
84 | self.value = value |
---|
85 | |
---|
# Jython has PyStringMap, a dict subclass with string keys; on CPython
# the import fails and we record its absence with None.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
---|
91 | |
---|
# UnicodeType may or may not be exported (normally imported from types);
# builds without unicode support leave it undefined.
try:
    UnicodeType
except NameError:
    UnicodeType = None
---|
97 | |
---|
98 | # Pickle opcodes. See pickletools.py for extensive docs. The listing |
---|
99 | # here is in kind-of alphabetical order of 1-character pickle code. |
---|
100 | # pickletools groups them by purpose. |
---|
101 | |
---|
102 | MARK = '(' # push special markobject on stack |
---|
103 | STOP = '.' # every pickle ends with STOP |
---|
104 | POP = '0' # discard topmost stack item |
---|
105 | POP_MARK = '1' # discard stack top through topmost markobject |
---|
106 | DUP = '2' # duplicate top stack item |
---|
107 | FLOAT = 'F' # push float object; decimal string argument |
---|
108 | INT = 'I' # push integer or bool; decimal string argument |
---|
109 | BININT = 'J' # push four-byte signed int |
---|
110 | BININT1 = 'K' # push 1-byte unsigned int |
---|
111 | LONG = 'L' # push long; decimal string argument |
---|
112 | BININT2 = 'M' # push 2-byte unsigned int |
---|
113 | NONE = 'N' # push None |
---|
114 | PERSID = 'P' # push persistent object; id is taken from string arg |
---|
115 | BINPERSID = 'Q' # " " " ; " " " " stack |
---|
116 | REDUCE = 'R' # apply callable to argtuple, both on stack |
---|
117 | STRING = 'S' # push string; NL-terminated string argument |
---|
118 | BINSTRING = 'T' # push string; counted binary string argument |
---|
119 | SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes |
---|
120 | UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument |
---|
121 | BINUNICODE = 'X' # " " " ; counted UTF-8 string argument |
---|
122 | APPEND = 'a' # append stack top to list below it |
---|
123 | BUILD = 'b' # call __setstate__ or __dict__.update() |
---|
124 | GLOBAL = 'c' # push self.find_class(modname, name); 2 string args |
---|
125 | DICT = 'd' # build a dict from stack items |
---|
126 | EMPTY_DICT = '}' # push empty dict |
---|
127 | APPENDS = 'e' # extend list on stack by topmost stack slice |
---|
128 | GET = 'g' # push item from memo on stack; index is string arg |
---|
129 | BINGET = 'h' # " " " " " " ; " " 1-byte arg |
---|
130 | INST = 'i' # build & push class instance |
---|
131 | LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg |
---|
132 | LIST = 'l' # build list from topmost stack items |
---|
133 | EMPTY_LIST = ']' # push empty list |
---|
134 | OBJ = 'o' # build & push class instance |
---|
135 | PUT = 'p' # store stack top in memo; index is string arg |
---|
136 | BINPUT = 'q' # " " " " " ; " " 1-byte arg |
---|
137 | LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg |
---|
138 | SETITEM = 's' # add key+value pair to dict |
---|
139 | TUPLE = 't' # build tuple from topmost stack items |
---|
140 | EMPTY_TUPLE = ')' # push empty tuple |
---|
141 | SETITEMS = 'u' # modify dict by adding topmost key+value pairs |
---|
142 | BINFLOAT = 'G' # push float; arg is 8-byte float encoding |
---|
143 | |
---|
144 | TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py |
---|
145 | FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py |
---|
146 | |
---|
147 | # Protocol 2 |
---|
148 | |
---|
149 | PROTO = '\x80' # identify pickle protocol |
---|
150 | NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple |
---|
151 | EXT1 = '\x82' # push object from extension registry; 1-byte index |
---|
152 | EXT2 = '\x83' # ditto, but 2-byte index |
---|
153 | EXT4 = '\x84' # ditto, but 4-byte index |
---|
154 | TUPLE1 = '\x85' # build 1-tuple from stack top |
---|
155 | TUPLE2 = '\x86' # build 2-tuple from two topmost stack items |
---|
156 | TUPLE3 = '\x87' # build 3-tuple from three topmost stack items |
---|
157 | NEWTRUE = '\x88' # push True |
---|
158 | NEWFALSE = '\x89' # push False |
---|
159 | LONG1 = '\x8a' # push long from < 256 bytes |
---|
160 | LONG4 = '\x8b' # push really big long |
---|
161 | |
---|
162 | _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3] |
---|
163 | |
---|
164 | |
---|
# Export every ALL-CAPS opcode/constant name defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x
---|
167 | |
---|
168 | |
---|
169 | # Pickling machinery |
---|
170 | |
---|
class Pickler:

    def __init__(self, file, protocol=None):
        """This takes a file-like object for writing a pickle data stream.

        The optional protocol argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2.  The default
        protocol is 0, to be backwards compatible.  (Protocol 0 is the
        only protocol that can be written to a file opened in text
        mode and read back successfully.  When using a protocol higher
        than 0, make sure the file is opened in binary mode, both when
        pickling and unpickling.)

        Protocol 1 is more efficient than protocol 0; protocol 2 is
        more efficient than protocol 1.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The file parameter must have a write() method that accepts a
        single string argument.  It can thus be an open file object, a
        StringIO object, or any other custom object that meets this
        interface.
        """
        if protocol is None:
            protocol = 0
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        self.write = file.write
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1
        self.fast = 0

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects
        are pickled by reference and not by value.  This method is
        useful when re-using picklers.
        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Protocol 2 pickles open with an explicit PROTO marker.
        if self.proto >= 2:
            self.write(PROTO + chr(self.proto))
        self.save(obj)
        self.write(STOP)

    def memoize(self, obj):
        """Store an object in the memo.

        The Pickler memo maps id(obj) to a 2-tuple (memo key, obj).
        The memo key is what gets written to the pickle and becomes the
        key in the Unpickler's memo; keeping obj itself in the tuple
        ensures transient objects stay alive (and keep their ids) for
        the duration of the pickling.

        Using the current memo length as the key is just a convention
        (any unique value would do) but it lets the Unpickler implement
        its memo as a growable array indexed by memo key.
        """
        if self.fast:
            # "fast" mode skips the memo entirely (no GET/PUT opcodes).
            return
        assert id(obj) not in self.memo
        key = len(self.memo)
        self.write(self.put(key))
        self.memo[id(obj)] = key, obj

    def put(self, i, pack=struct.pack):
        """Return a PUT (BINPUT, LONG_BINPUT) opcode string for index i."""
        if self.bin:
            if i < 256:
                return BINPUT + chr(i)
            return LONG_BINPUT + pack("<i", i)
        return PUT + repr(i) + '\n'

    def get(self, i, pack=struct.pack):
        """Return a GET (BINGET, LONG_BINGET) opcode string for index i."""
        if self.bin:
            if i < 256:
                return BINGET + chr(i)
            return LONG_BINGET + pack("<i", i)
        return GET + repr(i) + '\n'

    def save(self, obj):
        """Pickle obj, choosing the most specific strategy available."""
        # 1. Persistent id hook (defined by a subclass)?
        pid = self.persistent_id(obj)
        if pid is not None:
            self.save_pers(pid)
            return

        # 2. Already memoized?  Emit a GET instead of re-pickling.
        entry = self.memo.get(id(obj))
        if entry:
            self.write(self.get(entry[0]))
            return

        # 3. Built-in type with a dedicated save_* method?
        t = type(obj)
        handler = self.dispatch.get(t)
        if handler:
            handler(self, obj)  # Call unbound method with explicit self
            return

        # 4. copy_reg.dispatch_table entry?
        reduce = dispatch_table.get(t)
        if reduce:
            rv = reduce(obj)
        else:
            # A class with a custom metaclass is treated as a regular class.
            try:
                issc = issubclass(t, TypeType)
            except TypeError:  # t is not a class (old Boost; see SF #502085)
                issc = 0
            if issc:
                self.save_global(obj)
                return

            # 5. __reduce_ex__, falling back to __reduce__.
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # A string result means "save as global under this name".
        if type(rv) is StringType:
            self.save_global(obj, rv)
            return

        # Otherwise reduce() must have produced a 2- to 5-tuple.
        if type(rv) is not TupleType:
            raise PicklingError("%s must return string or tuple" % reduce)
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object.
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        # Hook for subclasses; None means "no persistent id".
        return None

    def save_pers(self, pid):
        # Save a persistent id reference.
        if self.bin:
            self.save(pid)
            self.write(BINPERSID)
        else:
            self.write(PERSID + str(pid) + '\n')

    def save_reduce(self, func, args, state=None,
                    listitems=None, dictitems=None, obj=None):
        """Emit the pickle stream for a reduce() 2- to 5-tuple.

        This API is also called directly by some subclasses.
        """
        if not isinstance(args, TupleType):
            raise PicklingError("args from reduce() should be a tuple")
        if not hasattr(func, '__call__'):
            raise PicklingError("func from reduce should be callable")

        save = self.save
        write = self.write

        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
            # Protocol 2 special case.  A __reduce__ implementation can
            # request the more efficient NEWOBJ opcode by returning a
            # callable named __newobj__ whose first argument is a
            # new-style class; it is expected (but unverifiable here)
            # to behave like:
            #
            #   def __newobj__(cls, *args):
            #       return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 pickle a reference to __newobj__ itself,
            # while protocol 2 pickles cls, the remaining args, and the
            # NEWOBJ opcode, which performs cls.__new__(cls, *args) at
            # unpickling time.  Any state from a 3-tuple is pickled
            # regardless of protocol (handled below via BUILD).  No
            # standard __newobj__ exists; callers supply their own so
            # that protocol 0/1 pickles stay readable by Python 2.2.
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo it is recursive:
            # discard what we just put on the stack and GET it instead.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # Tuples of length 4 or 5 carry iterators of list items and of
        # dict (key, value) pairs; these work with all protocols.
        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)

    # Methods below this point are dispatched through the dispatch table

    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[NoneType] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            self.write(obj and NEWTRUE or NEWFALSE)
        else:
            self.write(obj and TRUE or FALSE)
    dispatch[bool] = save_bool

    def save_int(self, obj, pack=struct.pack):
        if self.bin:
            # Ints that fit in a signed 4-byte two's-complement value
            # get a compact binary encoding; try the 1- and 2-byte
            # unsigned forms first.
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + chr(obj))
                    return
                if obj <= 0xffff:
                    self.write("%c%c%c" % (BININT2, obj & 0xff, obj >> 8))
                    return
            # 4-byte signed check: all bits above bit 31 must be copies
            # of bit 31 (Python's right shift sign-extends).
            high_bits = obj >> 31
            if high_bits == 0 or high_bits == -1:
                self.write(BININT + pack("<i", obj))
                return
        # Text pickle, or int too big to fit in signed 4-byte format.
        self.write(INT + repr(obj) + '\n')
    dispatch[IntType] = save_int

    def save_long(self, obj, pack=struct.pack):
        if self.proto >= 2:
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + chr(n) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        self.write(LONG + repr(obj) + '\n')
    dispatch[LongType] = save_long

    def save_float(self, obj, pack=struct.pack):
        if self.bin:
            # Big-endian 8-byte IEEE double.
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj) + '\n')
    dispatch[FloatType] = save_float

    def save_string(self, obj, pack=struct.pack):
        if self.bin:
            n = len(obj)
            if n < 256:
                self.write(SHORT_BINSTRING + chr(n) + obj)
            else:
                self.write(BINSTRING + pack("<i", n) + obj)
        else:
            self.write(STRING + repr(obj) + '\n')
        self.memoize(obj)
    dispatch[StringType] = save_string

    def save_unicode(self, obj, pack=struct.pack):
        if self.bin:
            encoded = obj.encode('utf-8')
            self.write(BINUNICODE + pack("<i", len(encoded)) + encoded)
        else:
            # Escape backslash and newline so the NL-terminated text
            # form round-trips.
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\n", "\\u000a")
            self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
        self.memoize(obj)
    dispatch[UnicodeType] = save_unicode

    if StringType is UnicodeType:
        # This is true for Jython, where str and unicode are one type;
        # a single save_string must handle both flavors.
        def save_string(self, obj, pack=struct.pack):
            is_unicode = obj.isunicode()

            if self.bin:
                if is_unicode:
                    obj = obj.encode("utf-8")
                n = len(obj)
                if n < 256 and not is_unicode:
                    self.write(SHORT_BINSTRING + chr(n) + obj)
                else:
                    header = pack("<i", n)
                    if is_unicode:
                        self.write(BINUNICODE + header + obj)
                    else:
                        self.write(BINSTRING + header + obj)
            else:
                if is_unicode:
                    obj = obj.replace("\\", "\\u005c")
                    obj = obj.replace("\n", "\\u000a")
                    obj = obj.encode('raw-unicode-escape')
                    self.write(UNICODE + obj + '\n')
                else:
                    self.write(STRING + repr(obj) + '\n')
            self.memoize(obj)
        dispatch[StringType] = save_string

    def save_tuple(self, obj):
        write = self.write
        proto = self.proto

        n = len(obj)
        if n == 0:
            if proto:
                write(EMPTY_TUPLE)
            else:
                write(MARK + TUPLE)
            return

        save = self.save
        memo = self.memo
        if n <= 3 and proto >= 2:
            # Protocol 2 has dedicated opcodes for 1/2/3-tuples.
            for item in obj:
                save(item)
            # Subtle: saving the items may have pickled obj itself
            # (a recursive tuple); see the comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                write(POP * n + get)
            else:
                write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and
        # tuple has more than 3 elements.
        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in memo:
            # obj was not in the memo on entry, so saving its elements
            # must have pickled obj itself: the tuple is recursive.
            # Throw away everything we pushed and GET the tuple, which
            # is already fully constructed.  (Checking inside the loop
            # would also work, but recursive tuples are rare.)
            get = self.get(memo[id(obj)][0])
            if proto:
                write(POP_MARK + get)
            else:  # proto 0 -- POP_MARK not available
                write(POP * (n + 1) + get)
            return

        # No recursion.
        self.write(TUPLE)
        self.memoize(obj)

    dispatch[TupleType] = save_tuple

    # save_empty_tuple() isn't used by anything in Python 2.3, but a
    # Pickler subclass in Zope3 calls it, so removing it would not be
    # harmless.
    def save_empty_tuple(self, obj):
        self.write(EMPTY_TUPLE)

    def save_list(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_LIST)
        else:  # proto 0 -- can't use EMPTY_LIST
            write(MARK + LIST)

        self.memoize(obj)
        self._batch_appends(iter(obj))

    dispatch[ListType] = save_list

    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it
    # gets out of synch, though.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        """Append items, batching runs of them into APPENDS opcodes."""
        save = self.save
        write = self.write

        if not self.bin:
            # Protocol 0 has no APPENDS; emit one APPEND per item.
            for item in items:
                save(item)
                write(APPEND)
            return

        batch_range = xrange(self._BATCHSIZE)
        while items is not None:
            batch = []
            for _ in batch_range:
                try:
                    batch.append(items.next())
                except StopIteration:
                    items = None
                    break
            n = len(batch)
            if n > 1:
                write(MARK)
                for item in batch:
                    save(item)
                write(APPENDS)
            elif n:
                save(batch[0])
                write(APPEND)
            # else batch is empty, and we're done

    def save_dict(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_DICT)
        else:  # proto 0 -- can't use EMPTY_DICT
            write(MARK + DICT)

        self.memoize(obj)
        self._batch_setitems(obj.iteritems())

    dispatch[DictionaryType] = save_dict
    if not PyStringMap is None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        """Set (key, value) pairs, batching runs into SETITEMS opcodes."""
        save = self.save
        write = self.write

        if not self.bin:
            # Protocol 0 has no SETITEMS; emit one SETITEM per pair.
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        batch_range = xrange(self._BATCHSIZE)
        while items is not None:
            batch = []
            for _ in batch_range:
                try:
                    batch.append(items.next())
                except StopIteration:
                    items = None
                    break
            n = len(batch)
            if n > 1:
                write(MARK)
                for k, v in batch:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = batch[0]
                save(k)
                save(v)
                write(SETITEM)
            # else batch is empty, and we're done

    def save_inst(self, obj):
        """Pickle an old-style (classic) class instance."""
        cls = obj.__class__

        memo = self.memo
        write = self.write
        save = self.save

        if hasattr(obj, '__getinitargs__'):
            args = obj.__getinitargs__()
            len(args)  # XXX Assert it's a sequence
            _keep_alive(args, memo)
        else:
            args = ()

        write(MARK)

        if self.bin:
            save(cls)
            for arg in args:
                save(arg)
            write(OBJ)
        else:
            for arg in args:
                save(arg)
            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

        self.memoize(obj)

        try:
            getstate = obj.__getstate__
        except AttributeError:
            stuff = obj.__dict__
        else:
            stuff = getstate()
            _keep_alive(stuff, memo)
        save(stuff)
        write(BUILD)

    dispatch[InstanceType] = save_inst

    def save_global(self, obj, name=None, pack=struct.pack):
        """Pickle a class, function, or other module-global by name."""
        write = self.write
        memo = self.memo

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        # Verify that importing module.name really yields obj, so the
        # unpickler will be able to find it again.
        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        if self.proto >= 2:
            # Registered extension codes get a compact EXTn encoding.
            code = _extension_registry.get((module, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + chr(code))
                elif code <= 0xffff:
                    write("%c%c%c" % (EXT2, code & 0xff, code >> 8))
                else:
                    write(EXT4 + pack("<i", code))
                return

        write(GLOBAL + module + '\n' + name + '\n')
        self.memoize(obj)

    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
    dispatch[TypeType] = save_global
---|
780 | |
---|
781 | # Pickling helpers |
---|
782 | |
---|
783 | def _keep_alive(x, memo): |
---|
784 | """Keeps a reference to the object x in the memo. |
---|
785 | |
---|
786 | Because we remember objects by their id, we have |
---|
787 | to assure that possibly temporary objects are kept |
---|
788 | alive by referencing them. |
---|
789 | We store a reference at the id of the memo, which should |
---|
790 | normally not be used unless someone tries to deepcopy |
---|
791 | the memo itself... |
---|
792 | """ |
---|
793 | try: |
---|
794 | memo[id(memo)].append(x) |
---|
795 | except KeyError: |
---|
796 | # aha, this is the first one :-) |
---|
797 | memo[id(memo)]=[x] |
---|
798 | |
---|
799 | |
---|
# A cache for whichmodule(), mapping a function object to the name of
# the module in which the function was found.

classmap = {}  # called classmap for backwards compatibility

def whichmodule(func, funcname):
    """Figure out the module in which a function occurs.

    Search sys.modules for the module.
    Cache in classmap.
    Return a module name.
    If the function cannot be found, return "__main__".
    """
    # Python functions should always get an __module__ from their globals.
    mod = getattr(func, "__module__", None)
    if mod is not None:
        return mod
    if func in classmap:
        return classmap[func]

    for name, module in sys.modules.items():
        if module is None:
            continue  # skip dummy package entries
        if name != '__main__' and getattr(module, funcname, None) is func:
            break
    else:
        name = '__main__'
    classmap[func] = name
    return name
---|
829 | |
---|
830 | |
---|
831 | # Unpickling machinery |
---|
832 | |
---|
class Unpickler:

    def __init__(self, file):
        """This takes a file-like object for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so no
        proto argument is needed.

        The file-like object must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires no
        arguments. Both methods should return a string. Thus file-like
        object can be a file object opened for reading, a StringIO object,
        or any other custom object that meets this interface.
        """
        # Bind the stream's methods once; the opcode loop calls them for
        # every opcode, and this avoids repeated attribute lookups.
        self.readline = file.readline
        self.read = file.read
        # memo maps PUT/BINPUT keys (stringified ints) to objects already
        # unpickled, so shared and recursive references can be resolved.
        self.memo = {}

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        self.mark = object() # any new unique object
        self.stack = []
        self.append = self.stack.append
        read = self.read
        dispatch = self.dispatch
        try:
            # Main opcode loop: each 1-byte opcode selects a load_* method
            # from the dispatch table.  The loop ends only when an opcode
            # raises -- _Stop for STOP, or an error (e.g. EOFError).
            while 1:
                key = read(1)
                dispatch[key](self)
        except _Stop, stopinst:
            return stopinst.value

    # Return largest index k such that self.stack[k] is self.mark.
    # If the stack doesn't contain a mark, eventually raises IndexError.
    # This could be sped by maintaining another stack, of indices at which
    # the mark appears. For that matter, the latter stack would suffice,
    # and we wouldn't need to push mark objects on self.stack at all.
    # Doing so is probably a good thing, though, since if the pickle is
    # corrupt (or hostile) we may get a clue from finding self.mark embedded
    # in unpickled objects.
    def marker(self):
        stack = self.stack
        mark = self.mark
        k = len(stack)-1
        while stack[k] is not mark: k = k-1
        return k

    # Opcode byte -> unbound load_* method; entries are added right after
    # each method definition below.
    dispatch = {}

    def load_eof(self):
        # read(1) returned '' -- the underlying stream is exhausted.
        raise EOFError
    dispatch[''] = load_eof

    def load_proto(self):
        # PROTO opcode: validate the protocol byte.  The value itself is
        # not stored; every later opcode is self-describing.
        proto = ord(self.read(1))
        if not 0 <= proto <= 2:
            raise ValueError, "unsupported pickle protocol: %d" % proto
    dispatch[PROTO] = load_proto

    def load_persid(self):
        # Text-mode persistent ID: one newline-terminated line.
        # persistent_load is not defined here -- it must be supplied by
        # the user/subclass for pickles that contain persistent IDs.
        pid = self.readline()[:-1]
        self.append(self.persistent_load(pid))
    dispatch[PERSID] = load_persid

    def load_binpersid(self):
        # Binary persistent ID: the pid is the object on top of the stack.
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID] = load_binpersid

    def load_none(self):
        self.append(None)
    dispatch[NONE] = load_none

    def load_false(self):
        # Protocol 2 NEWFALSE opcode.
        self.append(False)
    dispatch[NEWFALSE] = load_false

    def load_true(self):
        # Protocol 2 NEWTRUE opcode.
        self.append(True)
    dispatch[NEWTRUE] = load_true

    def load_int(self):
        # Text-mode integer.  The special bodies "00"/"01" (FALSE/TRUE
        # minus their leading opcode byte) are how protocol 0 spells the
        # booleans.
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            try:
                val = int(data)
            except ValueError:
                # Doesn't fit in a machine int -- fall back to long.
                val = long(data)
        self.append(val)
    dispatch[INT] = load_int

    def load_binint(self):
        # 4-byte signed little-endian int, decoded via marshal ('i' code).
        self.append(mloads('i' + self.read(4)))
    dispatch[BININT] = load_binint

    def load_binint1(self):
        # 1-byte unsigned int.
        self.append(ord(self.read(1)))
    dispatch[BININT1] = load_binint1

    def load_binint2(self):
        # 2-byte unsigned little-endian int, zero-extended to 4 bytes so
        # marshal can decode it.
        self.append(mloads('i' + self.read(2) + '\000\000'))
    dispatch[BININT2] = load_binint2

    def load_long(self):
        # Text-mode long: the line holds a long literal (base prefixes
        # allowed, hence base 0).
        self.append(long(self.readline()[:-1], 0))
    dispatch[LONG] = load_long

    def load_long1(self):
        # LONG1: 1-byte length n, then n bytes of two's-complement
        # little-endian data (see decode_long).
        n = ord(self.read(1))
        bytes = self.read(n)
        self.append(decode_long(bytes))
    dispatch[LONG1] = load_long1

    def load_long4(self):
        # LONG4: 4-byte length, then that many bytes of long data.
        n = mloads('i' + self.read(4))
        bytes = self.read(n)
        self.append(decode_long(bytes))
    dispatch[LONG4] = load_long4

    def load_float(self):
        # Text-mode float: the repr of the float on one line.
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT] = load_float

    def load_binfloat(self, unpack=struct.unpack):
        # 8-byte big-endian IEEE double ('>d'); unpack is pre-bound as a
        # default argument for speed.
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT] = load_binfloat

    def load_string(self):
        # Text-mode string: a quoted, escape-encoded repr on one line.
        # The quote checks reject anything that isn't a plain quoted
        # string, instead of eval()ing arbitrary text.
        rep = self.readline()[:-1]
        for q in "\"'": # double or single quote
            if rep.startswith(q):
                if len(rep) < 2 or not rep.endswith(q):
                    # opening and closing quotes must match
                    raise ValueError, "insecure string pickle"
                rep = rep[len(q):-len(q)] # strip the surrounding quotes
                break
        else:
            raise ValueError, "insecure string pickle"
        self.append(rep.decode("string-escape"))
    dispatch[STRING] = load_string

    def load_binstring(self):
        # 4-byte length followed by that many raw bytes.
        len = mloads('i' + self.read(4)) # NOTE: shadows the len() builtin
        self.append(self.read(len))
    dispatch[BINSTRING] = load_binstring

    def load_unicode(self):
        # Text-mode unicode: raw-unicode-escape encoded, one line.
        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
    dispatch[UNICODE] = load_unicode

    def load_binunicode(self):
        # 4-byte length followed by UTF-8 encoded data.
        len = mloads('i' + self.read(4)) # NOTE: shadows the len() builtin
        self.append(unicode(self.read(len),'utf-8'))
    dispatch[BINUNICODE] = load_binunicode

    def load_short_binstring(self):
        # 1-byte length followed by that many raw bytes.
        len = ord(self.read(1)) # NOTE: shadows the len() builtin
        self.append(self.read(len))
    dispatch[SHORT_BINSTRING] = load_short_binstring

    def load_tuple(self):
        # Collapse everything above the topmost mark into one tuple.
        k = self.marker()
        self.stack[k:] = [tuple(self.stack[k+1:])]
    dispatch[TUPLE] = load_tuple

    def load_empty_tuple(self):
        self.stack.append(())
    dispatch[EMPTY_TUPLE] = load_empty_tuple

    def load_tuple1(self):
        # Protocol 2 shortcuts: build 1-, 2- and 3-tuples without a mark.
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1] = load_tuple1

    def load_tuple2(self):
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2] = load_tuple2

    def load_tuple3(self):
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3] = load_tuple3

    def load_empty_list(self):
        self.stack.append([])
    dispatch[EMPTY_LIST] = load_empty_list

    def load_empty_dictionary(self):
        self.stack.append({})
    dispatch[EMPTY_DICT] = load_empty_dictionary

    def load_list(self):
        # Everything above the topmost mark becomes one list.
        k = self.marker()
        self.stack[k:] = [self.stack[k+1:]]
    dispatch[LIST] = load_list

    def load_dict(self):
        # Items above the mark alternate key, value, key, value, ...
        k = self.marker()
        d = {}
        items = self.stack[k+1:]
        for i in range(0, len(items), 2):
            key = items[i]
            value = items[i+1]
            d[key] = value
        self.stack[k:] = [d]
    dispatch[DICT] = load_dict

    # INST and OBJ differ only in how they get a class object. It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, k):
        args = tuple(self.stack[k+1:])
        del self.stack[k:]
        instantiated = 0
        if (not args and
            type(klass) is ClassType and
            not hasattr(klass, "__getinitargs__")):
            # Fast path for old-style classes with no __init__ args:
            # create a bare _EmptyClass instance and rebind its __class__,
            # skipping __init__ entirely.
            try:
                value = _EmptyClass()
                value.__class__ = klass
                instantiated = 1
            except RuntimeError:
                # In restricted execution, assignment to inst.__class__ is
                # prohibited
                pass
        if not instantiated:
            try:
                value = klass(*args)
            except TypeError, err:
                # Re-raise with the class name added, preserving the
                # original traceback (3-argument raise).
                raise TypeError, "in constructor for %s: %s" % (
                    klass.__name__, str(err)), sys.exc_info()[2]
        self.append(value)

    def load_inst(self):
        # INST: module and class name come from the stream as text lines.
        module = self.readline()[:-1]
        name = self.readline()[:-1]
        klass = self.find_class(module, name)
        self._instantiate(klass, self.marker())
    dispatch[INST] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        k = self.marker()
        klass = self.stack.pop(k+1)
        self._instantiate(klass, k)
    dispatch[OBJ] = load_obj

    def load_newobj(self):
        # Protocol 2 NEWOBJ: cls.__new__(cls, *args), bypassing __init__.
        args = self.stack.pop()
        cls = self.stack[-1]
        obj = cls.__new__(cls, *args)
        self.stack[-1] = obj
    dispatch[NEWOBJ] = load_newobj

    def load_global(self):
        # GLOBAL: push a class/function found by module and name.
        module = self.readline()[:-1]
        name = self.readline()[:-1]
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL] = load_global

    def load_ext1(self):
        # Protocol 2 extension-registry codes, in 1-, 2- and 4-byte forms.
        code = ord(self.read(1))
        self.get_extension(code)
    dispatch[EXT1] = load_ext1

    def load_ext2(self):
        code = mloads('i' + self.read(2) + '\000\000')
        self.get_extension(code)
    dispatch[EXT2] = load_ext2

    def load_ext4(self):
        code = mloads('i' + self.read(4))
        self.get_extension(code)
    dispatch[EXT4] = load_ext4

    def get_extension(self, code):
        # Resolve an extension code to an object via copy_reg's registry,
        # caching the result in the shared _extension_cache.
        nil = [] # unique sentinel: a cached value may itself be falsy
        obj = _extension_cache.get(code, nil)
        if obj is not nil:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this
        # NOTE(review): this imports whatever module the pickle stream
        # names and fetches an arbitrary attribute -- one reason why
        # unpickling untrusted data is unsafe.
        __import__(module)
        mod = sys.modules[module]
        klass = getattr(mod, name)
        return klass

    def load_reduce(self):
        # REDUCE: apply the callable (below top) to the arg tuple (top).
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        value = func(*args)
        stack[-1] = value
    dispatch[REDUCE] = load_reduce

    def load_pop(self):
        del self.stack[-1]
    dispatch[POP] = load_pop

    def load_pop_mark(self):
        # Discard everything down to and including the topmost mark.
        k = self.marker()
        del self.stack[k:]
    dispatch[POP_MARK] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP] = load_dup

    def load_get(self):
        # GET: memo key is a decimal string on one line.
        self.append(self.memo[self.readline()[:-1]])
    dispatch[GET] = load_get

    def load_binget(self):
        # BINGET: 1-byte memo index; memo keys are stored as repr(int).
        i = ord(self.read(1))
        self.append(self.memo[repr(i)])
    dispatch[BINGET] = load_binget

    def load_long_binget(self):
        # LONG_BINGET: 4-byte memo index.
        i = mloads('i' + self.read(4))
        self.append(self.memo[repr(i)])
    dispatch[LONG_BINGET] = load_long_binget

    def load_put(self):
        # PUT: store top of stack in the memo under a text key.
        self.memo[self.readline()[:-1]] = self.stack[-1]
    dispatch[PUT] = load_put

    def load_binput(self):
        i = ord(self.read(1))
        self.memo[repr(i)] = self.stack[-1]
    dispatch[BINPUT] = load_binput

    def load_long_binput(self):
        i = mloads('i' + self.read(4))
        self.memo[repr(i)] = self.stack[-1]
    dispatch[LONG_BINPUT] = load_long_binput

    def load_append(self):
        # APPEND: push top of stack onto the list just below it.
        stack = self.stack
        value = stack.pop()
        list = stack[-1] # NOTE: shadows the list() builtin
        list.append(value)
    dispatch[APPEND] = load_append

    def load_appends(self):
        # APPENDS: extend the list below the mark with everything above it.
        stack = self.stack
        mark = self.marker()
        list = stack[mark - 1] # NOTE: shadows the list() builtin
        list.extend(stack[mark + 1:])
        del stack[mark:]
    dispatch[APPENDS] = load_appends

    def load_setitem(self):
        # SETITEM: dict[key] = value, value on top, key just below it.
        stack = self.stack
        value = stack.pop()
        key = stack.pop()
        dict = stack[-1] # NOTE: shadows the dict() builtin
        dict[key] = value
    dispatch[SETITEM] = load_setitem

    def load_setitems(self):
        # SETITEMS: items above the mark alternate key, value, ...
        stack = self.stack
        mark = self.marker()
        dict = stack[mark - 1] # NOTE: shadows the dict() builtin
        for i in range(mark + 1, len(stack), 2):
            dict[stack[i]] = stack[i + 1]

        del stack[mark:]
    dispatch[SETITEMS] = load_setitems

    def load_build(self):
        # BUILD: apply the state on top of the stack to the object below
        # it -- via __setstate__ when the object defines one, otherwise by
        # updating __dict__ (plus setattr for an optional slot-state dict).
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate:
            setstate(state)
            return
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            # Protocol 2 may supply a (dict-state, slot-state) pair.
            state, slotstate = state
        if state:
            try:
                d = inst.__dict__
                try:
                    # intern() the keys: attribute-name strings are prime
                    # interning candidates.
                    for k, v in state.iteritems():
                        d[intern(k)] = v
                # keys in state don't have to be strings
                # don't blow up, but don't go out of our way
                except TypeError:
                    d.update(state)

            except RuntimeError:
                # XXX In restricted execution, the instance's __dict__
                # is not accessible. Use the old way of unpickling
                # the instance variables. This is a semantic
                # difference when unpickling in restricted
                # vs. unrestricted modes.
                # Note, however, that cPickle has never tried to do the
                # .update() business, and always uses
                # PyObject_SetItem(inst.__dict__, key, value) in a
                # loop over state.items().
                for k, v in state.items():
                    setattr(inst, k, v)
        if slotstate:
            # Slot values don't live in __dict__; set them with setattr.
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD] = load_build

    def load_mark(self):
        self.append(self.mark)
    dispatch[MARK] = load_mark

    def load_stop(self):
        # STOP: the finished object is on top; _Stop carries it back up
        # to load().
        value = self.stack.pop()
        raise _Stop(value)
    dispatch[STOP] = load_stop
---|
1264 | |
---|
1265 | # Helper class for load_inst/load_obj |
---|
1266 | |
---|
class _EmptyClass:
    # Used by Unpickler._instantiate() as a blank old-style instance whose
    # __class__ is then rebound to the real class, bypassing __init__.
    pass
---|
1269 | |
---|
1270 | # Encode/decode longs in linear time. |
---|
1271 | |
---|
1272 | import binascii as _binascii |
---|
1273 | |
---|
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''
    if x > 0:
        # Positive: hex(x) gives the magnitude; pad so unhexlify gets an
        # even number of digits and the sign bit reads as positive.
        ashex = hex(x)
        assert ashex.startswith("0x")
        # hex() output is "0x..." with an optional trailing 'L' in py2.
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement: (1L << nbytes) + x. The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1L << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            # Complement came out shorter; left-pad with zero digits.
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    binary = _binascii.unhexlify(ashex)
    # Hex digits are big-endian; reverse the bytes for the little-endian
    # output the LONG1/LONG4 opcodes expect.
    return binary[::-1]
---|
1339 | |
---|
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long('')
    0L
    >>> decode_long("\xff\x00")
    255L
    >>> decode_long("\xff\x7f")
    32767L
    >>> decode_long("\x00\xff")
    -256L
    >>> decode_long("\x00\x80")
    -32768L
    >>> decode_long("\x80")
    -128L
    >>> decode_long("\x7f")
    127L
    """

    nbytes = len(data)
    if nbytes == 0:
        # Empty string is the special encoding of 0L (see encode_long).
        return 0L
    # Reverse to big-endian, hexlify, and let long() parse the magnitude.
    ashex = _binascii.hexlify(data[::-1])
    n = long(ashex, 16) # quadratic time before Python 2.3; linear now
    if data[-1] >= '\x80':
        # High bit of the most significant byte set: value is negative
        # in two's complement, so subtract 2**(8*nbytes).
        n -= 1L << (nbytes * 8)
    return n
---|
1367 | |
---|
1368 | # Shorthands |
---|
1369 | |
---|
1370 | try: |
---|
1371 | from cStringIO import StringIO |
---|
1372 | except ImportError: |
---|
1373 | from StringIO import StringIO |
---|
1374 | |
---|
def dump(obj, file, protocol=None):
    """Pickle obj to the open file-like object file.

    Equivalent to Pickler(file, protocol).dump(obj).
    """
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
---|
1377 | |
---|
def dumps(obj, protocol=None):
    """Return a string containing obj in pickled form."""
    # Pickle into an in-memory buffer and hand back its contents.
    buf = StringIO()
    Pickler(buf, protocol).dump(obj)
    return buf.getvalue()
---|
1382 | |
---|
def load(file):
    """Read one pickled object from the open file-like object file."""
    unpickler = Unpickler(file)
    return unpickler.load()
---|
1385 | |
---|
def loads(str):
    """Reconstitute and return one object from the pickle string str."""
    # Wrap the string in an in-memory file and delegate to Unpickler.
    buf = StringIO(str)
    unpickler = Unpickler(buf)
    return unpickler.load()
---|
1389 | |
---|
1390 | # Doctest |
---|
1391 | |
---|
def _test():
    # Run the doctests embedded in this module's docstrings (e.g.
    # encode_long/decode_long) and return the doctest results.
    import doctest
    return doctest.testmod()
---|
1395 | |
---|
if __name__ == "__main__":
    # Invoked as a script: exercise the module's doctests.
    _test()
---|