1 | """Classes to represent arbitrary sets (including sets of sets). |
---|
2 | |
---|
3 | This module implements sets using dictionaries whose values are |
---|
4 | ignored. The usual operations (union, intersection, deletion, etc.) |
---|
5 | are provided as both methods and operators. |
---|
6 | |
---|
7 | Important: sets are not sequences! While they support 'x in s', |
---|
8 | 'len(s)', and 'for x in s', none of those operations are unique for |
---|
9 | sequences; for example, mappings support all three as well. The |
---|
10 | characteristic operation for sequences is subscripting with small |
---|
11 | integers: s[i], for i in range(len(s)). Sets don't support |
---|
12 | subscripting at all. Also, sequences allow multiple occurrences and |
---|
13 | their elements have a definite order; sets on the other hand don't |
---|
14 | record multiple occurrences and don't remember the order of element |
---|
15 | insertion (which is why they don't support s[i]). |
---|
16 | |
---|
17 | The following classes are provided: |
---|
18 | |
---|
19 | BaseSet -- All the operations common to both mutable and immutable |
---|
20 | sets. This is an abstract class, not meant to be directly |
---|
21 | instantiated. |
---|
22 | |
---|
23 | Set -- Mutable sets, subclass of BaseSet; not hashable. |
---|
24 | |
---|
25 | ImmutableSet -- Immutable sets, subclass of BaseSet; hashable. |
---|
26 | An iterable argument is optional when creating an ImmutableSet. |
---|
27 | |
---|
28 | _TemporarilyImmutableSet -- A wrapper around a Set, hashable, |
---|
29 | giving the same hash value as the immutable set equivalent |
---|
30 | would have. Do not use this class directly. |
---|
31 | |
---|
32 | Only hashable objects can be added to a Set. In particular, you cannot |
---|
33 | really add a Set as an element to another Set; if you try, what is |
---|
34 | actually added is an ImmutableSet built from it (it compares equal to |
---|
35 | the one you tried adding). |
---|
36 | |
---|
37 | When you ask if `x in y' where x is a Set and y is a Set or |
---|
38 | ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and |
---|
39 | what's tested is actually `z in y'. |
---|
40 | |
---|
41 | """ |
---|
42 | |
---|
43 | # Code history: |
---|
44 | # |
---|
45 | # - Greg V. Wilson wrote the first version, using a different approach |
---|
46 | # to the mutable/immutable problem, and inheriting from dict. |
---|
47 | # |
---|
48 | # - Alex Martelli modified Greg's version to implement the current |
---|
49 | # Set/ImmutableSet approach, and make the data an attribute. |
---|
50 | # |
---|
51 | # - Guido van Rossum rewrote much of the code, made some API changes, |
---|
52 | # and cleaned up the docstrings. |
---|
53 | # |
---|
54 | # - Raymond Hettinger added a number of speedups and other |
---|
55 | # improvements. |
---|
56 | |
---|
57 | from itertools import ifilter, ifilterfalse |
---|
58 | |
---|
# Names exported by "from sets import *".  The private helper class
# _TemporarilyImmutableSet is not exported.
__all__ = ['BaseSet', 'Set', 'ImmutableSet']

import warnings
# Issued once, when the module is first imported.  stacklevel=2 makes the
# warning refer to the caller's frame (the code doing the import) rather
# than to this line.
warnings.warn("the sets module is deprecated", DeprecationWarning,
                stacklevel=2)
64 | |
---|
class BaseSet(object):
    """Common base class for mutable and immutable sets.

    The elements of a set are the keys of the dict held in ``self._data``;
    the dict values are ignored.  This class never creates ``_data``
    itself: each concrete subclass does so in its own ``__init__``.
    """

    __slots__ = ['_data']

    # Constructor

    def __init__(self):
        """This is an abstract class.

        Raises TypeError when BaseSet itself (rather than a subclass) is
        instantiated.
        """
        # Don't call this from a concrete subclass!
        if self.__class__ is BaseSet:
            # Call-style raise: same behaviour as the old
            # "raise TypeError, msg" statement, but not Python-2-only.
            raise TypeError("BaseSet is an abstract class. "
                            "Use Set or ImmutableSet.")

    # Standard protocols: __len__, __repr__, __str__, __iter__

    def __len__(self):
        """Return the number of elements of a set."""
        return len(self._data)

    def __repr__(self):
        """Return string representation of a set.

        This looks like 'Set([<list of elements>])'.
        """
        return self._repr()

    # __str__ is the same as __repr__
    __str__ = __repr__

    def _repr(self, sorted=False):
        """Build the repr string, optionally with the elements sorted.

        Note that the parameter name shadows the builtin sorted() inside
        this method, so the list is sorted in place instead.
        """
        # list(d) yields the keys of a dict, exactly like d.keys() does on
        # Python 2, without depending on keys() returning a list.
        elements = list(self._data)
        if sorted:
            elements.sort()
        return '%s(%r)' % (self.__class__.__name__, elements)

    def __iter__(self):
        """Return an iterator over the elements of a set.

        This is the keys iterator for the underlying dict.
        """
        return iter(self._data)

    # Three-way comparison is not supported.  However, because __eq__ is
    # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
    # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
    # case).

    def __cmp__(self, other):
        """Always raise TypeError: sets have no total ordering."""
        raise TypeError("can't compare sets using cmp()")

    # Equality comparisons using the underlying dicts.  Mixed-type comparisons
    # are allowed here, where Set == z for non-Set z always returns False,
    # and Set != z always True.  This allows expressions like "x in y" to
    # give the expected result when y is a sequence of mixed types, not
    # raising a pointless TypeError just because y contains a Set, or x is
    # a Set and y contains a non-set ("in" invokes only __eq__).
    # Subtle:  it would be nicer if __eq__ and __ne__ could return
    # NotImplemented instead of True or False.  Then the other comparand
    # would get a chance to determine the result, and if the other comparand
    # also returned NotImplemented then it would fall back to object address
    # comparison (which would always return False for __eq__ and always
    # True for __ne__).  However, that doesn't work, because this type
    # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
    # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.

    def __eq__(self, other):
        """Return True iff other is a BaseSet with the same elements."""
        if isinstance(other, BaseSet):
            return self._data == other._data
        else:
            return False

    def __ne__(self, other):
        """Return False iff other is a BaseSet with the same elements."""
        if isinstance(other, BaseSet):
            return self._data != other._data
        else:
            return True

    # Copying operations

    def copy(self):
        """Return a shallow copy of a set."""
        # The subclass constructor is called without arguments, so it must
        # accept that and create an empty _data dict.
        result = self.__class__()
        result._data.update(self._data)
        return result

    __copy__ = copy  # For the copy module

    def __deepcopy__(self, memo):
        """Return a deep copy of a set; used by copy module."""
        # This pre-creates the result and inserts it in the memo
        # early, in case the deep copy recurses into another reference
        # to this same set.  A set can't be an element of itself, but
        # it can certainly contain an object that has a reference to
        # itself.
        from copy import deepcopy
        result = self.__class__()
        memo[id(self)] = result
        data = result._data
        value = True
        for elt in self:
            data[deepcopy(elt, memo)] = value
        return result

    # Standard set operations: union, intersection, both differences.
    # Each has an operator version (e.g. __or__, invoked with |) and a
    # method version (e.g. union).
    # Subtle:  Each pair requires distinct code so that the outcome is
    # correct when the type of other isn't suitable.  For example, if
    # we did "union = __or__" instead, then Set().union(3) would return
    # NotImplemented instead of raising TypeError (albeit that *why* it
    # raises TypeError as-is is also a bit subtle).

    def __or__(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.union(other)

    def union(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)

        Unlike the | operator, other may be any iterable.
        """
        result = self.__class__(self)
        result._update(other)
        return result

    def __and__(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.intersection(other)

    def intersection(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)

        Unlike the & operator, other may be any iterable.
        """
        if not isinstance(other, BaseSet):
            other = Set(other)
        # Iterate over the smaller set and probe the larger one.
        if len(self) <= len(other):
            little, big = self, other
        else:
            little, big = other, self
        common = ifilter(big._data.__contains__, little)
        return self.__class__(common)

    def __xor__(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.symmetric_difference(other)

    def symmetric_difference(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)

        Unlike the ^ operator, other may be any iterable.
        """
        result = self.__class__()
        data = result._data
        value = True
        selfdata = self._data
        try:
            otherdata = other._data
        except AttributeError:
            # Not a set: build a temporary Set to get a dict of its elements.
            otherdata = Set(other)._data
        for elt in ifilterfalse(otherdata.__contains__, selfdata):
            data[elt] = value
        for elt in ifilterfalse(selfdata.__contains__, otherdata):
            data[elt] = value
        return result

    def __sub__(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.difference(other)

    def difference(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)

        Unlike the - operator, other may be any iterable.
        """
        result = self.__class__()
        data = result._data
        try:
            otherdata = other._data
        except AttributeError:
            # Not a set: build a temporary Set to get a dict of its elements.
            otherdata = Set(other)._data
        value = True
        for elt in ifilterfalse(otherdata.__contains__, self):
            data[elt] = value
        return result

    # Membership test

    def __contains__(self, element):
        """Report whether an element is a member of a set.

        (Called in response to the expression `element in self'.)
        """
        try:
            return element in self._data
        except TypeError:
            # The dict lookup raised TypeError (e.g. the element is
            # unhashable).  If the element offers a temporarily immutable
            # stand-in, look that up instead.
            transform = getattr(element, "__as_temporarily_immutable__", None)
            if transform is None:
                raise  # re-raise the TypeError exception we caught
            return transform() in self._data

    # Subset and superset test

    def issubset(self, other):
        """Report whether another set contains this set.

        Raises TypeError if other is not a BaseSet.
        """
        self._binary_sanity_check(other)
        if len(self) > len(other):  # Fast check for obvious cases
            return False
        # The loop body runs only if some element of self is missing
        # from other.
        for elt in ifilterfalse(other._data.__contains__, self):
            return False
        return True

    def issuperset(self, other):
        """Report whether this set contains another set.

        Raises TypeError if other is not a BaseSet.
        """
        self._binary_sanity_check(other)
        if len(self) < len(other):  # Fast check for obvious cases
            return False
        # The loop body runs only if some element of other is missing
        # from self.
        for elt in ifilterfalse(self._data.__contains__, other):
            return False
        return True

    # Inequality comparisons using the is-subset relation.
    __le__ = issubset
    __ge__ = issuperset

    def __lt__(self, other):
        """Report whether this set is a proper subset of other."""
        self._binary_sanity_check(other)
        return len(self) < len(other) and self.issubset(other)

    def __gt__(self, other):
        """Report whether this set is a proper superset of other."""
        self._binary_sanity_check(other)
        return len(self) > len(other) and self.issuperset(other)

    # We inherit object.__hash__, so we must deny this explicitly
    __hash__ = None

    # Assorted helpers

    def _binary_sanity_check(self, other):
        # Check that the other argument to a binary operation is also
        # a set, raising a TypeError otherwise.
        if not isinstance(other, BaseSet):
            raise TypeError("Binary operation only permitted between sets")

    def _compute_hash(self):
        # Calculate hash code for a set by xor'ing the hash codes of
        # the elements.  This ensures that the hash code does not depend
        # on the order in which elements are added to the set.  This is
        # not called __hash__ because a BaseSet should not be hashable;
        # only an ImmutableSet is hashable.
        result = 0
        for elt in self:
            result ^= hash(elt)
        return result

    def _update(self, iterable):
        # The main loop for update() and the subclass __init__() methods.
        # Adds every element of iterable to this set.  An element whose
        # insertion raises TypeError is replaced by the result of its
        # __as_immutable__() method when it has one; otherwise the
        # TypeError propagates.
        data = self._data

        # Use the fast update() method when a dictionary is available.
        if isinstance(iterable, BaseSet):
            data.update(iterable._data)
            return

        value = True

        if type(iterable) in (list, tuple, xrange):
            # Optimized: we know that __iter__() and next() can't
            # raise TypeError, so we can move 'try:' out of the loop.
            it = iter(iterable)
            while True:
                try:
                    for element in it:
                        data[element] = value
                    return
                except TypeError:
                    # 'element' is the item whose insertion just failed.
                    # After handling it, the while loop resumes the same
                    # iterator at the next item.
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise  # re-raise the TypeError exception we caught
                    data[transform()] = value
        else:
            # Safe: only catch TypeError where intended
            for element in iterable:
                try:
                    data[element] = value
                except TypeError:
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise  # re-raise the TypeError exception we caught
                    data[transform()] = value
376 | |
---|
377 | |
---|
class ImmutableSet(BaseSet):
    """Immutable set class.

    Offers the BaseSet operations plus hashing; the hash value is
    computed on first use and then cached in ``_hashcode``.
    """

    __slots__ = ['_hashcode']

    # BaseSet + hashing

    def __init__(self, iterable=None):
        """Construct an immutable set from an optional iterable."""
        self._hashcode = None  # no hash computed yet
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __hash__(self):
        """Return the cached hash, computing it on the first call."""
        cached = self._hashcode
        if cached is None:
            cached = self._hashcode = self._compute_hash()
        return cached

    def __getstate__(self):
        """Return the state as an (element dict, cached hash) pair."""
        return (self._data, self._hashcode)

    def __setstate__(self, state):
        """Restore the pair produced by __getstate__()."""
        contents, hashcode = state
        self._data = contents
        self._hashcode = hashcode
402 | |
---|
class Set(BaseSet):
    """Mutable set class."""

    __slots__ = []

    # BaseSet + operations requiring mutability; no hashing

    def __init__(self, iterable=None):
        """Construct a set from an optional iterable."""
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __getstate__(self):
        # pickle ignores the result of __getstate__ when it is false, so
        # the dict is wrapped in a one-element tuple, which is true even
        # when the set is empty.
        return (self._data,)

    def __setstate__(self, data):
        # Unwrap the one-element tuple produced by __getstate__().
        [contents] = data
        self._data = contents

    # In-place union, intersection, differences.
    # Subtle:  The xyz_update() functions deliberately return None,
    # as do all mutating operations on built-in container types.
    # The __xyz__ spellings have to return self, though.

    def __ior__(self, other):
        """Update a set with the union of itself and another."""
        self._binary_sanity_check(other)
        incoming = other._data
        self._data.update(incoming)
        return self

    def union_update(self, other):
        """Update a set with the union of itself and another."""
        self._update(other)

    def __iand__(self, other):
        """Update a set with the intersection of itself and another."""
        self._binary_sanity_check(other)
        common = self & other
        # Adopt the dict of the freshly built intersection.
        self._data = common._data
        return self

    def intersection_update(self, other):
        """Update a set with the intersection of itself and another."""
        if not isinstance(other, BaseSet):
            # Arbitrary iterable: intersection() converts it itself.
            self._data = self.intersection(other)._data
            return
        self &= other

    def __ixor__(self, other):
        """Update a set with the symmetric difference of itself and another."""
        self._binary_sanity_check(other)
        self.symmetric_difference_update(other)
        return self

    def symmetric_difference_update(self, other):
        """Update a set with the symmetric difference of itself and another."""
        members = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if self is other:
            # x ^ x is empty.  Emptying first also means the loop below
            # has nothing to iterate, so the dict is never modified while
            # it is being iterated.
            self.clear()
        for item in other:
            if item not in members:
                members[item] = True
            else:
                del members[item]

    def __isub__(self, other):
        """Remove all elements of another set from this set."""
        self._binary_sanity_check(other)
        self.difference_update(other)
        return self

    def difference_update(self, other):
        """Remove all elements of another set from this set."""
        members = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if self is other:
            # x - x is empty; after clearing, the loop below is a no-op.
            self.clear()
        for item in other:
            if item in members:
                del members[item]

    # Python dict-like mass mutations: update, clear

    def update(self, iterable):
        """Add all values from an iterable (such as a list or file)."""
        self._update(iterable)

    def clear(self):
        """Remove all elements from this set."""
        self._data.clear()

    # Single-element mutations: add, remove, discard

    def add(self, element):
        """Add an element to a set.

        This has no effect if the element is already present.
        """
        try:
            self._data[element] = True
        except TypeError:
            # Insertion failed: store the element's immutable equivalent
            # if it can provide one.
            freeze = getattr(element, "__as_immutable__", None)
            if freeze is None:
                raise  # re-raise the TypeError exception we caught
            self._data[freeze()] = True

    def remove(self, element):
        """Remove an element from a set; it must be a member.

        If the element is not a member, raise a KeyError.
        """
        try:
            del self._data[element]
        except TypeError:
            # Lookup failed: retry with the element's temporarily
            # immutable stand-in if it can provide one.
            wrap = getattr(element, "__as_temporarily_immutable__", None)
            if wrap is None:
                raise  # re-raise the TypeError exception we caught
            del self._data[wrap()]

    def discard(self, element):
        """Remove an element from a set if it is a member.

        If the element is not a member, do nothing.
        """
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Remove and return an arbitrary set element."""
        # popitem() returns a (key, value) pair; only the key is an element.
        element, _ = self._data.popitem()
        return element

    def __as_immutable__(self):
        # Return a copy of self as an immutable set
        return ImmutableSet(self)

    def __as_temporarily_immutable__(self):
        # Return self wrapped in a temporarily immutable set
        return _TemporarilyImmutableSet(self)
546 | |
---|
547 | |
---|
class _TemporarilyImmutableSet(BaseSet):
    # Wrap a mutable set as if it was temporarily immutable.
    # This only supplies hashing and equality comparisons.

    def __init__(self, set):
        # Keep a reference to the wrapped set and share (not copy) its
        # element dict.
        wrapped = set
        self._set = wrapped
        self._data = wrapped._data  # Read by the __eq__() inherited from BaseSet

    def __hash__(self):
        # Computed from the wrapped set's elements on every call; the
        # result is not cached.
        wrapped = self._set
        return wrapped._compute_hash()