1 | # Copyright (C) 2001-2010 Python Software Foundation |
---|
2 | # Contact: email-sig@python.org |
---|
3 | |
---|
4 | """Classes to generate plain text from a message object tree.""" |
---|
5 | |
---|
6 | __all__ = ['Generator', 'DecodedGenerator'] |
---|
7 | |
---|
8 | import re |
---|
9 | import sys |
---|
10 | import time |
---|
11 | import random |
---|
12 | import warnings |
---|
13 | |
---|
14 | from cStringIO import StringIO |
---|
15 | from email.header import Header |
---|
16 | |
---|
# Separator used when building handler-method names from MIME type parts.
UNDERSCORE = '_'
# Line terminator used when joining flattened text.
NL = '\n'

# Matches "From " at the start of any line of a body (mbox From_ lines).
fcre = re.compile(r'^From ', flags=re.MULTILINE)
---|
21 | |
---|
def _is8bitstring(s):
    """Return True if s is a byte string that is not pure us-ascii.

    Anything that is not a `str' instance yields False.
    """
    if not isinstance(s, str):
        return False
    try:
        unicode(s, 'us-ascii')
    except UnicodeError:
        # Decoding failed, so the string carries raw 8bit data.
        return True
    else:
        return False
---|
29 | |
---|
30 | |
---|
31 | |
---|
class Generator:
    """Flatten a Message object tree into plain text.

    The text is written to the file-like object handed to the constructor.
    """

    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Set up a generator that flattens messages to outfp.

        outfp is the file-like object the message is written to; the only
        requirement is that it provides a write() method.

        mangle_from_ is an optional flag.  When it is True (the default),
        From_ lines occurring in the message body are escaped by prefixing
        them with a `>' character.

        maxheaderlen is the optional maximum length of a non-continued
        header line.  A header line longer than this (counted in
        characters, with tabs expanded to 8 spaces) is split as defined by
        the Header class.  Passing zero turns header wrapping off.  The
        default of 78 is the value recommended (but not required) by
        RFC 2822.
        """
        self._maxheaderlen = maxheaderlen
        self._mangle_from_ = mangle_from_
        self._fp = outfp

    def write(self, s):
        """Write s to the underlying output file object."""
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Write the message tree rooted at msg to the output file given
        when this Generator was created.

        When unixfrom is true, a Unix From_ delimiter is printed before the
        first object of the tree; if the message does not carry one, a
        `standard' delimiter is made up.  The default, False, suppresses
        the From_ delimiter altogether.

        No From_ line is ever printed for subobjects.
        """
        if unixfrom:
            envelope = (msg.get_unixfrom() or
                        'From nobody ' + time.ctime(time.time()))
            print >> self._fp, envelope
        self._write(msg)

    def clone(self, fp):
        """Return a new generator of the same class, configured with the
        same options but writing to fp."""
        factory = self.__class__
        return factory(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # The headers cannot be written first.  Suppose a multipart message
        # contains its own boundary string somewhere in its body: a new
        # boundary then has to be computed /before/ the headers go out, so
        # that the Content-Type: parameter that gets written is correct.
        #
        # To keep the _handle_*() methods simple, everything the subpart
        # handlers write is captured in a StringIO.  Then we write the
        # headers followed by the captured text.  Handlers are thereby free
        # to modify the Content-Type: header while they run.
        body_buffer = StringIO()
        real_fp = self._fp
        try:
            self._fp = body_buffer
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler failed.
            self._fp = real_fp
        # Give the message object the chance to write its own headers;
        # otherwise fall back to the generic header writer.
        custom_writer = getattr(msg, '_write_headers', None)
        if custom_writer is not None:
            custom_writer(self)
        else:
            self._write_headers(msg)
        self._fp.write(body_buffer.getvalue())

    def _dispatch(self, msg):
        # Handler lookup order, based on the message's Content-Type:
        #   1. self._handle_<maintype>_<subtype>()
        #   2. self._handle_<maintype>()
        #   3. self._writeBody()
        # Dashes in the type names are mapped to underscores.
        maintype = msg.get_content_maintype()
        subtype = msg.get_content_subtype()
        full_type = UNDERSCORE.join((maintype, subtype)).replace('-', '_')
        handler = getattr(self, '_handle_' + full_type, None)
        if handler is None:
            main_only = maintype.replace('-', '_')
            handler = getattr(self, '_handle_' + main_only, None)
        if handler is None:
            handler = self._writeBody
        handler(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for name, value in msg.items():
            # The trailing comma keeps the value on the same line.
            print >> self._fp, '%s:' % name,
            if self._maxheaderlen == 0:
                # Wrapping was explicitly disabled.
                rendered = value
            elif isinstance(value, Header):
                # A Header instance knows how to encode itself.
                rendered = value.encode()
            elif _is8bitstring(value):
                # Raw 8bit data in a byte string: the encoding is unknown,
                # so the string cannot be split safely.  An ascii-subset
                # encoding would survive a plain ascii split, but a
                # multibyte one could be broken by it, and there is no way
                # to tell which we have.  Leaving the string unsplit, at
                # the risk of an over-long line, seems the least harmful.
                rendered = value
            else:
                # Let Header do the wrapping.  This is fundamentally broken
                # though: idempotency is lost for header strings that were
                # continued with tabs, because they are now continued with
                # spaces.  Before bug 1974 was fixed it was broken the
                # other way round.  Either way, we lose.
                rendered = Header(value, maxlinelen=self._maxheaderlen,
                                  header_name=name).encode()
            print >> self._fp, rendered
        # Headers and body are always separated by a blank line.
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        body = msg.get_payload()
        if body is None:
            return
        if not isinstance(body, basestring):
            raise TypeError('string payload expected: %s' % type(body))
        if self._mangle_from_:
            body = fcre.sub('>From ', body)
        self._fp.write(body)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # Strategy: flatten every part on its own, combine the results, and
        # make sure the boundary that is used does not occur in the payload.
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        flattened = []
        for part in subparts:
            part_buffer = StringIO()
            part_generator = self.clone(part_buffer)
            part_generator.flatten(part, unixfrom=False)
            flattened.append(part_buffer.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Invent a boundary that occurs in none of the message texts.
            boundary = _make_boundary(NL.join(flattened))
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        preamble = msg.preamble
        if preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # The first body-part follows the opening boundary directly; every
        # later one is an encapsulation:
        #   delimiter transport-padding CRLF body-part
        for position, text in enumerate(flattened):
            if position:
                print >> self._fp, '\n--' + boundary
            self._fp.write(text)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--' + NL)
        epilogue = msg.epilogue
        if epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        # Per RFC1847 2.1 the contents of signed parts must stay unmodified
        # for the signature to remain intact, so header wrapping is switched
        # off while the part is written.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        saved_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = saved_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        # Writing the header blocks straight to self's file object would
        # leave an extra newline between the last header block and the
        # boundary, so each block is flattened separately first.  Sigh.
        blocks = []
        for part in msg.get_payload():
            part_buffer = StringIO()
            part_generator = self.clone(part_buffer)
            part_generator.flatten(part, unixfrom=False)
            text = part_buffer.getvalue()
            # Drop the unnecessary trailing empty line.
            if text.endswith('\n'):
                text = text[:-1]
            blocks.append(text)
        # Joining with a newline puts an empty line between consecutive
        # blocks without adding an extra one after the last block.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        inner_buffer = StringIO()
        inner_generator = self.clone(inner_buffer)
        # A message/rfc822 part should have a multipart sequence of length 1
        # as its payload, whose zeroth element is the Message object for
        # the subpart.  That object is flattened and written out.
        # The payload turns out to be a string instead when, and only when,
        # HeaderParser is used on a message of mime type message/rfc822.
        # Such messages are generated by, for example, Groupwise when
        # forwarding unadorned messages.  (Issue 7970.)  In that case the
        # string body is emitted as is.
        payload = msg.get_payload()
        if isinstance(payload, list):
            inner_generator.flatten(msg.get_payload(0), unixfrom=False)
            payload = inner_buffer.getvalue()
        self._fp.write(payload)
---|
291 | |
---|
292 | |
---|
293 | |
---|
# Default placeholder text that DecodedGenerator uses when no fmt argument is
# given; it is expanded with a %(keyword)s mapping describing the part.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
---|
295 | |
---|
class DecodedGenerator(Generator):
    """Produce a text rendering of a message.

    Behaves like the Generator base class, but each non-text part is
    replaced by a format string that describes the part.
    """

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Same as Generator.__init__(), plus one extra optional argument.

        Every subpart of the message is visited.  For a subpart whose main
        type is `text', the decoded payload of the subpart is printed.

        For any other subpart, the format string fmt is printed in place of
        the payload.  fmt is expanded with these keywords (in %(keyword)s
        format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        fmt defaults to None, which stands for

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers produce no output of their own.
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
                continue
            # Any other part is replaced by the expanded format string.
            print >> self, self._fmt % {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
---|
349 | |
---|
350 | |
---|
351 | |
---|
# Helper
# _width is the length of repr(sys.maxint-1); _fmt is the resulting
# '%0<width>d' format, which _make_boundary() applies to its random token so
# the token is zero-padded to that width.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width
---|
355 | |
---|
def _make_boundary(text=None):
    """Return a randomly generated MIME boundary string.

    When text is given, the returned boundary is guaranteed not to occur in
    it as a delimiter line (`--boundary' or `--boundary--').
    """
    token = random.randrange(sys.maxint)
    base = '=' * 15 + _fmt % token + '=='
    if text is None:
        return base
    candidate = base
    suffix = 0
    # On a collision, append `.0', `.1', ... to the base until the
    # candidate no longer shows up as a delimiter line in the text.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = base + '.' + str(suffix)
        suffix += 1
    return candidate
---|