1 | r"""File-like objects that read from or write to a string buffer. |
---|
2 | |
---|
3 | This implements (nearly) all stdio methods. |
---|
4 | |
---|
5 | f = StringIO() # ready for writing |
---|
6 | f = StringIO(buf) # ready for reading |
---|
7 | f.close() # explicitly release resources held |
---|
8 | flag = f.isatty() # always false |
---|
9 | pos = f.tell() # get current position |
---|
10 | f.seek(pos) # set current position |
---|
11 | f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF |
---|
12 | buf = f.read() # read until EOF |
---|
13 | buf = f.read(n) # read up to n bytes |
---|
14 | buf = f.readline() # read until end of line ('\n') or EOF |
---|
15 | list = f.readlines()# list of f.readline() results until EOF |
---|
16 | f.truncate([size]) # truncate file to at most size (default: current pos) |
---|
17 | f.write(buf) # write at current position |
---|
18 | f.writelines(list) # for line in list: f.write(line) |
---|
19 | f.getvalue() # return whole file's contents as a string |
---|
20 | |
---|
21 | Notes: |
---|
22 | - Using a real file is often faster (but less convenient). |
---|
23 | - There's also a much faster implementation in C, called cStringIO, but |
---|
24 | it's not subclassable. |
---|
25 | - fileno() is left unimplemented so that code which uses it triggers |
---|
26 | an exception early. |
---|
27 | - Seeking far beyond EOF and then writing will insert real null |
---|
28 | bytes that occupy space in the buffer. |
---|
29 | - There's a simple test set (see end of this file). |
---|
30 | """ |
---|
31 | try: |
---|
32 | from errno import EINVAL |
---|
33 | except ImportError: |
---|
34 | EINVAL = 22 |
---|
35 | |
---|
36 | __all__ = ["StringIO"] |
---|
37 | |
---|
38 | def _complain_ifclosed(closed): |
---|
39 | if closed: |
---|
40 | raise ValueError, "I/O operation on closed file" |
---|
41 | |
---|
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Internally the contents live in two places: self.buf holds the
    consolidated string and self.buflist holds fragments written since the
    last consolidation.  self.len is the logical length of the whole file
    and self.pos the current position.
    """
    def __init__(self, buf = ''):
        """Create a file-like object, optionally pre-filled with *buf*.

        A non-string *buf* is converted with str().
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def _consolidate(self):
        """Fold any pending fragments in self.buflist into self.buf."""
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        """Return self; the object is its own line iterator."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        _complain_ifclosed(self.closed)
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Calling close() more than once is allowed; later calls do nothing.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos
            # Fragments that were written but never consolidated hold
            # memory too, so release them along with the main buffer.
            self.buflist = []

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting position below zero is clamped to zero.  There is no
        return value.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n = -1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If n is negative, None or omitted, read all data until EOF is
        reached. The bytes are returned as a string object. An empty
        string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            # No newline left: the line runs to the end of the file.
            newpos = self.len
        else:
            newpos = i+1
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError (EINVAL) if size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Record the length actually kept, not the requested size: when
        # size is larger than the contents nothing is cut, and self.len
        # must keep matching the buffer so that later writes pad correctly
        # and seek(0, 2) / tell() report the true end of file.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        A non-string argument is converted with str().  Writing at a
        position beyond the current end first pads the gap with null
        bytes.  There is no return value.
        """
        _complain_ifclosed(self.closed)
        # NOTE: this emptiness test runs before the str() conversion, so
        # falsy non-string values (0, None, [] ...) are skipped as well.
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: plain append at end of file.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the gap with null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite inside existing data: keep the text before the
            # write position and whatever survives after the new text.
            self._consolidate()
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing needs flushing; this only checks that the file is open.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        return self.buf
---|
274 | |
---|
275 | |
---|
276 | # A little test suite |
---|
277 | |
---|
def test():
    """Exercise StringIO against the contents of a text file.

    The file is named by the first command-line argument and defaults to
    '/etc/passwd'.  Progress is printed to standard output; RuntimeError
    is raised as soon as one of the checks fails.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file both as lines and as one string through a single
    # handle, and make sure that handle is closed even if reading fails.
    source = open(path, 'r')
    try:
        lines = source.readlines()
        source.seek(0)
        text = source.read()
    finally:
        source.close()
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    # Single-argument print(...) produces the same output whether print
    # is a statement or a function.
    print('File length = %s' % (length,))
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % (repr(f.readline()),))
    print('Position = %s' % (f.tell(),))
    line = f.readline()
    print('Second line = %s' % (repr(line),))
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % (len(remaining),))
    print('File length = %s' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')
    # Floor division keeps the size an integer on every Python version.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %s' % (f.tell(),))
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()


if __name__ == '__main__':
    test()
---|