1
2 """Text wrapping and filling.
3 """
4
5
6
7
8
9 __revision__ = "$Id: textwrap.py 9228 2008-12-13 04:50:49Z friedelwolff $"
10
11 import string, re
12
13
14
15
# Compatibility shim for very old interpreters that have no True/False
# builtins.  The fallback is installed through globals() rather than by a
# literal assignment, because `(True, False) = (1, 0)` is rejected at compile
# time on Python 3 (True/False are keywords there) and would make the whole
# module unparsable even though the except branch never runs.
try:
    True, False
except NameError:
    globals().update({'True': 1, 'False': 0})
20
# Names exported by a star-import of this module.  Only the wrapper class and
# the wrap()/fill() convenience functions are listed; anything else defined in
# the module must be imported by name.
__all__ = ['TextWrapper', 'wrap', 'fill']
22
23
24
25
26
27
28
29
30
# The characters treated as whitespace by the wrapper: tab, newline, vertical
# tab, form feed, carriage return and space.  The list is spelled out
# explicitly instead of being taken from the `string` module.
_whitespace = '\t\n\x0b\x0c\r '

# Text type that needs the ordinal-keyed translation table.  On Python 2 this
# is `unicode`; Python 3 has no such name, and `str` is handled by the first
# branch in _munge_whitespace() anyway.
try:
    _unicode_type = unicode
except NameError:
    _unicode_type = str


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
    """

    # Translation table mapping every character of _whitespace to a space.
    # string.maketrans() only exists on Python 2; str.maketrans() is the
    # Python 3 equivalent and yields a table usable by str.translate().
    try:
        whitespace_trans = string.maketrans(_whitespace,
                                            ' ' * len(_whitespace))
    except AttributeError:
        whitespace_trans = str.maketrans(_whitespace, ' ' * len(_whitespace))

    # Same mapping keyed by code point, for unicode.translate().
    unicode_whitespace_trans = {}
    uspace = ord(' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace

    # Splits text into chunks.  Because the whole pattern is one capturing
    # group, re.split() keeps the separators as chunks of their own.
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'%|'                                     # a percent sign
        r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # Recognises a chunk that ends a sentence.  The trailing \Z anchors the
    # match to the end of the chunk; without it a chunk such as "file.txt"
    # would match on its interior "e." and wrongly receive a double space.
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z'                # end of chunk
                                 % getattr(string, 'lowercase',
                                           string.ascii_lowercase))

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 drop_whitespace=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True):
        """Store the wrapping options; see the class docstring for each."""
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.drop_whitespace = drop_whitespace
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs (if expand_tabs is set) and
        convert every character of _whitespace to a space (if
        replace_whitespace is set).  Eg. "foo\\nbar" becomes "foo bar".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            # The two text types need differently shaped translation tables.
            if isinstance(text, str):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, _unicode_type):
                text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        """
        # re.split() emits empty strings between adjacent separators; drop
        # them.  A real list (not a lazy filter object) is returned because
        # _wrap_chunks() reverses it in place.
        return [chunk for chunk in self.wordsep_re.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", _munge_whitespace()
        and _split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        pat = self.sentence_end_re
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and pat.search(chunks[i]):
                chunks[i + 1] = "  "
                # Skip the separator that was just widened.
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  'reversed_chunks' and 'cur_line'
        are modified in place.
        """
        # When the indent leaves no room at all (width < 1) still take one
        # character per pass so the caller's loop always makes progress.
        # Otherwise use exactly the room that is left, which may be zero
        # when the current line is already full.
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        if self.break_long_words:
            # Put as much of the long chunk on this line as fits.  When
            # nothing fits, leave the line alone: the chunk will be broken
            # at the start of the next line instead of pushing this one
            # past 'width'.
            if space_left > 0:
                cur_line.append(reversed_chunks[-1][:space_left])
                reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Not allowed to break the word: if the line is still empty, the
        # whole chunk goes on it (making the line too long); otherwise it
        # is left for the next line.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Raises ValueError if 'self.width' is not positive.  'chunks' is
        reversed and consumed in place.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Work on the list as a stack so that taking the next chunk is a
        # cheap pop() from the end.
        chunks.reverse()

        while chunks:
            # Chunks collected for the line being built, and their total
            # length.
            cur_line = []
            cur_len = 0

            # The first output line gets a different indent from the rest.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Room left for text once the indent is accounted for.
            width = self.width - len(indent)

            # Drop whitespace at the start of a line, except on the very
            # first line.
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                l = len(chunks[-1])

                if cur_len + l <= width:
                    # The chunk fits on the current line.
                    cur_line.append(chunks.pop())
                    cur_len += l
                else:
                    # This line is full.
                    break

            # The next chunk would not fit on any line by itself.
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # Drop a trailing all-whitespace chunk.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Emit the line, unless everything on it was dropped.
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Whitespace is first normalised by _munge_whitespace()
        according to the expand_tabs and replace_whitespace options.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
294
295
296
297
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Convenience wrapper: builds a one-off TextWrapper with the given
    'width' and hands 'text' to its wrap() method, so the result is a
    list of lines no longer than 'width' columns.  Any extra keyword
    arguments are passed straight to the TextWrapper constructor; see
    that class for the options available to customize wrapping
    behaviour.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
310
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Convenience wrapper: builds a one-off TextWrapper with the given
    'width' and hands 'text' to its fill() method, so the result is a
    single string holding the whole wrapped paragraph.  Any extra
    keyword arguments are passed straight to the TextWrapper
    constructor; see that class for the options available to customize
    wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
322
323
324
325
# Matches a line consisting only of spaces and/or tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the leading spaces/tabs of every line that has other content.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: a line indented with spaces and a line indented with
    a tab are considered to have no common leading whitespace.

    Lines that contain only spaces and tabs are reduced to empty lines
    and do not take part in determining the common margin.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    for indent in _leading_whitespace_re.findall(text):
        if margin is None:
            # First non-blank line: its indent is the initial candidate.
            margin = indent
        elif indent.startswith(margin):
            # This line is indented at least as deeply as the current
            # candidate: the candidate stands.
            pass
        elif margin.startswith(indent):
            # This line is less deeply indented but consistent with the
            # candidate: it becomes the new candidate.
            margin = indent
        else:
            # The two indents disagree (e.g. spaces versus a tab): there
            # is no common margin.
            margin = ""
            break

    if margin:
        # margin holds only spaces and tabs, so it is safe to splice into
        # the pattern unescaped.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
376
if __name__ == "__main__":
    # Small smoke test when the module is run as a script.  The call form
    # with a single parenthesised argument prints the same text whether
    # `print` is a statement (Python 2) or a function (Python 3).
    print(dedent("Hello there.\n This is indented."))
381