1
2
3 """Diff Match and Patch
4
5 Copyright 2006 Google Inc.
6 http://code.google.com/p/google-diff-match-patch/
7
8 Licensed under the Apache License, Version 2.0 (the "License");
9 you may not use this file except in compliance with the License.
10 You may obtain a copy of the License at
11
12 http://www.apache.org/licenses/LICENSE-2.0
13
14 Unless required by applicable law or agreed to in writing, software
15 distributed under the License is distributed on an "AS IS" BASIS,
16 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 See the License for the specific language governing permissions and
18 limitations under the License.
19 """
20
21 """Functions for diff, match and patch.
22
23 Computes the difference between two texts to create a patch.
24 Applies the patch onto another text, allowing for errors.
25 """
26
27 __author__ = 'fraser@google.com (Neil Fraser)'
28
29 import time
30 import re
31
33 """Class containing the diff, match and patch methods.
34
35 Also contains the behaviour settings.
36 """
37
39 """Inits a diff_match_patch object with default settings.
40 Redefine these in your program to override the defaults.
41 """
42
43
44 self.Diff_Timeout = 1.0
45
46 self.Diff_EditCost = 4
47
48
49 self.Diff_DualThreshold = 32
50
51 self.Match_Threshold = 0.5
52
53
54
55 self.Match_Distance = 1000
56
57
58
59
60 self.Patch_DeleteThreshold = 0.5
61
62 self.Patch_Margin = 4
63
64
65
66
67
68 self.Match_MaxBits = 32
69
70
71
72
73
74
75 DIFF_DELETE = -1
76 DIFF_INSERT = 1
77 DIFF_EQUAL = 0
78
79 - def diff_main(self, text1, text2, checklines=True):
80 """Find the differences between two texts. Simplifies the problem by
81 stripping any common prefix or suffix off the texts before diffing.
82
83 Args:
84 text1: Old string to be diffed.
85 text2: New string to be diffed.
86 checklines: Optional speedup flag. If present and false, then don't run
87 a line-level diff first to identify the changed areas.
88 Defaults to true, which does a faster, slightly less optimal diff.
89
90 Returns:
91 Array of changes.
92 """
93
94
95 if text1 == None or text2 == None:
96 raise ValueError("Null inputs. (diff_main)")
97
98
99 if text1 == text2:
100 return [(self.DIFF_EQUAL, text1)]
101
102
103 commonlength = self.diff_commonPrefix(text1, text2)
104 commonprefix = text1[:commonlength]
105 text1 = text1[commonlength:]
106 text2 = text2[commonlength:]
107
108
109 commonlength = self.diff_commonSuffix(text1, text2)
110 if commonlength == 0:
111 commonsuffix = ''
112 else:
113 commonsuffix = text1[-commonlength:]
114 text1 = text1[:-commonlength]
115 text2 = text2[:-commonlength]
116
117
118 diffs = self.diff_compute(text1, text2, checklines)
119
120
121 if commonprefix:
122 diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
123 if commonsuffix:
124 diffs.append((self.DIFF_EQUAL, commonsuffix))
125 self.diff_cleanupMerge(diffs)
126 return diffs
127
129 """Find the differences between two texts. Assumes that the texts do not
130 have any common prefix or suffix.
131
132 Args:
133 text1: Old string to be diffed.
134 text2: New string to be diffed.
135 checklines: Speedup flag. If false, then don't run a line-level diff
136 first to identify the changed areas.
137 If true, then run a faster, slightly less optimal diff.
138
139 Returns:
140 Array of changes.
141 """
142 if not text1:
143
144 return [(self.DIFF_INSERT, text2)]
145
146 if not text2:
147
148 return [(self.DIFF_DELETE, text1)]
149
150 if len(text1) > len(text2):
151 (longtext, shorttext) = (text1, text2)
152 else:
153 (shorttext, longtext) = (text1, text2)
154 i = longtext.find(shorttext)
155 if i != -1:
156
157 diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
158 (self.DIFF_INSERT, longtext[i + len(shorttext):])]
159
160 if len(text1) > len(text2):
161 diffs[0] = (self.DIFF_DELETE, diffs[0][1])
162 diffs[2] = (self.DIFF_DELETE, diffs[2][1])
163 return diffs
164 longtext = shorttext = None
165
166
167 hm = self.diff_halfMatch(text1, text2)
168 if hm:
169
170 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
171
172 diffs_a = self.diff_main(text1_a, text2_a, checklines)
173 diffs_b = self.diff_main(text1_b, text2_b, checklines)
174
175 return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
176
177
178 if checklines and (len(text1) < 100 or len(text2) < 100):
179 checklines = False
180 if checklines:
181
182 (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
183
184 diffs = self.diff_map(text1, text2)
185 if not diffs:
186 diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
187 if checklines:
188
189 self.diff_charsToLines(diffs, linearray)
190
191 self.diff_cleanupSemantic(diffs)
192
193
194
195 diffs.append((self.DIFF_EQUAL, ''))
196 pointer = 0
197 count_delete = 0
198 count_insert = 0
199 text_delete = ''
200 text_insert = ''
201 while pointer < len(diffs):
202 if diffs[pointer][0] == self.DIFF_INSERT:
203 count_insert += 1
204 text_insert += diffs[pointer][1]
205 elif diffs[pointer][0] == self.DIFF_DELETE:
206 count_delete += 1
207 text_delete += diffs[pointer][1]
208 elif diffs[pointer][0] == self.DIFF_EQUAL:
209
210 if count_delete >= 1 and count_insert >= 1:
211
212 a = self.diff_main(text_delete, text_insert, False)
213 diffs[pointer - count_delete - count_insert : pointer] = a
214 pointer = pointer - count_delete - count_insert + len(a)
215 count_insert = 0
216 count_delete = 0
217 text_delete = ''
218 text_insert = ''
219
220 pointer += 1
221
222 diffs.pop()
223 return diffs
224
226 """Split two texts into an array of strings. Reduce the texts to a string
227 of hashes where each Unicode character represents one line.
228
229 Args:
230 text1: First string.
231 text2: Second string.
232
233 Returns:
234 Three element tuple, containing the encoded text1, the encoded text2 and
235 the array of unique strings. The zeroth element of the array of unique
236 strings is intentionally blank.
237 """
238 lineArray = []
239 lineHash = {}
240
241
242
243 lineArray.append('')
244
245 def diff_linesToCharsMunge(text):
246 """Split a text into an array of strings. Reduce the texts to a string
247 of hashes where each Unicode character represents one line.
248 Modifies linearray and linehash through being a closure.
249
250 Args:
251 text: String to encode.
252
253 Returns:
254 Encoded string.
255 """
256 chars = []
257
258
259
260 lineStart = 0
261 lineEnd = -1
262 while lineEnd < len(text) - 1:
263 lineEnd = text.find('\n', lineStart)
264 if lineEnd == -1:
265 lineEnd = len(text) - 1
266 line = text[lineStart:lineEnd + 1]
267 lineStart = lineEnd + 1
268
269 if line in lineHash:
270 chars.append(unichr(lineHash[line]))
271 else:
272 lineArray.append(line)
273 lineHash[line] = len(lineArray) - 1
274 chars.append(unichr(len(lineArray) - 1))
275 return "".join(chars)
276
277 chars1 = diff_linesToCharsMunge(text1)
278 chars2 = diff_linesToCharsMunge(text2)
279 return (chars1, chars2, lineArray)
280
282 """Rehydrate the text in a diff from a string of line hashes to real lines
283 of text.
284
285 Args:
286 diffs: Array of diff tuples.
287 lineArray: Array of unique strings.
288 """
289 for x in xrange(len(diffs)):
290 text = []
291 for char in diffs[x][1]:
292 text.append(lineArray[ord(char)])
293 diffs[x] = (diffs[x][0], "".join(text))
294
296 """Explore the intersection points between the two texts.
297
298 Args:
299 text1: Old string to be diffed.
300 text2: New string to be diffed.
301
302 Returns:
303 Array of diff tuples or None if no diff available.
304 """
305
306
307 s_end = time.time() + self.Diff_Timeout
308
309 text1_length = len(text1)
310 text2_length = len(text2)
311 max_d = text1_length + text2_length - 1
312 doubleEnd = self.Diff_DualThreshold * 2 < max_d
313
314
315
316
317 v_map1 = []
318 v_map2 = []
319 v1 = {}
320 v2 = {}
321 v1[1] = 0
322 v2[1] = 0
323 footsteps = {}
324 done = False
325
326
327 front = (text1_length + text2_length) % 2
328 for d in xrange(max_d):
329
330 if self.Diff_Timeout > 0 and time.time() > s_end:
331 return None
332
333
334 v_map1.append({})
335 for k in xrange(-d, d + 1, 2):
336 if k == -d or k != d and v1[k - 1] < v1[k + 1]:
337 x = v1[k + 1]
338 else:
339 x = v1[k - 1] + 1
340 y = x - k
341 if doubleEnd:
342 footstep = str((x << 32) + y)
343 if front and footstep in footsteps:
344 done = True
345 if not front:
346 footsteps[footstep] = d
347
348 while (not done and x < text1_length and y < text2_length and
349 text1[x] == text2[y]):
350 x += 1
351 y += 1
352 if doubleEnd:
353 footstep = str((x << 32) + y)
354 if front and footstep in footsteps:
355 done = True
356 if not front:
357 footsteps[footstep] = d
358
359 v1[k] = x
360 v_map1[d][(x << 32) + y] = True
361 if x == text1_length and y == text2_length:
362
363 return self.diff_path1(v_map1, text1, text2)
364 elif done:
365
366 v_map2 = v_map2[:footsteps[footstep] + 1]
367 a = self.diff_path1(v_map1, text1[:x], text2[:y])
368 b = self.diff_path2(v_map2, text1[x:], text2[y:])
369 return a + b
370
371 if doubleEnd:
372
373 v_map2.append({})
374 for k in xrange(-d, d + 1, 2):
375 if k == -d or k != d and v2[k - 1] < v2[k + 1]:
376 x = v2[k + 1]
377 else:
378 x = v2[k - 1] + 1
379 y = x - k
380 footstep = str((text1_length - x << 32) + text2_length - y)
381 if not front and footstep in footsteps:
382 done = True
383 if front:
384 footsteps[footstep] = d
385 while (not done and x < text1_length and y < text2_length and
386 text1[-x - 1] == text2[-y - 1]):
387 x += 1
388 y += 1
389 footstep = str((text1_length - x << 32) + text2_length - y)
390 if not front and footstep in footsteps:
391 done = True
392 if front:
393 footsteps[footstep] = d
394
395 v2[k] = x
396 v_map2[d][(x << 32) + y] = True
397 if done:
398
399 v_map1 = v_map1[:footsteps[footstep] + 1]
400 a = self.diff_path1(v_map1, text1[:text1_length - x],
401 text2[:text2_length - y])
402 b = self.diff_path2(v_map2, text1[text1_length - x:],
403 text2[text2_length - y:])
404 return a + b
405
406
407 return None
408
410 """Work from the middle back to the start to determine the path.
411
412 Args:
413 v_map: Array of paths.
414 text1: Old string fragment to be diffed.
415 text2: New string fragment to be diffed.
416
417 Returns:
418 Array of diff tuples.
419 """
420 path = []
421 x = len(text1)
422 y = len(text2)
423 last_op = None
424 for d in xrange(len(v_map) - 2, -1, -1):
425 while True:
426 if (x - 1 << 32) + y in v_map[d]:
427 x -= 1
428 if last_op == self.DIFF_DELETE:
429 path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
430 else:
431 path[:0] = [(self.DIFF_DELETE, text1[x])]
432 last_op = self.DIFF_DELETE
433 break
434 elif (x << 32) + y - 1 in v_map[d]:
435 y -= 1
436 if last_op == self.DIFF_INSERT:
437 path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
438 else:
439 path[:0] = [(self.DIFF_INSERT, text2[y])]
440 last_op = self.DIFF_INSERT
441 break
442 else:
443 x -= 1
444 y -= 1
445 assert text1[x] == text2[y], ("No diagonal. " +
446 "Can't happen. (diff_path1)")
447 if last_op == self.DIFF_EQUAL:
448 path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
449 else:
450 path[:0] = [(self.DIFF_EQUAL, text1[x])]
451 last_op = self.DIFF_EQUAL
452 return path
453
455 """Work from the middle back to the end to determine the path.
456
457 Args:
458 v_map: Array of paths.
459 text1: Old string fragment to be diffed.
460 text2: New string fragment to be diffed.
461
462 Returns:
463 Array of diff tuples.
464 """
465 path = []
466 x = len(text1)
467 y = len(text2)
468 last_op = None
469 for d in xrange(len(v_map) - 2, -1, -1):
470 while True:
471 if (x - 1 << 32) + y in v_map[d]:
472 x -= 1
473 if last_op == self.DIFF_DELETE:
474 path[-1] = (self.DIFF_DELETE, path[-1][1] + text1[-x - 1])
475 else:
476 path.append((self.DIFF_DELETE, text1[-x - 1]))
477 last_op = self.DIFF_DELETE
478 break
479 elif (x << 32) + y - 1 in v_map[d]:
480 y -= 1
481 if last_op == self.DIFF_INSERT:
482 path[-1] = (self.DIFF_INSERT, path[-1][1] + text2[-y - 1])
483 else:
484 path.append((self.DIFF_INSERT, text2[-y - 1]))
485 last_op = self.DIFF_INSERT
486 break
487 else:
488 x -= 1
489 y -= 1
490 assert text1[-x - 1] == text2[-y - 1], ("No diagonal. " +
491 "Can't happen. (diff_path2)")
492 if last_op == self.DIFF_EQUAL:
493 path[-1] = (self.DIFF_EQUAL, path[-1][1] + text1[-x - 1])
494 else:
495 path.append((self.DIFF_EQUAL, text1[-x - 1]))
496 last_op = self.DIFF_EQUAL
497 return path
498
500 """Determine the common prefix of two strings.
501
502 Args:
503 text1: First string.
504 text2: Second string.
505
506 Returns:
507 The number of characters common to the start of each string.
508 """
509
510 if not text1 or not text2 or text1[0] != text2[0]:
511 return 0
512
513
514 pointermin = 0
515 pointermax = min(len(text1), len(text2))
516 pointermid = pointermax
517 pointerstart = 0
518 while pointermin < pointermid:
519 if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
520 pointermin = pointermid
521 pointerstart = pointermin
522 else:
523 pointermax = pointermid
524 pointermid = int((pointermax - pointermin) / 2 + pointermin)
525 return pointermid
526
528 """Determine the common suffix of two strings.
529
530 Args:
531 text1: First string.
532 text2: Second string.
533
534 Returns:
535 The number of characters common to the end of each string.
536 """
537
538 if not text1 or not text2 or text1[-1] != text2[-1]:
539 return 0
540
541
542 pointermin = 0
543 pointermax = min(len(text1), len(text2))
544 pointermid = pointermax
545 pointerend = 0
546 while pointermin < pointermid:
547 if (text1[-pointermid:len(text1) - pointerend] ==
548 text2[-pointermid:len(text2) - pointerend]):
549 pointermin = pointermid
550 pointerend = pointermin
551 else:
552 pointermax = pointermid
553 pointermid = int((pointermax - pointermin) / 2 + pointermin)
554 return pointermid
555
557 """Do the two texts share a substring which is at least half the length of
558 the longer text?
559
560 Args:
561 text1: First string.
562 text2: Second string.
563
564 Returns:
565 Five element Array, containing the prefix of text1, the suffix of text1,
566 the prefix of text2, the suffix of text2 and the common middle. Or None
567 if there was no match.
568 """
569 if len(text1) > len(text2):
570 (longtext, shorttext) = (text1, text2)
571 else:
572 (shorttext, longtext) = (text1, text2)
573 if len(longtext) < 10 or len(shorttext) < 1:
574 return None
575
576 def diff_halfMatchI(longtext, shorttext, i):
577 """Does a substring of shorttext exist within longtext such that the
578 substring is at least half the length of longtext?
579 Closure, but does not reference any external variables.
580
581 Args:
582 longtext: Longer string.
583 shorttext: Shorter string.
584 i: Start index of quarter length substring within longtext.
585
586 Returns:
587 Five element Array, containing the prefix of longtext, the suffix of
588 longtext, the prefix of shorttext, the suffix of shorttext and the
589 common middle. Or None if there was no match.
590 """
591 seed = longtext[i:i + len(longtext) / 4]
592 best_common = ''
593 j = shorttext.find(seed)
594 while j != -1:
595 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
596 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
597 if len(best_common) < suffixLength + prefixLength:
598 best_common = (shorttext[j - suffixLength:j] +
599 shorttext[j:j + prefixLength])
600 best_longtext_a = longtext[:i - suffixLength]
601 best_longtext_b = longtext[i + prefixLength:]
602 best_shorttext_a = shorttext[:j - suffixLength]
603 best_shorttext_b = shorttext[j + prefixLength:]
604 j = shorttext.find(seed, j + 1)
605
606 if len(best_common) >= len(longtext) / 2:
607 return (best_longtext_a, best_longtext_b,
608 best_shorttext_a, best_shorttext_b, best_common)
609 else:
610 return None
611
612
613 hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
614
615 hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
616 if not hm1 and not hm2:
617 return None
618 elif not hm2:
619 hm = hm1
620 elif not hm1:
621 hm = hm2
622 else:
623
624 if len(hm1[4]) > len(hm2[4]):
625 hm = hm1
626 else:
627 hm = hm2
628
629
630 if len(text1) > len(text2):
631 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
632 else:
633 (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
634 return (text1_a, text1_b, text2_a, text2_b, mid_common)
635
637 """Reduce the number of edits by eliminating semantically trivial
638 equalities.
639
640 Args:
641 diffs: Array of diff tuples.
642 """
643 changes = False
644 equalities = []
645 lastequality = None
646 pointer = 0
647 length_changes1 = 0
648 length_changes2 = 0
649 while pointer < len(diffs):
650 if diffs[pointer][0] == self.DIFF_EQUAL:
651 equalities.append(pointer)
652 length_changes1 = length_changes2
653 length_changes2 = 0
654 lastequality = diffs[pointer][1]
655 else:
656 length_changes2 += len(diffs[pointer][1])
657 if (lastequality != None and (len(lastequality) <= length_changes1) and
658 (len(lastequality) <= length_changes2)):
659
660 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
661
662 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
663 diffs[equalities[-1] + 1][1])
664
665 equalities.pop()
666
667 if len(equalities) != 0:
668 equalities.pop()
669 if len(equalities):
670 pointer = equalities[-1]
671 else:
672 pointer = -1
673 length_changes1 = 0
674 length_changes2 = 0
675 lastequality = None
676 changes = True
677 pointer += 1
678
679 if changes:
680 self.diff_cleanupMerge(diffs)
681
682 self.diff_cleanupSemanticLossless(diffs)
683
685 """Look for single edits surrounded on both sides by equalities
686 which can be shifted sideways to align the edit to a word boundary.
687 e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
688
689 Args:
690 diffs: Array of diff tuples.
691 """
692
693 def diff_cleanupSemanticScore(one, two):
694 """Given two strings, compute a score representing whether the
695 internal boundary falls on logical boundaries.
696 Scores range from 5 (best) to 0 (worst).
697 Closure, but does not reference any external variables.
698
699 Args:
700 one: First string.
701 two: Second string.
702
703 Returns:
704 The score.
705 """
706 if not one or not two:
707
708 return 5
709
710
711
712
713
714
715 score = 0
716
717 if not one[-1].isalnum() or not two[0].isalnum():
718 score += 1
719
720 if one[-1].isspace() or two[0].isspace():
721 score += 1
722
723 if (one[-1] == "\r" or one[-1] == "\n" or
724 two[0] == "\r" or two[0] == "\n"):
725 score += 1
726
727 if (re.search("\\n\\r?\\n$", one) or
728 re.match("^\\r?\\n\\r?\\n", two)):
729 score += 1
730 return score
731
732 pointer = 1
733
734 while pointer < len(diffs) - 1:
735 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
736 diffs[pointer + 1][0] == self.DIFF_EQUAL):
737
738 equality1 = diffs[pointer - 1][1]
739 edit = diffs[pointer][1]
740 equality2 = diffs[pointer + 1][1]
741
742
743 commonOffset = self.diff_commonSuffix(equality1, edit)
744 if commonOffset:
745 commonString = edit[-commonOffset:]
746 equality1 = equality1[:-commonOffset]
747 edit = commonString + edit[:-commonOffset]
748 equality2 = commonString + equality2
749
750
751 bestEquality1 = equality1
752 bestEdit = edit
753 bestEquality2 = equality2
754 bestScore = (diff_cleanupSemanticScore(equality1, edit) +
755 diff_cleanupSemanticScore(edit, equality2))
756 while edit and equality2 and edit[0] == equality2[0]:
757 equality1 += edit[0]
758 edit = edit[1:] + equality2[0]
759 equality2 = equality2[1:]
760 score = (diff_cleanupSemanticScore(equality1, edit) +
761 diff_cleanupSemanticScore(edit, equality2))
762
763 if score >= bestScore:
764 bestScore = score
765 bestEquality1 = equality1
766 bestEdit = edit
767 bestEquality2 = equality2
768
769 if diffs[pointer - 1][1] != bestEquality1:
770
771 if bestEquality1:
772 diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
773 else:
774 del diffs[pointer - 1]
775 pointer -= 1
776 diffs[pointer] = (diffs[pointer][0], bestEdit)
777 if bestEquality2:
778 diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
779 else:
780 del diffs[pointer + 1]
781 pointer -= 1
782 pointer += 1
783
785 """Reduce the number of edits by eliminating operationally trivial
786 equalities.
787
788 Args:
789 diffs: Array of diff tuples.
790 """
791 changes = False
792 equalities = []
793 lastequality = ''
794 pointer = 0
795 pre_ins = False
796 pre_del = False
797 post_ins = False
798 post_del = False
799 while pointer < len(diffs):
800 if diffs[pointer][0] == self.DIFF_EQUAL:
801 if (len(diffs[pointer][1]) < self.Diff_EditCost and
802 (post_ins or post_del)):
803
804 equalities.append(pointer)
805 pre_ins = post_ins
806 pre_del = post_del
807 lastequality = diffs[pointer][1]
808 else:
809
810 equalities = []
811 lastequality = ''
812
813 post_ins = post_del = False
814 else:
815 if diffs[pointer][0] == self.DIFF_DELETE:
816 post_del = True
817 else:
818 post_ins = True
819
820
821
822
823
824
825
826
827 if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
828 ((len(lastequality) < self.Diff_EditCost / 2) and
829 (pre_ins + pre_del + post_ins + post_del) == 3)):
830
831 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
832
833 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
834 diffs[equalities[-1] + 1][1])
835 equalities.pop()
836 lastequality = ''
837 if pre_ins and pre_del:
838
839 post_ins = post_del = True
840 equalities = []
841 else:
842 if len(equalities):
843 equalities.pop()
844 if len(equalities):
845 pointer = equalities[-1]
846 else:
847 pointer = -1
848 post_ins = post_del = False
849 changes = True
850 pointer += 1
851
852 if changes:
853 self.diff_cleanupMerge(diffs)
854
856 """Reorder and merge like edit sections. Merge equalities.
857 Any edit section can move as long as it doesn't cross an equality.
858
859 Args:
860 diffs: Array of diff tuples.
861 """
862 diffs.append((self.DIFF_EQUAL, ''))
863 pointer = 0
864 count_delete = 0
865 count_insert = 0
866 text_delete = ''
867 text_insert = ''
868 while pointer < len(diffs):
869 if diffs[pointer][0] == self.DIFF_INSERT:
870 count_insert += 1
871 text_insert += diffs[pointer][1]
872 pointer += 1
873 elif diffs[pointer][0] == self.DIFF_DELETE:
874 count_delete += 1
875 text_delete += diffs[pointer][1]
876 pointer += 1
877 elif diffs[pointer][0] == self.DIFF_EQUAL:
878
879 if count_delete != 0 or count_insert != 0:
880 if count_delete != 0 and count_insert != 0:
881
882 commonlength = self.diff_commonPrefix(text_insert, text_delete)
883 if commonlength != 0:
884 x = pointer - count_delete - count_insert - 1
885 if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
886 diffs[x] = (diffs[x][0], diffs[x][1] +
887 text_insert[:commonlength])
888 else:
889 diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
890 pointer += 1
891 text_insert = text_insert[commonlength:]
892 text_delete = text_delete[commonlength:]
893
894 commonlength = self.diff_commonSuffix(text_insert, text_delete)
895 if commonlength != 0:
896 diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
897 diffs[pointer][1])
898 text_insert = text_insert[:-commonlength]
899 text_delete = text_delete[:-commonlength]
900
901 if count_delete == 0:
902 diffs[pointer - count_insert : pointer] = [
903 (self.DIFF_INSERT, text_insert)]
904 elif count_insert == 0:
905 diffs[pointer - count_delete : pointer] = [
906 (self.DIFF_DELETE, text_delete)]
907 else:
908 diffs[pointer - count_delete - count_insert : pointer] = [
909 (self.DIFF_DELETE, text_delete),
910 (self.DIFF_INSERT, text_insert)]
911 pointer = pointer - count_delete - count_insert + 1
912 if count_delete != 0:
913 pointer += 1
914 if count_insert != 0:
915 pointer += 1
916 elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
917
918 diffs[pointer - 1] = (diffs[pointer - 1][0],
919 diffs[pointer - 1][1] + diffs[pointer][1])
920 del diffs[pointer]
921 else:
922 pointer += 1
923
924 count_insert = 0
925 count_delete = 0
926 text_delete = ''
927 text_insert = ''
928
929 if diffs[-1][1] == '':
930 diffs.pop()
931
932
933
934
935 changes = False
936 pointer = 1
937
938 while pointer < len(diffs) - 1:
939 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
940 diffs[pointer + 1][0] == self.DIFF_EQUAL):
941
942 if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
943
944 diffs[pointer] = (diffs[pointer][0],
945 diffs[pointer - 1][1] +
946 diffs[pointer][1][:-len(diffs[pointer - 1][1])])
947 diffs[pointer + 1] = (diffs[pointer + 1][0],
948 diffs[pointer - 1][1] + diffs[pointer + 1][1])
949 del diffs[pointer - 1]
950 changes = True
951 elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
952
953 diffs[pointer - 1] = (diffs[pointer - 1][0],
954 diffs[pointer - 1][1] + diffs[pointer + 1][1])
955 diffs[pointer] = (diffs[pointer][0],
956 diffs[pointer][1][len(diffs[pointer + 1][1]):] +
957 diffs[pointer + 1][1])
958 del diffs[pointer + 1]
959 changes = True
960 pointer += 1
961
962
963 if changes:
964 self.diff_cleanupMerge(diffs)
965
967 """loc is a location in text1, compute and return the equivalent location
968 in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
969
970 Args:
971 diffs: Array of diff tuples.
972 loc: Location within text1.
973
974 Returns:
975 Location within text2.
976 """
977 chars1 = 0
978 chars2 = 0
979 last_chars1 = 0
980 last_chars2 = 0
981 for x in xrange(len(diffs)):
982 (op, text) = diffs[x]
983 if op != self.DIFF_INSERT:
984 chars1 += len(text)
985 if op != self.DIFF_DELETE:
986 chars2 += len(text)
987 if chars1 > loc:
988 break
989 last_chars1 = chars1
990 last_chars2 = chars2
991
992 if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
993
994 return last_chars2
995
996 return last_chars2 + (loc - last_chars1)
997
999 """Convert a diff array into a pretty HTML report.
1000
1001 Args:
1002 diffs: Array of diff tuples.
1003
1004 Returns:
1005 HTML representation.
1006 """
1007 html = []
1008 i = 0
1009 for (op, data) in diffs:
1010 text = (data.replace("&", "&").replace("<", "<")
1011 .replace(">", ">").replace("\n", "¶<BR>"))
1012 if op == self.DIFF_INSERT:
1013 html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=%i\">%s</INS>"
1014 % (i, text))
1015 elif op == self.DIFF_DELETE:
1016 html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=%i\">%s</DEL>"
1017 % (i, text))
1018 elif op == self.DIFF_EQUAL:
1019 html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
1020 if op != self.DIFF_DELETE:
1021 i += len(data)
1022 return "".join(html)
1023
1024 - def diff_text1(self, diffs):
1025 """Compute and return the source text (all equalities and deletions).
1026
1027 Args:
1028 diffs: Array of diff tuples.
1029
1030 Returns:
1031 Source text.
1032 """
1033 text = []
1034 for (op, data) in diffs:
1035 if op != self.DIFF_INSERT:
1036 text.append(data)
1037 return "".join(text)
1038
1039 - def diff_text2(self, diffs):
1040 """Compute and return the destination text (all equalities and insertions).
1041
1042 Args:
1043 diffs: Array of diff tuples.
1044
1045 Returns:
1046 Destination text.
1047 """
1048 text = []
1049 for (op, data) in diffs:
1050 if op != self.DIFF_DELETE:
1051 text.append(data)
1052 return "".join(text)
1053
1055 """Compute the Levenshtein distance; the number of inserted, deleted or
1056 substituted characters.
1057
1058 Args:
1059 diffs: Array of diff tuples.
1060
1061 Returns:
1062 Number of changes.
1063 """
1064 levenshtein = 0
1065 insertions = 0
1066 deletions = 0
1067 for (op, data) in diffs:
1068 if op == self.DIFF_INSERT:
1069 insertions += len(data)
1070 elif op == self.DIFF_DELETE:
1071 deletions += len(data)
1072 elif op == self.DIFF_EQUAL:
1073
1074 levenshtein += max(insertions, deletions)
1075 insertions = 0
1076 deletions = 0
1077 levenshtein += max(insertions, deletions)
1078 return levenshtein
1079
1081 """Crush the diff into an encoded string which describes the operations
1082 required to transform text1 into text2.
1083 E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1084 Operations are tab-separated. Inserted text is escaped using %xx notation.
1085
1086 Args:
1087 diffs: Array of diff tuples.
1088
1089 Returns:
1090 Delta text.
1091 """
1092 import urllib
1093 text = []
1094 for (op, data) in diffs:
1095 if op == self.DIFF_INSERT:
1096
1097 data = data.encode("utf-8")
1098 text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1099 elif op == self.DIFF_DELETE:
1100 text.append("-%d" % len(data))
1101 elif op == self.DIFF_EQUAL:
1102 text.append("=%d" % len(data))
1103 return "\t".join(text)
1104
1106 """Given the original text1, and an encoded string which describes the
1107 operations required to transform text1 into text2, compute the full diff.
1108
1109 Args:
1110 text1: Source string for the diff.
1111 delta: Delta text.
1112
1113 Returns:
1114 Array of diff tuples.
1115
1116 Raises:
1117 ValueError: If invalid input.
1118 """
1119 import urllib
1120 if type(delta) == unicode:
1121
1122
1123 delta = delta.encode("ascii")
1124 diffs = []
1125 pointer = 0
1126 tokens = delta.split("\t")
1127 for token in tokens:
1128 if token == "":
1129
1130 continue
1131
1132
1133 param = token[1:]
1134 if token[0] == "+":
1135 param = urllib.unquote(param).decode("utf-8")
1136 diffs.append((self.DIFF_INSERT, param))
1137 elif token[0] == "-" or token[0] == "=":
1138 try:
1139 n = int(param)
1140 except ValueError:
1141 raise ValueError("Invalid number in diff_fromDelta: " + param)
1142 if n < 0:
1143 raise ValueError("Negative number in diff_fromDelta: " + param)
1144 text = text1[pointer : pointer + n]
1145 pointer += n
1146 if token[0] == "=":
1147 diffs.append((self.DIFF_EQUAL, text))
1148 else:
1149 diffs.append((self.DIFF_DELETE, text))
1150 else:
1151
1152 raise ValueError("Invalid diff operation in diff_fromDelta: " +
1153 token[0])
1154 if pointer != len(text1):
1155 raise ValueError(
1156 "Delta length (%d) does not equal source text length (%d)." %
1157 (pointer, len(text1)))
1158 return diffs
1159
1160
1161
1162 - def match_main(self, text, pattern, loc):
1163 """Locate the best instance of 'pattern' in 'text' near 'loc'.
1164
1165 Args:
1166 text: The text to search.
1167 pattern: The pattern to search for.
1168 loc: The location to search around.
1169
1170 Returns:
1171 Best match index or -1.
1172 """
1173
1174 if text == None or pattern == None:
1175 raise ValueError("Null inputs. (match_main)")
1176
1177 loc = max(0, min(loc, len(text)))
1178 if text == pattern:
1179
1180 return 0
1181 elif not text:
1182
1183 return -1
1184 elif text[loc:loc + len(pattern)] == pattern:
1185
1186 return loc
1187 else:
1188
1189 match = self.match_bitap(text, pattern, loc)
1190 return match
1191
1193 """Locate the best instance of 'pattern' in 'text' near 'loc' using the
1194 Bitap algorithm.
1195
1196 Args:
1197 text: The text to search.
1198 pattern: The pattern to search for.
1199 loc: The location to search around.
1200
1201 Returns:
1202 Best match index or -1.
1203 """
1204
1205
1206
1207
1208
1209 s = self.match_alphabet(pattern)
1210
1211 def match_bitapScore(e, x):
1212 """Compute and return the score for a match with e errors and x location.
1213 Accesses loc and pattern through being a closure.
1214
1215 Args:
1216 e: Number of errors in match.
1217 x: Location of match.
1218
1219 Returns:
1220 Overall score for match (0.0 = good, 1.0 = bad).
1221 """
1222 accuracy = float(e) / len(pattern)
1223 proximity = abs(loc - x)
1224 if not self.Match_Distance:
1225
1226 return proximity and 1.0 or accuracy
1227 return accuracy + (proximity / float(self.Match_Distance))
1228
1229
1230 score_threshold = self.Match_Threshold
1231
1232 best_loc = text.find(pattern, loc)
1233 if best_loc != -1:
1234 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1235
1236 best_loc = text.rfind(pattern, loc + len(pattern))
1237 if best_loc != -1:
1238 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1239
1240
1241 matchmask = 1 << (len(pattern) - 1)
1242 best_loc = -1
1243
1244 bin_max = len(pattern) + len(text)
1245
1246 last_rd = None
1247 for d in xrange(len(pattern)):
1248
1249
1250
1251 bin_min = 0
1252 bin_mid = bin_max
1253 while bin_min < bin_mid:
1254 if match_bitapScore(d, loc + bin_mid) <= score_threshold:
1255 bin_min = bin_mid
1256 else:
1257 bin_max = bin_mid
1258 bin_mid = (bin_max - bin_min) / 2 + bin_min
1259
1260
1261 bin_max = bin_mid
1262 start = max(1, loc - bin_mid + 1)
1263 finish = min(loc + bin_mid, len(text)) + len(pattern)
1264
1265 rd = range(finish + 1)
1266 rd.append((1 << d) - 1)
1267 for j in xrange(finish, start - 1, -1):
1268 if len(text) <= j - 1:
1269
1270 charMatch = 0
1271 else:
1272 charMatch = s.get(text[j - 1], 0)
1273 if d == 0:
1274 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1275 else:
1276 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
1277 ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
1278 if rd[j] & matchmask:
1279 score = match_bitapScore(d, j - 1)
1280
1281
1282 if score <= score_threshold:
1283
1284 score_threshold = score
1285 best_loc = j - 1
1286 if best_loc > loc:
1287
1288 start = max(1, 2 * loc - best_loc)
1289 else:
1290
1291 break
1292
1293 if match_bitapScore(d + 1, loc) > score_threshold:
1294 break
1295 last_rd = rd
1296 return best_loc
1297
1299 """Initialise the alphabet for the Bitap algorithm.
1300
1301 Args:
1302 pattern: The text to encode.
1303
1304 Returns:
1305 Hash of character locations.
1306 """
1307 s = {}
1308 for char in pattern:
1309 s[char] = 0
1310 for i in xrange(len(pattern)):
1311 s[pattern[i]] |= 1 << (len(pattern) - i - 1)
1312 return s
1313
1314
1315
1316 - def patch_addContext(self, patch, text):
1317 """Increase the context until it is unique,
1318 but don't let the pattern expand beyond Match_MaxBits.
1319
1320 Args:
1321 patch: The patch to grow.
1322 text: Source text.
1323 """
1324 if len(text) == 0:
1325 return
1326 pattern = text[patch.start2 : patch.start2 + patch.length1]
1327 padding = 0
1328
1329
1330
1331 while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
1332 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
1333 self.Patch_Margin)):
1334 padding += self.Patch_Margin
1335 pattern = text[max(0, patch.start2 - padding) :
1336 patch.start2 + patch.length1 + padding]
1337
1338 padding += self.Patch_Margin
1339
1340
1341 prefix = text[max(0, patch.start2 - padding) : patch.start2]
1342 if prefix:
1343 patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
1344
1345 suffix = text[patch.start2 + patch.length1 :
1346 patch.start2 + patch.length1 + padding]
1347 if suffix:
1348 patch.diffs.append((self.DIFF_EQUAL, suffix))
1349
1350
1351 patch.start1 -= len(prefix)
1352 patch.start2 -= len(prefix)
1353
1354 patch.length1 += len(prefix) + len(suffix)
1355 patch.length2 += len(prefix) + len(suffix)
1356
1358 """Compute a list of patches to turn text1 into text2.
1359 Use diffs if provided, otherwise compute it ourselves.
1360 There are four ways to call this function, depending on what data is
1361 available to the caller:
1362 Method 1:
1363 a = text1, b = text2
1364 Method 2:
1365 a = diffs
1366 Method 3 (optimal):
1367 a = text1, b = diffs
1368 Method 4 (deprecated, use method 3):
1369 a = text1, b = text2, c = diffs
1370
1371 Args:
1372 a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
1373 text2 (method 2).
1374 b: text2 (methods 1,4) or Array of diff tuples for text1 to
1375 text2 (method 3) or undefined (method 2).
1376 c: Array of diff tuples for text1 to text2 (method 4) or
1377 undefined (methods 1,2,3).
1378
1379 Returns:
1380 Array of patch objects.
1381 """
1382 text1 = None
1383 diffs = None
1384
1385 if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
1386
1387
1388 text1 = a
1389 diffs = self.diff_main(text1, b, True)
1390 if len(diffs) > 2:
1391 self.diff_cleanupSemantic(diffs)
1392 self.diff_cleanupEfficiency(diffs)
1393 elif isinstance(a, list) and b is None and c is None:
1394
1395
1396 diffs = a
1397 text1 = self.diff_text1(diffs)
1398 elif isinstance(a, basestring) and isinstance(b, list) and c is None:
1399
1400 text1 = a
1401 diffs = b
1402 elif (isinstance(a, basestring) and isinstance(b, basestring) and
1403 isinstance(c, list)):
1404
1405
1406 text1 = a
1407 diffs = c
1408 else:
1409 raise ValueError("Unknown call format to patch_make.")
1410
1411 if not diffs:
1412 return []
1413 patches = []
1414 patch = patch_obj()
1415 char_count1 = 0
1416 char_count2 = 0
1417 prepatch_text = text1
1418 postpatch_text = text1
1419 for x in xrange(len(diffs)):
1420 (diff_type, diff_text) = diffs[x]
1421 if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
1422
1423 patch.start1 = char_count1
1424 patch.start2 = char_count2
1425 if diff_type == self.DIFF_INSERT:
1426
1427 patch.diffs.append(diffs[x])
1428 patch.length2 += len(diff_text)
1429 postpatch_text = (postpatch_text[:char_count2] + diff_text +
1430 postpatch_text[char_count2:])
1431 elif diff_type == self.DIFF_DELETE:
1432
1433 patch.length1 += len(diff_text)
1434 patch.diffs.append(diffs[x])
1435 postpatch_text = (postpatch_text[:char_count2] +
1436 postpatch_text[char_count2 + len(diff_text):])
1437 elif (diff_type == self.DIFF_EQUAL and
1438 len(diff_text) <= 2 * self.Patch_Margin and
1439 len(patch.diffs) != 0 and len(diffs) != x + 1):
1440
1441 patch.diffs.append(diffs[x])
1442 patch.length1 += len(diff_text)
1443 patch.length2 += len(diff_text)
1444
1445 if (diff_type == self.DIFF_EQUAL and
1446 len(diff_text) >= 2 * self.Patch_Margin):
1447
1448 if len(patch.diffs) != 0:
1449 self.patch_addContext(patch, prepatch_text)
1450 patches.append(patch)
1451 patch = patch_obj()
1452
1453
1454
1455
1456 prepatch_text = postpatch_text
1457 char_count1 = char_count2
1458
1459
1460 if diff_type != self.DIFF_INSERT:
1461 char_count1 += len(diff_text)
1462 if diff_type != self.DIFF_DELETE:
1463 char_count2 += len(diff_text)
1464
1465
1466 if len(patch.diffs) != 0:
1467 self.patch_addContext(patch, prepatch_text)
1468 patches.append(patch)
1469 return patches
1470
1472 """Given an array of patches, return another array that is identical.
1473
1474 Args:
1475 patches: Array of patch objects.
1476
1477 Returns:
1478 Array of patch objects.
1479 """
1480 patchesCopy = []
1481 for patch in patches:
1482 patchCopy = patch_obj()
1483
1484 patchCopy.diffs = patch.diffs[:]
1485 patchCopy.start1 = patch.start1
1486 patchCopy.start2 = patch.start2
1487 patchCopy.length1 = patch.length1
1488 patchCopy.length2 = patch.length2
1489 patchesCopy.append(patchCopy)
1490 return patchesCopy
1491
1493 """Merge a set of patches onto the text. Return a patched text, as well
1494 as a list of true/false values indicating which patches were applied.
1495
1496 Args:
1497 patches: Array of patch objects.
1498 text: Old text.
1499
1500 Returns:
1501 Two element Array, containing the new text and an array of boolean values.
1502 """
1503 if not patches:
1504 return (text, [])
1505
1506
1507 patches = self.patch_deepCopy(patches)
1508
1509 nullPadding = self.patch_addPadding(patches)
1510 text = nullPadding + text + nullPadding
1511 self.patch_splitMax(patches)
1512
1513
1514
1515
1516
1517 delta = 0
1518 results = []
1519 for patch in patches:
1520 expected_loc = patch.start2 + delta
1521 text1 = self.diff_text1(patch.diffs)
1522 end_loc = -1
1523 if len(text1) > self.Match_MaxBits:
1524
1525
1526 start_loc = self.match_main(text, text1[:self.Match_MaxBits],
1527 expected_loc)
1528 if start_loc != -1:
1529 end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
1530 expected_loc + len(text1) - self.Match_MaxBits)
1531 if end_loc == -1 or start_loc >= end_loc:
1532
1533 start_loc = -1
1534 else:
1535 start_loc = self.match_main(text, text1, expected_loc)
1536 if start_loc == -1:
1537
1538 results.append(False)
1539
1540 delta -= patch.length2 - patch.length1
1541 else:
1542
1543 results.append(True)
1544 delta = start_loc - expected_loc
1545 if end_loc == -1:
1546 text2 = text[start_loc : start_loc + len(text1)]
1547 else:
1548 text2 = text[start_loc : end_loc + self.Match_MaxBits]
1549 if text1 == text2:
1550
1551 text = (text[:start_loc] + self.diff_text2(patch.diffs) +
1552 text[start_loc + len(text1):])
1553 else:
1554
1555
1556 diffs = self.diff_main(text1, text2, False)
1557 if (len(text1) > self.Match_MaxBits and
1558 self.diff_levenshtein(diffs) / float(len(text1)) >
1559 self.Patch_DeleteThreshold):
1560
1561 results[-1] = False
1562 else:
1563 self.diff_cleanupSemanticLossless(diffs)
1564 index1 = 0
1565 for (op, data) in patch.diffs:
1566 if op != self.DIFF_EQUAL:
1567 index2 = self.diff_xIndex(diffs, index1)
1568 if op == self.DIFF_INSERT:
1569 text = text[:start_loc + index2] + data + text[start_loc +
1570 index2:]
1571 elif op == self.DIFF_DELETE:
1572 text = text[:start_loc + index2] + text[start_loc +
1573 self.diff_xIndex(diffs, index1 + len(data)):]
1574 if op != self.DIFF_DELETE:
1575 index1 += len(data)
1576
1577 text = text[len(nullPadding):-len(nullPadding)]
1578 return (text, results)
1579
1581 """Add some padding on text start and end so that edges can match
1582 something. Intended to be called only from within patch_apply.
1583
1584 Args:
1585 patches: Array of patch objects.
1586
1587 Returns:
1588 The padding string added to each side.
1589 """
1590 paddingLength = self.Patch_Margin
1591 nullPadding = ""
1592 for x in xrange(1, paddingLength + 1):
1593 nullPadding += chr(x)
1594
1595
1596 for patch in patches:
1597 patch.start1 += paddingLength
1598 patch.start2 += paddingLength
1599
1600
1601 patch = patches[0]
1602 diffs = patch.diffs
1603 if not diffs or diffs[0][0] != self.DIFF_EQUAL:
1604
1605 diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
1606 patch.start1 -= paddingLength
1607 patch.start2 -= paddingLength
1608 patch.length1 += paddingLength
1609 patch.length2 += paddingLength
1610 elif paddingLength > len(diffs[0][1]):
1611
1612 extraLength = paddingLength - len(diffs[0][1])
1613 newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
1614 diffs[0] = (diffs[0][0], newText)
1615 patch.start1 -= extraLength
1616 patch.start2 -= extraLength
1617 patch.length1 += extraLength
1618 patch.length2 += extraLength
1619
1620
1621 patch = patches[-1]
1622 diffs = patch.diffs
1623 if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
1624
1625 diffs.append((self.DIFF_EQUAL, nullPadding))
1626 patch.length1 += paddingLength
1627 patch.length2 += paddingLength
1628 elif paddingLength > len(diffs[-1][1]):
1629
1630 extraLength = paddingLength - len(diffs[-1][1])
1631 newText = diffs[-1][1] + nullPadding[:extraLength]
1632 diffs[-1] = (diffs[-1][0], newText)
1633 patch.length1 += extraLength
1634 patch.length2 += extraLength
1635
1636 return nullPadding
1637
1639 """Look through the patches and break up any which are longer than the
1640 maximum limit of the match algorithm.
1641
1642 Args:
1643 patches: Array of patch objects.
1644 """
1645 if self.Match_MaxBits == 0:
1646 return
1647 for x in xrange(len(patches)):
1648 if patches[x].length1 > self.Match_MaxBits:
1649 bigpatch = patches[x]
1650
1651 del patches[x]
1652 x -= 1
1653 patch_size = self.Match_MaxBits
1654 start1 = bigpatch.start1
1655 start2 = bigpatch.start2
1656 precontext = ''
1657 while len(bigpatch.diffs) != 0:
1658
1659 patch = patch_obj()
1660 empty = True
1661 patch.start1 = start1 - len(precontext)
1662 patch.start2 = start2 - len(precontext)
1663 if precontext:
1664 patch.length1 = patch.length2 = len(precontext)
1665 patch.diffs.append((self.DIFF_EQUAL, precontext))
1666
1667 while (len(bigpatch.diffs) != 0 and
1668 patch.length1 < patch_size - self.Patch_Margin):
1669 (diff_type, diff_text) = bigpatch.diffs[0]
1670 if diff_type == self.DIFF_INSERT:
1671
1672 patch.length2 += len(diff_text)
1673 start2 += len(diff_text)
1674 patch.diffs.append(bigpatch.diffs.pop(0))
1675 empty = False
1676 elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
1677 patch.diffs[0][0] == self.DIFF_EQUAL and
1678 len(diff_text) > 2 * patch_size):
1679
1680 patch.length1 += len(diff_text)
1681 start1 += len(diff_text)
1682 empty = False
1683 patch.diffs.append((diff_type, diff_text))
1684 del bigpatch.diffs[0]
1685 else:
1686
1687 diff_text = diff_text[:patch_size - patch.length1 -
1688 self.Patch_Margin]
1689 patch.length1 += len(diff_text)
1690 start1 += len(diff_text)
1691 if diff_type == self.DIFF_EQUAL:
1692 patch.length2 += len(diff_text)
1693 start2 += len(diff_text)
1694 else:
1695 empty = False
1696
1697 patch.diffs.append((diff_type, diff_text))
1698 if diff_text == bigpatch.diffs[0][1]:
1699 del bigpatch.diffs[0]
1700 else:
1701 bigpatch.diffs[0] = (bigpatch.diffs[0][0],
1702 bigpatch.diffs[0][1][len(diff_text):])
1703
1704
1705 precontext = self.diff_text2(patch.diffs)
1706 precontext = precontext[-self.Patch_Margin:]
1707
1708 postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
1709 if postcontext:
1710 patch.length1 += len(postcontext)
1711 patch.length2 += len(postcontext)
1712 if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1713 patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
1714 postcontext)
1715 else:
1716 patch.diffs.append((self.DIFF_EQUAL, postcontext))
1717
1718 if not empty:
1719 x += 1
1720 patches.insert(x, patch)
1721
1722 - def patch_toText(self, patches):
1723 """Take a list of patches and return a textual representation.
1724
1725 Args:
1726 patches: Array of patch objects.
1727
1728 Returns:
1729 Text representation of patches.
1730 """
1731 text = []
1732 for patch in patches:
1733 text.append(str(patch))
1734 return "".join(text)
1735
1736 - def patch_fromText(self, textline):
1737 """Parse a textual representation of patches and return a list of patch
1738 objects.
1739
1740 Args:
1741 textline: Text representation of patches.
1742
1743 Returns:
1744 Array of patch objects.
1745
1746 Raises:
1747 ValueError: If invalid input.
1748 """
1749 if type(textline) == unicode:
1750
1751
1752 textline = textline.encode("ascii")
1753 patches = []
1754 if not textline:
1755 return patches
1756 text = textline.split('\n')
1757 while len(text) != 0:
1758 m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1759 if not m:
1760 raise ValueError("Invalid patch string: " + text[0])
1761 patch = patch_obj()
1762 patches.append(patch)
1763 patch.start1 = int(m.group(1))
1764 if m.group(2) == '':
1765 patch.start1 -= 1
1766 patch.length1 = 1
1767 elif m.group(2) == '0':
1768 patch.length1 = 0
1769 else:
1770 patch.start1 -= 1
1771 patch.length1 = int(m.group(2))
1772
1773 patch.start2 = int(m.group(3))
1774 if m.group(4) == '':
1775 patch.start2 -= 1
1776 patch.length2 = 1
1777 elif m.group(4) == '0':
1778 patch.length2 = 0
1779 else:
1780 patch.start2 -= 1
1781 patch.length2 = int(m.group(4))
1782
1783 del text[0]
1784
1785 import urllib
1786 while len(text) != 0:
1787 if text[0]:
1788 sign = text[0][0]
1789 else:
1790 sign = ''
1791 line = urllib.unquote(text[0][1:])
1792 line = line.decode("utf-8")
1793 if sign == '+':
1794
1795 patch.diffs.append((self.DIFF_INSERT, line))
1796 elif sign == '-':
1797
1798 patch.diffs.append((self.DIFF_DELETE, line))
1799 elif sign == ' ':
1800
1801 patch.diffs.append((self.DIFF_EQUAL, line))
1802 elif sign == '@':
1803
1804 break
1805 elif sign == '':
1806
1807 pass
1808 else:
1809
1810 raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
1811 del text[0]
1812 return patches
1813
1814
1816 """Class representing one patch operation.
1817 """
1818
1820 """Initializes with an empty list of diffs.
1821 """
1822 self.diffs = []
1823 self.start1 = None
1824 self.start2 = None
1825 self.length1 = 0
1826 self.length2 = 0
1827
1829 """Emmulate GNU diff's format.
1830 Header: @@ -382,8 +481,9 @@
1831 Indicies are printed as 1-based, not 0-based.
1832
1833 Returns:
1834 The GNU diff string.
1835 """
1836 import urllib
1837 if self.length1 == 0:
1838 coords1 = str(self.start1) + ",0"
1839 elif self.length1 == 1:
1840 coords1 = str(self.start1 + 1)
1841 else:
1842 coords1 = str(self.start1 + 1) + "," + str(self.length1)
1843 if self.length2 == 0:
1844 coords2 = str(self.start2) + ",0"
1845 elif self.length2 == 1:
1846 coords2 = str(self.start2 + 1)
1847 else:
1848 coords2 = str(self.start2 + 1) + "," + str(self.length2)
1849 text = ["@@ -", coords1, " +", coords2, " @@\n"]
1850
1851 for (op, data) in self.diffs:
1852 if op == diff_match_patch.DIFF_INSERT:
1853 text.append("+")
1854 elif op == diff_match_patch.DIFF_DELETE:
1855 text.append("-")
1856 elif op == diff_match_patch.DIFF_EQUAL:
1857 text.append(" ")
1858
1859 data = data.encode("utf-8")
1860 text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
1861 return "".join(text)
1862