[Emacs-diffs] master 2d1d54d: * lisp/vc/smerge-mode.el: Avoid N² blow u

emacs-diffs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 2d1d54d: * lisp/vc/smerge-mode.el: Avoid N² blow u

From:	Stefan Monnier
Subject:	[Emacs-diffs] master 2d1d54d: * lisp/vc/smerge-mode.el: Avoid N² blow up in degenerate cases
Date:	Thu, 27 Jul 2017 00:21:39 -0400 (EDT)

branch: master
commit 2d1d54d333735c0128fd31edb183a71298ef5cfc
Author: Stefan Monnier <address@hidden>
Commit: Stefan Monnier <address@hidden>

    * lisp/vc/smerge-mode.el: Avoid N² blow up in degenerate cases
    
    (smerge--refine-long-words): New var.
    (smerge--refine-chopup-region): Use it.
---
 lisp/vc/smerge-mode.el | 66 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 49 insertions(+), 17 deletions(-)

diff --git a/lisp/vc/smerge-mode.el b/lisp/vc/smerge-mode.el
index 21c39c8..f94f8a6 100644
--- a/lisp/vc/smerge-mode.el
+++ b/lisp/vc/smerge-mode.el
@@ -938,15 +938,15 @@ It has the following disadvantages:
 - cannot use `diff -w' because the weighting causes added spaces in a line
   to be represented as added copies of some line, so `diff -w' can't do the
   right thing any more.
-- may in degenerate cases take a 1KB input region and turn it into a 1MB
-  file to pass to diff.")
+- Is a bit more costly (may in degenerate cases use temp files that are 10x
+  larger than the refined regions).")
 
 (defun smerge--refine-forward (n)
   (let ((case-fold-search nil)
         (re "[[:upper:]]?[[:lower:]]+\\|[[:upper:]]+\\|[[:digit:]]+\\|.\\|\n"))
     (when (and smerge-refine-ignore-whitespace
                ;; smerge-refine-weight-hack causes additional spaces to
-               ;; appear as additional lines as well, so even if diff ignore
+               ;; appear as additional lines as well, so even if diff ignores
                ;; whitespace changes, it'll report added/removed lines :-(
                (not smerge-refine-weight-hack))
       (setq re (concat "[ \t]*\\(?:" re "\\)")))
@@ -954,6 +954,8 @@ It has the following disadvantages:
       (unless (looking-at re) (error "Smerge refine internal error"))
       (goto-char (match-end 0)))))
 
+(defvar smerge--refine-long-words)
+
 (defun smerge--refine-chopup-region (beg end file &optional preproc)
   "Chopup the region into small elements, one per line.
 Save the result into FILE.
@@ -976,18 +978,46 @@ chars to try and eliminate some spurious differences."
       (subst-char-in-region (point-min) (point-max) ?\n ?\s))
     (goto-char (point-min))
     (while (not (eobp))
-      (funcall smerge-refine-forward-function 1)
-      (let ((s (if (prog2 (forward-char -1) (bolp) (forward-char 1))
-                   nil
-                 (buffer-substring (line-beginning-position) (point)))))
-        ;; We add \n after each char except after \n, so we get
-        ;; one line per text char, where each line contains
-        ;; just one char, except for \n chars which are
-        ;; represented by the empty line.
-        (unless (eq (char-before) ?\n) (insert ?\n))
-        ;; HACK ALERT!!
-        (if smerge-refine-weight-hack
-            (dotimes (_i (1- (length s))) (insert s "\n")))))
+      (cl-assert (bolp))
+      (let ((start (point)))
+        (funcall smerge-refine-forward-function 1)
+        (let ((len (- (point) start)))
+          (cl-assert (>= len 1))
+          ;; We add \n after each chunk except after \n, so we get
+          ;; one line per text chunk, where each line contains
+          ;; just one chunk, except for \n chars which are
+          ;; represented by the empty line.
+          (unless (bolp) (insert ?\n))
+          (when (and smerge-refine-weight-hack (> len 1))
+            (let ((s (buffer-substring-no-properties start (point))))
+              ;; The weight-hack inserts N copies of words of size N,
+              ;; so it naturally suffers from an O(N²) blow up.
+              ;; To circumvent this, we map each long word
+              ;; to a shorter (but still unique) replacement.
+              ;; Another option would be to change smerge--refine-forward
+              ;; so it chops up long words into smaller ones.
+              (when (> len 8)
+                (let ((short (gethash s smerge--refine-long-words)))
+                  (unless short
+                    ;; To avoid accidental conflicts with ≤8 words,
+                    ;; we make sure the replacement is >8 chars.  Overall,
+                    ;; this should bound the blowup factor to ~10x,
+                    ;; tho if those chars end up encoded as multiple bytes
+                    ;; each, it could probably still reach ~30x in
+                    ;; pathological cases.
+                    (setq short
+                          (concat (substring s 0 7)
+                                  " "
+                                  (string
+                                   (+ ?0
+                                      (hash-table-count
+                                       smerge--refine-long-words)))
+                                  "\n"))
+                    (puthash s short smerge--refine-long-words))
+                  (delete-region start (point))
+                  (insert short)
+                  (setq s short)))
+              (dotimes (_i (1- len)) (insert s)))))))
     (unless (bolp) (error "Smerge refine internal error"))
     (let ((coding-system-for-write 'emacs-internal))
       (write-region (point-min) (point-max) file nil 'nomessage))))
@@ -1042,7 +1072,9 @@ used to replace chars to try and eliminate some spurious 
differences."
   (let* ((pos (point))
          deactivate-mark         ; The code does not modify any visible buffer.
          (file1 (make-temp-file "diff1"))
-         (file2 (make-temp-file "diff2")))
+         (file2 (make-temp-file "diff2"))
+         (smerge--refine-long-words
+          (if smerge-refine-weight-hack (make-hash-table :test #'equal))))
     (unless (markerp beg1) (setq beg1 (copy-marker beg1)))
     (unless (markerp beg2) (setq beg2 (copy-marker beg2)))
     ;; Chop up regions into smaller elements and save into files.
@@ -1062,7 +1094,7 @@ used to replace chars to try and eliminate some spurious 
differences."
                               ;; also and more importantly because otherwise it
                               ;; may happen that diff doesn't behave like
                               ;; smerge-refine-weight-hack expects it to.
-                              ;; See 
http://thread.gmane.org/gmane.emacs.devel/82685.
+                              ;; See 
http://thread.gmane.org/gmane.emacs.devel/82685, aka 
https://lists.gnu.org/archive/html/emacs-devel/2007-11/msg00401.html
                               "-awd" "-ad")
                           file1 file2))
           ;; Process diff's output.

[Prev in Thread]

Current Thread

[Next in Thread]

[Emacs-diffs] master 2d1d54d: * lisp/vc/smerge-mode.el: Avoid N² blow up in degenerate cases, Stefan Monnier <=

Prev by Date: [Emacs-diffs] master ea87506: * lisp/url/url-cookie.el: Use lexical-binding
Next by Date: [Emacs-diffs] master 28faa94: CC Mode: Fix declarator being cut off from terminator by end of jit-lock chunk
Previous by thread: [Emacs-diffs] master ea87506: * lisp/url/url-cookie.el: Use lexical-binding
Next by thread: [Emacs-diffs] master 28faa94: CC Mode: Fix declarator being cut off from terminator by end of jit-lock chunk
Index(es):
- Date
- Thread