emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 44aa385b9af: Make imenu index generation for PDFs more reliable


From: Tassilo Horn
Subject: master 44aa385b9af: Make imenu index generation for PDFs more reliable
Date: Tue, 8 Oct 2024 11:42:20 -0400 (EDT)

branch: master
commit 44aa385b9afe2f2d500f62fd679314150e859d77
Author: Visuwesh <visuweshm@gmail.com>
Commit: Tassilo Horn <tsdh@gnu.org>

    Make imenu index generation for PDFs more reliable
    
    Do away with parsing the output of "mutool show FILE outline"
    since the URI reported in its output may not include the page
    number of the heading, and may instead contain "nameddest"
    elements which cannot be resolved using "mutool".  Instead, use
    a MuPDF JS script to generate the PDF outline, which allows
    such URIs to be resolved.
    
    * lisp/doc-view.el (doc-view--outline-rx): Remove as no longer
    needed.
    (doc-view--outline): Reflect that outline can be generated for
    non-PDF files too.
    (doc-view--mutool-pdf-outline-script): Add new variable to hold
    the JS script used to generate the outline.
    (doc-view--pdf-outline): Use the script.  (bug#73638)
---
 lisp/doc-view.el | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/lisp/doc-view.el b/lisp/doc-view.el
index 446beeafd9f..57a24418616 100644
--- a/lisp/doc-view.el
+++ b/lisp/doc-view.el
@@ -1969,14 +1969,26 @@ the document text."
        (doc-view-goto-page (caar (last doc-view--current-search-matches)))))))
 
 ;;;; Imenu support
-(defconst doc-view--outline-rx
-  "[^\t]+\\(\t+\\)\"\\(.+\\)\"\t#\\(?:page=\\)?\\([0-9]+\\)")
-
 (defvar-local doc-view--outline nil
-  "Cached PDF outline, so that it is only computed once per document.
+  "Cached document outline, so that it is only computed once per document.
 It can be the symbol `unavailable' to indicate that outline is
 unavailable for the document.")
 
+(defvar doc-view--mutool-pdf-outline-script
+  "var document = new Document.openDocument(\"%s\", \"application/pdf\");
+var outline = document.loadOutline();
+if(!outline) quit();
+function pp(outl, level){print(\"((level . \" + level + \")\");\
+print(\"(title . \" + repr(outl.title) + \")\");\
+print(\"(page . \" + (document.resolveLink(outl.uri)+1) + \"))\");\
+if(outl.down){for(var i=0; i<outl.down.length; i++){pp(outl.down[i], 
level+1);}}};
+function run(){print(\"BEGIN(\");\
+for(var i=0; i<outline.length; i++){pp(outline[i], 1);}print(\")\");};
+run()"
+  "JS script to extract the PDF's outline using mutool.
+The script has to be minified to pass it to the REPL.  The \"BEGIN\"
+marker is here to skip past the prompt characters.")
+
 (defun doc-view--pdf-outline (&optional file-name)
   "Return a list describing the outline of FILE-NAME.
 Return a list describing the current file if FILE-NAME is nil.
@@ -1986,21 +1998,25 @@ title, nesting level and page number.  The list is flat: its tree
 structure is extracted by `doc-view--imenu-subtree'."
   (let ((fn (or file-name (buffer-file-name))))
     (when fn
-      (let ((outline nil)
-            (fn (expand-file-name fn)))
-        (with-temp-buffer
-          (unless (eql 0 (call-process doc-view-pdfdraw-program nil
-                                       (current-buffer) nil "show" fn 
"outline"))
+      (with-temp-buffer
+        (let ((proc (make-process
+                     :name "doc-view-pdf-outline"
+                     :command (list "mutool" "run")
+                     :buffer (current-buffer))))
+          (process-send-string proc (format doc-view--mutool-pdf-outline-script
+                                            (expand-file-name fn)))
+          ;; Need to send this twice for some reason...
+          (process-send-eof)
+          (process-send-eof)
+          (while (accept-process-output proc))
+          (unless (eq (process-status proc) 'exit)
             (setq doc-view--outline 'unavailable)
             (imenu-unavailable-error "Unable to create imenu index using 
`mutool'"))
           (goto-char (point-min))
-          (while (re-search-forward doc-view--outline-rx nil t)
-            (push `((level . ,(length (match-string 1)))
-                    (title . ,(replace-regexp-in-string "\\\\[rt]" " "
-                                                        (match-string 2)))
-                    (page . ,(string-to-number (match-string 3))))
-                  outline)))
-        (nreverse outline)))))
+          (when (search-forward "BEGIN" nil t)
+            (condition-case nil
+                (read (current-buffer))
+              (end-of-file nil))))))))
 
 (defun doc-view--djvu-outline (&optional file-name)
   "Return a list describing the outline of FILE-NAME.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]