emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 72972118e6f 1/2: Allow toggling "readable" mode in EWW


From: Jim Porter
Subject: master 72972118e6f 1/2: Allow toggling "readable" mode in EWW
Date: Sat, 23 Mar 2024 13:25:42 -0400 (EDT)

branch: master
commit 72972118e6f5831f200108cd7b80bf86538c265e
Author: Jim Porter <jporterbugs@gmail.com>
Commit: Jim Porter <jporterbugs@gmail.com>

    Allow toggling "readable" mode in EWW
    
    Additionally, add an option to prevent adding a new history entry for
    each call of 'eww-readable' (bug#68254).
    
    * lisp/net/eww.el (eww-retrieve):
    
    * lisp/net/eww.el (eww-readable-adds-to-history): New option.
    (eww-retrieve): Make sure we call CALLBACK in all configurations.
    (eww-render): Simplify how to pass encoding.
    (eww--parse-html-region, eww-display-document): New functions, extracted
    from...
    (eww-display-html): ... here.
    (eww-document-base): New function.
    (eww-readable): Toggle "readable" mode interactively, like with a minor
    mode.  Consult 'eww-readable-adds-to-history'.
    (eww-reload): Use 'eshell-display-document'.
    
    * test/lisp/net/eww-tests.el (eww-test--with-mock-retrieve): Fix indent.
    (eww-test/display/html, eww-test/readable/toggle-display): New tests.
    
    * doc/misc/eww.texi (Basics): Describe the new behavior.
    
    * etc/NEWS: Announce this change.
---
 doc/misc/eww.texi          |   5 ++
 etc/NEWS                   |  12 +++++
 lisp/net/eww.el            | 127 +++++++++++++++++++++++++++++----------------
 test/lisp/net/eww-tests.el |  57 +++++++++++++++++++-
 4 files changed, 155 insertions(+), 46 deletions(-)

diff --git a/doc/misc/eww.texi b/doc/misc/eww.texi
index d31fcf1802b..522034c874d 100644
--- a/doc/misc/eww.texi
+++ b/doc/misc/eww.texi
@@ -146,6 +146,11 @@ a new tab is created on the frame tab bar.
 which part of the document contains the ``readable'' text, and will
 only display this part.  This usually gets rid of menus and the like.
 
+  When called interactively, this command toggles the display of the
+readable parts.  With a positive prefix argument, this command always
+displays the readable parts, and with a zero or negative prefix, it
+always displays the full page.
+
 @findex eww-toggle-fonts
 @vindex shr-use-fonts
 @kindex F
diff --git a/etc/NEWS b/etc/NEWS
index e9cb455aa40..30eaaf40385 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1066,6 +1066,18 @@ entries newer than the current page.  To change the 
behavior when
 browsing from "historical" pages, you can customize
 'eww-before-browse-history-function'.
 
++++
+*** 'eww-readable' now toggles display of the readable parts of a web page.
+When called interactively, 'eww-readable' toggles whether to display
+only the readable parts of a page or the full page.  With a positive
+prefix argument, it always displays the readable parts, and with a zero
+or negative prefix, it always displays the full page.
+
+---
+*** New option 'eww-readable-adds-to-history'.
+When non-nil (the default), calling 'eww-readable' adds a new entry to
+the EWW page history.
+
 ** go-ts-mode
 
 +++
diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index 54847bdf396..54b65d35164 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -275,6 +275,11 @@ parameter, and should return the (possibly) transformed 
URL."
   :type '(repeat function)
   :version "29.1")
 
+(defcustom eww-readable-adds-to-history t
+  "If non-nil, calling `eww-readable' adds a new entry to the history."
+  :type 'boolean
+  :version "30.1")
+
 (defface eww-form-submit
   '((((type x w32 ns haiku pgtk android) (class color))        ; Like default 
mode line
      :box (:line-width 2 :style released-button)
@@ -464,11 +469,11 @@ For more information, see Info node `(eww) Top'."
 (defun eww-retrieve (url callback cbargs)
   (cond
    ((null eww-retrieve-command)
-    (url-retrieve url #'eww-render cbargs))
+    (url-retrieve url callback cbargs))
    ((eq eww-retrieve-command 'sync)
     (let ((data-buffer (url-retrieve-synchronously url)))
       (with-current-buffer data-buffer
-        (apply #'eww-render nil cbargs))))
+        (apply callback nil cbargs))))
    (t
     (let ((buffer (generate-new-buffer " *eww retrieve*"))
           (error-buffer (generate-new-buffer " *eww error*")))
@@ -673,9 +678,9 @@ The renaming scheme is performed in accordance with
               (insert (format "<a href=%S>Direct link to the document</a>"
                               url))
               (goto-char (point-min))
-             (eww-display-html charset url nil point buffer encode))
+              (eww-display-html (or encode charset) url nil point buffer))
             ((eww-html-p (car content-type))
-             (eww-display-html charset url nil point buffer encode))
+              (eww-display-html (or encode charset) url nil point buffer))
             ((equal (car content-type) "application/pdf")
              (eww-display-pdf))
             ((string-match-p "\\`image/" (car content-type))
@@ -726,34 +731,40 @@ The renaming scheme is performed in accordance with
 (declare-function libxml-parse-html-region "xml.c"
                  (start end &optional base-url discard-comments))
 
-(defun eww-display-html (charset url &optional document point buffer encode)
+(defun eww--parse-html-region (start end &optional coding-system)
+  "Parse the HTML between START and END, returning the DOM as an S-expression.
+Use CODING-SYSTEM to decode the region; if nil, decode as UTF-8.
+
+This replaces the region with the preprocessed HTML."
+  (setq coding-system (or coding-system 'utf-8))
+  (with-restriction start end
+    (condition-case nil
+        (decode-coding-region (point-min) (point-max) coding-system)
+      (coding-system-error nil))
+    ;; Remove CRLF and replace NUL with &#0; before parsing.
+    (while (re-search-forward "\\(\r$\\)\\|\0" nil t)
+      (replace-match (if (match-beginning 1) "" "&#0;") t t))
+    (eww--preprocess-html (point-min) (point-max))
+    (libxml-parse-html-region (point-min) (point-max))))
+
+(defsubst eww-document-base (url dom)
+  `(base ((href . ,url)) ,dom))
+
+(defun eww-display-document (document &optional point buffer)
   (unless (fboundp 'libxml-parse-html-region)
     (error "This function requires Emacs to be compiled with libxml2"))
+  (setq buffer (or buffer (current-buffer)))
   (unless (buffer-live-p buffer)
     (error "Buffer %s doesn't exist" buffer))
   ;; There should be a better way to abort loading images
   ;; asynchronously.
   (setq url-queue nil)
-  (let ((document
-        (or document
-            (list
-             'base (list (cons 'href url))
-             (progn
-               (setq encode (or encode charset 'utf-8))
-               (condition-case nil
-                   (decode-coding-region (point) (point-max) encode)
-                 (coding-system-error nil))
-               (save-excursion
-                 ;; Remove CRLF and replace NUL with &#0; before parsing.
-                 (while (re-search-forward "\\(\r$\\)\\|\0" nil t)
-                   (replace-match (if (match-beginning 1) "" "&#0;") t t)))
-                (eww--preprocess-html (point) (point-max))
-               (libxml-parse-html-region (point) (point-max))))))
-       (source (and (null document)
-                    (buffer-substring (point) (point-max)))))
+  (let ((url (when (eq (car document) 'base)
+               (alist-get 'href (cadr document)))))
+    (unless url
+      (error "Document is missing base URL"))
     (with-current-buffer buffer
       (setq bidi-paragraph-direction nil)
-      (plist-put eww-data :source source)
       (plist-put eww-data :dom document)
       (let ((inhibit-read-only t)
            (inhibit-modification-hooks t)
@@ -794,6 +805,16 @@ The renaming scheme is performed in accordance with
            (forward-line 1)))))
       (eww-size-text-inputs))))
 
+(defun eww-display-html (charset url &optional document point buffer)
+  (let ((source (buffer-substring (point) (point-max))))
+    (with-current-buffer buffer
+      (plist-put eww-data :source source)))
+  (eww-display-document
+   (or document
+       (eww-document-base
+        url (eww--parse-html-region (point) (point-max) charset)))
+   point buffer))
+
 (defun eww-handle-link (dom)
   (let* ((rel (dom-attr dom 'rel))
         (href (dom-attr dom 'href))
@@ -1055,30 +1076,47 @@ The renaming scheme is performed in accordance with
                "automatic"
              bidi-paragraph-direction)))
 
-(defun eww-readable ()
-  "View the main \"readable\" parts of the current web page.
+(defun eww-readable (&optional arg)
+  "Toggle display of only the main \"readable\" parts of the current web page.
 This command uses heuristics to find the parts of the web page that
-contains the main textual portion, leaving out navigation menus and
-the like."
-  (interactive nil eww-mode)
+contain the main textual portion, leaving out navigation menus and the
+like.
+
+If called interactively, toggle the display of the readable parts.  If
+the prefix argument is positive, display the readable parts, and if it
+is zero or negative, display the full page.
+
+If called from Lisp, toggle the display of the readable parts if ARG is
+`toggle'.  Display the readable parts if ARG is nil, omitted, or is a
+positive number.  Display the full page if ARG is a negative number.
+
+When `eww-readable-adds-to-history' is non-nil, calling this function
+adds a new entry to `eww-history'."
+  (interactive (list (if current-prefix-arg
+                         (prefix-numeric-value current-prefix-arg)
+                       'toggle))
+               eww-mode)
   (let* ((old-data eww-data)
-        (dom (with-temp-buffer
+        (make-readable (cond
+                         ((eq arg 'toggle)
+                          (not (plist-get old-data :readable)))
+                         ((and (numberp arg) (< arg 1))
+                          nil)
+                         (t t)))
+         (dom (with-temp-buffer
                (insert (plist-get old-data :source))
-               (condition-case nil
-                   (decode-coding-region (point-min) (point-max) 'utf-8)
-                 (coding-system-error nil))
-                (eww--preprocess-html (point-min) (point-max))
-               (libxml-parse-html-region (point-min) (point-max))))
+                (eww--parse-html-region (point-min) (point-max))))
          (base (plist-get eww-data :url)))
-    (eww-score-readability dom)
-    (eww-save-history)
-    (eww--before-browse)
-    (eww-display-html nil nil
-                      (list 'base (list (cons 'href base))
-                            (eww-highest-readability dom))
-                     nil (current-buffer))
-    (dolist (elem '(:source :url :title :next :previous :up :peer))
-      (plist-put eww-data elem (plist-get old-data elem)))
+    (when make-readable
+      (eww-score-readability dom)
+      (setq dom (eww-highest-readability dom)))
+    (when eww-readable-adds-to-history
+      (eww-save-history)
+      (eww--before-browse)
+      (dolist (elem '(:source :url :title :next :previous :up :peer))
+        (plist-put eww-data elem (plist-get old-data elem))))
+    (eww-display-document (eww-document-base base dom))
+    (plist-put eww-data :readable make-readable)
     (eww--after-page-change)))
 
 (defun eww-score-readability (node)
@@ -1398,8 +1436,7 @@ just re-display the HTML already fetched."
     (if local
        (if (null (plist-get eww-data :dom))
            (error "No current HTML data")
-         (eww-display-html 'utf-8 url (plist-get eww-data :dom)
-                           (point) (current-buffer)))
+         (eww-display-document (plist-get eww-data :dom) (point)))
       (let ((parsed (url-generic-parse-url url)))
         (if (equal (url-type parsed) "file")
             ;; Use Tramp instead of url.el for files (since url.el
diff --git a/test/lisp/net/eww-tests.el b/test/lisp/net/eww-tests.el
index bd00893d503..a09e0a4f279 100644
--- a/test/lisp/net/eww-tests.el
+++ b/test/lisp/net/eww-tests.el
@@ -33,7 +33,7 @@ body.")
   "Evaluate BODY with a mock implementation of `eww-retrieve'.
 This avoids network requests during our tests.  Additionally, prepare a
 temporary EWW buffer for our tests."
-  (declare (indent 1))
+  (declare (indent 0))
     `(cl-letf (((symbol-function 'eww-retrieve)
                 (lambda (url callback args)
                   (with-temp-buffer
@@ -48,6 +48,24 @@ temporary EWW buffer for our tests."
 
 ;;; Tests:
 
+(ert-deftest eww-test/display/html ()
+  "Test displaying a simple HTML page."
+  (eww-test--with-mock-retrieve
+    (let ((eww-test--response-function
+           (lambda (url)
+             (concat "Content-Type: text/html\n\n"
+                     (format "<html><body><h1>Hello</h1>%s</body></html>"
+                             url)))))
+      (eww "example.invalid")
+      ;; Check that the buffer contains the rendered HTML.
+      (should (equal (buffer-string) "Hello\n\n\nhttp://example.invalid/\n";))
+      (should (equal (get-text-property (point-min) 'face)
+                     '(shr-text shr-h1)))
+      ;; Check that the DOM includes the `base'.
+      (should (equal (pcase (plist-get eww-data :dom)
+                       (`(base ((href . ,url)) ,_) url))
+                     "http://example.invalid/";)))))
+
 (ert-deftest eww-test/history/new-page ()
   "Test that when visiting a new page, the previous one goes into the history."
   (eww-test--with-mock-retrieve
@@ -176,5 +194,42 @@ This sets `eww-before-browse-history-function' to
                        "http://one.invalid/";)))
       (should (= eww-history-position 0)))))
 
+(ert-deftest eww-test/readable/toggle-display ()
+  "Test toggling the display of the \"readable\" parts of a web page."
+  (eww-test--with-mock-retrieve
+    (let* ((shr-width most-positive-fixnum)
+           (shr-use-fonts nil)
+           (words (string-join
+                   (make-list
+                    20 "All work and no play makes Jack a dull boy.")
+                   " "))
+           (eww-test--response-function
+            (lambda (_url)
+              (concat "Content-Type: text/html\n\n"
+                      "<html><body>"
+                      "<a>This is an uninteresting sentence.</a>"
+                      "<div>"
+                      words
+                      "</div>"
+                      "</body></html>"))))
+      (eww "example.invalid")
+      ;; Make sure EWW renders the whole document.
+      (should-not (plist-get eww-data :readable))
+      (should (string-prefix-p
+               "This is an uninteresting sentence."
+               (buffer-substring-no-properties (point-min) (point-max))))
+      (eww-readable 'toggle)
+      ;; Now, EWW should render just the "readable" parts.
+      (should (plist-get eww-data :readable))
+      (should (string-match-p
+               (concat "\\`" (regexp-quote words) "\n*\\'")
+               (buffer-substring-no-properties (point-min) (point-max))))
+      (eww-readable 'toggle)
+      ;; Finally, EWW should render the whole document again.
+      (should-not (plist-get eww-data :readable))
+      (should (string-prefix-p
+               "This is an uninteresting sentence."
+               (buffer-substring-no-properties (point-min) (point-max)))))))
+
 (provide 'eww-tests)
 ;; eww-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]