emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 9074a9f496b: Add customization to let EWW guess content-type if needed


From: Jim Porter
Subject: master 9074a9f496b: Add customization to let EWW guess content-type if needed
Date: Tue, 22 Oct 2024 00:58:46 -0400 (EDT)

branch: master
commit 9074a9f496b04ab58588b71f51d7830782fc7a29
Author: Sebastián Monía <sebastian@sebasmonia.com>
Commit: Jim Porter <jporterbugs@gmail.com>

    Add customization to let EWW guess content-type if needed
    
    * lisp/net/eww.el (eww-guess-content-type-functions): New user option.
    (eww--guess-content-type, eww--html-if-doctype): New functions.
    (eww-render): Call 'eww--guess-content-type' (bug#73133).
---
 lisp/net/eww.el | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index 63ad4ae78b7..3cdb8a3f42e 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -109,6 +109,19 @@ duplicate entries (if any) removed."
              eww-current-url
              eww-bookmark-urls))
 
+(defcustom eww-guess-content-type-functions
+  '(eww--html-if-doctype)
+  "Functions EWW consults to guess the content-type of a page.
+EWW falls back to these only for a response that lacks a valid
+Content-Type header.  Each function is called with two arguments: an
+alist of the response headers, and the buffer holding the complete
+response.  The functions are tried in order, and the first non-nil
+return value is used as the Content-Type.  If no function returns a
+value, EWW assumes \"application/octet-stream\" per RFC-9110."
+  :type '(repeat function)
+  :group 'eww
+  :version "31.1")
+
 (defcustom eww-bookmarks-directory user-emacs-directory
   "Directory where bookmark files will be stored."
   :version "25.1"
@@ -631,6 +644,30 @@ Currently this means either text/html or application/xhtml+xml."
   (member content-type '("text/html"
                         "application/xhtml+xml")))
 
+(defun eww--guess-content-type (headers response-buffer)
+  "Guess a Content-Type from HEADERS and RESPONSE-BUFFER.
+Run the functions in `eww-guess-content-type-functions' in order and
+return the first non-nil result.  Only used when the response has no
+valid Content-Type header.  When every function declines, fall back to
+\"application/octet-stream\"."
+  (let ((guess (run-hook-with-args-until-success
+                'eww-guess-content-type-functions headers response-buffer)))
+    (if guess guess "application/octet-stream")))
+
+(defun eww--html-if-doctype (_headers response-buffer)
+  "Return \"text/html\" if RESPONSE-BUFFER has an HTML doctype declaration.
+Return nil otherwise; point is not moved.  HEADERS is unused."
+  ;; https://html.spec.whatwg.org/multipage/syntax.html#the-doctype
+  (let ((target
+         "<!doctype +html *\\(>\\|system \\(\\\"\\|'\\)+about:legacy-compat\\)"))
+    (with-current-buffer response-buffer
+      ;; Bind `case-fold-search' here, as it can be buffer-local.  TARGET
+      ;; matches <!doctype html> and the legacy variants linked above.
+      (let ((case-fold-search t))
+        (save-excursion
+          (goto-char (point-min))
+          (when (re-search-forward target nil t) "text/html"))))))
+
 (defun eww--rename-buffer ()
   "Rename the current EWW buffer.
 The renaming scheme is performed in accordance with
@@ -660,7 +697,7 @@ The renaming scheme is performed in accordance with
         (content-type
          (mail-header-parse-content-type
            (if (zerop (length (cdr (assoc "content-type" headers))))
-              "text/plain"
+               (eww--guess-content-type headers (current-buffer))
              (cdr (assoc "content-type" headers)))))
         (charset (intern
                   (downcase



reply via email to

[Prev in Thread] Current Thread [Next in Thread]