[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 9074a9f496b: Add customization to let EWW guess content-type if needed
From: |
Jim Porter |
Subject: |
master 9074a9f496b: Add customization to let EWW guess content-type if needed |
Date: |
Tue, 22 Oct 2024 00:58:46 -0400 (EDT) |
branch: master
commit 9074a9f496b04ab58588b71f51d7830782fc7a29
Author: Sebastián Monía <sebastian@sebasmonia.com>
Commit: Jim Porter <jporterbugs@gmail.com>
Add customization to let EWW guess content-type if needed
* lisp/net/eww.el (eww-guess-content-type-functions): New user option.
(eww--guess-content-type, eww--html-if-doctype): New functions.
(eww-render): Call 'eww--guess-content-type' (bug#73133).
---
lisp/net/eww.el | 39 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index 63ad4ae78b7..3cdb8a3f42e 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -109,6 +109,19 @@ duplicate entries (if any) removed."
eww-current-url
eww-bookmark-urls))
+(defcustom eww-guess-content-type-functions
+ '(eww--html-if-doctype)
+ "List of functions used to guess a page's content-type.
+These are only used when the page does not have a valid Content-Type
+header. Functions are called in order, until one of them returns a
+non-nil value, which is used as the Content-Type; a function returns nil
+to decline. Each receives two arguments: an alist of headers, and the
+buffer that holds the complete response. If the list is exhausted, EWW
+assumes \"application/octet-stream\" per RFC-9110."
+ :version "31.1"
+ :group 'eww
+ :type '(repeat function))
+
(defcustom eww-bookmarks-directory user-emacs-directory
"Directory where bookmark files will be stored."
:version "25.1"
@@ -631,6 +644,30 @@ Currently this means either text/html or
application/xhtml+xml."
(member content-type '("text/html"
"application/xhtml+xml")))
+(defun eww--guess-content-type (headers response-buffer)
+  "Guess the Content-Type of a response from HEADERS and RESPONSE-BUFFER.
+Call each function in `eww-guess-content-type-functions' in turn and
+return the first non-nil result.  This is meant only for responses that
+lack a valid Content-Type header.  When no function produces a guess,
+return \"application/octet-stream\"."
+  (let ((guess (run-hook-with-args-until-success
+                'eww-guess-content-type-functions headers response-buffer)))
+    (or guess "application/octet-stream")))
+
+(defun eww--html-if-doctype (_headers response-buffer)
+  "Return \"text/html\" if RESPONSE-BUFFER has an HTML doctype declaration.
+HEADERS is unused.  Point in RESPONSE-BUFFER is left where it was."
+  ;; https://html.spec.whatwg.org/multipage/syntax.html#the-doctype
+  (let ((case-fold-search t)
+        (target
+         "<!doctype +html *\\(>\\|system +\\(\\\"\\|'\\)+about:legacy-compat\\)"))
+    (with-current-buffer response-buffer
+      ;; Match <!doctype html> and the legacy variants from the link above.
+      ;; The caller keeps using this buffer, so don't leave point moved.
+      (save-excursion
+        (goto-char (point-min))
+        (when (re-search-forward target nil t) "text/html")))))
+
(defun eww--rename-buffer ()
"Rename the current EWW buffer.
The renaming scheme is performed in accordance with
@@ -660,7 +697,7 @@ The renaming scheme is performed in accordance with
(content-type
(mail-header-parse-content-type
(if (zerop (length (cdr (assoc "content-type" headers))))
- "text/plain"
+ (eww--guess-content-type headers (current-buffer))
(cdr (assoc "content-type" headers)))))
(charset (intern
(downcase
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master 9074a9f496b: Add customization to let EWW guess content-type if needed,
Jim Porter <=