[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master d3e2c88 4/4: Fix ASCII-only conversion logic (bug#40407)
From: |
Mattias Engdegård |
Subject: |
master d3e2c88 4/4: Fix ASCII-only conversion logic (bug#40407) |
Date: |
Thu, 9 Apr 2020 07:04:09 -0400 (EDT) |
branch: master
commit d3e2c88041b4844422bda64b1ee51678dc8a2e88
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>
Fix ASCII-only conversion logic (bug#40407)
To sidestep conversion altogether when EOL conversion applies, we must
either be encoding a string without NL, or decoding without CR.
* src/coding.c (string_ascii_p): Revert to a pure predicate.
(code_convert_string): Fix logic. Don't use uninitialised
ascii_p (removed). Use memchr to detect CR or LF in string when needed.
* test/src/coding-tests.el (coding-nocopy-ascii):
Update tests to include encodings with explicit EOL conversions.
---
src/coding.c | 46 +++++++++++++++++-----------------------------
test/src/coding-tests.el | 33 +++++++++++++++++++++------------
2 files changed, 38 insertions(+), 41 deletions(-)
diff --git a/src/coding.c b/src/coding.c
index ffcb9cf..450c498 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9474,22 +9474,15 @@ not fully specified.) */)
return code_convert_region (start, end, coding_system, destination, 1, 0);
}
-/* Non-zero if STR contains only characters in the 0..127 range.
- Positive if STR includes characters that don't need EOL conversion
- on decoding, negative otherwise. */
-static int
-string_ascii_p (Lisp_Object str)
+/* Whether STRING only contains chars in the 0..127 range. */
+static bool
+string_ascii_p (Lisp_Object string)
{
- ptrdiff_t nbytes = SBYTES (str);
- bool CR_Seen = false;
+ ptrdiff_t nbytes = SBYTES (string);
for (ptrdiff_t i = 0; i < nbytes; i++)
- {
- if (SREF (str, i) > 127)
- return 0;
- if (SREF (str, i) == '\r')
- CR_Seen = true;
- }
- return CR_Seen ? -1 : 1;
+ if (SREF (string, i) > 127)
+ return false;
+ return true;
}
Lisp_Object
@@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object
coding_system,
if (EQ (dst_object, Qt))
{
/* Fast path for ASCII-only input and an ASCII-compatible coding:
- act as identity if no EOL conversion is neede. */
- int ascii_p;
+ act as identity if no EOL conversion is needed. */
Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
&& (STRING_MULTIBYTE (string)
- ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0)))
- {
- if (ascii_p > 0
- || (ascii_p < 0
- && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
- || inhibit_eol_conversion)))
- return (nocopy
- ? string
- : (encodep
- ? make_unibyte_string (SSDATA (string), bytes)
- : make_multibyte_string (SSDATA (string),
- bytes, bytes)));
- }
+ ? (chars == bytes) : string_ascii_p (string))
+ && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
+ || inhibit_eol_conversion
+ || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
+ return (nocopy
+ ? string
+ : (encodep
+ ? make_unibyte_string (SSDATA (string), bytes)
+ : make_multibyte_string (SSDATA (string), bytes, bytes)));
}
else if (BUFFERP (dst_object))
{
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 8d92bcd..9f6fac3 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -388,29 +388,38 @@
(let* ((uni (apply #'string (number-sequence 0 127)))
(multi (string-to-multibyte uni)))
(dolist (s (list uni multi))
+ ;; Encodings without EOL conversion.
(dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
(should-not (eq (decode-coding-string s coding nil) s))
(should-not (eq (encode-coding-string s coding nil) s))
(should (eq (decode-coding-string s coding t) s))
- (should (eq (encode-coding-string s coding t) s)))))
- (let* ((uni (apply #'string (number-sequence 15 127)))
+ (should (eq (encode-coding-string s coding t) s)))
+
+ ;; With EOL conversion inhibited.
+ (let ((inhibit-eol-conversion t))
+ (dolist (coding '(us-ascii iso-latin-1 utf-8))
+ (should-not (eq (decode-coding-string s coding nil) s))
+ (should-not (eq (encode-coding-string s coding nil) s))
+ (should (eq (decode-coding-string s coding t) s))
+ (should (eq (encode-coding-string s coding t) s))))))
+
+ ;; Check identity decoding with EOL conversion for ASCII except CR.
+ (let* ((uni (apply #'string (delq ?\r (number-sequence 0 127))))
(multi (string-to-multibyte uni)))
(dolist (s (list uni multi))
- (dolist (coding '(us-ascii iso-latin-1 utf-8))
+ (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
(should-not (eq (decode-coding-string s coding nil) s))
- (should-not (eq (encode-coding-string s coding nil) s))
- (should (eq (decode-coding-string s coding t) s))
- (should (eq (encode-coding-string s coding t) s)))))
- (let* ((uni (apply #'string (number-sequence 0 127)))
- (multi (string-to-multibyte uni))
- (inhibit-eol-conversion t))
+ (should (eq (decode-coding-string s coding t) s)))))
+
+ ;; Check identity encoding with EOL conversion for ASCII except LF.
+ (let* ((uni (apply #'string (delq ?\n (number-sequence 0 127))))
+ (multi (string-to-multibyte uni)))
(dolist (s (list uni multi))
- (dolist (coding '(us-ascii iso-latin-1 utf-8))
- (should-not (eq (decode-coding-string s coding nil) s))
+ (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
(should-not (eq (encode-coding-string s coding nil) s))
- (should (eq (decode-coding-string s coding t) s))
(should (eq (encode-coding-string s coding t) s))))))
+
(ert-deftest coding-check-coding-systems-region ()
(should (equal (check-coding-systems-region "aå" nil '(utf-8))
nil))