diff --git a/preview.el.in b/preview.el.in --- a/preview.el.in +++ b/preview.el.in @@ -2613,35 +2613,96 @@ so the character represented by ^^^ preceding extended characters will not get matched, usually." (let (output case-fold-search) - (when (featurep 'mule) - (setq string (encode-coding-string string run-coding-system))) - (while (string-match "\\^\\{2,\\}\\(\\([@-_?]\\)\\|[8-9a-f][0-9a-f]\\)" - string) + ;; Some coding systems (e.g. japanese-shift-jis) use regexp meta + ;; characters on encoding. Such meta characters would be + ;; interfered with by `regexp-quote' below. Thus the idea of + ;; "encoding entire string beforehand and decoding it at the last + ;; stage" does not work for such coding systems. + ;; (when (featurep 'mule) + ;; (setq string (encode-coding-string string run-coding-system))) + ;; Rather, we work consistently with decoded text. + (if (and (featurep 'xemacs) (featurep 'mule) + (eq 'raw-text (coding-system-name + (coding-system-base run-coding-system)))) + (setq string + (decode-coding-string string + (or (and (featurep 'tex-jp) + japanese-TeX-mode + TeX-japanese-process-output-coding-system) + buffer-file-coding-system)))) + + ;; Next, bytes with value 0x80 to 0xFF represented with ^^ form + ;; are converted to byte sequence, and decoded by suitable coding + ;; system. + (setq string + (preview--decode-^^ab string + (if (featurep 'mule) + buffer-file-coding-system nil))) + + ;; Then, control characters are taken into account. 
+ (while (string-match "\\^\\{2,\\}\\([@-_?]\\)" string) (setq output (concat output (regexp-quote (substring string 0 (- (match-beginning 1) 2))) - (if (match-beginning 2) - (concat - "\\(?:" (regexp-quote - (substring string - (- (match-beginning 1) 2) - (match-end 0))) - "\\|" - (char-to-string - (logxor (aref string (match-beginning 2)) 64)) - "\\)") - (char-to-string - (string-to-number (match-string 1 string) 16)))) + (concat + "\\(?:" (regexp-quote + (substring string + (- (match-beginning 1) 2) + (match-end 0))) + "\\|" + (char-to-string + (logxor (aref string (match-beginning 1)) 64)) + "\\)")) string (substring string (match-end 0)))) (setq output (concat output (regexp-quote string))) - (if (featurep 'mule) - (decode-coding-string output - (or (and (boundp 'TeX-japanese-process-output-coding-system) - TeX-japanese-process-output-coding-system) - buffer-file-coding-system)) - output))) + output)) + +(defun preview--decode-^^ab (string coding-system) + "Decode ^^ sequences in STRING with CODING-SYSTEM. +Sequences of control characters such as ^^I are left untouched. + +Return a new string." + ;; Since the given string can contain multibyte characters, decoding + ;; should be performed separately on each segment made up entirely + ;; with ASCII characters. + (let ((result "")) + (while (string-match "[\x00-\x7F]+" string) + (setq result + (concat result + (substring string 0 (match-beginning 0)) + (let ((text (preview--convert-^^ab + (match-string 0 string)))) + (if (featurep 'mule) + (decode-coding-string text coding-system) + text))) + string (substring string (match-end 0)))) + (setq result (concat result string)) + result)) + +(defun preview--convert-^^ab (string) + "Convert ^^ sequences in STRING to raw 8bit. +Sequences of control characters such as ^^I are left untouched. + +Return a new string." 
+ (save-match-data + (let ((result "")) + (while (string-match "\\^\\^[8-9a-f][0-9a-f]" string) + (setq result + (concat result + (substring string 0 (match-beginning 0)) + (let ((byte (string-to-number + (substring (match-string 0 string) 2) 16))) + ;; `char-to-string' is not appropriate in + ;; Emacs >= 23 because it converts #xAB into + ;; "\u00AB" (multibyte string), not "\xAB" + ;; (raw 8bit unibyte string). + (if (fboundp 'byte-to-string) + (byte-to-string byte) (char-to-string byte)))) + string (substring string (match-end 0)))) + (setq result (concat result string)) + result))) (defun preview-parse-messages (open-closure) "Turn all preview snippets into overlays. @@ -3496,9 +3557,10 @@ (setq TeX-sentinel-function 'preview-TeX-inline-sentinel) (when (featurep 'mule) (setq preview-coding-system - (or (and (boundp 'TeX-japanese-process-output-coding-system) - TeX-japanese-process-output-coding-system) - (with-current-buffer commandbuff + (with-current-buffer commandbuff + (or (and (featurep 'tex-jp) + japanese-TeX-mode + TeX-japanese-process-output-coding-system) buffer-file-coding-system))) (when preview-coding-system (setq preview-coding-system