[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] master 05c5e4d 4/4: ; Merge: Update and optimize UCS norma
From: |
Noam Postavsky |
Subject: |
[Emacs-diffs] master 05c5e4d 4/4: ; Merge: Update and optimize UCS normalization tests |
Date: |
Sat, 8 Jul 2017 14:33:12 -0400 (EDT) |
branch: master
commit 05c5e4d8181ee5274885da4ed520bb9874491aab
Merge: efaf148 06ff34c
Author: Noam Postavsky <address@hidden>
Commit: Noam Postavsky <address@hidden>
; Merge: Update and optimize UCS normalization tests
---
test/lisp/international/ucs-normalize-tests.el | 247 +++++++++++++++----------
1 file changed, 153 insertions(+), 94 deletions(-)
diff --git a/test/lisp/international/ucs-normalize-tests.el
b/test/lisp/international/ucs-normalize-tests.el
index d85efe2..02a4bba 100644
--- a/test/lisp/international/ucs-normalize-tests.el
+++ b/test/lisp/international/ucs-normalize-tests.el
@@ -26,15 +26,13 @@
;; If there are lines marked as failing (see
;; `ucs-normalize-tests--failing-lines-part1' and
;; `ucs-normalize-tests--failing-lines-part2'), they may need to be
-;; adjusted when NormalizationTest.txt is updated. To get a list of
-;; currently failing lines, set those 2 variables to nil, run the
-;; tests, and inspect the values of
-;; `ucs-normalize-tests--part1-rule1-failed-lines' and
-;; `ucs-normalize-tests--part1-rule2-failed-chars', respectively.
+;; adjusted when NormalizationTest.txt is updated. Run the function
+;; `ucs-normalize-check-failing-lines' to see what changes are needed.
;;; Code:
(eval-when-compile (require 'cl-lib))
+(require 'seq)
(require 'ert)
(require 'ucs-normalize)
@@ -44,83 +42,98 @@
(defun ucs-normalize-tests--parse-column ()
(let ((chars nil)
(term nil))
- (while (and (not (equal term ";"))
+ (while (and (not (eq term ?\;))
(looking-at "\\([[:xdigit:]]\\{4,6\\}\\)\\([; ]\\)"))
- (let ((code-point (match-string 1)))
- (setq term (match-string 2))
+ (let ((code-point (match-string-no-properties 1)))
+ (setq term (char-after (match-beginning 2)))
(goto-char (match-end 0))
(push (string-to-number code-point 16) chars)))
- (nreverse chars)))
+ (apply #'string (nreverse chars))))
-(defmacro ucs-normalize-tests--normalize (norm str)
+(defconst ucs-normalize-tests--norm-buf (generate-new-buffer "
*ucs-normalizing-buffer*"))
+
+(defmacro ucs-normalize-tests--normalization-equal-p (norm str equal-to)
"Like `ucs-normalize-string' but reuse current buffer for efficiency.
And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
(let ((norm-alist '((NFC . ucs-normalize-NFC-region)
(NFD . ucs-normalize-NFD-region)
(NFKC . ucs-normalize-NFKC-region)
(NFKD . ucs-normalize-NFKD-region))))
- `(save-restriction
- (narrow-to-region (point) (point))
+ `(with-current-buffer ucs-normalize-tests--norm-buf
+ (erase-buffer)
(insert ,str)
- (funcall #',(cdr (assq norm norm-alist)) (point-min) (point-max))
- (delete-and-extract-region (point-min) (point-max)))))
+ (,(cdr (assq norm norm-alist)) (point-min) (point-max))
+ (goto-char (point-min))
+ (insert ,equal-to)
+ (eq (compare-buffer-substrings nil nil (point) nil (point) nil) 0))))
+
+(defmacro ucs-normalize-tests--normalization-chareq-p (norm char char-eq-to)
+ "Like `ucs-normalize-string' but reuse current buffer for efficiency.
+And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
+ (let ((norm-alist '((NFC . ucs-normalize-NFC-region)
+ (NFD . ucs-normalize-NFD-region)
+ (NFKC . ucs-normalize-NFKC-region)
+ (NFKD . ucs-normalize-NFKD-region))))
+ `(with-current-buffer ucs-normalize-tests--norm-buf
+ (erase-buffer)
+ (insert ,char)
+ (,(cdr (assq norm norm-alist)) (point-min) (point-max))
+ (and (eq (buffer-size) 1)
+ (eq (char-after (point-min)) ,char-eq-to)))))
(defvar ucs-normalize-tests--chars-part1 nil)
-(defun ucs-normalize-tests--invariants-hold-p (&rest columns)
+(defsubst ucs-normalize-tests--rule1-holds-p (source nfc nfd nfkc nfkd)
"Check 1st conformance rule.
The following invariants must be true for all conformant implementations..."
(when ucs-normalize-tests--chars-part1
- ;; See `ucs-normalize-tests--invariants-rule2-hold-p'.
+ ;; See `ucs-normalize-tests--rule2-holds-p'.
(aset ucs-normalize-tests--chars-part1
- (caar columns) 1))
- (cl-destructuring-bind (source nfc nfd nfkc nfkd)
- (mapcar (lambda (c) (apply #'string c)) columns)
- (and
- ;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3)
- (equal nfc (ucs-normalize-tests--normalize NFC source))
- (equal nfc (ucs-normalize-tests--normalize NFC nfc))
- (equal nfc (ucs-normalize-tests--normalize NFC nfd))
- ;; c4 == toNFC(c4) == toNFC(c5)
- (equal nfkc (ucs-normalize-tests--normalize NFC nfkc))
- (equal nfkc (ucs-normalize-tests--normalize NFC nfkd))
-
- ;; c3 == toNFD(c1) == toNFD(c2) == toNFD(c3)
- (equal nfd (ucs-normalize-tests--normalize NFD source))
- (equal nfd (ucs-normalize-tests--normalize NFD nfc))
- (equal nfd (ucs-normalize-tests--normalize NFD nfd))
- ;; c5 == toNFD(c4) == toNFD(c5)
- (equal nfkd (ucs-normalize-tests--normalize NFD nfkc))
- (equal nfkd (ucs-normalize-tests--normalize NFD nfkd))
-
- ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) ==
toNFKC(c5)
- (equal nfkc (ucs-normalize-tests--normalize NFKC source))
- (equal nfkc (ucs-normalize-tests--normalize NFKC nfc))
- (equal nfkc (ucs-normalize-tests--normalize NFKC nfd))
- (equal nfkc (ucs-normalize-tests--normalize NFKC nfkc))
- (equal nfkc (ucs-normalize-tests--normalize NFKC nfkd))
-
- ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) ==
toNFKD(c5)
- (equal nfkd (ucs-normalize-tests--normalize NFKD source))
- (equal nfkd (ucs-normalize-tests--normalize NFKD nfc))
- (equal nfkd (ucs-normalize-tests--normalize NFKD nfd))
- (equal nfkd (ucs-normalize-tests--normalize NFKD nfkc))
- (equal nfkd (ucs-normalize-tests--normalize NFKD nfkd)))))
-
-(defun ucs-normalize-tests--invariants-rule2-hold-p (char)
+ (aref source 0) 1))
+ (and
+ ;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3)
+ (ucs-normalize-tests--normalization-equal-p NFC source nfc)
+ (ucs-normalize-tests--normalization-equal-p NFC nfc nfc)
+ (ucs-normalize-tests--normalization-equal-p NFC nfd nfc)
+ ;; c4 == toNFC(c4) == toNFC(c5)
+ (ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc)
+ (ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc)
+
+ ;; c3 == toNFD(c1) == toNFD(c2) == toNFD(c3)
+ (ucs-normalize-tests--normalization-equal-p NFD source nfd)
+ (ucs-normalize-tests--normalization-equal-p NFD nfc nfd)
+ (ucs-normalize-tests--normalization-equal-p NFD nfd nfd)
+ ;; c5 == toNFD(c4) == toNFD(c5)
+ (ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd)
+ (ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd)
+
+ ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
+ (ucs-normalize-tests--normalization-equal-p NFKC source nfkc)
+ (ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc)
+ (ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc)
+ (ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc)
+ (ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc)
+
+ ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5)
+ (ucs-normalize-tests--normalization-equal-p NFKD source nfkd)
+ (ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd)
+ (ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd)
+ (ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd)
+ (ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd)))
+
+(defsubst ucs-normalize-tests--rule2-holds-p (X)
"Check 2nd conformance rule.
For every code point X assigned in this version of Unicode that is not
specifically
listed in Part 1, the following invariants must be true for all conformant
implementations:
X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)"
- (let ((X (string char)))
- (and (equal X (ucs-normalize-tests--normalize NFC X))
- (equal X (ucs-normalize-tests--normalize NFD X))
- (equal X (ucs-normalize-tests--normalize NFKC X))
- (equal X (ucs-normalize-tests--normalize NFKD X)))))
+ (and (ucs-normalize-tests--normalization-chareq-p NFC X X)
+ (ucs-normalize-tests--normalization-chareq-p NFD X X)
+ (ucs-normalize-tests--normalization-chareq-p NFKC X X)
+ (ucs-normalize-tests--normalization-chareq-p NFKD X X)))
-(cl-defun ucs-normalize-tests--invariants-failing-for-part (part &optional
skip-lines &key progress-str)
+(cl-defun ucs-normalize-tests--rule1-failing-for-partX (part &optional
skip-lines &key progress-str)
"Returns a list of failed line numbers."
(with-temp-buffer
(insert-file-contents ucs-normalize-test-data-file)
@@ -136,8 +149,8 @@ implementations:
progress-str beg-line end-line
0 nil 0.5))
for line from beg-line to (1- end-line)
- unless (or (= (following-char) ?#)
- (ucs-normalize-tests--invariants-hold-p
+ unless (or (eq (following-char) ?#)
+ (ucs-normalize-tests--rule1-holds-p
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
@@ -148,7 +161,7 @@ implementations:
do (forward-line)
if reporter do (progress-reporter-update reporter line)))))
-(defun ucs-normalize-tests--invariants-failing-for-lines (lines)
+(defun ucs-normalize-tests--rule1-failing-for-lines (lines)
"Returns a list of failed line numbers."
(with-temp-buffer
(insert-file-contents ucs-normalize-test-data-file)
@@ -156,7 +169,7 @@ implementations:
(cl-loop for prev-line = 1 then line
for line in lines
do (forward-line (- line prev-line))
- unless (ucs-normalize-tests--invariants-hold-p
+ unless (ucs-normalize-tests--rule1-holds-p
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
@@ -165,7 +178,7 @@ implementations:
collect line)))
(ert-deftest ucs-normalize-part0 ()
- (should-not (ucs-normalize-tests--invariants-failing-for-part 0)))
+ (should-not (ucs-normalize-tests--rule1-failing-for-partX 0)))
(defconst ucs-normalize-tests--failing-lines-part1
(list 15131 15132 15133 15134 15135 15136 15137 15138
@@ -195,6 +208,8 @@ implementations:
"A list of line numbers.")
(defvar ucs-normalize-tests--part1-rule2-failed-chars nil
"A list of code points.")
+(defvar ucs-normalize-tests--part2-rule1-failed-lines nil
+ "A list of line numbers.")
(defun ucs-normalize-tests--part1-rule2 (chars-part1)
(let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2"
@@ -204,11 +219,11 @@ implementations:
(lambda (char-range listed-in-part)
(unless (eq listed-in-part 1)
(if (characterp char-range)
- (progn (unless (ucs-normalize-tests--invariants-rule2-hold-p
char-range)
+ (progn (unless (ucs-normalize-tests--rule2-holds-p char-range)
(push char-range failed-chars))
(progress-reporter-update reporter char-range))
(cl-loop for char from (car char-range) to (cdr char-range)
- unless (ucs-normalize-tests--invariants-rule2-hold-p char)
+ unless (ucs-normalize-tests--rule2-holds-p char)
do (push char failed-chars)
do (progress-reporter-update reporter char)))))
chars-part1)
@@ -219,59 +234,103 @@ implementations:
:tags '(:expensive-test)
;; This takes a long time, so make sure we're compiled.
(dolist (fun '(ucs-normalize-tests--part1-rule2
- ucs-normalize-tests--invariants-failing-for-part
- ucs-normalize-tests--invariants-hold-p
- ucs-normalize-tests--invariants-rule2-hold-p))
+ ucs-normalize-tests--rule1-failing-for-partX
+ ucs-normalize-tests--rule1-holds-p
+ ucs-normalize-tests--rule2-holds-p))
(or (byte-code-function-p (symbol-function fun))
(byte-compile fun)))
(let ((ucs-normalize-tests--chars-part1 (make-char-table
'ucs-normalize-tests t)))
- (should-not
- (setq ucs-normalize-tests--part1-rule1-failed-lines
- (ucs-normalize-tests--invariants-failing-for-part
- 1 ucs-normalize-tests--failing-lines-part1
- :progress-str "UCS Normalize Test Part1, rule 1")))
- (should-not (setq ucs-normalize-tests--part1-rule2-failed-chars
- (ucs-normalize-tests--part1-rule2
- ucs-normalize-tests--chars-part1)))))
+ (setq ucs-normalize-tests--part1-rule1-failed-lines
+ (ucs-normalize-tests--rule1-failing-for-partX
+ 1 ucs-normalize-tests--failing-lines-part1
+ :progress-str "UCS Normalize Test Part1, rule 1"))
+ (setq ucs-normalize-tests--part1-rule2-failed-chars
+ (ucs-normalize-tests--part1-rule2
+ ucs-normalize-tests--chars-part1))
+ (should-not ucs-normalize-tests--part1-rule1-failed-lines)
+ (should-not ucs-normalize-tests--part1-rule2-failed-chars)))
(ert-deftest ucs-normalize-part1-failing ()
:expected-result :failed
(skip-unless ucs-normalize-tests--failing-lines-part1)
(should-not
- (ucs-normalize-tests--invariants-failing-for-lines
+ (ucs-normalize-tests--rule1-failing-for-lines
ucs-normalize-tests--failing-lines-part1)))
(defconst ucs-normalize-tests--failing-lines-part2
- (list 18328 18330 18332 18334 18336 18338 18340 18342
- 18344 18346 18348 18350 18352 18354 18356 18358
- 18360 18362 18364 18366 18368 18370 18372 18374
- 18376 18378 18380 18382 18384 18386 18388 18390
- 18392 18394 18396 18398 18400 18402 18404 18406
- 18408 18410 18412 18414 18416 18418 18420 18422
- 18424 18426 18494 18496 18498 18500 18502 18504
- 18506 18508 18510 18512 18514 18516 18518 18520
- 18522 18524 18526 18528 18530 18532 18534 18536
- 18538 18540 18542 18544 18546 18548 18550 18552
- 18554 18556 18558 18560 18562 18564 18566 18568
- 18570 18572 18574 18576 18578 18580 18582 18584
- 18586 18588 18590 18592 18594 18596))
+ (list 17656 17658 18006 18007 18008 18009 18010 18011
+ 18012 18340 18342 18344 18346 18348 18350 18352
+ 18354 18356 18358 18360 18362 18364 18366 18368
+ 18370 18372 18374 18376 18378 18380 18382 18384
+ 18386 18388 18390 18392 18394 18396 18398 18400
+ 18402 18404 18406 18408 18410 18412 18414 18416
+ 18418 18420 18422 18424 18426 18428 18430 18432
+ 18434 18436 18438 18440 18442 18444 18446 18448
+ 18450 18518 18520 18522 18524 18526 18528 18530
+ 18532 18534 18536 18538 18540 18542 18544 18546
+ 18548 18550 18552 18554 18556 18558 18560 18562
+ 18564 18566 18568 18570 18572 18574 18576 18578
+ 18580 18582 18584 18586 18588 18590 18592 18594
+ 18596 18598 18600 18602 18604 18606 18608 18610
+ 18612 18614 18616 18618 18620))
(ert-deftest ucs-normalize-part2 ()
:tags '(:expensive-test)
(should-not
- (ucs-normalize-tests--invariants-failing-for-part
- 2 ucs-normalize-tests--failing-lines-part2
- :progress-str "UCS Normalize Test Part2")))
+ (setq ucs-normalize-tests--part2-rule1-failed-lines
+ (ucs-normalize-tests--rule1-failing-for-partX
+ 2 ucs-normalize-tests--failing-lines-part2
+ :progress-str "UCS Normalize Test Part2"))))
(ert-deftest ucs-normalize-part2-failing ()
:expected-result :failed
(skip-unless ucs-normalize-tests--failing-lines-part2)
(should-not
- (ucs-normalize-tests--invariants-failing-for-lines
+ (ucs-normalize-tests--rule1-failing-for-lines
ucs-normalize-tests--failing-lines-part2)))
(ert-deftest ucs-normalize-part3 ()
(should-not
- (ucs-normalize-tests--invariants-failing-for-part 3)))
+ (ucs-normalize-tests--rule1-failing-for-partX 3)))
+
+(defun ucs-normalize-tests--insert-failing-lines (var newval)
+ (insert (format "`%s' should be updated to:\n
+\(defconst %s
+ (list " var var))
+ (dolist (linos (seq-partition newval 8))
+ (insert (mapconcat #'number-to-string linos " ") "\n"))
+ (insert ")\)"))
+
+(defun ucs-normalize-check-failing-lines ()
+ (interactive)
+ (let ((ucs-normalize-tests--failing-lines-part1 nil)
+ (ucs-normalize-tests--failing-lines-part2 nil))
+ (setq ucs-normalize-tests--part1-rule1-failed-lines nil)
+ (setq ucs-normalize-tests--part1-rule2-failed-chars nil)
+ (setq ucs-normalize-tests--part2-rule1-failed-lines nil)
+ (ert "\\`ucs-normalize"))
+
+ (with-current-buffer (get-buffer-create "*ucs normalize change bad lines*")
+ (erase-buffer)
+ (unless (equal ucs-normalize-tests--part1-rule1-failed-lines
+ ucs-normalize-tests--failing-lines-part1)
+ (ucs-normalize-tests--insert-failing-lines
+ 'ucs-normalize-tests--failing-lines-part1
+ ucs-normalize-tests--part1-rule1-failed-lines))
+
+ (when ucs-normalize-tests--part1-rule2-failed-chars
+ (insert (format "Some characters failed rule 2!\n\n%S"
+ `(list
,@ucs-normalize-tests--part1-rule2-failed-chars))))
+
+ (unless (equal ucs-normalize-tests--part2-rule1-failed-lines
+ ucs-normalize-tests--failing-lines-part2)
+ (ucs-normalize-tests--insert-failing-lines
+ 'ucs-normalize-tests--failing-lines-part2
+ ucs-normalize-tests--part2-rule1-failed-lines))
+ (if (> (buffer-size) 0)
+ (if noninteractive
+ (princ (buffer-string) standard-output)
+ (display-buffer (current-buffer)))
+ (message "No changes to failing lines needed"))))
;;; ucs-normalize-tests.el ends here