emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim f260cc9834 2/3: 改变形码词条的排序规则, #449


From: ELPA Syncer
Subject: [elpa] externals/pyim f260cc9834 2/3: 改变形码词条的排序规则, #449
Date: Wed, 6 Jul 2022 17:57:50 -0400 (EDT)

branch: externals/pyim
commit f260cc9834a0efa7f2d1da63191ee5e5a94e9b0c
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    改变形码词条的排序规则, #449
---
 pyim-candidates.el  | 63 ++++++++++++++++++++++++++++-------------------------
 tests/pyim-tests.el | 17 +--------------
 2 files changed, 34 insertions(+), 46 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index 25f6f95ec9..9702f9cc53 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -51,19 +51,6 @@
 (cl-defgeneric pyim-candidates-get-chief (scheme &optional personal-words common-words)
   "PYIM 输入法第一位候选词的获取策略。")
 
-(cl-defmethod pyim-candidates-get-chief ((_scheme pyim-scheme-xingma)
-                                         &optional personal-words common-words)
-  "五笔仓颉等形码输入法第一位候选词的选择策略。"
-  (or
-   ;; 如果从公共词库里面获取到的第一个词条是汉字,就选择它。
-   (when (= (length (car common-words)) 1)
-     (car common-words))
-   ;; 从个人词库里面按排列的先后顺序,获取一个汉字。
-   (cl-find-if
-    (lambda (word)
-      (= (length word) 1))
-    personal-words)))
-
 (cl-defmethod pyim-candidates-get-chief ((_scheme pyim-scheme-quanpin)
                                          &optional personal-words _common-words)
   "PYIM 输入法第一位候选词的获取通用策略。"
@@ -104,34 +91,50 @@
         (when other-codes
           (setq prefix (mapconcat
                         (lambda (code)
-                          (pyim-candidates-get-chief
-                           scheme
-                           (pyim-dcache-get code '(icode2word))
-                           (pyim-dcache-get code '(code2word))))
+                          (car (pyim-candidates--xingma-words code)))
                         other-codes "")))
 
         ;; 5. output => 工子又 工子叕
         (setq output
               (mapcar (lambda (word)
                         (concat prefix word))
-                      (pyim-candidates--xingma-words last-code scheme)))
+                      (pyim-candidates--xingma-words last-code)))
         (setq output (remove "" (or output (list prefix))))
         (setq result (append result output))))
     (when (car result)
       (delete-dups result))))
 
-(defun pyim-candidates--xingma-words (code scheme)
-  "按照形码 scheme 的规则,搜索 CODE, 得到相应的词条列表。"
-  (let* ((personal-words (pyim-dcache-get code '(icode2word)))
-         (personal-words (pyim-candidates--sort personal-words))
-         (common-words (pyim-dcache-get code '(code2word)))
-         (chief-word (pyim-candidates-get-chief scheme personal-words common-words))
-         (common-words (pyim-candidates--sort common-words))
-         (other-words (pyim-dcache-get code '(shortcode2word))))
-    `(,chief-word
-      ,@personal-words
-      ,@common-words
-      ,@other-words)))
+(defun pyim-candidates--xingma-words (code)
+  "按照形码 scheme 的规则,搜索 CODE, 得到相应的词条列表。
+
+当前的词条的构建规则是:
+1. 先排公共词库中的字。
+2. 然后再排所有词库中的词,词会按词频动态调整。"
+  (let* ((common-words (pyim-dcache-get code '(code2word)))
+         (common-chars (pyim-candidates--get-chars common-words))
+         (personal-words (pyim-dcache-get code '(icode2word)))
+         (other-words (pyim-dcache-get code '(shortcode2word)))
+         (words-without-chars
+          (pyim-candidates--sort
+           (pyim-candidates--remove-chars
+            (delete-dups
+             `(,@personal-words
+               ,@common-words
+               ,@other-words))))))
+    `(,@common-chars
+      ,@words-without-chars)))
+
+(defun pyim-candidates--get-chars (words)
+  "从 WORDS 中获取字。"
+  (cl-remove-if (lambda (x)
+                  (> (length x) 1))
+                words))
+
+(defun pyim-candidates--remove-chars (words)
+  "把 WORDS 中的字删除。"
+  (cl-remove-if (lambda (x)
+                  (< (length x) 2))
+                words))
 
 (cl-defmethod pyim-candidates-create (imobjs (scheme pyim-scheme-quanpin))
   "按照 SCHEME, 从 IMOBJS 获得候选词条,用于全拼输入法。"
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 7c6176914c..6c49370c1f 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -513,21 +513,6 @@
 
 ;; ** pyim-candidates 相关单元测试
 (ert-deftest pyim-tests-pyim-candidates-get-chief ()
-  (let ((wubi (pyim-scheme-get 'wubi))
-        (personal-words1 '("呵呵" "天" "恭恭敬敬"))
-        (personal-words2 '("呵呵" "恭恭敬敬"))
-        (common-words1 '("工" "恭恭敬敬"))
-        (common-words2 '("恭恭敬敬" "工")))
-
-    ;; 形码输入法选择第一位词条的规则是:
-    ;; 1. 如果从公共词库里面获取到的第一个词条是汉字,就选择它。
-    ;; 2. 如果不是,就从个人词库里面按排列的先后顺序,获取一个汉字。
-    (should (equal (pyim-candidates-get-chief wubi personal-words1 common-words1)
-                   "工"))
-    (should (equal (pyim-candidates-get-chief wubi personal-words1 common-words2)
-                   "天"))
-    (should-not (pyim-candidates-get-chief wubi personal-words2 common-words2)))
-
   (let ((quanpin (pyim-scheme-get 'quanpin))
         (pyim-dhashcache-iword2count-recent-10-words
           (read "#s(hash-table size 65 test equal rehash-size 1.5 rehash-threshold 0.8125 data (:all-words (\"就\" \"不是\" \"如果\" \"是\" \"规则\" \"的\" \"词条\" \"第一位\" \"选择\" \"输入法\") \"如果\" 1 \"的\" 1 \"就\" 1 \"输入法\" 2 \"词条\" 1 \"不是\" 1 \"选择\" 1 \"第一位\" 1 \"规则\" 1 \"是\" 1))"))
@@ -556,7 +541,7 @@
     (should (equal (pyim-candidates-create
                     (pyim-imobjs-create "aaaa" wubi)
                     wubi)
-                   '("㠭" "工")))
+                   '("㠭")))
     (should (equal (pyim-candidates-create
                     (pyim-imobjs-create "bbbb" wubi)
                     wubi)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]