emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

feature/android bfbdf4eb892: Optimize creation of multibyte menu items on Android


From: Po Lu
Subject: feature/android bfbdf4eb892: Optimize creation of multibyte menu items on Android
Date: Fri, 4 Aug 2023 02:30:19 -0400 (EDT)

branch: feature/android
commit bfbdf4eb892935536fc665d6cc986fd669364263
Author: Po Lu <luangruo@yahoo.com>
Commit: Po Lu <luangruo@yahoo.com>

    Optimize creation of multibyte menu items on Android
    
    * src/androidvfs.c (android_verify_jni_string): Move to
    android.c.
    * src/android.c (android_verify_jni_string): New function.
    (android_build_string): Forgo encoding menu text if TEXT is a
    multibyte string that's also a valid JNI string.
    * src/android.h: Update prototypes.
---
 src/android.c    | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 src/android.h    |  1 +
 src/androidvfs.c | 63 -------------------------------------------
 3 files changed, 76 insertions(+), 69 deletions(-)

diff --git a/src/android.c b/src/android.c
index c30d7b58979..bd19107f53a 100644
--- a/src/android.c
+++ b/src/android.c
@@ -5480,6 +5480,69 @@ android_check_string (Lisp_Object text)
   return true;
 }
 
+/* Verify that the specified NULL-terminated NAME is a valid JNI
+   ``UTF-8'' string.  Return 0 if so, 1 otherwise.
+
+   Do not perform GC, enabling NAME to be a direct reference to string
+   data.
+
+   The native coding system used by the JVM to store strings derives
+   from UTF-8, but deviates from it in two aspects in an attempt to
+   better represent the UTF-16 based Java String format, and to let
+   strings contain NULL characters while remaining valid C strings:
+   NULL bytes are encoded as two-byte sequences, and Unicode surrogate
+   pairs encoded as pairs of three-byte sequences are preferred to
+   four-byte sequences when encoding characters above the BMP.  */
+
+int
+android_verify_jni_string (const char *name)
+{
+  const unsigned char *chars;
+
+  chars = (unsigned char *) name;
+  while (*chars)
+    {
+      /* Switch on the high 4 bits.  */
+
+      switch (*chars++ >> 4)
+       {
+       case 0 ... 7:
+         /* The 8th bit is clean, so this is a regular C
+            character.  */
+         break;
+
+       case 8 ... 0xb:
+         /* Invalid starting byte! */
+         return 1;
+
+       case 0xf:
+         /* The start of a four byte sequence.  These aren't allowed
+            in Java.  */
+         return 1;
+
+       case 0xe:
+         /* The start of a three byte sequence.  Verify that it is
+            continued.  */
+
+         if ((*chars++ & 0xc0) != 0x80)
+           return 1;
+
+         FALLTHROUGH;
+
+       case 0xc ... 0xd:
+         /* The start of a two byte sequence.  Verify that the
+            next byte exists and is a continuation byte (10xxxxxx).  */
+
+         if ((*chars++ & 0xc0) != 0x80)
+           return 1;
+
+         break;
+       }
+    }
+
+  return 0;
+}
+
 /* Given a Lisp string TEXT, return a local reference to an equivalent
    Java string.  */
 
@@ -5492,12 +5555,18 @@ android_build_string (Lisp_Object text)
   jchar *characters;
   USE_SAFE_ALLOCA;
 
-  /* Directly encode TEXT if it contains no multibyte
-     characters.  This is okay because the Java extended UTF
-     format is compatible with ASCII.  */
-
-  if (SBYTES (text) == SCHARS (text)
-      && android_check_string (text))
+  /* Directly encode TEXT if it contains no non-ASCII characters, or
+     is multibyte and a valid Modified UTF-8 string.  This is okay
+     because the Java extended UTF format is compatible with
+     ASCII.  */
+
+  if ((SBYTES (text) == SCHARS (text)
+       && android_check_string (text))
+      /* If TEXT is a multibyte string, then it's using Emacs's
+        internal UTF-8 coding system, a significant subset of which
+        is compatible with JNI.  */
+      || (STRING_MULTIBYTE (text)
+         && !android_verify_jni_string (SSDATA (text))))
     {
       string = (*android_java_env)->NewStringUTF (android_java_env,
                                                  SSDATA (text));
diff --git a/src/android.h b/src/android.h
index cecdfab002f..a052d3a3b21 100644
--- a/src/android.h
+++ b/src/android.h
@@ -105,6 +105,7 @@ extern bool android_detect_mouse (void);
 extern void android_set_dont_focus_on_map (android_window, bool);
 extern void android_set_dont_accept_focus (android_window, bool);
 
+extern int android_verify_jni_string (const char *);
 extern jstring android_build_string (Lisp_Object);
 extern jstring android_build_jstring (const char *);
 extern void android_exception_check (void);
diff --git a/src/androidvfs.c b/src/androidvfs.c
index 2b467bc444f..0d99116c75c 100644
--- a/src/androidvfs.c
+++ b/src/androidvfs.c
@@ -3299,9 +3299,6 @@ static struct android_saf_root_vdir *all_saf_root_vdirs;
 static struct android_vnode *android_saf_tree_from_name (char *, const char *,
                                                         const char *);
 
-/* Forward declaration.  */
-static int android_verify_jni_string (const char *);
-
 /* Ascertain and return whether or not AUTHORITY designates a content
    provider offering at least one directory tree accessible to
    Emacs.  */
@@ -4437,66 +4434,6 @@ static struct android_vops saf_new_vfs_ops;
 /* Chain of all open SAF directory streams.  */
 static struct android_saf_tree_vdir *all_saf_tree_vdirs;
 
-/* Verify that the specified NULL-terminated STRING is a valid JNI
-   ``UTF-8'' string.  Return 0 if so, 1 otherwise.
-
-   The native coding system used by the JVM to store strings derives
-   from UTF-8, but deviates from it in two aspects in an attempt to
-   better represent the UCS-16 based Java String format, and to let
-   strings contain NULL characters while remaining valid C strings:
-   NULL bytes are encoded as two-byte sequences, and Unicode surrogate
-   pairs encoded as two-byte sequences are prefered to four-byte
-   sequences when encoding characters above the BMP.  */
-
-static int
-android_verify_jni_string (const char *name)
-{
-  const unsigned char *chars;
-
-  chars = (unsigned char *) name;
-  while (*chars)
-    {
-      /* Switch on the high 4 bits.  */
-
-      switch (*chars++ >> 4)
-       {
-       case 0 ... 7:
-         /* The 8th bit is clean, so this is a regular C
-            character.  */
-         break;
-
-       case 8 ... 0xb:
-         /* Invalid starting byte! */
-         return 1;
-
-       case 0xf:
-         /* The start of a four byte sequence.  These aren't allowed
-            in Java.  */
-         return 1;
-
-       case 0xe:
-         /* The start of a three byte sequence.  Verify that its
-            continued.  */
-
-         if ((*chars++ & 0xc0) != 0x80)
-           return 1;
-
-         FALLTHROUGH;
-
-       case 0xc ... 0xd:
-         /* The start of a two byte sequence.  Verify that the
-            next byte exists and has its high bit set.  */
-
-         if ((*chars++ & 0xc0) != 0x80)
-           return 1;
-
-         break;
-       }
-    }
-
-  return 0;
-}
-
 /* Find the document ID of the file within TREE_URI designated by
    NAME.
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]