commit-classpath
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

java.text.RuleBasedCollator


From: Guilhem Lavaux
Subject: java.text.RuleBasedCollator
Date: Sun, 30 May 2004 19:19:43 +0200
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.6) Gecko/20040115

Hi,

I just committed the attached patch, which once again improves the text collator of GNU Classpath.

Guilhem.

2004-05-30  Guilhem Lavaux <address@hidden>

        * java/text/RuleBasedCollator.java
        (CollationElement.CollationElement): Removed unused constructor.
        New argument for the other one.
        (subParseString): Mark the elements as ignorable but do not erase
        them from the database.
        (compare): Take into account the ignore field for ignorable
        elements.
        (buildCollationVector): Use the ignore field now.
        * java/text/CollationElementIterator.java
        (setText): Keep ignorable characters.
Index: java/text/RuleBasedCollator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/RuleBasedCollator.java,v
retrieving revision 1.24
diff -u -b -B -r1.24 RuleBasedCollator.java
--- java/text/RuleBasedCollator.java    20 May 2004 10:34:33 -0000      1.24
+++ java/text/RuleBasedCollator.java    29 May 2004 17:28:32 -0000
@@ -158,23 +158,17 @@
     String expansion;
 
     CollationElement(String key, int primary, short secondary, short tertiary,
-                    short equality, String expansion)
+                    short equality, String expansion, boolean ignore)
     {
       this.key = key;
       this.primary = primary;
       this.secondary = secondary;
       this.tertiary = tertiary;
       this.equality = equality;
-      this.ignore = false;
+      this.ignore = ignore;
       this.expansion = expansion;
     }
     
-    CollationElement(String key)
-    {
-      this.key = key;
-      this.ignore = true;
-    }
-
     final int getValue()
     {
       return (primary << 16) + (secondary << 8) + tertiary;
@@ -196,13 +190,13 @@
     static final int GREATERT = 2;
     static final int EQUAL = 3;
     static final int RESET = 4;
-    static final int IGNORE = 5;
-    static final int INVERSE_SECONDARY = 6;
+    static final int INVERSE_SECONDARY = 5;
     
     int comparisonType;
     String textElement;
     int hashText;
     int offset;
+    boolean ignore;
 
     String expansionOrdering;
   }
@@ -250,7 +244,7 @@
    */
   static final CollationElement SPECIAL_UNKNOWN_SEQ = 
     new CollationElement("", (short) 32767, (short) 0, (short) 0,
-                        (short) 0, null);
+                        (short) 0, null, false);
   
   /**
    * This method initializes a new instance of <code>RuleBasedCollator</code>
@@ -447,26 +441,16 @@
          throw new ParseException
            ("Modifier '!' is not yet supported by Classpath", i+base_offset);
        case '<':
-         ignoreChars = false;
          type = CollationSorter.GREATERP;
          break;
        case ';':
-         if (!ignoreChars)
            type = CollationSorter.GREATERS;
-         else
-           type = CollationSorter.IGNORE;
          break;
        case ',':
-         if (!ignoreChars)
            type = CollationSorter.GREATERT;
-         else
-           type = CollationSorter.IGNORE;
          break;
        case '=':
-         if (!ignoreChars)
            type = CollationSorter.EQUAL;
-         else
-           type = CollationSorter.IGNORE;
          break;
        case '\'':
          eatingChars = !eatingChars;
@@ -549,10 +533,14 @@
 
        CollationSorter sorter = new CollationSorter();
        
+       if (operator == CollationSorter.GREATERP)
+         ignoreChars = false;
+
        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        sorter.offset = base_offset+rules.length();
+       sorter.ignore = ignoreChars;
        sb.setLength(0);
 
        v.add(sorter);
@@ -568,10 +556,14 @@
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
          throw new ParseException("text element empty at " + pos, pos);
 
+       if (operator == CollationSorter.GREATERP)
+         ignoreChars = false;
+
        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        sorter.offset = base_offset+pos;
+       sorter.ignore = ignoreChars;
        v.add(sorter);
       }
 
@@ -674,8 +666,6 @@
              last_tertiary_seq = tertiary_seq;
            equality_seq = 0;
            break;
-         case CollationSorter.IGNORE:
-           ignoreChar = true;
          case CollationSorter.EQUAL:
            equality_seq++;
            break;
@@ -687,18 +677,9 @@
              ("Invalid unknown state '" + elt.comparisonType + "'", 
elt.offset);
          }
 
-       CollationElement e;
-
-       if (!ignoreChar)
-         {
-           e = new CollationElement(elt.textElement, primary_seq,
+       v.add(new CollationElement(elt.textElement, primary_seq,
                                     secondary_seq, tertiary_seq,
-                                    equality_seq, elt.expansionOrdering);
-         }
-       else
-         e = new CollationElement(elt.textElement);
-
-       v.add(e);
+                                  equality_seq, elt.expansionOrdering, 
elt.ignore));
       }
 
     this.inverseAccentComparison = inverseComparisons; 
@@ -741,17 +722,45 @@
   public int compare(String source, String target)
   {
     CollationElementIterator cs, ct;
+    CollationElement ord1block = null;
+    CollationElement ord2block = null;
+    boolean advance_block_1 = true;
+    boolean advance_block_2 = true;
 
     cs = getCollationElementIterator(source);
     ct = getCollationElementIterator(target);
 
     for(;;)
       {
-        CollationElement ord1block = cs.nextBlock(); 
-        CollationElement ord2block = ct.nextBlock(); 
        int ord1;
        int ord2;
 
+       /*
+        * We have to check whether the characters are ignorable.
+        * If it is the case then forget them. 
+        */
+       if (advance_block_1)
+         {
+           ord1block = cs.nextBlock();
+           if (ord1block != null && ord1block.ignore)
+             continue;
+         }
+       
+       if (advance_block_2)
+         {
+           ord2block = ct.nextBlock();
+           if (ord2block != null && ord2block.ignore)
+             {
+               advance_block_1 = false;
+               continue;
+             }
+        }
+       else
+         advance_block_2 = true;
+
+       if (!advance_block_1)
+         advance_block_1 = true;
+
        if (ord1block != null)
          ord1 = ord1block.getValue();
        else
@@ -781,12 +790,12 @@
        
        if (prim1 == 0 && getStrength() < TERTIARY)
          {
-           ct.previousBlock();
+            advance_block_2 = false;
            continue;
          }
        else if (prim2 == 0 && getStrength() < TERTIARY)
          {
-           cs.previousBlock();
+           advance_block_1 = false;
            continue;
          }
 
@@ -861,7 +870,7 @@
     else
       v = (short) c;
     return new CollationElement("" + c, last_primary_value + v,
-                               (short) 0, (short) 0, (short) 0, null);
+                               (short) 0, (short) 0, (short) 0, null, false);
   }
 
   /**
@@ -883,7 +892,7 @@
     else
       v = (short) c;
     return new CollationElement("" + c, (short) 0,
-                               (short) 0, (short) (last_tertiary_value + v), 
(short) 0, null);
+                               (short) 0, (short) (last_tertiary_value + v), 
(short) 0, null, false);
   }
 
   /**
@@ -945,6 +954,13 @@
 
     while (ord != CollationElementIterator.NULLORDER)
       {
+       // If the primary order is null, it means this is an ignorable
+       // character.
+       if (CollationElementIterator.primaryOrder(ord) == 0)
+         {
+            ord = cei.next();
+           continue;
+         }
         switch (getStrength())
           {
             case PRIMARY:
Index: java/text/CollationElementIterator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/CollationElementIterator.java,v
retrieving revision 1.17
diff -u -b -B -r1.17 CollationElementIterator.java
--- java/text/CollationElementIterator.java     20 May 2004 10:34:33 -0000      
1.17
+++ java/text/CollationElementIterator.java     29 May 2004 17:28:32 -0000
@@ -357,11 +357,8 @@
            /* Third case: the simplest. We have got the prefix and it
             * has not to be expanded.
             */
-           if (!prefix.ignore)
-             {
                v.add (prefix);
                vi.add (new Integer(idx_idx));
-             }
            idx += prefix.key.length();
            /* If the sequence is in an expansion, we must decrease the
             * counter.

Attachment: signature.asc
Description: OpenPGP digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]