[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
java.text.RuleBasedCollator
From: |
Guilhem Lavaux |
Subject: |
java.text.RuleBasedCollator |
Date: |
Sun, 30 May 2004 19:19:43 +0200 |
User-agent: |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.6) Gecko/20040115 |
Hi,
I just committed the attached patch to improve once again the text
collator of GNU Classpath.
Guilhem.
2004-05-30 Guilhem Lavaux <address@hidden>
* java/text/RuleBasedCollator.java
(CollationElement.CollationElement): Removed unused constructor.
New argument for the other one.
(subParseString): Mark the elements as ignorable but do not erase
them from the database.
(compare): Take into account the ignore field for ignorable
elements.
(buildCollationVector): Use the ignore field now.
* java/text/CollationElementIterator.java
(setText): Keep ignorable characters.
Index: java/text/RuleBasedCollator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/RuleBasedCollator.java,v
retrieving revision 1.24
diff -u -b -B -r1.24 RuleBasedCollator.java
--- java/text/RuleBasedCollator.java 20 May 2004 10:34:33 -0000 1.24
+++ java/text/RuleBasedCollator.java 29 May 2004 17:28:32 -0000
@@ -158,23 +158,17 @@
String expansion;
CollationElement(String key, int primary, short secondary, short tertiary,
- short equality, String expansion)
+ short equality, String expansion, boolean ignore)
{
this.key = key;
this.primary = primary;
this.secondary = secondary;
this.tertiary = tertiary;
this.equality = equality;
- this.ignore = false;
+ this.ignore = ignore;
this.expansion = expansion;
}
- CollationElement(String key)
- {
- this.key = key;
- this.ignore = true;
- }
-
final int getValue()
{
return (primary << 16) + (secondary << 8) + tertiary;
@@ -196,13 +190,13 @@
static final int GREATERT = 2;
static final int EQUAL = 3;
static final int RESET = 4;
- static final int IGNORE = 5;
- static final int INVERSE_SECONDARY = 6;
+ static final int INVERSE_SECONDARY = 5;
int comparisonType;
String textElement;
int hashText;
int offset;
+ boolean ignore;
String expansionOrdering;
}
@@ -250,7 +244,7 @@
*/
static final CollationElement SPECIAL_UNKNOWN_SEQ =
new CollationElement("", (short) 32767, (short) 0, (short) 0,
- (short) 0, null);
+ (short) 0, null, false);
/**
* This method initializes a new instance of <code>RuleBasedCollator</code>
@@ -447,26 +441,16 @@
throw new ParseException
("Modifier '!' is not yet supported by Classpath", i+base_offset);
case '<':
- ignoreChars = false;
type = CollationSorter.GREATERP;
break;
case ';':
- if (!ignoreChars)
type = CollationSorter.GREATERS;
- else
- type = CollationSorter.IGNORE;
break;
case ',':
- if (!ignoreChars)
type = CollationSorter.GREATERT;
- else
- type = CollationSorter.IGNORE;
break;
case '=':
- if (!ignoreChars)
type = CollationSorter.EQUAL;
- else
- type = CollationSorter.IGNORE;
break;
case '\'':
eatingChars = !eatingChars;
@@ -549,10 +533,14 @@
CollationSorter sorter = new CollationSorter();
+ if (operator == CollationSorter.GREATERP)
+ ignoreChars = false;
+
sorter.comparisonType = operator;
sorter.textElement = sb.toString();
sorter.hashText = sorter.textElement.hashCode();
sorter.offset = base_offset+rules.length();
+ sorter.ignore = ignoreChars;
sb.setLength(0);
v.add(sorter);
@@ -568,10 +556,14 @@
|| (sb.length() == 0 && !nextIsModifier && !eatingChars))
throw new ParseException("text element empty at " + pos, pos);
+ if (operator == CollationSorter.GREATERP)
+ ignoreChars = false;
+
sorter.comparisonType = operator;
sorter.textElement = sb.toString();
sorter.hashText = sorter.textElement.hashCode();
sorter.offset = base_offset+pos;
+ sorter.ignore = ignoreChars;
v.add(sorter);
}
@@ -674,8 +666,6 @@
last_tertiary_seq = tertiary_seq;
equality_seq = 0;
break;
- case CollationSorter.IGNORE:
- ignoreChar = true;
case CollationSorter.EQUAL:
equality_seq++;
break;
@@ -687,18 +677,9 @@
("Invalid unknown state '" + elt.comparisonType + "'",
elt.offset);
}
- CollationElement e;
-
- if (!ignoreChar)
- {
- e = new CollationElement(elt.textElement, primary_seq,
+ v.add(new CollationElement(elt.textElement, primary_seq,
secondary_seq, tertiary_seq,
- equality_seq, elt.expansionOrdering);
- }
- else
- e = new CollationElement(elt.textElement);
-
- v.add(e);
+ equality_seq, elt.expansionOrdering, elt.ignore));
}
this.inverseAccentComparison = inverseComparisons;
@@ -741,17 +722,45 @@
public int compare(String source, String target)
{
CollationElementIterator cs, ct;
+ CollationElement ord1block = null;
+ CollationElement ord2block = null;
+ boolean advance_block_1 = true;
+ boolean advance_block_2 = true;
cs = getCollationElementIterator(source);
ct = getCollationElementIterator(target);
for(;;)
{
- CollationElement ord1block = cs.nextBlock();
- CollationElement ord2block = ct.nextBlock();
int ord1;
int ord2;
+ /*
+ * We have to check whether the characters are ignorable.
+ * If it is the case then forget them.
+ */
+ if (advance_block_1)
+ {
+ ord1block = cs.nextBlock();
+ if (ord1block != null && ord1block.ignore)
+ continue;
+ }
+
+ if (advance_block_2)
+ {
+ ord2block = ct.nextBlock();
+ if (ord2block != null && ord2block.ignore)
+ {
+ advance_block_1 = false;
+ continue;
+ }
+ }
+ else
+ advance_block_2 = true;
+
+ if (!advance_block_1)
+ advance_block_1 = true;
+
if (ord1block != null)
ord1 = ord1block.getValue();
else
@@ -781,12 +790,12 @@
if (prim1 == 0 && getStrength() < TERTIARY)
{
- ct.previousBlock();
+ advance_block_2 = false;
continue;
}
else if (prim2 == 0 && getStrength() < TERTIARY)
{
- cs.previousBlock();
+ advance_block_1 = false;
continue;
}
@@ -861,7 +870,7 @@
else
v = (short) c;
return new CollationElement("" + c, last_primary_value + v,
- (short) 0, (short) 0, (short) 0, null);
+ (short) 0, (short) 0, (short) 0, null, false);
}
/**
@@ -883,7 +892,7 @@
else
v = (short) c;
return new CollationElement("" + c, (short) 0,
- (short) 0, (short) (last_tertiary_value + v), (short) 0, null);
+ (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
}
/**
@@ -945,6 +954,13 @@
while (ord != CollationElementIterator.NULLORDER)
{
+ // If the primary order is null, it means this is an ignorable
+ // character.
+ if (CollationElementIterator.primaryOrder(ord) == 0)
+ {
+ ord = cei.next();
+ continue;
+ }
switch (getStrength())
{
case PRIMARY:
Index: java/text/CollationElementIterator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/CollationElementIterator.java,v
retrieving revision 1.17
diff -u -b -B -r1.17 CollationElementIterator.java
--- java/text/CollationElementIterator.java 20 May 2004 10:34:33 -0000 1.17
+++ java/text/CollationElementIterator.java 29 May 2004 17:28:32 -0000
@@ -357,11 +357,8 @@
/* Third case: the simplest. We have got the prefix and it
* has not to be expanded.
*/
- if (!prefix.ignore)
- {
v.add (prefix);
vi.add (new Integer(idx_idx));
- }
idx += prefix.key.length();
/* If the sequence is in an expansion, we must decrease the
* counter.
signature.asc
Description: OpenPGP digital signature
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- java.text.RuleBasedCollator,
Guilhem Lavaux <=