[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Patch: java.text updates
From: |
Guilhem Lavaux |
Subject: |
Patch: java.text updates |
Date: |
Thu, 20 May 2004 12:32:04 +0200 |
User-agent: |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030630 |
Hi,
I have just committed the attached patch for java.text.
Guilhem.
2004-05-20 Guilhem Lavaux <address@hidden>
* java/text/CollationElementIterator.java
(nextBlock, previousBlock): Use text_indexes to compute
textIndex.
(setOffset): Use text_indexes to choose the right position.
(text_indexes): New field.
(setText): Build text_indexes. Better handling of expansion
ordering.
* java/text/RuleBasedCollator.java
(CollationElement, CollationSorter): Made static.
(last_tertiary_value, SPECIAL_UNKNOWN_SEQ): Introduced special
collation elements for unknown characters. This is for sequences
using resets.
(mergeRules): Fixed insertion point.
(buildCollationVector): Initialize last_tertiary_value.
(compare): Handle special cases of accented characters.
(getDefaultAccentedElement): New method.
(getCollationFixed): Fixed key building.
* java/text/DecimalFormat.java
(parse): Fixed parsing of decimal strings. The maximum number of
digits to be read should now be honored.
* java/text/SimpleDateFormat.java:
(SimpleDateFormat): Set maximumFractionDigits to 0 for the number
formatter. This fixes DateFormatTest.
Index: java/text/CollationElementIterator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/CollationElementIterator.java,v
retrieving revision 1.16
diff -u -b -B -r1.16 CollationElementIterator.java
--- java/text/CollationElementIterator.java 23 Apr 2004 16:03:10 -0000 1.16
+++ java/text/CollationElementIterator.java 20 May 2004 10:06:19 -0000
@@ -92,6 +92,11 @@
private Object[] text_decomposition;
/**
+ * Array containing the index of the specified block.
+ */
+ private int[] text_indexes;
+
+ /**
* This method initializes a new instance of <code>CollationElementIterator</code>
* to iterate over the specified <code>String</code> using the rules in the
* specified <code>RuleBasedCollator</code>.
@@ -112,9 +117,11 @@
return null;
RuleBasedCollator.CollationElement e =
- (RuleBasedCollator.CollationElement) text_decomposition[index++];
+ (RuleBasedCollator.CollationElement) text_decomposition[index];
+
+ textIndex = text_indexes[index+1];
- textIndex += e.key.length();
+ index++;
return e;
}
@@ -128,7 +135,7 @@
RuleBasedCollator.CollationElement e =
(RuleBasedCollator.CollationElement) text_decomposition[index];
- textIndex -= e.key.length();
+ textIndex = text_indexes[index+1];
return e;
}
@@ -231,6 +238,9 @@
public void setText(String text)
{
int idx = 0;
+ int idx_idx = 0;
+ int alreadyExpanded = 0;
+ int idxToMove = 0;
this.text = text;
this.index = 0;
@@ -238,6 +248,8 @@
String work_text = text.intern();
Vector v = new Vector();
+ Vector vi = new Vector();
+
// Build element collection ordered as they come in "text".
while (idx < work_text.length())
{
@@ -254,6 +266,16 @@
key_old = key;
key = work_text.substring (idx, idx+p);
object = collator.prefix_tree.get (key);
+ if (object != null && idx < alreadyExpanded)
+ {
+ RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
+ if (prefix.expansion != null &&
+ prefix.expansion.startsWith(work_text.substring(0, idx)))
+ {
+ object = null;
+ key = key_old;
+ }
+ }
p++;
}
while (idx+p <= work_text.length());
@@ -264,32 +286,106 @@
RuleBasedCollator.CollationElement prefix =
(RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
+ /*
+ * First case: There is no such sequence in the database.
+ * We will have to build one from the context.
+ */
if (prefix == null)
{
+ /*
+ * We are dealing with sequences in an expansion. They
+ * are treated as accented characters (tertiary order).
+ */
+ if (alreadyExpanded > 0)
+ {
RuleBasedCollator.CollationElement e =
- collator.getDefaultElement(work_text.charAt (idx));
+ collator.getDefaultAccentedElement (work_text.charAt (idx));
v.add (e);
+ vi.add (new Integer(idx_idx));
idx++;
+ alreadyExpanded--;
+ if (alreadyExpanded == 0)
+ {
+ /* There is not any characters left in the expansion set.
+ * We can increase the pointer in the source string.
+ */
+ idx_idx += idxToMove;
+ idxToMove = 0;
+ }
+ else
+ idx_idx++;
+ }
+ else
+ {
+ /* This is a normal character. */
+ RuleBasedCollator.CollationElement e =
+ collator.getDefaultElement (work_text.charAt (idx));
+ Integer i_ref = new Integer(idx_idx);
+
+ /* Don't forget to mark it as a special sequence so the
+ * string can be ordered.
+ */
+ v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+ vi.add (i_ref);
+ v.add (e);
+ vi.add (i_ref);
+ idx_idx++;
+ idx++;
+ }
continue;
}
+ /*
+ * Second case: Here we have found a matching sequence.
+ * Here we have an expansion string prepend it to the "work text" and
+ * add the corresponding sorting element. We must also mark
+ */
if (prefix.expansion != null)
{
work_text = prefix.expansion
+ work_text.substring (idx+prefix.key.length());
idx = 0;
v.add (prefix);
+ vi.add (new Integer(idx_idx));
+ if (alreadyExpanded == 0)
+ idxToMove = prefix.key.length();
+ alreadyExpanded += prefix.expansion.length()-prefix.key.length();
}
else
{
+ /* Third case: the simplest. We have got the prefix and it
+ * has not to be expanded.
+ */
if (!prefix.ignore)
+ {
v.add (prefix);
+ vi.add (new Integer(idx_idx));
+ }
idx += prefix.key.length();
+ /* If the sequence is in an expansion, we must decrease the
+ * counter.
+ */
+ if (alreadyExpanded > 0)
+ {
+ alreadyExpanded -= prefix.key.length();
+ if (alreadyExpanded == 0)
+ {
+ idx_idx += idxToMove;
+ idxToMove = 0;
+ }
+ } else
+ idx_idx += prefix.key.length();
}
}
text_decomposition = v.toArray();
+ text_indexes = new int[vi.size()+1];
+ for (int i = 0; i < vi.size(); i++)
+ {
+ text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
+ }
+ text_indexes[vi.size()] = text.length();
}
/**
@@ -341,22 +437,22 @@
if (offset < 0)
throw new IllegalArgumentException("Negative offset: " + offset);
- if ((text.length() > 0) && (offset > 0))
- throw new IllegalArgumentException("Offset too large: " + offset);
- else if (offset > (text.length() - 1))
+ if (offset > (text.length() - 1))
throw new IllegalArgumentException("Offset too large: " + offset);
- textIndex = 0;
- for (int i=0;i<text_decomposition.length;i++)
+ for (index = 0; index < text_decomposition.length; index++)
{
- RuleBasedCollator.CollationElement e =
- (RuleBasedCollator.CollationElement) text_decomposition[i];
- int idx = textIndex + e.key.length();
-
- if (idx > offset)
+ if (offset <= text_indexes[index])
break;
- textIndex = idx;
}
+ /*
+ * As text_indexes[0] == 0, we should not have to take care whether index is
+ * greater than 0. It is always.
+ */
+ if (text_indexes[index] == offset)
+ textIndex = offset;
+ else
+ textIndex = text_indexes[index-1];
}
/**
Index: java/text/DecimalFormat.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/DecimalFormat.java,v
retrieving revision 1.15
diff -u -b -B -r1.15 DecimalFormat.java
--- java/text/DecimalFormat.java 1 May 2004 14:07:49 -0000 1.15
+++ java/text/DecimalFormat.java 20 May 2004 10:06:19 -0000
@@ -847,7 +847,7 @@
// FIXME: handle Inf and NaN.
// FIXME: do we have to respect minimum digits?
- // What about leading zeros? What about multiplier?
+ // What about multiplier?
StringBuffer buf = int_buf;
StringBuffer frac_buf = null;
@@ -855,7 +855,13 @@
int start_index = index;
int max = str.length();
int exp_index = -1;
- int last = index + MAXIMUM_INTEGER_DIGITS;
+ int last = index + maximumIntegerDigits;
+
+ if (maximumFractionDigits > 0)
+ last += maximumFractionDigits + 1;
+
+ if (useExponentialNotation)
+ last += minExponentDigits + 1;
if (last > 0 && max > last)
max = last;
Index: java/text/RuleBasedCollator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/RuleBasedCollator.java,v
retrieving revision 1.23
diff -u -b -B -r1.23 RuleBasedCollator.java
--- java/text/RuleBasedCollator.java 23 Apr 2004 17:37:46 -0000 1.23
+++ java/text/RuleBasedCollator.java 20 May 2004 10:06:19 -0000
@@ -147,7 +147,7 @@
* This class describes what rank has a character (or a sequence of characters)
* in the lexicographic order. Each element in a rule has a collation element.
*/
- final class CollationElement
+ final static class CollationElement
{
String key;
int primary;
@@ -189,7 +189,7 @@
* ({@link #mergeRules(int,java.lang.String,java.util.Vector,java.util.Vector)})
* as a temporary state while merging two sets of instructions.
*/
- final class CollationSorter
+ final static class CollationSorter
{
static final int GREATERP = 0;
static final int GREATERS = 1;
@@ -230,12 +230,29 @@
private int last_primary_value;
/**
+ * This is the value of the last secondary sequence of the
+ * primary 0, entered into
+ * <code>ce_table</code>. It is used to compute the
+ * ordering value of an unspecified accented character.
+ */
+ private int last_tertiary_value;
+
+ /**
* This variable is true if accents need to be sorted
* in the other direction.
*/
private boolean inverseAccentComparison;
/**
+ * This collation element is special to unknown sequence.
+ * The JDK uses it to mark and sort the characters which has
+ * no collation rules.
+ */
+ static final CollationElement SPECIAL_UNKNOWN_SEQ =
+ new CollationElement("", (short) 32767, (short) 0, (short) 0,
+ (short) 0, null);
+
+ /**
* This method initializes a new instance of <code>RuleBasedCollator</code>
* with the specified collation rules. Note that an application normally
* obtains an instance of <code>RuleBasedCollator</code> by calling the
@@ -363,7 +380,7 @@
* This is a new set of rules. Append to the list.
*/
patch.removeElementAt(0);
- insertion_point = main.size();
+ insertion_point++;
}
// Now insert all elements of patch at the insertion point.
@@ -392,7 +409,7 @@
{
boolean ignoreChars = (base_offset == 0);
int operator = -1;
- StringBuffer sb = new StringBuffer("");
+ StringBuffer sb = new StringBuffer();
boolean doubleQuote = false;
boolean eatingChars = false;
boolean nextIsModifier = false;
@@ -605,6 +622,7 @@
throws ParseException
{
int primary_seq = 0;
+ int last_tertiary_seq = 0;
short secondary_seq = 0;
short tertiary_seq = 0;
short equality_seq = 0;
@@ -652,6 +670,8 @@
continue element_loop;
case CollationSorter.GREATERT:
tertiary_seq++;
+ if (primary_seq == 0)
+ last_tertiary_seq = tertiary_seq;
equality_seq = 0;
break;
case CollationSorter.IGNORE:
@@ -686,6 +706,7 @@
ce_table = v.toArray();
last_primary_value = primary_seq+1;
+ last_tertiary_value = last_tertiary_seq+1;
}
/**
@@ -758,6 +779,17 @@
int prim1 = CollationElementIterator.primaryOrder(ord1);
int prim2 = CollationElementIterator.primaryOrder(ord2);
+ if (prim1 == 0 && getStrength() < TERTIARY)
+ {
+ ct.previousBlock();
+ continue;
+ }
+ else if (prim2 == 0 && getStrength() < TERTIARY)
+ {
+ cs.previousBlock();
+ continue;
+ }
+
if (prim1 < prim2)
return -1;
else if (prim1 > prim2)
@@ -833,6 +865,28 @@
}
/**
+ * This method builds a default collation element for an accented character
+ * without invoking the database created from the rules passed to the constructor.
+ *
+ * @param c Character which needs a collation element.
+ * @return A valid brand new CollationElement instance.
+ */
+ CollationElement getDefaultAccentedElement(char c)
+ {
+ int v;
+
+ // Preliminary support for generic accent sorting inversion (I don't know if all
+ // characters in the range should be sorted backward). This is the place
+ // to fix this if needed.
+ if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
+ v = 0x0361 - ((int) c - 0x02B9);
+ else
+ v = (short) c;
+ return new CollationElement("" + c, (short) 0,
+ (short) 0, (short) (last_tertiary_value + v), (short) 0, null);
+ }
+
+ /**
* This method returns an instance for <code>CollationElementIterator</code>
* for the specified <code>String</code> under the collation rules for this
* object.
@@ -898,7 +952,8 @@
break;
case SECONDARY:
- ord = CollationElementIterator.secondaryOrder(ord);
+ ord = CollationElementIterator.primaryOrder(ord) << 8;
+ ord |= CollationElementIterator.secondaryOrder(ord);
default:
break;
Index: java/text/SimpleDateFormat.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/text/SimpleDateFormat.java,v
retrieving revision 1.27
diff -u -b -B -r1.27 SimpleDateFormat.java
--- java/text/SimpleDateFormat.java 28 Apr 2004 19:17:09 -0000 1.27
+++ java/text/SimpleDateFormat.java 20 May 2004 10:06:20 -0000
@@ -189,6 +189,7 @@
numberFormat = NumberFormat.getInstance(locale);
numberFormat.setGroupingUsed (false);
numberFormat.setParseIntegerOnly (true);
+ numberFormat.setMaximumFractionDigits (0);
}
/**
@@ -216,6 +217,7 @@
numberFormat = NumberFormat.getInstance(locale);
numberFormat.setGroupingUsed (false);
numberFormat.setParseIntegerOnly (true);
+ numberFormat.setMaximumFractionDigits (0);
}
/**
@@ -234,6 +236,7 @@
numberFormat = NumberFormat.getInstance();
numberFormat.setGroupingUsed (false);
numberFormat.setParseIntegerOnly (true);
+ numberFormat.setMaximumFractionDigits (0);
}
// What is the difference between localized and unlocalized? The
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Patch: java.text updates,
Guilhem Lavaux <=