Source for java.text.RuleBasedCollator

   1: /* RuleBasedCollator.java -- Concrete Collator Class
   2:    Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import java.util.ArrayList;
  42: import java.util.HashMap;
  43: 
  44: /* Written using "Java Class Libraries", 2nd edition, plus online
  45:  * API docs for JDK 1.2 from http://www.javasoft.com.
  46:  * Status: Believed complete and correct
  47:  */
  48: 
  49: /**
  50:  * This class is a concrete subclass of <code>Collator</code> suitable
  51:  * for string collation in a wide variety of languages.  An instance of
  52:  * this class is normally returned by the <code>getInstance</code> method
  53:  * of <code>Collator</code> with rules predefined for the requested
  54:  * locale.  However, an instance of this class can be created manually
  55:  * with any desired rules.
  56:  * <p>
  57:  * Rules take the form of a <code>String</code> with the following syntax
  58:  * <ul>
  59:  * <li> Modifier: '@'</li>
  60:  * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
  61:  * <li> Reset: '&amp;' : &lt;text&gt;</li>
  62:  * </ul>
  63:  * The modifier character indicates that accents sort backward as is the
  64:  * case with French.  The modifier applies to all rules <b>after</b>
  65:  * the modifier but before the next primary sequence. If placed at the end
  66:  * of the sequence if applies to all unknown accented character.
  67:  * The relational operators specify how the text 
  68:  * argument relates to the previous term.  The relation characters have
  69:  * the following meanings:
  70:  * <ul>
  71:  * <li>'&lt;' - The text argument is greater than the prior term at the primary
  72:  * difference level.</li>
  73:  * <li>';' - The text argument is greater than the prior term at the secondary
  74:  * difference level.</li>
  75:  * <li>',' - The text argument is greater than the prior term at the tertiary
  76:  * difference level.</li>
  77:  * <li>'=' - The text argument is equal to the prior term</li>
  78:  * </ul>
  79:  * <p>
  80:  * As for the text argument itself, this is any sequence of Unicode
  81:  * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
  82:  * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are 
  83:  * desired, they must be enclosed in single quotes.  If any whitespace is 
  84:  * encountered, it is ignored.  (For example, "a b" is equal to "ab").  
  85:  * <p>
  86:  * The reset operation inserts the following rule at the point where the
  87:  * text argument to it exists in the previously declared rule string.  This
  88:  * makes it easy to add new rules to an existing string by simply including
  89:  * them in a reset sequence at the end.  Note that the text argument, or
  90:  * at least the first character of it, must be present somewhere in the
  91:  * previously declared rules in order to be inserted properly.  If this
  92:  * is not satisfied, a <code>ParseException</code> will be thrown. 
  93:  * <p>
  94:  * This system of configuring <code>RuleBasedCollator</code> is needlessly
  95:  * complex and the people at Taligent who developed it (along with the folks
  96:  * at Sun who accepted it into the Java standard library) deserve a slow
  97:  * and agonizing death.
  98:  * <p>
  99:  * Here are a couple of example of rule strings:
 100:  * <p>
 101:  * "&lt; a &lt; b &lt; c" - This string says that a is greater than b which is 
 102:  * greater than c, with all differences being primary differences.
 103:  * <p>
 104:  * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
 105:  * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
 106:  * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
 107:  * under a tertiary strength comparison.
 108:  * <p>
 109:  * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the 
 110:  * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
 111:  * the rule "&lt; b" is to be inserted after the text argument "a" in the
 112:  * previous rule string segment.
 113:  * <p>
 114:  * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
 115:  * anywhere in the previous rule string segment so the rule following the
 116:  * reset rule cannot be inserted.
 117:  * <p>
 118:  * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
 119:  * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
 120:  * <p>
 121:  * For a description of the various comparison strength types, see the
 122:  * documentation for the <code>Collator</code> class.
 123:  * <p>
 124:  * As an additional complication to this already overly complex rule scheme,
 125:  * if any characters precede the first rule, these characters are considered
 126:  * ignorable.  They will be treated as if they did not exist during 
 127:  * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
 128:  * character such that the strings "high-tech" and "hightech" would
 129:  * be considered identical.
 130:  * <p>
 131:  * A <code>ParseException</code> will be thrown for any of the following
 132:  * conditions:
 133:  * <ul>
 134:  * <li>Unquoted punctuation characters in a text argument.</li>
 135:  * <li>A relational or reset operator not followed by a text argument</li>
 136:  * <li>A reset operator where the text argument is not present in
 137:  * the previous rule string section.</li>
 138:  * </ul>
 139:  *
 140:  * @author Aaron M. Renn (arenn@urbanophile.com)
 141:  * @author Tom Tromey (tromey@cygnus.com)
 142:  * @author Guilhem Lavaux (guilhem@kaffe.org)
 143:  */
 144: public class RuleBasedCollator extends Collator
 145: {
 146:   /**
 147:    * This class describes what rank has a character (or a sequence of characters) 
 148:    * in the lexicographic order. Each element in a rule has a collation element.
 149:    */
 150:   static final class CollationElement
 151:   {
 152:     String key;
 153:     int primary;
 154:     short secondary;
 155:     short tertiary;
 156:     short equality;
 157:     boolean ignore;
 158:     String expansion;
 159: 
 160:     CollationElement(String key, int primary, short secondary, short tertiary,
 161:              short equality, String expansion, boolean ignore)
 162:     {
 163:       this.key = key;
 164:       this.primary = primary;
 165:       this.secondary = secondary;
 166:       this.tertiary = tertiary;
 167:       this.equality = equality;
 168:       this.ignore = ignore;
 169:       this.expansion = expansion;
 170:     }
 171: 
 172:     int getValue()
 173:     {
 174:       return (primary << 16) + (secondary << 8) + tertiary;
 175:     }
 176:   }
 177: 
 178:   /**
 179:    * Basic collation instruction (internal format) to build the series of
 180:    * collation elements. It contains an instruction which specifies the new
 181:    * state of the generator. The sequence of instruction should not contain
 182:    * RESET (it is used by
 183:    * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
 184:    * as a temporary state while merging two sets of instructions.
 185:    */
 186:   static final class CollationSorter
 187:   {
 188:     static final int GREATERP = 0;
 189:     static final int GREATERS = 1;
 190:     static final int GREATERT = 2;
 191:     static final int EQUAL = 3;
 192:     static final int RESET = 4;
 193:     static final int INVERSE_SECONDARY = 5;
 194:     
 195:     int comparisonType;
 196:     String textElement;
 197:     int hashText;
 198:     int offset;
 199:     boolean ignore;
 200: 
 201:     String expansionOrdering;
 202:   }
 203: 
 204:   /**
 205:    * This the the original rule string.
 206:    */
 207:   private String rules;
 208: 
 209:   /**
 210:    * This is the table of collation element values
 211:    */
 212:   private Object[] ce_table;
 213: 
 214:   /**
 215:    * Quick-prefix finder.
 216:    */
 217:   HashMap prefix_tree;
 218: 
 219:   /**
 220:    * This is the value of the last sequence entered into
 221:    * <code>ce_table</code>. It is used to compute the
 222:    * ordering value of unspecified character.
 223:    */
 224:   private int last_primary_value;
 225: 
 226:   /**
 227:    * This is the value of the last secondary sequence of the
 228:    * primary 0, entered into
 229:    * <code>ce_table</code>. It is used to compute the
 230:    * ordering value of an unspecified accented character.
 231:    */
 232:   private int last_tertiary_value;
 233: 
 234:   /**
 235:    * This variable is true if accents need to be sorted
 236:    * in the other direction.
 237:    */
 238:   private boolean inverseAccentComparison;
 239: 
 240:   /**
 241:    * This collation element is special to unknown sequence.
 242:    * The JDK uses it to mark and sort the characters which has
 243:    * no collation rules.
 244:    */
 245:   static final CollationElement SPECIAL_UNKNOWN_SEQ = 
 246:     new CollationElement("", (short) 32767, (short) 0, (short) 0,
 247:              (short) 0, null, false);
 248:   
 249:   /**
 250:    * This method initializes a new instance of <code>RuleBasedCollator</code>
 251:    * with the specified collation rules.  Note that an application normally
 252:    * obtains an instance of <code>RuleBasedCollator</code> by calling the
 253:    * <code>getInstance</code> method of <code>Collator</code>.  That method
 254:    * automatically loads the proper set of rules for the desired locale.
 255:    *
 256:    * @param rules The collation rule string.
 257:    *
 258:    * @exception ParseException If the rule string contains syntax errors.
 259:    */
 260:   public RuleBasedCollator(String rules) throws ParseException
 261:   {
 262:     if (rules.equals(""))
 263:       throw new ParseException("empty rule set", 0);
 264:     
 265:     this.rules = rules;
 266: 
 267:     buildCollationVector(parseString(rules));
 268:     buildPrefixAccess();
 269:   }
 270: 
 271:   /**
 272:    * This method returns the number of common characters at the beginning
 273:    * of the string of the two parameters.
 274:    *
 275:    * @param prefix A string considered as a prefix to test against
 276:    * the other string.
 277:    * @param s A string to test the prefix against.
 278:    * @return The number of common characters.
 279:    */
 280:   static int findPrefixLength(String prefix, String s)
 281:   {
 282:     int index;
 283:     int len = prefix.length();
 284: 
 285:     for (index = 0; index < len && index < s.length(); ++index)
 286:       {
 287:     if (prefix.charAt(index) != s.charAt(index))
 288:       return index;
 289:       }
 290: 
 291: 
 292:     return index;
 293:   }
 294: 
 295:   /**
 296:    * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
 297:    * checks whether it is possible to find an anchor point for the rules to be merged and
 298:    * then insert them at that precise point.
 299:    *
 300:    * @param offset Offset in the string containing rules of the beginning of the rules
 301:    * being merged in.
 302:    * @param starter Text of the rules being merged.
 303:    * @param main Repository of all already parsed rules.
 304:    * @param patch Rules to be merged into the repository.
 305:    * @throws ParseException if it is impossible to find an anchor point for the new rules.
 306:    */
 307:   private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
 308:     throws ParseException 
 309:   {
 310:     int insertion_point = -1;
 311:     int max_length = 0;
 312:     
 313:     /* We must check that no rules conflict with another already present. If it
 314:      * is the case delete the old rule. 
 315:      */
 316:     
 317:     /* For the moment good old O(N^2) algorithm.
 318:      */
 319:     for (int i = 0; i < patch.size(); i++)
 320:       {
 321:     int j = 0;
 322:     
 323:     while (j < main.size())
 324:       {
 325:         CollationSorter rule1 = (CollationSorter) patch.get(i);
 326:         CollationSorter rule2 = (CollationSorter) main.get(j);
 327:         
 328:         if (rule1.textElement.equals(rule2.textElement))
 329:           main.remove(j);
 330:         else
 331:           j++;
 332:       }
 333:       }
 334: 
 335:     // Find the insertion point... O(N)
 336:     for (int i = 0; i < main.size(); i++)
 337:       {
 338:     CollationSorter sorter = (CollationSorter) main.get(i);
 339:     int length = findPrefixLength(starter, sorter.textElement);
 340:         
 341:     if (length > max_length)
 342:       {
 343:         max_length = length;
 344:         insertion_point = i+1;
 345:       }
 346:       }
 347: 
 348:     if (insertion_point < 0)
 349:       throw new ParseException("no insertion point found for " + starter, offset);
 350: 
 351:     if (max_length < starter.length())
 352:       {
 353:     /*
 354:      * We need to expand the first entry. It must be sorted
 355:      * like if it was the reference key itself (like the spec
 356:      * said. So the first entry is special: the element is
 357:      * replaced by the specified text element for the sorting.
 358:      * This text replace the old one for comparisons. However
 359:      * to preserve the behaviour we replace the first key (corresponding
 360:      * to the found prefix) by a new code rightly ordered in the
 361:      * sequence. The rest of the subsequence must be appended
 362:      * to the end of the sequence.
 363:      */
 364:     CollationSorter sorter = (CollationSorter) patch.get(0);
 365:     CollationSorter expansionPrefix =
 366:       (CollationSorter) main.get(insertion_point-1);
 367:     
 368:     sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
 369:         
 370:     main.add(insertion_point, sorter);
 371:     
 372:     /*
 373:      * This is a new set of rules. Append to the list.
 374:      */
 375:     patch.remove(0);
 376:     insertion_point++;
 377:       }
 378: 
 379:     // Now insert all elements of patch at the insertion point.
 380:     for (int i = 0; i < patch.size(); i++)
 381:       main.add(i+insertion_point, patch.get(i));
 382:   }
 383: 
 384:   /**
 385:    * This method parses a string and build a set of sorting instructions. The parsing
 386:    * may only be partial on the case the rules are to be merged sometime later.
 387:    * 
 388:    * @param stop_on_reset If this parameter is true then the parser stops when it
 389:    * encounters a reset instruction. In the other case, it tries to parse the subrules
 390:    * and merged it in the same repository.
 391:    * @param v Output vector for the set of instructions.
 392:    * @param base_offset Offset in the string to begin parsing.
 393:    * @param rules Rules to be parsed.
 394:    * @return -1 if the parser reached the end of the string, an integer representing the
 395:    * offset in the string at which it stopped parsing. 
 396:    * @throws ParseException if something turned wrong during the parsing. To get details
 397:    * decode the message.
 398:    */
 399:   private int subParseString(boolean stop_on_reset, ArrayList v,
 400:                  int base_offset, String rules)
 401:     throws ParseException
 402:   {
 403:     boolean ignoreChars = (base_offset == 0);
 404:     int operator = -1;
 405:     StringBuffer sb = new StringBuffer();
 406:     boolean doubleQuote = false;
 407:     boolean eatingChars = false;
 408:     boolean nextIsModifier = false;
 409:     boolean isModifier = false;
 410:     int i;
 411:     
 412: main_parse_loop:
 413:     for (i = 0; i < rules.length(); i++)
 414:       {
 415:     char c = rules.charAt(i);
 416:     int type = -1;
 417:     
 418:     if (!eatingChars &&
 419:         ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
 420:           continue;
 421: 
 422:     isModifier = nextIsModifier;
 423:     nextIsModifier = false;
 424: 
 425:     if (eatingChars && c != '\'')
 426:       {
 427:         doubleQuote = false;
 428:         sb.append(c);
 429:         continue;
 430:       }
 431:     if (doubleQuote && eatingChars)
 432:       {
 433:         sb.append(c);
 434:         doubleQuote = false;
 435:         continue;
 436:       }
 437: 
 438:     switch (c)
 439:       {
 440:       case '!':
 441:         throw new ParseException
 442:           ("Modifier '!' is not yet supported by Classpath", i + base_offset);
 443:       case '<':
 444:         type = CollationSorter.GREATERP;
 445:         break;
 446:       case ';':
 447:         type = CollationSorter.GREATERS;
 448:         break;
 449:       case ',':
 450:         type = CollationSorter.GREATERT;
 451:         break;
 452:       case '=':
 453:         type = CollationSorter.EQUAL;
 454:         break;
 455:       case '\'':
 456:         eatingChars = !eatingChars;
 457:         doubleQuote = true;
 458:         break;
 459:       case '@':
 460:         if (ignoreChars)
 461:           throw new ParseException
 462:         ("comparison list has not yet been started. You may only use"
 463:          + "(<,;=&)", i + base_offset);
 464:         // Inverse the order of secondaries from now on.
 465:         nextIsModifier = true;
 466:         type = CollationSorter.INVERSE_SECONDARY;
 467:         break;
 468:       case '&':
 469:         type = CollationSorter.RESET;
 470:         if (stop_on_reset)
 471:           break main_parse_loop;
 472:         break;
 473:       default:
 474:         if (operator < 0)
 475:           throw new ParseException
 476:         ("operator missing at " + (i + base_offset), i + base_offset);
 477:         if (! eatingChars
 478:         && ((c >= 0x21 && c <= 0x2F) 
 479:             || (c >= 0x3A && c <= 0x40)
 480:             || (c >= 0x5B && c <= 0x60)
 481:             || (c >= 0x7B && c <= 0x7E)))
 482:           throw new ParseException
 483:         ("unquoted punctuation character '" + c + "'", i + base_offset);
 484: 
 485:         //type = ignoreChars ? CollationSorter.IGNORE : -1;
 486:         sb.append(c);
 487:         break;
 488:       }
 489: 
 490:     if (type  < 0)
 491:       continue;
 492: 
 493:     if (operator < 0)
 494:       {
 495:         operator = type;
 496:         continue;
 497:       }
 498: 
 499:     if (sb.length() == 0 && !isModifier)
 500:       throw new ParseException
 501:         ("text element empty at " + (i+base_offset), i+base_offset);
 502: 
 503:     if (operator == CollationSorter.RESET)
 504:       {
 505:         /* Reposition in the sorting list at the position
 506:          * indicated by the text element.
 507:          */
 508:         String subrules = rules.substring(i);
 509:         ArrayList sorted_rules = new ArrayList();
 510:         int idx;
 511: 
 512:         // Parse the subrules but do not iterate through all
 513:         // sublist. This is the privilege of the first call.
 514:         idx = subParseString(true, sorted_rules, base_offset+i, subrules);
 515:     
 516:         // Merge new parsed rules into the list.
 517:         mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
 518:         sb.setLength(0);
 519:         
 520:         // Reset state to none.
 521:         operator = -1;
 522:         type = -1;
 523:         // We have found a new subrule at 'idx' but it has not been parsed.
 524:         if (idx >= 0)
 525:           {
 526:         i += idx-1;
 527:         continue main_parse_loop;
 528:           }
 529:         else
 530:         // No more rules.
 531:         break main_parse_loop;
 532:       }
 533: 
 534:     CollationSorter sorter = new CollationSorter();
 535:     
 536:     if (operator == CollationSorter.GREATERP)
 537:       ignoreChars = false;
 538: 
 539:     sorter.comparisonType = operator;
 540:     sorter.textElement = sb.toString();
 541:     sorter.hashText = sorter.textElement.hashCode();
 542:     sorter.offset = base_offset+rules.length();
 543:     sorter.ignore = ignoreChars;
 544:     sb.setLength(0);
 545: 
 546:     v.add(sorter);
 547:     operator = type;
 548:       }
 549: 
 550:     if (operator >= 0)
 551:       {
 552:     CollationSorter sorter = new CollationSorter();
 553:     int pos = rules.length() + base_offset;
 554: 
 555:     if ((sb.length() != 0 && nextIsModifier)
 556:         || (sb.length() == 0 && !nextIsModifier && !eatingChars))
 557:       throw new ParseException("text element empty at " + pos, pos);
 558: 
 559:     if (operator == CollationSorter.GREATERP)
 560:       ignoreChars = false;
 561: 
 562:     sorter.comparisonType = operator;
 563:     sorter.textElement = sb.toString();
 564:      sorter.hashText = sorter.textElement.hashCode();
 565:     sorter.offset = base_offset+pos;
 566:     sorter.ignore = ignoreChars;
 567:     v.add(sorter);
 568:       }
 569: 
 570:     if (i == rules.length())
 571:       return -1;
 572:     else
 573:       return i;
 574:   }
 575: 
 576:   /**
 577:    * This method creates a copy of this object.
 578:    *
 579:    * @return A copy of this object.
 580:    */
 581:   public Object clone()
 582:   {
 583:     return super.clone();
 584:   }
 585: 
 586:   /**
 587:    * This method completely parses a string 'rules' containing sorting rules.
 588:    *
 589:    * @param rules String containing the rules to be parsed. 
 590:    * @return A set of sorting instructions stored in a Vector.
 591:    * @throws ParseException if something turned wrong during the parsing. To get details
 592:    * decode the message.
 593:    */
 594:   private ArrayList parseString(String rules) 
 595:     throws ParseException
 596:   {
 597:     ArrayList v = new ArrayList();
 598: 
 599:     // result of the first subParseString is not absolute (may be -1 or a
 600:     // positive integer). But we do not care.
 601:     subParseString(false, v, 0, rules);
 602:     
 603:     return v;
 604:   }
 605: 
 606:   /**
 607:    * This method uses the sorting instructions built by {@link #parseString}
 608:    * to build collation elements which can be directly used to sort strings.
 609:    *
 610:    * @param parsedElements Parsed instructions stored in a ArrayList.
 611:    * @throws ParseException if the order of the instructions are not valid.
 612:    */
 613:   private void buildCollationVector(ArrayList parsedElements)
 614:     throws ParseException
 615:   {
 616:     int primary_seq = 0;
 617:     int last_tertiary_seq = 0;
 618:     short secondary_seq = 0;
 619:     short tertiary_seq = 0;
 620:     short equality_seq = 0;
 621:     boolean inverseComparisons = false;
 622:     final boolean DECREASING = false;
 623:     final boolean INCREASING = true;
 624:     boolean secondaryType = INCREASING;
 625:     ArrayList v = new ArrayList();
 626: 
 627:     // elts is completely sorted.
 628: element_loop:
 629:     for (int i = 0; i < parsedElements.size(); i++)
 630:       {
 631:     CollationSorter elt = (CollationSorter) parsedElements.get(i);
 632:     boolean ignoreChar = false;
 633: 
 634:     switch (elt.comparisonType)
 635:       {
 636:       case CollationSorter.GREATERP:
 637:         primary_seq++;
 638:         if (inverseComparisons)
 639:           {
 640:         secondary_seq = Short.MAX_VALUE;
 641:         secondaryType = DECREASING;
 642:           }
 643:         else
 644:           {
 645:         secondary_seq = 0;
 646:         secondaryType = INCREASING;
 647:           }
 648:         tertiary_seq = 0;
 649:         equality_seq = 0;
 650:         inverseComparisons = false;
 651:         break;
 652:       case CollationSorter.GREATERS:
 653:         if (secondaryType == DECREASING)
 654:           secondary_seq--;
 655:         else
 656:           secondary_seq++;
 657:         tertiary_seq = 0;
 658:         equality_seq = 0;
 659:         break;
 660:       case CollationSorter.INVERSE_SECONDARY:
 661:         inverseComparisons = true;
 662:         continue element_loop;
 663:       case CollationSorter.GREATERT:
 664:         tertiary_seq++;
 665:         if (primary_seq == 0)
 666:           last_tertiary_seq = tertiary_seq;
 667:         equality_seq = 0;
 668:         break;
 669:       case CollationSorter.EQUAL:
 670:         equality_seq++;
 671:         break;
 672:       case CollationSorter.RESET:
 673:         throw new ParseException
 674:           ("Invalid reached state 'RESET'. Internal error", elt.offset);
 675:       default:
 676:         throw new ParseException
 677:           ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
 678:       }
 679: 
 680:     v.add(new CollationElement(elt.textElement, primary_seq,
 681:                    secondary_seq, tertiary_seq,
 682:                    equality_seq, elt.expansionOrdering, elt.ignore));
 683:       }
 684: 
 685:     this.inverseAccentComparison = inverseComparisons; 
 686: 
 687:     ce_table = v.toArray();
 688: 
 689:     last_primary_value = primary_seq+1;
 690:     last_tertiary_value = last_tertiary_seq+1;
 691:   }
 692: 
 693:   /**
 694:    * Build a tree where all keys are the texts of collation elements and data is
 695:    * the collation element itself. The tree is used when extracting all prefix
 696:    * for a given text.
 697:    */
 698:   private void buildPrefixAccess()
 699:   {
 700:     prefix_tree = new HashMap();
 701: 
 702:     for (int i = 0; i < ce_table.length; i++)
 703:       {
 704:     CollationElement e = (CollationElement) ce_table[i];
 705: 
 706:     prefix_tree.put(e.key, e);
 707:       }
 708:   }
 709: 
 710:   /**
 711:    * This method returns an integer which indicates whether the first
 712:    * specified <code>String</code> is less than, greater than, or equal to
 713:    * the second.  The value depends not only on the collation rules in
 714:    * effect, but also the strength and decomposition settings of this object.
 715:    *
 716:    * @param source The first <code>String</code> to compare.
 717:    * @param target A second <code>String</code> to compare to the first.
 718:    *
 719:    * @return A negative integer if source &lt; target, a positive integer
 720:    * if source &gt; target, or 0 if source == target.
 721:    */
 722:   public int compare(String source, String target)
 723:   {
 724:     CollationElementIterator cs, ct;
 725:     CollationElement ord1block = null;
 726:     CollationElement ord2block = null;
 727:     boolean advance_block_1 = true;
 728:     boolean advance_block_2 = true;
 729: 
 730:     cs = getCollationElementIterator(source);
 731:     ct = getCollationElementIterator(target);
 732: 
 733:     for(;;)
 734:       {
 735:     int ord1;
 736:     int ord2;
 737: 
 738:     /*
 739:      * We have to check whether the characters are ignorable.
 740:      * If it is the case then forget them. 
 741:      */
 742:     if (advance_block_1)
 743:       {
 744:         ord1block = cs.nextBlock();
 745:         if (ord1block != null && ord1block.ignore)
 746:           continue;
 747:       }
 748:     
 749:     if (advance_block_2)
 750:       {
 751:         ord2block = ct.nextBlock();
 752:         if (ord2block != null && ord2block.ignore)
 753:           {
 754:             advance_block_1 = false;
 755:             continue;
 756:           }
 757:      }
 758:     else
 759:       advance_block_2 = true;
 760: 
 761:     if (!advance_block_1)
 762:       advance_block_1 = true;
 763: 
 764:     if (ord1block != null)
 765:       ord1 = ord1block.getValue();
 766:     else
 767:       {
 768:         if (ord2block == null)
 769:           return 0;
 770:         return -1;
 771:       }
 772: 
 773:     if (ord2block == null)
 774:       return 1;
 775:     
 776:     ord2 = ord2block.getValue();
 777:     
 778:     // We know chars are totally equal, so skip
 779:         if (ord1 == ord2)
 780:       {
 781:         if (getStrength() == IDENTICAL)
 782:           if (!ord1block.key.equals(ord2block.key))
 783:         return ord1block.key.compareTo(ord2block.key);
 784:         continue;
 785:       }
 786: 
 787:         // Check for primary strength differences
 788:         int prim1 = CollationElementIterator.primaryOrder(ord1); 
 789:         int prim2 = CollationElementIterator.primaryOrder(ord2); 
 790:     
 791:     if (prim1 == 0 && getStrength() < TERTIARY)
 792:       {
 793:             advance_block_2 = false;
 794:         continue;
 795:       }
 796:     else if (prim2 == 0 && getStrength() < TERTIARY)
 797:       {
 798:         advance_block_1 = false;
 799:         continue;
 800:       }
 801: 
 802:         if (prim1 < prim2)
 803:           return -1;
 804:         else if (prim1 > prim2)
 805:           return 1;
 806:         else if (getStrength() == PRIMARY)
 807:           continue;
 808: 
 809:         // Check for secondary strength differences
 810:         int sec1 = CollationElementIterator.secondaryOrder(ord1);
 811:         int sec2 = CollationElementIterator.secondaryOrder(ord2);
 812: 
 813:     if (sec1 < sec2)
 814:           return -1;
 815:         else if (sec1 > sec2)
 816:           return 1;
 817:         else if (getStrength() == SECONDARY)
 818:           continue;
 819: 
 820:         // Check for tertiary differences
 821:         int tert1 = CollationElementIterator.tertiaryOrder(ord1);
 822:         int tert2 = CollationElementIterator.tertiaryOrder(ord2);
 823: 
 824:         if (tert1 < tert2)
 825:           return -1;
 826:         else if (tert1 > tert2)
 827:           return 1;
 828:     else if (getStrength() == TERTIARY)
 829:       continue;
 830: 
 831:     // Apparently JDK does this (at least for my test case).
 832:     return ord1block.key.compareTo(ord2block.key);    
 833:       }
 834:   }
 835: 
 836:   /**
 837:    * This method tests this object for equality against the specified 
 838:    * object.  This will be true if and only if the specified object is
 839:    * another reference to this object.
 840:    *
 841:    * @param obj The <code>Object</code> to compare against this object.
 842:    *
 843:    * @return <code>true</code> if the specified object is equal to this object,
 844:    * <code>false</code> otherwise.
 845:    */
 846:   public boolean equals(Object obj)
 847:   {
 848:     if (obj == this)
 849:       return true;
 850:     else
 851:       return false;
 852:   }
 853: 
 854:   /**
 855:    * This method builds a default collation element without invoking
 856:    * the database created from the rules passed to the constructor.
 857:    *
 858:    * @param c Character which needs a collation element.
 859:    * @return A valid brand new CollationElement instance.
 860:    */
 861:   CollationElement getDefaultElement(char c)
 862:   {
 863:     int v;
 864: 
 865:     // Preliminary support for generic accent sorting inversion (I don't know if all
 866:     // characters in the range should be sorted backward). This is the place
 867:     // to fix this if needed.
 868:     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 869:       v = 0x0361 - ((int) c - 0x02B9);
 870:     else
 871:       v = (short) c;
 872:     return new CollationElement("" + c, last_primary_value + v,
 873:                 (short) 0, (short) 0, (short) 0, null, false);
 874:   }
 875: 
 876:   /**
 877:    * This method builds a default collation element for an accented character
 878:    * without invoking the database created from the rules passed to the constructor.
 879:    *
 880:    * @param c Character which needs a collation element.
 881:    * @return A valid brand new CollationElement instance.
 882:    */
 883:   CollationElement getDefaultAccentedElement(char c)
 884:   {
 885:     int v;
 886: 
 887:     // Preliminary support for generic accent sorting inversion (I don't know if all
 888:     // characters in the range should be sorted backward). This is the place
 889:     // to fix this if needed.
 890:     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 891:       v = 0x0361 - ((int) c - 0x02B9);
 892:     else
 893:       v = (short) c;
 894:     return new CollationElement("" + c, (short) 0,
 895:                 (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
 896:   }
 897: 
 898:   /**
 899:    * This method returns an instance for <code>CollationElementIterator</code>
 900:    * for the specified <code>String</code> under the collation rules for this
 901:    * object.
 902:    *
 903:    * @param source The <code>String</code> to return the
 904:    * <code>CollationElementIterator</code> instance for.
 905:    *
 906:    * @return A <code>CollationElementIterator</code> for the specified
 907:    * <code>String</code>.
 908:    */
 909:   public CollationElementIterator getCollationElementIterator(String source)
 910:   {
 911:     return new CollationElementIterator(this, source);
 912:   }
 913: 
 914:   /**
 915:    * This method returns an instance of <code>CollationElementIterator</code>
 916:    * for the <code>String</code> represented by the specified
 917:    * <code>CharacterIterator</code>.
 918:    *
 919:    * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
 920:    *
 921:    * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
 922:    */
 923:   public CollationElementIterator getCollationElementIterator(CharacterIterator source)
 924:   {
 925:     StringBuffer expand = new StringBuffer("");
 926:     
 927:     // Right now we assume that we will read from the beginning of the string.
 928:     for (char c = source.first();
 929:      c != CharacterIterator.DONE;
 930:      c = source.next())
 931:       decomposeCharacter(c, expand);
 932: 
 933:     return getCollationElementIterator(expand.toString());
 934:   }
 935: 
 936:   /**
 937:    * This method returns an instance of <code>CollationKey</code> for the
 938:    * specified <code>String</code>.  The object returned will have a
 939:    * more efficient mechanism for its comparison function that could
 940:    * provide speed benefits if multiple comparisons are performed, such
 941:    * as during a sort.
 942:    *
 943:    * @param source The <code>String</code> to create a <code>CollationKey</code> for.
 944:    *
 945:    * @return A <code>CollationKey</code> for the specified <code>String</code>.
 946:    */
 947:   public CollationKey getCollationKey(String source)
 948:   {
 949:     CollationElementIterator cei = getCollationElementIterator(source);
 950:     ArrayList vect = new ArrayList();
 951: 
 952:     int ord = cei.next();
 953:     cei.reset(); //set to start of string
 954: 
 955:     while (ord != CollationElementIterator.NULLORDER)
 956:       {
 957:     // If the primary order is null, it means this is an ignorable
 958:     // character.
 959:     if (CollationElementIterator.primaryOrder(ord) == 0)
 960:       {
 961:             ord = cei.next();
 962:         continue;
 963:       }
 964:         switch (getStrength())
 965:           {
 966:             case PRIMARY:
 967:           ord = CollationElementIterator.primaryOrder(ord);
 968:           break;
 969:           
 970:             case SECONDARY:
 971:           ord = CollationElementIterator.primaryOrder(ord) << 8;
 972:           ord |= CollationElementIterator.secondaryOrder(ord);
 973: 
 974:             default:
 975:                break;
 976:           }
 977: 
 978:         vect.add(new Integer(ord)); 
 979:     ord = cei.next(); //increment to next key
 980:       }
 981: 
 982:     Object[] objarr = vect.toArray();
 983:     byte[] key = new byte[objarr.length * 4];
 984: 
 985:     for (int i = 0; i < objarr.length; i++)
 986:       {
 987:         int j = ((Integer) objarr[i]).intValue();
 988:         key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
 989:         key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
 990:         key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
 991:         key [i * 4 + 3] = (byte) (j & 0x000000FF);
 992:       }
 993: 
 994:     return new CollationKey(this, source, key);
 995:   }
 996: 
 997:   /**
 998:    * This method returns a <code>String</code> containing the collation rules
 999:    * for this object.
1000:    *
1001:    * @return The collation rules for this object.
1002:    */
1003:   public String getRules()
1004:   {
1005:     return rules;
1006:   }
1007: 
1008:   /**
1009:    * This method returns a hash value for this object.
1010:    *
1011:    * @return A hash value for this object.
1012:    */
1013:   public int hashCode()
1014:   {
1015:     return System.identityHashCode(this);
1016:   }
1017: }