GNU Classpath (0.20) | |
Frames | No Frames |
/* RuleBasedCollator.java -- Concrete Collator Class
   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


package java.text;

import java.util.ArrayList;
import java.util.HashMap;

/* Written using "Java Class Libraries", 2nd edition, plus online
 * API docs for JDK 1.2 from http://www.javasoft.com.
 * Status: Believed complete and correct
 */

/**
 * This class is a concrete subclass of <code>Collator</code> suitable
 * for string collation in a wide variety of languages.  An instance of
 * this class is normally returned by the <code>getInstance</code> method
 * of <code>Collator</code> with rules predefined for the requested
 * locale.  However, an instance of this class can be created manually
 * with any desired rules.
 * <p>
 * Rules take the form of a <code>String</code> with the following syntax
 * <ul>
 * <li> Modifier: '@'</li>
 * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
 * <li> Reset: '&amp;' : &lt;text&gt;</li>
 * </ul>
 * The modifier character indicates that accents sort backward as is the
 * case with French.  The modifier applies to all rules <b>after</b>
 * the modifier but before the next primary sequence.  If placed at the end
 * of the sequence it applies to all unknown accented characters.
 * The relational operators specify how the text
 * argument relates to the previous term.  The relation characters have
 * the following meanings:
 * <ul>
 * <li>'&lt;' - The text argument is greater than the prior term at the primary
 * difference level.</li>
 * <li>';' - The text argument is greater than the prior term at the secondary
 * difference level.</li>
 * <li>',' - The text argument is greater than the prior term at the tertiary
 * difference level.</li>
 * <li>'=' - The text argument is equal to the prior term</li>
 * </ul>
 * <p>
 * As for the text argument itself, this is any sequence of Unicode
 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E.  If these characters are
 * desired, they must be enclosed in single quotes.  If any whitespace is
 * encountered, it is ignored.  (For example, "a b" is equal to "ab").
 * <p>
 * The reset operation inserts the following rule at the point where the
 * text argument to it exists in the previously declared rule string.  This
 * makes it easy to add new rules to an existing string by simply including
 * them in a reset sequence at the end.  Note that the text argument, or
 * at least the first character of it, must be present somewhere in the
 * previously declared rules in order to be inserted properly.  If this
 * is not satisfied, a <code>ParseException</code> will be thrown.
 * <p>
 * This system of configuring <code>RuleBasedCollator</code> is needlessly
 * complex and the people at Taligent who developed it (along with the folks
 * at Sun who accepted it into the Java standard library) deserve a slow
 * and agonizing death.
 * <p>
 * Here are a couple of examples of rule strings:
 * <p>
 * "&lt; a &lt; b &lt; c" - This string says that a is less than b which is
 * less than c, with all differences being primary differences.
 * <p>
 * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
 * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
 * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
 * under a tertiary strength comparison.
 * <p>
 * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
 * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
 * the rule "&lt; b" is to be inserted after the text argument "a" in the
 * previous rule string segment.
 * <p>
 * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
 * anywhere in the previous rule string segment so the rule following the
 * reset rule cannot be inserted.
 * <p>
 * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
 * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
 * <p>
 * For a description of the various comparison strength types, see the
 * documentation for the <code>Collator</code> class.
 * <p>
 * As an additional complication to this already overly complex rule scheme,
 * if any characters precede the first rule, these characters are considered
 * ignorable.  They will be treated as if they did not exist during
 * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
 * character such that the strings "high-tech" and "hightech" would
 * be considered identical.
 * <p>
 * A <code>ParseException</code> will be thrown for any of the following
 * conditions:
 * <ul>
 * <li>Unquoted punctuation characters in a text argument.</li>
 * <li>A relational or reset operator not followed by a text argument</li>
 * <li>A reset operator where the text argument is not present in
 * the previous rule string section.</li>
 * </ul>
 *
 * @author Aaron M. Renn (arenn@urbanophile.com)
 * @author Tom Tromey (tromey@cygnus.com)
 * @author Guilhem Lavaux (guilhem@kaffe.org)
 */
public class RuleBasedCollator extends Collator
{
  /**
   * This class describes the rank of a character (or of a sequence of
   * characters) in the lexicographic order.  Each element in a rule has
   * a collation element.
   */
  static final class CollationElement
  {
    String key;
    int primary;
    short secondary;
    short tertiary;
    short equality;
    boolean ignore;
    String expansion;

    CollationElement(String key, int primary, short secondary, short tertiary,
                     short equality, String expansion, boolean ignore)
    {
      this.key = key;
      this.primary = primary;
      this.secondary = secondary;
      this.tertiary = tertiary;
      this.equality = equality;
      this.ignore = ignore;
      this.expansion = expansion;
    }

    /**
     * Packs the three orders into a single integer: the primary order
     * shifted left by 16 bits, plus the secondary order shifted left by
     * 8 bits, plus the tertiary order.
     *
     * @return The packed collation value of this element.
     */
    int getValue()
    {
      return (primary << 16) + (secondary << 8) + tertiary;
    }
  }

  /**
   * Basic collation instruction (internal format) to build the series of
   * collation elements.  It contains an instruction which specifies the new
   * state of the generator.  The sequence of instructions should not contain
   * RESET (it is used by
   * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
   * as a temporary state while merging two sets of instructions.
   */
  static final class CollationSorter
  {
    static final int GREATERP = 0;
    static final int GREATERS = 1;
    static final int GREATERT = 2;
    static final int EQUAL = 3;
    static final int RESET = 4;
    static final int INVERSE_SECONDARY = 5;

    int comparisonType;
    String textElement;
    int hashText;
    int offset;
    boolean ignore;

    String expansionOrdering;
  }

  /**
   * This is the original rule string.
   */
  private String rules;

  /**
   * This is the table of collation element values
   */
  private Object[] ce_table;

  /**
   * Quick-prefix finder.
   */
  HashMap prefix_tree;

  /**
   * This is the value of the last sequence entered into
   * <code>ce_table</code>.  It is used to compute the
   * ordering value of an unspecified character.
   */
  private int last_primary_value;

  /**
   * This is the value of the last secondary sequence of the
   * primary 0, entered into
   * <code>ce_table</code>.  It is used to compute the
   * ordering value of an unspecified accented character.
   */
  private int last_tertiary_value;

  /**
   * This variable is true if accents need to be sorted
   * in the other direction.
   */
  private boolean inverseAccentComparison;

  /**
   * This collation element is special to unknown sequences.
   * The JDK uses it to mark and sort the characters which have
   * no collation rules.
   */
  static final CollationElement SPECIAL_UNKNOWN_SEQ =
    new CollationElement("", (short) 32767, (short) 0, (short) 0,
                         (short) 0, null, false);

  /**
   * This method initializes a new instance of <code>RuleBasedCollator</code>
   * with the specified collation rules.  Note that an application normally
   * obtains an instance of <code>RuleBasedCollator</code> by calling the
   * <code>getInstance</code> method of <code>Collator</code>.  That method
   * automatically loads the proper set of rules for the desired locale.
   *
   * @param rules The collation rule string.
   *
   * @exception ParseException If the rule string contains syntax errors.
   */
  public RuleBasedCollator(String rules) throws ParseException
  {
    if (rules.equals(""))
      throw new ParseException("empty rule set", 0);

    this.rules = rules;

    buildCollationVector(parseString(rules));
    buildPrefixAccess();
  }

  /**
   * This method returns the number of common characters at the beginning
   * of the string of the two parameters.
   *
   * @param prefix A string considered as a prefix to test against
   * the other string.
   * @param s A string to test the prefix against.
   * @return The number of common characters.
   */
  static int findPrefixLength(String prefix, String s)
  {
    int index;
    int len = prefix.length();

    for (index = 0; index < len && index < s.length(); ++index)
      {
        if (prefix.charAt(index) != s.charAt(index))
          return index;
      }

    return index;
  }

  /**
   * Here we are merging two sets of sorting instructions: 'patch' into 'main'.
   * This method checks whether it is possible to find an anchor point for the
   * rules to be merged and then inserts them at that precise point.
   *
   * @param offset Offset in the string containing rules of the beginning of
   * the rules being merged in.
   * @param starter Text of the rules being merged.
   * @param main Repository of all already parsed rules.
   * @param patch Rules to be merged into the repository.
   * @throws ParseException if it is impossible to find an anchor point for
   * the new rules.
   */
  private void mergeRules(int offset, String starter, ArrayList main,
                          ArrayList patch)
    throws ParseException
  {
    int insertion_point = -1;
    int max_length = 0;

    /* We must check that no rules conflict with another already present. If it
     * is the case delete the old rule.
     */

    /* For the moment good old O(N^2) algorithm.
     */
    for (int i = 0; i < patch.size(); i++)
      {
        int j = 0;

        while (j < main.size())
          {
            CollationSorter rule1 = (CollationSorter) patch.get(i);
            CollationSorter rule2 = (CollationSorter) main.get(j);

            if (rule1.textElement.equals(rule2.textElement))
              main.remove(j);
            else
              j++;
          }
      }

    // Find the insertion point... O(N)
    // The anchor is placed right after the rule sharing the longest
    // prefix with 'starter'.
    for (int i = 0; i < main.size(); i++)
      {
        CollationSorter sorter = (CollationSorter) main.get(i);
        int length = findPrefixLength(starter, sorter.textElement);

        if (length > max_length)
          {
            max_length = length;
            insertion_point = i+1;
          }
      }

    if (insertion_point < 0)
      throw new ParseException("no insertion point found for " + starter,
                               offset);

    if (max_length < starter.length())
      {
        /*
         * We need to expand the first entry. It must be sorted
         * as if it was the reference key itself (like the spec
         * said). So the first entry is special: the element is
         * replaced by the specified text element for the sorting.
         * This text replaces the old one for comparisons. However
         * to preserve the behaviour we replace the first key (corresponding
         * to the found prefix) by a new code rightly ordered in the
         * sequence. The rest of the subsequence must be appended
         * to the end of the sequence.
         */
        CollationSorter sorter = (CollationSorter) patch.get(0);

        // Skip the first good prefix element
        sorter.expansionOrdering = starter.substring(max_length);

        main.add(insertion_point, sorter);

        /*
         * This is a new set of rules. Append to the list.
         */
        patch.remove(0);
        insertion_point++;
      }

    // Now insert all elements of patch at the insertion point.
    for (int i = 0; i < patch.size(); i++)
      main.add(i+insertion_point, patch.get(i));
  }

  /**
   * This method parses a string and builds a set of sorting instructions.
   * The parsing may only be partial in the case the rules are to be merged
   * sometime later.
   *
   * @param stop_on_reset If this parameter is true then the parser stops when
   * it encounters a reset instruction. In the other case, it tries to parse
   * the subrules and merges them in the same repository.
   * @param v Output vector for the set of instructions.
   * @param base_offset Offset in the string to begin parsing.
   * @param rules Rules to be parsed.
   * @return -1 if the parser reached the end of the string, otherwise an
   * integer representing the offset in the string at which it stopped parsing.
   * @throws ParseException if something turned wrong during the parsing. To
   * get details decode the message.
   */
  private int subParseString(boolean stop_on_reset, ArrayList v,
                             int base_offset, String rules)
    throws ParseException
  {
    // Characters seen before the first primary relation are ignorable;
    // this only applies to the outermost parse (base_offset == 0).
    boolean ignoreChars = (base_offset == 0);
    int operator = -1;
    StringBuffer sb = new StringBuffer();
    boolean doubleQuote = false;
    boolean eatingChars = false;
    boolean nextIsModifier = false;
    boolean isModifier = false;
    int i;

main_parse_loop:
    for (i = 0; i < rules.length(); i++)
      {
        char c = rules.charAt(i);
        int type = -1;

        // Unquoted whitespace is skipped.
        if (!eatingChars &&
            ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
          continue;

        isModifier = nextIsModifier;
        nextIsModifier = false;

        // Inside quotes every character except the quote itself is literal.
        if (eatingChars && c != '\'')
          {
            doubleQuote = false;
            sb.append(c);
            continue;
          }
        // A quote immediately following an opening quote is a literal quote.
        if (doubleQuote && eatingChars)
          {
            sb.append(c);
            doubleQuote = false;
            continue;
          }

        switch (c)
          {
          case '!':
            throw new ParseException
              ("Modifier '!' is not yet supported by Classpath", i + base_offset);
          case '<':
            type = CollationSorter.GREATERP;
            break;
          case ';':
            type = CollationSorter.GREATERS;
            break;
          case ',':
            type = CollationSorter.GREATERT;
            break;
          case '=':
            type = CollationSorter.EQUAL;
            break;
          case '\'':
            eatingChars = !eatingChars;
            doubleQuote = true;
            break;
          case '@':
            if (ignoreChars)
              throw new ParseException
                ("comparison list has not yet been started. You may only use"
                 + "(<,;=&)", i + base_offset);
            // Inverse the order of secondaries from now on.
            nextIsModifier = true;
            type = CollationSorter.INVERSE_SECONDARY;
            break;
          case '&':
            type = CollationSorter.RESET;
            if (stop_on_reset)
              break main_parse_loop;
            break;
          default:
            if (operator < 0)
              throw new ParseException
                ("operator missing at " + (i + base_offset), i + base_offset);
            if (! eatingChars
                && ((c >= 0x21 && c <= 0x2F)
                    || (c >= 0x3A && c <= 0x40)
                    || (c >= 0x5B && c <= 0x60)
                    || (c >= 0x7B && c <= 0x7E)))
              throw new ParseException
                ("unquoted punctuation character '" + c + "'", i + base_offset);

            sb.append(c);
            break;
          }

        // Not an operator: keep accumulating the text element.
        if (type < 0)
          continue;

        // First operator seen: nothing to flush yet.
        if (operator < 0)
          {
            operator = type;
            continue;
          }

        if (sb.length() == 0 && !isModifier)
          throw new ParseException
            ("text element empty at " + (i+base_offset), i+base_offset);

        if (operator == CollationSorter.RESET)
          {
            /* Reposition in the sorting list at the position
             * indicated by the text element.
             */
            String subrules = rules.substring(i);
            ArrayList sorted_rules = new ArrayList();
            int idx;

            // Parse the subrules but do not iterate through all
            // sublist. This is the privilege of the first call.
            idx = subParseString(true, sorted_rules, base_offset+i, subrules);

            // Merge new parsed rules into the list.
            mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
            sb.setLength(0);

            // Reset state to none.
            operator = -1;
            type = -1;
            // We have found a new subrule at 'idx' but it has not been parsed.
            if (idx >= 0)
              {
                // -1 compensates for the i++ of the loop.
                i += idx-1;
                continue main_parse_loop;
              }
            else
              // No more rules.
              break main_parse_loop;
          }

        CollationSorter sorter = new CollationSorter();

        if (operator == CollationSorter.GREATERP)
          ignoreChars = false;

        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        sorter.offset = base_offset+rules.length();
        sorter.ignore = ignoreChars;
        sb.setLength(0);

        v.add(sorter);
        operator = type;
      }

    // Flush the last pending instruction, if any.
    if (operator >= 0)
      {
        CollationSorter sorter = new CollationSorter();
        int pos = rules.length() + base_offset;

        if ((sb.length() != 0 && nextIsModifier)
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
          throw new ParseException("text element empty at " + pos, pos);

        if (operator == CollationSorter.GREATERP)
          ignoreChars = false;

        sorter.comparisonType = operator;
        sorter.textElement = sb.toString();
        sorter.hashText = sorter.textElement.hashCode();
        // 'pos' already includes base_offset; do not add it a second time.
        sorter.offset = pos;
        sorter.ignore = ignoreChars;
        v.add(sorter);
      }

    if (i == rules.length())
      return -1;
    else
      return i;
  }

  /**
   * This method creates a copy of this object.
   *
   * @return A copy of this object.
   */
  public Object clone()
  {
    return super.clone();
  }

  /**
   * This method completely parses a string 'rules' containing sorting rules.
   *
   * @param rules String containing the rules to be parsed.
   * @return A set of sorting instructions stored in an ArrayList.
   * @throws ParseException if something turned wrong during the parsing. To
   * get details decode the message.
   */
  private ArrayList parseString(String rules)
    throws ParseException
  {
    ArrayList v = new ArrayList();

    // result of the first subParseString is not absolute (may be -1 or a
    // positive integer). But we do not care.
    subParseString(false, v, 0, rules);

    return v;
  }

  /**
   * This method uses the sorting instructions built by {@link #parseString}
   * to build collation elements which can be directly used to sort strings.
   *
   * @param parsedElements Parsed instructions stored in an ArrayList.
   * @throws ParseException if the order of the instructions is not valid.
   */
  private void buildCollationVector(ArrayList parsedElements)
    throws ParseException
  {
    int primary_seq = 0;
    int last_tertiary_seq = 0;
    short secondary_seq = 0;
    short tertiary_seq = 0;
    short equality_seq = 0;
    boolean inverseComparisons = false;
    final boolean DECREASING = false;
    final boolean INCREASING = true;
    boolean secondaryType = INCREASING;
    ArrayList v = new ArrayList();

    // elts is completely sorted.
element_loop:
    for (int i = 0; i < parsedElements.size(); i++)
      {
        CollationSorter elt = (CollationSorter) parsedElements.get(i);

        switch (elt.comparisonType)
          {
          case CollationSorter.GREATERP:
            primary_seq++;
            // A pending '@' makes the secondaries of this primary count down.
            if (inverseComparisons)
              {
                secondary_seq = Short.MAX_VALUE;
                secondaryType = DECREASING;
              }
            else
              {
                secondary_seq = 0;
                secondaryType = INCREASING;
              }
            tertiary_seq = 0;
            equality_seq = 0;
            inverseComparisons = false;
            break;
          case CollationSorter.GREATERS:
            if (secondaryType == DECREASING)
              secondary_seq--;
            else
              secondary_seq++;
            tertiary_seq = 0;
            equality_seq = 0;
            break;
          case CollationSorter.INVERSE_SECONDARY:
            inverseComparisons = true;
            continue element_loop;
          case CollationSorter.GREATERT:
            tertiary_seq++;
            if (primary_seq == 0)
              last_tertiary_seq = tertiary_seq;
            equality_seq = 0;
            break;
          case CollationSorter.EQUAL:
            equality_seq++;
            break;
          case CollationSorter.RESET:
            throw new ParseException
              ("Invalid reached state 'RESET'. Internal error", elt.offset);
          default:
            throw new ParseException
              ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
          }

        v.add(new CollationElement(elt.textElement, primary_seq,
                                   secondary_seq, tertiary_seq,
                                   equality_seq, elt.expansionOrdering,
                                   elt.ignore));
      }

    // A trailing '@' (not followed by a primary relation) is remembered
    // for the default accented elements.
    this.inverseAccentComparison = inverseComparisons;

    ce_table = v.toArray();

    last_primary_value = primary_seq+1;
    last_tertiary_value = last_tertiary_seq+1;
  }

  /**
   * Build a map where all keys are the texts of collation elements and data
   * is the collation element itself. The map is used when extracting all
   * prefixes for a given text.
   */
  private void buildPrefixAccess()
  {
    prefix_tree = new HashMap();

    for (int i = 0; i < ce_table.length; i++)
      {
        CollationElement e = (CollationElement) ce_table[i];

        prefix_tree.put(e.key, e);
      }
  }

  /**
   * This method returns an integer which indicates whether the first
   * specified <code>String</code> is less than, greater than, or equal to
   * the second.  The value depends not only on the collation rules in
   * effect, but also the strength and decomposition settings of this object.
   *
   * @param source The first <code>String</code> to compare.
   * @param target A second <code>String</code> to compare to the first.
   *
   * @return A negative integer if source &lt; target, a positive integer
   * if source &gt; target, or 0 if source == target.
   */
  public int compare(String source, String target)
  {
    CollationElementIterator cs, ct;
    CollationElement ord1block = null;
    CollationElement ord2block = null;
    boolean advance_block_1 = true;
    boolean advance_block_2 = true;

    cs = getCollationElementIterator(source);
    ct = getCollationElementIterator(target);

    for(;;)
      {
        int ord1;
        int ord2;

        /*
         * We have to check whether the characters are ignorable.
         * If it is the case then forget them.
         */
        if (advance_block_1)
          {
            ord1block = cs.nextBlock();
            if (ord1block != null && ord1block.ignore)
              continue;
          }

        if (advance_block_2)
          {
            ord2block = ct.nextBlock();
            if (ord2block != null && ord2block.ignore)
              {
                // Keep the current source block while skipping the target one.
                advance_block_1 = false;
                continue;
              }
          }
        else
          advance_block_2 = true;

        if (!advance_block_1)
          advance_block_1 = true;

        if (ord1block != null)
          ord1 = ord1block.getValue();
        else
          {
            // Source exhausted: equal if target is exhausted too,
            // otherwise source is shorter.
            if (ord2block == null)
              return 0;
            return -1;
          }

        if (ord2block == null)
          return 1;

        ord2 = ord2block.getValue();

        // We know chars are totally equal, so skip
        if (ord1 == ord2)
          {
            if (getStrength() == IDENTICAL)
              if (!ord1block.key.equals(ord2block.key))
                return ord1block.key.compareTo(ord2block.key);
            continue;
          }

        // Check for primary strength differences
        int prim1 = CollationElementIterator.primaryOrder(ord1);
        int prim2 = CollationElementIterator.primaryOrder(ord2);

        // Below TERTIARY strength, an element with a zero primary order is
        // skipped on its own side while the other side stays in place.
        if (prim1 == 0 && getStrength() < TERTIARY)
          {
            advance_block_2 = false;
            continue;
          }
        else if (prim2 == 0 && getStrength() < TERTIARY)
          {
            advance_block_1 = false;
            continue;
          }

        if (prim1 < prim2)
          return -1;
        else if (prim1 > prim2)
          return 1;
        else if (getStrength() == PRIMARY)
          continue;

        // Check for secondary strength differences
        int sec1 = CollationElementIterator.secondaryOrder(ord1);
        int sec2 = CollationElementIterator.secondaryOrder(ord2);

        if (sec1 < sec2)
          return -1;
        else if (sec1 > sec2)
          return 1;
        else if (getStrength() == SECONDARY)
          continue;

        // Check for tertiary differences
        int tert1 = CollationElementIterator.tertiaryOrder(ord1);
        int tert2 = CollationElementIterator.tertiaryOrder(ord2);

        if (tert1 < tert2)
          return -1;
        else if (tert1 > tert2)
          return 1;
        else if (getStrength() == TERTIARY)
          continue;

        // Apparently JDK does this (at least for my test case).
        return ord1block.key.compareTo(ord2block.key);
      }
  }

  /**
   * This method tests this object for equality against the specified
   * object.  This will be true if and only if the specified object is
   * another reference to this object.
   *
   * @param obj The <code>Object</code> to compare against this object.
   *
   * @return <code>true</code> if the specified object is equal to this object,
   * <code>false</code> otherwise.
   */
  public boolean equals(Object obj)
  {
    return obj == this;
  }

  /**
   * This method builds a default collation element without invoking
   * the database created from the rules passed to the constructor.
   *
   * @param c Character which needs a collation element.
   * @return A valid brand new CollationElement instance.
   */
  CollationElement getDefaultElement(char c)
  {
    int v;

    // Preliminary support for generic accent sorting inversion (I don't know
    // if all characters in the range should be sorted backward). This is the
    // place to fix this if needed.
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
      v = 0x0361 - ((int) c - 0x02B9);
    else
      v = (short) c;
    return new CollationElement("" + c, last_primary_value + v,
                                (short) 0, (short) 0, (short) 0, null, false);
  }

  /**
   * This method builds a default collation element for an accented character
   * without invoking the database created from the rules passed to the
   * constructor.
   *
   * @param c Character which needs a collation element.
   * @return A valid brand new CollationElement instance.
   */
  CollationElement getDefaultAccentedElement(char c)
  {
    int v;

    // Preliminary support for generic accent sorting inversion (I don't know
    // if all characters in the range should be sorted backward). This is the
    // place to fix this if needed.
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
      v = 0x0361 - ((int) c - 0x02B9);
    else
      v = (short) c;
    return new CollationElement("" + c, (short) 0,
                                (short) 0, (short) (last_tertiary_value + v),
                                (short) 0, null, false);
  }

  /**
   * This method returns an instance for <code>CollationElementIterator</code>
   * for the specified <code>String</code> under the collation rules for this
   * object.
   *
   * @param source The <code>String</code> to return the
   * <code>CollationElementIterator</code> instance for.
   *
   * @return A <code>CollationElementIterator</code> for the specified
   * <code>String</code>.
   */
  public CollationElementIterator getCollationElementIterator(String source)
  {
    return new CollationElementIterator(this, source);
  }

  /**
   * This method returns an instance of <code>CollationElementIterator</code>
   * for the <code>String</code> represented by the specified
   * <code>CharacterIterator</code>.
   *
   * @param source The <code>CharacterIterator</code> with the desired
   * <code>String</code>.
   *
   * @return A <code>CollationElementIterator</code> for the specified
   * <code>String</code>.
   */
  public CollationElementIterator getCollationElementIterator(CharacterIterator source)
  {
    StringBuffer expand = new StringBuffer("");

    // Right now we assume that we will read from the beginning of the string.
    for (char c = source.first();
         c != CharacterIterator.DONE;
         c = source.next())
      decomposeCharacter(c, expand);

    return getCollationElementIterator(expand.toString());
  }

  /**
   * This method returns an instance of <code>CollationKey</code> for the
   * specified <code>String</code>.  The object returned will have a
   * more efficient mechanism for its comparison function that could
   * provide speed benefits if multiple comparisons are performed, such
   * as during a sort.
   *
   * @param source The <code>String</code> to create a
   * <code>CollationKey</code> for.
   *
   * @return A <code>CollationKey</code> for the specified <code>String</code>.
   */
  public CollationKey getCollationKey(String source)
  {
    CollationElementIterator cei = getCollationElementIterator(source);
    ArrayList vect = new ArrayList();

    // NOTE(review): the iterator is reset after the first element has
    // already been fetched, so the next() calls below presumably return
    // that first element a second time -- confirm against
    // CollationElementIterator before changing the key layout.
    int ord = cei.next();
    cei.reset(); //set to start of string

    while (ord != CollationElementIterator.NULLORDER)
      {
        // If the primary order is null, it means this is an ignorable
        // character.
        if (CollationElementIterator.primaryOrder(ord) == 0)
          {
            ord = cei.next();
            continue;
          }
        switch (getStrength())
          {
          case PRIMARY:
            ord = CollationElementIterator.primaryOrder(ord);
            break;

          case SECONDARY:
            // Both orders must be extracted from the original value;
            // the operands are evaluated before 'ord' is overwritten.
            ord = (CollationElementIterator.primaryOrder(ord) << 8)
                  | CollationElementIterator.secondaryOrder(ord);
            break;

          default:
            break;
          }

        vect.add(new Integer(ord));
        ord = cei.next(); //increment to next key
      }

    Object[] objarr = vect.toArray();
    byte[] key = new byte[objarr.length * 4];

    // Store each order as four bytes, most significant byte first.
    for (int i = 0; i < objarr.length; i++)
      {
        int j = ((Integer) objarr[i]).intValue();
        key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
        key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
        key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
        key [i * 4 + 3] = (byte) (j & 0x000000FF);
      }

    return new CollationKey(this, source, key);
  }

  /**
   * This method returns a <code>String</code> containing the collation rules
   * for this object.
   *
   * @return The collation rules for this object.
   */
  public String getRules()
  {
    return rules;
  }

  /**
   * This method returns a hash value for this object.
   *
   * @return A hash value for this object.
   */
  public int hashCode()
  {
    return System.identityHashCode(this);
  }
}
GNU Classpath (0.20) |