001 /* Collator.java -- Perform locale dependent String comparisons. 002 Copyright (C) 1998, 1999, 2000, 2001, 2004, 2005, 2007, 003 2008 Free Software Foundation, Inc. 004 005 This file is part of GNU Classpath. 006 007 GNU Classpath is free software; you can redistribute it and/or modify 008 it under the terms of the GNU General Public License as published by 009 the Free Software Foundation; either version 2, or (at your option) 010 any later version. 011 012 GNU Classpath is distributed in the hope that it will be useful, but 013 WITHOUT ANY WARRANTY; without even the implied warranty of 014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 General Public License for more details. 016 017 You should have received a copy of the GNU General Public License 018 along with GNU Classpath; see the file COPYING. If not, write to the 019 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 020 02110-1301 USA. 021 022 Linking this library statically or dynamically with other modules is 023 making a combined work based on this library. Thus, the terms and 024 conditions of the GNU General Public License cover the whole 025 combination. 026 027 As a special exception, the copyright holders of this library give you 028 permission to link this library with independent modules to produce an 029 executable, regardless of the license terms of these independent 030 modules, and to copy and distribute the resulting executable under 031 terms of your choice, provided that you also meet, for each linked 032 independent module, the terms and conditions of the license of that 033 module. An independent module is a module which is not derived from 034 or based on this library. If you modify this library, you may extend 035 this exception to your version of the library, but you are not 036 obligated to do so. If you do not wish to do so, delete this 037 exception statement from your version. */ 038 039 040 package java.text; 041 042 import gnu.java.locale.LocaleHelper; 043 044 import java.text.spi.CollatorProvider; 045 046 import java.util.Comparator; 047 import java.util.Locale; 048 import java.util.MissingResourceException; 049 import java.util.ResourceBundle; 050 import java.util.ServiceLoader; 051 052 /** 053 * This class is the abstract superclass of classes which perform 054 * locale dependent <code>String</code> comparisons. A caller requests 055 * an instance of <code>Collator</code> for a particular locale using 056 * the <code>getInstance()</code> static method in this class. That method 057 * will return a locale specific subclass of <code>Collator</code> which 058 * can be used to perform <code>String</code> comparisons for that locale. 059 * If a subclass of <code>Collator</code> cannot be located for a particular 060 * locale, a default instance for the current locale will be returned. 061 * 062 * In addition to setting the correct locale, there are two additional 063 * settings that can be adjusted to affect <code>String</code> comparisons: 064 * strength and decomposition. The strength value determines the level 065 * of signficance of character differences required for them to sort 066 * differently. (For example, whether or not capital letters are considered 067 * different from lower case letters). The decomposition value affects how 068 * variants of the same character are treated for sorting purposes. (For 069 * example, whether or not an accent is signficant or not). These settings 070 * are described in detail in the documentation for the methods and values 071 * that are related to them. 072 * 073 * @author Tom Tromey (tromey@cygnus.com) 074 * @author Aaron M. Renn (arenn@urbanophile.com) 075 * @date March 18, 1999 076 */ 077 public abstract class Collator implements Comparator<Object>, Cloneable 078 { 079 /** 080 * This constant is a strength value which indicates that only primary 081 * differences between characters will be considered signficant. As an 082 * example, two completely different English letters such as 'a' and 'b' 083 * are considered to have a primary difference. 084 */ 085 public static final int PRIMARY = 0; 086 087 /** 088 * This constant is a strength value which indicates that only secondary 089 * or primary differences between characters will be considered 090 * significant. An example of a secondary difference between characters 091 * are instances of the same letter with different accented forms. 092 */ 093 public static final int SECONDARY = 1; 094 095 /** 096 * This constant is a strength value which indicates that tertiary, 097 * secondary, and primary differences will be considered during sorting. 098 * An example of a tertiary difference is capitalization of a given letter. 099 * This is the default value for the strength setting. 100 */ 101 public static final int TERTIARY = 2; 102 103 /** 104 * This constant is a strength value which indicates that any difference 105 * at all between character values are considered significant. 106 */ 107 public static final int IDENTICAL = 3; 108 109 /** 110 * This constant indicates that accented characters won't be decomposed 111 * when performing comparisons. This will yield the fastest results, but 112 * will only work correctly in call cases for languages which do not 113 * use accents such as English. 114 */ 115 public static final int NO_DECOMPOSITION = 0; 116 117 /** 118 * This constant indicates that only characters which are canonical variants 119 * in Unicode 2.0 will be decomposed prior to performing comparisons. This 120 * will cause accented languages to be sorted correctly. This is the 121 * default decomposition value. 122 */ 123 public static final int CANONICAL_DECOMPOSITION = 1; 124 125 /** 126 * This constant indicates that both canonical variants and compatibility 127 * variants in Unicode 2.0 will be decomposed prior to performing 128 * comparisons. This is the slowest mode, but is required to get the 129 * correct sorting for certain languages with certain special formats. 130 */ 131 public static final int FULL_DECOMPOSITION = 2; 132 133 /** 134 * This method initializes a new instance of <code>Collator</code> to have 135 * the default strength (TERTIARY) and decomposition 136 * (CANONICAL_DECOMPOSITION) settings. This constructor is protected and 137 * is for use by subclasses only. Non-subclass callers should use the 138 * static <code>getInstance()</code> methods of this class to instantiate 139 * <code>Collation</code> objects for the desired locale. 140 */ 141 protected Collator () 142 { 143 strength = TERTIARY; 144 decmp = CANONICAL_DECOMPOSITION; 145 } 146 147 /** 148 * This method compares the two <code>String</code>'s and returns an 149 * integer indicating whether or not the first argument is less than, 150 * equal to, or greater than the second argument. The comparison is 151 * performed according to the rules of the locale for this 152 * <code>Collator</code> and the strength and decomposition rules in 153 * effect. 154 * 155 * @param source The first object to compare 156 * @param target The second object to compare 157 * 158 * @return A negative integer if str1 < str2, 0 if str1 == str2, or 159 * a positive integer if str1 > str2. 160 */ 161 public abstract int compare (String source, String target); 162 163 /** 164 * This method compares the two <code>Object</code>'s and returns an 165 * integer indicating whether or not the first argument is less than, 166 * equal to, or greater than the second argument. These two objects 167 * must be <code>String</code>'s or an exception will be thrown. 168 * 169 * @param o1 The first object to compare 170 * @param o2 The second object to compare 171 * 172 * @return A negative integer if obj1 < obj2, 0 if obj1 == obj2, or 173 * a positive integer if obj1 > obj2. 174 * 175 * @exception ClassCastException If the arguments are not instances 176 * of <code>String</code>. 177 */ 178 public int compare (Object o1, Object o2) 179 { 180 return compare ((String) o1, (String) o2); 181 } 182 183 /** 184 * This method tests the specified object for equality against this 185 * object. This will be true if and only if the following conditions are 186 * met: 187 * <ul> 188 * <li>The specified object is not <code>null</code>.</li> 189 * <li>The specified object is an instance of <code>Collator</code>.</li> 190 * <li>The specified object has the same strength and decomposition 191 * settings as this object.</li> 192 * </ul> 193 * 194 * @param obj The <code>Object</code> to test for equality against 195 * this object. 196 * 197 * @return <code>true</code> if the specified object is equal to 198 * this one, <code>false</code> otherwise. 199 */ 200 public boolean equals (Object obj) 201 { 202 if (! (obj instanceof Collator)) 203 return false; 204 Collator c = (Collator) obj; 205 return decmp == c.decmp && strength == c.strength; 206 } 207 208 /** 209 * This method tests whether the specified <code>String</code>'s are equal 210 * according to the collation rules for the locale of this object and 211 * the current strength and decomposition settings. 212 * 213 * @param source The first <code>String</code> to compare 214 * @param target The second <code>String</code> to compare 215 * 216 * @return <code>true</code> if the two strings are equal, 217 * <code>false</code> otherwise. 218 */ 219 public boolean equals (String source, String target) 220 { 221 return compare (source, target) == 0; 222 } 223 224 /** 225 * This method returns a copy of this <code>Collator</code> object. 226 * 227 * @return A duplicate of this object. 228 */ 229 public Object clone () 230 { 231 try 232 { 233 return super.clone (); 234 } 235 catch (CloneNotSupportedException _) 236 { 237 return null; 238 } 239 } 240 241 /** 242 * This method returns an array of <code>Locale</code> objects which is 243 * the list of locales for which <code>Collator</code> objects exist. 244 * 245 * @return The list of locales for which <code>Collator</code>'s exist. 246 */ 247 public static synchronized Locale[] getAvailableLocales () 248 { 249 return LocaleHelper.getCollatorLocales(); 250 } 251 252 /** 253 * This method transforms the specified <code>String</code> into a 254 * <code>CollationKey</code> for faster comparisons. This is useful when 255 * comparisons against a string might be performed multiple times, such 256 * as during a sort operation. 257 * 258 * @param source The <code>String</code> to convert. 259 * 260 * @return A <code>CollationKey</code> for the specified <code>String</code>. 261 */ 262 public abstract CollationKey getCollationKey (String source); 263 264 /** 265 * This method returns the current decomposition setting for this 266 * object. This * will be one of NO_DECOMPOSITION, 267 * CANONICAL_DECOMPOSITION, or * FULL_DECOMPOSITION. See the 268 * documentation for those constants for an * explanation of this 269 * setting. 270 * 271 * @return The current decomposition setting. 272 */ 273 public synchronized int getDecomposition () 274 { 275 return decmp; 276 } 277 278 /** 279 * This method returns an instance of <code>Collator</code> for the 280 * default locale. 281 * 282 * @return A <code>Collator</code> for the default locale. 283 */ 284 public static Collator getInstance () 285 { 286 return getInstance (Locale.getDefault()); 287 } 288 289 /** 290 * This method returns an instance of <code>Collator</code> for the 291 * specified locale. If no <code>Collator</code> exists for the desired 292 * locale, the fallback procedure described in 293 * {@link java.util.spi.LocaleServiceProvider} is invoked. 294 * 295 * @param loc The desired locale to load a <code>Collator</code> for. 296 * 297 * @return A <code>Collator</code> for the requested locale 298 */ 299 public static Collator getInstance (Locale loc) 300 { 301 String pattern; 302 try 303 { 304 ResourceBundle res = 305 ResourceBundle.getBundle("gnu.java.locale.LocaleInformation", 306 loc, ClassLoader.getSystemClassLoader()); 307 return new RuleBasedCollator(res.getString("collation_rules")); 308 } 309 catch (MissingResourceException x) 310 { 311 /* This means runtime support for the locale 312 * is not available, so we check providers. */ 313 } 314 catch (ParseException x) 315 { 316 throw (InternalError)new InternalError().initCause(x); 317 } 318 for (CollatorProvider p : ServiceLoader.load(CollatorProvider.class)) 319 { 320 for (Locale l : p.getAvailableLocales()) 321 { 322 if (l.equals(loc)) 323 { 324 Collator c = p.getInstance(loc); 325 if (c != null) 326 return c; 327 break; 328 } 329 } 330 } 331 if (loc.equals(Locale.ROOT)) 332 { 333 try 334 { 335 return new RuleBasedCollator("<0<1<2<3<4<5<6<7<8<9<A,a<b,B<c," + 336 "C<d,D<e,E<f,F<g,G<h,H<i,I<j,J<k,K" + 337 "<l,L<m,M<n,N<o,O<p,P<q,Q<r,R<s,S<t,"+ 338 "T<u,U<v,V<w,W<x,X<y,Y<z,Z"); 339 } 340 catch (ParseException x) 341 { 342 throw (InternalError)new InternalError().initCause(x); 343 } 344 } 345 return getInstance(LocaleHelper.getFallbackLocale(loc)); 346 } 347 348 /** 349 * This method returns the current strength setting for this object. This 350 * will be one of PRIMARY, SECONDARY, TERTIARY, or IDENTICAL. See the 351 * documentation for those constants for an explanation of this setting. 352 * 353 * @return The current strength setting. 354 */ 355 public synchronized int getStrength () 356 { 357 return strength; 358 } 359 360 /** 361 * This method returns a hash code value for this object. 362 * 363 * @return A hash value for this object. 364 */ 365 public abstract int hashCode (); 366 367 /** 368 * This method sets the decomposition setting for this object to the 369 * specified value. This must be one of NO_DECOMPOSITION, 370 * CANONICAL_DECOMPOSITION, or FULL_DECOMPOSITION. Otherwise an 371 * exception will be thrown. See the documentation for those 372 * contants for an explanation of this setting. 373 * 374 * @param mode The new decomposition setting. 375 * 376 * @exception IllegalArgumentException If the requested 377 * decomposition setting is not valid. 378 */ 379 public synchronized void setDecomposition (int mode) 380 { 381 if (mode != NO_DECOMPOSITION 382 && mode != CANONICAL_DECOMPOSITION 383 && mode != FULL_DECOMPOSITION) 384 throw new IllegalArgumentException (); 385 decmp = mode; 386 } 387 388 /** 389 * This method sets the strength setting for this object to the specified 390 * value. This must be one of PRIMARY, SECONDARY, TERTIARY, or IDENTICAL. 391 * Otherwise an exception is thrown. See the documentation for these 392 * constants for an explanation of this setting. 393 * 394 * @param strength The new strength setting. 395 * 396 * @exception IllegalArgumentException If the requested strength 397 * setting value is not valid. 398 */ 399 public synchronized void setStrength (int strength) 400 { 401 if (strength != PRIMARY && strength != SECONDARY 402 && strength != TERTIARY && strength != IDENTICAL) 403 throw new IllegalArgumentException (); 404 this.strength = strength; 405 } 406 407 // Decompose a single character and append results to the buffer. 408 native final void decomposeCharacter (char c, StringBuffer buf); 409 410 /** 411 * This is the current collation decomposition setting. 412 */ 413 int decmp; 414 415 /** 416 * This is the current collation strength setting. 417 */ 418 int strength; 419 }