Source for java.net.URI

   1: /* URI.java -- An URI class
   2:    Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.net;
  40: 
  41: import java.io.IOException;
  42: import java.io.ObjectInputStream;
  43: import java.io.ObjectOutputStream;
  44: import java.io.Serializable;
  45: import java.util.regex.Matcher;
  46: import java.util.regex.Pattern;
  47: 
  48: /**
  49:  * <p>
  50:  * A URI instance represents that defined by 
  51:  * <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC3986</a>,
  52:  * with some deviations.
  53:  * </p>
  54:  * <p>
  55:  * At its highest level, a URI consists of:
  56:  * </p>
  57:  * <p><code>[<em>scheme</em><strong>:</strong>]<em>scheme-specific-part</em>
  58:  * [<strong>#</strong><em>fragment</em>]</code>
  59:  * </p>
  60:  * <p>
  61:  * where <strong>#</strong> and <strong>:</strong> are literal characters,
  62:  * and those parts enclosed in square brackets are optional.
  63:  * </p>
  64:  * <p>
  65:  * There are two main types of URI.  An <em>opaque</em> URI is one
  66:  * which just consists of the above three parts, and is not further
  67:  * defined.  An example of such a URI would be <em>mailto:</em> URI.
  68:  * In contrast, <em>hierarchical</em> URIs give further definition
  69:  * to the scheme-specific part, so as to represent some part of a hierarchical
  70:  * structure.
  71:  * </p>
  72:  * <p>
  73:  * <code>[<strong>//</strong><em>authority</em>][<em>path</em>]
  74:  * [<strong>?</strong><em>query</em>]</code>
  75:  * </p>
  76:  * <p>
  77:  * with <strong>/</strong> and <strong>?</strong> being literal characters.
  78:  * When server-based, the authority section is further subdivided into:
  79:  * </p>
  80:  * <p>
  81:  * <code>[<em>user-info</em><strong>@</strong>]<em>host</em>
  82:  * [<strong>:</strong><em>port</em>]</code>
  83:  * </p>
  84:  * <p>
  85:  * with <strong>@</strong> and <strong>:</strong> as literal characters.
  86:  * Authority sections that are not server-based are said to be registry-based.
  87:  * </p>
  88:  * <p>
  89:  * Hierarchical URIs can be either relative or absolute.  Absolute URIs
  90:  * specify a scheme, followed by a scheme-specific part starting with
  91:  * `<strong>/</strong>'; relative URIs don't specify a scheme.  Opaque URIs are always absolute.
  92:  * </p>
  93:  * <p>
  94:  * Each part of the URI may have one of three states: undefined, empty
  95:  * or containing some content.  The former two of these are represented
  96:  * by <code>null</code> and the empty string in Java, respectively.
  97:  * The scheme-specific part may never be undefined.  It also follows from
  98:  * this that the path sub-part may also not be undefined, so as to ensure
  99:  * the former.
 100:  * </p>
 101:  * <h2>Character Escaping and Quoting</h2>
 102:  * <p>
 103:  * The characters that can be used within a valid URI are restricted.
 104:  * There are two main classes of characters which can't be used as is
 105:  * within the URI:
 106:  * </p>
 107:  * <ol>
 108:  * <li><strong>Characters outside the US-ASCII character set</strong>.
 109:  * These have to be <strong>escaped</strong> in order to create
 110:  * an RFC-compliant URI; this means replacing the character with the
 111:  * appropriate hexadecimal value, preceded by a `%'.</li>
 112:  * <li><strong>Illegal characters</strong> (e.g. space characters,
 113:  * control characters) are quoted, which results in them being encoded
 114:  * in the same way as non-US-ASCII characters.</li>
 115:  * </ol>
 116:  * <p>
 117:  * The set of valid characters differs depending on the section of the URI:
 118:  * </p>
 119:  * <ul>
 120:  * <li><strong>Scheme</strong>: Must be an alphanumeric, `-', `.' or '+'.</li>
 121:  * <li><strong>Authority</strong>:Composed of the username, host, port, `@'
 122:  * and `:'.</li>
 123:  * <li><strong>Username</strong>: Allows unreserved or percent-encoded
 124:  * characters, sub-delimiters and `:'.</li>
 125:  * <li><strong>Host</strong>: Allows unreserved or percent-encoded
 126:  * characters, sub-delimiters and square brackets (`[' and `]') for IPv6
 127:  * addresses.</li>
 128:  * <li><strong>Port</strong>: Digits only.</li>
 129:  * <li><strong>Path</strong>: Allows the path characters and `/'.</li>
 130:  * <li><strong>Query</strong>: Allows the path characters, `?' and '/'.</li>
 131:  * <li><strong>Fragment</strong>: Allows the path characters, `?' and '/'.</li>
 132:  * </ul>
 133:  * <p>
 134:  * These definitions reference the following sets of characters:
 135:  * </p>
 136:  * <ul>
 137:  * <li><strong>Unreserved characters</strong>: The alphanumerics plus
 138:  * `-', `.', `_', and `~'.</li>
 139:  * <li><strong>Sub-delimiters</strong>: `!', `$', `&', `(', `)', `*',
 140:  * `+', `,', `;', `=' and the single-quote itself.</li>
 141:  * <li><strong>Path characters</strong>: Unreserved and percent-encoded
 142:  * characters and the sub-delimiters along with `@' and `:'.</li>
 143:  * </ul>
 144:  * <p>
 145:  * The constructors and accessor methods allow the use and retrieval of
 146:  * URI components which contain non-US-ASCII characters directly.
 147:  * They are only escaped when the <code>toASCIIString()</code> method
 148:  * is used.  In contrast, illegal characters are always quoted, with the
 149:  * exception of the return values of the non-raw accessors.
 150:  * </p>
 151:  *
 152:  * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
 153:  * @author Dalibor Topic (robilad@kaffe.org)
 154:  * @author Michael Koch (konqueror@gmx.de)
 155:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
 156:  * @since 1.4
 157:  */
 158: public final class URI 
 159:   implements Comparable, Serializable
 160: {
 161:   /**
 162:    * For serialization compatibility.
 163:    */
 164:   static final long serialVersionUID = -6052424284110960213L;
 165: 
 166:   /**
 167:    * Regular expression for parsing URIs.
 168:    *
 169:    * Taken from RFC 2396, Appendix B.
 170:    * This expression doesn't parse IPv6 addresses.
 171:    */
 172:   private static final String URI_REGEXP =
 173:     "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";
 174: 
 175:   /**
 176:    * Regular expression for parsing the authority segment.
 177:    */
 178:   private static final String AUTHORITY_REGEXP =
 179:     "(([^?#]*)@)?([^?#:]*)(:([0-9]*))?";
 180: 
 181:   /**
 182:    * Valid characters (taken from rfc2396/3986)
 183:    */
 184:   private static final String RFC2396_DIGIT = "0123456789";
 185:   private static final String RFC2396_LOWALPHA = "abcdefghijklmnopqrstuvwxyz";
 186:   private static final String RFC2396_UPALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 187:   private static final String RFC2396_ALPHA =
 188:     RFC2396_LOWALPHA + RFC2396_UPALPHA;
 189:   private static final String RFC2396_ALPHANUM = RFC2396_DIGIT + RFC2396_ALPHA;
 190:   private static final String RFC3986_UNRESERVED = RFC2396_ALPHANUM + "-._~";
 191:   private static final String RFC3986_SUBDELIMS = "!$&'()*+,;=";
 192:   private static final String RFC3986_REG_NAME =
 193:     RFC3986_UNRESERVED + RFC3986_SUBDELIMS + "%";
 194:   private static final String RFC3986_PCHAR = RFC3986_UNRESERVED + 
 195:     RFC3986_SUBDELIMS + ":@%";
 196:   private static final String RFC3986_SEGMENT = RFC3986_PCHAR;
 197:   private static final String RFC3986_PATH_SEGMENTS = RFC3986_SEGMENT + "/";
 198:   private static final String RFC3986_SSP = RFC3986_PCHAR + "?/";
 199:   private static final String RFC3986_HOST = RFC3986_REG_NAME + "[]";
 200:   private static final String RFC3986_USERINFO = RFC3986_REG_NAME + ":";
 201: 
 202:   /**
 203:    * Index of scheme component in parsed URI.
 204:    */
 205:   private static final int SCHEME_GROUP = 2;
 206: 
 207:   /**
 208:    * Index of scheme-specific-part in parsed URI.
 209:    */
 210:   private static final int SCHEME_SPEC_PART_GROUP = 3;
 211: 
 212:   /**
 213:    * Index of authority component in parsed URI.
 214:    */
 215:   private static final int AUTHORITY_GROUP = 5;
 216: 
 217:   /**
 218:    * Index of path component in parsed URI.
 219:    */
 220:   private static final int PATH_GROUP = 6;
 221: 
 222:   /**
 223:    * Index of query component in parsed URI.
 224:    */
 225:   private static final int QUERY_GROUP = 8;
 226: 
 227:   /**
 228:    * Index of fragment component in parsed URI.
 229:    */
 230:   private static final int FRAGMENT_GROUP = 10;
 231:   
 232:   /**
 233:    * Index of userinfo component in parsed authority section.
 234:    */
 235:   private static final int AUTHORITY_USERINFO_GROUP = 2;
 236: 
 237:   /**
 238:    * Index of host component in parsed authority section.
 239:    */
 240:   private static final int AUTHORITY_HOST_GROUP = 3;
 241: 
 242:   /**
 243:    * Index of port component in parsed authority section.
 244:    */
 245:   private static final int AUTHORITY_PORT_GROUP = 5;
 246: 
 247:   /**
 248:    * The compiled version of the URI regular expression.
 249:    */
 250:   private static final Pattern URI_PATTERN;
 251: 
 252:   /**
 253:    * The compiled version of the authority regular expression.
 254:    */
 255:   private static final Pattern AUTHORITY_PATTERN;
 256: 
 257:   /**
 258:    * The set of valid hexadecimal characters.
 259:    */
 260:   private static final String HEX = "0123456789ABCDEF";
 261: 
 262:   private transient String scheme;
 263:   private transient String rawSchemeSpecificPart;
 264:   private transient String schemeSpecificPart;
 265:   private transient String rawAuthority;
 266:   private transient String authority;
 267:   private transient String rawUserInfo;
 268:   private transient String userInfo;
 269:   private transient String rawHost;
 270:   private transient String host;
 271:   private transient int port = -1;
 272:   private transient String rawPath;
 273:   private transient String path;
 274:   private transient String rawQuery;
 275:   private transient String query;
 276:   private transient String rawFragment;
 277:   private transient String fragment;
 278:   private String string;
 279: 
 280:   /**
 281:    * Static initializer to pre-compile the regular expressions.
 282:    */
 283:   static
 284:   {
 285:     URI_PATTERN = Pattern.compile(URI_REGEXP);
 286:     AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEXP);
 287:   }
 288: 
 289:   private void readObject(ObjectInputStream is)
 290:     throws ClassNotFoundException, IOException
 291:   {
 292:     this.string = (String) is.readObject();
 293:     try
 294:       {
 295:     parseURI(this.string);
 296:       }
 297:     catch (URISyntaxException x)
 298:       {
 299:     // Should not happen.
 300:     throw new RuntimeException(x);
 301:       }
 302:   }
 303: 
 304:   private void writeObject(ObjectOutputStream os) throws IOException
 305:   {
 306:     if (string == null)
 307:       string = toString(); 
 308:     os.writeObject(string);
 309:   }
 310: 
 311:   /**
 312:    * <p>
 313:    * Returns the string content of the specified group of the supplied
 314:    * matcher.  The returned value is modified according to the following:
 315:    * </p>
 316:    * <ul>
 317:    * <li>If the resulting string has a length greater than 0, then
 318:    * that string is returned.</li>
 319:    * <li>If a string of zero length, is matched, then the content
 320:    * of the preceding group is considered.  If this is also an empty
 321:    * string, then <code>null</code> is returned to indicate an undefined
 322:    * value.  Otherwise, the value is truly the empty string and this is
 323:    * the returned value.</li>
 324:    * </ul>
 325:    * <p>
 326:    * This method is used for matching against all parts of the URI
 327:    * that may be either undefined or empty (i.e. all those but the
 328:    * scheme-specific part and the path).  In each case, the preceding
 329:    * group is the content of the original group, along with some
 330:    * additional distinguishing feature.  For example, the preceding
 331:    * group for the query includes the preceding question mark,
 332:    * while that of the fragment includes the hash symbol.  The presence
 333:    * of these features enables disambiguation between the two cases
 334:    * of a completely unspecified value and a simple non-existant value.
 335:    * The scheme differs in that it will never return an empty string;
 336:    * the delimiter follows the scheme rather than preceding it, so
 337:    * it becomes part of the following section.  The same is true
 338:    * of the user information.
 339:    * </p>
 340:    *
 341:    * @param match the matcher, which contains the results of the URI
 342:    *              matched against the URI regular expression.
 343:    * @return either the matched content, <code>null</code> for undefined
 344:    *         values, or an empty string for a URI part with empty content.
 345:    */
 346:   private static String getURIGroup(Matcher match, int group)
 347:   {
 348:     String matched = match.group(group);
 349:     return matched.length() == 0 
 350:       ? ((match.group(group - 1).length() == 0) ? null : "") : matched;
 351:   }
 352: 
 353:   /**
 354:    * Sets fields of this URI by parsing the given string.
 355:    *
 356:    * @param str The string to parse
 357:    *
 358:    * @exception URISyntaxException If the given string violates RFC 2396
 359:    */
 360:   private void parseURI(String str) throws URISyntaxException
 361:   {
 362:     Matcher matcher = URI_PATTERN.matcher(str);
 363:     
 364:     if (matcher.matches())
 365:       {
 366:     scheme = getURIGroup(matcher, SCHEME_GROUP);
 367:     rawSchemeSpecificPart = matcher.group(SCHEME_SPEC_PART_GROUP);
 368:     schemeSpecificPart = unquote(rawSchemeSpecificPart);
 369:     if (!isOpaque())
 370:       {
 371:         rawAuthority = getURIGroup(matcher, AUTHORITY_GROUP);
 372:         rawPath = matcher.group(PATH_GROUP);
 373:         rawQuery = getURIGroup(matcher, QUERY_GROUP);
 374:       }
 375:     rawFragment = getURIGroup(matcher, FRAGMENT_GROUP);
 376:       }
 377:     else
 378:       throw new URISyntaxException(str,
 379:                    "doesn't match URI regular expression");
 380:     parseServerAuthority();
 381: 
 382:     // We must eagerly unquote the parts, because this is the only time
 383:     // we may throw an exception.
 384:     authority = unquote(rawAuthority);
 385:     userInfo = unquote(rawUserInfo);
 386:     host = unquote(rawHost);
 387:     path = unquote(rawPath);
 388:     query = unquote(rawQuery);
 389:     fragment = unquote(rawFragment);
 390:   }
 391: 
 392:   /**
 393:    * Unquote "%" + hex quotes characters
 394:    *
 395:    * @param str The string to unquote or null.
 396:    *
 397:    * @return The unquoted string or null if str was null.
 398:    *
 399:    * @exception URISyntaxException If the given string contains invalid
 400:    * escape sequences.
 401:    */
 402:   private static String unquote(String str) throws URISyntaxException
 403:   {
 404:     if (str == null)
 405:       return null;
 406:     byte[] buf = new byte[str.length()];
 407:     int pos = 0;
 408:     for (int i = 0; i < str.length(); i++)
 409:       {
 410:     char c = str.charAt(i);
 411:     if (c == '%')
 412:       {
 413:         if (i + 2 >= str.length())
 414:           throw new URISyntaxException(str, "Invalid quoted character");
 415:         int hi = Character.digit(str.charAt(++i), 16);
 416:         int lo = Character.digit(str.charAt(++i), 16);
 417:         if (lo < 0 || hi < 0)
 418:           throw new URISyntaxException(str, "Invalid quoted character");
 419:         buf[pos++] = (byte) (hi * 16 + lo);
 420:       }
 421:     else
 422:       buf[pos++] = (byte) c;
 423:       }
 424:     try
 425:       {
 426:     return new String(buf, 0, pos, "utf-8");
 427:       }
 428:     catch (java.io.UnsupportedEncodingException x2)
 429:       {
 430:     throw (Error) new InternalError().initCause(x2);
 431:       }
 432:   }
 433: 
 434:   /**
 435:    * Quote characters illegal in URIs in given string.
 436:    *
 437:    * Replace illegal characters by encoding their UTF-8
 438:    * representation as "%" + hex code for each resulting
 439:    * UTF-8 character.
 440:    *
 441:    * @param str The string to quote
 442:    *
 443:    * @return The quoted string.
 444:    */
 445:   private static String quote(String str)
 446:   {
 447:     return quote(str, RFC3986_SSP);
 448:   }
 449: 
 450:   /**
 451:    * Quote characters illegal in URI authorities in given string.
 452:    *
 453:    * Replace illegal characters by encoding their UTF-8
 454:    * representation as "%" + hex code for each resulting
 455:    * UTF-8 character.
 456:    *
 457:    * @param str The string to quote
 458:    *
 459:    * @return The quoted string.
 460:    */
 461:   private static String quoteAuthority(String str)
 462:   {
 463:     // Technically, we should be using RFC2396_AUTHORITY, but
 464:     // it contains no additional characters.
 465:     return quote(str, RFC3986_REG_NAME);
 466:   }
 467: 
 468:   /**
 469:    * Quotes the characters in the supplied string that are not part of
 470:    * the specified set of legal characters.
 471:    *
 472:    * @param str the string to quote
 473:    * @param legalCharacters the set of legal characters
 474:    *
 475:    * @return the quoted string.
 476:    */
 477:   private static String quote(String str, String legalCharacters)
 478:   {
 479:     StringBuffer sb = new StringBuffer(str.length());
 480:     for (int i = 0; i < str.length(); i++)
 481:       {
 482:     char c = str.charAt(i);
 483:     if (legalCharacters.indexOf(c) == -1)
 484:       {
 485:         if (c <= 127)
 486:           {
 487:         sb.append('%');
 488:         sb.append(HEX.charAt(c / 16));
 489:         sb.append(HEX.charAt(c % 16));
 490:           }
 491:       }
 492:     else
 493:       sb.append(c);
 494:       }
 495:     return sb.toString();
 496:   }
 497: 
 498:   /**
 499:    * Quote characters illegal in URI hosts in given string.
 500:    *
 501:    * Replace illegal characters by encoding their UTF-8
 502:    * representation as "%" + hex code for each resulting
 503:    * UTF-8 character.
 504:    *
 505:    * @param str The string to quote
 506:    *
 507:    * @return The quoted string.
 508:    */
 509:   private static String quoteHost(String str)
 510:   {
 511:     return quote(str, RFC3986_HOST);
 512:   }
 513: 
 514:   /**
 515:    * Quote characters illegal in URI paths in given string.
 516:    *
 517:    * Replace illegal characters by encoding their UTF-8
 518:    * representation as "%" + hex code for each resulting
 519:    * UTF-8 character.
 520:    *
 521:    * @param str The string to quote
 522:    *
 523:    * @return The quoted string.
 524:    */
 525:   private static String quotePath(String str)
 526:   {
 527:     // Technically, we should be using RFC2396_PATH, but
 528:     // it contains no additional characters.
 529:     return quote(str, RFC3986_PATH_SEGMENTS);
 530:   }
 531: 
 532:   /**
 533:    * Quote characters illegal in URI user infos in given string.
 534:    *
 535:    * Replace illegal characters by encoding their UTF-8
 536:    * representation as "%" + hex code for each resulting
 537:    * UTF-8 character.
 538:    *
 539:    * @param str The string to quote
 540:    *
 541:    * @return The quoted string.
 542:    */
 543:   private static String quoteUserInfo(String str)
 544:   {
 545:     return quote(str, RFC3986_USERINFO);
 546:   }
 547: 
 548:   /**
 549:    * Creates an URI from the given string
 550:    *
 551:    * @param str The string to create the URI from
 552:    *
 553:    * @exception URISyntaxException If the given string violates RFC 2396
 554:    * @exception NullPointerException If str is null
 555:    */
 556:   public URI(String str) throws URISyntaxException
 557:   {
 558:     this.string = str;
 559:     parseURI(str);
 560:   }
 561: 
 562:   /**
 563:    * Create an URI from the given components
 564:    *
 565:    * @param scheme The scheme name
 566:    * @param userInfo The username and authorization info
 567:    * @param host The hostname
 568:    * @param port The port number
 569:    * @param path The path
 570:    * @param query The query
 571:    * @param fragment The fragment
 572:    *
 573:    * @exception URISyntaxException If the given string violates RFC 2396
 574:    */
 575:   public URI(String scheme, String userInfo, String host, int port,
 576:              String path, String query, String fragment)
 577:     throws URISyntaxException
 578:   {
 579:     this((scheme == null ? "" : scheme + ":")
 580:          + (userInfo == null && host == null && port == -1 ? "" : "//")
 581:          + (userInfo == null ? "" : quoteUserInfo(userInfo) + "@")
 582:          + (host == null ? "" : quoteHost(host))
 583:          + (port == -1 ? "" : ":" + String.valueOf(port))
 584:          + (path == null ? "" : quotePath(path))
 585:          + (query == null ? "" : "?" + quote(query))
 586:          + (fragment == null ? "" : "#" + quote(fragment)));
 587:   }
 588: 
 589:   /**
 590:    * Create an URI from the given components
 591:    *
 592:    * @param scheme The scheme name
 593:    * @param authority The authority
 594:    * @param path The apth
 595:    * @param query The query
 596:    * @param fragment The fragment
 597:    *
 598:    * @exception URISyntaxException If the given string violates RFC 2396
 599:    */
 600:   public URI(String scheme, String authority, String path, String query,
 601:              String fragment) throws URISyntaxException
 602:   {
 603:     this((scheme == null ? "" : scheme + ":")
 604:          + (authority == null ? "" : "//" + quoteAuthority(authority))
 605:          + (path == null ? "" : quotePath(path))
 606:          + (query == null ? "" : "?" + quote(query))
 607:          + (fragment == null ? "" : "#" + quote(fragment)));
 608:   }
 609: 
 610:   /**
 611:    * Create an URI from the given components
 612:    *
 613:    * @param scheme The scheme name
 614:    * @param host The hostname
 615:    * @param path The path
 616:    * @param fragment The fragment
 617:    *
 618:    * @exception URISyntaxException If the given string violates RFC 2396
 619:    */
 620:   public URI(String scheme, String host, String path, String fragment)
 621:     throws URISyntaxException
 622:   {
 623:     this(scheme, null, host, -1, path, null, fragment);
 624:   }
 625: 
 626:   /**
 627:    * Create an URI from the given components
 628:    *
 629:    * @param scheme The scheme name
 630:    * @param ssp The scheme specific part
 631:    * @param fragment The fragment
 632:    *
 633:    * @exception URISyntaxException If the given string violates RFC 2396
 634:    */
 635:   public URI(String scheme, String ssp, String fragment)
 636:     throws URISyntaxException
 637:   {
 638:     this((scheme == null ? "" : scheme + ":")
 639:          + (ssp == null ? "" : quote(ssp))
 640:          + (fragment == null ? "" : "#" + quote(fragment)));
 641:   }
 642: 
 643:   /**
 644:    * Create an URI from the given string
 645:    *
 646:    * @param str The string to create the URI from
 647:    *
 648:    * @exception IllegalArgumentException If the given string violates RFC 2396
 649:    * @exception NullPointerException If str is null
 650:    */
 651:   public static URI create(String str)
 652:   {
 653:     try
 654:       {
 655:     return new URI(str);
 656:       }
 657:     catch (URISyntaxException e)
 658:       {
 659:     throw (IllegalArgumentException) new IllegalArgumentException()
 660:           .initCause(e);
 661:       }
 662:   }
 663: 
 664:   /**
 665:    * Attempts to parse this URI's authority component, if defined,
 666:    * into user-information, host, and port components.  The purpose
 667:    * of this method was to disambiguate between some authority sections,
 668:    * which form invalid server-based authories, but valid registry
 669:    * based authorities.  In the updated RFC 3986, the authority section
 670:    * is defined differently, with registry-based authorities part of
 671:    * the host section.  Thus, this method is now simply an explicit
 672:    * way of parsing any authority section.
 673:    *
 674:    * @return the URI, with the authority section parsed into user
 675:    *         information, host and port components.
 676:    * @throws URISyntaxException if the given string violates RFC 2396
 677:    */
 678:   public URI parseServerAuthority() throws URISyntaxException
 679:   {
 680:     if (rawAuthority != null)
 681:       {
 682:     Matcher matcher = AUTHORITY_PATTERN.matcher(rawAuthority);
 683: 
 684:     if (matcher.matches())
 685:       {
 686:         rawUserInfo = getURIGroup(matcher, AUTHORITY_USERINFO_GROUP);
 687:         rawHost = getURIGroup(matcher, AUTHORITY_HOST_GROUP);
 688:         
 689:         String portStr = getURIGroup(matcher, AUTHORITY_PORT_GROUP);
 690:         
 691:         if (portStr != null)
 692:           try
 693:         {
 694:           port = Integer.parseInt(portStr);
 695:         }
 696:           catch (NumberFormatException e)
 697:         {
 698:           URISyntaxException use =
 699:             new URISyntaxException
 700:               (string, "doesn't match URI regular expression");
 701:           use.initCause(e);
 702:           throw use;
 703:         }
 704:       }
 705:     else
 706:       throw new URISyntaxException(string,
 707:                        "doesn't match URI regular expression");
 708:       }
 709:     return this;
 710:   }
 711: 
 712:   /**
 713:    * <p>
 714:    * Returns a normalized version of the URI.  If the URI is opaque,
 715:    * or its path is already in normal form, then this URI is simply
 716:    * returned.  Otherwise, the following transformation of the path
 717:    * element takes place:
 718:    * </p>
 719:    * <ol>
 720:    * <li>All `.' segments are removed.</li>
 721:    * <li>Each `..' segment which can be paired with a prior non-`..' segment
 722:    * is removed along with the preceding segment.</li>
 723:    * <li>A `.' segment is added to the front if the first segment contains
 724:    * a colon (`:').  This is a deviation from the RFC, which prevents
 725:    * confusion between the path and the scheme.</li>
 726:    * </ol>
 727:    * <p>
 728:    * The resulting URI will be free of `.' and `..' segments, barring those
 729:    * that were prepended or which couldn't be paired, respectively.
 730:    * </p>
 731:    *
 732:    * @return the normalized URI.
 733:    */
 734:   public URI normalize()
 735:   {
 736:     if (isOpaque() || path.indexOf("/./") == -1 && path.indexOf("/../") == -1)
 737:       return this;
 738:     try
 739:       {
 740:     return new URI(scheme, authority, normalizePath(path), query,
 741:                fragment);
 742:       }
 743:     catch (URISyntaxException e)
 744:       {
 745:     throw (Error) new InternalError("Normalized URI variant could not "+
 746:                     "be constructed").initCause(e);
 747:       }
 748:   }
 749: 
 750:   /**
 751:    * <p>
 752:    * Normalize the given path.  The following transformation takes place:
 753:    * </p>
 754:    * <ol>
 755:    * <li>All `.' segments are removed.</li>
 756:    * <li>Each `..' segment which can be paired with a prior non-`..' segment
 757:    * is removed along with the preceding segment.</li>
 758:    * <li>A `.' segment is added to the front if the first segment contains
 759:    * a colon (`:').  This is a deviation from the RFC, which prevents
 760:    * confusion between the path and the scheme.</li>
 761:    * </ol>
 762:    * <p>
 763:    * The resulting URI will be free of `.' and `..' segments, barring those
 764:    * that were prepended or which couldn't be paired, respectively.
 765:    * </p>
 766:    * 
 767:    * @param relativePath the relative path to be normalized.
 768:    * @return the normalized path.
 769:    */
 770:   private String normalizePath(String relativePath)
 771:   {
 772:     /* 
 773:        This follows the algorithm in section 5.2.4. of RFC3986,
 774:        but doesn't modify the input buffer.
 775:     */
 776:     StringBuffer input = new StringBuffer(relativePath);
 777:     StringBuffer output = new StringBuffer();
 778:     int start = 0;
 779:     while (start < input.length())
 780:       {
 781:     /* A */
 782:     if (input.indexOf("../",start) == start)
 783:       {
 784:         start += 3;
 785:         continue;
 786:       }
 787:     if (input.indexOf("./",start) == start)
 788:       {
 789:         start += 2;
 790:         continue;
 791:       }
 792:     /* B */
 793:     if (input.indexOf("/./",start) == start)
 794:       {
 795:         start += 2;
 796:         continue;
 797:       }
 798:     if (input.indexOf("/.",start) == start
 799:         && input.charAt(start + 2) != '.')
 800:       {
 801:         start += 1;
 802:         input.setCharAt(start,'/');
 803:         continue;
 804:       }
 805:     /* C */
 806:     if (input.indexOf("/../",start) == start)
 807:       {
 808:         start += 3;
 809:         removeLastSegment(output);
 810:         continue;
 811:       }
 812:     if (input.indexOf("/..",start) == start)
 813:       {
 814:         start += 2;
 815:         input.setCharAt(start,'/');
 816:         removeLastSegment(output);
 817:         continue;
 818:       }
 819:     /* D */
 820:     if (start == input.length() - 1 && input.indexOf(".",start) == start)
 821:       {
 822:         input.delete(0,1);
 823:         continue;
 824:       }
 825:     if (start == input.length() - 2 && input.indexOf("..",start) == start)
 826:       {
 827:         input.delete(0,2);
 828:         continue;
 829:       }
 830:     /* E */
 831:     int indexOfSlash = input.indexOf("/",start);
 832:     while (indexOfSlash == start)
 833:       {
 834:         output.append("/");
 835:         ++start;
 836:         indexOfSlash = input.indexOf("/",start);
 837:       }
 838:     if (indexOfSlash == -1)
 839:       indexOfSlash = input.length();
 840:     output.append(input.substring(start, indexOfSlash));
 841:         start = indexOfSlash;
 842:       }
 843:     return output.toString();
 844:   }
 845: 
 846:   /**
 847:    * Removes the last segment of the path from the specified buffer.
 848:    *
 849:    * @param buffer the buffer containing the path.
 850:    */
 851:   private void removeLastSegment(StringBuffer buffer)
 852:   {
 853:     int lastSlash = buffer.lastIndexOf("/");
 854:     if (lastSlash == -1)
 855:       buffer.setLength(0);
 856:     else
 857:       buffer.setLength(lastSlash);
 858:   }
 859: 
 860:   /**
 861:    * Resolves the given URI against this URI
 862:    *
 863:    * @param uri The URI to resolve against this URI
 864:    *
 865:    * @return The resulting URI, or null when it couldn't be resolved
 866:    * for some reason.
 867:    *
 868:    * @throws NullPointerException if uri is null
 869:    */
 870:   public URI resolve(URI uri)
 871:   {
 872:     if (uri.isAbsolute())
 873:       return uri;
 874:     if (uri.isOpaque())
 875:       return uri;
 876: 
 877:     String scheme = uri.getScheme();
 878:     String schemeSpecificPart = uri.getSchemeSpecificPart();
 879:     String authority = uri.getAuthority();
 880:     String path = uri.getPath();
 881:     String query = uri.getQuery();
 882:     String fragment = uri.getFragment();
 883: 
 884:     try
 885:       {
 886:     if (fragment != null && path != null && path.equals("")
 887:         && scheme == null && authority == null && query == null)
 888:       return new URI(this.scheme, this.schemeSpecificPart, fragment);
 889: 
 890:     if (authority == null)
 891:       {
 892:         authority = this.authority;
 893:         if (path == null)
 894:           path = "";
 895:         if (! (path.startsWith("/")))
 896:           {
 897:         StringBuffer basepath = new StringBuffer(this.path);
 898:         int i = this.path.lastIndexOf('/');
 899: 
 900:         if (i >= 0)
 901:           basepath.delete(i + 1, basepath.length());
 902: 
 903:         basepath.append(path);
 904:         path = normalizePath(basepath.toString());
 905:           }
 906:       }
 907:     return new URI(this.scheme, authority, path, query, fragment);
 908:       }
 909:     catch (URISyntaxException e)
 910:       {
 911:     throw (Error) new InternalError("Resolved URI variant could not "+
 912:                     "be constructed").initCause(e);
 913:       }
 914:   }
 915: 
 916:   /**
 917:    * Resolves the given URI string against this URI
 918:    *
 919:    * @param str The URI as string to resolve against this URI
 920:    *
 921:    * @return The resulting URI
 922:    *
 923:    * @throws IllegalArgumentException If the given URI string
 924:    * violates RFC 2396
 925:    * @throws NullPointerException If uri is null
 926:    */
 927:   public URI resolve(String str) throws IllegalArgumentException
 928:   {
 929:     return resolve(create(str));
 930:   }
 931: 
 932:   /**
 933:    * <p>
 934:    * Relativizes the given URI against this URI.  The following
 935:    * algorithm is used:
 936:    * </p>
 937:    * <ul>
 938:    * <li>If either URI is opaque, the given URI is returned.</li>
 939:    * <li>If the schemes of the URIs differ, the given URI is returned.</li>
 940:    * <li>If the authority components of the URIs differ, then the given
 941:    * URI is returned.</li>
 942:    * <li>If the path of this URI is not a prefix of the supplied URI,
 943:    * then the given URI is returned.</li>
 944:    * <li>If all the above conditions hold, a new URI is created using the
 945:    * query and fragment components of the given URI, along with a path
 946:    * computed by removing the path of this URI from the start of the path
 947:    * of the supplied URI.</li>
 948:    * </ul>
 949:    *
 950:    * @param uri the URI to relativize agsint this URI
 951:    * @return the resulting URI
 952:    * @throws NullPointerException if the uri is null
 953:    */
 954:   public URI relativize(URI uri)
 955:   {
 956:     if (isOpaque() || uri.isOpaque())
 957:       return uri;
 958:     if (scheme == null && uri.getScheme() != null)
 959:       return uri;
 960:     if (scheme != null && !(scheme.equals(uri.getScheme())))
 961:       return uri;
 962:     if (rawAuthority == null && uri.getRawAuthority() != null)
 963:       return uri;
 964:     if (rawAuthority != null && !(rawAuthority.equals(uri.getRawAuthority())))
 965:       return uri;
 966:     if (!(uri.getRawPath().startsWith(rawPath)))
 967:       return uri;
 968:     try
 969:       {
 970:     return new URI(null, null, 
 971:                uri.getRawPath().substring(rawPath.length()),
 972:                uri.getRawQuery(), uri.getRawFragment());
 973:       }
 974:     catch (URISyntaxException e)
 975:       {
 976:     throw (Error) new InternalError("Relativized URI variant could not "+
 977:                     "be constructed").initCause(e);       
 978:       }
 979:   }
 980: 
 981:   /**
 982:    * Creates an URL from an URI
 983:    *
 984:    * @throws MalformedURLException If a protocol handler for the URL could
 985:    * not be found, or if some other error occurred while constructing the URL
 986:    * @throws IllegalArgumentException If the URI is not absolute
 987:    */
 988:   public URL toURL() throws IllegalArgumentException, MalformedURLException
 989:   {
 990:     if (isAbsolute())
 991:       return new URL(this.toString());
 992: 
 993:     throw new IllegalArgumentException("not absolute");
 994:   }
 995: 
 996:   /**
 997:    * Returns the scheme of the URI
 998:    */
 999:   public String getScheme()
1000:   {
1001:     return scheme;
1002:   }
1003: 
1004:   /**
1005:    * Tells whether this URI is absolute or not
1006:    */
1007:   public boolean isAbsolute()
1008:   {
1009:     return scheme != null;
1010:   }
1011: 
1012:   /**
1013:    * Tell whether this URI is opaque or not
1014:    */
1015:   public boolean isOpaque()
1016:   {
1017:     return ((scheme != null) && ! (schemeSpecificPart.startsWith("/")));
1018:   }
1019: 
1020:   /**
1021:    * Returns the raw scheme specific part of this URI.
1022:    * The scheme-specific part is never undefined, though it may be empty
1023:    */
1024:   public String getRawSchemeSpecificPart()
1025:   {
1026:     return rawSchemeSpecificPart;
1027:   }
1028: 
1029:   /**
1030:    * Returns the decoded scheme specific part of this URI.
1031:    */
1032:   public String getSchemeSpecificPart()
1033:   {
1034:     return schemeSpecificPart;
1035:   }
1036: 
1037:   /**
1038:    * Returns the raw authority part of this URI
1039:    */
1040:   public String getRawAuthority()
1041:   {
1042:     return rawAuthority;
1043:   }
1044: 
1045:   /**
1046:    * Returns the decoded authority part of this URI
1047:    */
1048:   public String getAuthority()
1049:   {
1050:     return authority;
1051:   }
1052: 
1053:   /**
1054:    * Returns the raw user info part of this URI
1055:    */
1056:   public String getRawUserInfo()
1057:   {
1058:     return rawUserInfo;
1059:   }
1060: 
1061:   /**
1062:    * Returns the decoded user info part of this URI
1063:    */
1064:   public String getUserInfo()
1065:   {
1066:     return userInfo;
1067:   }
1068: 
1069:   /**
1070:    * Returns the hostname of the URI
1071:    */
1072:   public String getHost()
1073:   {
1074:     return host;
1075:   }
1076: 
1077:   /**
1078:    * Returns the port number of the URI
1079:    */
1080:   public int getPort()
1081:   {
1082:     return port;
1083:   }
1084: 
1085:   /**
1086:    * Returns the raw path part of this URI
1087:    */
1088:   public String getRawPath()
1089:   {
1090:     return rawPath;
1091:   }
1092: 
1093:   /**
1094:    * Returns the path of the URI
1095:    */
1096:   public String getPath()
1097:   {
1098:     return path;
1099:   }
1100: 
1101:   /**
1102:    * Returns the raw query part of this URI
1103:    */
1104:   public String getRawQuery()
1105:   {
1106:     return rawQuery;
1107:   }
1108: 
1109:   /**
1110:    * Returns the query of the URI
1111:    */
1112:   public String getQuery()
1113:   {
1114:     return query;
1115:   }
1116: 
1117:   /**
1118:    * Return the raw fragment part of this URI
1119:    */
1120:   public String getRawFragment()
1121:   {
1122:     return rawFragment;
1123:   }
1124: 
1125:   /**
1126:    * Returns the fragment of the URI
1127:    */
1128:   public String getFragment()
1129:   {
1130:     return fragment;
1131:   }
1132: 
1133:   /**
1134:    * <p> 
1135:    * Compares the URI with the given object for equality.  If the
1136:    * object is not a <code>URI</code>, then the method returns false.
1137:    * Otherwise, the following criteria are observed:
1138:    * </p>
1139:    * <ul>
1140:    * <li>The scheme of the URIs must either be null (undefined) in both cases,
1141:    * or equal, ignorant of case.</li>
1142:    * <li>The raw fragment of the URIs must either be null (undefined) in both
1143:    * cases, or equal, ignorant of case.</li>
1144:    * <li>Both URIs must be of the same type (opaque or hierarchial)</li>
1145:    * <li><strong>For opaque URIs:</strong></li>
1146:    * <ul>
1147:    * <li>The raw scheme-specific parts must be equal.</li>
1148:    * </ul>
1149:    * <li>For hierarchical URIs:</li>
1150:    * <ul>
1151:    * <li>The raw paths must be equal, ignorant of case.</li>
1152:    * <li>The raw queries are either both undefined or both equal, ignorant
1153:    * of case.</li>
1154:    * <li>The raw authority sections are either both undefined or:</li>
1155:    * <li><strong>For registry-based authorities:</strong></li>
1156:    * <ul><li>they are equal.</li></ul>
1157:    * <li><strong>For server-based authorities:</strong></li>
1158:    * <ul>
1159:    * <li>the hosts are equal, ignoring case</li>
1160:    * <li>the ports are equal</li>
1161:    * <li>the user information components are equal</li>
1162:    * </ul>
1163:    * </ul>
1164:    * </ul>
1165:    *
1166:    * @param obj the obj to compare the URI with.
1167:    * @return <code>true</code> if the objects are equal, according to
1168:    *         the specification above.
1169:    */
1170:   public boolean equals(Object obj)
1171:   {
1172:     if (!(obj instanceof URI))
1173:       return false;
1174:     URI uriObj = (URI) obj;
1175:     if (scheme == null)
1176:       {
1177:     if (uriObj.getScheme() != null)
1178:       return false;
1179:       }
1180:     else
1181:       if (!(scheme.equalsIgnoreCase(uriObj.getScheme())))
1182:     return false;
1183:     if (rawFragment == null)
1184:       {
1185:     if (uriObj.getRawFragment() != null)
1186:       return false;
1187:       }
1188:     else
1189:       if (!(rawFragment.equalsIgnoreCase(uriObj.getRawFragment())))
1190:     return false;
1191:     boolean opaqueThis = isOpaque();
1192:     boolean opaqueObj = uriObj.isOpaque();
1193:     if (opaqueThis && opaqueObj)
1194:       return rawSchemeSpecificPart.equals(uriObj.getRawSchemeSpecificPart());
1195:     else if (!opaqueThis && !opaqueObj)
1196:       {
1197:     boolean common = rawPath.equalsIgnoreCase(uriObj.getRawPath())
1198:       && ((rawQuery == null && uriObj.getRawQuery() == null)
1199:           || rawQuery.equalsIgnoreCase(uriObj.getRawQuery()));
1200:     if (rawAuthority == null && uriObj.getRawAuthority() == null)
1201:       return common;
1202:     if (host == null)
1203:       return common 
1204:         && rawAuthority.equalsIgnoreCase(uriObj.getRawAuthority());
1205:     return common 
1206:       && host.equalsIgnoreCase(uriObj.getHost())
1207:       && port == uriObj.getPort()
1208:       && (rawUserInfo == null ?
1209:           uriObj.getRawUserInfo() == null :
1210:           rawUserInfo.equalsIgnoreCase(uriObj.getRawUserInfo()));
1211:       }
1212:     else
1213:       return false;
1214:   }
1215: 
1216:   /**
1217:    * Computes the hashcode of the URI
1218:    */
1219:   public int hashCode()
1220:   {
1221:     return (getScheme() == null ? 0 : 13 * getScheme().hashCode())
1222:       + 17 * getRawSchemeSpecificPart().hashCode()
1223:       + (getRawFragment() == null ? 0 : 21 + getRawFragment().hashCode());
1224:   }
1225: 
1226:   /**
1227:    * Compare the URI with another object that must also be a URI.
1228:    * Undefined components are taken to be less than any other component.
1229:    * The following criteria are observed:
1230:    * </p>
1231:    * <ul>
1232:    * <li>Two URIs with different schemes are compared according to their
1233:    * scheme, regardless of case.</li>
1234:    * <li>A hierarchical URI is less than an opaque URI with the same
1235:    * scheme.</li>
1236:    * <li><strong>For opaque URIs:</strong></li>
1237:    * <ul>
1238:    * <li>URIs with differing scheme-specific parts are ordered according
1239:    * to the ordering of the scheme-specific part.</li>
1240:    * <li>URIs with the same scheme-specific part are ordered by the
1241:    * raw fragment.</li>
1242:    * </ul>
1243:    * <li>For hierarchical URIs:</li>
1244:    * <ul>
1245:    * <li>URIs are ordered according to their raw authority sections,
1246:    * if they are unequal.</li>
1247:    * <li><strong>For registry-based authorities:</strong></li>
1248:    * <ul><li>they are ordered according to the ordering of the authority
1249:    * component.</li></ul>
1250:    * <li><strong>For server-based authorities:</strong></li>
1251:    * <ul>
1252:    * <li>URIs are ordered according to the raw user information.</li>
1253:    * <li>URIs with the same user information are ordered by the host,
1254:    * ignoring case.</li>
1255:    * <lI>URIs with the same host are ordered by the port.</li>
1256:    * </ul>
1257:    * <li>URIs with the same authority section are ordered by the raw path.</li>
1258:    * <li>URIs with the same path are ordered by their raw query.</li>
1259:    * <li>URIs with the same query are ordered by their raw fragments.</li>
1260:    * </ul>
1261:    * </ul>
1262:    *
1263:    * @param obj This object to compare this URI with
1264:    * @return a negative integer, zero or a positive integer depending
1265:    *         on whether this URI is less than, equal to or greater
1266:    *         than that supplied, respectively.
1267:    * @throws ClassCastException if the given object is not a URI
1268:    */
1269:   public int compareTo(Object obj) 
1270:     throws ClassCastException
1271:   {
1272:     URI uri = (URI) obj;
1273:     if (scheme == null && uri.getScheme() != null)
1274:       return -1;
1275:     if (scheme != null)
1276:       {
1277:     int sCompare = scheme.compareToIgnoreCase(uri.getScheme()); 
1278:     if (sCompare != 0)
1279:       return sCompare;
1280:       }
1281:     boolean opaqueThis = isOpaque();
1282:     boolean opaqueObj = uri.isOpaque();
1283:     if (opaqueThis && !opaqueObj)
1284:       return 1;
1285:     if (!opaqueThis && opaqueObj)
1286:       return -1;
1287:     if (opaqueThis)
1288:       {
1289:     int ssCompare = 
1290:       rawSchemeSpecificPart.compareTo(uri.getRawSchemeSpecificPart());
1291:     if (ssCompare == 0)
1292:       return compareFragments(uri);
1293:     else
1294:       return ssCompare;
1295:       }
1296:     if (rawAuthority == null && uri.getRawAuthority() != null)
1297:       return -1;
1298:     if (rawAuthority != null)
1299:       {
1300:     int aCompare = rawAuthority.compareTo(uri.getRawAuthority());
1301:     if (aCompare != 0)
1302:       {
1303:         if (host == null)
1304:           return aCompare;
1305:         if (rawUserInfo == null && uri.getRawUserInfo() != null)
1306:           return -1;
1307:         int uCompare = rawUserInfo.compareTo(uri.getRawUserInfo());
1308:         if (uCompare != 0)
1309:           return uCompare;
1310:         if (host == null && uri.getHost() != null)
1311:           return -1;
1312:         int hCompare = host.compareTo(uri.getHost());
1313:         if (hCompare != 0)
1314:           return hCompare;
1315:         return new Integer(port).compareTo(new Integer(uri.getPort()));
1316:       }
1317:       }
1318:     if (rawPath == null && uri.getRawPath() != null)
1319:       return -1;
1320:     if (rawPath != null)
1321:       {
1322:     int pCompare = rawPath.compareTo(uri.getRawPath()); 
1323:     if (pCompare != 0)
1324:       return pCompare;
1325:       }
1326:     if (rawQuery == null && uri.getRawQuery() != null)
1327:       return -1;
1328:     if (rawQuery != null)
1329:       {
1330:     int qCompare = rawQuery.compareTo(uri.getRawQuery());
1331:     if (qCompare != 0)
1332:       return qCompare;
1333:       }
1334:     return compareFragments(uri);
1335:   }
1336: 
1337:   /**
1338:    * Compares the fragment of this URI with that of the supplied URI.
1339:    *
1340:    * @param uri the URI to compare with this one.
1341:    * @return a negative integer, zero or a positive integer depending
1342:    *         on whether this uri's fragment is less than, equal to
1343:    *         or greater than the fragment of the uri supplied, respectively.
1344:    */
1345:   private int compareFragments(URI uri)
1346:   {
1347:     if (rawFragment == null && uri.getRawFragment() != null)
1348:       return -1;
1349:     else if (rawFragment == null)
1350:       return 0;
1351:     else
1352:       return rawFragment.compareTo(uri.getRawFragment());
1353:   }
1354: 
1355:   /**
1356:    * Returns the URI as a String.  If the URI was created using a constructor,
1357:    * then this will be the same as the original input string.
1358:    *
1359:    * @return a string representation of the URI.
1360:    */
1361:   public String toString()
1362:   {
1363:     return (scheme == null ? "" : scheme + ":")
1364:       + rawSchemeSpecificPart
1365:       + (rawFragment == null ? "" : "#" + rawFragment);
1366:   }
1367: 
1368:   /**
1369:    * Returns the URI as US-ASCII string.  This is the same as the result
1370:    * from <code>toString()</code> for URIs that don't contain any non-US-ASCII
1371:    * characters.  Otherwise, the non-US-ASCII characters are replaced
1372:    * by their percent-encoded representations.
1373:    *
1374:    * @return a string representation of the URI, containing only US-ASCII
1375:    *         characters.
1376:    */
1377:   public String toASCIIString()
1378:   {
1379:     String strRep = toString();
1380:     boolean inNonAsciiBlock = false;
1381:     StringBuffer buffer = new StringBuffer();
1382:     StringBuffer encBuffer = null;
1383:     for (int i = 0; i < strRep.length(); i++)
1384:       {
1385:     char c = strRep.charAt(i);
1386:     if (c <= 127)
1387:       {
1388:         if (inNonAsciiBlock)
1389:           {
1390:         buffer.append(escapeCharacters(encBuffer.toString()));
1391:         inNonAsciiBlock = false;
1392:           }
1393:         buffer.append(c);
1394:       }
1395:     else
1396:       {
1397:         if (!inNonAsciiBlock)
1398:           {
1399:         encBuffer = new StringBuffer();
1400:         inNonAsciiBlock = true;
1401:           }
1402:         encBuffer.append(c);
1403:       }
1404:       }
1405:     return buffer.toString();
1406:   }
1407: 
1408:   /**
1409:    * Converts the non-ASCII characters in the supplied string
1410:    * to their equivalent percent-encoded representations.
1411:    * That is, they are replaced by "%" followed by their hexadecimal value.
1412:    *
1413:    * @param str a string including non-ASCII characters.
1414:    * @return the string with the non-ASCII characters converted to their
1415:    *         percent-encoded representations.
1416:    */
1417:   private static String escapeCharacters(String str)
1418:   {
1419:     try
1420:       {
1421:     StringBuffer sb = new StringBuffer(); 
1422:     // this is far from optimal, but it works
1423:     byte[] utf8 = str.getBytes("utf-8");
1424:     for (int j = 0; j < utf8.length; j++)
1425:       {
1426:         sb.append('%');
1427:         sb.append(HEX.charAt((utf8[j] & 0xff) / 16));
1428:         sb.append(HEX.charAt((utf8[j] & 0xff) % 16));
1429:       }
1430:     return sb.toString();
1431:       }
1432:     catch (java.io.UnsupportedEncodingException x)
1433:       {
1434:     throw (Error) new InternalError("Escaping error").initCause(x);
1435:       }
1436:   }
1437: 
1438: }