001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 * 019 */ 020 package org.apache.directory.shared.ldap.util; 021 022 023 import java.io.ByteArrayOutputStream; 024 import java.io.File; 025 import java.io.FileFilter; 026 import java.io.OutputStreamWriter; 027 import java.io.UnsupportedEncodingException; 028 import java.lang.reflect.Method; 029 import java.nio.charset.Charset; 030 import java.util.ArrayList; 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 import java.util.regex.Pattern; 035 import java.util.regex.PatternSyntaxException; 036 037 import javax.naming.InvalidNameException; 038 039 import org.apache.directory.shared.asn1.codec.binary.Hex; 040 import org.apache.directory.shared.i18n.I18n; 041 import org.apache.directory.shared.ldap.entry.BinaryValue; 042 import org.apache.directory.shared.ldap.entry.StringValue; 043 import org.apache.directory.shared.ldap.schema.syntaxCheckers.UuidSyntaxChecker; 044 045 046 /** 047 * Various string manipulation methods that are more efficient then chaining 048 * string operations: all is done in the same buffer without creating a bunch of 049 * string objects. 
050 * 051 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a> 052 * @version $Rev: 928945 $ 053 */ 054 public class StringTools 055 { 056 /** The default charset, because it's not provided by JDK 1.5 */ 057 static String defaultCharset = null; 058 059 060 061 // ~ Static fields/initializers 062 // ----------------------------------------------------------------- 063 064 /** Hex chars */ 065 private static final byte[] HEX_CHAR = new byte[] 066 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; 067 068 private static final int UTF8_MULTI_BYTES_MASK = 0x0080; 069 070 private static final int UTF8_TWO_BYTES_MASK = 0x00E0; 071 072 private static final int UTF8_TWO_BYTES = 0x00C0; 073 074 private static final int UTF8_THREE_BYTES_MASK = 0x00F0; 075 076 private static final int UTF8_THREE_BYTES = 0x00E0; 077 078 private static final int UTF8_FOUR_BYTES_MASK = 0x00F8; 079 080 private static final int UTF8_FOUR_BYTES = 0x00F0; 081 082 private static final int UTF8_FIVE_BYTES_MASK = 0x00FC; 083 084 private static final int UTF8_FIVE_BYTES = 0x00F8; 085 086 private static final int UTF8_SIX_BYTES_MASK = 0x00FE; 087 088 private static final int UTF8_SIX_BYTES = 0x00FC; 089 090 /** <alpha> ::= [0x41-0x5A] | [0x61-0x7A] */ 091 public static final boolean[] ALPHA = 092 { 093 false, false, false, false, false, false, false, false, 094 false, false, false, false, false, false, false, false, 095 false, false, false, false, false, false, false, false, 096 false, false, false, false, false, false, false, false, 097 false, false, false, false, false, false, false, false, 098 false, false, false, false, false, false, false, false, 099 false, false, false, false, false, false, false, false, 100 false, false, false, false, false, false, false, false, 101 false, true, true, true, true, true, true, true, 102 true, true, true, true, true, true, true, true, 103 true, true, true, true, true, true, true, true, 104 true, true, true, 
false, false, false, false, false, 105 false, true, true, true, true, true, true, true, 106 true, true, true, true, true, true, true, true, 107 true, true, true, true, true, true, true, true, 108 true, true, true, false, false, false, false, false 109 }; 110 111 /** <alpha-lower-case> ::= [0x61-0x7A] */ 112 public static final boolean[] ALPHA_LOWER_CASE = 113 { 114 false, false, false, false, false, false, false, false, 115 false, false, false, false, false, false, false, false, 116 false, false, false, false, false, false, false, false, 117 false, false, false, false, false, false, false, false, 118 false, false, false, false, false, false, false, false, 119 false, false, false, false, false, false, false, false, 120 false, false, false, false, false, false, false, false, 121 false, false, false, false, false, false, false, false, 122 false, false, false, false, false, false, false, false, 123 false, false, false, false, false, false, false, false, 124 false, false, false, false, false, false, false, false, 125 false, false, false, false, false, false, false, false, 126 false, true, true, true, true, true, true, true, 127 true, true, true, true, true, true, true, true, 128 true, true, true, true, true, true, true, true, 129 true, true, true, false, false, false, false, false 130 }; 131 132 /** <alpha-upper-case> ::= [0x41-0x5A] */ 133 public static final boolean[] ALPHA_UPPER_CASE = 134 { 135 false, false, false, false, false, false, false, false, 136 false, false, false, false, false, false, false, false, 137 false, false, false, false, false, false, false, false, 138 false, false, false, false, false, false, false, false, 139 false, false, false, false, false, false, false, false, 140 false, false, false, false, false, false, false, false, 141 false, false, false, false, false, false, false, false, 142 false, false, false, false, false, false, false, false, 143 false, true, true, true, true, true, true, true, 144 true, true, true, true, true, true, true, true, 
145 true, true, true, true, true, true, true, true, 146 true, true, true, false, false, false, false, false, 147 false, false, false, false, false, false, false, false, 148 false, false, false, false, false, false, false, false, 149 false, false, false, false, false, false, false, false, 150 false, false, false, false, false, false, false, false, 151 }; 152 153 /** <alpha-digit> | <digit> */ 154 public static final boolean[] ALPHA_DIGIT = 155 { 156 false, false, false, false, false, false, false, false, 157 false, false, false, false, false, false, false, false, 158 false, false, false, false, false, false, false, false, 159 false, false, false, false, false, false, false, false, 160 false, false, false, false, false, false, false, false, 161 false, false, false, false, false, false, false, false, 162 true, true, true, true, true, true, true, true, 163 true, true, false, false, false, false, false, false, 164 false, true, true, true, true, true, true, true, 165 true, true, true, true, true, true, true, true, 166 true, true, true, true, true, true, true, true, 167 true, true, true, false, false, false, false, false, 168 false, true, true, true, true, true, true, true, 169 true, true, true, true, true, true, true, true, 170 true, true, true, true, true, true, true, true, 171 true, true, true, false, false, false, false, false 172 }; 173 174 /** <alpha> | <digit> | '-' */ 175 public static final boolean[] CHAR = 176 { 177 false, false, false, false, false, false, false, false, 178 false, false, false, false, false, false, false, false, 179 false, false, false, false, false, false, false, false, 180 false, false, false, false, false, false, false, false, 181 false, false, false, false, false, false, false, false, 182 false, false, false, false, false, true, false, false, 183 true, true, true, true, true, true, true, true, 184 true, true, false, false, false, false, false, false, 185 false, true, true, true, true, true, true, true, 186 true, true, true, true, true, 
true, true, true, 187 true, true, true, true, true, true, true, true, 188 true, true, true, false, false, false, false, false, 189 false, true, true, true, true, true, true, true, 190 true, true, true, true, true, true, true, true, 191 true, true, true, true, true, true, true, true, 192 true, true, true, false, false, false, false, false 193 }; 194 195 /** %01-%27 %2B-%5B %5D-%7F */ 196 private static final boolean[] UNICODE_SUBSET = 197 { 198 false, true, true, true, true, true, true, true, // '\0' 199 true, true, true, true, true, true, true, true, 200 true, true, true, true, true, true, true, true, 201 true, true, true, true, true, true, true, true, 202 true, true, true, true, true, true, true, true, 203 false, false, false, true, true, true, true, true, // '(', ')', '*' 204 true, true, true, true, true, true, true, true, 205 true, true, true, true, true, true, true, true, 206 true, true, true, true, true, true, true, true, 207 true, true, true, true, true, true, true, true, 208 true, true, true, true, true, true, true, true, 209 true, true, true, true, false, true, true, true, // '\' 210 true, true, true, true, true, true, true, true, 211 true, true, true, true, true, true, true, true, 212 true, true, true, true, true, true, true, true, 213 true, true, true, true, true, true, true, true, 214 }; 215 216 /** '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' */ 217 private static final boolean[] DIGIT = 218 { 219 false, false, false, false, false, false, false, false, 220 false, false, false, false, false, false, false, false, 221 false, false, false, false, false, false, false, false, 222 false, false, false, false, false, false, false, false, 223 false, false, false, false, false, false, false, false, 224 false, false, false, false, false, false, false, false, 225 true, true, true, true, true, true, true, true, 226 true, true, false, false, false, false, false, false, 227 false, false, false, false, false, false, false, false, 228 false, false, false, 
false, false, false, false, false, 229 false, false, false, false, false, false, false, false, 230 false, false, false, false, false, false, false, false, 231 false, false, false, false, false, false, false, false, 232 false, false, false, false, false, false, false, false, 233 false, false, false, false, false, false, false, false, 234 false, false, false, false, false, false, false, false 235 }; 236 237 /** <hex> ::= [0x30-0x39] | [0x41-0x46] | [0x61-0x66] */ 238 private static final boolean[] HEX = 239 { 240 false, false, false, false, false, false, false, false, 241 false, false, false, false, false, false, false, false, 242 false, false, false, false, false, false, false, false, 243 false, false, false, false, false, false, false, false, 244 false, false, false, false, false, false, false, false, 245 false, false, false, false, false, false, false, false, 246 true, true, true, true, true, true, true, true, 247 true, true, false, false, false, false, false, false, 248 false, true, true, true, true, true, true, false, 249 false, false, false, false, false, false, false, false, 250 false, false, false, false, false, false, false, false, 251 false, false, false, false, false, false, false, false, 252 false, true, true, true, true, true, true, false, 253 false, false, false, false, false, false, false, false, 254 false, false, false, false, false, false, false, false, 255 false, false, false, false, false, false, false, false }; 256 257 /** A table containing booleans when the corresponding char is printable */ 258 private static final boolean[] IS_PRINTABLE_CHAR = 259 { 260 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, --- 261 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, --- 262 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, --- 263 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, 
---, --- 264 true, false, false, false, false, false, false, true, // ' ', ---, ---, ---, ---, ---, ---, "'" 265 true, true, false, true, true, true, true, true, // '(', ')', ---, '+', ',', '-', '.', '/' 266 true, true, true, true, true, true, true, true, // '0', '1', '2', '3', '4', '5', '6', '7', 267 true, true, true, false, false, true, false, true, // '8', '9', ':', ---, ---, '=', ---, '?' 268 false, true, true, true, true, true, true, true, // ---, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 269 true, true, true, true, true, true, true, true, // 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O' 270 true, true, true, true, true, true, true, true, // 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W' 271 true, true, true, false, false, false, false, false, // 'X', 'Y', 'Z', ---, ---, ---, ---, --- 272 false, true, true, true, true, true, true, true, // ---, 'a', 'b', 'c', 'd', 'e', 'f', 'g' 273 true, true, true, true, true, true, true, true, // 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o' 274 true, true, true, true, true, true, true, true, // 'p', 'q', 'r', 's', 't', 'u', 'v', 'w' 275 true, true, true, false, false, false, false, false // 'x', 'y', 'z', ---, ---, ---, ---, --- 276 }; 277 278 279 /** <hex> ::= [0x30-0x39] | [0x41-0x46] | [0x61-0x66] */ 280 private static final byte[] HEX_VALUE = 281 { 282 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00 -> 0F 283 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 -> 1F 284 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20 -> 2F 285 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30 -> 3F ( 0, 1,2, 3, 4,5, 6, 7, 8, 9 ) 286 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40 -> 4F ( A, B, C, D, E, F ) 287 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50 -> 5F 288 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 // 60 -> 6F ( a, b, c, d, e, f ) 289 }; 290 291 /** lowerCase = 'a' .. 
'z', '0'..'9', '-' */ 292 private static final char[] LOWER_CASE = 293 { 294 0, 0, 0, 0, 0, 0, 0, 0, 295 0, 0, 0, 0, 0, 0, 0, 0, 296 0, 0, 0, 0, 0, 0, 0, 0, 297 0, 0, 0, 0, 0, 0, 0, 0, 298 0, 0, 0, 0, 0, 0, 0, 0, 299 0, 0, 0, 0, 0, '-', 0, 0, 300 '0', '1', '2', '3', '4', '5', '6', '7', 301 '8', '9', 0, 0, 0, 0, 0, 0, 302 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 303 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 304 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 305 'x', 'y', 'z', 0, 0, 0, 0, 0, 306 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 307 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 308 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 309 'x', 'y', 'z', 0, 0, 0, 0, 0, 310 0, 0, 0, 0, 0, 0, 0, 0, 311 0, 0, 0, 0, 0, 0, 0, 0, 312 0, 0, 0, 0, 0, 0, 0, 0, 313 0, 0, 0, 0, 0, 0, 0, 0, 314 0, 0, 0, 0, 0, 0, 0, 0, 315 0, 0, 0, 0, 0, 0, 0, 0, 316 0, 0, 0, 0, 0, 0, 0, 0, 317 0, 0, 0, 0, 0, 0, 0, 0 318 }; 319 320 private static final char[] TO_LOWER_CASE = 321 { 322 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 323 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 324 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 325 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 326 ' ', 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, '\'', 327 '(', ')', 0x2A, '+', ',', '-', '.', '/', 328 '0', '1', '2', '3', '4', '5', '6', '7', 329 '8', '9', ':', 0x3B, 0x3C, '=', 0x3E, '?', 330 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 331 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 332 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 333 'x', 'y', 'z', 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 334 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 335 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 336 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 337 'x', 'y', 'z', 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 338 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 339 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 340 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 341 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 342 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 343 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 
0xAF, 344 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 345 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 346 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 347 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 348 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 349 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 350 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 351 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 352 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 353 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 354 }; 355 356 357 /** upperCase = 'A' .. 'Z', '0'..'9', '-' */ 358 private static final char[] UPPER_CASE = 359 { 360 0, 0, 0, 0, 0, 0, 0, 0, 361 0, 0, 0, 0, 0, 0, 0, 0, 362 0, 0, 0, 0, 0, 0, 0, 0, 363 0, 0, 0, 0, 0, 0, 0, 0, 364 0, 0, 0, 0, 0, 0, 0, 0, 365 0, 0, 0, 0, 0, '-', 0, 0, 366 '0', '1', '2', '3', '4', '5', '6', '7', 367 '8', '9', 0, 0, 0, 0, 0, 0, 368 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 369 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 370 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 371 'X', 'Y', 'Z', 0, 0, 0, 0, 0, 372 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 373 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 374 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 375 'X', 'Y', 'Z', 0, 0, 0, 0, 0, 376 0, 0, 0, 0, 0, 0, 0, 0, 377 0, 0, 0, 0, 0, 0, 0, 0, 378 0, 0, 0, 0, 0, 0, 0, 0, 379 0, 0, 0, 0, 0, 0, 0, 0, 380 0, 0, 0, 0, 0, 0, 0, 0, 381 0, 0, 0, 0, 0, 0, 0, 0, 382 0, 0, 0, 0, 0, 0, 0, 0, 383 0, 0, 0, 0, 0, 0, 0, 0 384 }; 385 386 private static final int CHAR_ONE_BYTE_MASK = 0xFFFFFF80; 387 388 private static final int CHAR_TWO_BYTES_MASK = 0xFFFFF800; 389 390 private static final int CHAR_THREE_BYTES_MASK = 0xFFFF0000; 391 392 private static final int CHAR_FOUR_BYTES_MASK = 0xFFE00000; 393 394 private static final int CHAR_FIVE_BYTES_MASK = 0xFC000000; 395 396 private static final int CHAR_SIX_BYTES_MASK = 0x80000000; 397 398 public static final int NOT_EQUAL = -1; 399 400 // The following methods are taken from org.apache.commons.lang.StringUtils 401 402 /** 403 * 
The empty String <code>""</code>. 404 * 405 * @since 2.0 406 */ 407 public static final String EMPTY = ""; 408 409 /** 410 * The empty byte[] 411 */ 412 public static final byte[] EMPTY_BYTES = new byte[] 413 {}; 414 415 /** 416 * The empty String[] 417 */ 418 public static final String[] EMPTY_STRINGS = new String[] 419 {}; 420 421 /** 422 * Trims several consecutive characters into one. 423 * 424 * @param str 425 * the string to trim consecutive characters of 426 * @param ch 427 * the character to trim down 428 * @return the newly trimmed down string 429 */ 430 public static final String trimConsecutiveToOne( String str, char ch ) 431 { 432 if ( ( null == str ) || ( str.length() == 0 ) ) 433 { 434 return ""; 435 } 436 437 char[] buffer = str.toCharArray(); 438 char[] newbuf = new char[buffer.length]; 439 int pos = 0; 440 boolean same = false; 441 442 for ( int i = 0; i < buffer.length; i++ ) 443 { 444 char car = buffer[i]; 445 446 if ( car == ch ) 447 { 448 if ( same ) 449 { 450 continue; 451 } 452 else 453 { 454 same = true; 455 newbuf[pos++] = car; 456 } 457 } 458 else 459 { 460 same = false; 461 newbuf[pos++] = car; 462 } 463 } 464 465 return new String( newbuf, 0, pos ); 466 } 467 468 469 /** 470 * A deep trim of a string remove whitespace from the ends as well as 471 * excessive whitespace within the inside of the string between 472 * non-whitespace characters. A deep trim reduces internal whitespace down 473 * to a single space to perserve the whitespace separated tokenization order 474 * of the String. 475 * 476 * @param string the string to deep trim. 477 * @return the trimmed string. 478 */ 479 public static final String deepTrim( String string ) 480 { 481 return deepTrim( string, false ); 482 } 483 484 485 /** 486 * This does the same thing as a trim but we also lowercase the string while 487 * performing the deep trim within the same buffer. 
This saves us from 488 * having to create multiple String and StringBuffer objects and is much 489 * more efficient. 490 * 491 * @see StringTools#deepTrim( String ) 492 */ 493 public static final String deepTrimToLower( String string ) 494 { 495 return deepTrim( string, true ); 496 } 497 498 499 /** 500 * Put common code to deepTrim(String) and deepTrimToLower here. 501 * 502 * @param str the string to deep trim 503 * @param toLowerCase how to normalize for case: upper or lower 504 * @return the deep trimmed string 505 * @see StringTools#deepTrim( String ) 506 * 507 * TODO Replace the toCharArray() by substring manipulations 508 */ 509 public static final String deepTrim( String str, boolean toLowerCase ) 510 { 511 if ( ( null == str ) || ( str.length() == 0 ) ) 512 { 513 return ""; 514 } 515 516 char ch; 517 char[] buf = str.toCharArray(); 518 char[] newbuf = new char[buf.length]; 519 boolean wsSeen = false; 520 boolean isStart = true; 521 int pos = 0; 522 523 for ( int i = 0; i < str.length(); i++ ) 524 { 525 ch = buf[i]; 526 527 // filter out all uppercase characters 528 if ( toLowerCase ) 529 { 530 if ( Character.isUpperCase( ch ) ) 531 { 532 ch = Character.toLowerCase( ch ); 533 } 534 } 535 536 // Check to see if we should add space 537 if ( Character.isWhitespace( ch ) ) 538 { 539 // If the buffer has had characters added already check last 540 // added character. Only append a spc if last character was 541 // not whitespace. 542 if ( wsSeen ) 543 { 544 continue; 545 } 546 else 547 { 548 wsSeen = true; 549 550 if ( isStart ) 551 { 552 isStart = false; 553 } 554 else 555 { 556 newbuf[pos++] = ch; 557 } 558 } 559 } 560 else 561 { 562 // Add all non-whitespace 563 wsSeen = false; 564 isStart = false; 565 newbuf[pos++] = ch; 566 } 567 } 568 569 return ( pos == 0 ? "" : new String( newbuf, 0, ( wsSeen ? 
pos - 1 : pos ) ) ); 570 } 571 572 /** 573 * Truncates large Strings showing a portion of the String's head and tail 574 * with the center cut out and replaced with '...'. Also displays the total 575 * length of the truncated string so size of '...' can be interpreted. 576 * Useful for large strings in UIs or hex dumps to log files. 577 * 578 * @param str the string to truncate 579 * @param head the amount of the head to display 580 * @param tail the amount of the tail to display 581 * @return the center truncated string 582 */ 583 public static final String centerTrunc( String str, int head, int tail ) 584 { 585 StringBuffer buf = null; 586 587 // Return as-is if String is smaller than or equal to the head plus the 588 // tail plus the number of characters added to the trunc representation 589 // plus the number of digits in the string length. 590 if ( str.length() <= ( head + tail + 7 + str.length() / 10 ) ) 591 { 592 return str; 593 } 594 595 buf = new StringBuffer(); 596 buf.append( '[' ).append( str.length() ).append( "][" ); 597 buf.append( str.substring( 0, head ) ).append( "..." ); 598 buf.append( str.substring( str.length() - tail ) ); 599 buf.append( ']' ); 600 return buf.toString(); 601 } 602 603 604 /** 605 * Gets a hex string from byte array. 606 * 607 * @param res 608 * the byte array 609 * @return the hex string representing the binary values in the array 610 */ 611 public static final String toHexString( byte[] res ) 612 { 613 StringBuffer buf = new StringBuffer( res.length << 1 ); 614 615 for ( int ii = 0; ii < res.length; ii++ ) 616 { 617 String digit = Integer.toHexString( 0xFF & res[ii] ); 618 619 if ( digit.length() == 1 ) 620 { 621 digit = '0' + digit; 622 } 623 624 buf.append( digit ); 625 } 626 return buf.toString().toUpperCase(); 627 } 628 629 /** 630 * Rewrote the toLowercase method to improve performances. 631 * In Ldap, attributesType are supposed to use ASCII chars : 632 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only. 
633 * 634 * @param value The String to lowercase 635 * @return The lowercase string 636 */ 637 public static final String toLowerCase( String value ) 638 { 639 if ( ( null == value ) || ( value.length() == 0 ) ) 640 { 641 return ""; 642 } 643 644 char[] chars = value.toCharArray(); 645 646 for ( int i = 0; i < chars.length; i++ ) 647 { 648 chars[i] = TO_LOWER_CASE[ chars[i] ]; 649 } 650 651 return new String( chars ); 652 } 653 654 /** 655 * Rewrote the toLowercase method to improve performances. 656 * In Ldap, attributesType are supposed to use ASCII chars : 657 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only. 658 * 659 * @param value The String to uppercase 660 * @return The uppercase string 661 */ 662 public static final String toUpperCase( String value ) 663 { 664 if ( ( null == value ) || ( value.length() == 0 ) ) 665 { 666 return ""; 667 } 668 669 char[] chars = value.toCharArray(); 670 671 for ( int i = 0; i < chars.length; i++ ) 672 { 673 chars[i] = UPPER_CASE[ chars[i] ]; 674 } 675 676 return new String( chars ); 677 } 678 679 /** 680 * Get byte array from hex string 681 * 682 * @param hexString 683 * the hex string to convert to a byte array 684 * @return the byte form of the hex string. 
685 */ 686 public static final byte[] toByteArray( String hexString ) 687 { 688 int arrLength = hexString.length() >> 1; 689 byte buf[] = new byte[arrLength]; 690 691 for ( int ii = 0; ii < arrLength; ii++ ) 692 { 693 int index = ii << 1; 694 695 String l_digit = hexString.substring( index, index + 2 ); 696 buf[ii] = ( byte ) Integer.parseInt( l_digit, 16 ); 697 } 698 699 return buf; 700 } 701 702 703 /** 704 * This method is used to insert HTML block dynamically 705 * 706 * @param source the HTML code to be processes 707 * @param replaceNl if true '\n' will be replaced by <br> 708 * @param replaceTag if true '<' will be replaced by < and '>' will be replaced 709 * by > 710 * @param replaceQuote if true '\"' will be replaced by " 711 * @return the formated html block 712 */ 713 public static final String formatHtml( String source, boolean replaceNl, boolean replaceTag, 714 boolean replaceQuote ) 715 { 716 StringBuffer buf = new StringBuffer(); 717 int len = source.length(); 718 719 for ( int ii = 0; ii < len; ii++ ) 720 { 721 char ch = source.charAt( ii ); 722 723 switch ( ch ) 724 { 725 case '\"': 726 if ( replaceQuote ) 727 { 728 buf.append( """ ); 729 } 730 else 731 { 732 buf.append( ch ); 733 } 734 break; 735 736 case '<': 737 if ( replaceTag ) 738 { 739 buf.append( "<" ); 740 } 741 else 742 { 743 buf.append( ch ); 744 } 745 break; 746 747 case '>': 748 if ( replaceTag ) 749 { 750 buf.append( ">" ); 751 } 752 else 753 { 754 buf.append( ch ); 755 } 756 break; 757 758 case '\n': 759 if ( replaceNl ) 760 { 761 if ( replaceTag ) 762 { 763 buf.append( "<br>" ); 764 } 765 else 766 { 767 buf.append( "<br>" ); 768 } 769 } 770 else 771 { 772 buf.append( ch ); 773 } 774 break; 775 776 case '\r': 777 break; 778 779 case '&': 780 buf.append( "&" ); 781 break; 782 783 default: 784 buf.append( ch ); 785 break; 786 } 787 } 788 789 return buf.toString(); 790 } 791 792 793 /** 794 * Creates a regular expression from an LDAP substring assertion filter 795 * specification. 
796 * 797 * @param initialPattern 798 * the initial fragment before wildcards 799 * @param anyPattern 800 * fragments surrounded by wildcards if any 801 * @param finalPattern 802 * the final fragment after last wildcard if any 803 * @return the regular expression for the substring match filter 804 * @throws PatternSyntaxException 805 * if a syntactically correct regular expression cannot be 806 * compiled 807 */ 808 public static final Pattern getRegex( String initialPattern, String[] anyPattern, String finalPattern ) 809 throws PatternSyntaxException 810 { 811 StringBuffer buf = new StringBuffer(); 812 813 if ( initialPattern != null ) 814 { 815 buf.append( '^' ).append( Pattern.quote( initialPattern ) ); 816 } 817 818 if ( anyPattern != null ) 819 { 820 for ( int i = 0; i < anyPattern.length; i++ ) 821 { 822 buf.append( ".*" ).append( Pattern.quote( anyPattern[i] ) ); 823 } 824 } 825 826 if ( finalPattern != null ) 827 { 828 buf.append( ".*" ).append( Pattern.quote( finalPattern ) ); 829 } 830 else 831 { 832 buf.append( ".*" ); 833 } 834 835 return Pattern.compile( buf.toString() ); 836 } 837 838 839 /** 840 * Generates a regular expression from an LDAP substring match expression by 841 * parsing out the supplied string argument. 
842 * 843 * @param ldapRegex 844 * the substring match expression 845 * @return the regular expression for the substring match filter 846 * @throws PatternSyntaxException 847 * if a syntactically correct regular expression cannot be 848 * compiled 849 */ 850 public static final Pattern getRegex( String ldapRegex ) throws PatternSyntaxException 851 { 852 if ( ldapRegex == null ) 853 { 854 throw new PatternSyntaxException( I18n.err( I18n.ERR_04429 ), "null", -1 ); 855 } 856 857 List<String> any = new ArrayList<String>(); 858 String remaining = ldapRegex; 859 int index = remaining.indexOf( '*' ); 860 861 if ( index == -1 ) 862 { 863 throw new PatternSyntaxException( I18n.err( I18n.ERR_04430 ), remaining, -1 ); 864 } 865 866 String initialPattern = null; 867 868 if ( remaining.charAt( 0 ) != '*' ) 869 { 870 initialPattern = remaining.substring( 0, index ); 871 } 872 873 remaining = remaining.substring( index + 1, remaining.length() ); 874 875 while ( ( index = remaining.indexOf( '*' ) ) != -1 ) 876 { 877 any.add( remaining.substring( 0, index ) ); 878 remaining = remaining.substring( index + 1, remaining.length() ); 879 } 880 881 String finalPattern = null; 882 if ( !remaining.endsWith( "*" ) && remaining.length() > 0 ) 883 { 884 finalPattern = remaining; 885 } 886 887 if ( any.size() > 0 ) 888 { 889 String[] anyStrs = new String[any.size()]; 890 891 for ( int i = 0; i < anyStrs.length; i++ ) 892 { 893 anyStrs[i] = any.get( i ); 894 } 895 896 return getRegex( initialPattern, anyStrs, finalPattern ); 897 } 898 899 return getRegex( initialPattern, null, finalPattern ); 900 } 901 902 903 /** 904 * Splits apart a OS separator delimited set of paths in a string into 905 * multiple Strings. File component path strings are returned within a List 906 * in the order they are found in the composite path string. Optionally, a 907 * file filter can be used to filter out path strings to control the 908 * components returned. 
If the filter is null all path components are 909 * returned. 910 * 911 * @param paths 912 * a set of paths delimited using the OS path separator 913 * @param filter 914 * a FileFilter used to filter the return set 915 * @return the filter accepted path component Strings in the order 916 * encountered 917 */ 918 public static final List<String> getPaths( String paths, FileFilter filter ) 919 { 920 int start = 0; 921 int stop = -1; 922 String path = null; 923 List<String> list = new ArrayList<String>(); 924 925 // Abandon with no values if paths string is null 926 if ( paths == null || paths.trim().equals( "" ) ) 927 { 928 return list; 929 } 930 931 final int max = paths.length() - 1; 932 933 // Loop spliting string using OS path separator: terminate 934 // when the start index is at the end of the paths string. 935 while ( start < max ) 936 { 937 stop = paths.indexOf( File.pathSeparatorChar, start ); 938 939 // The is no file sep between the start and the end of the string 940 if ( stop == -1 ) 941 { 942 // If we have a trailing path remaining without ending separator 943 if ( start < max ) 944 { 945 // Last path is everything from start to the string's end 946 path = paths.substring( start ); 947 948 // Protect against consecutive separators side by side 949 if ( !path.trim().equals( "" ) ) 950 { 951 // If filter is null add path, if it is not null add the 952 // path only if the filter accepts the path component. 953 if ( filter == null || filter.accept( new File( path ) ) ) 954 { 955 list.add( path ); 956 } 957 } 958 } 959 960 break; // Exit loop no more path components left! 961 } 962 963 // There is a separator between start and the end if we got here! 
964 // start index is now at 0 or the index of last separator + 1 965 // stop index is now at next separator in front of start index 966 path = paths.substring( start, stop ); 967 968 // Protect against consecutive separators side by side 969 if ( !path.trim().equals( "" ) ) 970 { 971 // If filter is null add path, if it is not null add the path 972 // only if the filter accepts the path component. 973 if ( filter == null || filter.accept( new File( path ) ) ) 974 { 975 list.add( path ); 976 } 977 } 978 979 // Advance start index past separator to start of next path comp 980 start = stop + 1; 981 } 982 983 return list; 984 } 985 986 987 // ~ Methods 988 // ------------------------------------------------------------------------------------ 989 990 /** 991 * Helper function that dump a byte in hex form 992 * 993 * @param octet The byte to dump 994 * @return A string representation of the byte 995 */ 996 public static final String dumpByte( byte octet ) 997 { 998 return new String( new byte[] 999 { '0', 'x', HEX_CHAR[( octet & 0x00F0 ) >> 4], HEX_CHAR[octet & 0x000F] } ); 1000 } 1001 1002 1003 /** 1004 * Helper function that returns a char from an hex 1005 * 1006 * @param hex The hex to dump 1007 * @return A char representation of the hex 1008 */ 1009 public static final char dumpHex( byte hex ) 1010 { 1011 return ( char ) HEX_CHAR[hex & 0x000F]; 1012 } 1013 1014 1015 /** 1016 * Helper function that dump an array of bytes in hex form 1017 * 1018 * @param buffer The bytes array to dump 1019 * @return A string representation of the array of bytes 1020 */ 1021 public static final String dumpBytes( byte[] buffer ) 1022 { 1023 if ( buffer == null ) 1024 { 1025 return ""; 1026 } 1027 1028 StringBuffer sb = new StringBuffer(); 1029 1030 for ( int i = 0; i < buffer.length; i++ ) 1031 { 1032 sb.append( "0x" ).append( ( char ) ( HEX_CHAR[( buffer[i] & 0x00F0 ) >> 4] ) ).append( 1033 ( char ) ( HEX_CHAR[buffer[i] & 0x000F] ) ).append( " " ); 1034 } 1035 1036 return 
sb.toString(); 1037 } 1038 1039 /** 1040 * 1041 * Helper method to render an object which can be a String or a byte[] 1042 * 1043 * @return A string representing the object 1044 */ 1045 public static String dumpObject( Object object ) 1046 { 1047 if ( object != null ) 1048 { 1049 if ( object instanceof String ) 1050 { 1051 return (String) object; 1052 } 1053 else if ( object instanceof byte[] ) 1054 { 1055 return dumpBytes( ( byte[] ) object ); 1056 } 1057 else if ( object instanceof StringValue ) 1058 { 1059 return ( ( StringValue ) object ).get(); 1060 } 1061 else if ( object instanceof BinaryValue ) 1062 { 1063 return dumpBytes( ( ( BinaryValue ) object ).get() ); 1064 } 1065 else 1066 { 1067 return "<unknown type>"; 1068 } 1069 } 1070 else 1071 { 1072 return ""; 1073 } 1074 } 1075 1076 /** 1077 * Helper function that dump an array of bytes in hex pair form, 1078 * without '0x' and space chars 1079 * 1080 * @param buffer The bytes array to dump 1081 * @return A string representation of the array of bytes 1082 */ 1083 public static final String dumpHexPairs( byte[] buffer ) 1084 { 1085 if ( buffer == null ) 1086 { 1087 return ""; 1088 } 1089 1090 char[] str = new char[buffer.length << 1]; 1091 1092 for ( int i = 0, pos = 0; i < buffer.length; i++ ) 1093 { 1094 str[pos++] = ( char ) ( HEX_CHAR[( buffer[i] & 0x00F0 ) >> 4] ); 1095 str[pos++] = ( char ) ( HEX_CHAR[buffer[i] & 0x000F] ); 1096 } 1097 1098 return new String( str ); 1099 } 1100 1101 /** 1102 * Return the Unicode char which is coded in the bytes at position 0. 1103 * 1104 * @param bytes The byte[] represntation of an Unicode string. 1105 * @return The first char found. 1106 */ 1107 public static final char bytesToChar( byte[] bytes ) 1108 { 1109 return bytesToChar( bytes, 0 ); 1110 } 1111 1112 1113 /** 1114 * Count the number of bytes needed to return an Unicode char. This can be 1115 * from 1 to 6. 1116 * 1117 * @param bytes The bytes to read 1118 * @param pos Position to start counting. 
It must be a valid start of a 1119 * encoded char ! 1120 * @return The number of bytes to create a char, or -1 if the encoding is 1121 * wrong. TODO : Should stop after the third byte, as a char is only 1122 * 2 bytes long. 1123 */ 1124 public static final int countBytesPerChar( byte[] bytes, int pos ) 1125 { 1126 if ( bytes == null ) 1127 { 1128 return -1; 1129 } 1130 1131 if ( ( bytes[pos] & UTF8_MULTI_BYTES_MASK ) == 0 ) 1132 { 1133 return 1; 1134 } 1135 else if ( ( bytes[pos] & UTF8_TWO_BYTES_MASK ) == UTF8_TWO_BYTES ) 1136 { 1137 return 2; 1138 } 1139 else if ( ( bytes[pos] & UTF8_THREE_BYTES_MASK ) == UTF8_THREE_BYTES ) 1140 { 1141 return 3; 1142 } 1143 else if ( ( bytes[pos] & UTF8_FOUR_BYTES_MASK ) == UTF8_FOUR_BYTES ) 1144 { 1145 return 4; 1146 } 1147 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES ) 1148 { 1149 return 5; 1150 } 1151 else if ( ( bytes[pos] & UTF8_SIX_BYTES_MASK ) == UTF8_SIX_BYTES ) 1152 { 1153 return 6; 1154 } 1155 else 1156 { 1157 return -1; 1158 } 1159 } 1160 1161 1162 /** 1163 * Return the number of bytes that hold an Unicode char. 1164 * 1165 * @param car The character to be decoded 1166 * @return The number of bytes to hold the char. TODO : Should stop after 1167 * the third byte, as a char is only 2 bytes long. 1168 */ 1169 public static final int countNbBytesPerChar( char car ) 1170 { 1171 if ( ( car & CHAR_ONE_BYTE_MASK ) == 0 ) 1172 { 1173 return 1; 1174 } 1175 else if ( ( car & CHAR_TWO_BYTES_MASK ) == 0 ) 1176 { 1177 return 2; 1178 } 1179 else if ( ( car & CHAR_THREE_BYTES_MASK ) == 0 ) 1180 { 1181 return 3; 1182 } 1183 else if ( ( car & CHAR_FOUR_BYTES_MASK ) == 0 ) 1184 { 1185 return 4; 1186 } 1187 else if ( ( car & CHAR_FIVE_BYTES_MASK ) == 0 ) 1188 { 1189 return 5; 1190 } 1191 else if ( ( car & CHAR_SIX_BYTES_MASK ) == 0 ) 1192 { 1193 return 6; 1194 } 1195 else 1196 { 1197 return -1; 1198 } 1199 } 1200 1201 1202 /** 1203 * Count the number of bytes included in the given char[]. 
1204 * 1205 * @param chars The char array to decode 1206 * @return The number of bytes in the char array 1207 */ 1208 public static final int countBytes( char[] chars ) 1209 { 1210 if ( chars == null ) 1211 { 1212 return 0; 1213 } 1214 1215 int nbBytes = 0; 1216 int currentPos = 0; 1217 1218 while ( currentPos < chars.length ) 1219 { 1220 int nbb = countNbBytesPerChar( chars[currentPos] ); 1221 1222 // If the number of bytes necessary to encode a character is 1223 // above 3, we will need two UTF-16 chars 1224 currentPos += ( nbb < 4 ? 1 : 2 ); 1225 nbBytes += nbb; 1226 } 1227 1228 return nbBytes; 1229 } 1230 1231 1232 /** 1233 * Return the Unicode char which is coded in the bytes at the given 1234 * position. 1235 * 1236 * @param bytes The byte[] represntation of an Unicode string. 1237 * @param pos The current position to start decoding the char 1238 * @return The decoded char, or -1 if no char can be decoded TODO : Should 1239 * stop after the third byte, as a char is only 2 bytes long. 
1240 */ 1241 public static final char bytesToChar( byte[] bytes, int pos ) 1242 { 1243 if ( bytes == null ) 1244 { 1245 return ( char ) -1; 1246 } 1247 1248 if ( ( bytes[pos] & UTF8_MULTI_BYTES_MASK ) == 0 ) 1249 { 1250 return ( char ) bytes[pos]; 1251 } 1252 else 1253 { 1254 if ( ( bytes[pos] & UTF8_TWO_BYTES_MASK ) == UTF8_TWO_BYTES ) 1255 { 1256 // Two bytes char 1257 return ( char ) ( ( ( bytes[pos] & 0x1C ) << 6 ) + // 110x-xxyy 1258 // 10zz-zzzz 1259 // -> 1260 // 0000-0xxx 1261 // 0000-0000 1262 ( ( bytes[pos] & 0x03 ) << 6 ) + // 110x-xxyy 10zz-zzzz 1263 // -> 0000-0000 1264 // yy00-0000 1265 ( bytes[pos + 1] & 0x3F ) // 110x-xxyy 10zz-zzzz -> 0000-0000 1266 // 00zz-zzzz 1267 ); // -> 0000-0xxx yyzz-zzzz (07FF) 1268 } 1269 else if ( ( bytes[pos] & UTF8_THREE_BYTES_MASK ) == UTF8_THREE_BYTES ) 1270 { 1271 // Three bytes char 1272 return ( char ) ( 1273 // 1110-tttt 10xx-xxyy 10zz-zzzz -> tttt-0000-0000-0000 1274 ( ( bytes[pos] & 0x0F ) << 12 ) + 1275 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-xxxx-0000-0000 1276 ( ( bytes[pos + 1] & 0x3C ) << 6 ) + 1277 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-0000-yy00-0000 1278 ( ( bytes[pos + 1] & 0x03 ) << 6 ) + 1279 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-0000-00zz-zzzz 1280 ( bytes[pos + 2] & 0x3F ) 1281 // -> tttt-xxxx yyzz-zzzz (FF FF) 1282 ); 1283 } 1284 else if ( ( bytes[pos] & UTF8_FOUR_BYTES_MASK ) == UTF8_FOUR_BYTES ) 1285 { 1286 // Four bytes char 1287 return ( char ) ( 1288 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 000t-tt00 1289 // 0000-0000 0000-0000 1290 ( ( bytes[pos] & 0x07 ) << 18 ) + 1291 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-00uu 1292 // 0000-0000 0000-0000 1293 ( ( bytes[pos + 1] & 0x30 ) << 16 ) + 1294 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000 1295 // vvvv-0000 0000-0000 1296 ( ( bytes[pos + 1] & 0x0F ) << 12 ) + 1297 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000 1298 // 0000-xxxx 0000-0000 1299 ( ( bytes[pos + 2] & 0x3C ) << 6 ) + 1300 // 1111-0ttt 10uu-vvvv 
10xx-xxyy 10zz-zzzz -> 0000-0000 1301 // 0000-0000 yy00-0000 1302 ( ( bytes[pos + 2] & 0x03 ) << 6 ) + 1303 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000 1304 // 0000-0000 00zz-zzzz 1305 ( bytes[pos + 3] & 0x3F ) 1306 // -> 000t-ttuu vvvv-xxxx yyzz-zzzz (1FFFFF) 1307 ); 1308 } 1309 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES ) 1310 { 1311 // Five bytes char 1312 return ( char ) ( 1313 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz -> 1314 // 0000-00tt 0000-0000 0000-0000 0000-0000 1315 ( ( bytes[pos] & 0x03 ) << 24 ) + 1316 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz -> 1317 // 0000-0000 uuuu-uu00 0000-0000 0000-0000 1318 ( ( bytes[pos + 1] & 0x3F ) << 18 ) + 1319 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz -> 1320 // 0000-0000 0000-00vv 0000-0000 0000-0000 1321 ( ( bytes[pos + 2] & 0x30 ) << 12 ) + 1322 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz -> 1323 // 0000-0000 0000-0000 wwww-0000 0000-0000 1324 ( ( bytes[pos + 2] & 0x0F ) << 12 ) + 1325 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz -> 1326 // 0000-0000 0000-0000 0000-xxxx 0000-0000 1327 ( ( bytes[pos + 3] & 0x3C ) << 6 ) + 1328 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz -> 1329 // 0000-0000 0000-0000 0000-0000 yy00-0000 1330 ( ( bytes[pos + 3] & 0x03 ) << 6 ) + 1331 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz -> 1332 // 0000-0000 0000-0000 0000-0000 00zz-zzzz 1333 ( bytes[pos + 4] & 0x3F ) 1334 // -> 0000-00tt uuuu-uuvv wwww-xxxx yyzz-zzzz (03 FF FF FF) 1335 ); 1336 } 1337 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES ) 1338 { 1339 // Six bytes char 1340 return ( char ) ( 1341 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz 1342 // -> 1343 // 0s00-0000 0000-0000 0000-0000 0000-0000 1344 ( ( bytes[pos] & 0x01 ) << 30 ) + 1345 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz 1346 // -> 1347 // 00tt-tttt 0000-0000 0000-0000 0000-0000 1348 ( ( bytes[pos + 1] & 0x3F ) << 
24 ) + 1349 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 1350 // 10zz-zzzz -> 1351 // 0000-0000 uuuu-uu00 0000-0000 0000-0000 1352 ( ( bytes[pos + 2] & 0x3F ) << 18 ) + 1353 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 1354 // 10zz-zzzz -> 1355 // 0000-0000 0000-00vv 0000-0000 0000-0000 1356 ( ( bytes[pos + 3] & 0x30 ) << 12 ) + 1357 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 1358 // 10zz-zzzz -> 1359 // 0000-0000 0000-0000 wwww-0000 0000-0000 1360 ( ( bytes[pos + 3] & 0x0F ) << 12 ) + 1361 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 1362 // 10zz-zzzz -> 1363 // 0000-0000 0000-0000 0000-xxxx 0000-0000 1364 ( ( bytes[pos + 4] & 0x3C ) << 6 ) + 1365 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 1366 // 10zz-zzzz -> 1367 // 0000-0000 0000-0000 0000-0000 yy00-0000 1368 ( ( bytes[pos + 4] & 0x03 ) << 6 ) + 1369 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz 1370 // -> 1371 // 0000-0000 0000-0000 0000-0000 00zz-zzzz 1372 ( bytes[pos + 5] & 0x3F ) 1373 // -> 0stt-tttt uuuu-uuvv wwww-xxxx yyzz-zzzz (7F FF FF FF) 1374 ); 1375 } 1376 else 1377 { 1378 return ( char ) -1; 1379 } 1380 } 1381 } 1382 1383 1384 /** 1385 * Return the Unicode char which is coded in the bytes at the given 1386 * position. 1387 * 1388 * @param car The character to be transformed to an array of bytes 1389 * 1390 * @return The byte array representing the char 1391 * 1392 * TODO : Should stop after the third byte, as a char is only 2 bytes long. 
1393 */ 1394 public static final byte[] charToBytes( char car ) 1395 { 1396 byte[] bytes = new byte[countNbBytesPerChar( car )]; 1397 1398 if ( car <= 0x7F ) 1399 { 1400 // Single byte char 1401 bytes[0] = ( byte ) car; 1402 return bytes; 1403 } 1404 else if ( car <= 0x7FF ) 1405 { 1406 // two bytes char 1407 bytes[0] = ( byte ) ( 0x00C0 + ( ( car & 0x07C0 ) >> 6 ) ); 1408 bytes[1] = ( byte ) ( 0x0080 + ( car & 0x3F ) ); 1409 } 1410 else 1411 { 1412 // Three bytes char 1413 bytes[0] = ( byte ) ( 0x00E0 + ( ( car & 0xF000 ) >> 12 ) ); 1414 bytes[1] = ( byte ) ( 0x0080 + ( ( car & 0x0FC0 ) >> 6 ) ); 1415 bytes[2] = ( byte ) ( 0x0080 + ( car & 0x3F ) ); 1416 } 1417 1418 return bytes; 1419 } 1420 1421 1422 /** 1423 * Count the number of chars included in the given byte[]. 1424 * 1425 * @param bytes The byte array to decode 1426 * @return The number of char in the byte array 1427 */ 1428 public static final int countChars( byte[] bytes ) 1429 { 1430 if ( bytes == null ) 1431 { 1432 return 0; 1433 } 1434 1435 int nbChars = 0; 1436 int currentPos = 0; 1437 1438 while ( currentPos < bytes.length ) 1439 { 1440 currentPos += countBytesPerChar( bytes, currentPos ); 1441 nbChars++; 1442 } 1443 1444 return nbChars; 1445 } 1446 1447 1448 /** 1449 * Check if a text is present at the current position in a buffer. 1450 * 1451 * @param bytes The buffer which contains the data 1452 * @param index Current position in the buffer 1453 * @param text The text we want to check 1454 * @return <code>true</code> if the buffer contains the text. 
1455 */ 1456 public static final int areEquals( byte[] bytes, int index, String text ) 1457 { 1458 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( bytes.length <= index ) || ( index < 0 ) 1459 || ( text == null ) ) 1460 { 1461 return NOT_EQUAL; 1462 } 1463 else 1464 { 1465 try 1466 { 1467 byte[] data = text.getBytes( "UTF-8" ); 1468 1469 return areEquals( bytes, index, data ); 1470 } 1471 catch ( UnsupportedEncodingException uee ) 1472 { 1473 // if this happens something is really strange 1474 throw new RuntimeException( uee ); 1475 } 1476 } 1477 } 1478 1479 1480 /** 1481 * Check if a text is present at the current position in a buffer. 1482 * 1483 * @param chars The buffer which contains the data 1484 * @param index Current position in the buffer 1485 * @param text The text we want to check 1486 * @return <code>true</code> if the buffer contains the text. 1487 */ 1488 public static final int areEquals( char[] chars, int index, String text ) 1489 { 1490 if ( ( chars == null ) || ( chars.length == 0 ) || ( chars.length <= index ) || ( index < 0 ) 1491 || ( text == null ) ) 1492 { 1493 return NOT_EQUAL; 1494 } 1495 else 1496 { 1497 char[] data = text.toCharArray(); 1498 1499 return areEquals( chars, index, data ); 1500 } 1501 } 1502 1503 1504 /** 1505 * Check if a text is present at the current position in a buffer. 1506 * 1507 * @param chars The buffer which contains the data 1508 * @param index Current position in the buffer 1509 * @param chars2 The text we want to check 1510 * @return <code>true</code> if the buffer contains the text. 
1511 */ 1512 public static final int areEquals( char[] chars, int index, char[] chars2 ) 1513 { 1514 if ( ( chars == null ) || ( chars.length == 0 ) || ( chars.length <= index ) || ( index < 0 ) 1515 || ( chars2 == null ) || ( chars2.length == 0 ) 1516 || ( chars2.length > ( chars.length + index ) ) ) 1517 { 1518 return NOT_EQUAL; 1519 } 1520 else 1521 { 1522 for ( int i = 0; i < chars2.length; i++ ) 1523 { 1524 if ( chars[index++] != chars2[i] ) 1525 { 1526 return NOT_EQUAL; 1527 } 1528 } 1529 1530 return index; 1531 } 1532 } 1533 1534 /** 1535 * Check if a text is present at the current position in another string. 1536 * 1537 * @param string The string which contains the data 1538 * @param index Current position in the string 1539 * @param text The text we want to check 1540 * @return <code>true</code> if the string contains the text. 1541 */ 1542 public static final boolean areEquals( String string, int index, String text ) 1543 { 1544 if ( ( string == null ) || ( text == null ) ) 1545 { 1546 return false; 1547 } 1548 1549 int length1 = string.length(); 1550 int length2 = text.length(); 1551 1552 if ( ( length1 == 0 ) || ( length1 <= index ) || ( index < 0 ) 1553 || ( length2 == 0 ) || ( length2 > ( length1 + index ) ) ) 1554 { 1555 return false; 1556 } 1557 else 1558 { 1559 return string.substring( index ).startsWith( text ); 1560 } 1561 } 1562 1563 1564 /** 1565 * Check if a text is present at the current position in a buffer. 1566 * 1567 * @param bytes The buffer which contains the data 1568 * @param index Current position in the buffer 1569 * @param bytes2 The text we want to check 1570 * @return <code>true</code> if the buffer contains the text. 
1571 */ 1572 public static final int areEquals( byte[] bytes, int index, byte[] bytes2 ) 1573 { 1574 1575 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( bytes.length <= index ) || ( index < 0 ) 1576 || ( bytes2 == null ) || ( bytes2.length == 0 ) 1577 || ( bytes2.length > ( bytes.length + index ) ) ) 1578 { 1579 return NOT_EQUAL; 1580 } 1581 else 1582 { 1583 for ( int i = 0; i < bytes2.length; i++ ) 1584 { 1585 if ( bytes[index++] != bytes2[i] ) 1586 { 1587 return NOT_EQUAL; 1588 } 1589 } 1590 1591 return index; 1592 } 1593 } 1594 1595 1596 /** 1597 * Test if the current character is equal to a specific character. This 1598 * function works only for character between 0 and 127, as it does compare a 1599 * byte and a char (which is 16 bits wide) 1600 * 1601 * @param byteArray 1602 * The buffer which contains the data 1603 * @param index 1604 * Current position in the buffer 1605 * @param car 1606 * The character we want to compare with the current buffer 1607 * position 1608 * @return <code>true</code> if the current character equals the given 1609 * character. 1610 */ 1611 public static final boolean isCharASCII( byte[] byteArray, int index, char car ) 1612 { 1613 if ( ( byteArray == null ) || ( byteArray.length == 0 ) || ( index < 0 ) || ( index >= byteArray.length ) ) 1614 { 1615 return false; 1616 } 1617 else 1618 { 1619 return ( ( byteArray[index] == car ) ? true : false ); 1620 } 1621 } 1622 1623 1624 /** 1625 * Test if the current character is equal to a specific character. 1626 * 1627 * @param chars 1628 * The buffer which contains the data 1629 * @param index 1630 * Current position in the buffer 1631 * @param car 1632 * The character we want to compare with the current buffer 1633 * position 1634 * @return <code>true</code> if the current character equals the given 1635 * character. 
1636 */ 1637 public static final boolean isCharASCII( char[] chars, int index, char car ) 1638 { 1639 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) ) 1640 { 1641 return false; 1642 } 1643 else 1644 { 1645 return ( ( chars[index] == car ) ? true : false ); 1646 } 1647 } 1648 1649 /** 1650 * Test if the current character is equal to a specific character. 1651 * 1652 * @param string The String which contains the data 1653 * @param index Current position in the string 1654 * @param car The character we want to compare with the current string 1655 * position 1656 * @return <code>true</code> if the current character equals the given 1657 * character. 1658 */ 1659 public static final boolean isCharASCII( String string, int index, char car ) 1660 { 1661 if ( string == null ) 1662 { 1663 return false; 1664 } 1665 1666 int length = string.length(); 1667 1668 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 1669 { 1670 return false; 1671 } 1672 else 1673 { 1674 return string.charAt( index ) == car; 1675 } 1676 } 1677 1678 1679 /** 1680 * Test if the current character is equal to a specific character. 1681 * 1682 * @param string The String which contains the data 1683 * @param index Current position in the string 1684 * @param car The character we want to compare with the current string 1685 * position 1686 * @return <code>true</code> if the current character equals the given 1687 * character. 1688 */ 1689 public static final boolean isICharASCII( String string, int index, char car ) 1690 { 1691 if ( string == null ) 1692 { 1693 return false; 1694 } 1695 1696 int length = string.length(); 1697 1698 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 1699 { 1700 return false; 1701 } 1702 else 1703 { 1704 return ( ( string.charAt( index ) | 0x20 ) & car ) == car; 1705 } 1706 } 1707 1708 1709 /** 1710 * Test if the current character is equal to a specific character. 
1711 * 1712 * @param string The String which contains the data 1713 * @param index Current position in the string 1714 * @param car The character we want to compare with the current string 1715 * position 1716 * @return <code>true</code> if the current character equals the given 1717 * character. 1718 */ 1719 public static final boolean isICharASCII( byte[] bytes, int index, char car ) 1720 { 1721 if ( bytes == null ) 1722 { 1723 return false; 1724 } 1725 1726 int length = bytes.length; 1727 1728 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 1729 { 1730 return false; 1731 } 1732 else 1733 { 1734 return ( ( bytes[ index ] | 0x20 ) & car ) == car; 1735 } 1736 } 1737 1738 1739 /** 1740 * Test if the current character is a bit, ie 0 or 1. 1741 * 1742 * @param string 1743 * The String which contains the data 1744 * @param index 1745 * Current position in the string 1746 * @return <code>true</code> if the current character is a bit (0 or 1) 1747 */ 1748 public static final boolean isBit( String string, int index ) 1749 { 1750 if ( string == null ) 1751 { 1752 return false; 1753 } 1754 1755 int length = string.length(); 1756 1757 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 1758 { 1759 return false; 1760 } 1761 else 1762 { 1763 char c = string.charAt( index ); 1764 return ( ( c == '0' ) || ( c == '1' ) ); 1765 } 1766 } 1767 1768 1769 /** 1770 * Get the character at a given position in a string, checking fo limits 1771 * 1772 * @param string The string which contains the data 1773 * @param index Current position in the string 1774 * @return The character ar the given position, or '\0' if something went wrong 1775 */ 1776 public static final char charAt( String string, int index ) 1777 { 1778 if ( string == null ) 1779 { 1780 return '\0'; 1781 } 1782 1783 int length = string.length(); 1784 1785 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 1786 { 1787 return '\0'; 1788 } 1789 else 1790 { 1791 return string.charAt( index ) 
; 1792 } 1793 } 1794 1795 1796 /** 1797 * Translate two chars to an hex value. The chars must be 1798 * in [a-fA-F0-9] 1799 * 1800 * @param high The high value 1801 * @param low The low value 1802 * @return A byte representation of the two chars 1803 */ 1804 public static byte getHexValue( char high, char low ) 1805 { 1806 if ( ( high > 127 ) || ( low > 127 ) || ( high < 0 ) | ( low < 0 ) ) 1807 { 1808 return -1; 1809 } 1810 1811 return (byte)( ( HEX_VALUE[high] << 4 ) | HEX_VALUE[low] ); 1812 } 1813 1814 1815 /** 1816 * Translate two bytes to an hex value. The bytes must be 1817 * in [0-9a-fA-F] 1818 * 1819 * @param high The high value 1820 * @param low The low value 1821 * @return A byte representation of the two bytes 1822 */ 1823 public static byte getHexValue( byte high, byte low ) 1824 { 1825 if ( ( high > 127 ) || ( low > 127 ) || ( high < 0 ) | ( low < 0 ) ) 1826 { 1827 return -1; 1828 } 1829 1830 return (byte)( ( HEX_VALUE[high] << 4 ) | HEX_VALUE[low] ); 1831 } 1832 1833 1834 /** 1835 * Return an hex value from a sinle char 1836 * The char must be in [0-9a-fA-F] 1837 * 1838 * @param c The char we want to convert 1839 * @return A byte between 0 and 15 1840 */ 1841 public static byte getHexValue( char c ) 1842 { 1843 if ( ( c > 127 ) || ( c < 0 ) ) 1844 { 1845 return -1; 1846 } 1847 1848 return HEX_VALUE[c]; 1849 } 1850 1851 /** 1852 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] | 1853 * [0x41-0x46] | [0x61-0x66] 1854 * 1855 * @param bytes The buffer which contains the data 1856 * @param index Current position in the buffer 1857 * @return <code>true</code> if the current character is a Hex Char 1858 */ 1859 public static final boolean isHex( byte[] bytes, int index ) 1860 { 1861 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) ) 1862 { 1863 return false; 1864 } 1865 else 1866 { 1867 byte c = bytes[index]; 1868 1869 if ( ( ( c | 0x7F ) != 0x7F ) || ( HEX[c] == false ) ) 1870 { 1871 return 
false; 1872 } 1873 else 1874 { 1875 return true; 1876 } 1877 } 1878 } 1879 1880 1881 /** 1882 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] | 1883 * [0x41-0x46] | [0x61-0x66] 1884 * 1885 * @param chars The buffer which contains the data 1886 * @param index Current position in the buffer 1887 * @return <code>true</code> if the current character is a Hex Char 1888 */ 1889 public static final boolean isHex( char[] chars, int index ) 1890 { 1891 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) ) 1892 { 1893 return false; 1894 } 1895 else 1896 { 1897 char c = chars[index]; 1898 1899 if ( ( c > 127 ) || ( HEX[c] == false ) ) 1900 { 1901 return false; 1902 } 1903 else 1904 { 1905 return true; 1906 } 1907 } 1908 } 1909 1910 /** 1911 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] | 1912 * [0x41-0x46] | [0x61-0x66] 1913 * 1914 * @param string The string which contains the data 1915 * @param index Current position in the string 1916 * @return <code>true</code> if the current character is a Hex Char 1917 */ 1918 public static final boolean isHex( String string, int index ) 1919 { 1920 if ( string == null ) 1921 { 1922 return false; 1923 } 1924 1925 int length = string.length(); 1926 1927 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 1928 { 1929 return false; 1930 } 1931 else 1932 { 1933 char c = string.charAt( index ); 1934 1935 if ( ( c > 127 ) || ( HEX[c] == false ) ) 1936 { 1937 return false; 1938 } 1939 else 1940 { 1941 return true; 1942 } 1943 } 1944 } 1945 1946 1947 /** 1948 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' | 1949 * '3' | '4' | '5' | '6' | '7' | '8' | '9' 1950 * 1951 * @param bytes The buffer which contains the data 1952 * @return <code>true</code> if the current character is a Digit 1953 */ 1954 public static final boolean isDigit( byte[] bytes ) 1955 { 1956 if ( ( bytes == null ) || ( bytes.length == 0 ) ) 1957 { 1958 return 
false; 1959 } 1960 else 1961 { 1962 return ( ( ( ( bytes[0] | 0x7F ) != 0x7F ) || !DIGIT[bytes[0]] ) ? false : true ); 1963 } 1964 } 1965 1966 1967 /** 1968 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' | 1969 * '3' | '4' | '5' | '6' | '7' | '8' | '9' 1970 * 1971 * @param car the character to test 1972 * 1973 * @return <code>true</code> if the character is a Digit 1974 */ 1975 public static final boolean isDigit( char car ) 1976 { 1977 return ( car >= '0' ) && ( car <= '9' ); 1978 } 1979 1980 1981 /** 1982 * Test if the current byte is an Alpha character : 1983 * <alpha> ::= [0x41-0x5A] | [0x61-0x7A] 1984 * 1985 * @param c The byte to test 1986 * 1987 * @return <code>true</code> if the byte is an Alpha 1988 * character 1989 */ 1990 public static final boolean isAlpha( byte c ) 1991 { 1992 return ( ( c > 0 ) && ( c <= 127 ) && ALPHA[c] ); 1993 } 1994 1995 1996 /** 1997 * Test if the current character is an Alpha character : 1998 * <alpha> ::= [0x41-0x5A] | [0x61-0x7A] 1999 * 2000 * @param c The char to test 2001 * 2002 * @return <code>true</code> if the character is an Alpha 2003 * character 2004 */ 2005 public static final boolean isAlpha( char c ) 2006 { 2007 return ( ( c > 0 ) && ( c <= 127 ) && ALPHA[c] ); 2008 } 2009 2010 2011 /** 2012 * Test if the current character is an Alpha character : <alpha> ::= 2013 * [0x41-0x5A] | [0x61-0x7A] 2014 * 2015 * @param bytes The buffer which contains the data 2016 * @param index Current position in the buffer 2017 * @return <code>true</code> if the current character is an Alpha 2018 * character 2019 */ 2020 public static final boolean isAlphaASCII( byte[] bytes, int index ) 2021 { 2022 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) ) 2023 { 2024 return false; 2025 } 2026 else 2027 { 2028 byte c = bytes[index]; 2029 2030 if ( ( ( c | 0x7F ) != 0x7F ) || ( ALPHA[c] == false ) ) 2031 { 2032 return false; 2033 } 2034 else 2035 { 2036 return true; 2037 } 2038 
} 2039 } 2040 2041 2042 /** 2043 * Test if the current character is an Alpha character : <alpha> ::= 2044 * [0x41-0x5A] | [0x61-0x7A] 2045 * 2046 * @param chars The buffer which contains the data 2047 * @param index Current position in the buffer 2048 * @return <code>true</code> if the current character is an Alpha 2049 * character 2050 */ 2051 public static final boolean isAlphaASCII( char[] chars, int index ) 2052 { 2053 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) ) 2054 { 2055 return false; 2056 } 2057 else 2058 { 2059 char c = chars[index]; 2060 2061 if ( ( c > 127 ) || ( ALPHA[c] == false ) ) 2062 { 2063 return false; 2064 } 2065 else 2066 { 2067 return true; 2068 } 2069 } 2070 } 2071 2072 2073 /** 2074 * Test if the current character is an Alpha character : <alpha> ::= 2075 * [0x41-0x5A] | [0x61-0x7A] 2076 * 2077 * @param string The string which contains the data 2078 * @param index Current position in the string 2079 * @return <code>true</code> if the current character is an Alpha 2080 * character 2081 */ 2082 public static final boolean isAlphaASCII( String string, int index ) 2083 { 2084 if ( string == null ) 2085 { 2086 return false; 2087 } 2088 2089 int length = string.length(); 2090 2091 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 2092 { 2093 return false; 2094 } 2095 else 2096 { 2097 char c = string.charAt( index ); 2098 2099 if ( ( c > 127 ) || ( ALPHA[c] == false ) ) 2100 { 2101 return false; 2102 } 2103 else 2104 { 2105 return true; 2106 } 2107 } 2108 } 2109 2110 2111 /** 2112 * Test if the current character is a lowercased Alpha character : <br/> 2113 * <alpha> ::= [0x61-0x7A] 2114 * 2115 * @param string The string which contains the data 2116 * @param index Current position in the string 2117 * @return <code>true</code> if the current character is a lower Alpha 2118 * character 2119 */ 2120 public static final boolean isAlphaLowercaseASCII( String string, int index ) 2121 { 2122 
if ( string == null ) 2123 { 2124 return false; 2125 } 2126 2127 int length = string.length(); 2128 2129 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 2130 { 2131 return false; 2132 } 2133 else 2134 { 2135 char c = string.charAt( index ); 2136 2137 if ( ( c > 127 ) || ( ALPHA_LOWER_CASE[c] == false ) ) 2138 { 2139 return false; 2140 } 2141 else 2142 { 2143 return true; 2144 } 2145 } 2146 } 2147 2148 2149 /** 2150 * Test if the current character is a uppercased Alpha character : <br/> 2151 * <alpha> ::= [0x61-0x7A] 2152 * 2153 * @param string The string which contains the data 2154 * @param index Current position in the string 2155 * @return <code>true</code> if the current character is a lower Alpha 2156 * character 2157 */ 2158 public static final boolean isAlphaUppercaseASCII( String string, int index ) 2159 { 2160 if ( string == null ) 2161 { 2162 return false; 2163 } 2164 2165 int length = string.length(); 2166 2167 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 2168 { 2169 return false; 2170 } 2171 else 2172 { 2173 char c = string.charAt( index ); 2174 2175 if ( ( c > 127 ) || ( ALPHA_UPPER_CASE[c] == false ) ) 2176 { 2177 return false; 2178 } 2179 else 2180 { 2181 return true; 2182 } 2183 } 2184 } 2185 2186 2187 /** 2188 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' | 2189 * '3' | '4' | '5' | '6' | '7' | '8' | '9' 2190 * 2191 * @param bytes The buffer which contains the data 2192 * @param index Current position in the buffer 2193 * @return <code>true</code> if the current character is a Digit 2194 */ 2195 public static final boolean isDigit( byte[] bytes, int index ) 2196 { 2197 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) ) 2198 { 2199 return false; 2200 } 2201 else 2202 { 2203 return ( ( ( ( bytes[index] | 0x7F ) != 0x7F ) || !DIGIT[bytes[index]] ) ? 
false : true ); 2204 } 2205 } 2206 2207 2208 /** 2209 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' | 2210 * '3' | '4' | '5' | '6' | '7' | '8' | '9' 2211 * 2212 * @param chars The buffer which contains the data 2213 * @param index Current position in the buffer 2214 * @return <code>true</code> if the current character is a Digit 2215 */ 2216 public static final boolean isDigit( char[] chars, int index ) 2217 { 2218 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) ) 2219 { 2220 return false; 2221 } 2222 else 2223 { 2224 return ( ( ( chars[index] > 127 ) || !DIGIT[chars[index]] ) ? false : true ); 2225 } 2226 } 2227 2228 2229 /** 2230 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' | 2231 * '3' | '4' | '5' | '6' | '7' | '8' | '9' 2232 * 2233 * @param string The string which contains the data 2234 * @param index Current position in the string 2235 * @return <code>true</code> if the current character is a Digit 2236 */ 2237 public static final boolean isDigit( String string, int index ) 2238 { 2239 if ( string == null ) 2240 { 2241 return false; 2242 } 2243 2244 int length = string.length(); 2245 2246 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 2247 { 2248 return false; 2249 } 2250 else 2251 { 2252 char c = string.charAt( index ); 2253 return ( ( ( c > 127 ) || !DIGIT[c] ) ? false : true ); 2254 } 2255 } 2256 2257 2258 /** 2259 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' | 2260 * '3' | '4' | '5' | '6' | '7' | '8' | '9' 2261 * 2262 * @param chars The buffer which contains the data 2263 * @return <code>true</code> if the current character is a Digit 2264 */ 2265 public static final boolean isDigit( char[] chars ) 2266 { 2267 if ( ( chars == null ) || ( chars.length == 0 ) ) 2268 { 2269 return false; 2270 } 2271 else 2272 { 2273 return ( ( ( chars[0] > 127 ) || !DIGIT[chars[0]] ) ? 
false : true ); 2274 } 2275 } 2276 2277 2278 /** 2279 * Check if the current character is an 7 bits ASCII CHAR (between 0 and 2280 * 127). 2281 * <char> ::= <alpha> | <digit> 2282 * 2283 * @param string The string which contains the data 2284 * @param index Current position in the string 2285 * @return The position of the next character, if the current one is a CHAR. 2286 */ 2287 public static final boolean isAlphaDigit( String string, int index ) 2288 { 2289 if ( string == null ) 2290 { 2291 return false; 2292 } 2293 2294 int length = string.length(); 2295 2296 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 2297 { 2298 return false; 2299 } 2300 else 2301 { 2302 char c = string.charAt( index ); 2303 2304 if ( ( c > 127 ) || ( ALPHA_DIGIT[c] == false ) ) 2305 { 2306 return false; 2307 } 2308 else 2309 { 2310 return true; 2311 } 2312 } 2313 } 2314 2315 2316 /** 2317 * Check if the current character is an 7 bits ASCII CHAR (between 0 and 2318 * 127). <char> ::= <alpha> | <digit> | '-' 2319 * 2320 * @param bytes The buffer which contains the data 2321 * @param index Current position in the buffer 2322 * @return The position of the next character, if the current one is a CHAR. 2323 */ 2324 public static final boolean isAlphaDigitMinus( byte[] bytes, int index ) 2325 { 2326 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) ) 2327 { 2328 return false; 2329 } 2330 else 2331 { 2332 byte c = bytes[index]; 2333 2334 if ( ( ( c | 0x7F ) != 0x7F ) || ( CHAR[c] == false ) ) 2335 { 2336 return false; 2337 } 2338 else 2339 { 2340 return true; 2341 } 2342 } 2343 } 2344 2345 2346 /** 2347 * Check if the current character is an 7 bits ASCII CHAR (between 0 and 2348 * 127). <char> ::= <alpha> | <digit> | '-' 2349 * 2350 * @param chars The buffer which contains the data 2351 * @param index Current position in the buffer 2352 * @return The position of the next character, if the current one is a CHAR. 
2353 */ 2354 public static final boolean isAlphaDigitMinus( char[] chars, int index ) 2355 { 2356 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) ) 2357 { 2358 return false; 2359 } 2360 else 2361 { 2362 char c = chars[index]; 2363 2364 if ( ( c > 127 ) || ( CHAR[c] == false ) ) 2365 { 2366 return false; 2367 } 2368 else 2369 { 2370 return true; 2371 } 2372 } 2373 } 2374 2375 2376 /** 2377 * Check if the current character is an 7 bits ASCII CHAR (between 0 and 2378 * 127). <char> ::= <alpha> | <digit> | '-' 2379 * 2380 * @param string The string which contains the data 2381 * @param index Current position in the string 2382 * @return The position of the next character, if the current one is a CHAR. 2383 */ 2384 public static final boolean isAlphaDigitMinus( String string, int index ) 2385 { 2386 if ( string == null ) 2387 { 2388 return false; 2389 } 2390 2391 int length = string.length(); 2392 2393 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) ) 2394 { 2395 return false; 2396 } 2397 else 2398 { 2399 char c = string.charAt( index ); 2400 2401 if ( ( c > 127 ) || ( CHAR[c] == false ) ) 2402 { 2403 return false; 2404 } 2405 else 2406 { 2407 return true; 2408 } 2409 } 2410 } 2411 2412 2413 // Empty checks 2414 // ----------------------------------------------------------------------- 2415 /** 2416 * <p> 2417 * Checks if a String is empty ("") or null. 2418 * </p> 2419 * 2420 * <pre> 2421 * StringUtils.isEmpty(null) = true 2422 * StringUtils.isEmpty("") = true 2423 * StringUtils.isEmpty(" ") = false 2424 * StringUtils.isEmpty("bob") = false 2425 * StringUtils.isEmpty(" bob ") = false 2426 * </pre> 2427 * 2428 * <p> 2429 * NOTE: This method changed in Lang version 2.0. It no longer trims the 2430 * String. That functionality is available in isBlank(). 
2431 * </p> 2432 * 2433 * @param str the String to check, may be null 2434 * @return <code>true</code> if the String is empty or null 2435 */ 2436 public static final boolean isEmpty( String str ) 2437 { 2438 return str == null || str.length() == 0; 2439 } 2440 2441 2442 /** 2443 * Checks if a bytes array is empty or null. 2444 * 2445 * @param bytes The bytes array to check, may be null 2446 * @return <code>true</code> if the bytes array is empty or null 2447 */ 2448 public static final boolean isEmpty( byte[] bytes ) 2449 { 2450 return bytes == null || bytes.length == 0; 2451 } 2452 2453 2454 /** 2455 * <p> 2456 * Checks if a String is not empty ("") and not null. 2457 * </p> 2458 * 2459 * <pre> 2460 * StringUtils.isNotEmpty(null) = false 2461 * StringUtils.isNotEmpty("") = false 2462 * StringUtils.isNotEmpty(" ") = true 2463 * StringUtils.isNotEmpty("bob") = true 2464 * StringUtils.isNotEmpty(" bob ") = true 2465 * </pre> 2466 * 2467 * @param str the String to check, may be null 2468 * @return <code>true</code> if the String is not empty and not null 2469 */ 2470 public static final boolean isNotEmpty( String str ) 2471 { 2472 return str != null && str.length() > 0; 2473 } 2474 2475 2476 /** 2477 * <p> 2478 * Removes spaces (char <= 32) from both start and ends of this String, 2479 * handling <code>null</code> by returning <code>null</code>. 2480 * </p> 2481 * Trim removes start and end characters <= 32. 2482 * 2483 * <pre> 2484 * StringUtils.trim(null) = null 2485 * StringUtils.trim("") = "" 2486 * StringUtils.trim(" ") = "" 2487 * StringUtils.trim("abc") = "abc" 2488 * StringUtils.trim(" abc ") = "abc" 2489 * </pre> 2490 * 2491 * @param str the String to be trimmed, may be null 2492 * @return the trimmed string, <code>null</code> if null String input 2493 */ 2494 public static final String trim( String str ) 2495 { 2496 return ( isEmpty( str ) ? 
"" : str.trim() ); 2497 } 2498 2499 2500 /** 2501 * <p> 2502 * Removes spaces (char <= 32) from both start and ends of this bytes 2503 * array, handling <code>null</code> by returning <code>null</code>. 2504 * </p> 2505 * Trim removes start and end characters <= 32. 2506 * 2507 * <pre> 2508 * StringUtils.trim(null) = null 2509 * StringUtils.trim("") = "" 2510 * StringUtils.trim(" ") = "" 2511 * StringUtils.trim("abc") = "abc" 2512 * StringUtils.trim(" abc ") = "abc" 2513 * </pre> 2514 * 2515 * @param bytes the byte array to be trimmed, may be null 2516 * 2517 * @return the trimmed byte array 2518 */ 2519 public static final byte[] trim( byte[] bytes ) 2520 { 2521 if ( isEmpty( bytes ) ) 2522 { 2523 return EMPTY_BYTES; 2524 } 2525 2526 int start = trimLeft( bytes, 0 ); 2527 int end = trimRight( bytes, bytes.length - 1 ); 2528 2529 int length = end - start + 1; 2530 2531 if ( length != 0 ) 2532 { 2533 byte[] newBytes = new byte[end - start + 1]; 2534 2535 System.arraycopy( bytes, start, newBytes, 0, length ); 2536 2537 return newBytes; 2538 } 2539 else 2540 { 2541 return EMPTY_BYTES; 2542 } 2543 } 2544 2545 2546 /** 2547 * <p> 2548 * Removes spaces (char <= 32) from start of this String, handling 2549 * <code>null</code> by returning <code>null</code>. 2550 * </p> 2551 * Trim removes start characters <= 32. 
2552 * 2553 * <pre> 2554 * StringUtils.trimLeft(null) = null 2555 * StringUtils.trimLeft("") = "" 2556 * StringUtils.trimLeft(" ") = "" 2557 * StringUtils.trimLeft("abc") = "abc" 2558 * StringUtils.trimLeft(" abc ") = "abc " 2559 * </pre> 2560 * 2561 * @param str the String to be trimmed, may be null 2562 * @return the trimmed string, <code>null</code> if null String input 2563 */ 2564 public static final String trimLeft( String str ) 2565 { 2566 if ( isEmpty( str ) ) 2567 { 2568 return ""; 2569 } 2570 2571 int start = 0; 2572 int end = str.length(); 2573 2574 while ( ( start < end ) && ( str.charAt( start ) == ' ' ) ) 2575 { 2576 start++; 2577 } 2578 2579 return ( start == 0 ? str : str.substring( start ) ); 2580 } 2581 2582 2583 /** 2584 * <p> 2585 * Removes spaces (char <= 32) from start of this array, handling 2586 * <code>null</code> by returning <code>null</code>. 2587 * </p> 2588 * Trim removes start characters <= 32. 2589 * 2590 * <pre> 2591 * StringUtils.trimLeft(null) = null 2592 * StringUtils.trimLeft("") = "" 2593 * StringUtils.trimLeft(" ") = "" 2594 * StringUtils.trimLeft("abc") = "abc" 2595 * StringUtils.trimLeft(" abc ") = "abc " 2596 * </pre> 2597 * 2598 * @param chars the chars array to be trimmed, may be null 2599 * @return the position of the first char which is not a space, or the last 2600 * position of the array. 2601 */ 2602 public static final int trimLeft( char[] chars, int pos ) 2603 { 2604 if ( chars == null ) 2605 { 2606 return pos; 2607 } 2608 2609 while ( ( pos < chars.length ) && ( chars[pos] == ' ' ) ) 2610 { 2611 pos++; 2612 } 2613 2614 return pos; 2615 } 2616 2617 2618 /** 2619 * <p> 2620 * Removes spaces (char <= 32) from a position in this array, handling 2621 * <code>null</code> by returning <code>null</code>. 2622 * </p> 2623 * Trim removes start characters <= 32. 2624 * 2625 * <pre> 2626 * StringUtils.trimLeft(null) = null 2627 * StringUtils.trimLeft("",...) = "" 2628 * StringUtils.trimLeft(" ",...) 
= "" 2629 * StringUtils.trimLeft("abc",...) = "abc" 2630 * StringUtils.trimLeft(" abc ",...) = "abc " 2631 * </pre> 2632 * 2633 * @param string the string to be trimmed, may be null 2634 * @param pos The starting position 2635 */ 2636 public static final void trimLeft( String string, Position pos ) 2637 { 2638 if ( string == null ) 2639 { 2640 return; 2641 } 2642 2643 int length = string.length(); 2644 2645 while ( ( pos.start < length ) && ( string.charAt( pos.start ) == ' ' ) ) 2646 { 2647 pos.start++; 2648 } 2649 2650 pos.end = pos.start; 2651 2652 return; 2653 } 2654 2655 2656 /** 2657 * <p> 2658 * Removes spaces (char <= 32) from a position in this array, handling 2659 * <code>null</code> by returning <code>null</code>. 2660 * </p> 2661 * Trim removes start characters <= 32. 2662 * 2663 * <pre> 2664 * StringUtils.trimLeft(null) = null 2665 * StringUtils.trimLeft("",...) = "" 2666 * StringUtils.trimLeft(" ",...) = "" 2667 * StringUtils.trimLeft("abc",...) = "abc" 2668 * StringUtils.trimLeft(" abc ",...) = "abc " 2669 * </pre> 2670 * 2671 * @param bytes the byte array to be trimmed, may be null 2672 * @param pos The starting position 2673 */ 2674 public static final void trimLeft( byte[] bytes, Position pos ) 2675 { 2676 if ( bytes == null ) 2677 { 2678 return; 2679 } 2680 2681 int length = bytes.length; 2682 2683 while ( ( pos.start < length ) && ( bytes[ pos.start ] == ' ' ) ) 2684 { 2685 pos.start++; 2686 } 2687 2688 pos.end = pos.start; 2689 2690 return; 2691 } 2692 2693 2694 /** 2695 * <p> 2696 * Removes spaces (char <= 32) from start of this array, handling 2697 * <code>null</code> by returning <code>null</code>. 2698 * </p> 2699 * Trim removes start characters <= 32. 
2700 * 2701 * <pre> 2702 * StringUtils.trimLeft(null) = null 2703 * StringUtils.trimLeft("") = "" 2704 * StringUtils.trimLeft(" ") = "" 2705 * StringUtils.trimLeft("abc") = "abc" 2706 * StringUtils.trimLeft(" abc ") = "abc " 2707 * </pre> 2708 * 2709 * @param bytes the byte array to be trimmed, may be null 2710 * @return the position of the first byte which is not a space, or the last 2711 * position of the array. 2712 */ 2713 public static final int trimLeft( byte[] bytes, int pos ) 2714 { 2715 if ( bytes == null ) 2716 { 2717 return pos; 2718 } 2719 2720 while ( ( pos < bytes.length ) && ( bytes[pos] == ' ' ) ) 2721 { 2722 pos++; 2723 } 2724 2725 return pos; 2726 } 2727 2728 2729 /** 2730 * <p> 2731 * Removes spaces (char <= 32) from end of this String, handling 2732 * <code>null</code> by returning <code>null</code>. 2733 * </p> 2734 * Trim removes start characters <= 32. 2735 * 2736 * <pre> 2737 * StringUtils.trimRight(null) = null 2738 * StringUtils.trimRight("") = "" 2739 * StringUtils.trimRight(" ") = "" 2740 * StringUtils.trimRight("abc") = "abc" 2741 * StringUtils.trimRight(" abc ") = " abc" 2742 * </pre> 2743 * 2744 * @param str the String to be trimmed, may be null 2745 * @return the trimmed string, <code>null</code> if null String input 2746 */ 2747 public static final String trimRight( String str ) 2748 { 2749 if ( isEmpty( str ) ) 2750 { 2751 return ""; 2752 } 2753 2754 int length = str.length(); 2755 int end = length; 2756 2757 while ( ( end > 0 ) && ( str.charAt( end - 1 ) == ' ' ) ) 2758 { 2759 if ( ( end > 1 ) && ( str.charAt( end - 2 ) == '\\' ) ) 2760 { 2761 break; 2762 } 2763 2764 end--; 2765 } 2766 2767 return ( end == length ? str : str.substring( 0, end ) ); 2768 } 2769 2770 /** 2771 * <p> 2772 * Removes spaces (char <= 32) from end of this String, handling 2773 * <code>null</code> by returning <code>null</code>. 2774 * </p> 2775 * Trim removes start characters <= 32. 
2776 * 2777 * <pre> 2778 * StringUtils.trimRight(null) = null 2779 * StringUtils.trimRight("") = "" 2780 * StringUtils.trimRight(" ") = "" 2781 * StringUtils.trimRight("abc") = "abc" 2782 * StringUtils.trimRight(" abc ") = " abc" 2783 * </pre> 2784 * 2785 * @param str the String to be trimmed, may be null 2786 * @param escapedSpace The last escaped space, if any 2787 * @return the trimmed string, <code>null</code> if null String input 2788 */ 2789 public static final String trimRight( String str, int escapedSpace ) 2790 { 2791 if ( isEmpty( str ) ) 2792 { 2793 return ""; 2794 } 2795 2796 int length = str.length(); 2797 int end = length; 2798 2799 while ( ( end > 0 ) && ( str.charAt( end - 1 ) == ' ' ) && ( end > escapedSpace ) ) 2800 { 2801 if ( ( end > 1 ) && ( str.charAt( end - 2 ) == '\\' ) ) 2802 { 2803 break; 2804 } 2805 2806 end--; 2807 } 2808 2809 return ( end == length ? str : str.substring( 0, end ) ); 2810 } 2811 2812 2813 /** 2814 * <p> 2815 * Removes spaces (char <= 32) from end of this array, handling 2816 * <code>null</code> by returning <code>null</code>. 2817 * </p> 2818 * Trim removes start characters <= 32. 2819 * 2820 * <pre> 2821 * StringUtils.trimRight(null) = null 2822 * StringUtils.trimRight("") = "" 2823 * StringUtils.trimRight(" ") = "" 2824 * StringUtils.trimRight("abc") = "abc" 2825 * StringUtils.trimRight(" abc ") = " abc" 2826 * </pre> 2827 * 2828 * @param chars the chars array to be trimmed, may be null 2829 * @return the position of the first char which is not a space, or the last 2830 * position of the array. 2831 */ 2832 public static final int trimRight( char[] chars, int pos ) 2833 { 2834 if ( chars == null ) 2835 { 2836 return pos; 2837 } 2838 2839 while ( ( pos >= 0 ) && ( chars[pos - 1] == ' ' ) ) 2840 { 2841 pos--; 2842 } 2843 2844 return pos; 2845 } 2846 2847 2848 /** 2849 * <p> 2850 * Removes spaces (char <= 32) from end of this string, handling 2851 * <code>null</code> by returning <code>null</code>. 
2852 * </p> 2853 * Trim removes start characters <= 32. 2854 * 2855 * <pre> 2856 * StringUtils.trimRight(null) = null 2857 * StringUtils.trimRight("") = "" 2858 * StringUtils.trimRight(" ") = "" 2859 * StringUtils.trimRight("abc") = "abc" 2860 * StringUtils.trimRight(" abc ") = " abc" 2861 * </pre> 2862 * 2863 * @param string the string to be trimmed, may be null 2864 * @return the position of the first char which is not a space, or the last 2865 * position of the string. 2866 */ 2867 public static final String trimRight( String string, Position pos ) 2868 { 2869 if ( string == null ) 2870 { 2871 return ""; 2872 } 2873 2874 while ( ( pos.end >= 0 ) && ( string.charAt( pos.end - 1 ) == ' ' ) ) 2875 { 2876 if ( ( pos.end > 1 ) && ( string.charAt( pos.end - 2 ) == '\\' ) ) 2877 { 2878 break; 2879 } 2880 2881 pos.end--; 2882 } 2883 2884 return ( pos.end == string.length() ? string : string.substring( 0, pos.end ) ); 2885 } 2886 2887 2888 /** 2889 * <p> 2890 * Removes spaces (char <= 32) from end of this string, handling 2891 * <code>null</code> by returning <code>null</code>. 2892 * </p> 2893 * Trim removes start characters <= 32. 2894 * 2895 * <pre> 2896 * StringUtils.trimRight(null) = null 2897 * StringUtils.trimRight("") = "" 2898 * StringUtils.trimRight(" ") = "" 2899 * StringUtils.trimRight("abc") = "abc" 2900 * StringUtils.trimRight(" abc ") = " abc" 2901 * </pre> 2902 * 2903 * @param bytes the byte array to be trimmed, may be null 2904 * @return the position of the first char which is not a space, or the last 2905 * position of the byte array. 
2906 */ 2907 public static final String trimRight( byte[] bytes, Position pos ) 2908 { 2909 if ( bytes == null ) 2910 { 2911 return ""; 2912 } 2913 2914 while ( ( pos.end >= 0 ) && ( bytes[pos.end - 1] == ' ' ) ) 2915 { 2916 if ( ( pos.end > 1 ) && ( bytes[pos.end - 2] == '\\' ) ) 2917 { 2918 break; 2919 } 2920 2921 pos.end--; 2922 } 2923 2924 if ( pos.end == bytes.length ) 2925 { 2926 return StringTools.utf8ToString( bytes ); 2927 } 2928 else 2929 { 2930 return StringTools.utf8ToString( bytes, pos.end ); 2931 } 2932 } 2933 2934 2935 /** 2936 * <p> 2937 * Removes spaces (char <= 32) from end of this array, handling 2938 * <code>null</code> by returning <code>null</code>. 2939 * </p> 2940 * Trim removes start characters <= 32. 2941 * 2942 * <pre> 2943 * StringUtils.trimRight(null) = null 2944 * StringUtils.trimRight("") = "" 2945 * StringUtils.trimRight(" ") = "" 2946 * StringUtils.trimRight("abc") = "abc" 2947 * StringUtils.trimRight(" abc ") = " abc" 2948 * </pre> 2949 * 2950 * @param bytes the byte array to be trimmed, may be null 2951 * @return the position of the first char which is not a space, or the last 2952 * position of the array. 2953 */ 2954 public static final int trimRight( byte[] bytes, int pos ) 2955 { 2956 if ( bytes == null ) 2957 { 2958 return pos; 2959 } 2960 2961 while ( ( pos >= 0 ) && ( bytes[pos] == ' ' ) ) 2962 { 2963 pos--; 2964 } 2965 2966 return pos; 2967 } 2968 2969 2970 // Case conversion 2971 // ----------------------------------------------------------------------- 2972 /** 2973 * <p> 2974 * Converts a String to upper case as per {@link String#toUpperCase()}. 2975 * </p> 2976 * <p> 2977 * A <code>null</code> input String returns <code>null</code>. 
2978 * </p> 2979 * 2980 * <pre> 2981 * StringUtils.upperCase(null) = null 2982 * StringUtils.upperCase("") = "" 2983 * StringUtils.upperCase("aBc") = "ABC" 2984 * </pre> 2985 * 2986 * @param str the String to upper case, may be null 2987 * @return the upper cased String, <code>null</code> if null String input 2988 */ 2989 public static final String upperCase( String str ) 2990 { 2991 if ( str == null ) 2992 { 2993 return null; 2994 } 2995 2996 return str.toUpperCase(); 2997 } 2998 2999 3000 /** 3001 * <p> 3002 * Converts a String to lower case as per {@link String#toLowerCase()}. 3003 * </p> 3004 * <p> 3005 * A <code>null</code> input String returns <code>null</code>. 3006 * </p> 3007 * 3008 * <pre> 3009 * StringUtils.lowerCase(null) = null 3010 * StringUtils.lowerCase("") = "" 3011 * StringUtils.lowerCase("aBc") = "abc" 3012 * </pre> 3013 * 3014 * @param str the String to lower case, may be null 3015 * @return the lower cased String, <code>null</code> if null String input 3016 */ 3017 public static final String lowerCase( String str ) 3018 { 3019 if ( str == null ) 3020 { 3021 return null; 3022 } 3023 3024 return str.toLowerCase(); 3025 } 3026 3027 3028 /** 3029 * Rewrote the toLowercase method to improve performances. 3030 * In Ldap, attributesType are supposed to use ASCII chars : 3031 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only. We will take 3032 * care of any other chars either. 
3033 * 3034 * @param str The String to lowercase 3035 * @return The lowercase string 3036 */ 3037 public static final String lowerCaseAscii( String str ) 3038 { 3039 if ( str == null ) 3040 { 3041 return null; 3042 } 3043 3044 char[] chars = str.toCharArray(); 3045 int pos = 0; 3046 3047 for ( char c:chars ) 3048 { 3049 chars[pos++] = TO_LOWER_CASE[c]; 3050 } 3051 3052 return new String( chars ); 3053 } 3054 3055 3056 // Equals 3057 // ----------------------------------------------------------------------- 3058 /** 3059 * <p> 3060 * Compares two Strings, returning <code>true</code> if they are equal. 3061 * </p> 3062 * <p> 3063 * <code>null</code>s are handled without exceptions. Two 3064 * <code>null</code> references are considered to be equal. The comparison 3065 * is case sensitive. 3066 * </p> 3067 * 3068 * <pre> 3069 * StringUtils.equals(null, null) = true 3070 * StringUtils.equals(null, "abc") = false 3071 * StringUtils.equals("abc", null) = false 3072 * StringUtils.equals("abc", "abc") = true 3073 * StringUtils.equals("abc", "ABC") = false 3074 * </pre> 3075 * 3076 * @see java.lang.String#equals(Object) 3077 * @param str1 the first String, may be null 3078 * @param str2 the second String, may be null 3079 * @return <code>true</code> if the Strings are equal, case sensitive, or 3080 * both <code>null</code> 3081 */ 3082 public static final boolean equals( String str1, String str2 ) 3083 { 3084 return str1 == null ? str2 == null : str1.equals( str2 ); 3085 } 3086 3087 3088 /** 3089 * Return an UTF-8 encoded String 3090 * 3091 * @param bytes The byte array to be transformed to a String 3092 * @return A String. 
3093 */ 3094 public static final String utf8ToString( byte[] bytes ) 3095 { 3096 if ( bytes == null ) 3097 { 3098 return ""; 3099 } 3100 3101 try 3102 { 3103 return new String( bytes, "UTF-8" ); 3104 } 3105 catch ( UnsupportedEncodingException uee ) 3106 { 3107 // if this happens something is really strange 3108 throw new RuntimeException( uee ); 3109 } 3110 } 3111 3112 3113 /** 3114 * Return an UTF-8 encoded String 3115 * 3116 * @param bytes The byte array to be transformed to a String 3117 * @param length The length of the byte array to be converted 3118 * @return A String. 3119 */ 3120 public static final String utf8ToString( byte[] bytes, int length ) 3121 { 3122 if ( bytes == null ) 3123 { 3124 return ""; 3125 } 3126 3127 try 3128 { 3129 return new String( bytes, 0, length, "UTF-8" ); 3130 } 3131 catch ( UnsupportedEncodingException uee ) 3132 { 3133 // if this happens something is really strange 3134 throw new RuntimeException( uee ); 3135 } 3136 } 3137 3138 3139 /** 3140 * Return an UTF-8 encoded String 3141 * 3142 * @param bytes The byte array to be transformed to a String 3143 * @param start the starting position in the byte array 3144 * @param length The length of the byte array to be converted 3145 * @return A String. 
3146 */ 3147 public static final String utf8ToString( byte[] bytes, int start, int length ) 3148 { 3149 if ( bytes == null ) 3150 { 3151 return ""; 3152 } 3153 3154 try 3155 { 3156 return new String( bytes, start, length, "UTF-8" ); 3157 } 3158 catch ( UnsupportedEncodingException uee ) 3159 { 3160 // if this happens something is really strange 3161 throw new RuntimeException( uee ); 3162 } 3163 } 3164 3165 3166 /** 3167 * Return UTF-8 encoded byte[] representation of a String 3168 * 3169 * @param string The string to be transformed to a byte array 3170 * @return The transformed byte array 3171 */ 3172 public static final byte[] getBytesUtf8( String string ) 3173 { 3174 if ( string == null ) 3175 { 3176 return new byte[0]; 3177 } 3178 3179 try 3180 { 3181 return string.getBytes( "UTF-8" ); 3182 } 3183 catch ( UnsupportedEncodingException uee ) 3184 { 3185 // if this happens something is really strange 3186 throw new RuntimeException( uee ); 3187 } 3188 } 3189 3190 3191 /** 3192 * Utility method that return a String representation of a list 3193 * 3194 * @param list The list to transform to a string 3195 * @return A csv string 3196 */ 3197 public static final String listToString( List<?> list ) 3198 { 3199 if ( ( list == null ) || ( list.size() == 0 ) ) 3200 { 3201 return ""; 3202 } 3203 3204 StringBuilder sb = new StringBuilder(); 3205 boolean isFirst = true; 3206 3207 for ( Object elem : list ) 3208 { 3209 if ( isFirst ) 3210 { 3211 isFirst = false; 3212 } 3213 else 3214 { 3215 sb.append( ", " ); 3216 } 3217 3218 sb.append( elem ); 3219 } 3220 3221 return sb.toString(); 3222 } 3223 3224 3225 3226 3227 /** 3228 * Utility method that return a String representation of a set 3229 * 3230 * @param set The set to transform to a string 3231 * @return A csv string 3232 */ 3233 public static final String setToString( Set<?> set ) 3234 { 3235 if ( ( set == null ) || ( set.size() == 0 ) ) 3236 { 3237 return ""; 3238 } 3239 3240 StringBuilder sb = new StringBuilder(); 3241 
boolean isFirst = true; 3242 3243 for ( Object elem : set ) 3244 { 3245 if ( isFirst ) 3246 { 3247 isFirst = false; 3248 } 3249 else 3250 { 3251 sb.append( ", " ); 3252 } 3253 3254 sb.append( elem ); 3255 } 3256 3257 return sb.toString(); 3258 } 3259 3260 3261 /** 3262 * Utility method that return a String representation of a list 3263 * 3264 * @param list The list to transform to a string 3265 * @param tabs The tabs to add in ffront of the elements 3266 * @return A csv string 3267 */ 3268 public static final String listToString( List<?> list, String tabs ) 3269 { 3270 if ( ( list == null ) || ( list.size() == 0 ) ) 3271 { 3272 return ""; 3273 } 3274 3275 StringBuffer sb = new StringBuffer(); 3276 3277 for ( Object elem : list ) 3278 { 3279 sb.append( tabs ); 3280 sb.append( elem ); 3281 sb.append( '\n' ); 3282 } 3283 3284 return sb.toString(); 3285 } 3286 3287 3288 /** 3289 * Utility method that return a String representation of a map. The elements 3290 * will be represented as "key = value" 3291 * 3292 * @param map The map to transform to a string 3293 * @return A csv string 3294 */ 3295 public static final String mapToString( Map<?,?> map ) 3296 { 3297 if ( ( map == null ) || ( map.size() == 0 ) ) 3298 { 3299 return ""; 3300 } 3301 3302 StringBuffer sb = new StringBuffer(); 3303 boolean isFirst = true; 3304 3305 for ( Map.Entry<?, ?> entry:map.entrySet() ) 3306 { 3307 if ( isFirst ) 3308 { 3309 isFirst = false; 3310 } 3311 else 3312 { 3313 sb.append( ", " ); 3314 } 3315 3316 sb.append( entry.getKey() ); 3317 sb.append( " = '" ).append( entry.getValue() ).append( "'" ); 3318 } 3319 3320 return sb.toString(); 3321 } 3322 3323 3324 /** 3325 * Utility method that return a String representation of a map. 
The elements 3326 * will be represented as "key = value" 3327 * 3328 * @param map The map to transform to a string 3329 * @param tabs The tabs to add in ffront of the elements 3330 * @return A csv string 3331 */ 3332 public static final String mapToString( Map<?,?> map, String tabs ) 3333 { 3334 if ( ( map == null ) || ( map.size() == 0 ) ) 3335 { 3336 return ""; 3337 } 3338 3339 StringBuffer sb = new StringBuffer(); 3340 3341 for ( Map.Entry<?, ?> entry:map.entrySet() ) 3342 { 3343 sb.append( tabs ); 3344 sb.append( entry.getKey() ); 3345 3346 sb.append( " = '" ).append( entry.getValue().toString() ).append( "'\n" ); 3347 } 3348 3349 return sb.toString(); 3350 } 3351 3352 3353 /** 3354 * Get the default charset 3355 * 3356 * @return The default charset 3357 */ 3358 public static final String getDefaultCharsetName() 3359 { 3360 if ( null == defaultCharset ) 3361 { 3362 try 3363 { 3364 // Try with jdk 1.5 method, if we are using a 1.5 jdk :) 3365 Method method = Charset.class.getMethod( "defaultCharset", new Class[0] ); 3366 defaultCharset = ((Charset) method.invoke( null, new Object[0]) ).name(); 3367 } 3368 catch (Exception e) 3369 { 3370 // fall back to old method 3371 defaultCharset = new OutputStreamWriter( new ByteArrayOutputStream() ).getEncoding(); 3372 } 3373 } 3374 3375 return defaultCharset; 3376 } 3377 3378 3379 /** 3380 * Decodes values of attributes in the DN encoded in hex into a UTF-8 3381 * String. RFC2253 allows a DN's attribute to be encoded in hex. 3382 * The encoded value starts with a # then is followed by an even 3383 * number of hex characters. 
3384 * 3385 * @param str the string to decode 3386 * @return the decoded string 3387 */ 3388 public static final String decodeHexString( String str ) throws InvalidNameException 3389 { 3390 if ( str == null || str.length() == 0 ) 3391 { 3392 throw new InvalidNameException( I18n.err( I18n.ERR_04431 ) ); 3393 } 3394 3395 char[] chars = str.toCharArray(); 3396 3397 if ( chars[0] != '#' ) 3398 { 3399 throw new InvalidNameException( I18n.err( I18n.ERR_04432, str ) ); 3400 } 3401 3402 // the bytes representing the encoded string of hex 3403 // this should be ( length - 1 )/2 in size 3404 byte[] decoded = new byte[ ( chars.length - 1 ) >> 1 ]; 3405 3406 for ( int ii = 1, jj = 0 ; ii < chars.length; ii+=2, jj++ ) 3407 { 3408 int ch = ( StringTools.HEX_VALUE[chars[ii]] << 4 ) + 3409 StringTools.HEX_VALUE[chars[ii+1]]; 3410 decoded[jj] = ( byte ) ch; 3411 } 3412 3413 return StringTools.utf8ToString( decoded ); 3414 } 3415 3416 3417 /** 3418 * Decodes sequences of escaped hex within an attribute's value into 3419 * a UTF-8 String. The hex is decoded inline and the complete decoded 3420 * String is returned. 
    /**
     * Decodes sequences of escaped hex within an attribute's value into 
     * a UTF-8 String.  The hex is decoded inline and the complete decoded
     * String is returned.
     * <p>
     * A '\' followed by two hex digits starts a run of escaped bytes, which
     * is collected by collectEscapedHexBytes and decoded as UTF-8. A '\'
     * followed by anything else must be followed by a char accepted by
     * DNUtils.isPairCharOnly, which is then stored without the '\'. Every
     * other char is copied as is.
     * </p>
     * 
     * @param str the string containing hex escapes
     * @return the decoded string
     * @throws InvalidNameException if the string is null or empty, if a '\'
     *         is followed by a char which is neither an hex pair nor accepted
     *         by DNUtils.isPairCharOnly, or if the string ends with a pending
     *         '\'
     */
    public static final String decodeEscapedHex( String str ) throws InvalidNameException
    {
        if ( str == null )
        {
            throw new InvalidNameException( I18n.err( I18n.ERR_04433 ) );
        }
        
        int length = str.length();
        
        if ( length == 0 )
        {
            throw new InvalidNameException( I18n.err( I18n.ERR_04434 ) );
        }
        
        // create buffer and add everything before start of scan
        StringBuffer buf = new StringBuffer();
        // NOTE(review): this is the ByteBuffer of this package, not the
        // java.nio one ; buffer() and position() are presumably its backing
        // array and the number of bytes appended so far - to be confirmed
        ByteBuffer bb = new ByteBuffer();
        // true when the previous char was a '\' not starting an hex pair
        boolean escaped = false;
        
        // start scaning until we find an escaped series of bytes
        for ( int ii = 0; ii < length; ii++ )
        {
            char c = str.charAt( ii );
            
            if ( !escaped && c == '\\' )
            {
                // we have the start of a hex escape sequence
                if ( isHex( str, ii+1 ) && isHex ( str, ii+2 ) )
                {
                    bb.clear();
                    // collects all the consecutive '\XX' escapes, and returns
                    // the number of chars they use
                    int advancedBy = collectEscapedHexBytes( bb, str, ii );
                    // minus one, as the loop increments ii too
                    ii+=advancedBy-1;
                    buf.append( StringTools.utf8ToString( bb.buffer(), bb.position() ) );
                    escaped = false;
                    continue;
                }
                else
                {
                    // It may be an escaped char ( ' ', '"', '#', '+', ',', ';', '<', '=', '>', '\' )
                    escaped = true;
                    continue;
                }
            }
            
            if ( escaped )
            {
                if ( DNUtils.isPairCharOnly( c ) )
                {
                    // It is an escaped char ( ' ', '"', '#', '+', ',', ';', '<', '=', '>', '\' )
                    // Stores it into the buffer without the '\'
                    escaped = false;
                    buf.append( c );
                    continue;
                }
                else
                {
                    throw new InvalidNameException( I18n.err( I18n.ERR_04435 ) );
                }
            }
            else
            {
                buf.append( str.charAt( ii ) );
            }
        }
        
        if ( escaped )
        {
            // We should not have a '\' at the end of the string
            throw new InvalidNameException( I18n.err( I18n.ERR_04436 ) );
        }

        return buf.toString();
    }
escaoed list of bytes to a byte[] 3502 * 3503 * @param str the string containing hex escapes 3504 * @return the converted byte[] 3505 */ 3506 public static final byte[] convertEscapedHex( String str ) throws InvalidNameException 3507 { 3508 if ( str == null ) 3509 { 3510 throw new InvalidNameException( I18n.err( I18n.ERR_04433 ) ); 3511 } 3512 3513 int length = str.length(); 3514 3515 if ( length == 0 ) 3516 { 3517 throw new InvalidNameException( I18n.err( I18n.ERR_04434 ) ); 3518 } 3519 3520 // create buffer and add everything before start of scan 3521 byte[] buf = new byte[ str.length()/3]; 3522 int pos = 0; 3523 3524 // start scaning until we find an escaped series of bytes 3525 for ( int i = 0; i < length; i++ ) 3526 { 3527 char c = str.charAt( i ); 3528 3529 if ( c == '\\' ) 3530 { 3531 // we have the start of a hex escape sequence 3532 if ( isHex( str, i+1 ) && isHex ( str, i+2 ) ) 3533 { 3534 byte value = ( byte ) ( (StringTools.HEX_VALUE[str.charAt( i+1 )] << 4 ) + 3535 StringTools.HEX_VALUE[str.charAt( i+2 )] ); 3536 3537 i+=2; 3538 buf[pos++] = value; 3539 } 3540 } 3541 else 3542 { 3543 throw new InvalidNameException( I18n.err( I18n.ERR_04435 ) ); 3544 } 3545 } 3546 3547 return buf; 3548 } 3549 3550 3551 /** 3552 * Collects an hex sequence from a string, and returns the value 3553 * as an integer, after having modified the initial value (the escaped 3554 * hex value is transsformed to the byte it represents). 
3555 * 3556 * @param bb the buffer which will contain the unescaped byte 3557 * @param str the initial string with ecaped chars 3558 * @param index the position in the string of the escaped data 3559 * @return the byte as an integer 3560 */ 3561 public static int collectEscapedHexBytes( ByteBuffer bb, String str, int index ) 3562 { 3563 int advanceBy = 0; 3564 3565 for ( int ii = index; ii < str.length(); ii += 3, advanceBy += 3 ) 3566 { 3567 // we have the start of a hex escape sequence 3568 if ( ( str.charAt( ii ) == '\\' ) && isHex( str, ii+1 ) && isHex ( str, ii+2 ) ) 3569 { 3570 int bite = ( StringTools.HEX_VALUE[str.charAt( ii+1 )] << 4 ) + 3571 StringTools.HEX_VALUE[str.charAt( ii+2 )]; 3572 bb.append( bite ); 3573 } 3574 else 3575 { 3576 break; 3577 } 3578 } 3579 3580 return advanceBy; 3581 } 3582 3583 3584 /** 3585 * Thansform an array of ASCII bytes to a string. the byte array should contains 3586 * only values in [0, 127]. 3587 * 3588 * @param bytes The byte array to transform 3589 * @return The resulting string 3590 */ 3591 public static String asciiBytesToString( byte[] bytes ) 3592 { 3593 if ( (bytes == null) || (bytes.length == 0 ) ) 3594 { 3595 return ""; 3596 } 3597 3598 char[] result = new char[bytes.length]; 3599 3600 for ( int i = 0; i < bytes.length; i++ ) 3601 { 3602 result[i] = (char)bytes[i]; 3603 } 3604 3605 return new String( result ); 3606 } 3607 3608 3609 /** 3610 * Build an AttributeType froma byte array. An AttributeType contains 3611 * only chars within [0-9][a-z][A-Z][-.]. 
3612 * 3613 * @param bytes The bytes containing the AttributeType 3614 * @return The AttributeType as a String 3615 */ 3616 public static String getType( byte[] bytes) 3617 { 3618 if ( bytes == null ) 3619 { 3620 return null; 3621 } 3622 3623 char[] chars = new char[bytes.length]; 3624 int pos = 0; 3625 3626 for ( byte b:bytes ) 3627 { 3628 chars[pos++] = (char)b; 3629 } 3630 3631 return new String( chars ); 3632 } 3633 3634 3635 /** 3636 * 3637 * Check that a String is a valid IA5String. An IA5String contains only 3638 * char which values is between [0, 7F] 3639 * 3640 * @param str The String to check 3641 * @return <code>true</code> if the string is an IA5String or is empty, 3642 * <code>false</code> otherwise 3643 */ 3644 public static boolean isIA5String( String str ) 3645 { 3646 if ( ( str == null ) || ( str.length() == 0 ) ) 3647 { 3648 return true; 3649 } 3650 3651 // All the chars must be in [0x00, 0x7F] 3652 for ( char c:str.toCharArray() ) 3653 { 3654 if ( ( c < 0 ) || ( c > 0x7F ) ) 3655 { 3656 return false; 3657 } 3658 } 3659 3660 return true; 3661 } 3662 3663 3664 /** 3665 * 3666 * Check that a String is a valid PrintableString. 
A PrintableString contains only 3667 * the following set of chars : 3668 * { ' ', ''', '(', ')', '+', '-', '.', '/', [0-9], ':', '=', '?', [A-Z], [a-z]} 3669 * 3670 * @param str The String to check 3671 * @return <code>true</code> if the string is a PrintableString or is empty, 3672 * <code>false</code> otherwise 3673 */ 3674 public static boolean isPrintableString( String str ) 3675 { 3676 if ( ( str == null ) || ( str.length() == 0 ) ) 3677 { 3678 return true; 3679 } 3680 3681 for ( char c:str.toCharArray() ) 3682 { 3683 if ( ( c > 127 ) || !IS_PRINTABLE_CHAR[ c ] ) 3684 { 3685 return false; 3686 } 3687 } 3688 3689 return true; 3690 } 3691 3692 3693 /** 3694 * Check if the current char is in the unicodeSubset : all chars but 3695 * '\0', '(', ')', '*' and '\' 3696 * 3697 * @param str The string to check 3698 * @param pos Position of the current char 3699 * @return True if the current char is in the unicode subset 3700 */ 3701 public static boolean isUnicodeSubset( String str, int pos ) 3702 { 3703 if ( ( str == null ) || ( str.length() <= pos ) || ( pos < 0 ) ) 3704 { 3705 return false; 3706 } 3707 3708 char c = str.charAt( pos ); 3709 3710 return ( ( c > 127 ) || UNICODE_SUBSET[c] ); 3711 } 3712 3713 3714 /** 3715 * Check if the current char is in the unicodeSubset : all chars but 3716 * '\0', '(', ')', '*' and '\' 3717 * 3718 * @param c The char to check 3719 * @return True if the current char is in the unicode subset 3720 */ 3721 public static boolean isUnicodeSubset( char c ) 3722 { 3723 return ( ( c > 127 ) || UNICODE_SUBSET[c] ); 3724 } 3725 3726 3727 /** 3728 * converts the bytes of a UUID to string 3729 * 3730 * @param bytes bytes of a UUID 3731 * @return UUID in string format 3732 */ 3733 public static String uuidToString( byte[] bytes ) 3734 { 3735 if ( bytes == null || bytes.length != 16 ) 3736 { 3737 return "Invalid UUID"; 3738 } 3739 3740 char[] hex = Hex.encodeHex( bytes ); 3741 StringBuffer sb = new StringBuffer(); 3742 sb.append( hex, 0, 8 ); 3743 
sb.append( '-' ); 3744 sb.append( hex, 8, 4 ); 3745 sb.append( '-' ); 3746 sb.append( hex, 12, 4 ); 3747 sb.append( '-' ); 3748 sb.append( hex, 16, 4 ); 3749 sb.append( '-' ); 3750 sb.append( hex, 20, 12 ); 3751 3752 return sb.toString().toLowerCase(); 3753 } 3754 3755 3756 /** 3757 * converts the string representation of an UUID to bytes 3758 * 3759 * @param string the string representation of an UUID 3760 * @return the bytes, null if the the syntax is not valid 3761 */ 3762 public static byte[] uuidToBytes( String string ) 3763 { 3764 if ( !new UuidSyntaxChecker().isValidSyntax( string ) ) 3765 { 3766 return null; 3767 } 3768 3769 char[] chars = string.toCharArray(); 3770 byte[] bytes = new byte[16]; 3771 bytes[0] = getHexValue( chars[0], chars[1] ); 3772 bytes[1] = getHexValue( chars[2], chars[3] ); 3773 bytes[2] = getHexValue( chars[4], chars[5] ); 3774 bytes[3] = getHexValue( chars[6], chars[7] ); 3775 3776 bytes[4] = getHexValue( chars[9], chars[10] ); 3777 bytes[5] = getHexValue( chars[11], chars[12] ); 3778 3779 bytes[6] = getHexValue( chars[14], chars[15] ); 3780 bytes[7] = getHexValue( chars[16], chars[17] ); 3781 3782 bytes[8] = getHexValue( chars[19], chars[20] ); 3783 bytes[9] = getHexValue( chars[21], chars[22] ); 3784 3785 bytes[10] = getHexValue( chars[24], chars[25] ); 3786 bytes[11] = getHexValue( chars[26], chars[27] ); 3787 bytes[12] = getHexValue( chars[28], chars[29] ); 3788 bytes[13] = getHexValue( chars[30], chars[31] ); 3789 bytes[14] = getHexValue( chars[32], chars[33] ); 3790 bytes[15] = getHexValue( chars[34], chars[35] ); 3791 3792 return bytes; 3793 } 3794 3795 }