001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 * 019 */ 020 package org.apache.directory.shared.ldap.filter; 021 022 023 import java.text.ParseException; 024 025 import org.apache.directory.shared.i18n.I18n; 026 import org.apache.directory.shared.ldap.entry.BinaryValue; 027 import org.apache.directory.shared.ldap.entry.Value; 028 import org.apache.directory.shared.ldap.util.AttributeUtils; 029 import org.apache.directory.shared.ldap.util.Position; 030 import org.apache.directory.shared.ldap.util.StringTools; 031 032 033 /** 034 * This class parse a Ldap filter. The grammar is given in RFC 4515 035 * 036 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a> 037 * @version $Rev$, $Date$ 038 */ 039 public class FilterParser 040 { 041 /** 042 * Creates a filter parser implementation. 043 */ 044 public FilterParser() 045 { 046 } 047 048 049 /** 050 * Parse an extensible 051 * 052 * extensible = ( attr [":dn"] [':' oid] ":=" assertionvalue ) 053 * / ( [":dn"] ':' oid ":=" assertionvalue ) 054 * matchingrule = ":" oid 055 */ 056 private static ExprNode parseExtensible( String attr, String filter, Position pos ) throws ParseException 057 { 058 ExtensibleNode node = new ExtensibleNode( attr ); 059 060 if ( attr != null ) 061 { 062 // First check if we have a ":dn" 063 if ( StringTools.areEquals( filter, pos.start, "dn" ) ) 064 { 065 // Set the dnAttributes flag and move forward in the string 066 node.setDnAttributes( true ); 067 pos.start += 2; 068 } 069 else 070 { 071 // Push back the ':' 072 pos.start--; 073 } 074 075 // Do we have a MatchingRule ? 076 if ( StringTools.charAt( filter, pos.start ) == ':' ) 077 { 078 pos.start++; 079 int start = pos.start; 080 081 if ( StringTools.charAt( filter, pos.start ) == '=' ) 082 { 083 pos.start++; 084 085 // Get the assertionValue 086 node.setValue( parseAssertionValue( filter, pos, true ) ); 087 088 return node; 089 } 090 else 091 { 092 AttributeUtils.parseAttribute( filter, pos, false ); 093 094 node.setMatchingRuleId( filter.substring( start, pos.start ) ); 095 096 if ( StringTools.areEquals( filter, pos.start, ":=" ) ) 097 { 098 pos.start += 2; 099 100 // Get the assertionValue 101 node.setValue( parseAssertionValue( filter, pos, true ) ); 102 103 return node; 104 } 105 else 106 { 107 throw new ParseException( I18n.err( I18n.ERR_04146 ), pos.start ); 108 } 109 } 110 } 111 else 112 { 113 throw new ParseException( I18n.err( I18n.ERR_04147 ), pos.start ); 114 } 115 } 116 else 117 { 118 boolean oidRequested = false; 119 120 // First check if we have a ":dn" 121 if ( StringTools.areEquals( filter, pos.start, ":dn" ) ) 122 { 123 // Set the dnAttributes flag and move forward in the string 124 node.setDnAttributes( true ); 125 pos.start += 3; 126 } 127 else 128 { 129 oidRequested = true; 130 } 131 132 // Do we have a MatchingRule ? 133 if ( StringTools.charAt( filter, pos.start ) == ':' ) 134 { 135 pos.start++; 136 int start = pos.start; 137 138 if ( StringTools.charAt( filter, pos.start ) == '=' ) 139 { 140 if ( oidRequested ) 141 { 142 throw new ParseException( I18n.err( I18n.ERR_04148 ), pos.start ); 143 } 144 145 pos.start++; 146 147 // Get the assertionValue 148 node.setValue( parseAssertionValue( filter, pos, true ) ); 149 150 return node; 151 } 152 else 153 { 154 AttributeUtils.parseAttribute( filter, pos, false ); 155 156 node.setMatchingRuleId( filter.substring( start, pos.start ) ); 157 158 if ( StringTools.areEquals( filter, pos.start, ":=" ) ) 159 { 160 pos.start += 2; 161 162 // Get the assertionValue 163 node.setValue( parseAssertionValue( filter, pos, true ) ); 164 165 return node; 166 } 167 else 168 { 169 throw new ParseException( I18n.err( I18n.ERR_04146 ), pos.start ); 170 } 171 } 172 } 173 else 174 { 175 throw new ParseException( I18n.err( I18n.ERR_04147 ), pos.start ); 176 } 177 } 178 } 179 180 181 /** 182 * An assertion value : 183 * assertionvalue = valueencoding 184 * valueencoding = 0*(normal / escaped) 185 * normal = UTF1SUBSET / UTFMB 186 * escaped = '\' HEX HEX 187 * HEX = '0'-'9' / 'A'-'F' / 'a'-'f' 188 * UTF1SUBSET = %x01-27 / %x2B-5B / %x5D-7F (Everything but '\0', '*', '(', ')' and '\') 189 * UTFMB = UTF2 / UTF3 / UTF4 190 * UTF0 = %x80-BF 191 * UTF2 = %xC2-DF UTF0 192 * UTF3 = %xE0 %xA0-BF UTF0 / %xE1-EC UTF0 UTF0 / %xED %x80-9F UTF0 / %xEE-EF UTF0 UTF0 193 * UTF4 = %xF0 %x90-BF UTF0 UTF0 / %xF1-F3 UTF0 UTF0 UTF0 / %xF4 %x80-8F UTF0 UTF0 194 * 195 * With the specific constraints (RFC 4515): 196 * "The <valueencoding> rule ensures that the entire filter string is a" 197 * "valid UTF-8 string and provides that the octets that represent the" 198 * "ASCII characters "*" (ASCII 0x2a), "(" (ASCII 0x28), ")" (ASCII" 199 * "0x29), "\" (ASCII 0x5c), and NUL (ASCII 0x00) are represented as a" 200 * "backslash "\" (ASCII 0x5c) followed by the two hexadecimal digits" 201 * "representing the value of the encoded octet." 202 203 * 204 * The incomming String is already transformed from UTF-8 to unicode, so we must assume that the 205 * grammar we have to check is the following : 206 * 207 * assertionvalue = valueencoding 208 * valueencoding = 0*(normal / escaped) 209 * normal = unicodeSubset 210 * escaped = '\' HEX HEX 211 * HEX = '0'-'9' / 'A'-'F' / 'a'-'f' 212 * unicodeSubset = %x01-27 / %x2B-5B / %x5D-FFFF 213 */ 214 private static Value<?> parseAssertionValue( String filter, Position pos, boolean preserveEscapedChars ) throws ParseException 215 { 216 int start = pos.start; 217 char c = StringTools.charAt( filter, pos.start ); 218 219 // Create a buffer big enough to contain the value once converted 220 byte[] value = new byte[ filter.length() - pos.start]; 221 int current = 0; 222 223 do 224 { 225 if ( StringTools.isUnicodeSubset( c ) ) 226 { 227 value[current++] = (byte)c; 228 pos.start++; 229 } 230 else if ( StringTools.isCharASCII( filter, pos.start, '\\' ) ) 231 { 232 // Maybe an escaped 233 pos.start++; 234 235 // First hex 236 if ( StringTools.isHex( filter, pos.start ) ) 237 { 238 pos.start++; 239 } 240 else 241 { 242 throw new ParseException( I18n.err( I18n.ERR_04149 ), pos.start ); 243 } 244 245 // second hex 246 if ( StringTools.isHex( filter, pos.start ) ) 247 { 248 value[current++] = StringTools.getHexValue( filter.charAt( pos.start - 1 ), filter.charAt( pos.start ) ); 249 pos.start++; 250 } 251 else 252 { 253 throw new ParseException( I18n.err( I18n.ERR_04149 ), pos.start ); 254 } 255 } 256 else 257 { 258 // not a valid char, so let's get out 259 break; 260 } 261 } 262 while ( ( c = StringTools.charAt( filter, pos.start ) ) != '\0' ); 263 264 if ( current != 0 ) 265 { 266 byte[] result = new byte[ current ]; 267 System.arraycopy( value, 0, result, 0, current ); 268 269 return new BinaryValue( result ); 270 } 271 else 272 { 273 return new BinaryValue(); 274 } 275 } 276 277 278 private static Value<?> parseAssertionValue( String filter, Position pos ) throws ParseException 279 { 280 return parseAssertionValue( filter, pos, false ); 281 } 282 283 284 /** 285 * Parse a substring 286 */ 287 private static ExprNode parseSubstring( String attr, Value<?> initial, String filter, Position pos ) 288 throws ParseException 289 { 290 if ( StringTools.isCharASCII( filter, pos.start, '*' ) ) 291 { 292 // We have found a '*' : this is a substring 293 SubstringNode node = new SubstringNode( attr ); 294 295 if ( initial != null && !initial.isNull() ) 296 { 297 // We have a substring starting with a value : val*... 298 // Set the initial value. It must be a String 299 String initialStr = initial.getString(); 300 node.setInitial( initialStr ); 301 } 302 303 pos.start++; 304 305 // 306 while ( true ) 307 { 308 Value<?> assertionValue = parseAssertionValue( filter, pos ); 309 310 // Is there anything else but a ')' after the value ? 311 if ( StringTools.isCharASCII( filter, pos.start, ')' ) ) 312 { 313 // Nope : as we have had [initial] '*' (any '*' ) *, 314 // this is the final 315 if ( !assertionValue.isNull() ) 316 { 317 String finalStr = assertionValue.getString(); 318 node.setFinal( finalStr ); 319 } 320 321 return node; 322 } 323 else if ( StringTools.isCharASCII( filter, pos.start, '*' ) ) 324 { 325 // We have a '*' : it's an any 326 // If the value is empty, that means we have more than 327 // one consecutive '*' : do nothing in this case. 328 if ( !assertionValue.isNull() ) 329 { 330 String anyStr = assertionValue.getString(); 331 node.addAny( anyStr ); 332 } 333 334 pos.start++; 335 } 336 else 337 { 338 // This is an error 339 throw new ParseException( I18n.err( I18n.ERR_04150 ), pos.start ); 340 } 341 } 342 } 343 else 344 { 345 // This is an error 346 throw new ParseException( I18n.err( I18n.ERR_04150 ), pos.start ); 347 } 348 } 349 350 351 /** 352 * Here is the grammar to parse : 353 * 354 * simple ::= '=' assertionValue 355 * present ::= '=' '*' 356 * substring ::= '=' [initial] any [final] 357 * initial ::= assertionValue 358 * any ::= '*' ( assertionValue '*')* 359 * 360 * As we can see, there is an ambiguity in the grammar : attr=* can be 361 * seen as a present or as a substring. As stated in the RFC : 362 * 363 * "Note that although both the <substring> and <present> productions in" 364 * "the grammar above can produce the "attr=*" construct, this construct" 365 * "is used only to denote a presence filter." (RFC 4515, 3) 366 * 367 * We have also to consider the difference between a substring and the 368 * equality node : this last node does not contain a '*' 369 * 370 * @param attr 371 * @param filter 372 * @param pos 373 * @return 374 */ 375 private static ExprNode parsePresenceEqOrSubstring( String attr, String filter, Position pos ) 376 throws ParseException 377 { 378 if ( StringTools.isCharASCII( filter, pos.start, '*' ) ) 379 { 380 // To be a present node, the next char should be a ')' 381 pos.start++; 382 383 if ( StringTools.isCharASCII( filter, pos.start, ')' ) ) 384 { 385 // This is a present node 386 return new PresenceNode( attr ); 387 } 388 else 389 { 390 // Definitively a substring with no initial or an error 391 // Push back the '*' on the string 392 pos.start--; 393 return parseSubstring( attr, null, filter, pos ); 394 } 395 } 396 else if ( StringTools.isCharASCII( filter, pos.start, ')' ) ) 397 { 398 // An empty equality Node 399 return new EqualityNode( attr, new BinaryValue() ); 400 } 401 else 402 { 403 // A substring or an equality node 404 Value<?> value = parseAssertionValue( filter, pos ); 405 406 // Is there anything else but a ')' after the value ? 407 if ( StringTools.isCharASCII( filter, pos.start, ')' ) ) 408 { 409 // This is an equality node 410 return new EqualityNode( attr, value ); 411 } 412 413 return parseSubstring( attr, value, filter, pos ); 414 } 415 } 416 417 418 /** 419 * Parse the following grammar : 420 * item = simple / present / substring / extensible 421 * simple = attr filtertype assertionvalue 422 * filtertype = '=' / '~=' / '>=' / '<=' 423 * present = attr '=' '*' 424 * substring = attr '=' [initial] any [final] 425 * extensible = ( attr [":dn"] [':' oid] ":=" assertionvalue ) 426 * / ( [":dn"] ':' oid ":=" assertionvalue ) 427 * matchingrule = ":" oid 428 * 429 * An item starts with an attribute or a colon. 430 */ 431 private static ExprNode parseItem( String filter, Position pos, char c ) throws ParseException 432 { 433 LeafNode node = null; 434 String attr = null; 435 436 if ( c == '\0' ) 437 { 438 throw new ParseException( I18n.err( I18n.ERR_04151 ), pos.start ); 439 } 440 441 if ( c == ':' ) 442 { 443 // If we have a colon, then the item is an extensible one 444 return parseExtensible( null, filter, pos ); 445 } 446 else 447 { 448 // We must have an attribute 449 attr = AttributeUtils.parseAttribute( filter, pos, true ); 450 451 // Now, we may have a present, substring, simple or an extensible 452 c = StringTools.charAt( filter, pos.start ); 453 454 switch ( c ) 455 { 456 case '=': 457 // It can be a presence, an equal or a substring 458 pos.start++; 459 return parsePresenceEqOrSubstring( attr, filter, pos ); 460 461 case '~': 462 // Approximate node 463 pos.start++; 464 465 // Check that we have a '=' 466 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) ) 467 { 468 throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start ); 469 } 470 471 pos.start++; 472 473 // Parse the value and create the node 474 node = new ApproximateNode( attr, parseAssertionValue( filter, pos ) ); 475 return node; 476 477 case '>': 478 // Greater or equal node 479 pos.start++; 480 481 // Check that we have a '=' 482 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) ) 483 { 484 throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start ); 485 } 486 487 pos.start++; 488 489 // Parse the value and create the node 490 node = new GreaterEqNode( attr, parseAssertionValue( filter, pos ) ); 491 return node; 492 493 case '<': 494 // Less or equal node 495 pos.start++; 496 497 // Check that we have a '=' 498 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) ) 499 { 500 throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start ); 501 } 502 503 pos.start++; 504 505 // Parse the value and create the node 506 node = new LessEqNode( attr, parseAssertionValue( filter, pos ) ); 507 return node; 508 509 case ':': 510 // An extensible node 511 pos.start++; 512 return parseExtensible( attr, filter, pos ); 513 514 default: 515 // This is an error 516 throw new ParseException( I18n.err( I18n.ERR_04153 ), pos.start ); 517 } 518 } 519 } 520 521 522 /** 523 * Parse AND, OR and NOT nodes : 524 * 525 * and = '&' filterlist 526 * or = '|' filterlist 527 * not = '!' filter 528 * filterlist = 1*filter 529 * 530 * @return 531 */ 532 private static ExprNode parseBranchNode( ExprNode node, String filter, Position pos ) throws ParseException 533 { 534 BranchNode bNode = ( BranchNode ) node; 535 536 // We must have at least one filter 537 ExprNode child = parseFilterInternal( filter, pos ); 538 539 // Add the child to the node children 540 bNode.addNode( child ); 541 542 // Now, iterate recusively though all the remaining filters, if any 543 while ( ( child = parseFilterInternal( filter, pos ) ) != null ) 544 { 545 // Add the child to the node children 546 bNode.addNode( child ); 547 } 548 549 return node; 550 } 551 552 553 /** 554 * filtercomp = and / or / not / item 555 * and = '&' filterlist 556 * or = '|' filterlist 557 * not = '!' filter 558 * item = simple / present / substring / extensible 559 * simple = attr filtertype assertionvalue 560 * present = attr EQUALS ASTERISK 561 * substring = attr EQUALS [initial] any [final] 562 * extensible = ( attr [dnattrs] 563 * [matchingrule] COLON EQUALS assertionvalue ) 564 * / ( [dnattrs] 565 * matchingrule COLON EQUALS assertionvalue ) 566 */ 567 private static ExprNode parseFilterComp( String filter, Position pos ) throws ParseException 568 { 569 ExprNode node = null; 570 571 if ( pos.start == pos.length ) 572 { 573 throw new ParseException( I18n.err( I18n.ERR_04154 ), pos.start ); 574 } 575 576 char c = StringTools.charAt( filter, pos.start ); 577 578 switch ( c ) 579 { 580 case '&': 581 // This is a AND node 582 pos.start++; 583 node = new AndNode(); 584 parseBranchNode( node, filter, pos ); 585 break; 586 587 case '|': 588 // This is an OR node 589 pos.start++; 590 node = new OrNode(); 591 parseBranchNode( node, filter, pos ); 592 break; 593 594 case '!': 595 // This is a NOT node 596 pos.start++; 597 node = new NotNode(); 598 parseBranchNode( node, filter, pos ); 599 break; 600 601 default: 602 // This is an item 603 node = parseItem( filter, pos, c ); 604 break; 605 606 } 607 608 return node; 609 } 610 611 612 /** 613 * Pasre the grammar rule : 614 * filter ::= '(' filterComp ')' 615 */ 616 private static ExprNode parseFilterInternal( String filter, Position pos ) throws ParseException 617 { 618 // Check for the left '(' 619 if ( !StringTools.isCharASCII( filter, pos.start, '(' ) ) 620 { 621 // No more node, get out 622 if ( ( pos.start == 0 ) && ( pos.length != 0 ) ) 623 { 624 throw new ParseException( I18n.err( I18n.ERR_04155 ), 0 ); 625 } 626 else 627 { 628 return null; 629 } 630 } 631 632 pos.start++; 633 634 // parse the filter component 635 ExprNode node = parseFilterComp( filter, pos ); 636 637 if ( node == null ) 638 { 639 throw new ParseException( I18n.err( I18n.ERR_04156 ), pos.start ); 640 } 641 642 // Check that we have a right ')' 643 if ( !StringTools.isCharASCII( filter, pos.start, ')' ) ) 644 { 645 throw new ParseException( I18n.err( I18n.ERR_04157 ), pos.start ); 646 } 647 648 pos.start++; 649 650 return node; 651 } 652 653 654 /** 655 * @see FilterParser#parse(String) 656 */ 657 public static ExprNode parse( String filter ) throws ParseException 658 { 659 // The filter must not be null. This is a defensive test 660 if ( StringTools.isEmpty( filter ) ) 661 { 662 throw new ParseException( I18n.err( I18n.ERR_04158 ), 0 ); 663 } 664 665 Position pos = new Position(); 666 pos.start = 0; 667 pos.end = 0; 668 pos.length = filter.length(); 669 670 return parseFilterInternal( filter, pos ); 671 } 672 673 674 public void setFilterParserMonitor( FilterParserMonitor monitor ) 675 { 676 } 677 }