001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one
003     *  or more contributor license agreements.  See the NOTICE file
004     *  distributed with this work for additional information
005     *  regarding copyright ownership.  The ASF licenses this file
006     *  to you under the Apache License, Version 2.0 (the
007     *  "License"); you may not use this file except in compliance
008     *  with the License.  You may obtain a copy of the License at
009     *  
010     *    http://www.apache.org/licenses/LICENSE-2.0
011     *  
012     *  Unless required by applicable law or agreed to in writing,
013     *  software distributed under the License is distributed on an
014     *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     *  KIND, either express or implied.  See the License for the
016     *  specific language governing permissions and limitations
017     *  under the License. 
018     *  
019     */
020    package org.apache.directory.shared.ldap.filter;
021    
022    
023    import java.text.ParseException;
024    
025    import org.apache.directory.shared.i18n.I18n;
026    import org.apache.directory.shared.ldap.entry.BinaryValue;
027    import org.apache.directory.shared.ldap.entry.Value;
028    import org.apache.directory.shared.ldap.util.AttributeUtils;
029    import org.apache.directory.shared.ldap.util.Position;
030    import org.apache.directory.shared.ldap.util.StringTools;
031    
032    
033    /**
034     * This class parse a Ldap filter. The grammar is given in RFC 4515
035     *
036     * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
037     * @version $Rev$, $Date$
038     */
039    public class FilterParser
040    {
041        /**
042         * Creates a filter parser implementation.
043         */
044        public FilterParser()
045        {
046        }
047    
048    
049        /**
050         * Parse an extensible
051         * 
052         * extensible     = ( attr [":dn"] [':' oid] ":=" assertionvalue )
053         *                  / ( [":dn"] ':' oid ":=" assertionvalue )
054         * matchingrule   = ":" oid
055         */
056        private static ExprNode parseExtensible( String attr, String filter, Position pos ) throws ParseException
057        {
058            ExtensibleNode node = new ExtensibleNode( attr );
059    
060            if ( attr != null )
061            {
062                // First check if we have a ":dn"
063                if ( StringTools.areEquals( filter, pos.start, "dn" ) )
064                {
065                    // Set the dnAttributes flag and move forward in the string
066                    node.setDnAttributes( true );
067                    pos.start += 2;
068                }
069                else
070                {
071                    // Push back the ':' 
072                    pos.start--;
073                }
074    
075                // Do we have a MatchingRule ?
076                if ( StringTools.charAt( filter, pos.start ) == ':' )
077                {
078                    pos.start++;
079                    int start = pos.start;
080    
081                    if ( StringTools.charAt( filter, pos.start ) == '=' )
082                    {
083                        pos.start++;
084    
085                        // Get the assertionValue
086                        node.setValue( parseAssertionValue( filter, pos, true ) );
087    
088                        return node;
089                    }
090                    else
091                    {
092                        AttributeUtils.parseAttribute( filter, pos, false );
093    
094                        node.setMatchingRuleId( filter.substring( start, pos.start ) );
095    
096                        if ( StringTools.areEquals( filter, pos.start, ":=" ) )
097                        {
098                            pos.start += 2;
099    
100                            // Get the assertionValue
101                            node.setValue( parseAssertionValue( filter, pos, true ) );
102    
103                            return node;
104                        }
105                        else
106                        {
107                            throw new ParseException( I18n.err( I18n.ERR_04146 ), pos.start );
108                        }
109                    }
110                }
111                else
112                {
113                    throw new ParseException( I18n.err( I18n.ERR_04147 ), pos.start );
114                }
115            }
116            else
117            {
118                boolean oidRequested = false;
119    
120                // First check if we have a ":dn"
121                if ( StringTools.areEquals( filter, pos.start, ":dn" ) )
122                {
123                    // Set the dnAttributes flag and move forward in the string
124                    node.setDnAttributes( true );
125                    pos.start += 3;
126                }
127                else
128                {
129                    oidRequested = true;
130                }
131    
132                // Do we have a MatchingRule ?
133                if ( StringTools.charAt( filter, pos.start ) == ':' )
134                {
135                    pos.start++;
136                    int start = pos.start;
137    
138                    if ( StringTools.charAt( filter, pos.start ) == '=' )
139                    {
140                        if ( oidRequested )
141                        {
142                            throw new ParseException( I18n.err( I18n.ERR_04148 ), pos.start );
143                        }
144    
145                        pos.start++;
146    
147                        // Get the assertionValue
148                        node.setValue( parseAssertionValue( filter, pos, true ) );
149    
150                        return node;
151                    }
152                    else
153                    {
154                        AttributeUtils.parseAttribute( filter, pos, false );
155    
156                        node.setMatchingRuleId( filter.substring( start, pos.start ) );
157    
158                        if ( StringTools.areEquals( filter, pos.start, ":=" ) )
159                        {
160                            pos.start += 2;
161    
162                            // Get the assertionValue
163                            node.setValue( parseAssertionValue( filter, pos, true ) );
164    
165                            return node;
166                        }
167                        else
168                        {
169                            throw new ParseException( I18n.err( I18n.ERR_04146 ), pos.start );
170                        }
171                    }
172                }
173                else
174                {
175                    throw new ParseException( I18n.err( I18n.ERR_04147 ), pos.start );
176                }
177            }
178        }
179    
180    
181        /**
182         * An assertion value : 
183         * assertionvalue = valueencoding
184         * valueencoding  = 0*(normal / escaped)
185         * normal         = UTF1SUBSET / UTFMB
186         * escaped        = '\' HEX HEX
187         * HEX            = '0'-'9' / 'A'-'F' / 'a'-'f'
188         * UTF1SUBSET     = %x01-27 / %x2B-5B / %x5D-7F (Everything but '\0', '*', '(', ')' and '\')
189         * UTFMB          = UTF2 / UTF3 / UTF4
190         * UTF0           = %x80-BF
191         * UTF2           = %xC2-DF UTF0
192         * UTF3           = %xE0 %xA0-BF UTF0 / %xE1-EC UTF0 UTF0 / %xED %x80-9F UTF0 / %xEE-EF UTF0 UTF0
193         * UTF4           = %xF0 %x90-BF UTF0 UTF0 / %xF1-F3 UTF0 UTF0 UTF0 / %xF4 %x80-8F UTF0 UTF0
194         * 
195         * With the specific constraints (RFC 4515):
196         *    "The <valueencoding> rule ensures that the entire filter string is a"
197         *    "valid UTF-8 string and provides that the octets that represent the"
198         *    "ASCII characters "*" (ASCII 0x2a), "(" (ASCII 0x28), ")" (ASCII"
199         *    "0x29), "\" (ASCII 0x5c), and NUL (ASCII 0x00) are represented as a"
200         *    "backslash "\" (ASCII 0x5c) followed by the two hexadecimal digits"
201         *    "representing the value of the encoded octet."
202    
203         * 
204         * The incomming String is already transformed from UTF-8 to unicode, so we must assume that the 
205         * grammar we have to check is the following :
206         * 
207         * assertionvalue = valueencoding
208         * valueencoding  = 0*(normal / escaped)
209         * normal         = unicodeSubset
210         * escaped        = '\' HEX HEX
211         * HEX            = '0'-'9' / 'A'-'F' / 'a'-'f'
212         * unicodeSubset     = %x01-27 / %x2B-5B / %x5D-FFFF
213         */
214        private static Value<?> parseAssertionValue( String filter, Position pos, boolean preserveEscapedChars ) throws ParseException
215        {
216            int start = pos.start;
217            char c = StringTools.charAt( filter, pos.start );
218            
219            // Create a buffer big enough to contain the value once converted
220            byte[] value = new byte[ filter.length() - pos.start];
221            int current = 0;
222    
223            do
224            {
225                if ( StringTools.isUnicodeSubset( c ) )
226                {
227                    value[current++] = (byte)c;
228                    pos.start++;
229                }
230                else if ( StringTools.isCharASCII( filter, pos.start, '\\' ) )
231                {
232                    // Maybe an escaped 
233                    pos.start++;
234    
235                    // First hex
236                    if ( StringTools.isHex( filter, pos.start ) )
237                    {
238                        pos.start++;
239                    }
240                    else
241                    {
242                        throw new ParseException( I18n.err( I18n.ERR_04149 ), pos.start );
243                    }
244    
245                    // second hex
246                    if ( StringTools.isHex( filter, pos.start ) )
247                    {
248                        value[current++] = StringTools.getHexValue( filter.charAt( pos.start - 1 ), filter.charAt( pos.start ) );
249                        pos.start++;
250                    }
251                    else
252                    {
253                        throw new ParseException( I18n.err( I18n.ERR_04149 ), pos.start );
254                    }
255                }
256                else
257                {
258                    // not a valid char, so let's get out
259                    break;
260                }
261            }
262            while ( ( c = StringTools.charAt( filter, pos.start ) ) != '\0' );
263    
264            if ( current != 0 )
265            {
266                byte[] result = new byte[ current ];
267                System.arraycopy( value, 0, result, 0, current );
268                
269                return new BinaryValue( result );
270            }
271            else
272            {
273                return new BinaryValue();
274            }
275        }
276    
277    
278        private static Value<?> parseAssertionValue( String filter, Position pos ) throws ParseException
279        {
280            return parseAssertionValue( filter, pos, false );
281        }
282    
283    
284        /**
285         * Parse a substring
286         */
287        private static ExprNode parseSubstring( String attr, Value<?> initial, String filter, Position pos )
288            throws ParseException
289        {
290            if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
291            {
292                // We have found a '*' : this is a substring
293                SubstringNode node = new SubstringNode( attr );
294    
295                if ( initial != null && !initial.isNull() )
296                {
297                    // We have a substring starting with a value : val*...
298                    // Set the initial value. It must be a String
299                    String initialStr = initial.getString();
300                    node.setInitial( initialStr );
301                }
302    
303                pos.start++;
304    
305                // 
306                while ( true )
307                {
308                    Value<?> assertionValue = parseAssertionValue( filter, pos );
309    
310                    // Is there anything else but a ')' after the value ?
311                    if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
312                    {
313                        // Nope : as we have had [initial] '*' (any '*' ) *,
314                        // this is the final
315                        if ( !assertionValue.isNull() )
316                        {
317                            String finalStr = assertionValue.getString();
318                            node.setFinal( finalStr );
319                        }
320    
321                        return node;
322                    }
323                    else if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
324                    {
325                        // We have a '*' : it's an any
326                        // If the value is empty, that means we have more than 
327                        // one consecutive '*' : do nothing in this case.
328                        if ( !assertionValue.isNull() )
329                        {
330                            String anyStr = assertionValue.getString();
331                            node.addAny( anyStr );
332                        }
333    
334                        pos.start++;
335                    }
336                    else
337                    {
338                        // This is an error
339                        throw new ParseException( I18n.err( I18n.ERR_04150 ), pos.start );
340                    }
341                }
342            }
343            else
344            {
345                // This is an error
346                throw new ParseException( I18n.err( I18n.ERR_04150 ), pos.start );
347            }
348        }
349    
350    
351        /**
352         * Here is the grammar to parse :
353         * 
354         * simple    ::= '=' assertionValue
355         * present   ::= '=' '*'
356         * substring ::= '=' [initial] any [final]
357         * initial   ::= assertionValue
358         * any       ::= '*' ( assertionValue '*')*
359         * 
360         * As we can see, there is an ambiguity in the grammar : attr=* can be
361         * seen as a present or as a substring. As stated in the RFC :
362         * 
363         * "Note that although both the <substring> and <present> productions in"
364         * "the grammar above can produce the "attr=*" construct, this construct"
365         * "is used only to denote a presence filter." (RFC 4515, 3)
366         * 
367         * We have also to consider the difference between a substring and the
368         * equality node : this last node does not contain a '*'
369         *
370         * @param attr
371         * @param filter
372         * @param pos
373         * @return
374         */
375        private static ExprNode parsePresenceEqOrSubstring( String attr, String filter, Position pos )
376            throws ParseException
377        {
378            if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
379            {
380                // To be a present node, the next char should be a ')'
381                pos.start++;
382    
383                if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
384                {
385                    // This is a present node
386                    return new PresenceNode( attr );
387                }
388                else
389                {
390                    // Definitively a substring with no initial or an error
391                    // Push back the '*' on the string
392                    pos.start--;
393                    return parseSubstring( attr, null, filter, pos );
394                }
395            }
396            else if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
397            {
398                // An empty equality Node
399                return new EqualityNode( attr, new BinaryValue() );
400            }
401            else
402            {
403                // A substring or an equality node
404                Value<?> value = parseAssertionValue( filter, pos );
405    
406                // Is there anything else but a ')' after the value ?
407                if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
408                {
409                    // This is an equality node
410                    return new EqualityNode( attr, value );
411                }
412    
413                return parseSubstring( attr, value, filter, pos );
414            }
415        }
416    
417    
418        /**
419         * Parse the following grammar :
420         * item           = simple / present / substring / extensible
421         * simple         = attr filtertype assertionvalue
422         * filtertype     = '=' / '~=' / '>=' / '<='
423         * present        = attr '=' '*'
424         * substring      = attr '=' [initial] any [final]
425         * extensible     = ( attr [":dn"] [':' oid] ":=" assertionvalue )
426         *                  / ( [":dn"] ':' oid ":=" assertionvalue )
427         * matchingrule   = ":" oid
428         *                  
429         * An item starts with an attribute or a colon.
430         */
431        private static ExprNode parseItem( String filter, Position pos, char c ) throws ParseException
432        {
433            LeafNode node = null;
434            String attr = null;
435    
436            if ( c == '\0' )
437            {
438                throw new ParseException( I18n.err( I18n.ERR_04151 ), pos.start );
439            }
440    
441            if ( c == ':' )
442            {
443                // If we have a colon, then the item is an extensible one
444                return parseExtensible( null, filter, pos );
445            }
446            else
447            {
448                // We must have an attribute
449                attr = AttributeUtils.parseAttribute( filter, pos, true );
450    
451                // Now, we may have a present, substring, simple or an extensible
452                c = StringTools.charAt( filter, pos.start );
453    
454                switch ( c )
455                {
456                    case '=':
457                        // It can be a presence, an equal or a substring
458                        pos.start++;
459                        return parsePresenceEqOrSubstring( attr, filter, pos );
460    
461                    case '~':
462                        // Approximate node
463                        pos.start++;
464    
465                        // Check that we have a '='
466                        if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
467                        {
468                            throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start );
469                        }
470    
471                        pos.start++;
472    
473                        // Parse the value and create the node
474                        node = new ApproximateNode( attr, parseAssertionValue( filter, pos ) );
475                        return node;
476    
477                    case '>':
478                        // Greater or equal node
479                        pos.start++;
480    
481                        // Check that we have a '='
482                        if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
483                        {
484                            throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start );
485                        }
486    
487                        pos.start++;
488    
489                        // Parse the value and create the node
490                        node = new GreaterEqNode( attr, parseAssertionValue( filter, pos ) );
491                        return node;
492    
493                    case '<':
494                        // Less or equal node
495                        pos.start++;
496    
497                        // Check that we have a '='
498                        if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
499                        {
500                            throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start );
501                        }
502    
503                        pos.start++;
504    
505                        // Parse the value and create the node
506                        node = new LessEqNode( attr, parseAssertionValue( filter, pos ) );
507                        return node;
508    
509                    case ':':
510                        // An extensible node
511                        pos.start++;
512                        return parseExtensible( attr, filter, pos );
513    
514                    default:
515                        // This is an error
516                        throw new ParseException( I18n.err( I18n.ERR_04153 ), pos.start );
517                }
518            }
519        }
520    
521    
522        /**
523         * Parse AND, OR and NOT nodes :
524         * 
525         * and            = '&' filterlist
526         * or             = '|' filterlist
527         * not            = '!' filter
528         * filterlist     = 1*filter
529         * 
530         * @return
531         */
532        private static ExprNode parseBranchNode( ExprNode node, String filter, Position pos ) throws ParseException
533        {
534            BranchNode bNode = ( BranchNode ) node;
535    
536            // We must have at least one filter
537            ExprNode child = parseFilterInternal( filter, pos );
538    
539            // Add the child to the node children
540            bNode.addNode( child );
541    
542            // Now, iterate recusively though all the remaining filters, if any
543            while ( ( child = parseFilterInternal( filter, pos ) ) != null )
544            {
545                // Add the child to the node children
546                bNode.addNode( child );
547            }
548    
549            return node;
550        }
551    
552    
553        /**
554         * filtercomp     = and / or / not / item
555         * and            = '&' filterlist
556         * or             = '|' filterlist
557         * not            = '!' filter
558         * item           = simple / present / substring / extensible
559         * simple         = attr filtertype assertionvalue
560         * present        = attr EQUALS ASTERISK
561         * substring      = attr EQUALS [initial] any [final]
562         * extensible     = ( attr [dnattrs]
563         *                    [matchingrule] COLON EQUALS assertionvalue )
564         *                    / ( [dnattrs]
565         *                         matchingrule COLON EQUALS assertionvalue )
566         */
567        private static ExprNode parseFilterComp( String filter, Position pos ) throws ParseException
568        {
569            ExprNode node = null;
570    
571            if ( pos.start == pos.length )
572            {
573                throw new ParseException( I18n.err( I18n.ERR_04154 ), pos.start );
574            }
575    
576            char c = StringTools.charAt( filter, pos.start );
577    
578            switch ( c )
579            {
580                case '&':
581                    // This is a AND node
582                    pos.start++;
583                    node = new AndNode();
584                    parseBranchNode( node, filter, pos );
585                    break;
586    
587                case '|':
588                    // This is an OR node
589                    pos.start++;
590                    node = new OrNode();
591                    parseBranchNode( node, filter, pos );
592                    break;
593    
594                case '!':
595                    // This is a NOT node
596                    pos.start++;
597                    node = new NotNode();
598                    parseBranchNode( node, filter, pos );
599                    break;
600    
601                default:
602                    // This is an item
603                    node = parseItem( filter, pos, c );
604                    break;
605    
606            }
607    
608            return node;
609        }
610    
611    
612        /**
613         * Pasre the grammar rule :
614         * filter ::= '(' filterComp ')'
615         */
616        private static ExprNode parseFilterInternal( String filter, Position pos ) throws ParseException
617        {
618            // Check for the left '('
619            if ( !StringTools.isCharASCII( filter, pos.start, '(' ) )
620            {
621                // No more node, get out
622                if ( ( pos.start == 0 ) && ( pos.length != 0 ) )
623                {
624                    throw new ParseException( I18n.err( I18n.ERR_04155 ), 0 );
625                }
626                else
627                {
628                    return null;
629                }
630            }
631    
632            pos.start++;
633    
634            // parse the filter component
635            ExprNode node = parseFilterComp( filter, pos );
636    
637            if ( node == null )
638            {
639                throw new ParseException( I18n.err( I18n.ERR_04156 ), pos.start );
640            }
641    
642            // Check that we have a right ')'
643            if ( !StringTools.isCharASCII( filter, pos.start, ')' ) )
644            {
645                throw new ParseException( I18n.err( I18n.ERR_04157 ), pos.start );
646            }
647    
648            pos.start++;
649    
650            return node;
651        }
652    
653    
654        /**
655         * @see FilterParser#parse(String)
656         */
657        public static ExprNode parse( String filter ) throws ParseException
658        {
659            // The filter must not be null. This is a defensive test
660            if ( StringTools.isEmpty( filter ) )
661            {
662                throw new ParseException( I18n.err( I18n.ERR_04158 ), 0 );
663            }
664    
665            Position pos = new Position();
666            pos.start = 0;
667            pos.end = 0;
668            pos.length = filter.length();
669    
670            return parseFilterInternal( filter, pos );
671        }
672    
673    
674        public void setFilterParserMonitor( FilterParserMonitor monitor )
675        {
676        }
677    }