001    // Copyright 2004, 2005 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    //     http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry.parse;
016    
017    import java.util.ArrayList;
018    import java.util.Collections;
019    import java.util.HashMap;
020    import java.util.Iterator;
021    import java.util.List;
022    import java.util.Map;
023    
024    import org.apache.hivemind.ApplicationRuntimeException;
025    import org.apache.hivemind.Location;
026    import org.apache.hivemind.Resource;
027    import org.apache.hivemind.impl.LocationImpl;
028    import org.apache.oro.text.regex.MalformedPatternException;
029    import org.apache.oro.text.regex.MatchResult;
030    import org.apache.oro.text.regex.Pattern;
031    import org.apache.oro.text.regex.PatternMatcher;
032    import org.apache.oro.text.regex.Perl5Compiler;
033    import org.apache.oro.text.regex.Perl5Matcher;
034    import org.apache.tapestry.util.IdAllocator;
035    
036    /**
037     * Parses Tapestry templates, breaking them into a series of
038     * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML
039     * template", there is no real requirement that the template be HTML. This parser can handle any
040     * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of
041     * HTML reasonably.
042     * <p>
043     * Deployed as the tapestry.parse.TemplateParser service, using the threaded model.
044     * <p>
045     * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
046     * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
047     * or end the tag with "<code>/></code>").
048     * <p>
049     * Generally, the id specified in the template is matched against a component defined in the
050     * specification. However, implicit components are also possible. The jwcid attribute uses the
051     * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may include a library id
052     *       prefix. Such a component is anonymous (but is given a unique id).
053     *       <p>
054     *       (The unique ids assigned start with a dollar sign, which is normally not allowed for
055     *       component ids ... this helps to make them stand out and assures that they do not conflict
056     *       with user-defined component ids. These ids tend to propagate into URLs and become HTML
057     *       element names and even JavaScript variable names ... the dollar sign is acceptable in these
058     *       contexts as well).
059     *       <p>
060     *       Implicit components may also be given a name using the syntax "
061     *       <code>componentId:@Type</code>". Such a component should <b>not </b> be defined in the
062     *       specification, but may still be accessed via
063     *       {@link org.apache.tapestry.IComponent#getComponent(String)}.
064     *       <p>
065     *       Both defined and implicit components may have additional attributes defined, simply by
066     *       including them in the template. They set formal or informal parameters of the component to
067     *       static strings.
068     *       {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
069     *       false, will cause such attributes to be simply ignored. For defined components, conflicting
070     *       values defined in the template are ignored.
071     *       <p>
072     *       Attributes in component tags will become formal and informal parameters of the
073     *       corresponding component. Most attributes will be
074     *       <p>
075     *       The parser removes the body of some tags (when the corresponding component doesn't
076     *       {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
077     *       allows portions of the template to be completely removed.
078     *       <p>
079     *       The parser does a pretty thorough lexical analysis of the template, and reports a great
080     *       number of errors, including improper nesting of tags.
081     *       <p>
082     *       The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
083     *       form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts them
084     *       into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
085     *       attribute ... if the value is <code>true</code>, then the localized value is sent to the
086     *       client without filtering, which is appropriate if the value has any markup that should not
087     *       be escaped.
088     * @author Howard Lewis Ship, Geoff Longman
089     */
090    
091    public class TemplateParser implements ITemplateParser
092    {
093        /**
094         * A "magic" component id that causes the tag with the id and its entire body to be ignored
095         * during parsing.
096         */
097    
098        private static final String REMOVE_ID = "$remove$";
099    
100        /**
101         * A "magic" component id that causes the tag to represent the true content of the template. Any
102         * content prior to the tag is discarded, and any content after the tag is ignored. The tag
103         * itself is not included.
104         */
105    
106        private static final String CONTENT_ID = "$content$";
107    
108        /**
109         * The attribute, checked for in &lt;span&gt; tags, that signifies that the span is being used as
110         * an invisible localization.
111         * 
112         * @since 2.0.4
113         */
114    
115        public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";
116    
117        /**
118         * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME} to indicate a string that should be
119         * rendered "raw" (without escaping HTML). If not specified, defaults to "false". The value must
120         * equal "true" (caselessly).
121         * 
122         * @since 2.3
123         */
124    
125        public static final String RAW_ATTRIBUTE_NAME = "raw";
126    
127        /**
128         * Attribute name used to identify components. Read from the delegate at the start of each
129         * parse.
130         * 
131         * @since 4.0
132         */
132    
133        private String _componentAttributeName;
134    
           /**
            * Regular expression fragment for a single name: an optional leading underscore, one ASCII
            * letter, then any number of word characters. It is the building block of both
            * {@link #SIMPLE_ID_PATTERN} and {@link #IMPLICIT_ID_PATTERN}.
            */
135        private static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";
136    
137        /**
138         * Pattern used to recognize ordinary components (defined in the specification).
139         * 
140         * @since 3.0
141         */
142    
143        public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";
144    
145        /**
146         * Pattern used to recognize implicit components (whose type is defined in the template).
147         * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
148         * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
149         * type, which may (as of 4.0) have slashes to delineate folders containing the component.
150         * 
151         * @since 3.0
152         */
153    
154        public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
155                + PROPERTY_NAME_PATTERN + "):)?((" + PROPERTY_NAME_PATTERN + "/)*"
156                + PROPERTY_NAME_PATTERN + "))$";
157    
           // Capture-group indexes into IMPLICIT_ID_PATTERN. Groups 3 and 6 exist only for
           // grouping ("library:" and "folder/") and are therefore not named here.

158        private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1;
159    
160        private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2;
161    
162        private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4;
163    
164        private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5;
165    
           /**
            * Compiled form of {@link #SIMPLE_ID_PATTERN}; built once in the constructor.
            */
166        private Pattern _simpleIdPattern;
167    
           /**
            * Compiled form of {@link #IMPLICIT_ID_PATTERN}; built once in the constructor.
            */
168        private Pattern _implicitIdPattern;
169    
           /**
            * Matcher created in the constructor. NOTE(review): presumably applied to the two compiled
            * patterns above; the code that uses it is outside the portion of the file reviewed here.
            */
170        private PatternMatcher _patternMatcher;
171    
           /**
            * Id allocator that is cleared at the end of every parse. NOTE(review): presumably the
            * source of the unique ids given to anonymous components -- confirm against its callers.
            */
172        private IdAllocator _idAllocator = new IdAllocator();
173    
           /**
            * The delegate passed to the current parse; set by beforeParse() and released by
            * afterParse().
            */
174        private ITemplateParserDelegate _delegate;
175    
176        /**
177         * Identifies the template being parsed; used with error messages.
178         */
179    
180        private Resource _resourceLocation;
181    
182        /**
183         * Shared instance of {@link Location} used by all {@link TextToken} instances in the template.
184         */
185    
186        private Location _templateLocation;
187    
188        /**
189         * Location within the resource for the current line.
190         */
191    
192        private Location _currentLocation;
193    
194        /**
195         * Local reference to the template data that is to be parsed.
196         */
197    
198        private char[] _templateData;
199    
200        /**
201         * List of Tag
202         */
203    
204        private List _stack = new ArrayList();
205    
206        private static class Tag
207        {
208            // The element, i.e., <jwc> or virtually any other element (via jwcid attribute)
209            String _tagName;
210    
211            // If true, the tag is a placeholder for a dynamic element
212            boolean _component;
213    
214            // If true, the body of the tag is being ignored, and the
215            // ignore flag is cleared when the close tag is reached
216            boolean _ignoringBody;
217    
218            // If true, then the entire tag (and its body) is being ignored
219            boolean _removeTag;
220    
221            // If true, then the tag must have a balanced closing tag.
222            // This is always true for components.
223            boolean _mustBalance;
224    
225            // The line on which the start tag exists
226            int _line;
227    
228            // If true, then the parse ends when the closing tag is found.
229            boolean _content;
230    
231            Tag(String tagName, int line)
232            {
233                _tagName = tagName;
234                _line = line;
235            }
236    
237            boolean match(String matchTagName)
238            {
239                return _tagName.equalsIgnoreCase(matchTagName);
240            }
241        }
242    
243        /**
244         * List of {@link TemplateToken}, this forms the ultimate response.
245         */
246    
247        private List _tokens = new ArrayList();
248    
249        /**
250         * The location of the 'cursor' within the template data. The advance() method moves this
251         * forward.
252         */
253    
254        private int _cursor;
255    
256        /**
257         * The start of the current block of static text, or -1 if no block is active.
258         */
259    
260        private int _blockStart;
261    
262        /**
263         * The current line number; tracked by advance(). Starts at 1.
264         */
265    
266        private int _line;
267    
268        /**
269         * Set to true when the body of a tag is being ignored. This is typically used to skip over the
270         * body of a tag when its corresponding component doesn't allow a body, or when the special jwcid
271         * of $remove$ is used.
272         */
273    
274        private boolean _ignoring;
275    
276        /**
277         * A {@link Map} of {@link String}s, used to store attributes collected while parsing a tag.
278         */
279    
280        private Map _attributes = new HashMap();
281    
282        /**
283         * A factory used to create template tokens.
284         */
285    
           // NOTE(review): no assignment to _factory appears in the portion of the file reviewed
           // here; confirm where it is initialized before relying on it being non-null.
286        private TemplateTokenFactory _factory;
287    
288        public TemplateParser()
289        {
290            Perl5Compiler compiler = new Perl5Compiler();
291    
292            try
293            {
294                _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN);
295                _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN);
296            }
297            catch (MalformedPatternException ex)
298            {
299                throw new ApplicationRuntimeException(ex);
300            }
301    
302            _patternMatcher = new Perl5Matcher();
303        }
304    
305        /**
306         * Parses the template data into an array of {@link TemplateToken}s.
307         * <p>
308         * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single
309         * thread accesses it.
310         * 
311         * @param templateData
312         *            the HTML template to parse. Some tokens will hold a reference to this array.
313         * @param delegate
314         *            object that "knows" about defined components
315         * @param resourceLocation
316         *            a description of where the template originated from, used with error messages.
317         */
318    
319        public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate,
320                Resource resourceLocation) throws TemplateParseException
321        {
322            try
323            {
324                beforeParse(templateData, delegate, resourceLocation);
325    
326                parse();
327    
328                return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]);
329            }
330            finally
331            {
332                afterParse();
333            }
334        }
335    
336        /**
337         * perform default initialization of the parser.
338         */
339    
340        protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate,
341                Resource resourceLocation)
342        {
343            _templateData = templateData;
344            _resourceLocation = resourceLocation;
345            _templateLocation = new LocationImpl(resourceLocation);
346            _delegate = delegate;
347            _ignoring = false;
348            _line = 1;
349            _componentAttributeName = delegate.getComponentAttributeName();
350        }
351    
352        /**
353         * Perform default cleanup after parsing completes.
354         */
355    
356        protected void afterParse()
357        {
358            _delegate = null;
359            _templateData = null;
360            _resourceLocation = null;
361            _templateLocation = null;
362            _currentLocation = null;
363            _stack.clear();
364            _tokens.clear();
365            _attributes.clear();
366            _idAllocator.clear();
367        }
368    
369        /**
370         * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem
371         * is reported.
372         * <p>
373         * The default implementation simply throws an exception that contains the message and location
374         * parameters.
375         * <p>
376         * Subclasses may override but <b>must </b> ensure they throw the required exception.
377         * 
378         * @param message
379         * @param location
380         * @param line
381         *            ignored by the default impl
382         * @param cursor
383         *            ignored by the default impl
384         * @throws TemplateParseException
385         *             always thrown in order to terminate the parse.
386         */
387    
388        protected void templateParseProblem(String message, Location location, int line, int cursor)
389                throws TemplateParseException
390        {
391            throw new TemplateParseException(message, location);
392        }
393    
394        /**
395         * Used by the parser to report tapestry runtime specific problems in the parse. Parsing <b>must
396         * </b> stop when a problem is reported.
397         * <p>
398         * The default implementation simply rethrows the exception.
399         * <p>
400         * Subclasses may override but <b>must </b> ensure they rethrow the exception.
401         * 
402         * @param exception
403         * @param line
404         *            ignored by the default impl
405         * @param cursor
406         *            ignored by the default impl
407         * @throws ApplicationRuntimeException
408         *             always rethrown in order to terminate the parse.
409         */
410    
411        protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor)
412                throws ApplicationRuntimeException
413        {
               // Rethrown unchanged; line and cursor are not used by this implementation and
               // exist only so that overriding subclasses can make use of them.
414            throw exception;
415        }
416    
417        /**
418         * Give subclasses access to the parse results.
419         */
420        protected List getTokens()
421        {
422            if (_tokens == null)
423                return Collections.EMPTY_LIST;
424    
425            return _tokens;
426        }
427    
428        /**
429         * Checks to see if the next few characters match a given pattern.
430         */
431    
432        private boolean lookahead(char[] match)
433        {
434            try
435            {
436                for (int i = 0; i < match.length; i++)
437                {
438                    if (_templateData[_cursor + i] != match[i])
439                        return false;
440                }
441    
442                // Every character matched.
443    
444                return true;
445            }
446            catch (IndexOutOfBoundsException ex)
447            {
448                return false;
449            }
450        }
451    
           // Character sequences probed with lookahead() at the current cursor position.

           // Opening delimiter of a markup comment.
452        private static final char[] COMMENT_START = new char[]
453        { '<', '!', '-', '-' };
454    
           // Closing delimiter of a markup comment.
455        private static final char[] COMMENT_END = new char[]
456        { '-', '-', '>' };
457    
           // The two characters that begin a closing tag.
458        private static final char[] CLOSE_TAG = new char[]
459        { '<', '/' };
460    
461        protected void parse() throws TemplateParseException
462        {
463            _cursor = 0;
464            _blockStart = -1;
465            int length = _templateData.length;
466    
467            while (_cursor < length)
468            {
469                if (_templateData[_cursor] != '<')
470                {
471                    if (_blockStart < 0 && !_ignoring)
472                        _blockStart = _cursor;
473    
474                    advance();
475                    continue;
476                }
477    
478                // OK, start of something.
479    
480                if (lookahead(CLOSE_TAG))
481                {
482                    closeTag();
483                    continue;
484                }
485    
486                if (lookahead(COMMENT_START))
487                {
488                    skipComment();
489                    continue;
490                }
491    
492                // The start of some tag.
493    
494                startTag();
495            }
496    
497            // Usually there's some text at the end of the template (after the last closing tag) that
498            // should
499            // be added. Often the last few tags are static tags so we definately
500            // need to end the text block.
501    
502            addTextToken(_templateData.length - 1);
503        }
504    
505        /**
506         * Advance forward in the document until the end of the comment is reached. In addition, skip
507         * any whitespace following the comment.
508         */
509    
510        private void skipComment() throws TemplateParseException
511        {
512            int length = _templateData.length;
513            int startLine = _line;
514    
515            if (_blockStart < 0 && !_ignoring)
516                _blockStart = _cursor;
517    
518            while (true)
519            {
520                if (_cursor >= length)
521                    templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl(
522                            _resourceLocation, startLine), startLine, _cursor);
523    
524                if (lookahead(COMMENT_END))
525                    break;
526    
527                // Not the end of the comment, advance over it.
528    
529                advance();
530            }
531    
532            _cursor += COMMENT_END.length;
533            advanceOverWhitespace();
534        }
535    
536        private void addTextToken(int end)
537        {
538            // No active block to add to.
539    
540            if (_blockStart < 0)
541                return;
542    
543            if (_blockStart <= end)
544            {
545                // This seems odd, shouldn't the location be the current location? I guess
546                // no errors are ever reported for a text token.
547    
548                TemplateToken token = _factory.createTextToken(
549                        _templateData,
550                        _blockStart,
551                        end,
552                        _templateLocation);
553    
554                _tokens.add(token);
555            }
556    
557            _blockStart = -1;
558        }
559    
           // States of the attribute-scanning loop in startTag().

           // Skipping whitespace while watching for the next attribute name, a '/' (empty tag)
           // or the '>' that ends the tag.
560        private static final int WAIT_FOR_ATTRIBUTE_NAME = 0;
561    
           // Inside an attribute name; it ends at '=', '/', '>' or whitespace.
562        private static final int COLLECT_ATTRIBUTE_NAME = 1;
563    
           // Name collected; skipping whitespace in search of '='. Anything else means the
           // attribute was "bare" and scanning returns to WAIT_FOR_ATTRIBUTE_NAME.
564        private static final int ADVANCE_PAST_EQUALS = 2;
565    
           // Past the '='; skipping whitespace until a quote or the first character of an
           // unquoted value. Reaching '/' or '>' here is reported as a missing value.
566        private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3;
567    
           // Inside a quoted value; it ends at the matching quote character.
568        private static final int COLLECT_QUOTED_VALUE = 4;
569    
           // Inside an unquoted value; it ends at whitespace, '/' or '>'.
570        private static final int COLLECT_UNQUOTED_VALUE = 5;
571    
572        private void startTag() throws TemplateParseException
573        {
574            int cursorStart = _cursor;
575            int length = _templateData.length;
576            String tagName = null;
577            boolean endOfTag = false;
578            boolean emptyTag = false;
579            int startLine = _line;
580            Location startLocation = new LocationImpl(_resourceLocation, startLine);
581    
582            tagBeginEvent(startLine, _cursor);
583    
584            advance();
585    
586            // Collect the element type
587    
588            while (_cursor < length)
589            {
590                char ch = _templateData[_cursor];
591    
592                if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
593                {
594                    tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1);
595    
596                    break;
597                }
598    
599                advance();
600            }
601    
602            String attributeName = null;
603            int attributeNameStart = -1;
604            int attributeValueStart = -1;
605            int state = WAIT_FOR_ATTRIBUTE_NAME;
606            char quoteChar = 0;
607    
608            _attributes.clear();
609    
610            // Collect each attribute
611    
612            while (!endOfTag)
613            {
614                if (_cursor >= length)
615                {
616                    String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine)
617                            : ParseMessages.unclosedTag(tagName, startLine);
618    
619                    templateParseProblem(message, startLocation, startLine, cursorStart);
620                }
621    
622                char ch = _templateData[_cursor];
623    
624                switch (state)
625                {
626                    case WAIT_FOR_ATTRIBUTE_NAME:
627    
628                        // Ignore whitespace before the next attribute name, while
629                        // looking for the end of the current tag.
630    
631                        if (ch == '/')
632                        {
633                            emptyTag = true;
634                            advance();
635                            break;
636                        }
637    
638                        if (ch == '>')
639                        {
640                            endOfTag = true;
641                            break;
642                        }
643    
644                        if (Character.isWhitespace(ch))
645                        {
646                            advance();
647                            break;
648                        }
649    
650                        // Found non-whitespace, assume its the attribute name.
651                        // Note: could use a check here for non-alpha.
652    
653                        attributeNameStart = _cursor;
654                        state = COLLECT_ATTRIBUTE_NAME;
655                        advance();
656                        break;
657    
658                    case COLLECT_ATTRIBUTE_NAME:
659    
660                        // Looking for end of attribute name.
661    
662                        if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch))
663                        {
664                            attributeName = new String(_templateData, attributeNameStart, _cursor
665                                    - attributeNameStart);
666    
667                            state = ADVANCE_PAST_EQUALS;
668                            break;
669                        }
670    
671                        // Part of the attribute name
672    
673                        advance();
674                        break;
675    
676                    case ADVANCE_PAST_EQUALS:
677    
678                        // Looking for the '=' sign. May hit the end of the tag, or (for bare
679                        // attributes),
680                        // the next attribute name.
681    
682                        if (ch == '/' || ch == '>')
683                        {
684                            // A bare attribute, which is not interesting to
685                            // us.
686    
687                            state = WAIT_FOR_ATTRIBUTE_NAME;
688                            break;
689                        }
690    
691                        if (Character.isWhitespace(ch))
692                        {
693                            advance();
694                            break;
695                        }
696    
697                        if (ch == '=')
698                        {
699                            state = WAIT_FOR_ATTRIBUTE_VALUE;
700                            quoteChar = 0;
701                            attributeValueStart = -1;
702                            advance();
703                            break;
704                        }
705    
706                        // Otherwise, an HTML style "bare" attribute (such as <select multiple>).
707                        // We aren't interested in those (we're just looking for the id or jwcid
708                        // attribute).
709    
710                        state = WAIT_FOR_ATTRIBUTE_NAME;
711                        break;
712    
713                    case WAIT_FOR_ATTRIBUTE_VALUE:
714    
715                        if (ch == '/' || ch == '>')
716                            templateParseProblem(ParseMessages.missingAttributeValue(
717                                    tagName,
718                                    _line,
719                                    attributeName), getCurrentLocation(), _line, _cursor);
720    
721                        // Ignore whitespace between '=' and the attribute value. Also, look
722                        // for initial quote.
723    
724                        if (Character.isWhitespace(ch))
725                        {
726                            advance();
727                            break;
728                        }
729    
730                        if (ch == '\'' || ch == '"')
731                        {
732                            quoteChar = ch;
733    
734                            state = COLLECT_QUOTED_VALUE;
735                            advance();
736                            attributeValueStart = _cursor;
737                            attributeBeginEvent(attributeName, _line, attributeValueStart);
738                            break;
739                        }
740    
741                        // Not whitespace or quote, must be start of unquoted attribute.
742    
743                        state = COLLECT_UNQUOTED_VALUE;
744                        attributeValueStart = _cursor;
745                        attributeBeginEvent(attributeName, _line, attributeValueStart);
746                        break;
747    
748                    case COLLECT_QUOTED_VALUE:
749    
750                        // Start collecting the quoted attribute value. Stop at the matching quote
751                        // character,
752                        // unless bare, in which case, stop at the next whitespace.
753    
754                        if (ch == quoteChar)
755                        {
756                            String attributeValue = new String(_templateData, attributeValueStart,
757                                    _cursor - attributeValueStart);
758    
759                            attributeEndEvent(_cursor);
760    
761                            addAttributeIfUnique(tagName, attributeName, attributeValue);
762    
763                            // Advance over the quote.
764                            advance();
765                            state = WAIT_FOR_ATTRIBUTE_NAME;
766                            break;
767                        }
768    
769                        advance();
770                        break;
771    
772                    case COLLECT_UNQUOTED_VALUE:
773    
774                        // An unquoted attribute value ends with whitespace
775                        // or the end of the enclosing tag.
776    
777                        if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
778                        {
779                            String attributeValue = new String(_templateData, attributeValueStart,
780                                    _cursor - attributeValueStart);
781    
782                            attributeEndEvent(_cursor);
783                            addAttributeIfUnique(tagName, attributeName, attributeValue);
784    
785                            state = WAIT_FOR_ATTRIBUTE_NAME;
786                            break;
787                        }
788    
789                        advance();
790                        break;
791                }
792            }
793    
794            tagEndEvent(_cursor);
795    
796            // Check for invisible localizations
797    
798            String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes);
799            String jwcId = findValueCaselessly(_componentAttributeName, _attributes);
800    
801            if (localizationKey != null && tagName.equalsIgnoreCase("span") && jwcId == null)
802            {
803                if (_ignoring)
804                    templateParseProblem(
805                            ParseMessages.componentMayNotBeIgnored(tagName, startLine),
806                            startLocation,
807                            startLine,
808                            cursorStart);
809    
810                // If the tag isn't empty, then create a Tag instance to ignore the
811                // body of the tag.
812    
813                if (!emptyTag)
814                {
815                    Tag tag = new Tag(tagName, startLine);
816    
817                    tag._component = false;
818                    tag._removeTag = true;
819                    tag._ignoringBody = true;
820                    tag._mustBalance = true;
821    
822                    _stack.add(tag);
823    
824                    // Start ignoring content until the close tag.
825    
826                    _ignoring = true;
827                }
828                else
829                {
830                    // Cursor is at the closing carat, advance over it.
831                    advance();
832                    // TAPESTRY-359: *don't* skip whitespace
833                }
834    
835                // End any open block.
836    
837                addTextToken(cursorStart - 1);
838    
839                boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes);
840    
841                Map attributes = filter(_attributes, new String[]
842                { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME });
843    
844                TemplateToken token = _factory.createLocalizationToken(
845                        tagName,
846                        localizationKey,
847                        raw,
848                        attributes,
849                        startLocation);
850    
851                _tokens.add(token);
852    
853                return;
854            }
855    
856            if (jwcId != null)
857            {
858                processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation);
859                return;
860            }
861    
862            // A static tag (not a tag without a jwcid attribute).
863            // We need to record this so that we can match close tags later.
864    
865            if (!emptyTag)
866            {
867                Tag tag = new Tag(tagName, startLine);
868                _stack.add(tag);
869            }
870    
871            // If there wasn't an active block, then start one.
872    
873            if (_blockStart < 0 && !_ignoring)
874                _blockStart = cursorStart;
875    
876            advance();
877        }
878    
879        /**
880         * @throws TemplateParseException
881         * @since 4.0
882         */
883    
884        private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue)
885                throws TemplateParseException
886        {
887    
888            if (_attributes.containsKey(attributeName))
889                templateParseProblem(
890                        ParseMessages.duplicateTagAttribute(tagName, _line, attributeName),
891                        getCurrentLocation(),
892                        _line,
893                        _cursor);
894    
895            _attributes.put(attributeName, attributeValue);
896        }
897    
/*
 * Note: this description belongs to processComponentStart(), which is defined further below:
 * it processes a tag that is the open tag for a component (but also handles the $remove$ and
 * $content$ tags).
 */
902    
903        /**
904         * Notify that the beginning of a tag has been detected.
905         * <p>
906         * Default implementation does nothing.
907         */
908        protected void tagBeginEvent(int startLine, int cursorPosition)
909        {
910        }
911    
912        /**
913         * Notify that the end of the current tag has been detected.
914         * <p>
915         * Default implementation does nothing.
916         */
917        protected void tagEndEvent(int cursorPosition)
918        {
919        }
920    
921        /**
922         * Notify that the beginning of an attribute value has been detected.
923         * <p>
924         * Default implementation does nothing.
925         */
926        protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition)
927        {
928        }
929    
930        /**
931         * Notify that the end of the current attribute value has been detected.
932         * <p>
933         * Default implementation does nothing.
934         */
935        protected void attributeEndEvent(int cursorPosition)
936        {
937        }
938    
939        private void processComponentStart(String tagName, String jwcId, boolean emptyTag,
940                int startLine, int cursorStart, Location startLocation) throws TemplateParseException
941        {
942            if (jwcId.equalsIgnoreCase(CONTENT_ID))
943            {
944                processContentTag(tagName, startLine, cursorStart, emptyTag);
945    
946                return;
947            }
948    
949            boolean isRemoveId = jwcId.equalsIgnoreCase(REMOVE_ID);
950    
951            if (_ignoring && !isRemoveId)
952                templateParseProblem(
953                        ParseMessages.componentMayNotBeIgnored(tagName, startLine),
954                        startLocation,
955                        startLine,
956                        cursorStart);
957    
958            String type = null;
959            boolean allowBody = false;
960    
961            if (_patternMatcher.matches(jwcId, _implicitIdPattern))
962            {
963                MatchResult match = _patternMatcher.getMatch();
964    
965                jwcId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP);
966                type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP);
967    
968                String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP);
969                String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP);
970    
971                // If (and this is typical) no actual component id was specified,
972                // then generate one on the fly.
973                // The allocated id for anonymous components is
974                // based on the simple (unprefixed) type, but starts
975                // with a leading dollar sign to ensure no conflicts
976                // with user defined component ids (which don't allow dollar signs
977                // in the id).
978                // New for 4.0: the component type may included slashes ('/'), but these
979                // are not valid identifiers, so we convert them to '$'.
980    
981                if (jwcId == null)
982                    jwcId = _idAllocator.allocateId("$" + simpleType.replace('/', '$'));
983    
984                try
985                {
986                    allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation);
987                }
988                catch (ApplicationRuntimeException e)
989                {
990                    // give subclasses a chance to handle and rethrow
991                    templateParseProblem(e, startLine, cursorStart);
992                }
993    
994            }
995            else
996            {
997                if (!isRemoveId)
998                {
999                    if (!_patternMatcher.matches(jwcId, _simpleIdPattern))
1000                        templateParseProblem(
1001                                ParseMessages.componentIdInvalid(tagName, startLine, jwcId),
1002                                startLocation,
1003                                startLine,
1004                                cursorStart);
1005    
1006                    if (!_delegate.getKnownComponent(jwcId))
1007                        templateParseProblem(
1008                                ParseMessages.unknownComponentId(tagName, startLine, jwcId),
1009                                startLocation,
1010                                startLine,
1011                                cursorStart);
1012    
1013                    try
1014                    {
1015                        allowBody = _delegate.getAllowBody(jwcId, startLocation);
1016                    }
1017                    catch (ApplicationRuntimeException e)
1018                    {
1019                        // give subclasses a chance to handle and rethrow
1020                        templateParseProblem(e, startLine, cursorStart);
1021                    }
1022                }
1023            }
1024    
1025            // Ignore the body if we're removing the entire tag,
1026            // of if the corresponding component doesn't allow
1027            // a body.
1028    
1029            boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody);
1030    
1031            if (_ignoring && ignoreBody)
1032                templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl(
1033                        _resourceLocation, startLine), startLine, cursorStart);
1034    
1035            if (!emptyTag)
1036                pushNewTag(tagName, startLine, isRemoveId, ignoreBody);
1037    
1038            // End any open block.
1039    
1040            addTextToken(cursorStart - 1);
1041    
1042            if (!isRemoveId)
1043            {
1044                addOpenToken(tagName, jwcId, type, startLocation);
1045    
1046                if (emptyTag)
1047                    _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1048            }
1049    
1050            advance();
1051        }
1052    
1053        private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody)
1054        {
1055            Tag tag = new Tag(tagName, startLine);
1056    
1057            tag._component = !isRemoveId;
1058            tag._removeTag = isRemoveId;
1059    
1060            tag._ignoringBody = ignoreBody;
1061    
1062            _ignoring = tag._ignoringBody;
1063    
1064            tag._mustBalance = true;
1065    
1066            _stack.add(tag);
1067        }
1068    
1069        private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag)
1070                throws TemplateParseException
1071        {
1072            if (_ignoring)
1073                templateParseProblem(
1074                        ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine),
1075                        new LocationImpl(_resourceLocation, startLine),
1076                        startLine,
1077                        cursorStart);
1078    
1079            if (emptyTag)
1080                templateParseProblem(
1081                        ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine),
1082                        new LocationImpl(_resourceLocation, startLine),
1083                        startLine,
1084                        cursorStart);
1085    
1086            _tokens.clear();
1087            _blockStart = -1;
1088    
1089            Tag tag = new Tag(tagName, startLine);
1090    
1091            tag._mustBalance = true;
1092            tag._content = true;
1093    
1094            _stack.clear();
1095            _stack.add(tag);
1096    
1097            advance();
1098        }
1099    
1100        private void addOpenToken(String tagName, String jwcId, String type, Location location)
1101        {
1102            OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location);
1103            _tokens.add(token);
1104    
1105            if (_attributes.isEmpty())
1106                return;
1107    
1108            Iterator i = _attributes.entrySet().iterator();
1109            while (i.hasNext())
1110            {
1111                Map.Entry entry = (Map.Entry) i.next();
1112    
1113                String key = (String) entry.getKey();
1114    
1115                if (key.equalsIgnoreCase(_componentAttributeName))
1116                    continue;
1117    
1118                String value = (String) entry.getValue();
1119    
1120                addAttributeToToken(token, key, value);
1121            }
1122        }
1123    
1124        /**
1125         * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream).
1126         * 
1127         * @since 3.0
1128         */
1129    
1130        private void addAttributeToToken(OpenToken token, String name, String attributeValue)
1131        {
1132            token.addAttribute(name, convertEntitiesToPlain(attributeValue));
1133        }
1134    
1135        /**
1136         * Invoked to handle a closing tag, i.e., </foo>. When a tag closes, it will match against
1137         * a tag on the open tag start. Preferably the top tag on the stack (if everything is well
1138         * balanced), but this is HTML, not XML, so many tags won't balance.
1139         * <p>
1140         * Once the matching tag is located, the question is ... is the tag dynamic or static? If
1141         * static, then the current text block is extended to include this close tag. If dynamic, then
1142         * the current text block is ended (before the '<' that starts the tag) and a close token is
1143         * added.
1144         * <p>
1145         * In either case, the matching static element and anything above it is removed, and the cursor
1146         * is left on the character following the '>'.
1147         */
1148    
1149        private void closeTag() throws TemplateParseException
1150        {
1151            int cursorStart = _cursor;
1152            int length = _templateData.length;
1153            int startLine = _line;
1154    
1155            Location startLocation = getCurrentLocation();
1156    
1157            _cursor += CLOSE_TAG.length;
1158    
1159            int tagStart = _cursor;
1160    
1161            while (true)
1162            {
1163                if (_cursor >= length)
1164                    templateParseProblem(
1165                            ParseMessages.incompleteCloseTag(startLine),
1166                            startLocation,
1167                            startLine,
1168                            cursorStart);
1169    
1170                char ch = _templateData[_cursor];
1171    
1172                if (ch == '>')
1173                    break;
1174    
1175                advance();
1176            }
1177    
1178            String tagName = new String(_templateData, tagStart, _cursor - tagStart);
1179    
1180            int stackPos = _stack.size() - 1;
1181            Tag tag = null;
1182    
1183            while (stackPos >= 0)
1184            {
1185                tag = (Tag) _stack.get(stackPos);
1186    
1187                if (tag.match(tagName))
1188                    break;
1189    
1190                if (tag._mustBalance)
1191                    templateParseProblem(ParseMessages.improperlyNestedCloseTag(
1192                            tagName,
1193                            startLine,
1194                            tag._tagName,
1195                            tag._line), startLocation, startLine, cursorStart);
1196    
1197                stackPos--;
1198            }
1199    
1200            if (stackPos < 0)
1201                templateParseProblem(
1202                        ParseMessages.unmatchedCloseTag(tagName, startLine),
1203                        startLocation,
1204                        startLine,
1205                        cursorStart);
1206    
1207            // Special case for the content tag
1208    
1209            if (tag._content)
1210            {
1211                addTextToken(cursorStart - 1);
1212    
1213                // Advance the cursor right to the end.
1214    
1215                _cursor = length;
1216                _stack.clear();
1217                return;
1218            }
1219    
1220            // When a component closes, add a CLOSE tag.
1221            if (tag._component)
1222            {
1223                addTextToken(cursorStart - 1);
1224    
1225                _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1226            }
1227            else
1228            {
1229                // The close of a static tag. Unless removing the tag
1230                // entirely, make sure the block tag is part of a text block.
1231    
1232                if (_blockStart < 0 && !tag._removeTag && !_ignoring)
1233                    _blockStart = cursorStart;
1234            }
1235    
1236            // Remove all elements at stackPos or above.
1237    
1238            for (int i = _stack.size() - 1; i >= stackPos; i--)
1239                _stack.remove(i);
1240    
1241            // Advance cursor past '>'
1242    
1243            advance();
1244    
1245            // If editting out the tag (i.e., $remove$) then kill any whitespace.
1246            // For components that simply don't contain a body, removeTag will
1247            // be false.
1248    
1249            if (tag._removeTag)
1250                advanceOverWhitespace();
1251    
1252            // If we were ignoring the body of the tag, then clear the ignoring
1253            // flag, since we're out of the body.
1254    
1255            if (tag._ignoringBody)
1256                _ignoring = false;
1257        }
1258    
1259        /**
1260         * Advances the cursor to the next character. If the end-of-line is reached, then increments the
1261         * line counter.
1262         */
1263    
1264        private void advance()
1265        {
1266            int length = _templateData.length;
1267    
1268            if (_cursor >= length)
1269                return;
1270    
1271            char ch = _templateData[_cursor];
1272    
1273            _cursor++;
1274    
1275            if (ch == '\n')
1276            {
1277                _line++;
1278                _currentLocation = null;
1279                return;
1280            }
1281    
1282            // A \r, or a \r\n also counts as a new line.
1283    
1284            if (ch == '\r')
1285            {
1286                _line++;
1287                _currentLocation = null;
1288    
1289                if (_cursor < length && _templateData[_cursor] == '\n')
1290                    _cursor++;
1291    
1292                return;
1293            }
1294    
1295            // Not an end-of-line character.
1296    
1297        }
1298    
1299        private void advanceOverWhitespace()
1300        {
1301            int length = _templateData.length;
1302    
1303            while (_cursor < length)
1304            {
1305                char ch = _templateData[_cursor];
1306                if (!Character.isWhitespace(ch))
1307                    return;
1308    
1309                advance();
1310            }
1311        }
1312    
1313        /**
1314         * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list
1315         * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded
1316         * from the output map. May return null (rather than return an empty Map).
1317         */
1318    
1319        private Map filter(Map input, String[] removeKeys)
1320        {
1321            if (input == null || input.isEmpty())
1322                return null;
1323    
1324            Map result = null;
1325    
1326            Iterator i = input.entrySet().iterator();
1327    
1328            nextkey: while (i.hasNext())
1329            {
1330                Map.Entry entry = (Map.Entry) i.next();
1331    
1332                String key = (String) entry.getKey();
1333    
1334                for (int j = 0; j < removeKeys.length; j++)
1335                {
1336                    if (key.equalsIgnoreCase(removeKeys[j]))
1337                        continue nextkey;
1338                }
1339    
1340                if (result == null)
1341                    result = new HashMap(input.size());
1342    
1343                result.put(key, entry.getValue());
1344            }
1345    
1346            return result;
1347        }
1348    
1349        /**
1350         * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys
1351         * and values. Returns the value for the first key found that matches (caselessly) the input
1352         * key. Returns null if no value found.
1353         */
1354    
1355        protected String findValueCaselessly(String key, Map map)
1356        {
1357            String result = (String) map.get(key);
1358    
1359            if (result != null)
1360                return result;
1361    
1362            Iterator i = map.entrySet().iterator();
1363            while (i.hasNext())
1364            {
1365                Map.Entry entry = (Map.Entry) i.next();
1366    
1367                String entryKey = (String) entry.getKey();
1368    
1369                if (entryKey.equalsIgnoreCase(key))
1370                    return (String) entry.getValue();
1371            }
1372    
1373            return null;
1374        }
1375    
1376        /**
1377         * Conversions needed by {@link #convertEntitiesToPlain(String)}
1378         */
1379    
1380        private static final String[] CONVERSIONS =
1381        { "<", "<", ">", ">", """, "\"", "&", "&" };
1382    
1383        /**
1384         * Provided a raw input string that has been recognized to be an expression, this removes excess
1385         * white space and converts &amp;;, &quot;; &lt;; and &gt;; to their normal
1386         * character values (otherwise its impossible to specify those values in expressions in the
1387         * template).
1388         */
1389    
1390        private String convertEntitiesToPlain(String input)
1391        {
1392            int inputLength = input.length();
1393    
1394            StringBuffer buffer = new StringBuffer(inputLength);
1395    
1396            int cursor = 0;
1397    
1398            outer: while (cursor < inputLength)
1399            {
1400                for (int i = 0; i < CONVERSIONS.length; i += 2)
1401                {
1402                    String entity = CONVERSIONS[i];
1403                    int entityLength = entity.length();
1404                    String value = CONVERSIONS[i + 1];
1405    
1406                    if (cursor + entityLength > inputLength)
1407                        continue;
1408    
1409                    if (input.substring(cursor, cursor + entityLength).equals(entity))
1410                    {
1411                        buffer.append(value);
1412                        cursor += entityLength;
1413                        continue outer;
1414                    }
1415                }
1416    
1417                buffer.append(input.charAt(cursor));
1418                cursor++;
1419            }
1420    
1421            return buffer.toString().trim();
1422        }
1423    
1424        /**
1425         * Returns true if the map contains the given key (caseless search) and the value is "true"
1426         * (caseless comparison).
1427         */
1428    
1429        private boolean checkBoolean(String key, Map map)
1430        {
1431            String value = findValueCaselessly(key, map);
1432    
1433            if (value == null)
1434                return false;
1435    
1436            return value.equalsIgnoreCase("true");
1437        }
1438    
1439        /**
1440         * Gets the current location within the file. This allows the location to be created only as
1441         * needed, and multiple objects on the same line can share the same Location instance.
1442         * 
1443         * @since 3.0
1444         */
1445    
1446        protected Location getCurrentLocation()
1447        {
1448            if (_currentLocation == null)
1449                _currentLocation = new LocationImpl(_resourceLocation, _line);
1450    
1451            return _currentLocation;
1452        }
1453    
1454        public void setFactory(TemplateTokenFactory factory)
1455        {
1456            _factory = factory;
1457        }
1458    
1459    }