com.springsource.util.parser.manifest.internal
Class RecoveringManifestLexer

java.lang.Object
  extended by com.springsource.util.parser.manifest.internal.RecoveringManifestLexer

public class RecoveringManifestLexer
extends java.lang.Object

Process some input data according to the expected layout above. However, this is not a dumb lexer: it intelligently processes the data to allow for some errors (which are reported but recovered from) and automatically processes some constructs so that the real parser built on top of this lexer does not have to (i.e. it identifies ; = , := in the values).

The end result of a 'lex' is that the data is broken into these tokens: NAME, COLON, VALUE, NEWLINE. Importantly:

Hopefully building the above knowledge into the lexer doesn't make it too difficult to understand...

Concurrent Semantics
This class is thread safe.

Author:
Andy Clement

Nested Class Summary
private static class RecoveringManifestLexer.ParsingState
           
 
Field Summary
private static byte alphanumMask
           
private static char[] BLANK_TOKEN_VALUE
           
private static char COLON
           
private  SourceContext context
           
private  char[] data
           
private  int dataLength
           
private  int dataPos
           
private static byte digitChar
           
private static byte[] fastCharLookup
           
private  ManifestToken lastEmittedNameToken
           
private static byte letterChar
           
private  java.util.List<java.lang.Integer> linebreaks
           
static int MAX_TOKEN_LENGTH
           
private static byte nameMask
           
private static byte newlineChar
           
private  java.util.List<Token> newlineTokens
           
private  RecoveringManifestLexer.ParsingState parsingState
           
private  int possibleTokenEnd
           
private static char SPACE
           
private  int startPosition
           
private  char[] token
           
private  int tokenPos
           
private  SimpleTokenStream tokenStream
           
private static byte underscoreHyphenChar
           
 
Constructor Summary
private RecoveringManifestLexer()
           
 
Method Summary
private  int correctMaxPosition(int pos)
           
private  void emitAccumulatedNewlineTokens()
           
private  void emitColonToken(int spos, int epos)
           
private  void emitNameToken()
           
private  void emitPhantomColonToken(int pos)
           
private  void emitPhantomValueToken(int pos)
           
private  void emitValueToken(int spos, int epos)
           
private  void initialize()
           
private static boolean isAlphanumeric(char ch)
           
private static boolean isNameChar(char ch)
           
private static boolean isNewline(char ch)
           
private  void process(java.lang.String str)
           
private  boolean processNewlines(boolean emitToTokenStreamImmediately)
          Process newlines from the current position until something other than a newline is encountered.
private  void recordProblem(ManifestProblemKind problemKind, int start, int end, java.lang.String... inserts)
           
private  void startNewToken()
           
static char[] subarray(char[] array, int start, int end)
           
static java.lang.String substring(char[] array, int start, int end)
           
static TokenStream tokenize(java.io.Reader reader)
          Tokenize input from a Reader as a manifest.
static TokenStream tokenize(java.lang.String manifestString)
          Tokenize a string representing a manifest.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

BLANK_TOKEN_VALUE

private static final char[] BLANK_TOKEN_VALUE

SPACE

private static final char SPACE
See Also:
Constant Field Values

COLON

private static final char COLON
See Also:
Constant Field Values

MAX_TOKEN_LENGTH

public static final int MAX_TOKEN_LENGTH
See Also:
Constant Field Values

tokenStream

private SimpleTokenStream tokenStream

context

private SourceContext context

data

private char[] data

dataLength

private int dataLength

dataPos

private int dataPos

linebreaks

private java.util.List<java.lang.Integer> linebreaks

parsingState

private RecoveringManifestLexer.ParsingState parsingState

token

private char[] token

tokenPos

private int tokenPos

startPosition

private int startPosition

possibleTokenEnd

private int possibleTokenEnd

lastEmittedNameToken

private ManifestToken lastEmittedNameToken

newlineTokens

private java.util.List<Token> newlineTokens

fastCharLookup

private static byte[] fastCharLookup

letterChar

private static final byte letterChar
See Also:
Constant Field Values

digitChar

private static final byte digitChar
See Also:
Constant Field Values

underscoreHyphenChar

private static final byte underscoreHyphenChar
See Also:
Constant Field Values

newlineChar

private static final byte newlineChar
See Also:
Constant Field Values

alphanumMask

private static final byte alphanumMask
See Also:
Constant Field Values

nameMask

private static final byte nameMask
See Also:
Constant Field Values
Constructor Detail

RecoveringManifestLexer

private RecoveringManifestLexer()
Method Detail

tokenize

public static TokenStream tokenize(java.lang.String manifestString)
Tokenize a string representing a manifest.

Parameters:
manifestString - the string
Returns:
the token stream

tokenize

public static TokenStream tokenize(java.io.Reader reader)
                            throws java.io.IOException
Tokenize input from a Reader as a manifest.

Parameters:
reader - for input
Returns:
token stream
Throws:
java.io.IOException - for read errors

initialize

private void initialize()

process

private void process(java.lang.String str)

correctMaxPosition

private int correctMaxPosition(int pos)

emitNameToken

private void emitNameToken()

emitColonToken

private void emitColonToken(int spos,
                            int epos)

emitPhantomColonToken

private void emitPhantomColonToken(int pos)

emitValueToken

private void emitValueToken(int spos,
                            int epos)

emitPhantomValueToken

private void emitPhantomValueToken(int pos)

emitAccumulatedNewlineTokens

private void emitAccumulatedNewlineTokens()

startNewToken

private void startNewToken()

recordProblem

private void recordProblem(ManifestProblemKind problemKind,
                           int start,
                           int end,
                           java.lang.String... inserts)

subarray

public static final char[] subarray(char[] array,
                                    int start,
                                    int end)

substring

public static final java.lang.String substring(char[] array,
                                               int start,
                                               int end)

isAlphanumeric

private static boolean isAlphanumeric(char ch)

isNameChar

private static boolean isNameChar(char ch)

isNewline

private static boolean isNewline(char ch)

processNewlines

private boolean processNewlines(boolean emitToTokenStreamImmediately)
Process newlines from the current position until something other than a newline is encountered. Newlines are either "\n" or "\r" or "\n\r". The parameter emitToTokenStreamImmediately determines whether tokens for the newlines should be emitted to the token stream immediately or recorded for emitting later. The reason a caller may want to emit them later is that they are making a decision based on: (1) whether there was a blank line, and (2) what the next character is after the newlines.

Parameters:
emitToTokenStreamImmediately - whether to emit the Newline tokens immediately or simply record for later emission
Returns:
true if a blank line was found (i.e. at least two Newlines together)