diff options
Diffstat (limited to 'runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs')
-rw-r--r-- | runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs | 1186 |
1 files changed, 0 insertions, 1186 deletions
diff --git a/runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs b/runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs deleted file mode 100644 index c62a5bf..0000000 --- a/runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs +++ /dev/null @@ -1,1186 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2011 Terence Parr - * All rights reserved. - * - * Conversion to C#: - * Copyright (c) 2011 Sam Harwell, Pixel Mine, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -namespace Antlr.Runtime -{ - using System.Collections.Generic; - - using ArgumentNullException = System.ArgumentNullException; - using Array = System.Array; - using Conditional = System.Diagnostics.ConditionalAttribute; - using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener; - using MethodBase = System.Reflection.MethodBase; - using Regex = System.Text.RegularExpressions.Regex; - using StackFrame = System.Diagnostics.StackFrame; - using StackTrace = System.Diagnostics.StackTrace; - using TextWriter = System.IO.TextWriter; - - /** <summary> - * A generic recognizer that can handle recognizers generated from - * lexer, parser, and tree grammars. This is all the parsing - * support code essentially; most of it is error recovery stuff and - * backtracking. - * </summary> - */ - public abstract class BaseRecognizer - { - public const int MemoRuleFailed = -2; - public const int MemoRuleUnknown = -1; - public const int InitialFollowStackSize = 100; - - // copies from Token object for convenience in actions - public const int DefaultTokenChannel = TokenChannels.Default; - public const int Hidden = TokenChannels.Hidden; - - public const string NextTokenRuleName = "nextToken"; - - /** <summary> - * State of a lexer, parser, or tree parser are collected into a state - * object so the state can be shared. This sharing is needed to - * have one grammar import others and share same error variables - * and other state variables. It's a kind of explicit multiple - * inheritance via delegation of methods and shared state. - * </summary> - */ - protected internal RecognizerSharedState state; - - public BaseRecognizer() - : this(new RecognizerSharedState()) - { - } - - public BaseRecognizer( RecognizerSharedState state ) - { - if ( state == null ) - { - state = new RecognizerSharedState(); - } - this.state = state; - InitDFAs(); - } - - public TextWriter TraceDestination - { - get; - set; - } - - public virtual void SetState(RecognizerSharedState value) - { - this.state = value; - } - - protected virtual void InitDFAs() - { - } - - /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */ - public virtual void Reset() - { - // wack everything related to error recovery - if ( state == null ) - { - return; // no shared state work to do - } - state._fsp = -1; - state.errorRecovery = false; - state.lastErrorIndex = -1; - state.failed = false; - state.syntaxErrors = 0; - // wack everything related to backtracking and memoization - state.backtracking = 0; - for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ ) - { // wipe cache - state.ruleMemo[i] = null; - } - } - - - /** <summary> - * Match current input symbol against ttype. Attempt - * single token insertion or deletion error recovery. If - * that fails, throw MismatchedTokenException. - * </summary> - * - * <remarks> - * To turn off single token insertion or deletion error - * recovery, override recoverFromMismatchedToken() and have it - * throw an exception. See TreeParser.recoverFromMismatchedToken(). - * This way any error in a rule will cause an exception and - * immediate exit from rule. Rule would recover by resynchronizing - * to the set of symbols that can follow rule ref. - * </remarks> - */ - public virtual object Match( IIntStream input, int ttype, BitSet follow ) - { - //System.out.println("match "+((TokenStream)input).LT(1)); - object matchedSymbol = GetCurrentInputSymbol( input ); - if ( input.LA( 1 ) == ttype ) - { - input.Consume(); - state.errorRecovery = false; - state.failed = false; - return matchedSymbol; - } - if ( state.backtracking > 0 ) - { - state.failed = true; - return matchedSymbol; - } - matchedSymbol = RecoverFromMismatchedToken( input, ttype, follow ); - return matchedSymbol; - } - - /** <summary>Match the wildcard: in a symbol</summary> */ - public virtual void MatchAny( IIntStream input ) - { - state.errorRecovery = false; - state.failed = false; - input.Consume(); - } - - public virtual bool MismatchIsUnwantedToken( IIntStream input, int ttype ) - { - return input.LA( 2 ) == ttype; - } - - public virtual bool MismatchIsMissingToken( IIntStream input, BitSet follow ) - { - if ( follow == null ) - { - // we have no information about the follow; we can only consume - // a single token and hope for the best - return false; - } - // compute what can follow this grammar element reference - if ( follow.Member( TokenTypes.EndOfRule ) ) - { - BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW(); - follow = follow.Or( viableTokensFollowingThisRule ); - if ( state._fsp >= 0 ) - { // remove EOR if we're not the start symbol - follow.Remove( TokenTypes.EndOfRule ); - } - } - // if current token is consistent with what could come after set - // then we know we're missing a token; error recovery is free to - // "insert" the missing token - - //System.out.println("viable tokens="+follow.toString(getTokenNames())); - //System.out.println("LT(1)="+((TokenStream)input).LT(1)); - - // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR - // in follow set to indicate that the fall of the start symbol is - // in the set (EOF can follow). - if ( follow.Member( input.LA( 1 ) ) || follow.Member( TokenTypes.EndOfRule ) ) - { - //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); - return true; - } - return false; - } - - /** <summary>Report a recognition problem.</summary> - * - * <remarks> - * This method sets errorRecovery to indicate the parser is recovering - * not parsing. Once in recovery mode, no errors are generated. - * To get out of recovery mode, the parser must successfully match - * a token (after a resync). So it will go: - * - * 1. error occurs - * 2. enter recovery mode, report error - * 3. consume until token found in resynch set - * 4. try to resume parsing - * 5. next match() will reset errorRecovery mode - * - * If you override, make sure to update syntaxErrors if you care about that. - * </remarks> - */ - public virtual void ReportError( RecognitionException e ) - { - // if we've already reported an error and have not matched a token - // yet successfully, don't report any errors. - if ( state.errorRecovery ) - { - //System.err.print("[SPURIOUS] "); - return; - } - state.syntaxErrors++; // don't count spurious - state.errorRecovery = true; - - DisplayRecognitionError( this.TokenNames, e ); - } - - public virtual void DisplayRecognitionError( string[] tokenNames, - RecognitionException e ) - { - string hdr = GetErrorHeader( e ); - string msg = GetErrorMessage( e, tokenNames ); - EmitErrorMessage( hdr + " " + msg ); - } - - /** <summary>What error message should be generated for the various exception types?</summary> - * - * <remarks> - * Not very object-oriented code, but I like having all error message - * generation within one method rather than spread among all of the - * exception classes. This also makes it much easier for the exception - * handling because the exception classes do not have to have pointers back - * to this object to access utility routines and so on. Also, changing - * the message for an exception type would be difficult because you - * would have to subclassing exception, but then somehow get ANTLR - * to make those kinds of exception objects instead of the default. - * This looks weird, but trust me--it makes the most sense in terms - * of flexibility. - * - * For grammar debugging, you will want to override this to add - * more information such as the stack frame with - * getRuleInvocationStack(e, this.getClass().getName()) and, - * for no viable alts, the decision description and state etc... - * - * Override this to change the message generated for one or more - * exception types. - * </remarks> - */ - public virtual string GetErrorMessage( RecognitionException e, string[] tokenNames ) - { - string msg = e.Message; - if ( e is UnwantedTokenException ) - { - UnwantedTokenException ute = (UnwantedTokenException)e; - string tokenName = "<unknown>"; - if ( ute.Expecting == TokenTypes.EndOfFile ) - { - tokenName = "EndOfFile"; - } - else - { - tokenName = tokenNames[ute.Expecting]; - } - msg = "extraneous input " + GetTokenErrorDisplay( ute.UnexpectedToken ) + - " expecting " + tokenName; - } - else if ( e is MissingTokenException ) - { - MissingTokenException mte = (MissingTokenException)e; - string tokenName = "<unknown>"; - if ( mte.Expecting == TokenTypes.EndOfFile ) - { - tokenName = "EndOfFile"; - } - else - { - tokenName = tokenNames[mte.Expecting]; - } - msg = "missing " + tokenName + " at " + GetTokenErrorDisplay( e.Token ); - } - else if ( e is MismatchedTokenException ) - { - MismatchedTokenException mte = (MismatchedTokenException)e; - string tokenName = "<unknown>"; - if ( mte.Expecting == TokenTypes.EndOfFile ) - { - tokenName = "EndOfFile"; - } - else - { - tokenName = tokenNames[mte.Expecting]; - } - msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) + - " expecting " + tokenName; - } - else if ( e is MismatchedTreeNodeException ) - { - MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e; - string tokenName = "<unknown>"; - if ( mtne.Expecting == TokenTypes.EndOfFile ) - { - tokenName = "EndOfFile"; - } - else - { - tokenName = tokenNames[mtne.Expecting]; - } - // workaround for a .NET framework bug (NullReferenceException) - string nodeText = ( mtne.Node != null ) ? mtne.Node.ToString() ?? string.Empty : string.Empty; - msg = "mismatched tree node: " + nodeText + " expecting " + tokenName; - } - else if ( e is NoViableAltException ) - { - //NoViableAltException nvae = (NoViableAltException)e; - // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" - // and "(decision="+nvae.decisionNumber+") and - // "state "+nvae.stateNumber - msg = "no viable alternative at input " + GetTokenErrorDisplay( e.Token ); - } - else if ( e is EarlyExitException ) - { - //EarlyExitException eee = (EarlyExitException)e; - // for development, can add "(decision="+eee.decisionNumber+")" - msg = "required (...)+ loop did not match anything at input " + - GetTokenErrorDisplay( e.Token ); - } - else if ( e is MismatchedSetException ) - { - MismatchedSetException mse = (MismatchedSetException)e; - msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) + - " expecting set " + mse.Expecting; - } - else if ( e is MismatchedNotSetException ) - { - MismatchedNotSetException mse = (MismatchedNotSetException)e; - msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) + - " expecting set " + mse.Expecting; - } - else if ( e is FailedPredicateException ) - { - FailedPredicateException fpe = (FailedPredicateException)e; - msg = "rule " + fpe.RuleName + " failed predicate: {" + - fpe.PredicateText + "}?"; - } - return msg; - } - - /** <summary> - * Get number of recognition errors (lexer, parser, tree parser). Each - * recognizer tracks its own number. So parser and lexer each have - * separate count. Does not count the spurious errors found between - * an error and next valid token match - * </summary> - * - * <seealso cref="reportError()"/> - */ - public virtual int NumberOfSyntaxErrors - { - get - { - return state.syntaxErrors; - } - } - - /** <summary>What is the error header, normally line/character position information?</summary> */ - public virtual string GetErrorHeader( RecognitionException e ) - { - string prefix = SourceName ?? string.Empty; - if (prefix.Length > 0) - prefix += ' '; - - return string.Format("{0}line {1}:{2}", prefix, e.Line, e.CharPositionInLine + 1); - } - - /** <summary> - * How should a token be displayed in an error message? The default - * is to display just the text, but during development you might - * want to have a lot of information spit out. Override in that case - * to use t.ToString() (which, for CommonToken, dumps everything about - * the token). This is better than forcing you to override a method in - * your token objects because you don't have to go modify your lexer - * so that it creates a new Java type. - * </summary> - */ - public virtual string GetTokenErrorDisplay( IToken t ) - { - string s = t.Text; - if ( s == null ) - { - if ( t.Type == TokenTypes.EndOfFile ) - { - s = "<EOF>"; - } - else - { - s = "<" + t.Type + ">"; - } - } - s = Regex.Replace( s, "\n", "\\\\n" ); - s = Regex.Replace( s, "\r", "\\\\r" ); - s = Regex.Replace( s, "\t", "\\\\t" ); - return "'" + s + "'"; - } - - /** <summary>Override this method to change where error messages go</summary> */ - public virtual void EmitErrorMessage( string msg ) - { - if (TraceDestination != null) - TraceDestination.WriteLine( msg ); - } - - /** <summary> - * Recover from an error found on the input stream. This is - * for NoViableAlt and mismatched symbol exceptions. If you enable - * single token insertion and deletion, this will usually not - * handle mismatched symbol exceptions but there could be a mismatched - * token that the match() routine could not recover from. - * </summary> - */ - public virtual void Recover( IIntStream input, RecognitionException re ) - { - if ( state.lastErrorIndex == input.Index ) - { - // uh oh, another error at same token index; must be a case - // where LT(1) is in the recovery token set so nothing is - // consumed; consume a single token so at least to prevent - // an infinite loop; this is a failsafe. - input.Consume(); - } - state.lastErrorIndex = input.Index; - BitSet followSet = ComputeErrorRecoverySet(); - BeginResync(); - ConsumeUntil( input, followSet ); - EndResync(); - } - - /** <summary> - * A hook to listen in on the token consumption during error recovery. - * The DebugParser subclasses this to fire events to the listenter. - * </summary> - */ - public virtual void BeginResync() - { - } - - public virtual void EndResync() - { - } - - /* Compute the error recovery set for the current rule. During - * rule invocation, the parser pushes the set of tokens that can - * follow that rule reference on the stack; this amounts to - * computing FIRST of what follows the rule reference in the - * enclosing rule. This local follow set only includes tokens - * from within the rule; i.e., the FIRST computation done by - * ANTLR stops at the end of a rule. - * - * EXAMPLE - * - * When you find a "no viable alt exception", the input is not - * consistent with any of the alternatives for rule r. The best - * thing to do is to consume tokens until you see something that - * can legally follow a call to r *or* any rule that called r. - * You don't want the exact set of viable next tokens because the - * input might just be missing a token--you might consume the - * rest of the input looking for one of the missing tokens. - * - * Consider grammar: - * - * a : '[' b ']' - * | '(' b ')' - * ; - * b : c '^' INT ; - * c : ID - * | INT - * ; - * - * At each rule invocation, the set of tokens that could follow - * that rule is pushed on a stack. Here are the various "local" - * follow sets: - * - * FOLLOW(b1_in_a) = FIRST(']') = ']' - * FOLLOW(b2_in_a) = FIRST(')') = ')' - * FOLLOW(c_in_b) = FIRST('^') = '^' - * - * Upon erroneous input "[]", the call chain is - * - * a -> b -> c - * - * and, hence, the follow context stack is: - * - * depth local follow set after call to rule - * 0 <EOF> a (from main()) - * 1 ']' b - * 3 '^' c - * - * Notice that ')' is not included, because b would have to have - * been called from a different context in rule a for ')' to be - * included. - * - * For error recovery, we cannot consider FOLLOW(c) - * (context-sensitive or otherwise). We need the combined set of - * all context-sensitive FOLLOW sets--the set of all tokens that - * could follow any reference in the call chain. We need to - * resync to one of those tokens. Note that FOLLOW(c)='^' and if - * we resync'd to that token, we'd consume until EOF. We need to - * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. - * In this case, for input "[]", LA(1) is in this set so we would - * not consume anything and after printing an error rule c would - * return normally. It would not find the required '^' though. - * At this point, it gets a mismatched token error and throws an - * exception (since LA(1) is not in the viable following token - * set). The rule exception handler tries to recover, but finds - * the same recovery set and doesn't consume anything. Rule b - * exits normally returning to rule a. Now it finds the ']' (and - * with the successful match exits errorRecovery mode). - * - * So, you cna see that the parser walks up call chain looking - * for the token that was a member of the recovery set. - * - * Errors are not generated in errorRecovery mode. - * - * ANTLR's error recovery mechanism is based upon original ideas: - * - * "Algorithms + Data Structures = Programs" by Niklaus Wirth - * - * and - * - * "A note on error recovery in recursive descent parsers": - * http://portal.acm.org/citation.cfm?id=947902.947905 - * - * Later, Josef Grosch had some good ideas: - * - * "Efficient and Comfortable Error Recovery in Recursive Descent - * Parsers": - * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip - * - * Like Grosch I implemented local FOLLOW sets that are combined - * at run-time upon error to avoid overhead during parsing. - */ - protected virtual BitSet ComputeErrorRecoverySet() - { - return CombineFollows( false ); - } - - /** <summary> - * Compute the context-sensitive FOLLOW set for current rule. - * This is set of token types that can follow a specific rule - * reference given a specific call chain. You get the set of - * viable tokens that can possibly come next (lookahead depth 1) - * given the current call chain. Contrast this with the - * definition of plain FOLLOW for rule r: - * </summary> - * - * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} - * - * where x in T* and alpha, beta in V*; T is set of terminals and - * V is the set of terminals and nonterminals. In other words, - * FOLLOW(r) is the set of all tokens that can possibly follow - * references to r in *any* sentential form (context). At - * runtime, however, we know precisely which context applies as - * we have the call chain. We may compute the exact (rather - * than covering superset) set of following tokens. - * - * For example, consider grammar: - * - * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} - * | "return" expr '.' - * ; - * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} - * atom : INT // FOLLOW(atom)=={'+',')',';','.'} - * | '(' expr ')' - * ; - * - * The FOLLOW sets are all inclusive whereas context-sensitive - * FOLLOW sets are precisely what could follow a rule reference. - * For input input "i=(3);", here is the derivation: - * - * stat => ID '=' expr ';' - * => ID '=' atom ('+' atom)* ';' - * => ID '=' '(' expr ')' ('+' atom)* ';' - * => ID '=' '(' atom ')' ('+' atom)* ';' - * => ID '=' '(' INT ')' ('+' atom)* ';' - * => ID '=' '(' INT ')' ';' - * - * At the "3" token, you'd have a call chain of - * - * stat -> expr -> atom -> expr -> atom - * - * What can follow that specific nested ref to atom? Exactly ')' - * as you can see by looking at the derivation of this specific - * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. - * - * You want the exact viable token set when recovering from a - * token mismatch. Upon token mismatch, if LA(1) is member of - * the viable next token set, then you know there is most likely - * a missing token in the input stream. "Insert" one by just not - * throwing an exception. - */ - protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() - { - return CombineFollows( true ); - } - - // what is exact? it seems to only add sets from above on stack - // if EOR is in set i. When it sees a set w/o EOR, it stops adding. - // Why would we ever want them all? Maybe no viable alt instead of - // mismatched token? - protected virtual BitSet CombineFollows(bool exact) - { - int top = state._fsp; - BitSet followSet = new BitSet(); - for ( int i = top; i >= 0; i-- ) - { - BitSet localFollowSet = (BitSet)state.following[i]; - /* - System.out.println("local follow depth "+i+"="+ - localFollowSet.toString(getTokenNames())+")"); - */ - followSet.OrInPlace( localFollowSet ); - if ( exact ) - { - // can we see end of rule? - if ( localFollowSet.Member( TokenTypes.EndOfRule ) ) - { - // Only leave EOR in set if at top (start rule); this lets - // us know if have to include follow(start rule); i.e., EOF - if ( i > 0 ) - { - followSet.Remove( TokenTypes.EndOfRule ); - } - } - else - { // can't see end of rule, quit - break; - } - } - } - return followSet; - } - - /** <summary>Attempt to recover from a single missing or extra token.</summary> - * - * EXTRA TOKEN - * - * LA(1) is not what we are looking for. If LA(2) has the right token, - * however, then assume LA(1) is some extra spurious token. Delete it - * and LA(2) as if we were doing a normal match(), which advances the - * input. - * - * MISSING TOKEN - * - * If current token is consistent with what could come after - * ttype then it is ok to "insert" the missing token, else throw - * exception For example, Input "i=(3;" is clearly missing the - * ')'. When the parser returns from the nested call to expr, it - * will have call chain: - * - * stat -> expr -> atom - * - * and it will be trying to match the ')' at this point in the - * derivation: - * - * => ID '=' '(' INT ')' ('+' atom)* ';' - * ^ - * match() will see that ';' doesn't match ')' and report a - * mismatched token error. To recover, it sees that LA(1)==';' - * is in the set of tokens that can follow the ')' token - * reference in rule atom. It can assume that you forgot the ')'. - */ - protected virtual object RecoverFromMismatchedToken( IIntStream input, int ttype, BitSet follow ) - { - RecognitionException e = null; - // if next token is what we are looking for then "delete" this token - if ( MismatchIsUnwantedToken( input, ttype ) ) - { - e = new UnwantedTokenException( ttype, input, TokenNames ); - /* - System.err.println("recoverFromMismatchedToken deleting "+ - ((TokenStream)input).LT(1)+ - " since "+((TokenStream)input).LT(2)+" is what we want"); - */ - BeginResync(); - input.Consume(); // simply delete extra token - EndResync(); - ReportError( e ); // report after consuming so AW sees the token in the exception - // we want to return the token we're actually matching - object matchedSymbol = GetCurrentInputSymbol( input ); - input.Consume(); // move past ttype token as if all were ok - return matchedSymbol; - } - // can't recover with single token deletion, try insertion - if ( MismatchIsMissingToken( input, follow ) ) - { - object inserted = GetMissingSymbol( input, e, ttype, follow ); - e = new MissingTokenException( ttype, input, inserted ); - ReportError( e ); // report after inserting so AW sees the token in the exception - return inserted; - } - // even that didn't work; must throw the exception - e = new MismatchedTokenException(ttype, input, TokenNames); - throw e; - } - - /** Not currently used */ - public virtual object RecoverFromMismatchedSet( IIntStream input, - RecognitionException e, - BitSet follow ) - { - if ( MismatchIsMissingToken( input, follow ) ) - { - // System.out.println("missing token"); - ReportError( e ); - // we don't know how to conjure up a token for sets yet - return GetMissingSymbol( input, e, TokenTypes.Invalid, follow ); - } - // TODO do single token deletion like above for Token mismatch - throw e; - } - - /** <summary> - * Match needs to return the current input symbol, which gets put - * into the label for the associated token ref; e.g., x=ID. Token - * and tree parsers need to return different objects. Rather than test - * for input stream type or change the IntStream interface, I use - * a simple method to ask the recognizer to tell me what the current - * input symbol is. - * </summary> - * - * <remarks>This is ignored for lexers.</remarks> - */ - protected virtual object GetCurrentInputSymbol( IIntStream input ) - { - return null; - } - - /** <summary>Conjure up a missing token during error recovery.</summary> - * - * <remarks> - * The recognizer attempts to recover from single missing - * symbols. But, actions might refer to that missing symbol. - * For example, x=ID {f($x);}. The action clearly assumes - * that there has been an identifier matched previously and that - * $x points at that token. If that token is missing, but - * the next token in the stream is what we want we assume that - * this token is missing and we keep going. Because we - * have to return some token to replace the missing token, - * we have to conjure one up. This method gives the user control - * over the tokens returned for missing tokens. Mostly, - * you will want to create something special for identifier - * tokens. For literals such as '{' and ',', the default - * action in the parser or tree parser works. It simply creates - * a CommonToken of the appropriate type. The text will be the token. - * If you change what tokens must be created by the lexer, - * override this method to create the appropriate tokens. - * </remarks> - */ - protected virtual object GetMissingSymbol( IIntStream input, - RecognitionException e, - int expectedTokenType, - BitSet follow ) - { - return null; - } - - public virtual void ConsumeUntil( IIntStream input, int tokenType ) - { - //System.out.println("consumeUntil "+tokenType); - int ttype = input.LA( 1 ); - while ( ttype != TokenTypes.EndOfFile && ttype != tokenType ) - { - input.Consume(); - ttype = input.LA( 1 ); - } - } - - /** <summary>Consume tokens until one matches the given token set</summary> */ - public virtual void ConsumeUntil( IIntStream input, BitSet set ) - { - //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); - int ttype = input.LA( 1 ); - while ( ttype != TokenTypes.EndOfFile && !set.Member( ttype ) ) - { - //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); - input.Consume(); - ttype = input.LA( 1 ); - } - } - - /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */ - protected void PushFollow( BitSet fset ) - { - if ( ( state._fsp + 1 ) >= state.following.Length ) - { - Array.Resize(ref state.following, state.following.Length * 2); - } - state.following[++state._fsp] = fset; - } - - protected void PopFollow() - { - state._fsp--; - } - - /** <summary> - * Return List<String> of the rules in your parser instance - * leading up to a call to this method. You could override if - * you want more details such as the file/line info of where - * in the parser java code a rule is invoked. - * </summary> - * - * <remarks> - * This is very useful for error messages and for context-sensitive - * error recovery. - * </remarks> - */ - public virtual IList<string> GetRuleInvocationStack() - { - return GetRuleInvocationStack( new StackTrace(true) ); - } - - /** <summary> - * A more general version of GetRuleInvocationStack where you can - * pass in the StackTrace of, for example, a RecognitionException - * to get it's rule stack trace. - * </summary> - */ - public static IList<string> GetRuleInvocationStack(StackTrace trace) - { - if (trace == null) - throw new ArgumentNullException("trace"); - - List<string> rules = new List<string>(); - StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0]; - - for (int i = stack.Length - 1; i >= 0; i--) - { - StackFrame frame = stack[i]; - MethodBase method = frame.GetMethod(); - GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true); - if (attributes != null && attributes.Length > 0) - rules.Add(attributes[0].Name); - } - - return rules; - } - - public virtual int BacktrackingLevel - { - get - { - return state.backtracking; - } - set - { - state.backtracking = value; - } - } - - /** <summary>Return whether or not a backtracking attempt failed.</summary> */ - public virtual bool Failed - { - get - { - return state.failed; - } - } - - /** <summary> - * Used to print out token names like ID during debugging and - * error reporting. The generated parsers implement a method - * that overrides this to point to their String[] tokenNames. - * </summary> - */ - public virtual string[] TokenNames - { - get - { - return null; - } - } - - /** <summary> - * For debugging and other purposes, might want the grammar name. - * Have ANTLR generate an implementation for this method. - * </summary> - */ - public virtual string GrammarFileName - { - get - { - return null; - } - } - - public abstract string SourceName - { - get; - } - - /** <summary> - * A convenience method for use most often with template rewrites. - * Convert a List<Token> to List<String> - * </summary> - */ - public virtual List<string> ToStrings( ICollection<IToken> tokens ) - { - if ( tokens == null ) - return null; - - List<string> strings = new List<string>( tokens.Count ); - foreach ( IToken token in tokens ) - { - strings.Add( token.Text ); - } - - return strings; - } - - /** <summary> - * Given a rule number and a start token index number, return - * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from - * start index. If this rule has parsed input starting from the - * start index before, then return where the rule stopped parsing. - * It returns the index of the last token matched by the rule. - * </summary> - * - * <remarks> - * For now we use a hashtable and just the slow Object-based one. - * Later, we can make a special one for ints and also one that - * tosses out data after we commit past input position i. - * </remarks> - */ - public virtual int GetRuleMemoization( int ruleIndex, int ruleStartIndex ) - { - if ( state.ruleMemo[ruleIndex] == null ) - { - state.ruleMemo[ruleIndex] = new Dictionary<int, int>(); - } - - int stopIndex; - if ( !state.ruleMemo[ruleIndex].TryGetValue( ruleStartIndex, out stopIndex ) ) - return MemoRuleUnknown; - - return stopIndex; - } - - /** <summary> - * Has this rule already parsed input at the current index in the - * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. - * If we attempted but failed to parse properly before, return - * MEMO_RULE_FAILED. - * </summary> - * - * <remarks> - * This method has a side-effect: if we have seen this input for - * this rule and successfully parsed before, then seek ahead to - * 1 past the stop token matched for this rule last time. - * </remarks> - */ - public virtual bool AlreadyParsedRule( IIntStream input, int ruleIndex ) - { - int stopIndex = GetRuleMemoization( ruleIndex, input.Index ); - if ( stopIndex == MemoRuleUnknown ) - { - return false; - } - if ( stopIndex == MemoRuleFailed ) - { - //System.out.println("rule "+ruleIndex+" will never succeed"); - state.failed = true; - } - else - { - //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed); - input.Seek( stopIndex + 1 ); // jump to one past stop token - } - return true; - } - - /** <summary> - * Record whether or not this rule parsed the input at this position - * successfully. Use a standard java hashtable for now. - * </summary> - */ - public virtual void Memoize( IIntStream input, - int ruleIndex, - int ruleStartIndex ) - { - int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1; - if ( state.ruleMemo == null ) - { - if (TraceDestination != null) - TraceDestination.WriteLine( "!!!!!!!!! memo array is null for " + GrammarFileName ); - } - if ( ruleIndex >= state.ruleMemo.Length ) - { - if (TraceDestination != null) - TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex); - } - if ( state.ruleMemo[ruleIndex] != null ) - { - state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; - } - } - - /** <summary>return how many rule/input-index pairs there are in total.</summary> - * TODO: this includes synpreds. :( - */ - public virtual int GetRuleMemoizationCacheSize() - { - int n = 0; - for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ ) - { - var ruleMap = state.ruleMemo[i]; - if ( ruleMap != null ) - { - n += ruleMap.Count; // how many input indexes are recorded? - } - } - return n; - } - - public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) - { - if (TraceDestination == null) - return; - - TraceDestination.Write("enter " + ruleName + " " + inputSymbol); - if (state.backtracking > 0) - { - TraceDestination.Write(" backtracking=" + state.backtracking); - } - TraceDestination.WriteLine(); - } - - public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) - { - if (TraceDestination == null) - return; - - TraceDestination.Write("exit " + ruleName + " " + inputSymbol); - if (state.backtracking > 0) - { - TraceDestination.Write(" backtracking=" + state.backtracking); - if (state.failed) - TraceDestination.Write(" failed"); - else - TraceDestination.Write(" succeeded"); - } - TraceDestination.WriteLine(); - } - - #region Debugging support - public virtual IDebugEventListener DebugListener - { - get - { - return null; - } - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugEnterRule(string grammarFileName, string ruleName) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.EnterRule(grammarFileName, ruleName); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugExitRule(string grammarFileName, string ruleName) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.ExitRule(grammarFileName, ruleName); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugEnterSubRule(int decisionNumber) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.EnterSubRule(decisionNumber); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugExitSubRule(int decisionNumber) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.ExitSubRule(decisionNumber); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugEnterAlt(int alt) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.EnterAlt(alt); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.EnterDecision(decisionNumber, couldBacktrack); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugExitDecision(int decisionNumber) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.ExitDecision(decisionNumber); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugLocation(int line, int charPositionInLine) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.Location(line, charPositionInLine); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugSemanticPredicate(bool result, string predicate) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.SemanticPredicate(result, predicate); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugBeginBacktrack(int level) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.BeginBacktrack(level); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugEndBacktrack(int level, bool successful) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.EndBacktrack(level, successful); - } - - [Conditional("ANTLR_DEBUG")] - protected virtual void DebugRecognitionException(RecognitionException ex) - { - IDebugEventListener dbg = DebugListener; - if (dbg != null) - dbg.RecognitionException(ex); - } - #endregion - } -} |