aboutsummaryrefslogtreecommitdiff
path: root/runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs')
-rw-r--r--runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs1186
1 files changed, 0 insertions, 1186 deletions
diff --git a/runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs b/runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs
deleted file mode 100644
index c62a5bf..0000000
--- a/runtime/CSharp3/Sources/Antlr3.Runtime/BaseRecognizer.cs
+++ /dev/null
@@ -1,1186 +0,0 @@
-/*
- * [The "BSD license"]
- * Copyright (c) 2011 Terence Parr
- * All rights reserved.
- *
- * Conversion to C#:
- * Copyright (c) 2011 Sam Harwell, Pixel Mine, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-namespace Antlr.Runtime
-{
- using System.Collections.Generic;
-
- using ArgumentNullException = System.ArgumentNullException;
- using Array = System.Array;
- using Conditional = System.Diagnostics.ConditionalAttribute;
- using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
- using MethodBase = System.Reflection.MethodBase;
- using Regex = System.Text.RegularExpressions.Regex;
- using StackFrame = System.Diagnostics.StackFrame;
- using StackTrace = System.Diagnostics.StackTrace;
- using TextWriter = System.IO.TextWriter;
-
- /** <summary>
- * A generic recognizer that can handle recognizers generated from
- * lexer, parser, and tree grammars. This is all the parsing
- * support code essentially; most of it is error recovery stuff and
- * backtracking.
- * </summary>
- */
- public abstract class BaseRecognizer
- {
- /** <summary>Stop index stored by Memoize() when the rule failed at the given start index.</summary> */
- public const int MemoRuleFailed = -2;
- /** <summary>Value returned by GetRuleMemoization() when no entry exists for the given start index.</summary> */
- public const int MemoRuleUnknown = -1;
- // NOTE(review): presumably the initial capacity of the follow-set stack
- // (state.following); it is not referenced in this part of the file -- confirm
- // against RecognizerSharedState.
- public const int InitialFollowStackSize = 100;
-
- // copies from Token object for convenience in actions
- public const int DefaultTokenChannel = TokenChannels.Default;
- public const int Hidden = TokenChannels.Hidden;
-
- // NOTE(review): presumably the rule name reported for the lexer's
- // token-fetching entry point; not referenced in this part of the file -- confirm.
- public const string NextTokenRuleName = "nextToken";
-
- /** <summary>
- * State of a lexer, parser, or tree parser are collected into a state
- * object so the state can be shared. This sharing is needed to
- * have one grammar import others and share same error variables
- * and other state variables. It's a kind of explicit multiple
- * inheritance via delegation of methods and shared state.
- * </summary>
- */
- protected internal RecognizerSharedState state;
-
- /** <summary>Create a recognizer backed by a brand-new shared state object.</summary> */
- public BaseRecognizer()
-     : this( new RecognizerSharedState() )
- {
- }
-
- /** <summary>
-  * Create a recognizer on top of an existing shared state object. A null
-  * argument is replaced by a fresh RecognizerSharedState. InitDFAs() is
-  * invoked once the state field has been assigned.
-  * </summary>
-  */
- public BaseRecognizer( RecognizerSharedState state )
- {
-     this.state = state ?? new RecognizerSharedState();
-     InitDFAs();
- }
-
- /** <summary>
-  * Writer that receives the text produced by EmitErrorMessage() and the
-  * diagnostics written by Memoize(); nothing is written while it is null.
-  * </summary>
-  */
- public TextWriter TraceDestination { get; set; }
-
- /** <summary>Replace the shared state object used by this recognizer.</summary> */
- public virtual void SetState( RecognizerSharedState value )
- {
-     state = value;
- }
-
- /** <summary>
-  * Hook invoked by the constructor after the shared state has been
-  * assigned. The base implementation does nothing.
-  * </summary>
-  */
- protected virtual void InitDFAs()
- {
-     // intentionally empty
- }
-
- /** <summary>Reset the parser's state; subclasses must rewind the input stream.</summary> */
- public virtual void Reset()
- {
-     if ( state == null )
-     {
-         // no shared state, so there is nothing to clear
-         return;
-     }
-
-     // error-recovery bookkeeping
-     state._fsp = -1;
-     state.errorRecovery = false;
-     state.lastErrorIndex = -1;
-     state.failed = false;
-     state.syntaxErrors = 0;
-
-     // backtracking and memoization bookkeeping
-     state.backtracking = 0;
-     if ( state.ruleMemo != null )
-     {
-         // drop every per-rule memo table
-         Array.Clear( state.ruleMemo, 0, state.ruleMemo.Length );
-     }
- }
-
-
- /** <summary>
-  * Match the current input symbol against ttype. On a mismatch outside of
-  * backtracking, single token insertion or deletion recovery is attempted
-  * through RecoverFromMismatchedToken(), which throws
-  * MismatchedTokenException when it cannot recover.
-  * </summary>
-  *
-  * <remarks>
-  * To turn off single token insertion or deletion error recovery, override
-  * RecoverFromMismatchedToken() and have it throw an exception. That way any
-  * error in a rule causes an exception and an immediate exit from the rule;
-  * the rule would then recover by resynchronizing to the set of symbols that
-  * can follow the rule reference.
-  *
-  * While backtracking (state.backtracking > 0) a mismatch only sets
-  * state.failed and returns the current symbol without consuming input.
-  * </remarks>
-  */
- public virtual object Match( IIntStream input, int ttype, BitSet follow )
- {
-     object current = GetCurrentInputSymbol( input );
-
-     if ( input.LA( 1 ) != ttype )
-     {
-         if ( state.backtracking > 0 )
-         {
-             // speculative parse: just flag the failure
-             state.failed = true;
-             return current;
-         }
-
-         return RecoverFromMismatchedToken( input, ttype, follow );
-     }
-
-     // successful match: advance and leave error-recovery mode
-     input.Consume();
-     state.errorRecovery = false;
-     state.failed = false;
-     return current;
- }
-
- /** <summary>
-  * Match the wildcard: clear the failure and error-recovery flags, then
-  * consume one symbol regardless of its type.
-  * </summary>
-  */
- public virtual void MatchAny( IIntStream input )
- {
-     state.failed = false;
-     state.errorRecovery = false;
-     input.Consume();
- }
-
- /** <summary>
-  * True when the symbol after the current one, LA(2), has type ttype, i.e.
-  * the current symbol can be treated as an extra token.
-  * </summary>
-  */
- public virtual bool MismatchIsUnwantedToken( IIntStream input, int ttype )
- {
-     int typeAfterCurrent = input.LA( 2 );
-     return typeAfterCurrent == ttype;
- }
-
- /** <summary>
-  * Decide whether the current mismatch looks like a missing token: true
-  * when LA(1) is in the set of tokens that may follow the element being
-  * matched, or when EndOfRule remains in that set.
-  * </summary>
-  */
- public virtual bool MismatchIsMissingToken( IIntStream input, BitSet follow )
- {
-     if ( follow == null )
-     {
-         // nothing is known about what may follow; the caller can only
-         // consume a single token and hope for the best
-         return false;
-     }
-
-     if ( follow.Member( TokenTypes.EndOfRule ) )
-     {
-         // the end of the rule is visible, so widen the set with what can
-         // follow this rule in the current call chain
-         follow = follow.Or( ComputeContextSensitiveRuleFOLLOW() );
-         if ( state._fsp >= 0 )
-         {
-             // remove EOR if we're not the start symbol
-             follow.Remove( TokenTypes.EndOfRule );
-         }
-     }
-
-     // BitSet cannot handle negative numbers like -1 (EOF), so EOR is left
-     // in the follow set to indicate that the follow of the start symbol
-     // is in the set (EOF can follow). If the current token is consistent
-     // with the follow set, error recovery is free to "insert" the missing
-     // token.
-     return follow.Member( input.LA( 1 ) ) || follow.Member( TokenTypes.EndOfRule );
- }
-
- /** <summary>Report a recognition problem.</summary>
-  *
-  * <remarks>
-  * Sets state.errorRecovery to indicate that the parser is recovering rather
-  * than parsing. While that flag is set, further errors are not reported.
-  * The flag is cleared again by a successful Match() or by MatchAny(). The
-  * sequence is:
-  *
-  * 1. error occurs
-  * 2. enter recovery mode, report error
-  * 3. consume until token found in resynch set
-  * 4. try to resume parsing
-  * 5. next match() will reset errorRecovery mode
-  *
-  * If you override, make sure to update syntaxErrors if you care about that.
-  * </remarks>
-  */
- public virtual void ReportError( RecognitionException e )
- {
-     if ( state.errorRecovery )
-     {
-         // already reported an error and no token has been matched since:
-         // treat this one as spurious and stay silent
-         return;
-     }
-
-     state.errorRecovery = true;
-     state.syntaxErrors++; // spurious errors are not counted
-
-     DisplayRecognitionError( TokenNames, e );
- }
-
- /** <summary>
-  * Build "header message" from GetErrorHeader() and GetErrorMessage() and
-  * hand the result to EmitErrorMessage().
-  * </summary>
-  */
- public virtual void DisplayRecognitionError( string[] tokenNames, RecognitionException e )
- {
-     EmitErrorMessage( GetErrorHeader( e ) + " " + GetErrorMessage( e, tokenNames ) );
- }
-
- /** <summary>What error message should be generated for the various exception types?</summary>
- *
- * <remarks>
- * Not very object-oriented code, but I like having all error message
- * generation within one method rather than spread among all of the
- * exception classes. This also makes it much easier for the exception
- * handling because the exception classes do not have to have pointers back
- * to this object to access utility routines and so on. Also, changing
- * the message for an exception type would be difficult because you
- * would have to subclassing exception, but then somehow get ANTLR
- * to make those kinds of exception objects instead of the default.
- * This looks weird, but trust me--it makes the most sense in terms
- * of flexibility.
- *
- * For grammar debugging, you will want to override this to add
- * more information such as the stack frame with
- * getRuleInvocationStack(e, this.getClass().getName()) and,
- * for no viable alts, the decision description and state etc...
- *
- * Override this to change the message generated for one or more
- * exception types.
- * </remarks>
- */
- public virtual string GetErrorMessage( RecognitionException e, string[] tokenNames )
- {
-     // Exception types not handled below keep the exception's own message.
-     string msg = e.Message;
-
-     // The unwanted/missing token tests come before the mismatched token
-     // test, preserving the order of the original chain.
-     if ( e is UnwantedTokenException )
-     {
-         UnwantedTokenException ute = (UnwantedTokenException)e;
-         string tokenName = GetExpectedTokenName( ute.Expecting, tokenNames );
-         msg = "extraneous input " + GetTokenErrorDisplay( ute.UnexpectedToken ) +
-             " expecting " + tokenName;
-     }
-     else if ( e is MissingTokenException )
-     {
-         MissingTokenException mte = (MissingTokenException)e;
-         string tokenName = GetExpectedTokenName( mte.Expecting, tokenNames );
-         msg = "missing " + tokenName + " at " + GetTokenErrorDisplay( e.Token );
-     }
-     else if ( e is MismatchedTokenException )
-     {
-         MismatchedTokenException mte = (MismatchedTokenException)e;
-         string tokenName = GetExpectedTokenName( mte.Expecting, tokenNames );
-         msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
-             " expecting " + tokenName;
-     }
-     else if ( e is MismatchedTreeNodeException )
-     {
-         MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
-         string tokenName = GetExpectedTokenName( mtne.Expecting, tokenNames );
-         // workaround for a .NET framework bug (NullReferenceException)
-         string nodeText = ( mtne.Node != null ) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
-         msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
-     }
-     else if ( e is NoViableAltException )
-     {
-         // for development, the decision description, decision number and
-         // state number of the NoViableAltException could be appended here
-         msg = "no viable alternative at input " + GetTokenErrorDisplay( e.Token );
-     }
-     else if ( e is EarlyExitException )
-     {
-         // for development, the decision number could be appended here
-         msg = "required (...)+ loop did not match anything at input " +
-             GetTokenErrorDisplay( e.Token );
-     }
-     else if ( e is MismatchedSetException )
-     {
-         MismatchedSetException mse = (MismatchedSetException)e;
-         msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
-             " expecting set " + mse.Expecting;
-     }
-     else if ( e is MismatchedNotSetException )
-     {
-         MismatchedNotSetException mse = (MismatchedNotSetException)e;
-         msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
-             " expecting set " + mse.Expecting;
-     }
-     else if ( e is FailedPredicateException )
-     {
-         FailedPredicateException fpe = (FailedPredicateException)e;
-         msg = "rule " + fpe.RuleName + " failed predicate: {" +
-             fpe.PredicateText + "}?";
-     }
-
-     return msg;
- }
-
- /** <summary>
-  * Map an expected token type to the name shown in error messages:
-  * "EndOfFile" for TokenTypes.EndOfFile, the matching tokenNames entry when
-  * the type is a valid index into a non-null array, "&lt;unknown&gt;" otherwise.
-  * </summary>
-  */
- private static string GetExpectedTokenName( int expecting, string[] tokenNames )
- {
-     if ( expecting == TokenTypes.EndOfFile )
-     {
-         return "EndOfFile";
-     }
-
-     // tokenNames may be null (the base TokenNames property returns null)
-     // and the type may fall outside the table; building an error message
-     // must not throw in either case.
-     if ( tokenNames != null && expecting >= 0 && expecting < tokenNames.Length )
-     {
-         return tokenNames[expecting];
-     }
-
-     return "<unknown>";
- }
-
- /** <summary>
-  * Number of recognition errors recorded in the shared state (lexer, parser,
-  * tree parser). Each recognizer tracks its own number, so parser and lexer
-  * each have a separate count. Spurious errors found between an error and
-  * the next valid token match are not counted.
-  * </summary>
-  *
-  * <seealso cref="ReportError"/>
-  */
- public virtual int NumberOfSyntaxErrors
- {
-     get { return state.syntaxErrors; }
- }
-
- /** <summary>
-  * Build the error header, normally line/character position information:
-  * "[SourceName ]line L:C", where C is e.CharPositionInLine + 1. The source
-  * name and its trailing space are omitted when SourceName is null or empty.
-  * </summary>
-  */
- public virtual string GetErrorHeader( RecognitionException e )
- {
-     string source = SourceName;
-     string prefix = string.IsNullOrEmpty( source ) ? string.Empty : source + ' ';
-
-     return string.Format( "{0}line {1}:{2}", prefix, e.Line, e.CharPositionInLine + 1 );
- }
-
- /** <summary>
-  * How should a token be displayed in an error message? The default
-  * is to display just the text, but during development you might
-  * want to have a lot of information spit out. Override in that case
-  * to use t.ToString() (which, for CommonToken, dumps everything about
-  * the token). This is better than forcing you to override a method in
-  * your token objects because you don't have to go modify your lexer
-  * so that it creates a new token type.
-  * </summary>
-  *
-  * <remarks>
-  * When the token has no text, "&lt;EOF&gt;" is used for the end-of-file type
-  * and "&lt;type&gt;" for any other type. Newline, carriage return and tab
-  * characters are shown as the two-character escapes \n, \r and \t, and the
-  * result is wrapped in single quotes.
-  * </remarks>
-  */
- public virtual string GetTokenErrorDisplay( IToken t )
- {
-     string s = t.Text;
-     if ( s == null )
-     {
-         if ( t.Type == TokenTypes.EndOfFile )
-         {
-             s = "<EOF>";
-         }
-         else
-         {
-             s = "<" + t.Type + ">";
-         }
-     }
-
-     // Plain string replacement is used on purpose. A .NET regex replacement
-     // pattern does not treat backslash as an escape, so the earlier
-     // Regex.Replace( s, "\n", "\\\\n" ) calls emitted two backslashes
-     // instead of the intended single one.
-     s = s.Replace( "\n", "\\n" );
-     s = s.Replace( "\r", "\\r" );
-     s = s.Replace( "\t", "\\t" );
-     return "'" + s + "'";
- }
-
- /** <summary>
-  * Override this method to change where error messages go. The default
-  * writes the message as a line to TraceDestination and does nothing when
-  * TraceDestination is null.
-  * </summary>
-  */
- public virtual void EmitErrorMessage( string msg )
- {
-     TextWriter destination = TraceDestination;
-     if ( destination != null )
-     {
-         destination.WriteLine( msg );
-     }
- }
-
- /** <summary>
-  * Recover from an error found on the input stream. This is for
-  * NoViableAlt and mismatched symbol exceptions. If single token insertion
-  * and deletion is enabled, this will usually not handle mismatched symbol
-  * exceptions, but there could be a mismatched token that the match()
-  * routine could not recover from.
-  * </summary>
-  *
-  * <remarks>
-  * Tokens are consumed until one in the set returned by
-  * ComputeErrorRecoverySet() (or end of file) is the current token. The
-  * consumption is bracketed by BeginResync() and EndResync().
-  * </remarks>
-  */
- public virtual void Recover( IIntStream input, RecognitionException re )
- {
-     bool sameSpotAsLastError = ( state.lastErrorIndex == input.Index );
-     if ( sameSpotAsLastError )
-     {
-         // Another error at the same token index: LT(1) must be in the
-         // recovery set, so nothing would be consumed. Consume a single
-         // token as a failsafe against an infinite loop.
-         input.Consume();
-     }
-     state.lastErrorIndex = input.Index;
-
-     BitSet recoverySet = ComputeErrorRecoverySet();
-     BeginResync();
-     ConsumeUntil( input, recoverySet );
-     EndResync();
- }
-
- /** <summary>
-  * Hook called just before tokens are consumed during error recovery; the
-  * base implementation does nothing. The DebugParser subclasses this to
-  * fire events to the listener.
-  * </summary>
-  */
- public virtual void BeginResync()
- {
-     // intentionally empty
- }
-
- /** <summary>
-  * Hook called right after tokens have been consumed during error recovery;
-  * the base implementation does nothing.
-  * </summary>
-  */
- public virtual void EndResync()
- {
-     // intentionally empty
- }
-
- /* Compute the error recovery set for the current rule. During
- * rule invocation, the parser pushes the set of tokens that can
- * follow that rule reference on the stack; this amounts to
- * computing FIRST of what follows the rule reference in the
- * enclosing rule. This local follow set only includes tokens
- * from within the rule; i.e., the FIRST computation done by
- * ANTLR stops at the end of a rule.
- *
- * EXAMPLE
- *
- * When you find a "no viable alt exception", the input is not
- * consistent with any of the alternatives for rule r. The best
- * thing to do is to consume tokens until you see something that
- * can legally follow a call to r *or* any rule that called r.
- * You don't want the exact set of viable next tokens because the
- * input might just be missing a token--you might consume the
- * rest of the input looking for one of the missing tokens.
- *
- * Consider grammar:
- *
- * a : '[' b ']'
- * | '(' b ')'
- * ;
- * b : c '^' INT ;
- * c : ID
- * | INT
- * ;
- *
- * At each rule invocation, the set of tokens that could follow
- * that rule is pushed on a stack. Here are the various "local"
- * follow sets:
- *
- * FOLLOW(b1_in_a) = FIRST(']') = ']'
- * FOLLOW(b2_in_a) = FIRST(')') = ')'
- * FOLLOW(c_in_b) = FIRST('^') = '^'
- *
- * Upon erroneous input "[]", the call chain is
- *
- * a -> b -> c
- *
- * and, hence, the follow context stack is:
- *
- * depth local follow set after call to rule
- * 0 <EOF> a (from main())
- * 1 ']' b
- * 2 '^' c
- *
- * Notice that ')' is not included, because b would have to have
- * been called from a different context in rule a for ')' to be
- * included.
- *
- * For error recovery, we cannot consider FOLLOW(c)
- * (context-sensitive or otherwise). We need the combined set of
- * all context-sensitive FOLLOW sets--the set of all tokens that
- * could follow any reference in the call chain. We need to
- * resync to one of those tokens. Note that FOLLOW(c)='^' and if
- * we resync'd to that token, we'd consume until EOF. We need to
- * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
- * In this case, for input "[]", LA(1) is in this set so we would
- * not consume anything and after printing an error rule c would
- * return normally. It would not find the required '^' though.
- * At this point, it gets a mismatched token error and throws an
- * exception (since LA(1) is not in the viable following token
- * set). The rule exception handler tries to recover, but finds
- * the same recovery set and doesn't consume anything. Rule b
- * exits normally returning to rule a. Now it finds the ']' (and
- * with the successful match exits errorRecovery mode).
- *
- * So, you can see that the parser walks up the call chain looking
- * for the token that was a member of the recovery set.
- *
- * Errors are not generated in errorRecovery mode.
- *
- * ANTLR's error recovery mechanism is based upon original ideas:
- *
- * "Algorithms + Data Structures = Programs" by Niklaus Wirth
- *
- * and
- *
- * "A note on error recovery in recursive descent parsers":
- * http://portal.acm.org/citation.cfm?id=947902.947905
- *
- * Later, Josef Grosch had some good ideas:
- *
- * "Efficient and Comfortable Error Recovery in Recursive Descent
- * Parsers":
- * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
- *
- * Like Grosch I implemented local FOLLOW sets that are combined
- * at run-time upon error to avoid overhead during parsing.
- */
- protected virtual BitSet ComputeErrorRecoverySet()
- {
-     // non-exact combination: union of every local follow set on the stack
-     BitSet recoverySet = CombineFollows( false );
-     return recoverySet;
- }
-
- /** <summary>
- * Compute the context-sensitive FOLLOW set for current rule.
- * This is set of token types that can follow a specific rule
- * reference given a specific call chain. You get the set of
- * viable tokens that can possibly come next (lookahead depth 1)
- * given the current call chain. Contrast this with the
- * definition of plain FOLLOW for rule r:
- * </summary>
- *
- * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
- *
- * where x in T* and alpha, beta in V*; T is set of terminals and
- * V is the set of terminals and nonterminals. In other words,
- * FOLLOW(r) is the set of all tokens that can possibly follow
- * references to r in *any* sentential form (context). At
- * runtime, however, we know precisely which context applies as
- * we have the call chain. We may compute the exact (rather
- * than covering superset) set of following tokens.
- *
- * For example, consider grammar:
- *
- * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
- * | "return" expr '.'
- * ;
- * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
- * atom : INT // FOLLOW(atom)=={'+',')',';','.'}
- * | '(' expr ')'
- * ;
- *
- * The FOLLOW sets are all inclusive whereas context-sensitive
- * FOLLOW sets are precisely what could follow a rule reference.
- * For input "i=(3);", here is the derivation:
- *
- * stat => ID '=' expr ';'
- * => ID '=' atom ('+' atom)* ';'
- * => ID '=' '(' expr ')' ('+' atom)* ';'
- * => ID '=' '(' atom ')' ('+' atom)* ';'
- * => ID '=' '(' INT ')' ('+' atom)* ';'
- * => ID '=' '(' INT ')' ';'
- *
- * At the "3" token, you'd have a call chain of
- *
- * stat -> expr -> atom -> expr -> atom
- *
- * What can follow that specific nested ref to atom? Exactly ')'
- * as you can see by looking at the derivation of this specific
- * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
- *
- * You want the exact viable token set when recovering from a
- * token mismatch. Upon token mismatch, if LA(1) is member of
- * the viable next token set, then you know there is most likely
- * a missing token in the input stream. "Insert" one by just not
- * throwing an exception.
- */
- protected virtual BitSet ComputeContextSensitiveRuleFOLLOW()
- {
-     // exact combination: stop at the first follow set that cannot see
-     // the end of its rule
-     BitSet viableFollow = CombineFollows( true );
-     return viableFollow;
- }
-
- /** <summary>
-  * Union the local follow sets on the follow stack, walking from the top of
-  * the stack (state._fsp) down to index 0.
-  * </summary>
-  *
-  * <remarks>
-  * With exact == false every set on the stack is merged. With exact == true
-  * the walk stops after the first set that does not contain EndOfRule, and
-  * EndOfRule is removed from the result whenever it was seen at a depth
-  * greater than zero; it is only left in when seen at depth 0 (start rule),
-  * which tells callers that the follow of the start rule, i.e. EOF, must be
-  * included.
-  * </remarks>
-  */
- protected virtual BitSet CombineFollows( bool exact )
- {
-     BitSet combined = new BitSet();
-
-     for ( int depth = state._fsp; depth >= 0; depth-- )
-     {
-         BitSet local = (BitSet)state.following[depth];
-         combined.OrInPlace( local );
-
-         if ( !exact )
-         {
-             continue;
-         }
-
-         if ( !local.Member( TokenTypes.EndOfRule ) )
-         {
-             // can't see end of rule from here, quit
-             break;
-         }
-
-         if ( depth > 0 )
-         {
-             // only leave EOR in the set when at the start rule
-             combined.Remove( TokenTypes.EndOfRule );
-         }
-     }
-
-     return combined;
- }
-
- /** <summary>Attempt to recover from a single missing or extra token.</summary>
- *
- * EXTRA TOKEN
- *
- * LA(1) is not what we are looking for. If LA(2) has the right token,
- * however, then assume LA(1) is some extra spurious token. Delete it
- * and LA(2) as if we were doing a normal match(), which advances the
- * input.
- *
- * MISSING TOKEN
- *
- * If current token is consistent with what could come after
- * ttype then it is ok to "insert" the missing token, else throw an
- * exception. For example, input "i=(3;" is clearly missing the
- * ')'. When the parser returns from the nested call to expr, it
- * will have call chain:
- *
- * stat -> expr -> atom
- *
- * and it will be trying to match the ')' at this point in the
- * derivation:
- *
- * => ID '=' '(' INT ')' ('+' atom)* ';'
- * ^
- * match() will see that ';' doesn't match ')' and report a
- * mismatched token error. To recover, it sees that LA(1)==';'
- * is in the set of tokens that can follow the ')' token
- * reference in rule atom. It can assume that you forgot the ')'.
- */
- // Tries single token deletion first, then single token insertion; throws
- // MismatchedTokenException when neither applies. Returns the symbol that
- // stands in for the token of type ttype: the real symbol after a deletion,
- // or whatever GetMissingSymbol() produced after an insertion.
- protected virtual object RecoverFromMismatchedToken( IIntStream input, int ttype, BitSet follow )
- {
- RecognitionException e = null;
- // if next token is what we are looking for then "delete" this token
- if ( MismatchIsUnwantedToken( input, ttype ) )
- {
- // the exception is created before consuming, while the unwanted token
- // is still the current input symbol
- e = new UnwantedTokenException( ttype, input, TokenNames );
- /*
- System.err.println("recoverFromMismatchedToken deleting "+
- ((TokenStream)input).LT(1)+
- " since "+((TokenStream)input).LT(2)+" is what we want");
- */
- BeginResync();
- input.Consume(); // simply delete extra token
- EndResync();
- ReportError( e ); // report after consuming so AW sees the token in the exception
- // we want to return the token we're actually matching
- object matchedSymbol = GetCurrentInputSymbol( input );
- input.Consume(); // move past ttype token as if all were ok
- return matchedSymbol;
- }
- // can't recover with single token deletion, try insertion
- if ( MismatchIsMissingToken( input, follow ) )
- {
- // note: e is still null here, so GetMissingSymbol receives a null exception
- object inserted = GetMissingSymbol( input, e, ttype, follow );
- e = new MissingTokenException( ttype, input, inserted );
- ReportError( e ); // report after inserting so AW sees the token in the exception
- return inserted;
- }
- // even that didn't work; must throw the exception
- e = new MismatchedTokenException(ttype, input, TokenNames);
- throw e;
- }
-
- /** <summary>
-  * Not currently used. If the mismatch looks like a missing token, report
-  * the error and return the symbol produced by GetMissingSymbol() for
-  * TokenTypes.Invalid; otherwise rethrow the supplied exception.
-  * </summary>
-  */
- public virtual object RecoverFromMismatchedSet( IIntStream input, RecognitionException e, BitSet follow )
- {
-     if ( !MismatchIsMissingToken( input, follow ) )
-     {
-         // TODO do single token deletion like above for Token mismatch
-         throw e;
-     }
-
-     ReportError( e );
-     // we don't know how to conjure up a token for sets yet
-     return GetMissingSymbol( input, e, TokenTypes.Invalid, follow );
- }
-
- /** <summary>
-  * Match() needs to return the current input symbol, which gets put into
-  * the label for the associated token ref; e.g., x=ID. Token and tree
-  * parsers need to return different objects. Rather than test for the input
-  * stream type or change the IntStream interface, the recognizer is simply
-  * asked what the current input symbol is.
-  * </summary>
-  *
-  * <remarks>
-  * The base implementation returns null. This is ignored for lexers.
-  * </remarks>
-  */
- protected virtual object GetCurrentInputSymbol( IIntStream input )
- {
-     // no symbol is available at this level
-     return null;
- }
-
- /** <summary>Conjure up a missing token during error recovery.</summary>
- *
- * <remarks>
- * The recognizer attempts to recover from single missing
- * symbols. But, actions might refer to that missing symbol.
- * For example, x=ID {f($x);}. The action clearly assumes
- * that there has been an identifier matched previously and that
- * $x points at that token. If that token is missing, but
- * the next token in the stream is what we want we assume that
- * this token is missing and we keep going. Because we
- * have to return some token to replace the missing token,
- * we have to conjure one up. This method gives the user control
- * over the tokens returned for missing tokens. Mostly,
- * you will want to create something special for identifier
- * tokens. For literals such as '{' and ',', the default
- * action in the parser or tree parser works. It simply creates
- * a CommonToken of the appropriate type. The text will be the token.
- * If you change what tokens must be created by the lexer,
- * override this method to create the appropriate tokens.
- * </remarks>
- */
- protected virtual object GetMissingSymbol( IIntStream input, RecognitionException e, int expectedTokenType, BitSet follow )
- {
-     // the base recognizer conjures up nothing
-     return null;
- }
-
- /** <summary>
-  * Consume tokens until the current token has type tokenType or is end of
-  * file; that token itself is not consumed.
-  * </summary>
-  */
- public virtual void ConsumeUntil( IIntStream input, int tokenType )
- {
-     for ( int current = input.LA( 1 );
-           current != TokenTypes.EndOfFile && current != tokenType;
-           current = input.LA( 1 ) )
-     {
-         input.Consume();
-     }
- }
-
- /** <summary>
-  * Consume tokens until the current token is a member of the given set or
-  * is end of file; that token itself is not consumed.
-  * </summary>
-  */
- public virtual void ConsumeUntil( IIntStream input, BitSet set )
- {
-     while ( true )
-     {
-         int current = input.LA( 1 );
-         if ( current == TokenTypes.EndOfFile || set.Member( current ) )
-         {
-             break;
-         }
-
-         input.Consume();
-     }
- }
-
- /** <summary>
-  * Push a rule's follow set on the recognizer's own follow stack, doubling
-  * the backing array first when the next slot would be past its end.
-  * </summary>
-  */
- protected void PushFollow( BitSet fset )
- {
-     int slot = state._fsp + 1;
-     if ( slot >= state.following.Length )
-     {
-         Array.Resize( ref state.following, state.following.Length * 2 );
-     }
-
-     state._fsp = slot;
-     state.following[slot] = fset;
- }
-
- /** <summary>Pop the top follow set by decrementing the stack pointer.</summary> */
- protected void PopFollow()
- {
-     state._fsp -= 1;
- }
-
- /** <summary>
-  * Return the list of rule names in your parser instance leading up to a
-  * call to this method, computed from a stack trace captured here. You
-  * could override if you want more details, such as the file/line info of
-  * where in the parser code a rule is invoked.
-  * </summary>
-  *
-  * <remarks>
-  * This is very useful for error messages and for context-sensitive
-  * error recovery.
-  * </remarks>
-  */
- public virtual IList<string> GetRuleInvocationStack()
- {
-     StackTrace here = new StackTrace( true );
-     return GetRuleInvocationStack( here );
- }
-
- /** <summary>
-  * A more general version of GetRuleInvocationStack where you can pass in
-  * the StackTrace of, for example, a RecognitionException to get its rule
-  * stack trace.
-  * </summary>
-  *
-  * <remarks>
-  * Frames are visited from the last array element to the first. A rule
-  * name is added for each frame whose method carries a GrammarRuleAttribute.
-  * Frames that expose no method are skipped.
-  * </remarks>
-  *
-  * <exception cref="System.ArgumentNullException">trace is null.</exception>
-  */
- public static IList<string> GetRuleInvocationStack(StackTrace trace)
- {
-     if (trace == null)
-         throw new ArgumentNullException("trace");
-
-     List<string> rules = new List<string>();
-     StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];
-
-     for (int i = stack.Length - 1; i >= 0; i--)
-     {
-         StackFrame frame = stack[i];
-         MethodBase method = frame.GetMethod();
-         if (method == null)
-         {
-             // StackFrame.GetMethod() may return null; such a frame cannot
-             // carry a rule attribute, so skip it instead of dereferencing it
-             continue;
-         }
-
-         GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
-         if (attributes != null && attributes.Length > 0)
-             rules.Add(attributes[0].Name);
-     }
-
-     return rules;
- }
-
- /** <summary>
-  * The backtracking depth stored in the shared state; Match() treats any
-  * value above zero as "currently backtracking".
-  * </summary>
-  */
- public virtual int BacktrackingLevel
- {
-     get { return state.backtracking; }
-     set { state.backtracking = value; }
- }
-
- /** <summary>Whether or not a backtracking attempt failed (the shared state's failed flag).</summary> */
- public virtual bool Failed
- {
-     get { return state.failed; }
- }
-
- /** <summary>
-  * Used to print out token names like ID during debugging and error
-  * reporting. The generated parsers override this to point to their own
-  * token name table; the base implementation returns null.
-  * </summary>
-  */
- public virtual string[] TokenNames
- {
-     get { return null; }
- }
-
- /** <summary>
-  * For debugging and other purposes, might want the grammar name. ANTLR is
-  * expected to generate an override; the base implementation returns null.
-  * </summary>
-  */
- public virtual string GrammarFileName
- {
-     get { return null; }
- }
-
- /** <summary>
-  * Name of the input source. GetErrorHeader() puts it, followed by a space,
-  * in front of the line information when it is non-empty.
-  * </summary>
-  */
- public abstract string SourceName { get; }
-
- /** <summary>
-  * A convenience method for use most often with template rewrites: convert
-  * a collection of tokens to a list holding each token's Text, in
-  * enumeration order. A null collection yields null.
-  * </summary>
-  */
- public virtual List<string> ToStrings( ICollection<IToken> tokens )
- {
-     if ( tokens == null )
-     {
-         return null;
-     }
-
-     List<string> texts = new List<string>( tokens.Count );
-     foreach ( IToken current in tokens )
-     {
-         string text = current.Text;
-         texts.Add( text );
-     }
-     return texts;
- }
-
- /** <summary>
-  * Given a rule number and a start token index number, return
-  * MemoRuleUnknown if the rule has not parsed input starting from that
-  * start index. If this rule has parsed input starting from the start
-  * index before, return the value recorded for it: the index of the last
-  * token matched by the rule (or whatever Memoize() stored).
-  * </summary>
-  *
-  * <remarks>
-  * The memo table for the rule is created on first use. For now a plain
-  * dictionary is used; later a specialized one for ints could be added,
-  * along with one that tosses out data after committing past input
-  * position i.
-  * </remarks>
-  */
- public virtual int GetRuleMemoization( int ruleIndex, int ruleStartIndex )
- {
-     if ( state.ruleMemo[ruleIndex] == null )
-     {
-         // lazily create this rule's memo table
-         state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
-     }
-
-     int recordedStop;
-     bool seenBefore = state.ruleMemo[ruleIndex].TryGetValue( ruleStartIndex, out recordedStop );
-     return seenBefore ? recordedStop : MemoRuleUnknown;
- }
-
- /** <summary>
-  * Checks whether this rule has already been attempted at the current
-  * index of the input stream.
-  * </summary>
-  *
-  * <param name="input">Stream whose current Index is used as the rule start index.</param>
-  * <param name="ruleIndex">Index of the rule being entered.</param>
-  * <returns>
-  * false when nothing is memoized for this rule at the current index;
-  * true when an earlier attempt (successful or failed) is on record.
-  * </returns>
-  *
-  * <remarks>
-  * This method has side effects. If the earlier attempt failed,
-  * state.failed is set to true. If it succeeded, the stream is moved to
-  * one past the stop token that the rule matched last time.
-  * </remarks>
-  */
- public virtual bool AlreadyParsedRule( IIntStream input, int ruleIndex )
- {
-     int memoized = GetRuleMemoization( ruleIndex, input.Index );
-     if ( memoized == MemoRuleUnknown )
-         return false;
-
-     if ( memoized != MemoRuleFailed )
-     {
-         // Seen before and succeeded: jump to one past the stop token.
-         input.Seek( memoized + 1 );
-     }
-     else
-     {
-         // Seen before and failed: this rule will never succeed here.
-         state.failed = true;
-     }
-
-     return true;
- }
-
- /** <summary>
-  * Records whether or not this rule parsed the input starting at
-  * <paramref name="ruleStartIndex"/> successfully.
-  * </summary>
-  *
-  * <param name="input">Stream whose current Index marks where the rule stopped.</param>
-  * <param name="ruleIndex">Index of the rule in the memo array.</param>
-  * <param name="ruleStartIndex">Token index at which the rule started parsing.</param>
-  *
-  * <remarks>
-  * The stored value is MemoRuleFailed when state.failed is set, and
-  * input.Index - 1 otherwise. Nothing is stored unless the per-rule
-  * table already exists. If the memo array is missing, or the rule
-  * index lies outside it, a diagnostic is written to TraceDestination
-  * (when one is set) and the call returns without recording anything,
-  * instead of going on to dereference the missing array or index past
-  * its end.
-  * </remarks>
-  */
- public virtual void Memoize( IIntStream input,
-     int ruleIndex,
-     int ruleStartIndex )
- {
-     int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
-
-     if ( state.ruleMemo == null )
-     {
-         if (TraceDestination != null)
-             TraceDestination.WriteLine( "!!!!!!!!! memo array is null for " + GrammarFileName );
-
-         // Nothing to record into; continuing would throw NullReferenceException.
-         return;
-     }
-
-     if ( ruleIndex < 0 || ruleIndex >= state.ruleMemo.Length )
-     {
-         if (TraceDestination != null)
-             TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
-
-         // Indexing the array here would throw IndexOutOfRangeException.
-         return;
-     }
-
-     if ( state.ruleMemo[ruleIndex] != null )
-     {
-         state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
-     }
- }
-
- /** <summary>
-  * Counts how many rule/input-index pairs are memoized in total.
-  * </summary>
-  *
-  * <returns>
-  * The sum of the entry counts of all per-rule tables; 0 when the memo
-  * array is null.
-  * </returns>
-  *
-  * <remarks>TODO: this includes synpreds. :(</remarks>
-  */
- public virtual int GetRuleMemoizationCacheSize()
- {
-     if ( state.ruleMemo == null )
-         return 0;
-
-     int total = 0;
-     foreach ( var perRule in state.ruleMemo )
-     {
-         // Rules that were never looked up have no table yet.
-         if ( perRule == null )
-             continue;
-
-         total += perRule.Count; // number of input indexes recorded for this rule
-     }
-
-     return total;
- }
-
- /** <summary>
-  * Writes an "enter" trace line for a rule to TraceDestination. Does
-  * nothing when TraceDestination is null.
-  * </summary>
-  *
-  * <param name="ruleName">Name of the rule being entered.</param>
-  * <param name="ruleIndex">Index of the rule; not used by this implementation.</param>
-  * <param name="inputSymbol">Current input symbol, appended to the trace line.</param>
-  */
- public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol)
- {
-     if (TraceDestination != null)
-     {
-         TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
-
-         // The backtracking level is only reported while backtracking.
-         if (state.backtracking > 0)
-             TraceDestination.Write(" backtracking=" + state.backtracking);
-
-         TraceDestination.WriteLine();
-     }
- }
-
- /** <summary>
-  * Writes an "exit" trace line for a rule to TraceDestination. Does
-  * nothing when TraceDestination is null. While backtracking, the line
-  * also carries the backtracking level and whether the attempt failed
-  * or succeeded.
-  * </summary>
-  *
-  * <param name="ruleName">Name of the rule being left.</param>
-  * <param name="ruleIndex">Index of the rule; not used by this implementation.</param>
-  * <param name="inputSymbol">Current input symbol, appended to the trace line.</param>
-  */
- public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol)
- {
-     if (TraceDestination != null)
-     {
-         TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
-
-         if (state.backtracking > 0)
-         {
-             TraceDestination.Write(" backtracking=" + state.backtracking);
-             TraceDestination.Write(state.failed ? " failed" : " succeeded");
-         }
-
-         TraceDestination.WriteLine();
-     }
- }
-
- #region Debugging support
- /** <summary>
-  * Listener that receives the events raised by the Debug* helper
-  * methods of this class. Those helpers skip notification when this
-  * property is null.
-  * </summary>
-  *
-  * <value>The base implementation always returns null.</value>
-  */
- public virtual IDebugEventListener DebugListener
- {
-     get { return null; }
- }
-
- /** <summary>
-  * Forwards an enter-rule event to DebugListener, if one is present.
-  * Because of the Conditional attribute, calls to this method are only
-  * compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="grammarFileName">Grammar file name passed on to the listener.</param>
-  * <param name="ruleName">Rule name passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugEnterRule(string grammarFileName, string ruleName)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.EnterRule(grammarFileName, ruleName);
- }
-
- /** <summary>
-  * Forwards an exit-rule event to DebugListener, if one is present.
-  * Because of the Conditional attribute, calls to this method are only
-  * compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="grammarFileName">Grammar file name passed on to the listener.</param>
-  * <param name="ruleName">Rule name passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugExitRule(string grammarFileName, string ruleName)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.ExitRule(grammarFileName, ruleName);
- }
-
- /** <summary>
-  * Forwards an enter-subrule event to DebugListener, if one is present.
-  * Because of the Conditional attribute, calls to this method are only
-  * compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="decisionNumber">Decision number passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugEnterSubRule(int decisionNumber)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.EnterSubRule(decisionNumber);
- }
-
- /** <summary>
-  * Forwards an exit-subrule event to DebugListener, if one is present.
-  * Because of the Conditional attribute, calls to this method are only
-  * compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="decisionNumber">Decision number passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugExitSubRule(int decisionNumber)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.ExitSubRule(decisionNumber);
- }
-
- /** <summary>
-  * Forwards an enter-alternative event to DebugListener, if one is
-  * present. Because of the Conditional attribute, calls to this method
-  * are only compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="alt">Alternative number passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugEnterAlt(int alt)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.EnterAlt(alt);
- }
-
- /** <summary>
-  * Forwards an enter-decision event to DebugListener, if one is
-  * present. Because of the Conditional attribute, calls to this method
-  * are only compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="decisionNumber">Decision number passed on to the listener.</param>
-  * <param name="couldBacktrack">Flag passed on to the listener unchanged.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.EnterDecision(decisionNumber, couldBacktrack);
- }
-
- /** <summary>
-  * Forwards an exit-decision event to DebugListener, if one is present.
-  * Because of the Conditional attribute, calls to this method are only
-  * compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="decisionNumber">Decision number passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugExitDecision(int decisionNumber)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.ExitDecision(decisionNumber);
- }
-
- /** <summary>
-  * Forwards a location event to DebugListener, if one is present.
-  * Because of the Conditional attribute, calls to this method are only
-  * compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="line">Line number passed on to the listener.</param>
-  * <param name="charPositionInLine">Character position within the line, passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugLocation(int line, int charPositionInLine)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.Location(line, charPositionInLine);
- }
-
- /** <summary>
-  * Forwards a semantic-predicate event to DebugListener, if one is
-  * present. Because of the Conditional attribute, calls to this method
-  * are only compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="result">Predicate result passed on to the listener.</param>
-  * <param name="predicate">Predicate text passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugSemanticPredicate(bool result, string predicate)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.SemanticPredicate(result, predicate);
- }
-
- /** <summary>
-  * Forwards a begin-backtrack event to DebugListener, if one is
-  * present. Because of the Conditional attribute, calls to this method
-  * are only compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="level">Backtracking level passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugBeginBacktrack(int level)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.BeginBacktrack(level);
- }
-
- /** <summary>
-  * Forwards an end-backtrack event to DebugListener, if one is present.
-  * Because of the Conditional attribute, calls to this method are only
-  * compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="level">Backtracking level passed on to the listener.</param>
-  * <param name="successful">Outcome flag passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugEndBacktrack(int level, bool successful)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.EndBacktrack(level, successful);
- }
-
- /** <summary>
-  * Forwards a recognition exception to DebugListener, if one is
-  * present. Because of the Conditional attribute, calls to this method
-  * are only compiled in when ANTLR_DEBUG is defined.
-  * </summary>
-  *
-  * <param name="ex">Exception passed on to the listener.</param>
-  */
- [Conditional("ANTLR_DEBUG")]
- protected virtual void DebugRecognitionException(RecognitionException ex)
- {
-     IDebugEventListener listener = DebugListener;
-     if (listener == null)
-         return;
-
-     listener.RecognitionException(ex);
- }
- #endregion
- }
-}