SharpC: A C Interpreter In C# - 1001
構文解析を理解する前に、単語の定義を見てみましょう.
注意:便宜上、演算子などを含むすべての語を単語としてマークするわけではありません.キーワード、変数名、関数名のみを単語としてマークします.ソースコードをより解析しやすくするために、SourceCodeクラスを作成して構文解析を行います.
より複雑な解析は、構文解析とともにParserクラスに統合されます.Parserクラスは、Cコードを構文解析して構文ツリーを構築するために使用されます.
構文解析の主な方法は次のように定義されます.
文の処理は、宣言、制御構造、式の3つの場合に分けられます.
関数解析の実装は次のとおりです.
if/elseを例に,制御構造の解析過程を説明する.まずif部分を見てください.
elseセクションを見てみましょう.
forループの解析:
複雑なのはswitchです.
式の解析は2つのステップに分けられ,最初のステップはStatementオブジェクトになり,このオブジェクトに基づいて解析を行う.ステップ1:
ステップ2:
/// <summary>
/// A structure for the result of word parsing.
/// </summary>
public class Word
{
// Line and start/end positions of the word. Parser copies AbsoluteLine and
// AbsoluteStartPos into a LocationInfo's FirstLine/FirstPos.
// NOTE(review): whether these are 0- or 1-based, and how they relate to the
// SourceCode offsets, is not visible here - confirm against GetWord.
public int AbsoluteLine;
public int AbsoluteStartPos;
public int AbsoluteEndPos;
// NOTE(review): presumably the word's position relative to the SourceCode text - confirm.
public int Offset;
public int Column;
// The word itself; Parser compares it against keywords such as "while".
public string Text;
// NOTE(review): presumably false when no word could be read - confirm.
public bool IsAvailable;
// Location handed to the Parser's error reporting and to the ParseXxx methods.
public Context.LocationInfo Location;
}
注意:便宜上、演算子などを含むすべての語を単語としてマークするわけではありません.キーワード、変数名、関数名のみを単語としてマークします.ソースコードをより解析しやすくするために、SourceCodeクラスを作成して構文解析を行います.
// Cursor-style wrapper around a piece of C source text.
// Only member declarations are listed here (bodies are omitted), so the notes
// below are limited to what the visible Parser code relies on; everything else
// is marked as an assumption to confirm.
// NOTE(review): the bare "List" return types look like generic type arguments
// lost in extraction; Parser iterates SplitStatement() as SourceCode items.
public class SourceCode
{
// Marker character sets. Judging by their names they are used when scanning
// comments and numeric literals - TODO confirm against the implementation.
public static char[] MultiLineCommentsStartMark = new char[] { '/', '*' };
public static char[] HexMark = new char[] { 'x', 'X' };
public static char[] ExponentialMark = new char[] { 'e', 'E' };
public static char[] FloatMark = new char[] { 'f', 'F' };
public static char[] PlusMinusMark = new char[] { '-', '+' };
// The source text wrapped by this object.
public string Text = "";
public int Line = 0;
public int ColumnOfCurrentLine = 0;
// Offsets set by Parser when it creates a nested SourceCode for a sub-span
// (it assigns the parent's AbsolutePos / AbsoluteLine to them).
public int PosOffset = 0;
public int LineOffset = 0;
// Cursor index into Text; Parser uses it as a Substring start/length.
public int Column;
public Word LastWord;
public SourceCode() { }
public SourceCode(string txt);
public void LoadFromFile(string path);
// True once the cursor has run out of text; Parser loops on "!src.Eof".
public bool Eof;
// Position of the cursor; Parser stores it in LocationInfo.FirstPos/LastPos.
// NOTE(review): presumably PosOffset plus the local position - confirm.
public int AbsolutePos;
// Line of the cursor; Parser stores it in LocationInfo.FirstLine/LastLine.
// NOTE(review): presumably LineOffset plus the local line - confirm.
public int AbsoluteLine;
public Context.LocationInfo Location;
// Character under the cursor.
public char CurrentChar;
// Rewinds the cursor. Parser calls it after GetWord when the statement has to
// be re-read from its beginning.
public void ResetPos();
// NOTE(review): presumably tests whether the next character equals c - confirm.
public bool TestNextChar(char c);
// NOTE(review): presumably tests whether the next character is one of chars - confirm.
public bool TestNextChar(char[] chars);
// Advances the cursor by one character.
// NOTE(review): skipSpace presumably also skips following whitespace - confirm.
public void NextChar(bool skipSpace = true);
// NOTE(review): presumably the text from the cursor to the end - confirm.
public string Tail;
public static bool IsDigit(char ch);
public static bool IsLetter(char ch);
public static bool IsSpace(char ch);
public static bool IsOperator(char ch);
public static bool IsBracket(char ch);
public void SkipSpace();
// Splits the text into statements (';' appears in the original comment).
// Parser expects each piece to end with ";" or "}".
public List SplitStatement();
// Splits on ',' (per the original comment); presumably for multi-variable
// declarations - confirm.
public List SplitMultiDeclaration();
// NOTE(review): presumably splits a parameter list - confirm.
public List SplitParameter();
// Reads the next word from src.
public static Word GetWord(SourceCode src);
// NOTE(review): presumably returns the code enclosed by the given bracket pair - confirm.
public static SourceCode GetBracketCode(char leftBracket, char rightBracket, SourceCode src);
}
より複雑な解析は、構文解析とともにParserクラスに統合されます.Parserクラスは、Cコードを構文解析して構文ツリーを構築するために使用されます.
/// <summary>
/// Parser class parses the simple C source code to build
/// the syntax tree.
/// </summary>
public class Parser
{
// Category of a problem reported during parsing. A value of this type is
// passed to NotifyError and carried by ParsingErrorEventArg.Error.
public enum ParsingErrorType
{
Warning,
UnknownError,
UndefinedVariable,
UnexpectedKeyword,
TokenExpected,
SyntaxError,
TypeError,
FormatError,
FunctionBodyUndefined
};
// Payload for the OnParsing event.
public class ParsingEventArg
{
// The context node the notification is about.
public Context Context;
}
// Payload for the OnParsingWarning event.
public class ParsingWarningEventArg
{
// The context node the warning relates to.
public Context Context;
// Source location the warning relates to.
public Context.LocationInfo Location;
// Human-readable warning text.
public string Description;
}
// Payload for the OnParsingError event.
public class ParsingErrorEventArg
{
// The context node the error relates to.
public Context Context;
// Source location the error relates to.
public Context.LocationInfo Location;
// Category of the error.
public ParsingErrorType Error;
// Human-readable error text.
public string Description;
// NOTE(review): presumably set by the handler to request that parsing goes on
// after this error (NotifyError returns a bool that callers treat as
// "keep going") - confirm against NotifyError.
public bool Continue;
}
// Notifications raised while parsing.
// NOTE(review): these are declared with the non-generic EventHandler, while the
// nested *EventArg classes above do not derive from EventArgs in the visible
// code; the generic type arguments were probably lost in extraction - confirm
// the intended handler types.
public event EventHandler OnParsing;
public event EventHandler OnParsingWarning;
public event EventHandler OnParsingError;
///////////////////////////////
// Private member variables
///////////////////////////////
// Source passed to the most recent Parse(SourceCode) call; null until then.
private SourceCode m_sourceCode = null;
private Word m_lastWord = null;
// Snapshot of this is taken before number/variable parsing and handed to
// ValidateOperator.
private Expression.ExpressionNode m_lastExpNode = null;
// NOTE(review): key/value types are not visible (generic arguments appear to
// have been stripped); presumably maps escape sequences to characters - confirm.
private Dictionary m_escapeCharDict = new Dictionary();
// Counters exposed through ErrorCount/WarningCount and HasError/HasWarning.
private int m_errorCount = 0;
private int m_warningCount = 0;
/// <summary>Text of the source most recently passed to Parse.</summary>
public string Source => m_sourceCode.Text;

/// <summary>True when at least one error has been counted.</summary>
public bool HasError => m_errorCount > 0;

/// <summary>True when at least one warning has been counted.</summary>
public bool HasWarning => m_warningCount > 0;

/// <summary>Number of errors counted so far.</summary>
public int ErrorCount => m_errorCount;

/// <summary>Number of warnings counted so far.</summary>
public int WarningCount => m_warningCount;
// Error/warning limits. Their use is not visible in this listing.
// NOTE(review): presumably the thresholds after which error/warning
// notification stops parsing - confirm against NotifyError/NotifyWarning.
public int MaxError = 0;
public int MaxWarning = 65535;
/// <summary>
/// Parses <paramref name="src"/> into a fresh root context.
/// </summary>
/// <param name="src">Source code to parse; it is also remembered as the current source.</param>
/// <returns>
/// The root context when both parsing and reference validation succeed;
/// otherwise null.
/// </returns>
public Context Parse(SourceCode src)
{
    m_sourceCode = src;

    Context root = new Context();

    bool parsed = Parse(root, src);
    if (!parsed)
    {
        return null;
    }

    // Validation runs only after a successful parse.
    return ValidateContextReference(root) ? root : null;
}
...
構文解析の主な方法は次のように定義されます.
/// <summary>
/// Parses every statement of <paramref name="src"/> and attaches the resulting
/// nodes to <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Context that receives the parsed children; its Location is filled in.</param>
/// <param name="src">Source to split into statements and parse.</param>
/// <returns>
/// True when all statements were parsed, or when errors were reported but the
/// error notification asked to keep going; false otherwise.
/// </returns>
private bool Parse(Context ctx, SourceCode src)
{
    bool res = true;
    ctx.Location.FirstLine = src.AbsoluteLine;
    ctx.Location.FirstPos = src.AbsolutePos;

    foreach (SourceCode stmt in src.SplitStatement())
    {
        try
        {
            // A "do { ... }" whose condition is still missing must be followed
            // by its "while (...)" part.
            ControlFlow.DoWhileLoop pendingDoWhile = ctx.Children.Count > 0
                ? ctx.Children.Last() as ControlFlow.DoWhileLoop
                : null;

            if (pendingDoWhile != null && pendingDoWhile.Condition == null)
            {
                Word wordOfWhile = GetWord(stmt);
                if (wordOfWhile.Text != "while")
                {
                    if (!NotifyError(ctx, wordOfWhile.Location, ParsingErrorType.SyntaxError, "\"while\" is expected."))
                        return false;

                    // The error was tolerated and the statement is parsed as an
                    // ordinary one below. GetWord moved the cursor past the
                    // first word, so rewind; otherwise the code below would
                    // read the statement from its second word.
                    stmt.ResetPos();
                }
                else
                {
                    res = ParseControl_While(ctx, stmt, new Context.LocationInfo()
                    {
                        FirstLine = wordOfWhile.AbsoluteLine,
                        FirstPos = wordOfWhile.AbsoluteStartPos
                    });
                    if (!res)
                        return false;
                    continue;
                }
            }

            if (stmt.Text.EndsWith(";"))
            {
                // Plain statement.
                res = ParseStatement(ctx, stmt);
            }
            else if (stmt.Text.EndsWith("}"))
            {
                if (stmt.Text.StartsWith("{"))
                {
                    // Anonymous block: parse the text between the braces into
                    // a new Block child.
                    SourceCode blockSrc = new SourceCode()
                    {
                        LineOffset = stmt.AbsoluteLine,
                        PosOffset = stmt.AbsolutePos,
                        Text = stmt.Text.Substring(1, stmt.Text.Length - 2)
                    };
                    Block block = new Block()
                    {
                        Name = Context.GetAnonymousName("block"),
                        Location = new Context.LocationInfo()
                        {
                            FirstLine = stmt.AbsoluteLine,
                            FirstPos = stmt.AbsolutePos
                        }
                    };
                    ctx.AddChild(block);
                    res = Parse(block, blockSrc);
                    block.Location.LastLine = stmt.AbsoluteLine;
                    block.Location.LastPos = stmt.AbsolutePos;
                }
                else
                {
                    // Either a control structure with a braced body or a
                    // function definition; the first word decides.
                    Word wordOfControlFlow = GetWord(stmt);
                    if (Context.IsControlFlow(wordOfControlFlow.Text))
                    {
                        res = ParseControlFlow(ctx, stmt, wordOfControlFlow);
                    }
                    else
                    {
                        stmt.ResetPos();
                        res = ParseFunction(ctx, stmt, wordOfControlFlow.Location);
                    }
                }
            }
            // NOTE(review): a statement ending in neither ';' nor '}' is
            // silently skipped here, as in the original code.
        }
        catch (ParseException pe)
        {
            if (!NotifyError(ctx, ctx.Location, ParsingErrorType.SyntaxError, pe.Message))
                return false;
        }

        if (!res)
            return false;
    }

    ctx.Location.LastLine = src.AbsoluteLine;
    ctx.Location.LastPos = src.AbsolutePos;
    return true;
}
文の処理は、宣言、制御構造、式の3つの場合に分けられます.
/// <summary>
/// Parses a single statement, dispatching on its first word: a data type
/// starts a declaration, a control-flow keyword starts a control structure,
/// anything else is treated as an expression.
/// </summary>
/// <param name="ctx">Context that receives the parsed node.</param>
/// <param name="src">Source of the statement.</param>
/// <returns>The result of the parser the statement was dispatched to.</returns>
private bool ParseStatement(Context ctx, SourceCode src)
{
    Word leadingWord = GetWord(src);
    string keyword = leadingWord.Text;

    if (Context.IsDataType(keyword))
    {
        return ParseDeclare(ctx, src, leadingWord);
    }

    if (Context.IsControlFlow(keyword))
    {
        return ParseControlFlow(ctx, src, leadingWord);
    }

    // Expression: rewind so the expression parser sees the first word too.
    src.ResetPos();
    return ParseExpression(ctx, src, leadingWord.Location);
}
関数解析の実装は次のとおりです.
/// <summary>
/// Parses a function definition: the header (everything before '{') is parsed
/// as a statement, then the text between the braces is parsed as the body.
/// </summary>
/// <param name="ctx">Context that receives the function definition.</param>
/// <param name="src">Source of the whole definition, positioned at its start.</param>
/// <param name="loc">Location of the first word of the definition.</param>
/// <returns>
/// True when header and body were parsed. When the input is not a function
/// definition, the error is reported and the notification result is returned.
/// </returns>
private bool ParseFunction(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    // Advance to the opening brace of the body.
    while (!src.Eof && src.CurrentChar != '{')
        src.NextChar();

    // No '{' at all: the Substring calls below would be given an invalid
    // range, so report a syntax error instead of letting them throw.
    if (src.Eof)
        return NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "'{' is expected.");

    // The header ends right before the '{'.
    Context.LocationInfo headerLoc = loc;
    headerLoc.LastPos = src.AbsolutePos - 1;

    SourceCode funcHeader = new SourceCode()
    {
        PosOffset = loc.FirstPos,
        LineOffset = loc.FirstLine,
        Text = src.Text.Substring(0, src.Column)
    };

    // Parsing the header as a statement is expected to add a FunctionDefine
    // node to ctx.
    if (!ParseStatement(ctx, funcHeader))
        return false;

    src.NextChar(); // skip '{'

    // Body text without the enclosing braces.
    SourceCode bodyStmt = new SourceCode()
    {
        PosOffset = src.AbsolutePos,
        LineOffset = src.AbsoluteLine,
        Text = src.Text.Substring(src.Column, src.Text.Length - src.Column - 1)
    };

    // The header may have parsed successfully as something other than a
    // function definition (e.g. a plain expression). Guard the cast instead
    // of dereferencing null.
    Function.FunctionDefine funcDef = ctx.Children.Count > 0
        ? ctx.Children.Last() as Function.FunctionDefine
        : null;
    if (funcDef == null)
        return NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "Function definition is expected.");

    funcDef.AddChild(new Block()
    {
        Name = Context.GetAnonymousName("block")
    });

    if (Parse(funcDef.Body, bodyStmt))
    {
        funcDef.Location = headerLoc;
        return true;
    }
    return false;
}
制御構造の解析:
/// <summary>
/// Dispatches a control-flow statement to the parser for its keyword and, on
/// success, raises the parsing notification for the last child of
/// <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Context that receives the parsed node.</param>
/// <param name="src">Source of the statement, positioned after the keyword.</param>
/// <param name="wordOfControlFlow">The control-flow keyword that was read.</param>
/// <returns>True when the keyword is supported and its parser succeeded.</returns>
private bool ParseControlFlow(Context ctx, SourceCode src, Word wordOfControlFlow)
{
    Context.LocationInfo keywordLoc = wordOfControlFlow.Location;
    bool parsed;

    switch (wordOfControlFlow.Text)
    {
        case "if":
            parsed = ParseControl_If(ctx, src, keywordLoc);
            break;
        case "else":
            parsed = ParseControl_Else(ctx, src, keywordLoc);
            break;
        case "for":
            parsed = ParseControl_For(ctx, src, keywordLoc);
            break;
        case "do":
            parsed = ParseControl_DoWhile(ctx, src, keywordLoc);
            break;
        case "while":
            parsed = ParseControl_While(ctx, src, keywordLoc);
            break;
        case "switch":
            parsed = ParseControl_Switch(ctx, src, keywordLoc);
            break;
        case "continue":
            parsed = ParseControl_Continue(ctx, src, keywordLoc);
            break;
        case "break":
            parsed = ParseControl_Break(ctx, src, keywordLoc);
            break;
        case "return":
            parsed = ParseControl_Return(ctx, src, keywordLoc);
            break;
        default:
            // Unsupported control flow: the result is false whether or not the
            // notification asks to continue.
            if (!NotifyError(ctx, keywordLoc, ParsingErrorType.SyntaxError, "Unsupported keyword."))
            {
                return false;
            }
            parsed = false;
            break;
    }

    if (!parsed)
    {
        return false;
    }

    NotifyParsing(ctx.Children.Last());
    return true;
}
if/elseを例に,制御構造の解析過程を説明する.まずif部分を見てください.
/// <summary>
/// Parses the "if (condition) then-part" portion of an if statement and adds
/// the resulting IfThen node to <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Context that receives the IfThen node.</param>
/// <param name="src">Source positioned right after the "if" keyword.</param>
/// <param name="loc">Location of the "if" keyword.</param>
/// <returns>True when the condition and the then-part were parsed.</returns>
private bool ParseControl_If(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    // The condition has to start with '('.
    src.SkipSpace();
    bool parenMissing = src.CurrentChar != '(';
    if (parenMissing
        && !NotifyError(ctx, GetLocation(loc.FirstLine, loc.FirstPos, src.AbsoluteLine, src.AbsolutePos), ParsingErrorType.SyntaxError, "'(' is expected."))
    {
        return false;
    }

    SourceCode conditionSrc = GetParenthesisCode(src);

    ControlFlow.IfThen ifNode = new ControlFlow.IfThen()
    {
        Location = new Context.LocationInfo()
        {
            FirstLine = loc.FirstLine,
            FirstPos = loc.FirstPos
        }
    };
    ctx.AddChild(ifNode);

    if (!ParseExpression(ifNode, conditionSrc, ref ifNode.Condition))
    {
        return false;
    }

    // Then-part: either a braced block or the rest of the statement.
    src.SkipSpace();
    Block thenPart = new Block();
    ifNode.AddChild(thenPart);

    bool braced = src.CurrentChar == '{';
    SourceCode thenSrc = braced
        ? GetBlockCode(src)
        : new SourceCode()
        {
            PosOffset = src.AbsolutePos,
            LineOffset = src.AbsoluteLine,
            Text = src.Text.Substring(src.Column)
        };

    bool parsed = Parse(thenPart, thenSrc);

    // The end location comes from the outer source for a braced block and from
    // the statement source otherwise.
    SourceCode endMarker = braced ? src : thenSrc;
    ifNode.Location.LastLine = endMarker.AbsoluteLine;
    ifNode.Location.LastPos = endMarker.AbsolutePos;

    return parsed;
}
elseセクションを見てみましょう.
/// <summary>
/// Parses an "else" part and attaches it as a new Block child to the IfThen
/// node it belongs to.
/// </summary>
/// <param name="ctx">Context whose last child is expected to be the matching if.</param>
/// <param name="src">Source positioned right after the "else" keyword.</param>
/// <param name="loc">Location of the "else" keyword.</param>
/// <returns>
/// True when the else part was parsed; false when no matching if exists (the
/// error is reported first) or when parsing the else part failed.
/// </returns>
private bool ParseControl_Else(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    // An "else" that is the very first statement has nothing to attach to.
    // Report it here; calling Last() on an empty child list would throw.
    if (ctx.Children.Count == 0)
    {
        NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "\"else\" should not appear here.");
        return false;
    }

    // The previous statement must be an if.
    if (!(ctx.Children.Last() is ControlFlow.IfThen))
        if (!NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "\"else\" should not appear here."))
            return false;

    // Walk down through the last children to find the if this else belongs to.
    // NOTE(review): the "> 2" threshold is kept from the original; its relation
    // to "else if" chains should be confirmed against IfThen's child layout.
    Context lastStx = ctx.Children.Last();
    while (lastStx.Children.Count > 2)
    {
        lastStx = lastStx.Children.Last();
    }

    // Without a matching if there is nothing to attach the else block to, so
    // stop here even when the notification asks to continue; going on would
    // dereference a null reference.
    ControlFlow.IfThen stxIf = lastStx as ControlFlow.IfThen;
    if (stxIf == null)
    {
        NotifyError(ctx, loc, ParsingErrorType.SyntaxError, "Can't find matched \"if\".");
        return false;
    }

    src.SkipSpace();
    bool res = false;
    Block elseBlock = new Block();
    stxIf.AddChild(elseBlock);

    if (src.CurrentChar == '{')
    {
        // Braced else block.
        SourceCode code = GetBlockCode(src);
        res = Parse(elseBlock, code);
        lastStx.Location.LastLine = src.AbsoluteLine;
        lastStx.Location.LastPos = src.AbsolutePos;
    }
    else
    {
        // Single statement: the rest of the source is the else part.
        SourceCode stmt = new SourceCode()
        {
            PosOffset = src.AbsolutePos,
            LineOffset = src.AbsoluteLine,
            Text = src.Text.Substring(src.Column)
        };
        res = Parse(elseBlock, stmt);
        lastStx.Location.LastLine = stmt.AbsoluteLine;
        lastStx.Location.LastPos = stmt.AbsolutePos;
    }
    return res;
}
forループの解析:
/// <summary>
/// Parses a for loop: the three parts of the parenthesised header
/// (initializer, condition, iterator) followed by the loop body.
/// </summary>
/// <param name="ctx">Context that receives the ForLoop node.</param>
/// <param name="src">Source positioned right after the "for" keyword.</param>
/// <param name="loc">Location of the "for" keyword.</param>
/// <returns>True when the header and the body were parsed.</returns>
private bool ParseControl_For(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    src.SkipSpace();
    if (src.CurrentChar != '(')
        if (!NotifyError(ctx, GetLocation(loc.FirstLine, loc.FirstPos, src.AbsoluteLine, src.AbsolutePos), ParsingErrorType.SyntaxError, "'(' is expected."))
            return false;

    // Split the header into its parts.
    SourceCode stmt = GetParenthesisCode(src);
    var stmtList = stmt.SplitStatement();

    // Exactly three parts are expected. With fewer than three the indexing
    // below cannot work, so stop even if the notification asks to continue.
    if (stmtList.Count != 3)
        if (!NotifyError(ctx, GetLocation(loc.FirstLine, loc.FirstPos, src.AbsoluteLine, src.AbsolutePos), ParsingErrorType.SyntaxError, "Syntax error.")
            || stmtList.Count < 3)
            return false;

    ControlFlow.ForLoop stxFor = new ControlFlow.ForLoop()
    {
        Location = new Context.LocationInfo()
        {
            FirstLine = loc.FirstLine,
            FirstPos = loc.FirstPos
        }
    };
    ctx.AddChild(stxFor);

    // The initializer gets its own context; condition and iterator are parsed
    // against that same context.
    Context stxInit = new Context();
    stxFor.AddChild(stxInit);
    if (!ParseStatement(stxInit, stmtList[0]))
        return false;

    if (!ParseExpression(stxInit, stmtList[1], ref stxFor.Condition))
        return false;

    if (!ParseExpression(stxInit, stmtList[2], ref stxFor.Iterator))
        return false;

    // Body: either a braced block or the rest of the statement.
    src.SkipSpace();
    if (src.CurrentChar == '{')
    {
        stmt = GetBlockCode(src);
    }
    else
    {
        stmt = new SourceCode()
        {
            PosOffset = src.AbsolutePos,
            LineOffset = src.AbsoluteLine,
            Text = src.Text.Substring(src.Column)
        };
    }

    Block block = new Block();
    stxFor.AddChild(block);
    bool res = Parse(block, stmt);
    stxFor.Location.LastLine = stmt.AbsoluteLine;
    stxFor.Location.LastPos = stmt.AbsolutePos;
    return res;
}
複雑なのはswitchです.
private bool ParseControl_Switch(Context ctx, SourceCode src, Context.LocationInfo loc)
{
// Check condition
src.SkipSpace();
if (src.CurrentChar != '(')
if (!NotifyError(ctx, GetLocation(loc.FirstLine, loc.FirstPos, src.AbsoluteLine, src.AbsolutePos), ParsingErrorType.SyntaxError, "Expecte a '('."))
return false;
ControlFlow.Switch stxSwitch = new ControlFlow.Switch()
{
Location = new Context.LocationInfo()
{
FirstLine = loc.FirstLine,
FirstPos = loc.FirstPos
}
};
ctx.AddChild(stxSwitch);
// Parse condition expression
if (!ParseExpression(stxSwitch, GetParenthesisCode(src), ref stxSwitch.Condition))
{
ctx.Children.RemoveAt(ctx.Children.Count - 1);
return false;
}
// Add body
stxSwitch.AddChild(new Block());
ControlFlow.Case stxDefault = new ControlFlow.Case(); // default part
stxSwitch.Body.AddChild(stxDefault);
// Check '{'
src.SkipSpace();
if (src.CurrentChar != '{')
if (!NotifyError(ctx, GetLocation(loc.FirstLine, loc.FirstPos, src.AbsoluteLine, src.AbsolutePos), ParsingErrorType.SyntaxError, "Expecte a '{'."))
return false;
// Parse body
SourceCode switchBodyStmt = GetBlockCode(src);
Dictionary
式の解析は2つのステップに分けられ,最初のステップはStatementオブジェクトになり,このオブジェクトに基づいて解析を行う.ステップ1:
/// <summary>
/// Wraps an expression in a new Statement node, parses the expression into it
/// and, on success, adds the node to <paramref name="ctx"/> and raises the
/// parsing notification.
/// </summary>
/// <param name="ctx">Context that receives the statement.</param>
/// <param name="src">Source of the expression.</param>
/// <param name="loc">Start location assigned to the statement.</param>
/// <returns>True when the expression was parsed; false otherwise (nothing is added).</returns>
private bool ParseExpression(Context ctx, SourceCode src, Context.LocationInfo loc)
{
    Statement statement = new Statement();
    statement.Name = Context.GetAnonymousName("statement");
    statement.Location = loc;

    bool parsed = ParseExpression(ctx, src, ref statement.TargetExpression);
    if (parsed)
    {
        ctx.AddChild(statement);

        // The statement ends where the source cursor stopped.
        statement.Location.LastLine = src.AbsoluteLine;
        statement.Location.LastPos = src.AbsolutePos;

        NotifyParsing(statement);
    }

    return parsed;
}
ステップ2:
/// <summary>
/// Parses the expression in <paramref name="src"/> character by character,
/// dispatching each operator, literal or identifier to its dedicated parser
/// and building the result in <paramref name="expTree"/>.
/// </summary>
/// <param name="ctx">Context the expression belongs to.</param>
/// <param name="src">Source of the expression; consumed until its end or the first failure.</param>
/// <param name="expTree">Expression tree that is built up.</param>
/// <returns>True when the whole source was consumed without a failing step.</returns>
private bool ParseExpression(Context ctx, SourceCode src, ref Expression.ExpressionNode expTree)
{
    bool res = true;
    while (!src.Eof && res)
    {
        src.SkipSpace();
        switch (src.CurrentChar)
        {
            case ',':
                {
                    // The comma starts a further expression, which becomes its
                    // own statement on the parent (or on ctx if there is none).
                    src.NextChar(); // skip ','
                    Statement stxExp = new Statement();
                    if (ctx.Parent != null)
                        ctx.Parent.AddChild(stxExp);
                    else
                        ctx.AddChild(stxExp);
                    res = ParseExpression(ctx, src, ref stxExp.TargetExpression);
                }
                break;
            case ';': src.NextChar(); break; // End of statement
            case '=': res = ParseExpression_Equal(ctx, src, ref expTree); break;
            case '+': res = ParseExpression_Plus(ctx, src, ref expTree); break;
            case '-': res = ParseExpression_Minus(ctx, src, ref expTree); break;
            case '*': res = ParseExpression_Mul(ctx, src, ref expTree); break;
            case '/': res = ParseExpression_Div(ctx, src, ref expTree); break;
            case '%': res = ParseExpression_Mod(ctx, src, ref expTree); break;
            case '&': res = ParseExpression_And(ctx, src, ref expTree); break;
            case '|': res = ParseExpression_Or(ctx, src, ref expTree); break;
            case '^': res = ParseExpression_Xor(ctx, src, ref expTree); break;
            case '!': res = ParseExpression_Not(ctx, src, ref expTree); break;
            case '~': res = ParseExpression_BitwiseNot(ctx, src, ref expTree); break;
            // The listing had an empty character literal here (which does not
            // compile); the handler name shows it is the '>' case.
            // NOTE(review): a "case '<'" branch appears to have been lost
            // together with the '>' character; restore it with the matching
            // less-than handler, whose name is not visible in this listing.
            case '>': res = ParseExpression_Greater(ctx, src, ref expTree); break;
            case '(': res = ParseExpression_Parentheses(ctx, src, ref expTree); break;
            case '\'': res = ParseExpression_CharValue(ctx, src, ref expTree); break;
            case '"':
                {
                    // const string
                    res = ParseExpression_ConstStringValue(ctx, src, ref expTree);
                }
                break;
            default:
                {
                    // Remember the node seen before this operand so the
                    // operator sequence can be validated afterwards.
                    Expression.ExpressionNode lastNode = m_lastExpNode;
                    if (SourceCode.IsDigit(src.CurrentChar))
                    {
                        res = ParseExpression_NumberValue(ctx, src, ref expTree);
                    }
                    else if (SourceCode.IsLetter(src.CurrentChar))
                    {
                        res = ParseExpression_Var(ctx, src, ref expTree);
                    }
                    else if (!NotifyError(ctx, src.Location, ParsingErrorType.SyntaxError, "Syntax error."))
                    {
                        return false;
                    }

                    if (!ValidateOperator(ctx, src, lastNode))
                        return false;
                }
                break;
        } // switch
    } // while !Eof
    return res;
} // func ParseExpression