随想録(cコンパイラの実装)

10345 ワード

【声明:著作権所有、転載歓迎、商業用途に使用しないでください.連絡ポスト:[email protected]
何気なくgithubで面白いプロジェクトを見つけました.例えば、ここです.https://github.com/hukun01/CMMInterpreter.flexを用いて文字列の識別を実現し,bisonを用いてast構文ツリーの構築を実現し,最後にastを直接利用して計算と識別を行った.astノードが遍歴するときは,著者は根が左右に遍歴するはずであるが,実は左右の根が遍歴したほうがよいかもしれない.文法解析の場合は必ず木の構造ですが、必ずしも二叉木ではありません.
例えばlexファイルはこう書かれています
/***********************************/
/* File: cmm.l    version 2.0      */
/* Flex version for CMM            */
/* CMM Interpreter Construction    */
/***********************************/
%option noyywrap
%{
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "cmm.tab.h"
%}

digit		[0-9]
int_num		{digit}+
real_num	{digit}+"."{digit}*
letter		[a-zA-Z]
identifier	{letter}+({letter}|{digit}|_)*({letter}+|{digit}+)|{letter}+
whitespace	[ \t]*

%%

"if"		{ return IF;}
"else"		{ return ELSE;}
"while"		{ return WHILE;}
"int"		{ yylval.dataType = Int; return INT;} 
"real"		{ yylval.dataType = Real; return REAL;}
"bool"		{ yylval.dataType = Bool; return BOOL;}
"read"		{ return READ;}
"write"		{ return WRITE;}
"main"		{ return MAIN;}

{int_num}		{ yylval.intval = atoi(yytext); return INT_VALUE;}
{real_num}		{ yylval.realval = atof(yytext); return REAL_VALUE;}
{identifier}	{ yylval.idName = copyString(yytext); return ID;}
{whitespace}	{/*Do nothing.*/}

"+"		{ return PLUS;}
"-"		{ return SUB;}
"*"		{ return MUL;}
"/"		{ return DIV;}
""		{ yylval.binaryOperator = 2; return REL_OP;}
"<="	{ yylval.binaryOperator = 3; return REL_OP;}
">="	{ yylval.binaryOperator = 4; return REL_OP;}
"=="	{ yylval.binaryOperator = 5; return REL_OP;}
"<>"	{ yylval.binaryOperator = 6; return REL_OP;}
"&&"	{ yylval.binaryOperator = 7; return REL_OP;}
"||"	{ yylval.binaryOperator = 8; return REL_OP;}

"="		{ return ASSIGN;}
"("		{ return LPAREN;}
")"		{ return RPAREN;}
";"		{ return SEMI;}
"{"		{ return LBRACE;}
"}"		{ return RBRACE;}
"["		{ return LBRACKET;}
"]"		{ return RBRACKET;}
","		{ return COMMA;}

"//"	{ char c = input();
		  while(c != '
') { if (c == EOF) break; c = input(); } lineno++; } "/*" { char c; int flag = 1; do { c = input(); entry1: if (c == EOF) break; if (c == '
') lineno++; if (c == '*') { c = input(); if (c == '/') flag = 0; else goto entry1; } } while (flag); }
{lineno++;} . {yyerror("Mystery character %s
", yytext);return ERROR;} %% /* */ void iniLexer(void) { static int firstTime = TRUE; lineno = 0; if (firstTime) { firstTime = FALSE; lineno++; yyin = source; yyout = listing; } } /* TokenType getToken(void) { static int firstTime = TRUE; TokenType currentToken; if (firstTime) { firstTime = FALSE; lineno++; yyin = source; yyout = listing; } currentToken = yylex(); strncpy(tokenString,yytext,MAXTOKENLEN); if (TraceScan) { fprintf(listing,"\t%d: ",lineno); printToken(currentToken); } return currentToken; }*/
yaccファイルはこう書かれています
/***********************************/
/* File: cmm.y                     */
/* Bison grammar file about CMM    */
/* CMM Interpreter Construction    */
/***********************************/

/*   1   /    */
%expect 1
%{

#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"

static TreeNode * savedTree; /* stores syntax tree for later return */

%}

%union{
	struct treeNode * node;
	int intval;
	double realval;
	char * idName;
	int binaryOperator;
	int dataType;
}
%token  INT_VALUE
%token  REAL_VALUE
%token  ID
%token  INT REAL BOOL

/*       */
%right ASSIGN
%left PLUS SUB
%left MUL DIV
%nonassoc  REL_OP
%nonassoc UMINUS
/*         tokens */
%token IF ELSE WHILE READ WRITE MAIN
%token LPAREN RPAREN SEMI LBRACE RBRACE LBRACKET RBRACKET COMMA
%token ASSIGN
%token NEWLINE ERROR

%type  stmt_list stmt
%type  if_stmt decl_stmt compound_stmt while_stmt assign_stmt read_stmt write_stmt
%type  exp factor bin_exp
%type  type_spec

%start program

%% /* CMM   */

program		: stmt_list
				{ savedTree = $1;}
			;
stmt_list	:	{ $$ = NULL; }
			| stmt_list stmt 
				{	TreeNode * t = $1;
					if (t != NULL)
					{	while (t->sibling != NULL){ t = t->sibling;}
						t->sibling = $2;
						$$ = $1;
					}
					else $$ = $2;
				}
			;
stmt		: if_stmt { $$ = $1; }
			| decl_stmt SEMI { $$ = $1; }
			| compound_stmt { $$ = $1; }
			| while_stmt { $$ = $1; }
			| assign_stmt SEMI { $$ = $1; }
			| read_stmt SEMI { $$ = $1; }
			| write_stmt SEMI { $$ = $1; }
			| error  { $$ = NULL; }
			;
compound_stmt
			: LBRACE stmt_list RBRACE
				{	$$ = newStmtNode(CompoundK);
					$$->child[0] = $2;
					$$->lineno = lineno;
				}
			;
decl_stmt	: type_spec ID 
				{	$$ = newStmtNode(DeclK);
					$$->attr.name = $2;
					/*      0      */
					$$->arrayLength = 0;
					$$->type = $1;
					$$->lineno = lineno;
				}
			| type_spec ID LBRACKET INT_VALUE RBRACKET
				{	$$ = newStmtNode(DeclK);
					$$->attr.name = $2;
					$$->arrayLength = $4;
					$$->type = $1;
					if($$->type == Int) {
						int int_array_temp[$4];
						$$->array.intArray = int_array_temp;
					}
					else if($$->type == Real) {
						double real_array_temp[$4];
						$$->array.realArray = real_array_temp;
					}
					$$->lineno = lineno;
				}
			;
type_spec	: INT 	{ $$ = $1; }
			| REAL 	{ $$ = $1; }
			| BOOL	{ $$ = $1; }
			;
if_stmt     : IF LPAREN bin_exp RPAREN stmt
				{	$$ = newStmtNode(IfK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->child[2] = NULL;
					$$->lineno = lineno;
				}
			| IF LPAREN bin_exp RPAREN stmt ELSE stmt
				{	$$ = newStmtNode(IfK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->child[2] = $7;
					$$->lineno = lineno;
				}
			;
while_stmt	: WHILE LPAREN bin_exp RPAREN stmt
				{	$$ = newStmtNode(WhileK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->lineno = lineno;
				}
			;
assign_stmt : ID ASSIGN exp
				{	$$ = newStmtNode(AssignK);
					$$->attr.name = $1;
					$$->child[0] = $3;
					$$->child[1] = NULL;
					$$->lineno = lineno;
				}
			| ID LBRACKET exp RBRACKET ASSIGN exp
				{	$$ = newStmtNode(AssignK);
					$$->attr.name = $1;
					/* child[1]  NULL                */
					/* child[0]          */
					$$->child[0] = $3;
					$$->child[1] = $6;
					$$->lineno = lineno;
				}
			;
read_stmt   : READ LPAREN ID RPAREN
				{	$$ = newStmtNode(ReadK);
					$$->attr.name = $3;
					$$->child[0] = NULL;
					$$->lineno = lineno;
				}
			| READ LPAREN ID LBRACKET exp RBRACKET RPAREN
				{	$$ = newStmtNode(ReadK);
					$$->attr.name = $3;
					$$->child[0] = $5;
					$$->lineno = lineno;
				}
			;
write_stmt  : WRITE LPAREN exp RPAREN
				{	$$ = newStmtNode(WriteK);
					$$->child[0] = $3;
					$$->lineno = lineno;
				}
			;
exp			: factor
				{	$$ = $1; }
			| bin_exp
				{	$$ = $1; }
			;
factor		: INT_VALUE
				{	$$ = newExpNode(IntValueK);
					$$->value.int_val = $1;
					$$->type = Int;
					$$->lineno = lineno;
				}
			| REAL_VALUE
				{	$$ = newExpNode(RealValueK);
					$$->value.real_val = $1;
					$$->type = Real;
					$$->lineno = lineno;
				}
			| LPAREN exp RPAREN
				{	$$ = $2; }
			| ID
				{	$$ = newExpNode(IdK);
					$$->attr.name = $1;
					/* child[0] NULL         ,      */
					$$->child[0] = NULL;
					$$->lineno = lineno;
				}
			| ID LBRACKET exp RBRACKET
				{	$$ = newExpNode(IdK);
					$$->attr.name = $1;
					$$->child[0] = $3;
					$$->lineno = lineno;
				}
			| error { $$ = NULL; }
			;
bin_exp     : /*       */
			  exp REL_OP exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = $2;
					$$->lineno = lineno;
				}
			/*       */
			| exp PLUS exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = PLUS;
					$$->lineno = lineno;
				}
			| exp SUB exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = SUB;
					$$->lineno = lineno;
				}
			| exp MUL exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = MUL;
					$$->lineno = lineno;
				}
			| exp DIV exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = DIV;
					$$->lineno = lineno;
				}
			| SUB exp %prec UMINUS
				{	$$ = newExpNode(OpK);
					$$->child[0] = $2;
					$$->child[1] = NULL;
					$$->attr.op = UMINUS;
					$$->lineno = lineno;
				}
			;

%%

int yyerror(char * message)
{	fprintf(listing,"Syntax error at line %d: %s
",lineno,message); fprintf(listing,"Current token: %s",tokenString); printToken(yychar); Error = TRUE; return 0; } /* */ TreeNode * parse(void) { iniLexer(); yyparse(); return savedTree; }
のコンパイルも非常に簡単で、
#The makefile for interpreter based on Flex and Bison
CC = gcc
FLAG = -w
interpreter: main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
	$(CC) $(FLAG) -o ../bin\&test/interpreter main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
main.o: main.c parse.h analyze.h
	$(CC) $(FLAG) -c main.c
cmm.tab.o:cmm.tab.c parse.h
	$(CC) $(FLAG) -c cmm.tab.c
cmm.tab.c:cmm.y
	bison -d cmm.y
cmm.lex.o:cmm.lex.c cmm.tab.h
	$(CC) $(FLAG) -c cmm.lex.c
cmm.lex.c:cmm.l
	flex -o cmm.lex.c cmm.l
symtab.o:symtab.c symtab.h globals.h
	$(CC) $(FLAG) -c symtab.c
analyze.o:analyze.c globals.h symtab.h analyze.h
	$(CC) $(FLAG) -c analyze.c
util.o:util.c
	$(CC) $(FLAG) -c util.c

clean:
	-rm *.o
最終テストの場合、直接利用します./interpreter[file_name]でいいです.
興味のある学生はこのプロジェクトをダウンロードして、flex&bisonを利用してスクリプト言語を迅速に実現すれば、もっと見て、理解してください.