C言語インタプリタ(2):参考用のFlexとBisonのスクリプト


タイトルにある2つの有名なツール(FlexとBison)は、インタプリタ本体では使っていませんが、参考として紹介します。
開発中、この2つのツールを理解するためにBisonとFlexをインストールし、yaccとlexのスクリプトを書いてみました。
 
まずyaccのスクリプトを見てみましょう。
このスクリプトから生成した構文解析プログラムは、指定したソースファイルを解析して整形出力します。自分で書いた構文規則のテストに使います。
%{

/*
 * Prologue copied verbatim into the generated parser.
 *
 * It declares the scanner interface, a custom location type, the error
 * callback and the format() helper that every grammar action uses to build
 * its semantic value (a heap-allocated string).
 */

#include <stdio.h>
#include <string.h>

/* Input stream consumed by the scanner; main() points it at the test file. */
extern FILE *yyin;

/* The scanner entry point is declared with C linkage; the scanner source
 * declares it the same way, so both translation units agree. */
extern "C" {
	int yylex(void);
}

/* Tell the generated parser that YYLTYPE is supplied here. */
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1

typedef struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
} YYLTYPE;


void yyerror(const char* err);

/* Scratch buffer shared by every format() call.  sprintf() is unbounded, so
 * a single formatted value must stay below 65535 bytes. */
char __yyac_string_buffer[65535];

/*
 * format(strptr, fmt, ...): printf-style formatting into the scratch buffer,
 * then store a strdup()'ed copy in strptr.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and stays safe in unbraced if/else bodies.  The first variadic argument
 * must be a format string; never pass scanned text there directly.
 */
#define format(strptr, ...) \
	do { \
		sprintf(__yyac_string_buffer, __VA_ARGS__); \
		(strptr) = strdup(__yyac_string_buffer); \
	} while (0)


%}

/* Semantic value type: every token and nonterminal declared in this grammar
 * is tagged <string> and therefore carries a C string. */
%union {
	char *string;
}

/* Produce detailed syntax-error messages. */
%error-verbose
/* Compile the parser's trace facility into the generated code. */
%debug
/* Generate a GLR parser rather than the default LALR(1) one. */
%glr-parser

/* All terminals share the <string> member of %union. */

/* TOKENS: Space & Comments
 * Declared here, but no grammar rule in this file references them. */
%token <string> SPACE
%token <string> SINGLE_LINE_COMMENTS
%token <string> MULTI_LINE_COMMENTS

/* TOKENS: Type Define
 * TK_CONST is declared, but no grammar rule in this file references it. */
%token <string> TK_CONST
%token <string> TK_VOID_TYPE
%token <string> TK_CHAR_TYPE
%token <string> TK_SHORT_TYPE
%token <string> TK_INT_TYPE
%token <string> TK_FLOAT_TYPE
%token <string> TK_SIGNED_TYPE
%token <string> TK_UNSIGNED_TYPE

/* TOKENS: Value & Identifier */
%token <string> TK_ID
%token <string> TK_CHAR
%token <string> TK_INTEGER
%token <string> TK_FLOAT
%token <string> TK_HEX
%token <string> TK_STRING

/* TOKENS: Expression Operator
 * Grouped as: assignment, logical, comparison, additive/bitwise/shift,
 * multiplicative, and unary/increment/decrement. */
%token <string> TK_ASSIGN TK_ADD_ASSIGN TK_SUB_ASSIGN TK_MUL_ASSIGN TK_DIV_ASSIGN TK_MOD_ASSIGN TK_AND_ASSIGN TK_OR_ASSIGN TK_XOR_ASSIGN TK_LSHIFT_ASSIGN TK_RSHIFT_ASSIGN
%token <string> TK_LOGIC_AND TK_LOGIC_OR
%token <string> TK_LESS TK_LESS_EQ TK_GREATER TK_GREATER_EQ TK_EQUAL TK_NOT_EQUAL
%token <string> TK_ADD TK_SUB TK_XOR TK_AND TK_OR TK_LSHIFT TK_RSHIFT
%token <string> TK_MUL TK_DIV TK_MOD
%token <string> TK_NOT TK_BITWISE_NOT TK_SELF_ADD TK_SELF_SUB

/* TOKENS: Control Flow */
%token <string> TK_IF TK_ELSE
%token <string> TK_FOR
%token <string> TK_DO TK_WHILE
%token <string> TK_SWITCH TK_CASE TK_DEFAULT
%token <string> TK_BREAK TK_CONTINUE TK_RETURN

/* Every nonterminal yields the pretty-printed source text of the construct it
 * matched, stored in the <string> member of %union. */

/* Type & Value */
%type <string> DataType
%type <string> StringValue
%type <string> Value

%type <string> Expression 

/* Variable */
%type <string> VarDeclID
%type <string> VarDeclIDList 
%type <string> VariableDecl

/* Statement */
%type <string> Statement 
%type <string> StatementSet
%type <string> Block

/* Control Flow */
%type <string> ControlFlow
%type <string> If DoWhile While Return Break Continue
%type <string> For For_Init For_Condition For_Iterator
%type <string> Switch CaseBranch CaseValue Case Default CaseSet 

/* Function */
%type <string> FunctionCalling
%type <string> FunctionArgumentDefine
%type <string> FunctionForwardingDecl
%type <string> FunctionParameterSet
%type <string> FunctionDecl
%type <string> FunctionDefine

/* Program (Program is the start symbol) */
%type <string> ProgramStatement
%type <string> ProgramStatementSet
%type <string> Program

/*
 * Operator precedence, lowest first, following the C operator table.
 *
 *  - Assignment operators are right-associative: a = b = c is a = (b = c).
 *  - ||, &&, |, ^, &, equality, relational, shift, additive and
 *    multiplicative operators each get their own level.
 *  - UNARY and SELF_OP are pseudo-tokens referenced by %prec in the
 *    Expression rules.
 *  - TK_SELF_ADD / TK_SELF_SUB are listed on the highest level so that a
 *    postfix ++/-- seen as lookahead after a binary or unary operand is
 *    shifted (a + b++ is a + (b++), -a++ is -(a++)).  Without a precedence
 *    on these tokens that shift/reduce conflict is not resolved statically.
 */
%right TK_ASSIGN TK_ADD_ASSIGN TK_SUB_ASSIGN TK_MUL_ASSIGN TK_DIV_ASSIGN TK_MOD_ASSIGN TK_AND_ASSIGN TK_OR_ASSIGN TK_XOR_ASSIGN TK_LSHIFT_ASSIGN TK_RSHIFT_ASSIGN
%left TK_LOGIC_OR
%left TK_LOGIC_AND
%left TK_OR
%left TK_XOR
%left TK_AND
%left TK_EQUAL TK_NOT_EQUAL
%left TK_LESS TK_LESS_EQ TK_GREATER TK_GREATER_EQ
%left TK_LSHIFT TK_RSHIFT
%left TK_ADD TK_SUB
%left TK_MUL TK_DIV TK_MOD
%right UNARY
%right SELF_OP TK_SELF_ADD TK_SELF_SUB

%start Program
%%

/*
 * DataType: a built-in type name, optionally prefixed by signed/unsigned.
 *
 * The text is written out as literals rather than taken from $1/$2: the
 * scanner returns keyword tokens without assigning yylval, so the semantic
 * values of these tokens do not hold the keyword text.
 */
DataType
	: TK_VOID_TYPE			{ format($$, "void"); }
	| TK_CHAR_TYPE			{ format($$, "char"); }
	| TK_SHORT_TYPE			{ format($$, "short"); }
	| TK_INT_TYPE			{ format($$, "int"); }
	| TK_FLOAT_TYPE			{ format($$, "float"); }
	| TK_SIGNED_TYPE		{ format($$, "signed"); }
	| TK_UNSIGNED_TYPE		{ format($$, "unsigned"); }
	| TK_SIGNED_TYPE TK_CHAR_TYPE		{ format($$, "signed char"); }
	| TK_SIGNED_TYPE TK_SHORT_TYPE		{ format($$, "signed short"); }
	| TK_SIGNED_TYPE TK_INT_TYPE		{ format($$, "signed int"); }
	| TK_UNSIGNED_TYPE TK_CHAR_TYPE		{ format($$, "unsigned char"); }
	| TK_UNSIGNED_TYPE TK_SHORT_TYPE	{ format($$, "unsigned short"); }
	| TK_UNSIGNED_TYPE TK_INT_TYPE		{ format($$, "unsigned int"); }
	;

/*
 * Scanned text (identifiers, literals, sub-expressions) is always passed to
 * format() as a "%s" argument, never as the format string itself, so a '%'
 * inside a string literal cannot be interpreted as a conversion.
 */

/* StringValue: one or more adjacent string literals, joined by a newline. */
StringValue
	: TK_STRING					{ format($$, "%s", $1); }
	| StringValue TK_STRING		{ format($$, "%s\n%s", $1, $2); }
	;

/* Value: a literal constant, re-quoted where the scanner stripped the quotes. */
Value
	: TK_CHAR			{ format($$, "'%s'", $1); }
	| StringValue		{ format($$, "\"%s\"", $1); }
	| TK_INTEGER		{ format($$, "%s", $1); }
	| TK_HEX			{ format($$, "%s", $1); }
	| TK_FLOAT			{ format($$, "%s", $1); }
	;

/* Expression: identifiers, literals, operators, parentheses and calls. */
Expression
	: TK_ID				{ format($$, "%s", $1); }
	| Value				{ format($$, "%s", $1); }

	/* Operators */
	/* Arithmetic */
	| Expression TK_ADD Expression		{ format($$, "%s + %s", $1, $3); }
	| Expression TK_SUB Expression		{ format($$, "%s - %s", $1, $3); }
	| Expression TK_MUL Expression		{ format($$, "%s * %s", $1, $3); }
	| Expression TK_DIV Expression		{ format($$, "%s / %s", $1, $3); }
	| Expression TK_MOD Expression		{ format($$, "%s %% %s", $1, $3); }
	| Expression TK_XOR Expression		{ format($$, "%s ^ %s", $1, $3); }

	/* Self Add/Sub */
	| Expression TK_SELF_ADD %prec SELF_OP	{ format($$, "%s++", $1); }
	| Expression TK_SELF_SUB %prec SELF_OP	{ format($$, "%s--", $1); }
	| TK_SELF_ADD Expression %prec SELF_OP	{ format($$, "++%s", $2); }
	| TK_SELF_SUB Expression %prec SELF_OP	{ format($$, "--%s", $2); }

	/* Unary */
	| TK_ADD Expression %prec UNARY			{ format($$, "+ %s", $2); }
	| TK_SUB Expression %prec UNARY			{ format($$, "- %s", $2); }
	| TK_NOT Expression %prec UNARY			{ format($$, "! %s", $2); }
	| TK_BITWISE_NOT Expression %prec UNARY	{ format($$, "~ %s", $2); }

	/* Logic */
	| Expression TK_GREATER Expression		{ format($$, "%s > %s", $1, $3); }
	| Expression TK_GREATER_EQ Expression	{ format($$, "%s >= %s", $1, $3); }
	| Expression TK_LESS Expression			{ format($$, "%s < %s", $1, $3); }
	| Expression TK_LESS_EQ Expression		{ format($$, "%s <= %s", $1, $3); }
	| Expression TK_EQUAL Expression		{ format($$, "%s == %s", $1, $3); }
	| Expression TK_NOT_EQUAL Expression	{ format($$, "%s != %s", $1, $3); }
	| Expression TK_LOGIC_AND Expression	{ format($$, "%s && %s", $1, $3); }
	| Expression TK_LOGIC_OR Expression		{ format($$, "%s || %s", $1, $3); }

	/* Bit */
	| Expression TK_AND Expression			{ format($$, "%s & %s", $1, $3); }
	| Expression TK_OR Expression			{ format($$, "%s | %s", $1, $3); }
	| Expression TK_LSHIFT Expression		{ format($$, "%s << %s", $1, $3); }
	| Expression TK_RSHIFT Expression		{ format($$, "%s >> %s", $1, $3); }

	/* Assign */
	/* The plain assignment is also echoed as soon as it is reduced. */
	| Expression TK_ASSIGN Expression			{ format($$, "%s = %s", $1, $3); printf("%s", $$); }
	| Expression TK_ADD_ASSIGN Expression		{ format($$, "%s += %s", $1, $3); }
	| Expression TK_SUB_ASSIGN Expression		{ format($$, "%s -= %s", $1, $3); }
	| Expression TK_MUL_ASSIGN Expression		{ format($$, "%s *= %s", $1, $3); }
	| Expression TK_DIV_ASSIGN Expression		{ format($$, "%s /= %s", $1, $3); }
	/* "%%=" prints a literal "%="; a bare "%=" is not a valid conversion. */
	| Expression TK_MOD_ASSIGN Expression		{ format($$, "%s %%= %s", $1, $3); }
	| Expression TK_AND_ASSIGN Expression		{ format($$, "%s &= %s", $1, $3); }
	| Expression TK_OR_ASSIGN Expression		{ format($$, "%s |= %s", $1, $3); }
	| Expression TK_XOR_ASSIGN Expression		{ format($$, "%s ^= %s", $1, $3); }
	| Expression TK_LSHIFT_ASSIGN Expression	{ format($$, "%s <<= %s", $1, $3); }
	| Expression TK_RSHIFT_ASSIGN Expression	{ format($$, "%s >>= %s", $1, $3); }

	/* Parentheses */
	| '(' Expression ')'		{ format($$, "( %s )", $2); }

	| FunctionCalling			{ format($$, "%s", $1); }
	;

/* VarDeclID: one declarator, optionally with an initializer. */
VarDeclID
	: TK_ID							{ format($$, "%s", $1); }
	| TK_ID TK_ASSIGN Expression	{ format($$, "%s=%s", $1, $3); }
	;

/* VarDeclIDList: comma-separated declarators. */
VarDeclIDList
	: VarDeclID						{ format($$, "%s", $1); }
	| VarDeclIDList ',' VarDeclID	{ format($$, "%s, %s", $1, $3); }
	;

/* VariableDecl: type followed by declarators (no trailing ';'). */
VariableDecl
	: DataType VarDeclIDList		{ format($$, "%s %s", $1, $2); }
	;

Statement
	: /* Dummy */ ';'		{ format($$, "/* empty */;\n"); }
	| VariableDecl ';'		{ format($$, "%s;\n", $1); }
	| Expression ';'		{ format($$, "%s;\n", $1); }
	| Block					{ format($$, "%s", $1); }
	| ControlFlow			{ format($$, "%s", $1); }
	;

StatementSet
	: Statement					{ format($$, "%s", $1); }
	| StatementSet Statement	{ format($$, "%s%s", $1, $2); }
	;

Block
	: '{' '}'					{ format($$, "{\n/* Empty Block */\n}\n"); }
	| '{' StatementSet '}'		{ format($$, "{\n%s\n}\n", $2); }
	;

If
	: TK_IF '(' Expression ')' Statement
		{ format($$, "if(%s)\n\t%s", $3, $5); }
	| TK_IF '(' Expression ')' Statement TK_ELSE Statement
		{ format($$, "if(%s)\n\t%selse\n\t%s", $3, $5, $7); }
	;

For_Init
	: /* Empty */		{ format($$, "/* empty */"); }
	| VariableDecl		{ format($$, "%s", $1); }
	| Expression		{ format($$, "%s", $1); }
	;

For_Condition
	: /* Empty */		{ format($$, "/* empty */"); }
	| Expression		{ format($$, "%s", $1); }
	;

For_Iterator
	: /* Empty */		{ format($$, "/* empty */"); }
	| Expression		{ format($$, "%s", $1); }
	;

/* The three clauses are printed separated by ';', as in the input. */
For
	: TK_FOR '(' For_Init ';' For_Condition ';' For_Iterator ')' Statement
		{ format($$, "for(%s; %s; %s)\n%s\n", $3, $5, $7, $9); }
	;

/* The body is any Statement (normally a Block), so both
 * "do { a; b; } while (x);" and "do a; while (x);" are accepted. */
DoWhile
	: TK_DO Statement TK_WHILE '(' Expression ')' ';'
		{ format($$, "do\n%swhile(%s);\n", $2, $5); }
	;

While
	: TK_WHILE '(' Expression ')' Statement
		{ format($$, "while(%s)\n%s\n", $3, $5); }
	;

CaseValue
	: TK_CHAR			{ format($$, "%s", $1); }
	| TK_INTEGER		{ format($$, "%s", $1); }
	| TK_HEX			{ format($$, "%s", $1); }
	;

CaseBranch
	: TK_CASE CaseValue ':' /* Empty */
		{ format($$, "case %s: /* Empty */", $2); }
	| TK_CASE CaseValue ':' StatementSet
		{ format($$, "case %s:\n%s", $2, $4); }
	;

Default
	: TK_DEFAULT ':' StatementSet
		{ format($$, "default:\n%s\n", $3); }
	;

Case
	: CaseBranch		{ format($$, "%s", $1); }
	| Default			{ format($$, "%s", $1); }
	;

CaseSet
	: Case				{ format($$, "%s", $1); }
	| CaseSet Case		{ format($$, "%s\n%s", $1, $2); }
	;

Switch
	: TK_SWITCH '(' Expression ')' '{' CaseSet '}'
		{ format($$, "switch(%s)\n{\n%s\n} // switch\n", $3, $6); }
	;

Break
	: TK_BREAK ';'		{ format($$, "break ;\n"); }
	;

Continue
	: TK_CONTINUE ';'	{ format($$, "continue ;\n"); }
	;

Return
	: TK_RETURN ';'					{ format($$, "return ;\n"); }
	| TK_RETURN Expression ';'		{ format($$, "return %s;\n", $2); }
	;

ControlFlow
	: If			{ format($$, "%s", $1); }
	| For			{ format($$, "%s", $1); }
	| DoWhile		{ format($$, "%s", $1); }
	| While			{ format($$, "%s", $1); }
	| Switch		{ format($$, "%s", $1); }
	| Break			{ format($$, "%s", $1); }
	| Continue		{ format($$, "%s", $1); }
	| Return		{ format($$, "%s", $1); }
	;

/* Function */

/* Parameter list of a declaration; the comma is emitted only between
 * parameters, never after the last one. */
FunctionArgumentDefine
	: DataType TK_ID
		{ format($$, "%s %s", $1, $2); }
	| DataType TK_ID ',' FunctionArgumentDefine
		{ format($$, "%s %s, %s", $1, $2, $4); }
	;

FunctionDecl
	: DataType TK_ID '(' ')'
		{ format($$, "%s %s()", $1, $2); }
	| DataType TK_ID '(' FunctionArgumentDefine ')'
		{ format($$, "%s %s(%s)", $1, $2, $4); }
	;

/* The terminating ';' consumed here is put back into the printed text. */
FunctionForwardingDecl
	: FunctionDecl ';'		{ format($$, "%s;", $1); }
	;

FunctionDefine
	: FunctionDecl Block	{ format($$, "%s\n%s", $1, $2); }
	;

FunctionParameterSet
	: Expression
		{ format($$, "%s", $1); }
	| Expression ',' FunctionParameterSet
		{ format($$, "%s, %s", $1, $3); }
	;

FunctionCalling
	: TK_ID '(' ')'							{ format($$, "%s()", $1); }
	| TK_ID '(' FunctionParameterSet ')'	{ format($$, "%s(%s)", $1, $3); }
	;

/* Top-level items: global variables, prototypes and function definitions. */
ProgramStatement
	: VariableDecl ';'			{ format($$, "%s;\n", $1); }
	| FunctionForwardingDecl	{ format($$, "%s\n", $1); }
	| FunctionDefine			{ format($$, "%s\n", $1); }
	;

ProgramStatementSet
	: ProgramStatement
		{ format($$, "%s", $1); }
	| ProgramStatementSet ProgramStatement
		{ format($$, "%s%s", $1, $2); }
	;

/* Start symbol: print the reconstructed program text. */
Program
	: ProgramStatementSet		{ $$ = $1; printf("%s", $1); }
	;

%%

/* Error callback invoked by the generated parser. */
void yyerror(const char* err)
{
	printf("\nError:%s\n", err);
}

/*
 * Test driver: parses one source file and prints the reconstructed text.
 *
 * The file to parse may be given as the first command-line argument; when it
 * is omitted the original hard-coded test path is used.
 * Returns 0 on success, 1 if the file cannot be opened or parsing failed.
 */
int main(int argc, char *argv[])
{
	const char *path = (argc > 1)
		? argv[1]
		: "z:\\Syntax.lex.test\\Syntax.Test.Code.txt";
	int status = 0;

	printf("Running..\n");
	printf("Loading file...\n");

	yyin = fopen(path, "r");
	if (!yyin)
	{
		printf("Can't open code file");
		return 1;
	}

	printf("Parsing...\n");
	do
	{
		/* yyparse() returns non-zero when the input was rejected. */
		if (yyparse() != 0)
			status = 1;
	} while (!feof(yyin));

	printf("\nDone.");
	fclose(yyin);
	return status;
}

上記のyaccスクリプトには、テストに使うコードファイルのパスが直接書き込まれています。ご自分の環境に合わせて変更してください。
 
yaccとlexはいつもペアで使われます。
ソースコードを字句解析(トークン分割)するlexスクリプトを次に示します。
%{
/*
 * Scanner for the C-subset grammar.
 *
 * Keywords and operators are returned as bare token codes (yylval is not
 * assigned for them).  Identifiers and literals additionally store a
 * strdup()'ed copy of their text in yylval.string.  Whitespace and comments
 * are discarded.  Character and string literals are collected, with escape
 * sequences decoded, in the exclusive start conditions chr and str.
 */
#include "syntax.yacc.hh"
#include <string.h>

#undef	yywrap
#define yywrap()	1

extern "C" {
	int yylex(void);
}

/* Report a lexical error on standard output. */
void error(const char* err)
{
	printf("Error:%s\n", err);
}
%}

DIGIT	[0-9]

%x comment chr str

%%
	char __lex_string_buffer[4096];
	char *__lex_string_buf_ptr;

[ \t\r\n]+	;

const		{ return TK_CONST; }
void		{ return TK_VOID_TYPE; }
char		{ return TK_CHAR_TYPE; }
short		{ return TK_SHORT_TYPE; }
int			{ return TK_INT_TYPE; }
float		{ return TK_FLOAT_TYPE; }
signed		{ return TK_SIGNED_TYPE; }
unsigned	{ return TK_UNSIGNED_TYPE; }

if			{ return TK_IF; }
else		{ return TK_ELSE; }
for			{ return TK_FOR; }
do			{ return TK_DO; }
while		{ return TK_WHILE; }
switch		{ return TK_SWITCH; }
case		{ return TK_CASE; }
default		{ return TK_DEFAULT; }
break		{ return TK_BREAK; }
continue	{ /* previously missing: "continue" was scanned as TK_ID */ return TK_CONTINUE; }
return		{ return TK_RETURN; }

&&			{ return TK_LOGIC_AND; }
\|\|		{ return TK_LOGIC_OR; }
&			{ return TK_AND; }
\|			{ return TK_OR; }
\>\>		{ return TK_RSHIFT; }
\<\<		{ return TK_LSHIFT; }

==			{ return TK_EQUAL; }
=			{ return TK_ASSIGN; }
!=			{ return TK_NOT_EQUAL; }
\>=			{ return TK_GREATER_EQ; }
\>			{ return TK_GREATER; }
\<=			{ return TK_LESS_EQ; }
\<			{ return TK_LESS; }

\+\+		{ return TK_SELF_ADD; }
--			{ return TK_SELF_SUB; }

\+=			{ return TK_ADD_ASSIGN; }
-=			{ return TK_SUB_ASSIGN; }
\*=			{ return TK_MUL_ASSIGN; }
\/=			{ return TK_DIV_ASSIGN; }
\%=			{ return TK_MOD_ASSIGN; }
&=			{ return TK_AND_ASSIGN; }
\|=			{ return TK_OR_ASSIGN; }
\^=			{ return TK_XOR_ASSIGN; }
\<\<=		{ return TK_LSHIFT_ASSIGN; }
\>\>=		{ return TK_RSHIFT_ASSIGN; }

\+			{ return TK_ADD; }
-			{ return TK_SUB; }
\*			{ return TK_MUL; }
\/			{ return TK_DIV; }
\%			{ return TK_MOD; }
\^			{ return TK_XOR; }
!			{ return TK_NOT; }
~			{ return TK_BITWISE_NOT; }

{DIGIT}+	{ yylval.string = strdup(yytext); return TK_INTEGER; }

0[xX][a-fA-F0-9]+	{ yylval.string = strdup(yytext); return TK_HEX; }

({DIGIT}+\.{DIGIT}*)|({DIGIT}+\.?{DIGIT}*[fF])|({DIGIT}+\.?{DIGIT}*[eE][+-]{DIGIT}+)	{ yylval.string = strdup(yytext); return TK_FLOAT; }

[a-zA-Z\$_][a-zA-Z\$_0-9]*	{ yylval.string = strdup(yytext); return TK_ID; }

"//"[^\n]*	{ /* Single line comment; the newline is left to the whitespace rule so a comment on the last line without a trailing newline is also skipped */ }

"/*"					BEGIN(comment);
<comment>[^*\n]*		{ /* eat anything that is not a '*' */ }
<comment>"*"+[^*/\n]*	{ /* eat '*'s not followed by '/' */ }
<comment>\n				{ /* newline inside a comment */ }
<comment>"*"+"/"		{ BEGIN(INITIAL); /* Multi-line comments */ }

'			{ __lex_string_buf_ptr = __lex_string_buffer; BEGIN(chr); }

<chr>'		{
				/* closing quote: hand the collected text to the parser */
				BEGIN(INITIAL);
				*__lex_string_buf_ptr = '\0';
				__lex_string_buf_ptr = __lex_string_buffer;
				yylval.string = strdup(__lex_string_buf_ptr);
				return TK_CHAR;
			}
<chr>\\r	{ *__lex_string_buf_ptr++ = '\r'; }
<chr>\\n	{ *__lex_string_buf_ptr++ = '\n'; }
<chr>\\t	{ *__lex_string_buf_ptr++ = '\t'; }
<chr>\\b	{ *__lex_string_buf_ptr++ = '\b'; }
<chr>\\f	{ *__lex_string_buf_ptr++ = '\f'; }
<chr>\\[0-7]{1,3}	{
				/* octal escape: parse the matched text with sscanf();
				 * scanf() would read standard input instead */
				int result = 0;
				(void) sscanf(yytext + 1, "%o", &result);
				if (result > 0xff)
				{
					error("Invalid escape value: out of bounds");
					yyterminate();
				}
				*__lex_string_buf_ptr++ = result;
			}
<chr>\\(.|\n)	{
				/* any other escaped character (e.g. \' or \\) stands for
				 * itself, matching the rule used for string literals */
				*__lex_string_buf_ptr++ = yytext[1];
			}
<chr>[^\\\t\n']	{ *__lex_string_buf_ptr++ = yytext[0]; }

\"			{ __lex_string_buf_ptr = __lex_string_buffer; BEGIN(str); }

<str>\"		{
				/* saw closing quote - all done */
				BEGIN(INITIAL);
				*__lex_string_buf_ptr = '\0';
				__lex_string_buf_ptr = __lex_string_buffer;
				/* return string constant token type and
				 * value to parser */
				yylval.string = strdup(__lex_string_buf_ptr);
				return TK_STRING;
			}

<str>\n		{
				/* error - unterminated string constant */
				/* generate error message */
				error("Unterminated string");
				yyterminate();
			}

<str>\\[0-7]{1,3}	{
				/* octal escape sequence */
				int result;
				(void) sscanf( yytext + 1, "%o", &result );
				if ( result > 0xff )
				{
					/* error, constant is out-of-bounds */
					error("Char value out of bound.");
					yyterminate();
				}
				*__lex_string_buf_ptr++ = result;
			}

<str>\\[0-9]+	{
				/* generate error - bad escape sequence; something
				 * like '\48' or '\0777777' */
				error("Invalid escape sequence");
				yyterminate();
			}

<str>\\n	{ *__lex_string_buf_ptr++ = '\n'; }
<str>\\t	{ *__lex_string_buf_ptr++ = '\t'; }
<str>\\r	{ *__lex_string_buf_ptr++ = '\r'; }
<str>\\b	{ *__lex_string_buf_ptr++ = '\b'; }
<str>\\f	{ *__lex_string_buf_ptr++ = '\f'; }

<str>\\(.|\n)	{ *__lex_string_buf_ptr++ = yytext[1]; }

<str>[^\\\n\"]+	{
				char *yptr = yytext;
				while ( *yptr )
					*__lex_string_buf_ptr++ = *yptr++;
			}

.			return yytext[0];

%%

lexスクリプトの一部の記述は、Flexに付属するマニュアルのサンプルコードを参考にしています(実際にはほぼそのままコピーしたものです)。
 
最後に、BisonとFlexを実行するビルド用スクリプトも示しておきます。このスクリプトはBATファイルです。
@echo off
rem ---------------------------------------------------------------------
rem Generates the parser (bison) and the scanner (flex), compiles both
rem with cl.exe and runs the resulting test executable.
rem Exits with code 1 if any step fails.
rem ---------------------------------------------------------------------

set IN_ROOT=z:\SharpC\Grammar\Common\
set OUT_ROOT=z:\Syntax.Lex.Test\

set GNU_ROOT=C:\gnuwin32\
set GNU_INC=%GNU_ROOT%include
set GNU_LIB=%GNU_ROOT%lib
set GNU_FLEX_LIB=%GNU_LIB%\libfl.a
set GNU_YACC_LIB=%GNU_LIB%\liby.a

set YACC_SRC=%IN_ROOT%Syntax.yacc
set YACC_TARGET_CC=%OUT_ROOT%Syntax.yacc.cc
set YACC_TARGET_HH=%OUT_ROOT%Syntax.yacc.hh
set YACC_TARGET_RPT=%OUT_ROOT%Syntax.report.yacc.txt

set FLEX_SRC=%IN_ROOT%Syntax.Lex
set FLEX_TARGET=%OUT_ROOT%Syntax.lex.cc

set BIN_TARGET=%OUT_ROOT%Debug\Syntax.Lex.exe

cls

@echo ============= Clean ===============
if exist "%YACC_TARGET_CC%" del "%YACC_TARGET_CC%"
if exist "%YACC_TARGET_HH%" del "%YACC_TARGET_HH%"
if exist "%FLEX_TARGET%" del "%FLEX_TARGET%"
if exist "%BIN_TARGET%" del "%BIN_TARGET%"

@echo ============= Syntax ===============
bison -d -v "%YACC_SRC%" -o"%YACC_TARGET_CC%" --report=state --report-file="%YACC_TARGET_RPT%"

rem "if errorlevel N" is true when the exit code is >= N, so a failure must
rem be tested with "errorlevel 1"; "not errorlevel 0" never fires for the
rem non-negative exit codes these tools return.
if errorlevel 1 goto failed
@echo =============   Lex  ===============
flex -o"%FLEX_TARGET%" "%FLEX_SRC%" 

if errorlevel 1 goto failed
@echo ============= Build ===============
rem The scanner source is FLEX_TARGET (LEX_TARGET was never defined, which
rem left the scanner out of the compile).
cl.exe  "%YACC_TARGET_CC%" "%FLEX_TARGET%" /I"%GNU_INC%" /link "%GNU_FLEX_LIB%" "%GNU_YACC_LIB%" /OUT:"%BIN_TARGET%" 

if errorlevel 1 goto failed
if not exist "%BIN_TARGET%" goto failed
@echo ============= Run ===============
"%BIN_TARGET%"

goto exit
:failed
@echo =================================
@echo Failed.
exit /b 1
:exit

言い忘れていましたが、作業はすべてWindows + VSE 2012(Visual Studio Express 2012)の環境で行いました。上記のビルド用スクリプトも、VSEの開発者コマンドプロンプトで実行する必要があります。VSEやそのコマンドプロンプトが何か分からない方は、この連載は見なかったことにしてください。