コンパイラ原理 字句解析実験コード(C/C++実装)


ひとまず字句解析のコードを書いてみました。おそらくすべてのC言語プログラムを解析でき、少なくともこのプログラム自身は解析できます。保守性は比較的高く、時間・空間効率も悪くありません。辞書木(トライ木)のような構造(オートマトンに近いかもしれません)を採用しており、任意の長さの区切り記号を認識できます。また、シングルクォートおよびダブルクォートの中のバックスラッシュによるエスケープ文字も処理できます。エラー位置の報告は妥当で、メッセージも明確です。muuuuuuuuuuuuuuuuuuuuuuuuuua!
#include <cstdio>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// Sizes of the two lookup tables below and of the input character set.
const int KEY_WORD_NUM = 22 ;
const int DIVIDE_WORD_NUM = 49 ;
const int CHARACTER_SET_NUM = 256 ;

// Category labels attached to scanned tokens.
const string NUM1 = "NUM1" ;                         // number without a '.'
const string NUM2 = "NUM2" ;                         // number containing a '.'
const string CONSTANT_STRING = "CONSTANT_STRING" ;   // text enclosed in ' or "
const string ID = "ID" ;                             // identifier that is not a keyword

// Delimiters that open a comment instead of producing a token.
const string ANNOTATIONL1 = "//" ;
const string ANNOTATIONL2 = "/*" ;

// Reserved words; an identifier found in this table is reported with itself as its type.
const string key_word [ KEY_WORD_NUM ] = {
    "if", "else", "for", "while", "do", "int", "double", "char",
    "read", "write", "const", "vector", "struct", "map", "void", "bool",
    "print", "scanf", "return", "inline", "true", "false"
} ;

// Operators and punctuation (exactly DIVIDE_WORD_NUM entries). Every proper
// prefix of a multi-character entry is itself an entry.
const string divide_word [ DIVIDE_WORD_NUM ] = {
    "!", "~", "(", ")", "{", "}", "[", "]",
    "+", "-", "*", "/", "%", "=", "^", "&", "|",
    "&&", "||", "<<", ">>", "++", "--",
    "<", ">", "<=", ">=", "!=", "==",
    "+=", "-=", "/=", "*=", "%=", "^=", "&=", "|=", ">>=", "<<=",
    ".", ",", ":", ";", "'", "\"", "\\", "#", "//", "/*"
} ;

// One state of the delimiter trie stored in state_graph.
struct Node{
    string str;                // characters on the path from the root to this state
    bool is_terminator;        // true once a complete entry of divide_word ends here
    map < char, int > next;    // edge character -> index of the child state

    // Creates an unlabeled, non-terminating state with no outgoing edges.
    Node() : str(), is_terminator( false ), next() {}

    // Creates a non-terminating state labeled with the given path text.
    Node( const string &label ) : str( label ), is_terminator( false ), next() {}
};
struct Word{
    string type;
    string val;
    Word(){
        type.clear();
        val.clear();
    }
    Word( const string &str , const string &val ){
        this -> type = str;
        this -> val = val;
    }
    void print(){
        printf( " %s  %s
"
, this -> type.c_str(), this -> val.c_str() ); } }; int now_line = 1; char ch = 0; map < string, int > key_word_id; map < char, int > delimiter_head; vector < Node > state_graph; vector < Word > ans ; string error_information ; string fin_name , fout_name ; void read(char &c){ int ret = scanf("%c",&c); if( c == '
'
) now_line++ ; if( ret == EOF ) c = EOF; } inline bool Is_Character(char &c){ return c>='A'&&c<='Z' || c>='a'&&c<='z' || c=='_'; } inline bool Is_Number(char &c){ return c>='0'&&c<='9' ; } inline bool Is_delimiter_head(char &c){ return delimiter_head.count(c) > 0 ; } void InitTestScan(){ /// key_word_id.clear(); for( int i=0; i<KEY_WORD_NUM; i++ ) { key_word_id[ key_word[i] ] = i; } /// delimiter_head.clear(); for( int i=0; i<DIVIDE_WORD_NUM; i++ ) { delimiter_head[ divide_word[i][0] ] = i; } state_graph.clear(); state_graph.push_back( Node() ); for( int i=0; i<DIVIDE_WORD_NUM; i++ ) { const string &temp = divide_word[i]; int node_index = 0; for( int j=0; j<(int)temp.length(); j++ ) { Node &now_node = state_graph[node_index]; if( now_node.next.count( temp[j] ) == 0 ) { now_node.next[ temp[j] ] = state_graph.size(); state_graph.push_back( Node( now_node.str +temp[j] ) ); node_index = state_graph.size() -1 ; } else{ node_index = now_node.next[ temp[j] ]; } } state_graph[node_index].is_terminator = true ; } ans.clear(); now_line = 1; } void PrintError( int op ){ if( op == 1 ) { printf( "ERROR: fail to open the source program !
"
); } if( op == 2 ) { // printf( "ERROR: fail to open the output file !
"
); } if( op == 3 ) { printf( "ERROR: the number %s is not comply with the rules !
"
, error_information.c_str() ); } if( op == 4 ) { printf( "ERROR: %s is not a divide word !
"
, error_information.c_str() ); } if( op == 5 ) { printf( "ERROR: the character %s in line %d was not clear !
"
, error_information.c_str(), now_line ); } if( op == 6 ) { printf( "ERROR: missing terminating \" character in line %d !
"
, now_line ); } if( op == 7 ) { printf( "ERROR: missing terminating \' character in line %d !
"
, now_line ); } } int InputTestScan(){ FILE *temp ; printf( "input the name of the source program...
"
); cin >> fin_name ; temp = fopen( fin_name.c_str(), "r" ); if( temp == NULL ) return 1; else fclose( temp ); printf( "input the name of the output file...
"
); cin >> fout_name ; freopen( fin_name.c_str(), "r", stdin ) ; freopen( fout_name.c_str(), "w", stdout ) ; return 0; } int TestScan(){ int ret = InputTestScan(); if( ret != 0 ) return ret; InitTestScan(); read( ch ); while( ch != EOF ){ while( ch==' ' || ch=='\t' || ch=='
'
){ read( ch ); if( ch == EOF ) return 0; } if( Is_Character(ch) == true ) { // string str; while( Is_Character(ch) == true || Is_Number(ch) == true ) { str += ch; read( ch ); } if( key_word_id.count(str) > 0 ) { ans.push_back( Word(str, str) ); } else { ans.push_back( Word(ID, str) ); } } else if( Is_Number(ch) == true ) { // string str; bool is_decimal = false; while( Is_Number(ch) == true ) { str +=ch; read( ch ); } if( ch == '.' ) { is_decimal = true; str +=ch; read( ch ); while( Is_Number(ch) == true ) { str +=ch; read( ch ); } } if( is_decimal == true ) { ans.push_back( Word( NUM2, str ) ); if( str.back() == '.' ) { error_information = str ; return 3; } } else{ ans.push_back( Word( NUM1, str ) ); } } else if ( ch == '\'' ) { // string str; str +=ch; read( ch ); while( ch!='\'' && ch!='
'
) { if( ch == '\\' ) { str +=ch; read( ch ); } str +=ch; read( ch ); } if( ch != '\'' ) { return 7; } str +=ch; ans.push_back( Word(CONSTANT_STRING, str) ); read( ch ); } else if ( ch == '"' ) { // string str; str +=ch; read( ch ); while( ch!='"' && ch!='
'
) { if( ch == '\\' ) { str +=ch; read( ch ); } str +=ch; read( ch ); } if( ch != '"' ) { return 6; } str +=ch; ans.push_back( Word(CONSTANT_STRING, str) ); read( ch ); } else if ( Is_delimiter_head(ch) == true ) { // int node_index = 0; while( state_graph[node_index].next.count(ch) > 0 ) { node_index = state_graph[node_index].next[ch] ; read( ch ); } Node &now_node = state_graph[node_index]; if( now_node.str == ANNOTATIONL1 ) { // 1 while( ch != '
'
) read( ch ); read( ch ); } else if( now_node.str == ANNOTATIONL2 ) { // 2 char pre_ch = ch; read( ch ); while( !(pre_ch == '*' && ch == '/') ) { pre_ch = ch; read( ch ); } read( ch ); } else if( state_graph[node_index].is_terminator == false ) { // error_information = state_graph[node_index].str ; return 4; } else { // ans.push_back( Word(now_node.str, now_node.str) ); } } else{ // error_information = ch; return 5; } } } void output(){ printf( "Lexical analysis completed !
"
); for( int i=0; i<ans.size(); i++ ) { ans[i].print(); } printf( "----------------------------------------------------------------
"
); printf(" Statistical information:
"
); int num_int = 0, num_double = 0, num_string = 0, num_variable = 0, num_keyword = 0, num_divide_word = 0 ; for( int i=0; i<ans.size() ;i++ ) { if( ans[i].type == NUM1) { num_int++ ; } else if( ans[i].type == NUM2 ) { num_double++ ; } else if( ans[i].type == CONSTANT_STRING ) { num_string++ ; } else if( ans[i].type == ID ) { num_variable++ ; } else if( key_word_id.count( ans[i].type ) > 0 ) { num_keyword++ ; } else { num_divide_word++ ; } } printf(" num_int = %d
num_double = %d
num_string = %d
num_variable = %d
num_keyword = %d
num_divide_word = %d
"
, num_int, num_double, num_string, num_variable, num_keyword, num_divide_word ) ; } int main() { int flag = TestScan(); if( flag == 0 ) { output(); freopen( "CON", "w", stdout ); printf( "Lexical analysis completed, the results have been saved in the \"%s\" file !
"
, fout_name.c_str() ); } else { freopen( "CON", "w", stdout ); PrintError( flag ); } return 0; }