ファイルを読み込んで単語の出現回数を集計する

2154 ワード

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
using namespace std;

/**
 * Counts how often each whitespace-separated word occurs in a text file.
 *
 * Prompts on standard output for a file name, reads that name from standard
 * input, tallies every token extracted from the file with operator>>, and
 * prints one line per distinct word in the key order of the map.
 *
 * @return 0 on success; 1 if no file name could be read or the file could
 *         not be opened.
 */
int main()
{
    std::cout << "Input file name: ";
    std::string filename;
    if (!(std::cin >> filename))
    {
        std::cerr << "No file name given" << '\n';
        return 1;
    }

    // c_str() keeps the constructor call valid for pre-C++11 toolchains too.
    std::ifstream fin(filename.c_str());
    if (!fin)
    {
        // Without this check a missing file would silently print nothing.
        std::cerr << "Cannot open file: " << filename << '\n';
        return 1;
    }

    // The original sketched a stop-word filter (skipping "the", "a", "but",
    // "an", "and", "or" and their capitalised forms) but left it commented
    // out, so every token is counted.
    std::map<std::string, int> word_count;
    std::string buff;
    while (fin >> buff)
    {
        // operator[] value-initialises a missing entry to 0 before the increment.
        ++word_count[buff];
    }
    // fin is closed by its destructor; no explicit close() is needed.

    for (std::map<std::string, int>::const_iterator iter = word_count.begin();
         iter != word_count.end(); ++iter)
    {
        const int occurrences = iter->second;
        // A space separates the label from the count so the number does not
        // run into the text.
        std::cout << iter->first << " occurs " << occurrences
                  << (occurrences > 1 ? " times" : " time") << '\n';
    }

    return 0;
}

拡張課題:1. 大文字と小文字を区別しない 2. 語幹抽出(時制などによる語形の違いを無視する) 3. 出現回数順に並べる 4. 中国語を認識できるようにする