coreseekの検索辞書へのインポート

1491 ワード

   .exe  txt 

// Builds a list of new dictionary words: every Chinese word found in
// hefei_house_name.txt that does not already occur in unigram.txt is appended
// to newciku.txt as a two-line entry ("word<TAB>1" followed by "x:1").
//
// Both input files must be UTF-8: the extraction regex uses the /u modifier.

ini_set('max_execution_time', '6000');

// Close the active output buffer (if any) so the progress markers below are
// sent as soon as they are printed instead of at the end of the run.
if (ob_get_level() > 0) {
    ob_end_flush();
}

echo ' ...';
flush();

$filename = 'hefei_house_name.txt'; // candidate words, one per line
$content = trim(cikuReadUtf8File($filename));

// Normalise each line down to its Chinese characters. The array is keyed by
// the normalised word so that lines which collapse to the same word (for
// example "abc万科" and "万科2") are kept only once, in first-seen order.
$arr1 = array();
foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
    $word = dealChinese($line);
    if ($word !== '') {
        $arr1[$word] = $word;
    }
}
$arr1 = array_values($arr1);

echo ' ...';
flush();

$filename2 = "unigram.txt"; // words that are already known (presumably the existing dictionary)
$arr2 = cikuChineseRuns(cikuReadUtf8File($filename2));

echo ' ...';
flush();

// Keep only the candidates that are not already present in unigram.txt.
$array_diff = array_diff($arr1, $arr2);

echo ' ...';
flush();

$words = '';
foreach ($array_diff as $word) {
    $words .= $word . "\t1\r\nx:1\r\n";
}

// FILE_APPEND: entries are added to whatever newciku.txt already contains.
if (file_put_contents('newciku.txt', $words, FILE_APPEND) === false) {
    cikuAbort('Unable to write newciku.txt');
}

echo 'done!';

/**
 * Strips everything except Chinese characters from a string.
 *
 * Every run of characters in the range U+4E00..U+9FA5 is extracted and the
 * runs are concatenated with $join between them.
 *
 * @param string $str  UTF-8 text to filter.
 * @param string $join Separator placed between consecutive runs (default: none).
 *
 * @return string The joined runs, or '' when $str contains no such characters.
 */
function dealChinese($str, $join = '')
{
    return join($join, cikuChineseRuns($str));
}

/**
 * Returns every maximal run of characters in U+4E00..U+9FA5 found in $str.
 *
 * @param string $str UTF-8 text to scan.
 *
 * @return array List of matched runs in order of appearance; empty when
 *               nothing matches or when the regex engine reports an error
 *               (for example because $str is not valid UTF-8).
 */
function cikuChineseRuns($str)
{
    $matches = array();
    $found = preg_match_all('/[\x{4e00}-\x{9fa5}]+/u', $str, $matches);
    if ($found === false || !isset($matches[0])) {
        return array();
    }

    return $matches[0];
}

/**
 * Reads a whole file and verifies that it is valid UTF-8.
 *
 * Aborts the script with a message when the file is missing, unreadable or
 * not UTF-8, because continuing would silently produce an empty or wrong
 * word list.
 *
 * @param string $path File to read.
 *
 * @return string The file contents ('' for an empty file).
 */
function cikuReadUtf8File($path)
{
    if (!is_file($path) || !is_readable($path)) {
        cikuAbort('Unable to read ' . $path);
    }

    $text = file_get_contents($path);
    if ($text === false) {
        cikuAbort('Unable to read ' . $path);
    }

    // An empty pattern with /u matches any valid UTF-8 subject and fails on
    // malformed input, which makes it a dependency-free encoding check.
    if (preg_match('//u', $text) !== 1) {
        cikuAbort($path . ' is not valid UTF-8');
    }

    return $text;
}

/**
 * Prints an error message and terminates the script with exit status 1.
 *
 * @param string $message Text shown to the user.
 *
 * @return void
 */
function cikuAbort($message)
{
    echo 'ERROR: ' . $message;
    flush();
    exit(1);
}