30分で完成:国家統計局の2013年版全国行政区画コードをスクレイピングして、MySQLに挿入します。
4018 ワード
/**
 * Crawls a hierarchy of HTML pages of area codes and stores every
 * (code, name) pair it finds in the `base_area_code` table.
 *
 * The crawl starts at the index page (`provincetr` rows), follows each link
 * to a `citytr` page, then `countytr`, `towntr` and finally `villagetr`
 * pages. Rows on the index page are only followed, not inserted.
 *
 * Example base URL: 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2013/'
 */
class BaseAreaCode {
    /** Root URL of the crawl; level-2 links are resolved against it. */
    private $_base_url;

    /** Database handle; only its insert($sql) method is used by this class. */
    private $_conn;

    /**
     * @param string $base_url Root URL of the index page (with trailing slash,
     *                         since level-2 hrefs are appended to it verbatim).
     */
    public function __construct($base_url) {
        $this->_base_url = $base_url;
        // NOTE(review): "Connction" is spelled as in the original source; the
        // class is declared elsewhere in the project - confirm the name.
        $this->_conn = new Connction();
    }

    /**
     * Entry point: reads the index page and crawls every linked page.
     *
     * Returns silently when the index page cannot be fetched or does not
     * contain the expected table.
     */
    public function getData(){
        set_time_limit(0);

        $data = $this->fetchPage($this->_base_url);
        if ($data === null) {
            return;
        }

        // Only the first matching table is used.
        $pattern = "/<table class='provincetable' width=775 >(.*?)<\/table>/";
        if (!preg_match($pattern, $data, $table)) {
            return;
        }

        preg_match_all("/<tr class='provincetr'>(.*?)<\/tr>/", $table[1], $rows);
        foreach ($rows[1] as $row) {
            preg_match_all("/<td><a href='(.*?)'>/", $row, $links);
            foreach ($links[1] as $href) {
                $this->level2Data($href);
            }
        }
    }

    /**
     * Processes a `citytr` page and descends into each linked `countytr` page.
     *
     * @param string $href Link taken from the index page, relative to the base URL.
     */
    public function level2Data($href){
        set_time_limit(0);
        $pattern = "/<tr class='citytr'>(.*?)<\/tr>/";
        $href_pattern = "/<td><a href='(.*?)'>(.*?)<\/a><\/td>/";
        $this->execData($this->_base_url.$href, $pattern, $href_pattern, 'level3Data');
    }

    /**
     * Processes a `countytr` page and descends into each linked `towntr` page.
     *
     * @param string $href Full URL (or path) of the page.
     */
    public function level3Data($href){
        set_time_limit(0);
        $pattern = "/<tr class='countytr'>(.*?)<\/tr>/";
        $href_pattern = "/<td><a href='(.*?)'>(.*?)<\/a><\/td>/";
        $this->execData($href, $pattern, $href_pattern, 'level4Data');
    }

    /**
     * Shared worker for the intermediate levels: inserts every row of a page
     * and optionally recurses into the page each row links to.
     *
     * @param string      $href         URL of the page to read.
     * @param string      $pattern      Regex whose group 1 captures one table row.
     * @param string      $href_pattern Regex applied to a row; group 1 is the
     *                                  link target, group 2 the link text.
     * @param string|null $callback     Name of the method to call with the
     *                                  child URL, or null to stop here.
     */
    private function execData($href,$pattern,$href_pattern,$callback=null){
        $data = $this->fetchPage($href);
        if ($data === null) {
            // Unreadable page: skip this subtree instead of inserting blanks.
            return;
        }

        // Child links are relative to the directory of the current page.
        $temp_url = preg_replace("/\d+\.html$/", "", $href);

        preg_match_all($pattern, $data, $data_matches);
        foreach ($data_matches[1] as $value) {
            if (preg_match_all($href_pattern, $value, $links) > 0) {
                // Linked row: first link text is the code, second the name.
                $code = $links[2][0];
                $name = isset($links[2][1]) ? $links[2][1] : null;
                $this->insert($code, $name);
                if (!is_null($callback)) {
                    $this->$callback($temp_url.$links[1][0]);
                }
            } elseif (preg_match_all("/<td>(.*?)<\/td>/", $value, $cells) >= 2) {
                // Row without links: it has no child page, so store it and
                // do not recurse (there is no URL to follow).
                $this->insert($cells[1][0], $cells[1][1]);
            }
        }
    }

    /**
     * Processes a `towntr` page and descends into each linked `villagetr` page.
     *
     * @param string $href Full URL (or path) of the page.
     */
    public function level4Data($href){
        set_time_limit(0);
        $pattern = "/<tr class='towntr'>(.*?)<\/tr>/";
        $href_pattern = "/<td><a href='(.*?)'>(.*?)<\/a><\/td>/";
        $this->execData($href, $pattern, $href_pattern, 'level5Data');
    }

    /**
     * Processes a leaf `villagetr` page. Each row is expected to hold three
     * plain cells, read in order as: code, city code, name.
     *
     * @param string $href Full URL (or path) of the page.
     */
    public function level5Data($href){
        set_time_limit(0);

        $data = $this->fetchPage($href);
        if ($data === null) {
            return;
        }

        preg_match_all("/<tr class='villagetr'>(.*?)<\/tr>/", $data, $data_matches);
        foreach ($data_matches[1] as $value) {
            // Rows with fewer than three cells are malformed; skip them
            // rather than inserting empty records.
            if (preg_match_all("/<td>(.*?)<\/td>/", $value, $cells) < 3) {
                continue;
            }
            $this->insert($cells[1][0], $cells[1][2], $cells[1][1]);
        }
    }

    /**
     * Downloads a page and removes all line breaks so that the row regexes,
     * whose `.` does not match newlines, can match across the original lines.
     *
     * @param string $url URL or path understood by file_get_contents().
     * @return string|null Page content without line breaks, or null when the
     *                     page could not be read.
     */
    private function fetchPage($url){
        $data = file_get_contents($url);
        if ($data === false) {
            return null;
        }
        return str_replace(array("\r\n", "\r", "\n"), "", $data);
    }

    /**
     * Writes one record to `base_area_code`.
     *
     * @param string      $code      Area code.
     * @param string      $name      Area name, converted here from GBK to UTF-8.
     * @param string|null $city_code Optional third value; stored as '' when null.
     */
    private function insert($code,$name,$city_code = null){
        $name = iconv("gbk", "utf-8", (string) $name);

        // The values come from scraped HTML, so escape them before they are
        // embedded in the statement.
        // NOTE(review): if Connction offers prepared statements, prefer those.
        $code = addslashes((string) $code);
        $name = addslashes((string) $name);
        $city_code = addslashes((string) $city_code);

        $sql = "insert into base_area_code values (0,'$code','{$name}','$city_code')";
        $this->_conn->insert($sql);
    }
}