国家統計局2019年省市区データ(自取)


最近、プロジェクトで使用されている省・市街地のデータベースが古く、多くの新規区のデータがないことが分かった.ネットでたくさん探しましたが、最新ではありません.
ブログを見つけました.彼は自分で国家統計局に取りに行きました.
https://blog.csdn.net/z_wen_quan/article/details/79737567
しかし、彼が使っているphpは、私はphp環境がなく、データベースのフォーマットが異なるので、自分でツールクラスを書き直しました.
アイデアは次のとおりです.
  • 統計局のウェブサイトで見つけた各省・市街地のIDにより、関連インタフェースのURL
  • をつなぐことができる.
  • ネットワーク要求により現在のインタフェースのソースコード
  • をURLで取得することができる.
  • は、名称、統計用区画コード、ジャンプ下位インタフェースID
  • を含む正規表現により必要なデータを切り取る.
  • 取得したデータをSQL文につづるか、JSON文字列出力
  • につづる
    ツールクラスChinaCityNameUtilsには、次の4つの方法があります.
  • printProvinceSQL()printCitySQL()printAreaSQL()コンソール印刷テーブルprovince city areaの挿入SQL文
  • getCitiesJsonコンソールでjson文字列を印刷し、Dディスク
  • に保持する
    注意、sql文は私の使うデータベースのフォーマットによってつなぎ合わせて、もし使うデータベースのフォーマットと私の使う不通ならば、自分で対応するSQL文を修正することができます
     
    /**
     * Copyright (C), 2018-2019, wankun
     */
    package com.wankun.richduckling.utils;
    
    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONObject;
    import org.thymeleaf.util.StringUtils;
    
    import java.io.ByteArrayOutputStream;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     *           
     *
     * @author wankun
     * @create 2019/4/10
     * @since 1.0.0
     */
    public class ChinaCityNameUtils {
        /**
         *           2018    
         */
        private static final String WEB_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/index.html";
        private static final String BASE_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/";
        /**
         *       id
         *              ,      
         */
        private static final int[] province_id = {11, 12, 13, 14, 15, 21, 22, 23, 31, 32, 33, 34, 35, 36, 37, 41, 42, 43, 44, 45, 46, 50, 51, 52,
                53, 54, 61, 62, 63, 64, 65};
        private static final String[] province_name = {"   ", "   ", "   ", "   ", "      ", "   ", "   ", "    ",
                "   ", "   ", "   ", "   ", "   ", "   ", "   ", "   ",
                "   ", "   ", "   ", "       ", "   ", "   ", "   ", "   ",
                "   ", "     ", "   ", "   ", "   ", "       ", "        "
        };
        private static final String RGEX_IDS = "(.{1,30})";
    private static final String RGEX_NAMES = "(.{1,30})";
    private static final String RGEX_CODES = "(.{1,30})";
        private static final String RGEX_CODES_NO_A = "(.{1,30}).*?";
        private static final String RGEX_NAMES_NO_A = ".*?(.{1,30})";
    
        private static int cityFromIdId = 0;
        private static int provinceFromId = 0;
        private static int areaFormId = 0;
    
        /**
         *             sql
         */
        private static String CREAT_PROVINCE_SQL = "CREATE TABLE `province`(`pro_id` int(11),`pro_code` varchar(18),`pro_name` varchar(60),`pro_name2` varchar(60));";
        private static String CREAT_CITY_SQL = " CREATE TABLE `city`( `id` int(11),   `province_id` int(10),   `code` varchar(18),   `name` varchar(60),   `province_code` varchar(18));";
        private static String CREAT_AREA_SQL = " CREATE TABLE `area`(`id` int(11),   `city_id` int(10),   `code` varchar(18),   `name` varchar(60),   `city_code` varchar(18));";
        /**
         *     sql
         */
        public static void printProvinceSQL() {
            System.out.println("size:" + province_id.length);
            String sql = "";
            for (int i = 0; i < province_id.length; i++) {
                int id = i + 1;
                //  id  
                if (!StringUtils.isEmpty(sql)) {
                    sql = String.format("%s,", sql);
                }
                sql = String.format("%s(%s,'%s0000','%s','')",sql, id, province_id[i], province_name[i]);
            }
            sql =  String.format("insert into province values %s ;", sql);
            System.out.println(sql);
        }
    
        /**
         *       sql
         *   :1、   sql        
         * 2、      delete from  area;
         */
        public static void printCitySQL() {
            System.out.println("      ");
            //     id
            cityFromIdId = 0;
            provinceFromId = 0;
            for (int i = 0; i < province_id.length; i++) {
                String url = BASE_URL + province_id[i] + ".html";
                provinceFromId++;
                String html = getHtml(url);
    
                try {
                    //          
                    String provinceCode = province_id[i] + "0000";
                    //      
                    String sql = getSqlStr(0, html, RGEX_NAMES, RGEX_CODES, provinceCode);
                    sql = String.format("insert into city values %s ;", sql);
                    System.out.println(sql);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            System.out.println("    :" + cityFromIdId);
        }
    
        /**
         *       sql
         */
        public static void printAreaSQL() {
            System.out.println("      ");
            areaFormId = 0;
            cityFromIdId = 0;
            for (int i = 0; i < province_id.length; i++) {
                String url = BASE_URL + province_id[i] + ".html";
                String html = getHtml(url);
                //  5         
                try {
                    //      
                    List ids = getSubUtil(html, RGEX_IDS);
                    List codes = getSubUtil(html, RGEX_CODES);
                    for (int n = 0; n < ids.size(); n++) {
                        cityFromIdId++;
                        //       
                        String urlQu = BASE_URL + province_id[i] + "/" + ids.get(n) + ".html";
                        String htmlQu = getHtml(urlQu);
                        //           ,      ,        
                        String sql = getSqlStr(1, htmlQu, RGEX_NAMES_NO_A, RGEX_CODES_NO_A, codes.get(n));
                        sql = String.format(StringUtils.isEmpty(sql) ? "%s %s" : "%s,%s", sql, getSqlStr(1, htmlQu, RGEX_NAMES, RGEX_CODES, codes.get(n)));
                        sql = String.format("insert into area values %s ;", sql);
                        System.out.println(sql);
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            System.out.println("    :" + areaFormId);
        }
    
        /**
         *        html      ,   Sql  
         *
         * @param type    , 0    , 1    
         * @return
         */
        private static String getSqlStr(int type, String html, String rgexNames, String rgexCodes, String parentsCode) {
            String sql = "";
            int formId = 0;
            int parentsFromIdId = 0;
            List names = getSubUtil(html, rgexNames);
            List codes = getSubUtil(html, rgexCodes);
            for (int q = 0; q < names.size(); q++) {
                if (type == 0) {
                    cityFromIdId++;
                    parentsFromIdId = provinceFromId;
                    formId = cityFromIdId;
                } else {
                    areaFormId++;
                    parentsFromIdId = cityFromIdId;
                    formId = areaFormId;
                }
                if (!StringUtils.isEmpty(sql)) {
                    sql = String.format("%s,", sql);
                }
                //       SQL  
                sql = String.format("%s(%s,%s,%s,'%s',%s)", sql, formId, parentsFromIdId, codes.get(q), names.get(q), parentsCode);
            }
            return sql;
        }
    
    
        /**
         *      json
         *
         * @return
         */
        public static JSONArray getCitiesJson() {
            System.out.println("  citiesJson");
            JSONArray jsonArray = new JSONArray();
            //  id  id      ,      
            int cityId = 0;
            int areaId = 0;
            int provinceId = 0;
            //      
            for (int p = 0; p < province_id.length; p++) {
                try {
                    JSONObject province = new JSONObject();
                    provinceId = p + 1;
                    province.put("pro_id", provinceId);
                    //    
                    province.put("pro_code", province_id[p] + "0000");
                    province.put("pro_name", province_name[p]);
                    //      
                    System.out.println("    " + province_name[p] + "     ");
                    JSONArray cities = new JSONArray();
                    String url = BASE_URL + province_id[p] + ".html";
                    String html = getHtml(url);
                    List ids = getSubUtil(html, RGEX_IDS);
                    List names = getSubUtil(html, RGEX_NAMES);
                    List codes = getSubUtil(html, RGEX_CODES);
                    for (int c = 0; c < ids.size(); c++) {
                        int cityID = ++cityId;
                        JSONObject city = new JSONObject();
                        city.put("city_id", cityID);
                        city.put("city_code", codes.get(c));
                        city.put("city_name", names.get(c));
                        //      
                        JSONArray areas = new JSONArray();
                        String urlArea = BASE_URL + province_id[p] + "/" + ids.get(c) + ".html";
                        String htmlArea = getHtml(urlArea);
                        List namesQu = new ArrayList<>();
                        List codesQu = new ArrayList<>();
                        //    "   ",         
                        namesQu.addAll(getSubUtil(htmlArea, RGEX_NAMES_NO_A));
                        codesQu.addAll(getSubUtil(htmlArea, RGEX_CODES_NO_A));
                        namesQu.addAll(getSubUtil(htmlArea, RGEX_NAMES));
                        codesQu.addAll(getSubUtil(htmlArea, RGEX_CODES));
                        for (int a = 0; a < namesQu.size(); a++) {
                            int areaID = ++areaId;
                            JSONObject area = new JSONObject();
                            area.put("area_id", areaID);
                            area.put("area_code", codesQu.get(a));
                            area.put("area_name", namesQu.get(a));
                            areas.add(area);
                        }
                        city.put("city_areas", areas);
                        cities.add(city);
                    }
                    province.put("pro_cities", cities);
                    jsonArray.add(province);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            //       ,       ,      
            System.out.println(jsonArray.toString());
            writeFile(jsonArray.toString());
            return jsonArray;
        }
    
    
        /**
         *  json      D 
         *
         * @param str
         */
        public static void writeFile(String str) {
            FileWriter fw = null;
            //      
            SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmmss");
            String fileName = "D:\\china_city_name_" + df.format(new Date()) + ".txt";
            try {
                //    :FileWriter    :3,9,5   
                fw = new FileWriter(fileName);
                fw.write(str);
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                try {
                    fw.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
    
        }
    
    
        /**
         *   url       
         *
         * @param htmlUrl
         * @return
         */
        public  static String getHtml(String htmlUrl) {
            HttpURLConnection conn = null;
            try {
                URL url = new URL(htmlUrl);
                conn = (HttpURLConnection) url.openConnection();
                //          
                conn.setDoInput(true);
                //          
                conn.setDoOutput(true);
                //    
                conn.setUseCaches(false);
                conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
                conn.setRequestProperty("Charset", "utf-8");
                //     GET
                conn.setRequestMethod("GET");
                //       5 
                conn.setConnectTimeout(5000);
                //        ,       
                if (conn.getResponseCode() == 200) {
                    // getInputStream()             
                    InputStream in = conn.getInputStream();
                    byte[] data = read(in);
                    //    gbk,            
                    String html = new String(data, "gbk");
                    in.close();
                    return html;
                } else {
                    return "    ";
                }
            } catch (IOException e) {
                e.printStackTrace();
                return "    ";
            }
        }
    
    
        /**
         *          ,
         *
         * @param inStream
         * @return
         * @throws IOException
         */
        public static final byte[] read(InputStream inStream)
                throws IOException {
            ByteArrayOutputStream swapStream = new ByteArrayOutputStream();
            byte[] buff = new byte[100];
            int rc = 0;
            while ((rc = inStream.read(buff, 0, 100)) > 0) {
                swapStream.write(buff, 0, rc);
            }
            byte[] in2b = swapStream.toByteArray();
            return in2b;
        }
    
        /**
         *                    
         *
         * @param soap
         * @return
         */
        public  static List getSubUtil(String soap, String rgex) {
            List list = new ArrayList();
            //      
            Pattern pattern = Pattern.compile(rgex);
            Matcher m = pattern.matcher(soap);
            while (m.find()) {
                int i = 1;
                list.add(m.group(i));
                i++;
            }
            return list;
        }
    }
     
      

     

    sql :

    https://github.com/bihansheng/china_city_name/blob/master/china_citys_name-20190410.sql

    json :

    https://github.com/bihansheng/china_city_name/blob/master/china_city_name_20190410112328.txt