Java Webデータの取得

5733 ワード

クロール対象のページ:http://quote.eastmoney.com/zs000001.html 取得したいデータ:Java爬取网页数据_第1张图片
必要なデータは、ページのソースコードの次の部分に含まれています:在这里插入图片描述 まず、データを保持するクラスを定義します。
/**
 * Mutable holder for six string-valued properties of one extracted record.
 *
 * <p>Values are stored exactly as passed to the setters; no validation or
 * conversion is performed. Every property is {@code null} until it is set.
 */
public class Information {

    // Declared without an access modifier, so same-package code can read them directly.
    String type;
    String volume;
    String money;
    String market_value;
    String number;
    String price_earnings_ratio;

    /** @return the current {@code type} value, or {@code null} if never set */
    public String getType() {
        return this.type;
    }

    /** @param type the new {@code type} value */
    public void setType(final String type) {
        this.type = type;
    }

    /** @return the current {@code volume} value, or {@code null} if never set */
    public String getVolume() {
        return this.volume;
    }

    /** @param volume the new {@code volume} value */
    public void setVolume(final String volume) {
        this.volume = volume;
    }

    /** @return the current {@code money} value, or {@code null} if never set */
    public String getMoney() {
        return this.money;
    }

    /** @param money the new {@code money} value */
    public void setMoney(final String money) {
        this.money = money;
    }

    /** @return the current {@code market_value} value, or {@code null} if never set */
    public String getMarket_value() {
        return this.market_value;
    }

    /** @param market_value the new {@code market_value} value */
    public void setMarket_value(final String market_value) {
        this.market_value = market_value;
    }

    /** @return the current {@code number} value, or {@code null} if never set */
    public String getNumber() {
        return this.number;
    }

    /** @param number the new {@code number} value */
    public void setNumber(final String number) {
        this.number = number;
    }

    /** @return the current {@code price_earnings_ratio} value, or {@code null} if never set */
    public String getPrice_earnings_ratio() {
        return this.price_earnings_ratio;
    }

    /** @param price_earnings_ratio the new {@code price_earnings_ratio} value */
    public void setPrice_earnings_ratio(final String price_earnings_ratio) {
        this.price_earnings_ratio = price_earnings_ratio;
    }
}

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class AlmanacUtil {

    public AlmanacUtil() {

    }
//    
    public String getdata(String url) {
        String data = null;
        org.apache.commons.httpclient.HttpClient client = new HttpClient();
        GetMethod getMethod = new GetMethod(url);
        getMethod.setRequestHeader("User_Agent", "Mozilla/5.0(Windows NT 6.1;Win64;x64;rv:39.0) Gecko/20100101 Firefox/39.0");
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());//         
        try {
            int statusCode = client.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.out.println("Wrong");
            }
            byte[] responseBody = getMethod.getResponseBody();
            data = new String(responseBody);
            return data;

        } catch (HttpException e) {
            System.out.println("Please check your provided http address!");
            data = "";
            e.printStackTrace();

        } catch (IOException e) {
            data = "";
            e.printStackTrace();
        } finally {

            getMethod.releaseConnection();

        }
        return data;
    }

    //             
    static Pattern proInfo = Pattern.compile("(.*?)(.*?)(.*?)(.*?)(.*?)" +
            "(.*?)(.*?)", Pattern.DOTALL);

    private static List getDataStructure(String str) {
        //              
        String[] info = str.split("");
        List list = new ArrayList();
        for (String s : info) {
            Matcher m = proInfo.matcher(s);
            Information information = null;
            if (m.find()) {
                information = new Information();
                String ss = m.group(1).trim();
                information.setType(ss);
                information.setVolume(m.group(2).trim());
                information.setMoney(m.group(3).trim());
                information.setMarket_value(m.group(4).trim());
                information.setNumber(m.group(6).trim());
                information.setPrice_earnings_ratio(m.group(7).trim());
                list.add(information);
            }
        }
        return list;
    }

    public static void main(String[] args) throws IOException {
        AlmanacUtil almanacUtil = new AlmanacUtil();
        String ss = almanacUtil.getdata("http://quote.eastmoney.com/zs000001.html");
        List list = getDataStructure(ss);
        String string = "";
        for (int k = 0; k < list.size(); k++) {
            String s = "  :" + list.get(k).getType() + " " + "   :" + list.get(k).volume + " " +
                    "    :" + list.get(k).getMoney() + " " + "   :" + list.get(k).getMarket_value() +
                    " " + "    ( ):" + list.get(k).getNumber() + " " + "     :" + list.get(k).getPrice_earnings_ratio() + "
"; string = string + s; } System.out.println(string); File f = new File("D:" + File.separator + "gupiao.txt");// D guipiao.txt OutputStream out = null; out = new FileOutputStream(f); byte b[] = string.getBytes(); out.write(b); out.close(); } }