ページの内容を文字化けさせずに読み取る方法

4028 ワード

ページの内容を読み取るたびに、そのページの文字コード(エンコーディング)を調べているのではないでしょうか。今回は、ページの内容を読み取るための汎用的な方法を検討しました。
これで文字化けは二度と起こりません。
package org.httpclient;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;

import com.auto.generate.log.LogManager;

/**
 * Small HTTP GET client that decodes the response body with the charset
 * declared in the response's {@code Content-Type} header, and keeps track of
 * cookies between requests.
 *
 * <p>Instances hold mutable state (cookies, user agent, last charset) without
 * any synchronization.
 */
public class HttpClient {

	/** Charset used when the response declares none, or one this JVM cannot decode. */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/** Finds the {@code charset=...} parameter (optionally quoted) in a Content-Type value. */
	private static final Pattern CHARSET_PATTERN =
			Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

	/** Line separator appended after every line of the response body. */
	public static final String LINE_SEPARATOR = System.getProperty("line.separator");

	/** Charset used to decode the most recent response; {@code null} before the first request. */
	private String charset;

	/** Cookies sent with each request; may be {@code null} when none are set. */
	public Cookie[] cookies;

	/** Value sent in the {@code User-agent} request header. */
	public String userAgent = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36";

	/**
	 * Performs a GET request and returns the response body as text.
	 *
	 * <p>The body is read line by line; each line is followed by
	 * {@link #LINE_SEPARATOR}. Cookies from {@code Set-Cookie} response headers
	 * are stored and sent on subsequent calls.
	 *
	 * @param url the URL to fetch
	 * @return the decoded body; on failure, whatever was read before the error
	 *         (possibly an empty string). Failures are logged, never thrown.
	 */
	public String execute(String url) {
		StringBuilder body = new StringBuilder();
		HttpURLConnection conn = null;
		try {
			conn = (HttpURLConnection) new URL(url).openConnection();
			conn.setRequestProperty("User-agent", userAgent);
			// Only send a Cookie header when there is something to send.
			String cookieHeader = buildCookieHeader();
			if (cookieHeader.length() > 0) {
				conn.addRequestProperty("Cookie", cookieHeader);
			}
			conn.connect();
			this.charset = resolveCharset(conn.getContentType());
			readBody(conn, body);
			collectResponseCookies(conn);
		} catch (Exception e) {
			// Boundary catch: callers rely on execute() never throwing.
			LogManager.err("http ", e);
		} finally {
			// Release the connection even when reading failed part-way.
			if (conn != null) {
				conn.disconnect();
			}
		}
		return body.toString();
	}

	/** Joins the current cookies as {@code name=value} pairs separated by {@code "; "}. */
	private String buildCookieHeader() {
		StringBuilder header = new StringBuilder();
		if (cookies != null) {
			for (Cookie cookie : cookies) {
				if (cookie == null) {
					continue;
				}
				if (header.length() > 0) {
					header.append("; ");
				}
				header.append(cookie.getName()).append("=").append(cookie.getValue());
			}
		}
		return header.toString();
	}

	/**
	 * Picks the charset name to decode a response with.
	 *
	 * @param contentType the Content-Type header value, may be {@code null}
	 * @return the declared charset when present and supported by this JVM,
	 *         otherwise {@link #DEFAULT_CHARSET}
	 */
	private static String resolveCharset(String contentType) {
		if (contentType != null) {
			Matcher matcher = CHARSET_PATTERN.matcher(contentType);
			if (matcher.find()) {
				String declared = matcher.group(1);
				try {
					if (Charset.isSupported(declared)) {
						return declared;
					}
				} catch (IllegalArgumentException ignored) {
					// Syntactically illegal charset name: fall through to the default.
				}
			}
		}
		return DEFAULT_CHARSET;
	}

	/** Reads the whole response body into {@code out} using the resolved charset, then closes the stream. */
	private void readBody(HttpURLConnection conn, StringBuilder out) throws IOException {
		InputStream in = conn.getInputStream();
		try {
			BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
			String line;
			while ((line = reader.readLine()) != null) {
				out.append(line).append(LINE_SEPARATOR);
			}
		} finally {
			in.close();
		}
	}

	/** Stores the cookie from every {@code Set-Cookie} header of the response. */
	private void collectResponseCookies(HttpURLConnection conn) {
		Map<String, List<String>> headers = conn.getHeaderFields();
		if (headers == null) {
			return;
		}
		for (Map.Entry<String, List<String>> header : headers.entrySet()) {
			// The status line is stored under a null key; equalsIgnoreCase(null) is false.
			if ("Set-Cookie".equalsIgnoreCase(header.getKey()) && header.getValue() != null) {
				for (String value : header.getValue()) {
					splitCookies(value);
				}
			}
		}
	}

	/**
	 * Parses one {@code Set-Cookie} header value and stores its cookie.
	 *
	 * <p>Only the leading {@code name=value} pair is a cookie; everything after
	 * the first {@code ';'} (Path, Expires, HttpOnly, ...) is an attribute and
	 * is ignored. The value may itself contain {@code '='}.
	 */
	private void splitCookies(String cookie) {
		if (cookie == null) {
			return;
		}
		int attributesStart = cookie.indexOf(';');
		String pair = attributesStart < 0 ? cookie : cookie.substring(0, attributesStart);
		int equalsAt = pair.indexOf('=');
		if (equalsAt <= 0) {
			return;
		}
		String name = pair.substring(0, equalsAt).trim();
		if (name.length() == 0) {
			return;
		}
		addCookie(new Cookie(name, pair.substring(equalsAt + 1).trim()));
	}

	/**
	 * Adds a cookie to send with subsequent requests, replacing any existing
	 * cookie of the same name.
	 *
	 * @param cookie the cookie to add; {@code null} or a cookie with a
	 *               {@code null} name is ignored
	 */
	public void addCookie(Cookie cookie) {
		if (cookie == null || cookie.getName() == null) {
			return;
		}
		if (cookies != null) {
			for (int i = 0; i < cookies.length; i++) {
				if (cookies[i] != null && cookie.getName().equals(cookies[i].getName())) {
					// Copy before writing so an array handed in via setCookies is not modified.
					Cookie[] updated = cookies.clone();
					updated[i] = cookie;
					cookies = updated;
					return;
				}
			}
		}
		cookies = ArrayUtils.add(cookies, cookie);
	}

	/**
	 * @return the charset name used to decode the most recent response, or
	 *         {@code null} if no response has been received yet
	 */
	public String getCharset() {
		return charset;
	}

	/**
	 * @return the current cookies, or {@code null} when none are set
	 */
	public Cookie[] getCookies() {
		return cookies;
	}

	/**
	 * Sets the {@code User-agent} header value.
	 *
	 * @param userAgent the new value; {@code null} or blank input is ignored
	 *                  and the previous value is kept
	 */
	public void setUserAgent(String userAgent) {
		if (null == userAgent || "".equals(userAgent.trim())) {
			return;
		}
		this.userAgent = userAgent;
	}

	/**
	 * Replaces all cookies sent with subsequent requests.
	 *
	 * @param cookies the new cookies, may be {@code null}
	 */
	public void setCookies(Cookie[] cookies) {
		this.cookies = cookies;
	}
}

クッキークラス:
package org.httpclient;

/**
 * Mutable name/value pair representing a single HTTP cookie.
 */
public class Cookie {

	/** Cookie name. */
	private String name;

	/** Cookie value. */
	private String value;

	/**
	 * Creates a cookie from the given pair.
	 *
	 * @param name  the cookie name
	 * @param value the cookie value
	 */
	public Cookie(String name, String value) {
		this.value = value;
		this.name = name;
	}

	/** @return the cookie name */
	public String getName() {
		return this.name;
	}

	/** @return the cookie value */
	public String getValue() {
		return this.value;
	}

	/** @param name the new cookie name */
	public void setName(String name) {
		this.name = name;
	}

	/** @param value the new cookie value */
	public void setValue(String value) {
		this.value = value;
	}
}

テストコード:
package com.test;

import org.httpclient.Cookie;
import org.httpclient.HttpClient;

/**
 * Manual smoke test: fetches a page with a preset session cookie, then prints
 * the body, the cookies held by the client afterwards, and the charset used.
 */
public class HttpClientTest {

	/**
	 * Runs the smoke test against a local server.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) throws Exception {
		HttpClient client = new HttpClient();

		// Seed the session cookie through setCookies, which HttpClient defines.
		client.setCookies(new Cookie[] {
				new Cookie("JSESSIONID", "62643C55999D30EA870012F9E1D7B654")
		});

		String html = client.execute("http://127.0.0.1/hksAdmin/");
		System.out.println(html);

		Cookie[] cookies = client.getCookies();
		if (null != cookies) {
			for (Cookie cookie : cookies) {
				System.out.println(cookie.getName() + "--->" + cookie.getValue());
			}
		}
		System.out.println(client.getCharset());
	}
}