HTML TagStream(タグストリーム)の実装

3990 ワード

java html

HTMLファイルに対して、以下の処理を行いたいと思います。
1) HTML中のテキストもタグと見なします。タグには3つの種類があり、1つは開始タグ(例: <p>)、1つは終了タグ(例: </p>)、もう1つは対応する開始タグと終了タグに囲まれたテキストです。例えば

<p>that is a p item</p>

という記述では、that is a p item が通常のテキストです。
2) HTML中のすべてのタグ(テキストを含む)を1つの配列にまとめ、hasnext()、next() メソッドで順に取り出せるようにします。
3) テキストタグが isEmpty() の場合(空の場合)は無視します。
次のHTML:

<html>
	<head>
		<title>title content</title>
	</head>
	<body>
		<font>font content</font>
		<p>p content</p>
		body content
	</body>
</html>

上記のHTMLから、次のタグストリームが生成されます。
html>head>title>title content>/title>/head>body>font>font content>/font>p>p content>/p>body content>/body>/html
タグ属性クラス:

/**
 * One element of an HTML tag stream: a start tag, an end tag, or a run of text.
 *
 * <p>The kind is carried as a single character and the payload as a string
 * (the tag name for tags, the text itself for text elements).
 */
public class Tagattribute {
	/* Kind of element, typically 't' = text, 's' = start tag, 'e' = end tag. */
	private char attribute;
	/* Tag name for 's'/'e' elements, or the text for 't' elements. */
	private String content;
	
	/**
	 * Creates an element.
	 *
	 * @param attribute kind marker, typically 't', 's' or 'e'
	 * @param content   tag name or text belonging to this element
	 */
	public Tagattribute(char attribute,String content){
		this.attribute = attribute;
		this.content = content;
	}

	/**
	 * @return the kind marker of this element
	 */
	public char getattribute(){
		return attribute;
	}

	/**
	 * @param attribute the new kind marker
	 */
	public void setattribute(char attribute){
		this.attribute = attribute;
	}

	/**
	 * @return the tag name or text of this element
	 */
	public String getcontent(){
		return content;
	}

	/**
	 * @param content the new tag name or text
	 */
	public void setcontent(String content){
		this.content = content;
	}
}

java.io.* を使用します。配列を作成し、配列の添字(インデックス)に従って順に読み出します。

package org.biti.html;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;

public class TagStream {
	private Tagattribute[] tas = null;
	private int order = 0;
	
	public static void main(String[] args){
		TagStream ts = new TagStream("d:\\data\\index.html","GB2312");
		while(ts.hasnext()){
			Tagattribute ta = ts.next();
			System.out.println(ta.getattribute()+":"+ta.getcontent());
		}
	}
	
	public TagStream(String file,String charset){
		InputStreamReader is;
		ArrayList<Tagattribute> al = new ArrayList<Tagattribute>();
		try {
			is = new InputStreamReader(new FileInputStream(file),charset);
			BufferedReader buffer = new BufferedReader(is);
			char[] cbuff = new char[1];
			StringBuffer tag = new StringBuffer();
			
			while( buffer.read(cbuff) != -1){
				char start = cbuff[0];
				if(start=='<'){			
					String content = tag.toString();
					content = content.replaceAll("^\\s+|\\s+$|\r+|
+", "");
					content = content.replaceAll("&nbsp;","");
					if(!content.isEmpty()){
//						System.out.println("content:"+content);
						al.add(new Tagattribute('t',content));
					}
					tag.delete(0, tag.length());
					
					String webtag = "";
					while(buffer.read(cbuff)!=-1){
						char end = cbuff[0];
						if(end=='>'){
							webtag = tag.toString();
							break;
						}
						tag.append(end);
					}
					
					if(webtag.charAt(0)=='/'){
						webtag = webtag.substring(1);	
						al.add(new Tagattribute('e',webtag));
					}else{
						webtag = webtag.trim().split("\\s+")[0];
						al.add(new Tagattribute('s',webtag));
					}
//					System.out.println(webtag);
					
					tag.delete(0, tag.length());
				}else{
					tag.append(start);
				}
			}
			tas = (Tagattribute[]) al.toArray(new Tagattribute[0]);
		} catch (IOException e){
			e.printStackTrace();
		}
	}
	
	public boolean hasnext(){
		if(order<tas.length){
			return true;
		}
		return false;
	}
	
	public Tagattribute next(){
		if(order<tas.length){
			Tagattribute ta = tas[order];
			order++;
			return ta;
		}else{
			return null;
		}
	}
}

Javaデザインパターン学習:ファクトリパターンと抽象ファクトリパターン

Javaのカスタムequals関数とhashCode関数