JavaでGBKエンコードされたファイルの繁体字・簡体字変換を実現する

6856 ワード

最近は高音質の音楽が好きになりましたが、cueファイルの多くは繁体字中国語で書かれており、少し読みにくく感じます.中国大陸のWindowsはいずれもGBKコードを使っているので、ファイルを持ってきてコード変換することにしました.作業は3つのステップに分けて行います.
最初のステップでは、GBKの中国語文字ライブラリ(字庫)をダウンロードします.それをWPSに貼り付けて繁体字・簡体字変換を行い、繁簡の対応関係を得ます.結果は.txtファイルに保存し、ANSIエンコードを使用します.
第2ステップでは、文字ライブラリを前処理します.これは重複除去とソートの2つの部分からなります.
重複除去とは、繁体字と簡体字で同じ文字(変換しても変わらない文字)を削除することを指します.
コード:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Finds the characters that differ between two parallel text files.
 *
 * <p>Both files are read line by line. For every pair of lines with the same
 * (trimmed) length, each position where the two characters differ is appended
 * to the two output files, so the outputs stay aligned index by index.
 *
 * <p>Files are opened with {@code FileReader}/{@code FileWriter}, i.e. with the
 * platform default charset (which is printed at start-up).
 *
 * @author GT
 * 2013.1.17
 */
public class FindDifferent {

	/**
	 * Compares the two files named on the command line and writes the differing
	 * characters to {@code d<name>} files placed next to each input file.
	 *
	 * @param args
	 *            exactly two input file paths; any other count prints an error
	 *            and returns without touching any file
	 * @throws IOException
	 *             if a file cannot be opened, read or written
	 */
	public static void main(String[] args) throws IOException {
		System.out.println(System.getProperty("file.encoding"));
		if (args.length != 2) {
			System.err.println("not enough files");
			return;
		}
		// try-with-resources closes all four streams even when a read or write
		// fails part-way; closing the writers also flushes them.
		try (BufferedReader br1 = new BufferedReader(new FileReader(args[0]));
				BufferedReader br2 = new BufferedReader(new FileReader(args[1]));
				BufferedWriter bw1 = new BufferedWriter(new FileWriter(
						outputFor(args[0])));
				BufferedWriter bw2 = new BufferedWriter(new FileWriter(
						outputFor(args[1])))) {
			String line1;
			String line2;
			// Stops as soon as either file runs out of lines.
			while ((line1 = br1.readLine()) != null
					&& (line2 = br2.readLine()) != null) {
				line1 = line1.trim();
				line2 = line2.trim();
				if (line1.length() != line2.length()) {
					// Lines that cannot be aligned are reported and skipped.
					System.err.println("not same length " + line1.length()
							+ " " + line2.length());
					continue;
				}
				for (int i = 0; i < line1.length(); ++i) {
					if (line1.charAt(i) != line2.charAt(i)) {
						bw1.append(line1.charAt(i));
						bw2.append(line2.charAt(i));
					}
				}
			}
		}
	}

	/**
	 * Builds the output file for an input path by prefixing only the file name
	 * with "d", so that an input inside a directory gets its output in that
	 * same directory. For a bare file name the result is "d" + name.
	 */
	private static java.io.File outputFor(String inputPath) {
		java.io.File input = new java.io.File(inputPath);
		// A null parent makes File fall back to the single-argument behaviour.
		return new java.io.File(input.getParentFile(), "d" + input.getName());
	}
}
変換後のファイルはアップロード済みです.アドレス:http://download.csdn.net/detail/pouloghost/5005734
ソートにはヒープ(ヒープソート)を使用します.対応関係を保つために、繁体字と簡体字の2つのファイルに同じ操作を行う必要があります.コード:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Sorts the characters of trad.txt in ascending order with a heap sort and
 * applies every swap to simp.txt as well, so that the character at index i of
 * one table still corresponds to the character at index i of the other.
 * Reads trad.txt / simp.txt and writes traditional.txt / simple.txt.
 *
 * <p>Only the first line of each input file is used. The second table is
 * expected to be at least as long as the first; this is not checked here.
 *
 * @author GT
 * 2013.1.17
 */
public class SortFile {

	/**
	 * Runs the sort on the fixed input files in the working directory.
	 *
	 * @param args
	 *            unused
	 * @throws IOException
	 *             if an input file cannot be read or an output file cannot be
	 *             written
	 */
	public static void main(String[] args) throws IOException {
		StringBuilder trad = new StringBuilder(readFirstLine("trad.txt"));
		StringBuilder simp = new StringBuilder(readFirstLine("simp.txt"));
		buildHeap(trad, simp);
		// Classic heap sort: move the current maximum to the end of the
		// unsorted region, shrink the region, then restore the heap.
		int size = trad.length();
		for (int i = trad.length() - 1; i > 0; --i) {
			exchange(trad, simp, 0, i);
			--size;
			maxHeapify(trad, simp, 0, size);
		}
		System.out.println("traditional in order " + check(trad));
		writeAll("traditional.txt", trad);
		writeAll("simple.txt", simp);
	}

	/**
	 * Returns the first line of the file, or an empty string when the file has
	 * no content (readLine() returns null at end of stream).
	 */
	private static String readFirstLine(String fileName) throws IOException {
		try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
			String line = br.readLine();
			return line == null ? "" : line;
		}
	}

	/** Writes the whole buffer to the file, closing the writer in all cases. */
	private static void writeAll(String fileName, StringBuilder content)
			throws IOException {
		try (FileWriter fw = new FileWriter(fileName)) {
			fw.write(content.toString());
		}
	}

	/**
	 * Sifts the element at index i down until the subtree rooted there is a
	 * max-heap (ordered by the characters of trad) within the first size
	 * elements.
	 */
	private static void maxHeapify(StringBuilder trad, StringBuilder simp,
			int i, int size) {
		int left = 2 * i + 1;
		int right = 2 * i + 2;
		int max = i;
		if (left < size && trad.charAt(i) < trad.charAt(left)) {
			max = left;
		}
		if (right < size && trad.charAt(max) < trad.charAt(right)) {
			max = right;
		}
		if (max != i) {
			exchange(trad, simp, i, max);
			maxHeapify(trad, simp, max, size);
		}
	}

	/** Swaps positions a and b in both tables so they stay aligned. */
	private static void exchange(StringBuilder trad, StringBuilder simp,
			int a, int b) {
		char tradTemp = trad.charAt(a);
		char simpTemp = simp.charAt(a);
		trad.setCharAt(a, trad.charAt(b));
		simp.setCharAt(a, simp.charAt(b));
		trad.setCharAt(b, tradTemp);
		simp.setCharAt(b, simpTemp);
	}

	/** Turns the whole trad table into a max-heap, mirroring swaps in simp. */
	private static void buildHeap(StringBuilder trad, StringBuilder simp) {
		for (int i = trad.length() / 2; i > -1; --i) {
			maxHeapify(trad, simp, i, trad.length());
		}
	}

	/**
	 * Verifies that trad is in non-decreasing order. Prints the index of the
	 * first out-of-order pair and returns false if one is found.
	 */
	private static boolean check(StringBuilder trad) {
		for (int i = 0; i < trad.length() - 1; ++i) {
			if (trad.charAt(i) > trad.charAt(i + 1)) {
				System.out.println(i);
				return false;
			}
		}
		return true;
	}
}

ステップ3では、繁体字と簡体字の対応テーブルを使用してファイルを処理します.コード:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Replaces every traditional Chinese character in a file with its simplified
 * counterpart and stores the result in a file named "simple" + filename.
 *
 * <p>The mapping comes from two parallel one-line tables, traditional.txt
 * (sorted ascending, as the lookup is a binary search) and simple.txt: the
 * character at index i of the first maps to the character at index i of the
 * second.
 *
 * <p>Per the original author all files are expected to be GBK encoded; the
 * code itself uses {@code FileReader}/{@code FileWriter}, i.e. the platform
 * default charset.
 *
 * @author GT
 * 2013.1.17
 */
public class Main {
	static String traditional = null;
	static String simple = null;

	/**
	 * Converts the file named on the command line.
	 *
	 * @param args
	 *            exactly one input file path; any other count prints an error
	 *            and returns
	 * @throws IOException
	 *             if a table or the input cannot be read, or the output cannot
	 *             be written
	 */
	public static void main(String[] args) throws IOException {
		if (args.length != 1) {
			System.err.println("not enough files");
			return;
		}
		initial();
		// Prefix only the file name so an input inside a directory gets its
		// output next to it; for a bare name this is "simple" + name.
		java.io.File input = new java.io.File(args[0]);
		java.io.File output = new java.io.File(input.getParentFile(),
				"simple" + input.getName());
		// try-with-resources closes (and flushes) both streams even on error.
		try (BufferedReader br = new BufferedReader(new FileReader(args[0]));
				BufferedWriter bw = new BufferedWriter(new FileWriter(output))) {
			String line;
			while ((line = br.readLine()) != null) {
				simplify(line, bw);
				bw.newLine();
			}
		}
	}

	/** Loads both mapping tables from the working directory. */
	private static void initial() throws IOException {
		traditional = readFirstLine("traditional.txt");
		simple = readFirstLine("simple.txt");
	}

	/**
	 * Returns the first line of the file, or an empty string when the file has
	 * no content (readLine() returns null at end of stream).
	 */
	private static String readFirstLine(String fileName) throws IOException {
		try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
			String line = br.readLine();
			return line == null ? "" : line;
		}
	}

	/**
	 * Writes the line to bw, substituting each character found in the
	 * traditional table with the character at the same index of the simple
	 * table; all other characters are copied unchanged.
	 */
	private static void simplify(String line, BufferedWriter bw)
			throws IOException {
		for (int i = 0; i < line.length(); ++i) {
			char ch = line.charAt(i);
			int index = find(ch);
			if (index != -1) {
				bw.append(simple.charAt(index));
			} else {
				bw.append(ch);
			}
		}
	}

	/**
	 * Binary search for ch in the sorted traditional table.
	 *
	 * @return the index of ch, or -1 when it is not in the table
	 */
	private static int find(char ch) {
		int low = 0;
		// The last valid index; starting at length() let mid run one past the
		// end for characters greater than every entry (and for an empty table).
		int high = traditional.length() - 1;
		while (low <= high) {
			int mid = (low + high) >>> 1;
			char candidate = traditional.charAt(mid);
			if (candidate == ch) {
				return mid;
			}
			if (candidate < ch) {
				low = mid + 1;
			} else {
				high = mid - 1;
			}
		}
		return -1;
	}
}