文字列グループ

4358 ワード

漢字の文字列と英字の文字列が混在する一連のデータを、先頭文字(漢字の場合はピンインの頭文字)のアルファベットごとにグループ化してソートします。
使用するツール:
pinyin4j
文字列を保持するクラス:
public class NameBean implements Comparable<NameBean> {
	//  
	private String nameGBk;
	//  
	private String namePY;

	public NameBean() {
		super();
	}

	public String getNameGBk() {
		return nameGBk;
	}

	public void setNameGBk(String nameGBk) {
		this.nameGBk = nameGBk;
	}

	public String getNamePY() {
		return namePY;
	}

	public void setNamePY(String namePY) {
		this.namePY = namePY;
	}

	@Override
	public int compareTo(NameBean arg0) {
		return getNamePY().compareTo(arg0.getNamePY());
	}

	@Override
	public String toString() {
		// TODO Auto-generated method stub
		// return getNameGBk()+"="+getNamePY();
		return getNameGBk() != null ? getNameGBk() : getNamePY();
	}
}

テスト:
/**
 * Demo: groups a mixed list of Chinese and Latin names under their initial
 * letter and prints the groups in alphabetical order.
 */
public class TestMain {
	/** Matches one character in the range U+4E00..U+9FA5; compiled once and reused. */
	private static final Pattern CHINESE_CHAR = Pattern.compile("[\u4e00-\u9fa5]");

	/** Matches the combining marks that remain after canonical decomposition. */
	private static final Pattern COMBINING_MARKS = Pattern.compile("\\p{M}+");

	/**
	 * Tells whether the text contains at least one character in the range
	 * U+4E00..U+9FA5.
	 *
	 * @param str text to inspect; null is treated as containing none
	 * @return true if any such character occurs in {@code str}
	 */
	public static boolean isChineseChar(String str) {
		return str != null && CHINESE_CHAR.matcher(str).find();
	}

	/**
	 * Converts the text to lower-case pinyin with tone marks.
	 *
	 * <p>Each character in U+4E00..U+9FA5 is replaced by its first pinyin
	 * reading followed by a space; every other character is copied unchanged.
	 * A character in that range for which pinyin4j reports no reading is also
	 * copied unchanged instead of failing.
	 *
	 * @param strs text to convert; surrounding whitespace is trimmed first,
	 *             and null yields an empty string
	 * @return the converted text
	 */
	public static String getPinYin(String strs) {
		if (strs == null) {
			return "";
		}
		HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
		format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
		format.setToneType(HanyuPinyinToneType.WITH_TONE_MARK);
		format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);

		StringBuilder buffer = new StringBuilder();
		try {
			for (char c : strs.trim().toCharArray()) {
				String[] readings = null;
				if (CHINESE_CHAR.matcher(String.valueOf(c)).matches()) {
					readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
				}
				if (readings != null && readings.length > 0) {
					buffer.append(readings[0]);
					buffer.append(" ");
				} else {
					buffer.append(c);
				}
			}
		} catch (BadHanyuPinyinOutputFormatCombination e) {
			// Best effort, as before: report the problem and return what was
			// converted up to this point.
			e.printStackTrace();
		}
		return buffer.toString();
	}

	/**
	 * Removes tone marks and other diacritics so that, for example, "ā"
	 * becomes "a". Used to build sort keys: a tone-marked vowel otherwise
	 * compares greater than every plain ASCII letter and ends up after "z".
	 */
	private static String stripToneMarks(String pinyin) {
		String decomposed = java.text.Normalizer.normalize(pinyin,
				java.text.Normalizer.Form.NFD);
		return COMBINING_MARKS.matcher(decomposed).replaceAll("");
	}

	/**
	 * Builds the grouped list from the sample data, sorts it and prints every
	 * entry prefixed with ";".
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// NOTE(review): the blank entries look like non-ASCII names that were
		// lost when this listing was extracted - confirm against the original.
		String data[] = {
				 "   ", "android", "   ", "news",
				"baidu", "location", "oberser", "mary", "next", "ruby",
				"money", "lucy", "very", "thunder", "object", "lily", "jay",
				"answer", "layout", "demos", "com", "collect", "custom",
				"blog", "round", "redirect", "ground", "gray", "blue", "zone",
				"james", "zhang", " ", " ", " ", " ", "  ", "  ", "   ", "da",
				" ", " ", };

		// Group letters that already have a header entry.
		TreeSet<String> treeData = new TreeSet<String>();
		// Headers and names together; sorted as one list at the end.
		List<NameBean> nData = new LinkedList<NameBean>();

		for (String raw : data) {
			String str = raw.trim();
			// An entry that is empty after trimming has no first character to
			// group by, so it is skipped instead of failing on charAt(0).
			if (str.length() == 0) {
				continue;
			}

			NameBean b = new NameBean();
			String let = String.valueOf(str.charAt(0));
			if (isChineseChar(let)) {
				// Keep the original text for display and sort by the
				// tone-free pinyin so the name lands inside its a-z group.
				String sortKey = stripToneMarks(getPinYin(str));
				b.setNameGBk(str);
				b.setNamePY(sortKey);
				let = String.valueOf(sortKey.charAt(0));
			} else {
				b.setNamePY(str);
			}

			// add() returns true only the first time a letter is seen, which
			// is exactly when the group needs its header entry.
			if (treeData.add(let)) {
				NameBean ins = new NameBean();
				ins.setNamePY(let);
				nData.add(ins);
			}
			nData.add(b);
		}

		// A header is a prefix of every name in its group, so it sorts first.
		Collections.sort(nData);

		for (NameBean entry : nData) {
			System.out.print(";");
			System.out.print(entry);
		}
	}
}

結果:
;a;android;answer;b;baidu;blog;blue;  ;  ;   ;c;collect;com;custom;d;da;demos; ;g;gray;ground;j;james;jay;   ;l;layout;lily;location;lucy;m;mary;money;n;news;next;o;oberser;object;r;redirect;round;ruby;s;   ;t;thunder;v;very;w; ;y; ;z;zhang;zone;é; ; ;ā; 

結果を見ると、声調記号付きの「ā」は「a」とは別の文字として扱われる(「é」なども同様)ため、上のように「z」より後ろに別のグループとして並んでしまいます。声調記号付きの母音を検出し、対応する記号なしの母音に置き換えて補正するとよいでしょう。対象となる母音は全部合わせてもそれほど多くありません。