Javaで中国語の文字列をUnicodeコード(エスケープ表記)に変換し、Unicodeコードを中国語の文字列に戻す方法。
8055 ワード
参照元:
http://blog.csdn.net/jdsjlzx/article/details/7058823
package lia.meetlucene;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
/**
 * Converts text to and from "backslash-u" escape notation, in which every
 * UTF-16 code unit is written as a backslash, the letter {@code u} and four
 * lowercase hexadecimal digits.
 */
public class Unicode {

    /** Marker that introduces an escape: a backslash followed by the letter u. */
    private static final String ESCAPE_PREFIX = "\\u";

    /** Number of hexadecimal digits in a well-formed escape. */
    private static final int HEX_DIGITS = 4;

    /**
     * Small demonstration: escapes a sample string, decodes a fixed escaped
     * string and prints the results to standard output.
     *
     * @param args ignored
     * @throws IOException never thrown by the current body; kept in the
     *         signature so existing callers that catch it still compile
     */
    public static void main(String[] args) throws IOException {
        String s = " ";
        // Text -> escape notation.
        String tt = gbEncoding(s);
        System.out.println("unicodeBytes is: " + tt);
        // Escape notation -> text.
        System.out.println(" : " + decodeUnicode("\\u7b80\\u4ecb"));
        System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        System.out.println(s.indexOf("\\"));
    }

    /**
     * Escapes every UTF-16 code unit of the given string.
     *
     * <p>Each code unit is always written with exactly four hex digits
     * (zero-padded on the left), so the output can be decoded unambiguously.
     * Characters outside the Basic Multilingual Plane are emitted as their two
     * surrogate code units.
     *
     * @param gbString the text to escape; must not be {@code null}
     * @return the escaped text; empty if {@code gbString} is empty
     * @throws NullPointerException if {@code gbString} is {@code null}
     */
    public static String gbEncoding(final String gbString) {
        // Every input code unit becomes exactly 6 output characters.
        final StringBuilder escaped = new StringBuilder(gbString.length() * 6);
        for (int i = 0; i < gbString.length(); i++) {
            final String hex = Integer.toHexString(gbString.charAt(i));
            escaped.append(ESCAPE_PREFIX);
            // Left-pad to four digits regardless of how many digits the value has.
            for (int pad = hex.length(); pad < HEX_DIGITS; pad++) {
                escaped.append('0');
            }
            escaped.append(hex);
        }
        return escaped.toString();
    }

    /**
     * Decodes "backslash-u" escapes in the given string back into characters.
     *
     * <p>An escape is the marker followed by one to four hexadecimal digits;
     * at most four digits are consumed. Fewer than four digits are tolerated
     * so that short escapes produced by earlier versions of
     * {@link #gbEncoding(String)} still decode. Any text that is not part of
     * an escape, including a marker that is not followed by a hex digit, is
     * copied to the result unchanged.
     *
     * @param dataStr the text to decode; must not be {@code null}
     * @return the decoded text; empty if {@code dataStr} is empty
     * @throws NullPointerException if {@code dataStr} is {@code null}
     */
    public static String decodeUnicode(final String dataStr) {
        final int length = dataStr.length();
        final StringBuilder decoded = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            if (dataStr.startsWith(ESCAPE_PREFIX, pos)) {
                int cursor = pos + ESCAPE_PREFIX.length();
                int value = 0;
                int digits = 0;
                while (cursor < length && digits < HEX_DIGITS) {
                    final int digit = Character.digit(dataStr.charAt(cursor), 16);
                    if (digit < 0) {
                        break;
                    }
                    value = value * 16 + digit;
                    digits++;
                    cursor++;
                }
                if (digits > 0) {
                    decoded.append((char) value);
                    pos = cursor;
                    continue;
                }
                // Marker without digits: fall through and copy it literally.
            }
            decoded.append(dataStr.charAt(pos));
            pos++;
        }
        return decoded.toString();
    }
}
コードの詳細:
/**
 * Decodes "backslash-u" escapes (a backslash, the letter {@code u} and exactly
 * four hexadecimal digits) in the given string back into characters.
 *
 * <p>The string is scanned left to right. Wherever a complete escape starts
 * at the current position, its four digits are converted to one UTF-16 code
 * unit and the scan skips past all six characters. Everything else, including
 * an incomplete or malformed escape, is copied to the result unchanged, so no
 * placeholder characters are inserted and no exception is raised for
 * malformed input.
 *
 * @param dataStr the text to decode; must not be {@code null}
 * @return the decoded text; empty if {@code dataStr} is empty
 * @throws NullPointerException if {@code dataStr} is {@code null}
 */
public static String decodeUnicode(final String dataStr) {
    final int length = dataStr.length();
    final StringBuilder buffer = new StringBuilder(length);
    int pos = 0;
    while (pos < length) {
        // -1 means "no complete escape starts at pos".
        int codeUnit = -1;
        // A complete escape occupies 6 characters: the 2-character marker plus 4 digits.
        if (pos + 6 <= length && dataStr.startsWith("\\u", pos)) {
            codeUnit = 0;
            for (int i = pos + 2; i < pos + 6; i++) {
                final int digit = Character.digit(dataStr.charAt(i), 16);
                if (digit < 0) {
                    // Not a hex digit: treat the marker as ordinary text.
                    codeUnit = -1;
                    break;
                }
                codeUnit = codeUnit * 16 + digit;
            }
        }
        if (codeUnit >= 0) {
            buffer.append((char) codeUnit);
            pos += 6;
        } else {
            buffer.append(dataStr.charAt(pos));
            pos++;
        }
    }
    return buffer.toString();
}