Javaで中国語の文字列をUnicodeエスケープコードに変換し、UnicodeエスケープコードをUnicode文字(中国語)に戻す方法。

8055 ワード

出典:
http://blog.csdn.net/jdsjlzx/article/details/7058823
 
package lia.meetlucene;

import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;

/**
 * Converts text to and from an escaped form in which every UTF-16 code unit
 * is written as a backslash, the letter {@code u}, and exactly four
 * lower-case hexadecimal digits.
 */
public class Unicode {

    /** Number of hexadecimal digits that follow the two-character escape prefix. */
    private static final int HEX_DIGITS = 4;

    /** Total length of one escape: the two-character prefix plus the hex digits. */
    private static final int ESCAPE_LENGTH = 2 + HEX_DIGITS;

    /**
     * Small demonstration: encodes a sample string, decodes a fixed pair of
     * escapes, and prints the results to standard output.
     *
     * @param args ignored
     * @throws IOException never thrown by the current body; the declaration is
     *         kept so existing callers that catch it still compile. The former
     *         Lucene-specific exception in this clause is a subclass of
     *         IOException and is therefore already covered.
     */
    public static void main(String[] args) throws IOException {
        String s = "  ";
        String tt = gbEncoding(s);
        System.out.println("unicodeBytes is: " + tt);
        // Decode two escaped code units back into their characters.
        System.out.println("     : " + decodeUnicode("\\u7b80\\u4ecb"));
        System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        System.out.println(s.indexOf("\\"));
    }

    /**
     * Escapes every UTF-16 code unit of the given string.
     *
     * <p>Each code unit is always written with exactly four hex digits, so the
     * output can be split unambiguously and read back by
     * {@link #decodeUnicode(String)}.
     *
     * @param gbString the text to escape; must not be {@code null}
     * @return the escaped text, six characters per input code unit; empty when
     *         the input is empty
     * @throws NullPointerException if {@code gbString} is {@code null}
     */
    public static String gbEncoding(final String gbString) {
        final StringBuilder escaped = new StringBuilder(gbString.length() * ESCAPE_LENGTH);
        for (int i = 0; i < gbString.length(); i++) {
            final String hex = Integer.toHexString(gbString.charAt(i));
            escaped.append("\\u");
            // Left-pad to four digits; toHexString yields between one and four for a char.
            for (int width = hex.length(); width < HEX_DIGITS; width++) {
                escaped.append('0');
            }
            escaped.append(hex);
        }
        return escaped.toString();
    }

    /**
     * Replaces every well-formed escape (prefix plus exactly four hex digits)
     * in the given string with the code unit it denotes.
     *
     * <p>Characters that are not part of a well-formed escape, including
     * incomplete or non-hexadecimal escapes, are copied to the result
     * unchanged instead of raising an exception.
     *
     * @param dataStr the text to decode; must not be {@code null}
     * @return the decoded text; empty when the input is empty
     * @throws NullPointerException if {@code dataStr} is {@code null}
     */
    public static String decodeUnicode(final String dataStr) {
        final int length = dataStr.length();
        final StringBuilder decoded = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            final int codeUnit = readEscape(dataStr, pos);
            if (codeUnit >= 0) {
                decoded.append((char) codeUnit);
                pos += ESCAPE_LENGTH;
            } else {
                decoded.append(dataStr.charAt(pos));
                pos++;
            }
        }
        return decoded.toString();
    }

    /**
     * Reads one escape starting at {@code pos}.
     *
     * @return the code unit value (0..0xFFFF), or -1 when no well-formed
     *         escape starts at {@code pos}
     */
    private static int readEscape(final String text, final int pos) {
        if (pos + ESCAPE_LENGTH > text.length() || !text.startsWith("\\u", pos)) {
            return -1;
        }
        int value = 0;
        for (int i = pos + 2; i < pos + ESCAPE_LENGTH; i++) {
            final int digit = Character.digit(text.charAt(i), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

}
 
 
コードの詳細:
    /**
     * Replaces every well-formed escape in the given string (a backslash, the
     * letter {@code u}, and exactly four hexadecimal digits) with the UTF-16
     * code unit it denotes.
     *
     * <p>Anything that is not part of a well-formed escape, such as leading or
     * trailing plain text or an escape with too few or non-hexadecimal digits,
     * is copied to the result unchanged. Empty input yields an empty string.
     *
     * @param dataStr the text to decode; must not be {@code null}
     * @return the decoded text
     * @throws NullPointerException if {@code dataStr} is {@code null}
     */
    public static String decodeUnicode(final String dataStr) {
        final int length = dataStr.length();
        final StringBuilder buffer = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            // -1 means "no well-formed escape starts at index".
            int codeUnit = -1;
            if (index + 6 <= length && dataStr.startsWith("\\u", index)) {
                codeUnit = 0;
                for (int offset = 2; offset < 6; offset++) {
                    final int digit = Character.digit(dataStr.charAt(index + offset), 16);
                    if (digit < 0) {
                        codeUnit = -1;
                        break;
                    }
                    codeUnit = (codeUnit << 4) | digit;
                }
            }
            if (codeUnit >= 0) {
                buffer.append((char) codeUnit);
                index += 6; // skip the two-character prefix and the four digits
            } else {
                buffer.append(dataStr.charAt(index));
                index++;
            }
        }
        return buffer.toString();
    }