Go言語：UTF-16形式の[]byteをUTF-8形式のstringに変換する

1235 ワード


// u16To8 converts a buffer of UTF-16 little-endian code units into a UTF-8
// encoded string.
//
// Each pair of input bytes is read as one 16-bit code unit, low byte first.
// A high surrogate immediately followed by a low surrogate is combined into
// the single supplementary code point it represents, so characters above
// U+FFFF are emitted as one valid 4-byte UTF-8 sequence. A surrogate that is
// not part of a valid pair is replaced with U+FFFD. A byte order mark, if
// present, is not stripped; it is converted like any other code unit.
//
// If len(u16) is odd the input cannot be split into whole code units; the
// problem is logged and the empty string is returned.
func u16To8(u16 []byte) string {
	if len(u16)%2 != 0 {
		log.Println("err len", len(u16))
		return ""
	}

	const (
		highStart         = 0xD800  // first high (leading) surrogate
		lowStart          = 0xDC00  // first low (trailing) surrogate
		lowEnd            = 0xE000  // one past the last low surrogate
		supplementaryBase = 0x10000 // first code point encoded with a surrogate pair
	)

	var body bytes.Buffer
	// Every code unit yields at least one output byte, so this is a lower
	// bound that avoids the first few reallocations.
	body.Grow(len(u16) / 2)

	for i := 0; i < len(u16); i += 2 {
		r := rune(u16[i]) | rune(u16[i+1])<<8

		// Try to merge a high surrogate with the low surrogate that follows it.
		if r >= highStart && r < lowStart && i+3 < len(u16) {
			lo := rune(u16[i+2]) | rune(u16[i+3])<<8
			if lo >= lowStart && lo < lowEnd {
				r = supplementaryBase + (r-highStart)<<10 + (lo - lowStart)
				i += 2 // the low surrogate has been consumed as well
			}
		}

		// WriteRune emits the UTF-8 encoding of r and substitutes U+FFFD for
		// values that are not valid code points (i.e. unpaired surrogates).
		body.WriteRune(r)
	}
	return body.String()
}

 
import (
	"bytes"
	"log"
	"strings"

	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/unicode"
)


// decoder is the package-wide UTF-16 decoder. It is assigned once in init and
// is configured for little-endian input with the IgnoreBOM policy.
var decoder *encoding.Decoder

// init builds the shared decoder during package initialization.
func init() {
	utf16LE := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
	decoder = utf16LE.NewDecoder()
}

//  

// Decode the bytes in data[nOffset:nNewOffset] with the package-level decoder
// and return the result as a string.
// NOTE(review): the error returned by decoder.Bytes is discarded here —
// confirm that silently ignoring a failed decode is intended.
str, _ := decoder.Bytes(data[nOffset:nNewOffset])
return string(str)