中国語漢字からデジタルコードへの変換方法【回転】

4902 ワード

class StrFormat_F10{ static function encode($var){ $ascii = ''; $strlen_var = strlen($var); for ($c = 0; $c < $strlen_var;++$c) { $ord_var_c = ord($var{$c}); switch (true) { case $ord_var_c == 0x08: $ascii .= '\b'; break; case $ord_var_c == 0x09: $ascii .= '\t'; break; case $ord_var_c == 0x0A: $ascii .= ''; break; case $ord_var_c == 0x0C: $ascii .= '\f'; break; case $ord_var_c == 0x0D: $ascii .= '\r'; break; case $ord_var_c == 0x22: case $ord_var_c == 0x2F: case $ord_var_c == 0x5C: $ascii .= '\\'.$var{$c}; break; case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)): $ascii .= $var{$c}; break; case (($ord_var_c & 0xE0) == 0xC0): $char = pack('C*', $ord_var_c, ord($var{$c + 1})); $c += 1; $utf16 = self::utf82utf16($char); $ascii .= ''.hexdec(bin2hex($utf16)).';'; break; case (($ord_var_c & 0xF0) == 0xE0): $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2})); $c += 2; $utf16 = self::utf82utf16($char); $ascii .= ''.hexdec(bin2hex($utf16)).';'; break; case (($ord_var_c & 0xF8) == 0xF0): $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2}), ord($var{$c + 3})); $c += 3; $utf16 = self::utf82utf16($char); $ascii .= ''.hexdec(bin2hex($utf16)).';'; break; case (($ord_var_c & 0xFC) == 0xF8): $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2}), ord($var{$c + 3}), ord($var{$c + 4})); $c += 4; $utf16 = self::utf82utf16($char); $ascii .= ''.hexdec(bin2hex($utf16)).';'; break; case (($ord_var_c & 0xFE) == 0xFC): $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2}), ord($var{$c + 3}), ord($var{$c + 4}), ord($var{$c + 5})); $c += 5; $utf16 = self::utf82utf16($char); $ascii .= ''.hexdec(bin2hex($utf16)).';'; break; } } return $ascii; } static function decode($var){ $chrs = $var; $utf8 = ''; $strlen_chrs = strlen($chrs); for ($c = 0; $c < $strlen_chrs; $c++) { $substr_chrs_c_2 = substr($chrs, $c, 2); $ord_chrs_c = ord($chrs{$c}); switch (true) { case $substr_chrs_c_2 == '\b': $utf8 .= chr(0x08);++$c; break; case $substr_chrs_c_2 == '\t': $utf8 .= chr(0x09);++$c; break; case $substr_chrs_c_2 == '': $utf8 .= chr(0x0A);++$c; break; case $substr_chrs_c_2 == '\f': $utf8 .= chr(0x0C);++$c; break; case $substr_chrs_c_2 == '\r': $utf8 .= chr(0x0D);++$c; break; case $substr_chrs_c_2 == '\\"': case $substr_chrs_c_2 == '\\\'': case $substr_chrs_c_2 == '\\\\': case $substr_chrs_c_2 == '\\/': if (($delim == '"' && $substr_chrs_c_2 != '\\\'') || ($delim == "'"&& $substr_chrs_c_2 != '\\"')) { $utf8 .= $chrs{++$c}; } break; case preg_match('/\&\#[0-9]{5};/i', substr($chrs, $c, 8)): $tmpstr=substr($chrs, $c, 8); $tmpstr=dechex(substr($tmpstr, 2, 5)); $utf16 = chr(hexdec(substr($tmpstr, 0, 2))) . chr(hexdec(substr($tmpstr, 2, 2))); $utf8 .= self::utf162utf8($utf16); $c += 7; break; case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F): $utf8 .= $chrs{$c}; break; case ($ord_chrs_c & 0xE0) == 0xC0: $utf8 .= substr($chrs, $c, 2);++$c; break; case ($ord_chrs_c & 0xF0) == 0xE0: $utf8 .= substr($chrs, $c, 3); $c += 2; break; case ($ord_chrs_c & 0xF8) == 0xF0: $utf8 .= substr($chrs, $c, 4); $c += 3; break; case ($ord_chrs_c & 0xFC) == 0xF8: $utf8 .= substr($chrs, $c, 5); $c += 4; break; case ($ord_chrs_c & 0xFE) == 0xFC: $utf8 .= substr($chrs, $c, 6); $c += 5; break; } } return $utf8; } static function utf82utf16($utf8){ if(function_exists('mb_convert_encoding')) { return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8'); } switch(strlen($utf8)) { case 1: return $utf8; case 2: return chr(0x07 & (ord($utf8{0}) >> 2)) . chr((0xC0 & (ord($utf8{0}) << 6)) | (0x3F & ord($utf8{1}))); case 3: return chr((0xF0 & (ord($utf8{0}) << 4)) | (0x0F & (ord($utf8{1}) >> 2))) . chr((0xC0 & (ord($utf8{1}) << 6)) | (0x7F & ord($utf8{2}))); } return ''; } static function utf162utf8($utf16){ if(function_exists('mb_convert_encoding')) { return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16'); } $bytes = (ord($utf16{0}) << 8) | ord($utf16{1}); switch(true) { case ((0x7F & $bytes) == $bytes): return chr(0x7F & $bytes); case (0x07FF & $bytes) == $bytes: return chr(0xC0 | (($bytes >> 6) & 0x1F)) . chr(0x80 | ($bytes & 0x3F)); case (0xFFFF & $bytes) == $bytes: return chr(0xE0 | (($bytes >> 12) & 0x0F)) . chr(0x80 | (($bytes >> 6) & 0x3F)) . chr(0x80 | ($bytes & 0x3F)); } return ''; }}$str = StrFormat_F 10::encode(「旅行」);print_r($str . "");