Discuz字符编码转换类2
<?php
/**
 * [Discuz!] (C)2001-2099 Comsenz Inc.
 * This is NOT a freeware, use is subject to license terms
 *
 * $Id: class_chinese.php 6757 2010-03-25 09:01:29Z cnteacher $
 */

// Directory containing the binary code tables referenced by Chinese::$config.
// DISCUZ_ROOT must already be defined by the including script.
define('CODETABLE_DIR', DISCUZ_ROOT.'./source/include/table/');
/**
 * Character-set converter between GBK, BIG5, UTF-8 and "UNICODE"
 * (HTML numeric entities such as &#x4e2d;).
 *
 * Uses iconv() when ICONV_ENABLE is true, the target is not BIG5 and table
 * mode is not forced; otherwise it falls back to the binary lookup tables
 * found in CODETABLE_DIR.
 *
 * Usage:
 *   $c = new Chinese('utf8', 'gbk', TRUE);
 *   $text = $c->Convert($text);
 */
class Chinese {

	/** Path of the code table file picked by OpenTable(); empty until one is selected. */
	var $table = '';

	/** TRUE when conversions are delegated to iconv() instead of the lookup tables. */
	var $iconv_enabled = false;

	/** TRUE when the requested target was BIG5 and output must pass through GB2312toBIG5(). */
	var $convertbig5 = false;

	/** Lookup table filled by OpenTable(); values are hex strings (see OpenTable()). */
	var $unicode_table = array();

	/** Normalised language codes plus the file names of the code tables. */
	var $config = array(
		'SourceLang' => '',
		'TargetLang' => '',
		'GBtoUnicode_table' => 'gb-unicode.table',
		'BIG5toUnicode_table' => 'big5-unicode.table',
		'GBtoBIG5_table' => 'gb-big5.table',
	);

	/**
	 * Sets up a converter from $SourceLang to $TargetLang.
	 *
	 * @param string $SourceLang Source charset name (matched by prefix, see _lang()).
	 * @param string $TargetLang Target charset name (matched by prefix, see _lang()).
	 * @param bool   $ForceTable TRUE to use the lookup tables even if iconv is available.
	 */
	function __construct($SourceLang, $TargetLang, $ForceTable = FALSE) {
		$this->config['SourceLang'] = $this->_lang($SourceLang);
		$this->config['TargetLang'] = $this->_lang($TargetLang);

		if(ICONV_ENABLE && $this->config['TargetLang'] != 'BIG5' && !$ForceTable) {
			$this->iconv_enabled = true;
		} else {
			$this->iconv_enabled = false;
			$this->OpenTable();
		}
	}

	/**
	 * PHP4-style constructor kept for callers that invoke it by name.
	 * PHP 8 no longer treats this method as a constructor, so the real work
	 * lives in __construct().
	 */
	function Chinese($SourceLang, $TargetLang, $ForceTable = FALSE) {
		$this->__construct($SourceLang, $TargetLang, $ForceTable);
	}

	/**
	 * Maps a loosely written charset name onto one of the four internal codes.
	 *
	 * @param string $LangCode e.g. 'gb2312', 'big5', 'utf8', 'unicode'.
	 * @return string|null 'GBK', 'BIG5', 'UTF-8' or 'UNICODE'; NULL when the
	 *                     prefix is not recognised.
	 */
	function _lang($LangCode) {
		$LangCode = strtoupper($LangCode);
		if(substr($LangCode, 0, 2) == 'GB') {
			return 'GBK';
		}
		if(substr($LangCode, 0, 3) == 'BIG') {
			return 'BIG5';
		}
		if(substr($LangCode, 0, 3) == 'UTF') {
			return 'UTF-8';
		}
		if(substr($LangCode, 0, 3) == 'UNI') {
			return 'UNICODE';
		}
		return null;
	}

	/**
	 * Turns a string of hex digits into the bytes it encodes, two digits per byte.
	 *
	 * @param string $hexdata Hex digits, e.g. 'd6d0'.
	 * @return string Raw bytes; '' for empty input.
	 */
	function _hex2bin($hexdata) {
		// Initialised explicitly: the result used to be undefined for empty input.
		$bindata = '';
		$len = strlen($hexdata);
		for($i = 0; $i < $len; $i += 2) {
			$bindata .= chr(hexdec(substr($hexdata, $i, 2)));
		}
		return $bindata;
	}

	/**
	 * Returns the hex digits stored in the lookup table for $key, without the
	 * '0x' prefix that OpenTable() adds in the UTF-8 modes.
	 *
	 * @param int $key Table key.
	 * @return string Hex digits, or '' when the key is not in the table.
	 */
	function _tableHex($key) {
		if(!isset($this->unicode_table[$key])) {
			return '';
		}
		$entry = (string) $this->unicode_table[$key];
		if(strncmp($entry, '0x', 2) == 0) {
			$entry = (string) substr($entry, 2);
		}
		return $entry;
	}

	/**
	 * Selects and loads the lookup table needed for the configured conversion.
	 *
	 * The table file is a sequence of 4-byte records, each holding two
	 * big-endian 16-bit values (key, value). Depending on the direction the
	 * in-memory table is keyed by either of them:
	 *   - target UTF-8:   key   => '0x'.hex(value)
	 *   - source UTF-8:   value => '0x'.hex(key)
	 *   - target UNICODE: key   => hex(value)
	 */
	function OpenTable() {
		$this->unicode_table = array();

		// Without iconv a BIG5 target is produced in two steps: convert to GBK
		// first, then map GBK to BIG5 in GB2312toBIG5(). BIG5 -> BIG5 is left
		// alone so that Convert() can return the text untouched.
		if(!$this->iconv_enabled && $this->config['TargetLang'] == 'BIG5' && $this->config['SourceLang'] != 'BIG5') {
			$this->config['TargetLang'] = 'GBK';
			$this->convertbig5 = TRUE;
		}

		if($this->config['SourceLang'] == 'GBK' || $this->config['TargetLang'] == 'GBK') {
			$this->table = CODETABLE_DIR.$this->config['GBtoUnicode_table'];
		} elseif($this->config['SourceLang'] == 'BIG5' || $this->config['TargetLang'] == 'BIG5') {
			$this->table = CODETABLE_DIR.$this->config['BIG5toUnicode_table'];
		}

		$toUtf8 = $this->config['TargetLang'] == 'UTF-8';
		$fromUtf8 = $this->config['SourceLang'] == 'UTF-8';
		$toUnicode = $this->config['TargetLang'] == 'UNICODE';

		// Nothing to load when no table applies or no direction needs entries.
		if($this->table == '' || (!$toUtf8 && !$fromUtf8 && !$toUnicode)) {
			return;
		}

		// file_get_contents() opens, reads and closes in one call, so no handle leaks.
		$tabletmp = file_get_contents($this->table);
		if($tabletmp === false) {
			return;
		}

		// Only complete 4-byte records are read; a truncated tail is ignored.
		$len = strlen($tabletmp);
		for($i = 0; $i + 4 <= $len; $i += 4) {
			$tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
			if($toUtf8) {
				$this->unicode_table[$tmp['key']] = '0x'.dechex($tmp['value']);
			} elseif($fromUtf8) {
				$this->unicode_table[$tmp['value']] = '0x'.dechex($tmp['key']);
			} else {
				$this->unicode_table[$tmp['key']] = dechex($tmp['value']);
			}
		}
	}

	/**
	 * Encodes a code point as UTF-8 and returns the byte values as decimal
	 * numbers concatenated into one string (e.g. 0x4e2d => '228184173').
	 *
	 * Every byte of a multi-byte sequence is >= 128 and therefore exactly
	 * three digits long, which is what lets Convert() split the result into
	 * 3-character chunks. This odd return format is kept for compatibility.
	 *
	 * @param int $c Code point.
	 * @return string Decimal byte values; '' when $c >= 0x200000.
	 */
	function CHSUtoUTF8($c) {
		$str = '';
		if($c < 0x80) {
			$str .= $c;
		} elseif($c < 0x800) {
			$str .= (0xC0 | ($c >> 6));
			$str .= (0x80 | ($c & 0x3F));
		} elseif($c < 0x10000) {
			$str .= (0xE0 | ($c >> 12));
			$str .= (0x80 | (($c >> 6) & 0x3F));
			$str .= (0x80 | ($c & 0x3F));
		} elseif($c < 0x200000) {
			$str .= (0xF0 | ($c >> 18));
			$str .= (0x80 | (($c >> 12) & 0x3F));
			$str .= (0x80 | (($c >> 6) & 0x3F));
			$str .= (0x80 | ($c & 0x3F));
		}
		return $str;
	}

	/**
	 * Maps GBK double-byte characters to BIG5 using the gb-big5 table file.
	 *
	 * @param string $c GBK encoded text.
	 * @return string Text with every double-byte character replaced; returned
	 *                unchanged when the table file cannot be opened.
	 */
	function GB2312toBIG5($c) {
		// Binary mode: the table is raw bytes and must not be newline-translated.
		$f = fopen(CODETABLE_DIR.$this->config['GBtoBIG5_table'], 'rb');
		if(!$f) {
			return $c;
		}

		$max = strlen($c) - 1;
		for($i = 0; $i < $max; $i++) {
			$h = ord($c[$i]);
			if($h >= 160) {
				$l = ord($c[$i + 1]);
				if($h == 161 && $l == 64) {
					// This pair is replaced by two plain spaces instead of a table lookup.
					$gb = '  ';
				} else {
					fseek($f, ($h - 160) * 510 + ($l - 1) * 2);
					$gb = fread($f, 2);
				}
				// A short read (offset past the end of the table) leaves the pair as is.
				if(is_string($gb) && strlen($gb) == 2) {
					$c[$i] = $gb[0];
					$c[$i + 1] = $gb[1];
				}
				$i++;
			}
		}
		fclose($f);

		return $c;
	}

	/**
	 * Converts $SourceText from the configured source charset to the target.
	 *
	 * @param string $SourceText Text in the source charset.
	 * @return string|false Converted text; FALSE when iconv() itself fails.
	 */
	function Convert($SourceText) {
		$source = $this->config['SourceLang'];
		$target = $this->config['TargetLang'];

		if($source == $target) {
			// GBK -> BIG5 in table mode ends up here because OpenTable() rewrote
			// the target to GBK; the GBK -> BIG5 step still has to run.
			return $this->convertbig5 ? $this->GB2312toBIG5($SourceText) : $SourceText;
		}

		if($target == 'UNICODE') {
			if($source == 'UTF-8') {
				return $this->_utf8ToEntities($SourceText);
			}
			return $this->iconv_enabled
				? $this->_iconvToEntities($SourceText)
				: $this->_tableToEntities($SourceText);
		}

		if($this->iconv_enabled) {
			return iconv($source, $target, $SourceText);
		}

		if($source == 'UTF-8') {
			$out = $this->_utf8ToDbcs($SourceText);
			return !$this->convertbig5 ? $out : $this->GB2312toBIG5($out);
		}

		return $this->_dbcsToUtf8($SourceText);
	}

	/**
	 * UTF-8 -> numeric entities. Sequence length is taken from the lead byte so
	 * that 3- and 4-byte characters are decoded as a whole.
	 */
	function _utf8ToEntities($text) {
		$text = (string) $text;
		$out = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			$lead = ord($text[$i]);
			if($lead < 0x80) {
				$out .= $text[$i];
				$i++;
				continue;
			}
			if($lead >= 0xF0) {
				$size = 4;
			} elseif($lead >= 0xE0) {
				$size = 3;
			} elseif($lead >= 0xC0) {
				$size = 2;
			} else {
				// Stray continuation byte: consume it on its own.
				$size = 1;
			}
			$out .= '&#x'.dechex((int) $this->Utf8_Unicode((string) substr($text, $i, $size))).';';
			$i += $size;
		}
		return $out;
	}

	/**
	 * Double-byte source -> numeric entities, using iconv() per character.
	 * Every byte above 127 is taken as the first half of a 2-byte character.
	 */
	function _iconvToEntities($text) {
		$text = (string) $text;
		$out = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			if(ord($text[$i]) > 127) {
				$utf8 = iconv($this->config['SourceLang'], 'UTF-8', substr($text, $i, 2));
				// A failed iconv() yields code point 0, i.e. '&#x0;'.
				$out .= '&#x'.dechex((int) $this->Utf8_Unicode((string) $utf8)).';';
				$i += 2;
			} else {
				$out .= $text[$i];
				$i++;
			}
		}
		return $out;
	}

	/**
	 * Double-byte source -> numeric entities, using the lookup table.
	 * GBK keys are stored with 0x8080 subtracted; BIG5 keys are stored as is.
	 */
	function _tableToEntities($text) {
		$text = (string) $text;
		$source = $this->config['SourceLang'];
		$out = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			if(ord($text[$i]) > 127) {
				$code = hexdec(bin2hex(substr($text, $i, 2)));
				if($source == 'GBK') {
					$out .= '&#x'.$this->_tableHex($code - 0x8080).';';
				} elseif($source == 'BIG5') {
					$out .= '&#x'.$this->_tableHex($code).';';
				}
				$i += 2;
			} else {
				$out .= $text[$i];
				$i++;
			}
		}
		return $out;
	}

	/**
	 * UTF-8 -> GBK/BIG5 via the lookup table. Only 1-, 2- and 3-byte sequences
	 * are handled; other lead bytes and stray continuation bytes are dropped.
	 */
	function _utf8ToDbcs($text) {
		$text = (string) $text;
		$out = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			$c = ord($text[$i++]);
			switch($c >> 4) {
				case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
					$out .= chr($c);
					break;
				case 12: case 13:
					$b2 = $i < $len ? ord($text[$i]) : 0;
					$i++;
					$out .= $this->_encodeDbcs((($c & 0x1F) << 6) | ($b2 & 0x3F));
					break;
				case 14:
					$b2 = $i < $len ? ord($text[$i]) : 0;
					$i++;
					$b3 = $i < $len ? ord($text[$i]) : 0;
					$i++;
					$out .= $this->_encodeDbcs((($c & 0x0F) << 12) | (($b2 & 0x3F) << 6) | ($b3 & 0x3F));
					break;
			}
		}
		return $out;
	}

	/**
	 * Looks up a code point and returns its bytes in the target double-byte charset.
	 *
	 * The table entry is decoded with hexdec() instead of relying on PHP
	 * treating '0x...' strings as numbers, which stopped working in PHP 7.
	 */
	function _encodeDbcs($codepoint) {
		$hex = $this->_tableHex($codepoint);
		if($this->config['TargetLang'] == 'GBK') {
			return $this->_hex2bin(dechex(hexdec($hex) + 0x8080));
		}
		if($this->config['TargetLang'] == 'BIG5') {
			return $this->_hex2bin($hex);
		}
		return '';
	}

	/**
	 * GBK/BIG5 -> UTF-8 via the lookup table.
	 */
	function _dbcsToUtf8($text) {
		$text = (string) $text;
		$source = $this->config['SourceLang'];
		$out = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			if(ord($text[$i]) > 127) {
				$code = hexdec(bin2hex(substr($text, $i, 2)));
				if($source == 'BIG5') {
					$digits = $this->CHSUtoUTF8(hexdec($this->_tableHex($code)));
				} elseif($source == 'GBK') {
					$digits = $this->CHSUtoUTF8(hexdec($this->_tableHex($code - 0x8080)));
				} else {
					$digits = '';
				}
				// CHSUtoUTF8() returns decimal byte values, three digits per byte.
				$digitCount = strlen($digits);
				for($j = 0; $j < $digitCount; $j += 3) {
					$out .= chr((int) substr($digits, $j, 3));
				}
				$i += 2;
			} else {
				$out .= $text[$i];
				$i++;
			}
		}
		return $out;
	}

	/**
	 * Decodes a single UTF-8 encoded character into its code point.
	 *
	 * @param string $char One UTF-8 sequence of 1 to 4 bytes.
	 * @return int|null Code point, or NULL when the length is not 1..4.
	 */
	function Utf8_Unicode($char) {
		switch(strlen($char)) {
			case 1:
				return ord($char);
			case 2:
				$n = (ord($char[0]) & 0x1f) << 6;
				$n += ord($char[1]) & 0x3f;
				return $n;
			case 3:
				$n = (ord($char[0]) & 0x0f) << 12;
				$n += (ord($char[1]) & 0x3f) << 6;
				$n += ord($char[2]) & 0x3f;
				return $n;
			case 4:
				$n = (ord($char[0]) & 0x07) << 18;
				$n += (ord($char[1]) & 0x3f) << 12;
				$n += (ord($char[2]) & 0x3f) << 6;
				$n += ord($char[3]) & 0x3f;
				return $n;
		}
		return null;
	}
}
?>
转换类用的数据库 source/include/table gb-unicode.table
$c = new Chinese('utf8', CHARSET, TRUE);
$data = $c->Convert($data);