123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525 |
- <?php
// Flag constant set to true once this file has been loaded.
// NOTE(review): presumably lets other scripts detect that the converter is available — confirm against callers.
define('InEmpireCMSIconv', true);
/**
 * Table-driven converter between the GB2312, BIG5 and UTF8 encodings, with
 * one-way output as pinyin ("PinYin") or hexadecimal character references
 * ("UNICODE").
 *
 * Lookup tables are read from $config['codetable_dir']. When a table or a
 * source file cannot be read, a message is echoed and the script exits.
 *
 * Typical use: Convert($source_lang, $target_lang, $text). The lower-level
 * sequence is setvar() / OpenFile() / OpenTable() / ConvertIT().
 */
class Chinese
{
    // Simplified Chinese -> pinyin rows; each row is array(pinyin, code).
    var $pinyin_table = array();

    // Lookup between double-byte codes and Unicode code points (hex strings as values).
    var $unicode_table = array();

    // File handle of the binary Simplified <-> Traditional conversion table.
    var $ctf;

    // Text waiting to be converted.
    var $SourceText = "";

    // Settings: table directory, current language pair and table file names.
    var $config = array(
        'codetable_dir'       => '',                   // directory holding the conversion tables
        'source_lang'         => '',                   // encoding of the input text
        'target_lang'         => '',                   // encoding to convert to
        'GBtoBIG5_table'      => 'gb-big5.table',      // Simplified -> Traditional table
        'BIG5toGB_table'      => 'big5-gb.table',      // Traditional -> Simplified table
        'GBtoPinYin_table'    => 'gb-pinyin.table',    // Simplified -> pinyin table
        'GBtoUnicode_table'   => 'gb-unicode.table',   // Simplified -> Unicode table
        'BIG5toUnicode_table' => 'big5-unicode.table'  // Traditional -> Unicode table
    );

    /**
     * Sets the code-table directory to "<dir>../data/codetable/".
     *
     * A method named after the class is no longer treated as a constructor by
     * PHP 8, so the setup lives here and Chinese() forwards to it.
     *
     * @param string $dir Base path that the relative table directory is appended to.
     */
    function __construct($dir = './')
    {
        $this->config['codetable_dir'] = $dir . "../data/codetable/";
    }

    /**
     * Legacy entry point kept for code that calls it explicitly.
     *
     * @param string $dir Base path, see __construct().
     */
    function Chinese($dir = './')
    {
        $this->__construct($dir);
    }

    /**
     * Converts a string from one supported encoding to another.
     *
     * Empty language arguments leave the previously configured value in place.
     *
     * @param string $source_lang   "GB2312", "BIG5" or "UTF8".
     * @param string $target_lang   "GB2312", "BIG5", "UTF8", "PinYin" or "UNICODE".
     * @param string $source_string Text to convert.
     * @return string|null Converted text; the input itself when both languages
     *                     match or the text is empty; null for an unsupported pair.
     */
    function Convert($source_lang, $target_lang, $source_string = '')
    {
        // Nothing to do when the encodings match or there is no text.
        if ($source_lang == $target_lang || $source_string == '') {
            return $source_string;
        }

        if ($source_lang != '') {
            $this->config['source_lang'] = $source_lang;
        }
        if ($target_lang != '') {
            $this->config['target_lang'] = $target_lang;
        }

        $this->SourceText = $source_string;
        $this->OpenTable();

        return $this->ConvertIT();
    }

    /**
     * Turns a hexadecimal string into the bytes it describes, two digits per byte.
     * A trailing single digit is treated as a byte of its own.
     *
     * @param string $hexdata Hexadecimal digits.
     * @return string Binary string.
     */
    function _hex2bin($hexdata)
    {
        $bindata = '';
        $length = strlen($hexdata);
        for ($offset = 0; $offset < $length; $offset += 2) {
            $bindata .= chr(hexdec(substr($hexdata, $offset, 2)));
        }
        return $bindata;
    }

    /**
     * Loads the lookup table required by the configured source/target pair:
     * a file handle for Simplified <-> Traditional, the pinyin rows for
     * "PinYin", or the code-point map for "UTF8" / "UNICODE".
     *
     * Echoes a message and exits when the table cannot be opened or is empty.
     */
    function OpenTable()
    {
        $source = $this->config['source_lang'];
        $target = $this->config['target_lang'];

        if ($source == "GB2312") {
            if ($target == "BIG5") {
                $this->_openCodeTable('GBtoBIG5_table');
            } elseif ($target == "PinYin") {
                $this->_loadPinyinTable();
            } elseif ($target == "UTF8") {
                $this->_loadUnicodeTable('GBtoUnicode_table', 0, 7, 6, "编码转换失败!");
            } elseif ($target == "UNICODE") {
                $this->_loadUnicodeTable('GBtoUnicode_table', 0, 9, 4);
            }
        } elseif ($source == "BIG5") {
            if ($target == "GB2312") {
                $this->_openCodeTable('BIG5toGB_table');
            } elseif ($target == "UTF8") {
                $this->_loadUnicodeTable('BIG5toUnicode_table', 0, 7, 6);
            } elseif ($target == "UNICODE") {
                $this->_loadUnicodeTable('BIG5toUnicode_table', 0, 9, 4);
            } elseif ($target == "PinYin") {
                // BIG5 text is first converted to GB2312 by CHStoPinYin().
                $this->_loadPinyinTable();
            }
        } elseif ($source == "UTF8") {
            // Reverse direction: key on the Unicode column, keep the double-byte column.
            if ($target == "GB2312") {
                $this->_loadUnicodeTable('GBtoUnicode_table', 7, 0, 6);
            } elseif ($target == "BIG5") {
                $this->_loadUnicodeTable('BIG5toUnicode_table', 7, 0, 6);
            }
        }
    }

    /**
     * Opens a binary conversion table and stores the handle in $this->ctf.
     * fopen() reports failure with false, so that is what is tested.
     *
     * @param string $tableKey Key in $config naming the table file.
     */
    function _openCodeTable($tableKey)
    {
        $handle = fopen($this->config['codetable_dir'] . $this->config[$tableKey], "rb");
        if (!$handle) {
            echo "打开打开转换表文件失败!";
            exit;
        }
        $this->ctf = $handle;
    }

    /**
     * Reads a text table into an array of lines; exits if it is missing or empty.
     *
     * @param string $tableKey       Key in $config naming the table file.
     * @param string $failureMessage Message echoed before exiting.
     * @return array Lines of the table, line endings included.
     */
    function _readTableLines($tableKey, $failureMessage = "打开打开转换表文件失败!")
    {
        $lines = @file($this->config['codetable_dir'] . $this->config[$tableKey]);
        if (!$lines) {
            echo $failureMessage;
            exit;
        }
        return $lines;
    }

    /**
     * Fills $pinyin_table with one array(first field, second field) row per
     * table line, fields being separated by a single space.
     */
    function _loadPinyinTable()
    {
        $lines = $this->_readTableLines('GBtoPinYin_table');
        foreach ($lines as $index => $line) {
            $fields = explode(" ", $line);
            $this->pinyin_table[$index] = array($fields[0], isset($fields[1]) ? $fields[1] : null);
        }
    }

    /**
     * Rebuilds $unicode_table from a text table.
     *
     * Each line contributes one entry: the 6 characters at $keyStart are parsed
     * as hexadecimal to form the key, and the $valueLength characters at
     * $valueStart are stored verbatim as the value.
     *
     * @param string $tableKey       Key in $config naming the table file.
     * @param int    $keyStart       Column where the key field starts.
     * @param int    $valueStart     Column where the value field starts.
     * @param int    $valueLength    Width of the value field.
     * @param string $failureMessage Message echoed before exiting on failure.
     */
    function _loadUnicodeTable($tableKey, $keyStart, $valueStart, $valueLength, $failureMessage = "打开打开转换表文件失败!")
    {
        $lines = $this->_readTableLines($tableKey, $failureMessage);
        $this->unicode_table = array();
        foreach ($lines as $line) {
            $key = $this->_hexToInt(substr($line, $keyStart, 6));
            $this->unicode_table[$key] = substr($line, $valueStart, $valueLength);
        }
    }

    /**
     * Parses a hexadecimal field, ignoring every non-hex character.
     * hexdec() ignores such characters as well but complains about them on
     * recent PHP versions, so they are stripped first.
     *
     * @param string $text Field taken from a table line.
     * @return int|float Parsed value, 0 for an empty field.
     */
    function _hexToInt($text)
    {
        return hexdec(preg_replace('/[^0-9A-Fa-f]/', '', (string) $text));
    }

    /**
     * Loads a file into $SourceText. With $isHTML set, the charset declaration
     * is switched to the target language and all line breaks are removed.
     *
     * @param string $position Path of the file to read.
     * @param bool   $isHTML   Whether to apply the HTML adjustments.
     */
    function OpenFile($position, $isHTML = false)
    {
        $this->SourceText = $this->_readSourceFile($position);
        if ($isHTML) {
            $this->SourceText = $this->_retargetCharset($this->SourceText);
            $this->SourceText = str_replace(array("\n", "\r"), "", $this->SourceText);
        }
    }

    /**
     * Loads a file into $SourceText and switches its charset declaration to
     * the target language.
     *
     * @param string $position Path of the file to read.
     */
    function SiteOpen($position)
    {
        $this->SourceText = $this->_retargetCharset($this->_readSourceFile($position));
    }

    /**
     * Reads a whole file as one string; exits if it is unreadable or empty.
     *
     * @param string $position Path of the file to read.
     * @return string File contents.
     */
    function _readSourceFile($position)
    {
        $lines = @file($position);
        if (!$lines) {
            echo "打开文件失败!";
            exit;
        }
        return implode("", $lines);
    }

    /**
     * Replaces "charset=<source_lang>" (any letter case) by
     * "charset=<target_lang>". A literal replacement is used so that special
     * characters in either setting cannot be misread as pattern syntax.
     *
     * @param string $text Text to adjust.
     * @return string Adjusted text.
     */
    function _retargetCharset($text)
    {
        return str_ireplace(
            "charset=" . $this->config['source_lang'],
            "charset=" . $this->config['target_lang'],
            $text
        );
    }

    /**
     * Stores a configuration value.
     *
     * @param string $parameter Key in $config.
     * @param mixed  $value     Value to store.
     * @return mixed The parameter itself when it is blank, otherwise nothing.
     */
    function setvar($parameter, $value)
    {
        if (!trim($parameter)) {
            return $parameter;
        }
        $this->config[$parameter] = $value;
    }

    /**
     * Encodes a code point as UTF-8 and returns the byte values written as
     * concatenated decimal numbers (e.g. 0x4E2D gives "228184173"). Values
     * below 0x80 are appended as they are; values of 0x200000 and above give "".
     *
     * @param int $c Unicode code point.
     * @return string Decimal byte values, not the bytes themselves.
     */
    function CHSUtoUTF8($c)
    {
        if ($c < 0x80) {
            $bytes = array($c);
        } elseif ($c < 0x800) {
            $bytes = array(0xC0 | ($c >> 6), 0x80 | ($c & 0x3F));
        } elseif ($c < 0x10000) {
            $bytes = array(0xE0 | ($c >> 12), 0x80 | (($c >> 6) & 0x3F), 0x80 | ($c & 0x3F));
        } elseif ($c < 0x200000) {
            $bytes = array(
                0xF0 | ($c >> 18),
                0x80 | (($c >> 12) & 0x3F),
                0x80 | (($c >> 6) & 0x3F),
                0x80 | ($c & 0x3F)
            );
        } else {
            $bytes = array();
        }
        return implode('', $bytes);
    }

    /**
     * Encodes a code point as actual UTF-8 bytes (same ranges as CHSUtoUTF8()).
     *
     * @param int $c Unicode code point.
     * @return string UTF-8 byte sequence, "" for values of 0x200000 and above.
     */
    function _utf8Encode($c)
    {
        if ($c < 0x80) {
            return chr($c);
        }
        if ($c < 0x800) {
            return chr(0xC0 | ($c >> 6)) . chr(0x80 | ($c & 0x3F));
        }
        if ($c < 0x10000) {
            return chr(0xE0 | ($c >> 12)) . chr(0x80 | (($c >> 6) & 0x3F)) . chr(0x80 | ($c & 0x3F));
        }
        if ($c < 0x200000) {
            return chr(0xF0 | ($c >> 18)) . chr(0x80 | (($c >> 12) & 0x3F))
                . chr(0x80 | (($c >> 6) & 0x3F)) . chr(0x80 | ($c & 0x3F));
        }
        return '';
    }

    /**
     * Converts $SourceText between UTF8 and GB2312/BIG5 in the direction given
     * by the configured source language, using $unicode_table.
     *
     * @return string|null Converted text, or null when the source language is
     *                     none of "GB2312", "BIG5" and "UTF8".
     */
    function CHStoUTF8()
    {
        $source = $this->config["source_lang"];
        if ($source == "BIG5" || $source == "GB2312") {
            return $this->_doubleByteToUtf8();
        }
        if ($source == "UTF8") {
            return $this->_utf8ToDoubleByte();
        }
        return null;
    }

    /**
     * GB2312/BIG5 -> UTF8. Bytes above 127 start a two-byte character; all
     * other bytes are copied. Clears $unicode_table and $SourceText afterwards.
     *
     * The text is walked by index: testing the remaining string for truthiness
     * would stop early on a remainder of "0".
     *
     * @return string UTF-8 text.
     */
    function _doubleByteToUtf8()
    {
        $text = (string) $this->SourceText;
        $length = strlen($text);
        $isGb = ($this->config["source_lang"] == "GB2312");
        $result = "";

        $i = 0;
        while ($i < $length) {
            if (ord($text[$i]) > 127) {
                $code = hexdec(bin2hex(substr($text, $i, 2)));
                if ($isGb) {
                    // GB table keys are stored without the high bit of each byte.
                    $code -= 0x8080;
                }
                $result .= $this->_utf8Encode($this->_lookupCodePoint($code));
                $i += 2;
            } else {
                $result .= $text[$i];
                $i++;
            }
        }

        $this->unicode_table = array();
        $this->SourceText = "";
        return $result;
    }

    /**
     * Returns the numeric value stored in $unicode_table for a key, 0 if absent.
     *
     * @param int $key Table key.
     * @return int|float Parsed hexadecimal value.
     */
    function _lookupCodePoint($key)
    {
        if (!isset($this->unicode_table[$key])) {
            return 0;
        }
        return $this->_hexToInt($this->unicode_table[$key]);
    }

    /**
     * UTF8 -> GB2312/BIG5. One-byte sequences are copied, two- and three-byte
     * sequences are looked up in $unicode_table, every other byte is skipped.
     * For any other target the text is returned unchanged.
     *
     * @return string Converted text.
     */
    function _utf8ToDoubleByte()
    {
        $text = (string) $this->SourceText;
        $target = $this->config["target_lang"];
        if ($target != "GB2312" && $target != "BIG5") {
            return $text;
        }

        // GB2312 bytes carry the high bit that the table values omit.
        $bias = ($target == "GB2312") ? 0x8080 : 0x0000;
        $length = strlen($text);
        $out = '';
        $i = 0;

        while ($i < $length) {
            $lead = ord($text[$i++]);
            switch ($lead >> 4) {
                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                    // 0xxxxxxx
                    $out .= chr($lead);
                    break;
                case 12: case 13:
                    // 110x xxxx 10xx xxxx
                    $second = ($i < $length) ? ord($text[$i]) : 0;
                    $i++;
                    $out .= $this->_encodeDoubleByte((($lead & 0x1F) << 6) | ($second & 0x3F), $bias);
                    break;
                case 14:
                    // 1110 xxxx 10xx xxxx 10xx xxxx
                    $second = ($i < $length) ? ord($text[$i]) : 0;
                    $i++;
                    $third = ($i < $length) ? ord($text[$i]) : 0;
                    $i++;
                    $out .= $this->_encodeDoubleByte(
                        (($lead & 0x0F) << 12) | (($second & 0x3F) << 6) | ($third & 0x3F),
                        $bias
                    );
                    break;
            }
        }

        return $out;
    }

    /**
     * Looks up a code point and returns the bytes of the mapped value plus $bias.
     *
     * The table value is a hexadecimal string and is parsed as such; adding it
     * to a number directly does not yield its hexadecimal value on current PHP.
     *
     * @param int $codePoint Unicode code point used as table key.
     * @param int $bias      Amount added to the mapped value.
     * @return string Bytes of the resulting code.
     */
    function _encodeDoubleByte($codePoint, $bias)
    {
        return $this->_hex2bin(dechex($this->_lookupCodePoint($codePoint) + $bias));
    }

    /**
     * Converts GB2312/BIG5 text in $SourceText to "&#x....;" character
     * references using $unicode_table; bytes up to 127 are copied unchanged.
     * $SourceText is empty afterwards.
     *
     * @return string Text with character references.
     */
    function CHStoUNICODE()
    {
        $text = (string) $this->SourceText;
        $length = strlen($text);
        $source = $this->config["source_lang"];
        $result = "";

        // Index-based walk so that a remaining "0" is not mistaken for the end.
        $i = 0;
        while ($i < $length) {
            if (ord($text[$i]) > 127) {
                $code = hexdec(bin2hex(substr($text, $i, 2)));
                if ($source == "GB2312") {
                    $result .= "&#x" . $this->_tableValue($code - 0x8080) . ";";
                }
                if ($source == "BIG5") {
                    $result .= "&#x" . $this->_tableValue($code) . ";";
                }
                $i += 2;
            } else {
                $result .= $text[$i];
                $i++;
            }
        }

        $this->SourceText = "";
        return $result;
    }

    /**
     * Returns the raw $unicode_table value for a key, "" if absent.
     *
     * @param int $key Table key.
     * @return string Stored value.
     */
    function _tableValue($key)
    {
        return isset($this->unicode_table[$key]) ? $this->unicode_table[$key] : "";
    }

    /**
     * Converts $SourceText between Simplified and Traditional Chinese using the
     * table opened in $this->ctf (direction depends on which table is open).
     * Closes the table and clears $SourceText.
     *
     * When no table is open the text is returned unchanged.
     *
     * @return string Converted text.
     */
    function GB2312toBIG5()
    {
        $text = (string) $this->SourceText;
        $this->SourceText = "";

        if (!is_resource($this->ctf)) {
            return $text;
        }

        // The final byte can never start a two-byte character.
        $last = strlen($text) - 1;
        for ($i = 0; $i < $last; $i++) {
            $high = ord($text[$i]);
            if ($high < 160) {
                continue;
            }
            $low = ord($text[$i + 1]);

            if ($high == 161 && $low == 64) {
                // Two bytes are written back, so the replacement must be two bytes long.
                // NOTE(review): this copy of the file shows a one-character literal here — confirm the intended value.
                $pair = "  ";
            } else {
                $pair = $this->_readTablePair(($high - 160) * 510 + ($low - 1) * 2);
            }

            // Characters without a table entry are left as they are.
            if ($pair !== null) {
                $text[$i] = $pair[0];
                $text[$i + 1] = $pair[1];
            }
            $i++;
        }

        fclose($this->ctf);
        $this->ctf = null;

        return $text;
    }

    /**
     * Reads the two-byte entry at a byte offset of the open conversion table.
     *
     * @param int $offset Byte offset inside the table.
     * @return string|null The two bytes, or null if they cannot be read.
     */
    function _readTablePair($offset)
    {
        if ($offset < 0 || fseek($this->ctf, $offset) !== 0) {
            return null;
        }
        $pair = fread($this->ctf, 2);
        if (!is_string($pair) || strlen($pair) != 2) {
            return null;
        }
        return $pair;
    }

    /**
     * Maps a character code to its pinyin.
     *
     * Codes 1..159 are returned as the corresponding single byte. Codes outside
     * -20319..-10247 give "". Otherwise the last row of $pinyin_table whose
     * code is not greater than $num supplies the pinyin.
     *
     * @param int $num Character code as computed by CHStoPinYin().
     * @return string Pinyin, single byte, or "" when nothing matches.
     */
    function PinYinSearch($num)
    {
        if ($num > 0 && $num < 160) {
            return chr($num);
        }
        if ($num < -20319 || $num > -10247) {
            return "";
        }
        for ($i = count($this->pinyin_table) - 1; $i >= 0; $i--) {
            // Table codes are read from text lines and may carry a line ending.
            if ((int) $this->pinyin_table[$i][1] <= $num) {
                return $this->pinyin_table[$i][0];
            }
        }
        return "";
    }

    /**
     * Converts $SourceText to pinyin, one space-separated item per character.
     * BIG5 input is converted to GB2312 first. Clears $SourceText and
     * $pinyin_table afterwards.
     *
     * @return string Space-separated result.
     */
    function CHStoPinYin()
    {
        if ($this->config['source_lang'] == "BIG5") {
            $this->_openCodeTable('BIG5toGB_table');
            $this->SourceText = $this->GB2312toBIG5();
            $this->config['target_lang'] = "PinYin";
        }

        $text = (string) $this->SourceText;
        $length = strlen($text);
        $items = array();

        for ($i = 0; $i < $length; $i++) {
            $code = ord($text[$i]);
            if ($code > 160) {
                // Two-byte character: combine both bytes into the negative code used by the table.
                $i++;
                $trail = ($i < $length) ? ord($text[$i]) : 0;
                $code = $code * 256 + $trail - 65536;
            }
            $items[] = $this->PinYinSearch($code);
        }

        $this->SourceText = "";
        $this->pinyin_table = array();

        return implode(" ", $items);
    }

    /**
     * Runs the conversion matching the configured language pair on
     * $SourceText. The required table must already be loaded (see OpenTable()).
     *
     * @return string|null Converted text, or null for an unsupported pair.
     */
    function ConvertIT()
    {
        $source = $this->config['source_lang'];
        $target = $this->config['target_lang'];
        $sourceIsChinese = ($source == "GB2312" || $source == "BIG5");
        $targetIsChinese = ($target == "GB2312" || $target == "BIG5");

        // Simplified <-> Traditional
        if ($sourceIsChinese && $targetIsChinese) {
            return $this->GB2312toBIG5();
        }
        // Chinese -> pinyin
        if ($sourceIsChinese && $target == "PinYin") {
            return $this->CHStoPinYin();
        }
        // Chinese <-> UTF8
        if (($sourceIsChinese || $source == "UTF8") && ($targetIsChinese || $target == "UTF8")) {
            return $this->CHStoUTF8();
        }
        // Chinese -> character references
        if ($sourceIsChinese && $target == "UNICODE") {
            return $this->CHStoUNICODE();
        }
        return null;
    }
}
- ?>
|