1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
<?php

// Keep the historical BASE_PATH constant for any code that relies on it, but
// do not redefine it (and trigger a warning) when it already exists.
if (!defined('BASE_PATH')) {
    define('BASE_PATH', dirname(__FILE__));
}

/**
 * Converts Chinese text to pinyin using a binary lookup file.
 *
 * Layout of the dictionary file as this class reads it:
 *   - bytes 0..40    : skipped (41 bytes);
 *   - next 57 bytes  : comma-separated list, stored as $dict[0]
 *                      (presumably the syllable initials -- confirm against the data file);
 *   - next 146 bytes : comma-separated list, stored as $dict[1]
 *                      (presumably the syllable finals -- confirm against the data file);
 *   - from $dictoffs : one unsigned 16-bit entry per code point, starting at U+4E00.
 *
 * Each entry, printed as five decimal digits "AABBT", selects $dict[0][AA],
 * $dict[1][BB] and the 1-based tone number T.
 *
 * NOTE(review): entries are decoded with unpack('S'), i.e. in machine byte
 * order, so the file is only portable between hosts of the same endianness.
 */
class Tpinyin
{
    protected $dict = array();
    protected $dictfile = 'pylib.gif';
    protected $dictoffs = 244;
    protected $tune = array();
    private $charset = '';
    private $maxlength = 0;
    private $fp;

    /**
     * Opens the dictionary and loads the two comma-separated syllable tables.
     *
     * @param string|null $dictfile Optional path to the dictionary file. When
     *                              omitted, the file named by $this->dictfile
     *                              is looked up in BASE_PATH, as before.
     *
     * @throws RuntimeException When the dictionary file cannot be opened.
     */
    public function __construct($dictfile = null)
    {
        $this->dictfile = $dictfile === null
            ? BASE_PATH . '/' . $this->dictfile
            : (string) $dictfile;

        $fp = $this->openDict();
        fseek($fp, 41); // skip the 41 leading bytes
        $this->dict[] = explode(',', (string) fread($fp, 57));
        $this->dict[] = explode(',', (string) fread($fp, 146));
    }

    /**
     * Releases the dictionary file handle.
     */
    public function __destruct()
    {
        if (is_resource($this->fp)) {
            fclose($this->fp);
        }
    }

    /**
     * Loads the tone-mark table so that pinyin() emits accented vowels.
     *
     * The marks are encoded in the charset detected by the most recent
     * pinyin() call, or UTF-8 when pinyin() has not been called yet. Without
     * a call to this method, pinyin() returns syllables without tone marks.
     */
    public function settune()
    {
        // UCS-2 code units for the four tone variants of each vowel letter.
        $marks = array(
            'a' => array("\x01\x01", "\x00\xe1", "\x01\xce", "\x00\xe0"),
            'e' => array("\x01\x13", "\x00\xe9", "\x01\x1b", "\x00\xe8"),
            'i' => array("\x01\x2b", "\x00\xed", "\x01\xd0", "\x00\xec"),
            'o' => array("\x01\x4d", "\x00\xf3", "\x01\xd2", "\x00\xf2"),
            'u' => array("\x01\x6b", "\x00\xfa", "\x01\xd4", "\x00\xf9"),
            'v' => array("\x01\xd6", "\x01\xd8", "\x01\xda", "\x01\xdc"),
        );

        $charset = $this->outputCharset();
        foreach ($marks as $vowel => $variants) {
            foreach ($variants as $i => $ucs2) {
                $this->tune[$vowel][$i] = mb_convert_encoding($ucs2, $charset, 'UCS-2');
            }
        }
    }

    /**
     * Converts a string to pinyin.
     *
     * Characters found in the dictionary are replaced by their syllable; all
     * other characters are passed through in the input's encoding.
     *
     * @param string $str Text encoded as UTF-8 or GBK (anything that is not
     *                    valid UTF-8 is treated as GBK).
     *
     * @return string The converted text, in the same encoding as the input.
     *
     * @throws RuntimeException When the dictionary file cannot be opened.
     */
    public function pinyin($str)
    {
        $str = (string) $str;
        $this->charset = mb_check_encoding($str, 'UTF-8') ? 'UTF-8' : 'GBK';
        if ($str === '') {
            return '';
        }

        $this->openDict();

        // Convert from the charset we just detected rather than re-detecting,
        // so the forward and reverse conversions always agree.
        $ucs2 = mb_convert_encoding($str, 'UCS-2', $this->charset);

        return implode('', array_map(array($this, 'pinyin_back'), str_split($ucs2, 2)));
    }

    /**
     * Converts a single UCS-2 code unit to its pinyin syllable.
     *
     * @param string $ch Two bytes holding one big-endian UCS-2 code unit.
     *
     * @return string The syllable for a dictionary character, or the original
     *                character re-encoded in the output charset. A truncated
     *                unit (fewer than two bytes) yields an empty string.
     *
     * @throws RuntimeException When the dictionary file cannot be opened.
     */
    public function pinyin_back($ch)
    {
        $ch = (string) $ch;
        if (strlen($ch) < 2) {
            return '';
        }

        $hi = ord($ch[0]);
        $lo = ord($ch[1]);

        // Only 7-bit ASCII is a single byte in both UTF-8 and GBK; U+0080..U+00FF
        // must go through the regular re-encoding path below.
        if ($hi === 0 && $lo < 0x80) {
            return $ch[1];
        }

        $index = (($hi << 8) | $lo) - 0x4e00;
        if ($index < 0) {
            return mb_convert_encoding($ch, $this->outputCharset(), 'UCS-2');
        }

        $fp = $this->openDict();
        $pos = $index * 2 + $this->dictoffs;

        // Compare the byte position (not the character index) against the
        // file size, so a lookup can never run past the end of the table.
        if ($pos + 2 > $this->maxlength) {
            return mb_convert_encoding($ch, $this->outputCharset(), 'UCS-2');
        }

        fseek($fp, $pos);
        $raw = fread($fp, 2);
        if ($raw === false || strlen($raw) < 2) {
            return mb_convert_encoding($ch, $this->outputCharset(), 'UCS-2');
        }

        // Five decimal digits: AA = index into $dict[0], BB = index into
        // $dict[1], T = 1-based tone number.
        $entry = unpack('S', $raw);
        $digits = sprintf('%05d', current($entry));
        $first = (int) substr($digits, 0, 2);
        $second = (int) substr($digits, 2, 2);
        $tone = (int) substr($digits, -1) - 1;

        $plain = (isset($this->dict[0][$first]) ? $this->dict[0][$first] : '')
            . (isset($this->dict[1][$second]) ? $this->dict[1][$second] : '');

        // Put the tone mark on the first vowel class (in table order) that the
        // syllable contains. A tone with no entry in the table leaves the
        // syllable unmarked instead of deleting the vowel.
        foreach ($this->tune as $vowel => $variants) {
            if (!isset($variants[$tone])) {
                continue;
            }
            $marked = str_replace($vowel, $variants[$tone], $plain);
            if ($marked !== $plain) {
                return $marked;
            }
        }

        return $plain;
    }

    /**
     * Returns the shared dictionary handle, opening it on first use and
     * recording the file size used for bounds checks.
     *
     * @return resource
     *
     * @throws RuntimeException When the dictionary file cannot be opened.
     */
    private function openDict()
    {
        if (!is_resource($this->fp)) {
            $fp = is_file($this->dictfile) ? fopen($this->dictfile, 'rb') : false;
            if ($fp === false) {
                throw new RuntimeException('Tpinyin: cannot open dictionary file: ' . $this->dictfile);
            }
            $stat = fstat($fp);
            $this->fp = $fp;
            $this->maxlength = (int) $stat['size'];
        }

        return $this->fp;
    }

    /**
     * Charset used for produced text: the one detected by the last pinyin()
     * call, or UTF-8 when nothing has been detected yet.
     *
     * @return string
     */
    private function outputCharset()
    {
        return $this->charset !== '' ? $this->charset : 'UTF-8';
    }
}
|