Your Ad Here
首页 | 编程语言 | 网站建设 | 游戏天堂 | 冲浪宝典 | 网络安全 | 操作系统 | 软件时空 | 硬件指南 | 病毒相关 | IT 认证
软讯网络 > 网站建设 > PHP > gb,big5,utf-8互相转码
【标  题】:gb,big5,utf-8互相转码
【关键字】:gb,big5,utf-8
【来  源】:http://www.cublog.cn/u/4891/showart.php?id=199325

gb,big5,utf-8互相转码

Your Ad Here

在 sf.net 上找到 PHP News Reader,把其中 language.inc.php 里的这段代码复制出来,并复制 language 目录下相应的码表文件(如 gb-big5.tab)。这样就可以实现 GB、Big5、UTF-8 之间的互相转码。

/**
 * Convert a Big5 byte string to GB using the table language/big5-gb.tab.
 *
 * Bytes below 160 are copied through unchanged. A byte >= 160 is treated as
 * the lead byte of a two-byte character: it and the following byte are
 * replaced by the two bytes stored in the table. The table is read as 3-byte
 * records (only the first two bytes of a record are used), indexed by
 * (lead - 160) * 255 + trail - 1.
 *
 * @param string $instr Big5 input bytes.
 * @return string The converted byte string.
 * @throws RuntimeException If the table file cannot be opened.
 */
function b2g( $instr ) {

    # 'rb' keeps the table binary-safe on platforms that translate text files.
    $fp = fopen( 'language/big5-gb.tab', 'rb' );
    if( $fp === false )
        throw new RuntimeException( 'Cannot open conversion table language/big5-gb.tab' );

    $len = strlen($instr);
    $outstr = '';
    for( $i = 0 ; $i < $len ; $i++ ) {
        $h = ord($instr[$i]);
        if( $h < 160 ) {
            $outstr .= $instr[$i];
            continue;
        }

        # A lead byte at the very end of the input is looked up as if it were
        # followed by a space (32), exactly as the previous implementation did.
        $l = ($i+1 >= $len) ? 32 : ord($instr[$i+1]);
        if( $h == 161 && $l == 64 ) {
            # Big5 0xA140 becomes two ASCII spaces. The replacement must be two
            # bytes long: a one-byte string left the second output byte undefined
            # and makes PHP 8 throw "Cannot assign an empty string to a string
            # offset".
            $gb = '  ';
        }
        else {
            $gb = ( fseek( $fp, (($h-160)*255+$l-1)*3 ) === 0 ) ? fread( $fp, 2 ) : false;
            # No usable table entry (failed seek or short read): keep the
            # source bytes rather than emitting undefined data.
            if( $gb === false || strlen($gb) < 2 )
                $gb = substr( $instr, $i, 2 );
        }
        $outstr .= $gb;
        $i++;    # the trail byte has been consumed as well
    }
    fclose($fp);
    return $outstr;
}

/**
 * Convert a GB byte string to Big5 using the table language/gb-big5.tab.
 *
 * A byte in the range 161..247 is treated as the lead byte of a two-byte
 * character. When the following byte is in 161..254 the pair is replaced by
 * the two bytes stored in the table, which is read as 3-byte records (only
 * the first two bytes are used) indexed by (lead - 161) * 94 + (trail - 161).
 * When the following byte is outside that range the pair is replaced by two
 * spaces. All other bytes are copied through unchanged.
 *
 * @param string $instr GB input bytes.
 * @return string The converted byte string.
 * @throws RuntimeException If the table file cannot be opened.
 */
function g2b( $instr ) {

    # 'rb' keeps the table binary-safe on platforms that translate text files.
    $fp = fopen( 'language/gb-big5.tab', 'rb' );
    if( $fp === false )
        throw new RuntimeException( 'Cannot open conversion table language/gb-big5.tab' );

    $len = strlen($instr);
    $outstr = '';
    for( $i = 0 ; $i < $len ; $i++ ) {
        $h = ord($instr[$i]);
        if( $h <= 160 || $h >= 248 ) {
            $outstr .= $instr[$i];
            continue;
        }

        # A lead byte at the very end of the input is handled as if it were
        # followed by a space (32), which is never a valid trail byte.
        $l = ($i+1 >= $len) ? 32 : ord($instr[$i+1]);

        # Replacement for an invalid trail byte. It must be two bytes long: a
        # one-byte string left the second output byte undefined and makes
        # PHP 8 throw "Cannot assign an empty string to a string offset".
        $bg = '  ';
        if( $l > 160 && $l < 255 ) {
            $bg = ( fseek( $fp, (($h-161)*94+$l-161)*3 ) === 0 ) ? fread( $fp, 2 ) : false;
            # No usable table entry (failed seek or short read): keep the
            # source bytes rather than emitting undefined data.
            if( $bg === false || strlen($bg) < 2 )
                $bg = substr( $instr, $i, 2 );
        }
        $outstr .= $bg;
        $i++;    # the trail byte has been consumed as well
    }
    fclose($fp);
    return $outstr;
}

/**
 * Convert a Big5 byte string to UTF-8 using language/big5-unicode.tab.
 *
 * Bytes below 160 are copied through unchanged. A byte >= 160 is treated as
 * the lead byte of a two-byte character; the pair is looked up in the table,
 * which is read as 2-byte big-endian code points at byte offset
 * (lead - 160) * 510 + (trail - 1) * 2, and the code point is written out as
 * UTF-8. A pair with no usable table entry is written as '?'.
 *
 * @param string $instr Big5 input bytes.
 * @return string|null The UTF-8 string, or null when $instr is empty (the
 *                     historical return value for empty input).
 * @throws RuntimeException If the table file cannot be opened.
 */
function b2u( $instr ) {
    if( $instr == '' )
        return null;

    # 'rb' keeps the table binary-safe on platforms that translate text files.
    $fp = fopen( 'language/big5-unicode.tab', 'rb' );
    if( $fp === false )
        throw new RuntimeException( 'Cannot open conversion table language/big5-unicode.tab' );

    $len = strlen($instr);
    # Built by concatenation. Filling an empty string by index and then calling
    # join() on it only worked while PHP silently turned '' into an array
    # (before 7.1); on PHP 8 join() rejects the string with a TypeError.
    $outstr = '';
    for( $i = 0 ; $i < $len ; $i++ ) {
        $h = ord($instr[$i]);
        if( $h < 160 ) {
            $outstr .= $instr[$i];
            continue;
        }

        # A lead byte at the very end of the input is looked up as if it were
        # followed by a space (32), as before.
        $l = ( $i+1 >= $len ) ? 32 : ord($instr[$i+1]);
        $i++;    # the trail byte is consumed as well

        if( $h == 161 && $l == 64 ) {
            # The old code built this value from a one-byte ' ' string, i.e.
            # 32*256 + 0, while reading an undefined second byte. The value is
            # kept as-is.
            # NOTE(review): Big5 0xA140 is the ideographic space, so U+3000 may
            # be the mapping that was actually wanted - confirm before changing.
            $codenum = 0x2000;
        }
        else {
            $uni = ( fseek( $fp, ($h-160)*510+($l-1)*2 ) === 0 ) ? fread( $fp, 2 ) : false;
            if( $uni === false || strlen($uni) < 2 ) {
                $outstr .= '?';
                continue;
            }
            $codenum = ord($uni[0])*256 + ord($uni[1]);
        }

        # Integer shifts and masks instead of '/' so chr() never receives a
        # float. Code points below 0x80 are written as one byte; a two-byte
        # form for them would be an overlong (invalid) UTF-8 sequence.
        if( $codenum < 0x80 )
            $outstr .= chr( $codenum );
        elseif( $codenum < 0x800 )
            $outstr .= chr( 0xC0 | ($codenum >> 6) )
                     . chr( 0x80 | ($codenum & 0x3F) );
        else
            $outstr .= chr( 0xE0 | ($codenum >> 12) )
                     . chr( 0x80 | (($codenum >> 6) & 0x3F) )
                     . chr( 0x80 | ($codenum & 0x3F) );
    }
    fclose($fp);
    return $outstr;
}

/**
 * Convert a UTF-8 string to Big5 using language/unicode-big5.tab.
 *
 * Bytes below 0x80 are copied through unchanged. Two- and three-byte UTF-8
 * sequences are decoded to a code point and replaced by the two bytes stored
 * in the table at byte offset code point * 2. A code point with no usable
 * table entry is written as '?'. Bytes in 0x80..0xBF that do not follow a
 * lead byte are dropped, as before.
 *
 * @param string $instr UTF-8 input.
 * @return string|null The Big5 byte string, or null when $instr is empty
 *                     (the historical return value for empty input).
 * @throws RuntimeException If the table file cannot be opened.
 */
function u2b( $instr ) {
    if( $instr == '' )
        return null;

    # 'rb' keeps the table binary-safe on platforms that translate text files.
    $fp = fopen( 'language/unicode-big5.tab', 'rb' );
    if( $fp === false )
        throw new RuntimeException( 'Cannot open conversion table language/unicode-big5.tab' );

    $len = strlen($instr);
    # Built by concatenation. Filling an empty string by index and then calling
    # join() on it only worked while PHP silently turned '' into an array
    # (before 7.1); on PHP 8 join() rejects the string with a TypeError.
    $outstr = '';
    for( $i = 0 ; $i < $len ; $i++ ) {
        $b1 = ord($instr[$i]);
        if( $b1 < 0x80 ) {
            $outstr .= $instr[$i];
            continue;
        }

        if( $b1 >= 224 ) {    # 3 bytes UTF-8
            # Continuation bytes missing at the end of the input count as 0.
            $b2 = ($i+1 >= $len) ? 0 : ord($instr[$i+1]) - 128;
            $b3 = ($i+2 >= $len) ? 0 : ord($instr[$i+2]) - 128;
            $i += 2;
            $uc = ($b1 - 224) * 4096 + $b2 * 64 + $b3;
        }
        elseif( $b1 >= 192 ) {    # 2 bytes UTF-8
            # The continuation byte is at $i+1. The old code re-read the lead
            # byte at $i here and so looked up the wrong code point. It also
            # printed a debug dump of the two bytes, which is removed.
            $b2 = ($i+1 >= $len) ? 0 : ord($instr[$i+1]) - 128;
            $i++;
            $uc = ($b1 - 192) * 64 + $b2;
        }
        else
            continue;    # stray continuation byte

        # Malformed input can give a negative or out-of-table code point; a
        # failed seek or short read then yields '?' instead of undefined bytes.
        $bg = ( fseek( $fp, $uc * 2 ) === 0 ) ? fread( $fp, 2 ) : false;
        if( $bg === false || strlen($bg) < 2 )
            $bg = '?';
        $outstr .= $bg;
    }
    fclose($fp);
    return $outstr;
}

/**
 * Convert a GB byte string to UTF-8 using language/gb-unicode.tab.
 *
 * Bytes up to 160 are copied through unchanged. A byte above 160 is treated
 * as the lead byte of a two-byte character; the pair is looked up in the
 * table, which is read as 2-byte big-endian code points at byte offset
 * (lead - 161) * 188 + (trail - 161) * 2, and the code point is written out
 * as UTF-8. A pair with an invalid trail byte or no usable table entry is
 * written as '?'.
 *
 * @param string $instr GB input bytes.
 * @return string|null The UTF-8 string, or null when $instr is empty (the
 *                     historical return value for empty input).
 * @throws RuntimeException If the table file cannot be opened.
 */
function g2u( $instr ) {
    if( $instr == '' )
        return null;

    # 'rb' keeps the table binary-safe on platforms that translate text files.
    $fp = fopen( 'language/gb-unicode.tab', 'rb' );
    if( $fp === false )
        throw new RuntimeException( 'Cannot open conversion table language/gb-unicode.tab' );

    $len = strlen($instr);
    # Built by concatenation. Filling an empty string by index and then calling
    # join() on it only worked while PHP silently turned '' into an array
    # (before 7.1); on PHP 8 join() rejects the string with a TypeError.
    $outstr = '';
    for( $i = 0 ; $i < $len ; $i++ ) {
        $h = ord($instr[$i]);
        if( $h <= 160 ) {
            $outstr .= $instr[$i];
            continue;
        }

        # A lead byte at the very end of the input is handled as if it were
        # followed by a space (32).
        $l = ( $i+1 >= $len ) ? 32 : ord($instr[$i+1]);
        $i++;    # the trail byte is consumed as well

        # Each table row holds 94 entries (188 bytes), so only trail bytes
        # 161..254 address an entry of this row. Anything else used to read a
        # neighbouring row or seek to a negative offset.
        $uni = false;
        if( $l > 160 && $l < 255 && fseek( $fp, ($h-161)*188+($l-161)*2 ) === 0 )
            $uni = fread( $fp, 2 );
        if( $uni === false || strlen($uni) < 2 ) {
            $outstr .= '?';
            continue;
        }
        $codenum = ord($uni[0])*256 + ord($uni[1]);

        # Integer shifts and masks instead of '/' so chr() never receives a
        # float. Code points below 0x80 are written as one byte; a two-byte
        # form for them would be an overlong (invalid) UTF-8 sequence.
        if( $codenum < 0x80 )
            $outstr .= chr( $codenum );
        elseif( $codenum < 0x800 )
            $outstr .= chr( 0xC0 | ($codenum >> 6) )
                     . chr( 0x80 | ($codenum & 0x3F) );
        else
            $outstr .= chr( 0xE0 | ($codenum >> 12) )
                     . chr( 0x80 | (($codenum >> 6) & 0x3F) )
                     . chr( 0x80 | ($codenum & 0x3F) );
    }
    fclose($fp);
    return $outstr;
}

/**
 * Convert a UTF-8 string to GB using language/unicode-gb.tab.
 *
 * Bytes below 0x80 are copied through unchanged. Two- and three-byte UTF-8
 * sequences are decoded to a code point and replaced by the two bytes stored
 * in the table at byte offset code point * 2. A code point with no usable
 * table entry is written as '?'. Bytes in 0x80..0xBF that do not follow a
 * lead byte are dropped, as before.
 *
 * @param string $instr UTF-8 input.
 * @return string|null The GB byte string, or null when $instr is empty (the
 *                     historical return value for empty input).
 * @throws RuntimeException If the table file cannot be opened.
 */
function u2g( $instr ) {
    if( $instr == '' )
        return null;

    # 'rb' keeps the table binary-safe on platforms that translate text files.
    $fp = fopen( 'language/unicode-gb.tab', 'rb' );
    if( $fp === false )
        throw new RuntimeException( 'Cannot open conversion table language/unicode-gb.tab' );

    $len = strlen($instr);
    # Built by concatenation. Filling an empty string by index and then calling
    # join() on it only worked while PHP silently turned '' into an array
    # (before 7.1); on PHP 8 join() rejects the string with a TypeError.
    $outstr = '';
    for( $i = 0 ; $i < $len ; $i++ ) {
        $b1 = ord($instr[$i]);
        if( $b1 < 0x80 ) {
            $outstr .= $instr[$i];
            continue;
        }

        if( $b1 >= 224 ) {    # 3 bytes UTF-8
            # Continuation bytes missing at the end of the input count as 0.
            $b2 = ($i+1 >= $len) ? 0 : ord($instr[$i+1]) - 128;
            $b3 = ($i+2 >= $len) ? 0 : ord($instr[$i+2]) - 128;
            $i += 2;
            $uc = ($b1 - 224) * 4096 + $b2 * 64 + $b3;
        }
        elseif( $b1 >= 192 ) {    # 2 bytes UTF-8
            # The old code printed a debug dump of the two bytes here, which
            # leaked into the output; it is removed.
            $b2 = ($i+1 >= $len) ? 0 : ord($instr[$i+1]) - 128;
            $i++;
            $uc = ($b1 - 192) * 64 + $b2;
        }
        else
            continue;    # stray continuation byte

        # Malformed input can give a negative or out-of-table code point; a
        # failed seek or short read then yields '?' instead of undefined bytes.
        $gb = ( fseek( $fp, $uc * 2 ) === 0 ) ? fread( $fp, 2 ) : false;
        if( $gb === false || strlen($gb) < 2 )
            $gb = '?';
        $outstr .= $gb;
    }
    fclose($fp);
    return $outstr;
}

Perl的现状:【上一篇】
内码转换表(转):【下一篇】
【相关文章】
  • Unicode,UTF-8,GB2312编码的识别
  • 字符集之间转换(UTF-8,UNICODE,Gb2312)
  • ubuntu dapper en_US.UTF-8 locale 下安装fcitx
  • C#中StringBuilder类的使用
  • Suse Linux平台下XML4c解析库支持GB2312编码问题解决过程
  • HttpClient POST 的 UTF-8 编码问题
  • String&StringBuffer的区别
  • Netbeans中设置UTF-8编码格式实践
  • j2se 5.0新特性之StringBuilder类应用
  • Download ZigBee &IEEE 802.15.4 Specification.
  • 【随机文章】
  • 给大家推荐一个我的开源项目: Permission Base
  • 关于IP-MAC地址绑定的交换机设置
  • 破解winhex v9.3
  • ASP程序界面的多语言支持
  • VNC:Linux下的远程遥控专家
  • 倒计时
  • Win32 ASM详解-内存管理和文件输入/输出
  • php & apache 安装
  • 搞清linux的用户和组-基础篇
  • do{}while(0)的使用技巧
  • 【相关评论】
    没有相关评论
    【发表评论】
    姓名:
    邮件:
    随机码*
    评论*
          
    |  首 页  |  版权声明  |  联系我们   |  网站地图  |
    Copyright © 2004-2007 bbb软讯网络 All Rights Reserved.