#include "char_encode.h"
#include "string.h"
#include "unigbk_table.h"
#include "mymem.h"

/*
 * Character-set conversion between GBK, "Unicode" and UTF-8.
 *
 * Throughout this file "Unicode" means one 16-bit code unit per character,
 * stored as two bytes with the HIGH byte first, and a Unicode string is
 * terminated by a 0x0000 unit.  GBK and UTF-8 strings are NUL-terminated.
 *
 * None of the functions take a destination size: the caller must supply an
 * output buffer large enough for the converted text plus its terminator.
 */

/**
 * Convert one Unicode character to UTF-8.
 *
 * Code points below 0x80 produce 1 byte, 0x80..0x7FF produce 2 bytes and
 * everything else produces 3 bytes.
 *
 * @param uni_in    Address of the character to convert (2 bytes, high byte
 *                  first).  May be NULL.
 * @param utf8_out  In/out write cursor; the encoded bytes are stored at
 *                  *utf8_out and the cursor is advanced past them.
 * @return Address of the next Unicode character (uni_in + 2), or NULL when
 *         uni_in is NULL or addresses the 0x0000 terminator.  Nothing is
 *         written in the NULL case.
 */
u8 *uni2utf8 (const u8 *uni_in,u8 **utf8_out)
{
    u16 c;
    u8 *out;

    if (uni_in == NULL)
        return NULL;

    c = (u16)((uni_in[0] << 8) | uni_in[1]);
    if (c == 0)
        return NULL;

    out = *utf8_out;
    if (c < 0x80) {
        out[0] = (u8)c;
        out += 1;
    } else if (c < 0x800) {
        /* 110xxxxx 10xxxxxx -- shortest form, a 3-byte form would be overlong */
        out[0] = (u8)(0xc0 | (c >> 6));
        out[1] = (u8)(0x80 | (c & 0x3f));
        out += 2;
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        out[0] = (u8)(0xe0 | (c >> 12));
        out[1] = (u8)(0x80 | ((c >> 6) & 0x3f));
        out[2] = (u8)(0x80 | (c & 0x3f));
        out += 3;
    }
    *utf8_out = out;

    return (u8 *)uni_in + 2;
}

/**
 * Convert a 0x0000-terminated Unicode string to a NUL-terminated UTF-8 string.
 *
 * @param uni_in    Source string; NULL is treated as an empty string.
 * @param utf8_out  Destination buffer; needs up to 3 bytes per source
 *                  character plus 1 byte for the terminator.
 */
void uni2utf8_str (u8 *uni_in,u8 *utf8_out)
{
    u8 *src = uni_in;
    u8 *dst = utf8_out;

    while (src != NULL)
        src = uni2utf8(src, &dst);

    dst[0] = 0;
}

/**
 * Convert one UTF-8 character to Unicode.
 *
 * A lead byte below 0x80 is a 1-byte character and a lead byte of the form
 * 110xxxxx starts a 2-byte character.  Every other lead byte is decoded as a
 * 3-byte character; sequences that need more than 16 bits are not supported
 * and will decode to a wrong value.
 *
 * @param uft8_in  Address of the character to convert.  May be NULL.
 * @param uni_out  In/out write cursor; 2 bytes (high byte first) are stored
 *                 at *uni_out and the cursor is advanced by 2.
 * @return Address of the next UTF-8 character, or NULL when uft8_in is NULL,
 *         addresses the NUL terminator, or the string ends in the middle of
 *         a multi-byte sequence.  Nothing is written in the NULL case.
 */
u8 *utf82uni (const u8 *uft8_in,u8 **uni_out)
{
    u8 lead;
    u16 c;
    unsigned int len;

    if (uft8_in == NULL || uft8_in[0] == 0)
        return NULL;

    lead = uft8_in[0];
    if (lead < 0x80) {
        c = lead;
        len = 1;
    } else if ((lead & 0xe0) == 0xc0) {
        /* Never step over the terminator of a truncated sequence. */
        if (uft8_in[1] == 0)
            return NULL;
        c = (u16)(((lead & 0x1f) << 6) | (uft8_in[1] & 0x3f));
        len = 2;
    } else {
        if (uft8_in[1] == 0 || uft8_in[2] == 0)
            return NULL;
        c = (u16)(((lead & 0x0f) << 12) |
                  ((uft8_in[1] & 0x3f) << 6) |
                  (uft8_in[2] & 0x3f));
        len = 3;
    }

    (*uni_out)[0] = (u8)(c >> 8);
    (*uni_out)[1] = (u8)(c & 0xff);
    *uni_out += 2;

    return (u8 *)uft8_in + len;
}

/**
 * Convert a NUL-terminated UTF-8 string to a 0x0000-terminated Unicode string.
 *
 * @param uft8_in  Source string; NULL is treated as an empty string.
 * @param uni_out  Destination buffer; needs at most 2 bytes per source byte
 *                 plus 2 bytes for the terminator.
 */
void utf82uni_str (u8 *uft8_in,u8 *uni_out)
{
    u8 *src = uft8_in;
    u8 *dst = uni_out;

    while (src != NULL)
        src = utf82uni(src, &dst);

    dst[0] = 0;
    dst[1] = 0;
}

/* Size reported by unigbk_open(); 0 means "not opened yet / unavailable". */
static int g_unigbk_size=0;

/*
 * Look `key` up in one half of the Unicode<->GBK table and return the mapped
 * code, or 0 when the table is unavailable or has no entry for `key`.
 *
 * The table is read through unigbk_read() as 4-byte entries holding two u16
 * values {key, value}.  The first half of the table (offset 0) is searched
 * with Unicode keys, the second half (offset size/2) with GBK keys.  The
 * binary search assumes each half is sorted by ascending key.
 * NOTE(review): entries are loaded straight into native u16 variables, so the
 * table is presumably stored in the target's byte order -- confirm.
 */
static u16 unigbk_lookup (u16 key, int gbk_keyed_half)
{
    u16 entry[2] = {0, 0};
    u32 half_bytes;
    u32 base;
    u32 lo;
    u32 hi;

    if (!g_unigbk_size)             /* table not opened yet */
        g_unigbk_size = unigbk_open();
    if (g_unigbk_size <= 0)
        return 0;

    half_bytes = (u32)g_unigbk_size / 2;
    base = gbk_keyed_half ? half_bytes : 0;

    /* Half-open range [lo, hi) so that the last entry is reachable too. */
    lo = 0;
    hi = half_bytes / 4;
    while (lo < hi) {
        u32 mid = lo + (hi - lo) / 2;

        (void)unigbk_read(mid * 4 + base, &entry, 4);
        if (key == entry[0])
            return entry[1];
        if (key > entry[0])
            lo = mid + 1;
        else
            hi = mid;
    }

    return 0;
}

/**
 * Convert one GBK character to Unicode.
 *
 * Bytes below 0x80 are single-byte characters and map to themselves.  Any
 * other byte starts a two-byte character that is translated through the
 * conversion table; an unmapped character is written as 0x0000.
 *
 * @param gbk_in   Address of the character to convert.  May be NULL.
 * @param uni_out  In/out write cursor; 2 bytes (high byte first) are stored
 *                 at *uni_out and the cursor is advanced by 2.
 * @return Address of the next GBK character, or NULL when gbk_in is NULL,
 *         addresses the NUL terminator, or the string ends after a lead
 *         byte.  Nothing is written in the NULL case.
 */
u8 *gbk2uni (u8 *gbk_in,u8 **uni_out)
{
    u16 c;

    if (gbk_in == NULL || gbk_in[0] == 0)
        return NULL;

    if (gbk_in[0] < 0x80) {
        (*uni_out)[0] = 0;
        (*uni_out)[1] = gbk_in[0];
        *uni_out += 2;
        return gbk_in + 1;
    }

    /* A lead byte directly before the NUL is a truncated character; stop
     * here instead of consuming the terminator and running off the string. */
    if (gbk_in[1] == 0)
        return NULL;

    c = unigbk_lookup((u16)((gbk_in[0] << 8) | gbk_in[1]), 1);

    (*uni_out)[0] = (u8)(c >> 8);
    (*uni_out)[1] = (u8)(c & 0xff);
    *uni_out += 2;

    return gbk_in + 2;
}

/**
 * Convert one Unicode character to GBK.
 *
 * Code points below 0x80 produce a single byte.  All others are translated
 * through the conversion table and produce two bytes (high byte first); an
 * unmapped character is written as two zero bytes, which ends the output
 * when it is read as a C string.
 *
 * @param uni_in   Address of the character to convert (2 bytes, high byte
 *                 first).  May be NULL.
 * @param gbk_out  In/out write cursor; advanced past the bytes written.
 * @return Address of the next Unicode character (uni_in + 2), or NULL when
 *         uni_in is NULL or addresses the 0x0000 terminator.
 */
u8 *uni2gbk (u8 *uni_in,u8 **gbk_out)
{
    u16 c;

    if (uni_in == NULL)
        return NULL;

    c = (u16)((uni_in[0] << 8) | uni_in[1]);
    if (c == 0)
        return NULL;

    if (c < 0x80) {
        (*gbk_out)[0] = (u8)c;
        *gbk_out += 1;
        return uni_in + 2;
    }

    c = unigbk_lookup(c, 0);

    (*gbk_out)[0] = (u8)(c >> 8);
    (*gbk_out)[1] = (u8)(c & 0xff);
    *gbk_out += 2;

    return uni_in + 2;
}

/**
 * Convert a NUL-terminated GBK string to a 0x0000-terminated Unicode string.
 *
 * @param gbk_in   Source string; NULL is treated as an empty string.
 * @param uni_out  Destination buffer; needs at most 2 bytes per source byte
 *                 plus 2 bytes for the terminator.
 */
void gbk2uni_str (u8 *gbk_in,u8 *uni_out)
{
    u8 *src = gbk_in;
    u8 *dst = uni_out;

    while (src != NULL)
        src = gbk2uni(src, &dst);

    dst[0] = 0;
    dst[1] = 0;
}

/**
 * Convert a 0x0000-terminated Unicode string to a NUL-terminated GBK string.
 *
 * @param uni_in   Source string; NULL is treated as an empty string.
 * @param gbk_out  Destination buffer; needs up to 2 bytes per source
 *                 character plus 1 byte for the terminator.
 */
void uni2gbk_str (u8 *uni_in,u8 *gbk_out)
{
    u8 *src = uni_in;
    u8 *dst = gbk_out;

    while (src != NULL)
        src = uni2gbk(src, &dst);

    dst[0] = 0;
}

/**
 * Convert a NUL-terminated GBK string to UTF-8 via a temporary Unicode copy.
 *
 * @param gbk_in    Source string; NULL is treated as an empty string.
 * @param utf8_out  Destination buffer; needs up to 3 bytes per source
 *                  character plus 1 byte for the terminator.  Receives an
 *                  empty string if the temporary buffer cannot be allocated.
 */
void gbk2utf8_str (u8 *gbk_in,u8 *utf8_out)
{
    int str_size = gbk_in ? (int)strlen((const char *)gbk_in) : 0;
    /* Worst case is all single-byte input: 2 bytes each plus the 0x0000
     * terminator.  (str_size*3 is too small for 0- and 1-byte strings.) */
    u8 *char_uni = mymalloc(str_size * 2 + 2);

    if (char_uni == NULL) {
        utf8_out[0] = 0;
        return;
    }

    gbk2uni_str(gbk_in, char_uni);
    uni2utf8_str(char_uni, utf8_out);
    myfree(char_uni);
}

/**
 * Convert a NUL-terminated UTF-8 string to GBK via a temporary Unicode copy.
 *
 * @param utf8_in  Source string; NULL is treated as an empty string.
 * @param gbk_out  Destination buffer; needs up to 2 bytes per source
 *                 character plus 1 byte for the terminator.  Receives an
 *                 empty string if the temporary buffer cannot be allocated.
 */
void utf82gbk_str (u8 *utf8_in,u8 *gbk_out)
{
    int str_size = utf8_in ? (int)strlen((const char *)utf8_in) : 0;
    /* Worst case is all single-byte input: 2 bytes each plus the 0x0000
     * terminator.  (str_size*3 is too small for 0- and 1-byte strings.) */
    u8 *char_uni = mymalloc(str_size * 2 + 2);

    if (char_uni == NULL) {
        gbk_out[0] = 0;
        return;
    }

    utf82uni_str(utf8_in, char_uni);
    uni2gbk_str(char_uni, gbk_out);
    myfree(char_uni);
}

/**
 * Round-trip self test: GBK -> Unicode -> UTF-8 -> Unicode -> GBK.
 *
 * @param str_input  NUL-terminated GBK string to push through the chain.
 * @return 0 when the round-tripped string equals str_input; non-zero when
 *         it differs, when str_input is NULL, or when a work buffer cannot
 *         be allocated.
 */
int char_encode_test (char *str_input)
{
    int ret = -1;

    if (str_input == NULL)
        return ret;

    int str_size = (int)strlen(str_input);
    /* One size that fits every stage, terminators included:
     * Unicode <= 2n+2, UTF-8 <= 3n+1, GBK <= 2n+1. */
    int buf_size = str_size * 3 + 3;

    u8 *char_gbk = mymalloc(buf_size);
    u8 *char_uni = mymalloc(buf_size);
    u8 *char_utf8 = mymalloc(buf_size);

    if (char_gbk != NULL && char_uni != NULL && char_utf8 != NULL) {
        gbk2uni_str((u8 *)str_input, char_uni);
        uni2utf8_str(char_uni, char_utf8);
        utf82uni_str(char_utf8, char_uni);
        uni2gbk_str(char_uni, char_gbk);

        ret = strcmp((const char *)char_gbk, str_input);
    }

    if (char_gbk != NULL)
        myfree(char_gbk);
    if (char_uni != NULL)
        myfree(char_uni);
    if (char_utf8 != NULL)
        myfree(char_utf8);

    return ret;
}