358 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			358 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #include  "char_encode.h" 
 | ||
| #include "string.h"
 | ||
| #include "unigbk_table.h"
 | ||
| #include "mymem.h"
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //Unicode转utf-8,返回值是下一个uni编码的指针
 | ||
| //本函数只能转换汉字,即输入的汉字固定为2个字节
 | ||
| //
 | ||
| //参数:uni_in,要转码的Unicode编码地址,高字节在前
 | ||
| //参数:utf8_out,转码后输出的utf8编码存储地址的指针,在转码后地址自动向后移编
 | ||
| //返回值:下一个Unicode编码地址
 | ||
| u8 *uni2utf8 (const u8 *uni_in,u8 **utf8_out)
 | ||
| {
 | ||
| 	u16 c=0;
 | ||
| 	if ((uni_in)&&(c=(uni_in[0]<<8)|uni_in[1],c))
 | ||
| 	{
 | ||
| 		if (c<0x80)
 | ||
| 		{
 | ||
| 			**utf8_out=c;
 | ||
| 			(*utf8_out)++;
 | ||
| 			return (u8*)uni_in+2;
 | ||
| 		}
 | ||
| 		else
 | ||
| 		{
 | ||
| 			(*utf8_out)[0]=0xe0;
 | ||
| 			(*utf8_out)[0]|=uni_in[0]>>4;
 | ||
| 			(*utf8_out)[1]=0x80;
 | ||
| 			(*utf8_out)[1]|=((uni_in[0]<<2)|(uni_in[1]>>6))&0x3f;
 | ||
| 			(*utf8_out)[2]=0x80;
 | ||
| 			(*utf8_out)[2]|=(uni_in[1])&0x3f;
 | ||
| 			(*utf8_out)+=3;
 | ||
| 			return (u8*)uni_in+2;
 | ||
| 		}
 | ||
| 	}
 | ||
| 	return 0;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //Unicode转utf-8,
 | ||
| void uni2utf8_str (u8 *uni_in,u8 *utf8_out)
 | ||
| {
 | ||
| 	u8 *pt_char_uni=uni_in;
 | ||
| 	u8 *pt_char_utf8=utf8_out;
 | ||
| 	
 | ||
| 	//Unicode转UTF8
 | ||
| 	while(pt_char_uni)
 | ||
| 	{
 | ||
| 		pt_char_uni=uni2utf8(pt_char_uni,&pt_char_utf8);
 | ||
| 	}
 | ||
| 	pt_char_utf8[0]=0;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //utf-8转Unicode,返回值是下一个utf8编码的指针
 | ||
| //本函数只能转换汉字,即输入的汉字固定为3个字节
 | ||
| //
 | ||
| //参数:utf8_in,要转码的UTF8编码地址,高字节在前
 | ||
| //参数:uni_out,转码后输出的uni编码存储地址的指针,在转码后地址自动向后移编
 | ||
| //返回值:下一个utf8编码地址
 | ||
| u8 *utf82uni (const u8 *uft8_in,u8 **uni_out)
 | ||
| {
 | ||
| 	if ((uft8_in)&&(*uft8_in))
 | ||
| 	{
 | ||
| 		if (uft8_in[0]<0x80)
 | ||
| 		{
 | ||
| 			(*uni_out)[0]=0;
 | ||
| 			(*uni_out)[1]=*uft8_in;
 | ||
| 			(*uni_out)+=2;
 | ||
| 			return (u8*)uft8_in+1;
 | ||
| 		}
 | ||
| 		else
 | ||
| 		{
 | ||
| 			(*uni_out)[0]=uft8_in[0]<<4;
 | ||
| 			(*uni_out)[0]|=(uft8_in[1]>>2)&0x0f;
 | ||
| 			(*uni_out)[1]=(uft8_in[1]<<6);
 | ||
| 			(*uni_out)[1]|=(uft8_in[2])&0x3f;
 | ||
| 			(*uni_out)+=2;
 | ||
| 			return (u8*)uft8_in+3;
 | ||
| 		}
 | ||
| 	}
 | ||
| 	return 0;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //UTF8转Unicode
 | ||
| void utf82uni_str (u8 *uft8_in,u8 *uni_out)
 | ||
| {
 | ||
| 	u8 *pt_char_uni=uni_out;
 | ||
| 	u8 *pt_char_utf8=uft8_in;
 | ||
| 	
 | ||
| 	//UTF8转Unicode
 | ||
| 	while(pt_char_utf8)
 | ||
| 	{
 | ||
| 		pt_char_utf8=utf82uni(pt_char_utf8,&pt_char_uni);
 | ||
| 	}
 | ||
| 	pt_char_uni[0]=0;
 | ||
| 	pt_char_uni[1]=0;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| static int g_unigbk_size=0;
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //GBK编码转Unicode编码
 | ||
| //高字节在前
 | ||
| u8 *gbk2uni (u8 *gbk_in,u8 **uni_out)
 | ||
| {
 | ||
| 	u16 t[2];
 | ||
| 	u16 c;
 | ||
| 	u32 i, li, hi;
 | ||
| 	u16 n;			 
 | ||
| 	unsigned int cout;		   
 | ||
| 	u32 gbk2uni_offset=0;		  
 | ||
| 		
 | ||
| 	if (gbk_in==0||*gbk_in==0) return 0;
 | ||
| 	if (*gbk_in < 0x80)
 | ||
| 	{
 | ||
| 		(*uni_out)[0]=0;		//输出高字节在前
 | ||
| 		(*uni_out)[1]=*gbk_in;
 | ||
| 		(*uni_out)+=2;
 | ||
| 		return gbk_in+1;
 | ||
| 	}
 | ||
| 	else 
 | ||
| 	{
 | ||
| 		c=(gbk_in[0]<<8)|gbk_in[1];
 | ||
| 		if(!g_unigbk_size)//如果没打开UNIGBK.BIN.
 | ||
| 		{
 | ||
| 			g_unigbk_size=unigbk_open();
 | ||
| 		}
 | ||
| 		gbk2uni_offset=g_unigbk_size/2;	 
 | ||
| 		if(g_unigbk_size)//存在 		    
 | ||
| 		{
 | ||
| 			/* Unicode to OEMCP */
 | ||
| 			hi=g_unigbk_size/2;//对半开.
 | ||
| 			hi =hi / 4 - 1;
 | ||
| 			li = 0;
 | ||
| 			for (n = 16; n; n--)
 | ||
| 			{
 | ||
| 				i = li + (hi - li) / 2;		 
 | ||
| 				cout=unigbk_read(i*4+gbk2uni_offset,&t,4);
 | ||
| 				if (c == t[0]) break;
 | ||
| 				if (c > t[0])li = i;  
 | ||
| 				else hi = i;    
 | ||
| 			}
 | ||
| 			c = n ? t[1] : 0;  
 | ||
|  		}else c=0;
 | ||
| 		(*uni_out)[0]=c>>8;		//输出高字节在前
 | ||
| 		(*uni_out)[1]=c&0xff;
 | ||
| 		*uni_out+=2;
 | ||
| 		return gbk_in+2;
 | ||
| 	}
 | ||
| 	//return 0;
 | ||
| }		   
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //Unicode编码转GBK编码
 | ||
| //高字节在前
 | ||
| u8 *uni2gbk (u8 *uni_in,u8 **gbk_out)
 | ||
| {
 | ||
| 	u16 t[2];
 | ||
| 	u16 c;
 | ||
| 	u32 i, li, hi;
 | ||
| 	u16 n;			 
 | ||
| 	unsigned int cout;		   
 | ||
| 	u32 gbk2uni_offset=0;		  
 | ||
| 		
 | ||
| 	if (uni_in==0||(c=(uni_in[0]<<8)|uni_in[1],c==0)) return 0;
 | ||
| 	if (c < 0x80)
 | ||
| 	{
 | ||
| 		(*gbk_out)[0]=c;
 | ||
| 		(*gbk_out)+=1;
 | ||
| 		return uni_in+2;
 | ||
| 	}
 | ||
| 	else 
 | ||
| 	{
 | ||
| 		if(!g_unigbk_size)//如果没打开UNIGBK.BIN.
 | ||
| 		{
 | ||
| 			g_unigbk_size=unigbk_open();
 | ||
| 		}
 | ||
| 		gbk2uni_offset=0;	 
 | ||
| 		//因为在编码表文件中是低字节在前,这里重新指定c
 | ||
| 		//c=(uni_in[1]<<8)|uni_in[0];
 | ||
| 		if(g_unigbk_size)//存在 		    
 | ||
| 		{
 | ||
| 			/* Unicode to OEMCP */
 | ||
| 			hi=g_unigbk_size/2;//对半开.
 | ||
| 			hi =hi / 4 - 1;
 | ||
| 			li = 0;
 | ||
| 			for (n = 16; n; n--)
 | ||
| 			{
 | ||
| 				i = li + (hi - li) / 2;		 
 | ||
| 				cout=unigbk_read(i*4+gbk2uni_offset,&t,4);
 | ||
| 				if (c == t[0]) break;
 | ||
| 				if (c > t[0])li = i;  
 | ||
| 				else hi = i;    
 | ||
| 			}
 | ||
| 			c = n ? t[1] : 0;  
 | ||
|  		}else c=0;
 | ||
| 		(*gbk_out)[0]=c>>8;		//输出高字节在前
 | ||
| 		(*gbk_out)[1]=c&0xff;
 | ||
| 		*gbk_out+=2;
 | ||
| 		return uni_in+2;
 | ||
| 	}
 | ||
| 	//return 0;
 | ||
| }		   
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //GBK转Unicode
 | ||
| void gbk2uni_str (u8 *gbk_in,u8 *uni_out)
 | ||
| {
 | ||
| 	u8 *pt_char_gbk=gbk_in;
 | ||
| 	u8 *pt_char_uni=uni_out;
 | ||
| 	
 | ||
| 	//GBK转Unicode
 | ||
| 	while(pt_char_gbk)
 | ||
| 	{
 | ||
| 		pt_char_gbk=gbk2uni(pt_char_gbk,&pt_char_uni); 
 | ||
| 	}
 | ||
| 	pt_char_uni[0]=0;
 | ||
| 	pt_char_uni[1]=0;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //Unicode编码转GBK编码
 | ||
| //高字节在前
 | ||
| void uni2gbk_str (u8 *uni_in,u8 *gbk_out)
 | ||
| {
 | ||
| 	u8 *pt_char_gbk=gbk_out;
 | ||
| 	u8 *pt_char_uni=uni_in;
 | ||
| 	
 | ||
| 	//Unicode转GBK
 | ||
| 	while (pt_char_uni)
 | ||
| 	{
 | ||
| 		pt_char_uni=uni2gbk(pt_char_uni,&pt_char_gbk);
 | ||
| 	}
 | ||
| 	pt_char_gbk[0]=0;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| //GBK转UTF8
 | ||
| void gbk2utf8_str (u8 *gbk_in,u8 *utf8_out)
 | ||
| {
 | ||
| 	int str_size=strlen((const char *)gbk_in);
 | ||
| 	u8 *char_uni=mymalloc(str_size*3);
 | ||
| 	gbk2uni_str (gbk_in,char_uni);
 | ||
| 	uni2utf8_str (char_uni,utf8_out);
 | ||
| 	myfree(char_uni);
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| //UTF8转GBK
 | ||
| void utf82gbk_str (u8 *utf8_in,u8 *gbk_out)
 | ||
| {
 | ||
| 	int str_size=strlen((const char *)utf8_in);
 | ||
| 	u8 *char_uni=mymalloc(str_size*3);
 | ||
| 	utf82uni_str (utf8_in,char_uni);
 | ||
| 	uni2gbk_str (char_uni,gbk_out);
 | ||
| 	myfree(char_uni);
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //汉字编码转化测试
 | ||
| //返回值
 | ||
| //0,测试成功,非0失败
 | ||
| int char_encode_test (char *str_input)
 | ||
| {
 | ||
| 	int str_size=strlen(str_input);
 | ||
| 	u8 *char_gbk=mymalloc (str_size*3);
 | ||
| 	u8 *char_uni=mymalloc (str_size*3);
 | ||
| 	u8 *char_utf8=mymalloc (str_size*3);
 | ||
| 	mymemset(char_gbk,0,str_size*3);
 | ||
| 	mymemset(char_uni,0,str_size*3);
 | ||
| 	mymemset(char_utf8,0,str_size*3);
 | ||
| 	u8 *pt_char_gbk=char_gbk;
 | ||
| 	u8 *pt_char_uni=char_uni;
 | ||
| 	u8 *pt_char_utf8=char_utf8;
 | ||
| 	
 | ||
| 	mymemcpy (char_gbk,str_input,str_size);
 | ||
| 	
 | ||
| 	//GBK转Unicode
 | ||
| 	while(pt_char_gbk)
 | ||
| 	{
 | ||
| 		pt_char_gbk=gbk2uni(pt_char_gbk,&pt_char_uni); 
 | ||
| 	}
 | ||
| 	mymemset(char_gbk,0,str_size*3);
 | ||
| 	pt_char_gbk=char_gbk;
 | ||
| 	pt_char_uni=char_uni;
 | ||
| 	pt_char_utf8=char_utf8;
 | ||
| 	
 | ||
| 	//Unicode转UTF8
 | ||
| 	while(pt_char_uni)
 | ||
| 	{
 | ||
| 		pt_char_uni=uni2utf8(pt_char_uni,&pt_char_utf8);
 | ||
| 	}
 | ||
| 	mymemset(char_uni,0,str_size*3);
 | ||
| 	pt_char_gbk=char_gbk;
 | ||
| 	pt_char_uni=char_uni;
 | ||
| 	pt_char_utf8=char_utf8;
 | ||
| 	
 | ||
| 	//UTF8转Unicode
 | ||
| 	while(pt_char_utf8)
 | ||
| 	{
 | ||
| 		pt_char_utf8=utf82uni(pt_char_utf8,&pt_char_uni);
 | ||
| 	}
 | ||
| 	pt_char_gbk=char_gbk;
 | ||
| 	pt_char_uni=char_uni;
 | ||
| 	pt_char_utf8=char_utf8;
 | ||
| 	
 | ||
| 	//Unicode转GBK
 | ||
| 	while (pt_char_uni)
 | ||
| 	{
 | ||
| 		pt_char_uni=uni2gbk(pt_char_uni,&pt_char_gbk);
 | ||
| 	}
 | ||
| 	
 | ||
| 	int ret=strcmp((const char *)char_gbk,str_input);
 | ||
| 	myfree(char_gbk);
 | ||
| 	myfree(char_uni);
 | ||
| 	myfree(char_utf8);
 | ||
| 	return ret;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| 
 |