Files
player/Project/Src/MyApp/char_encode.c

358 lines
6.1 KiB
C
Raw Normal View History

2025-06-27 00:32:57 +08:00
#include "char_encode.h"
#include "string.h"
#include "unigbk_table.h"
#include "mymem.h"
2025-07-05 19:47:28 +08:00
//Unicode to UTF-8: converts one character and returns a pointer to the next Unicode character
//Intended for Chinese text; every input character is a fixed 2 bytes
2025-06-27 00:32:57 +08:00
//
2025-07-05 19:47:28 +08:00
//Param uni_in: address of the Unicode character to convert, high byte first
//Param utf8_out: pointer to the UTF-8 output address; it is advanced automatically past the bytes written
//Returns: address of the next Unicode character
2025-06-27 00:32:57 +08:00
/*
 * Encode one 16-bit Unicode code unit as UTF-8.
 *
 * uni_in   - two bytes, high byte first; may be NULL.
 * utf8_out - address of the caller's write cursor; the cursor is advanced
 *            past the bytes written (1 for ASCII, 3 for everything else).
 *
 * Returns uni_in + 2, or 0 when uni_in is NULL or the code unit is 0x0000
 * (nothing is written in that case).
 *
 * NOTE(review): every value >= 0x80 is written in the three-byte form, so
 * U+0080..U+07FF come out as over-long sequences rather than the standard
 * two-byte form. utf82uni() in this file decodes the three-byte form.
 */
u8 *uni2utf8 (const u8 *uni_in,u8 **utf8_out)
{
    u16 code;
    u8 *dst;

    if (uni_in == NULL)
        return NULL;

    code = (u16)((uni_in[0] << 8) | uni_in[1]);
    if (code == 0)
        return NULL;

    dst = *utf8_out;
    if (code < 0x80) {
        /* ASCII is copied through as a single byte */
        dst[0] = (u8)code;
        *utf8_out = dst + 1;
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        dst[0] = (u8)(0xe0 | (code >> 12));
        dst[1] = (u8)(0x80 | ((code >> 6) & 0x3f));
        dst[2] = (u8)(0x80 | (code & 0x3f));
        *utf8_out = dst + 3;
    }
    return (u8 *)uni_in + 2;
}
2025-07-05 19:47:28 +08:00
//Unicode string to UTF-8 string
2025-06-27 00:32:57 +08:00
/*
 * Convert a whole Unicode string (2 bytes per character, high byte first,
 * ended by a 0x0000 character) to a NUL-terminated UTF-8 string.
 *
 * uni_in   - source string; a NULL pointer yields an empty result.
 * utf8_out - destination buffer; the caller must make it large enough,
 *            no bound is checked here.
 */
void uni2utf8_str (u8 *uni_in,u8 *utf8_out)
{
    u8 *dst = utf8_out;
    u8 *src;

    /* uni2utf8() hands back 0 once it reaches the terminator */
    for (src = uni_in; src != NULL; src = uni2utf8(src, &dst)) {
    }
    *dst = 0;
}
2025-07-05 19:47:28 +08:00
//UTF-8 to Unicode: converts one character and returns a pointer to the next UTF-8 character
//Intended for Chinese text, where every Chinese character is 3 bytes of UTF-8
2025-06-27 00:32:57 +08:00
//
2025-07-05 19:47:28 +08:00
//Param uft8_in: address of the UTF-8 character to convert
//Param uni_out: pointer to the Unicode output address (high byte first); it is advanced automatically past the bytes written
//Returns: address of the next UTF-8 character
2025-06-27 00:32:57 +08:00
/*
 * Decode one UTF-8 sequence into one 16-bit Unicode code unit.
 *
 * uft8_in - NUL-terminated UTF-8 text positioned at the sequence to decode;
 *           may be NULL.
 * uni_out - address of the caller's write cursor; exactly two bytes (high
 *           byte first) are written and the cursor is advanced by 2.
 *
 * Returns a pointer to the byte following the consumed sequence, or 0 when
 * uft8_in is NULL or points at the terminating NUL (nothing is written).
 *
 * One-, two- and three-byte sequences are decoded. Continuation bytes are
 * verified before they are consumed, so a sequence cut short by the end of
 * the string can never make the function read or step past the terminator.
 * Anything that cannot be stored in one 16-bit unit (a malformed byte, a
 * four-byte sequence, or a sequence that decodes to 0) is replaced by '?'.
 * Over-long three-byte forms are accepted on purpose because uni2utf8() in
 * this file emits them for U+0080..U+07FF.
 */
u8 *utf82uni (const u8 *uft8_in,u8 **uni_out)
{
    u16 code = '?';         /* replacement until a sequence decodes cleanly */
    unsigned int used = 1;  /* malformed input is skipped one byte at a time */
    u8 lead;

    if (uft8_in == NULL || uft8_in[0] == 0)
        return NULL;

    lead = uft8_in[0];
    if (lead < 0x80) {
        code = lead;
    } else if ((lead & 0xe0) == 0xc0) {
        /* 110xxxxx 10xxxxxx */
        if ((uft8_in[1] & 0xc0) == 0x80) {
            code = (u16)(((lead & 0x1f) << 6) | (uft8_in[1] & 0x3f));
            used = 2;
        }
    } else if ((lead & 0xf0) == 0xe0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; && stops at the first bad byte */
        if ((uft8_in[1] & 0xc0) == 0x80 && (uft8_in[2] & 0xc0) == 0x80) {
            code = (u16)(((lead & 0x0f) << 12) |
                         ((uft8_in[1] & 0x3f) << 6) |
                         (uft8_in[2] & 0x3f));
            used = 3;
        }
    } else if ((lead & 0xf8) == 0xf0) {
        /* Beyond 16 bits: skip the whole sequence, keep the '?' */
        if ((uft8_in[1] & 0xc0) == 0x80 && (uft8_in[2] & 0xc0) == 0x80 &&
            (uft8_in[3] & 0xc0) == 0x80) {
            used = 4;
        }
    }

    /* A zero code unit would terminate the output string early */
    if (code == 0)
        code = '?';

    (*uni_out)[0] = (u8)(code >> 8);
    (*uni_out)[1] = (u8)(code & 0xff);
    (*uni_out) += 2;
    return (u8 *)uft8_in + used;
}
2025-07-05 19:47:28 +08:00
//UTF-8 string to Unicode string
2025-06-27 00:32:57 +08:00
/*
 * Convert a NUL-terminated UTF-8 string to a Unicode string (2 bytes per
 * character, high byte first) ended by a 0x0000 character.
 *
 * uft8_in - source string; a NULL pointer yields an empty result.
 * uni_out - destination buffer; the caller must make it large enough,
 *           no bound is checked here.
 */
void utf82uni_str (u8 *uft8_in,u8 *uni_out)
{
    u8 *dst = uni_out;
    u8 *src;

    /* utf82uni() hands back 0 once it reaches the terminator */
    for (src = uft8_in; src != NULL; src = utf82uni(src, &dst)) {
    }
    /* two zero bytes end the 16-bit string */
    dst[0] = 0;
    dst[1] = 0;
}
/* Value returned by unigbk_open(), cached by gbk2uni()/uni2gbk(); while it is
   0 those functions call unigbk_open() again. Used as the table size in bytes. */
static int g_unigbk_size=0;
2025-07-05 19:47:28 +08:00
//GBK code to Unicode code
//High byte first
2025-06-27 00:32:57 +08:00
/*
 * Convert one GBK character to one 16-bit Unicode code unit.
 *
 * gbk_in  - NUL-terminated GBK text positioned at the character to convert;
 *           may be NULL.
 * uni_out - address of the caller's write cursor; exactly two bytes (high
 *           byte first) are written and the cursor is advanced by 2.
 *
 * Returns a pointer to the next GBK character, or 0 when gbk_in is NULL or
 * points at the terminating NUL (nothing is written).
 *
 * Bytes below 0x80 are copied through. Other characters are looked up in the
 * second half of the table read through unigbk_read(); each entry is two
 * u16 values {GBK, Unicode} and the entries are searched by bisection, so
 * they are assumed to be sorted by the first value.
 *
 * A character that cannot be converted (no table, no match) becomes '?'
 * instead of 0x0000, which would silently end the output string. A lead
 * byte directly followed by the terminator also becomes '?' and consumes one
 * byte only, so the returned pointer never passes the end of the input.
 */
u8 *gbk2uni (u8 *gbk_in,u8 **uni_out)
{
    u16 t[2] = {0, 0};      /* one table entry; zeroed in case a read fails */
    u16 c = '?';            /* result, replacement until a match is found */
    u16 key;
    u32 i, li, hi;
    u32 gbk2uni_offset;
    u16 n;

    if (gbk_in == NULL || gbk_in[0] == 0)
        return NULL;

    if (gbk_in[0] < 0x80) {
        (*uni_out)[0] = 0;              /* high byte first */
        (*uni_out)[1] = gbk_in[0];
        (*uni_out) += 2;
        return gbk_in + 1;
    }

    if (gbk_in[1] == 0) {
        /* Dangling lead byte: do not swallow the terminator */
        (*uni_out)[0] = 0;
        (*uni_out)[1] = '?';
        (*uni_out) += 2;
        return gbk_in + 1;
    }

    key = (u16)((gbk_in[0] << 8) | gbk_in[1]);

    if (!g_unigbk_size) {
        /* table not opened yet */
        g_unigbk_size = unigbk_open();
    }

    /* At least one 4-byte entry per half is needed, otherwise hi would wrap */
    if (g_unigbk_size >= 8) {
        gbk2uni_offset = (u32)g_unigbk_size / 2;   /* GBK->Unicode half */
        hi = gbk2uni_offset / 4 - 1;
        li = 0;
        /* 16 halvings are enough for any table of up to 65536 entries */
        for (n = 16; n; n--) {
            i = li + (hi - li) / 2;
            (void)unigbk_read(i * 4 + gbk2uni_offset, &t, 4);
            if (key == t[0])
                break;
            if (key > t[0])
                li = i;
            else
                hi = i;
        }
        if (n && t[1])
            c = t[1];
    }

    (*uni_out)[0] = (u8)(c >> 8);       /* high byte first */
    (*uni_out)[1] = (u8)(c & 0xff);
    (*uni_out) += 2;
    return gbk_in + 2;
}
2025-07-05 19:47:28 +08:00
//Unicode code to GBK code
//High byte first
2025-06-27 00:32:57 +08:00
/*
 * Convert one 16-bit Unicode code unit to its GBK encoding.
 *
 * uni_in  - two bytes, high byte first; may be NULL.
 * gbk_out - address of the caller's write cursor; the cursor is advanced
 *           past the bytes written (1 or 2).
 *
 * Returns uni_in + 2, or 0 when uni_in is NULL or the code unit is 0x0000
 * (nothing is written in that case).
 *
 * Values below 0x80 are copied through as one byte. Other values are looked
 * up in the first half of the table read through unigbk_read(); each entry
 * is two u16 values {Unicode, GBK} and the entries are searched by
 * bisection, so they are assumed to be sorted by the first value.
 *
 * A code unit that cannot be converted (no table, no match) is written as a
 * single '?' instead of two NUL bytes, which would silently cut the GBK
 * C string short. A match below 0x100 is written as one byte for the same
 * reason.
 */
u8 *uni2gbk (u8 *uni_in,u8 **gbk_out)
{
    u16 t[2] = {0, 0};      /* one table entry; zeroed in case a read fails */
    u16 c;
    u16 gbk = 0;            /* 0 means "no mapping found" */
    u32 i, li, hi;
    u16 n;

    if (uni_in == NULL)
        return NULL;
    c = (u16)((uni_in[0] << 8) | uni_in[1]);
    if (c == 0)
        return NULL;

    if (c < 0x80) {
        (*gbk_out)[0] = (u8)c;
        (*gbk_out) += 1;
        return uni_in + 2;
    }

    if (!g_unigbk_size) {
        /* table not opened yet */
        g_unigbk_size = unigbk_open();
    }

    /* At least one 4-byte entry per half is needed, otherwise hi would wrap */
    if (g_unigbk_size >= 8) {
        hi = ((u32)g_unigbk_size / 2) / 4 - 1;  /* Unicode->GBK half starts at 0 */
        li = 0;
        /* 16 halvings are enough for any table of up to 65536 entries */
        for (n = 16; n; n--) {
            i = li + (hi - li) / 2;
            (void)unigbk_read(i * 4, &t, 4);
            if (c == t[0])
                break;
            if (c > t[0])
                li = i;
            else
                hi = i;
        }
        if (n)
            gbk = t[1];
    }

    if (gbk >= 0x100) {
        (*gbk_out)[0] = (u8)(gbk >> 8);     /* high byte first */
        (*gbk_out)[1] = (u8)(gbk & 0xff);
        (*gbk_out) += 2;
    } else {
        /* never emit a NUL byte in the middle of the output */
        (*gbk_out)[0] = gbk ? (u8)gbk : (u8)'?';
        (*gbk_out) += 1;
    }
    return uni_in + 2;
}
2025-07-05 19:47:28 +08:00
//GBK string to Unicode string
2025-06-27 00:32:57 +08:00
/*
 * Convert a NUL-terminated GBK string to a Unicode string (2 bytes per
 * character, high byte first) ended by a 0x0000 character.
 *
 * gbk_in  - source string; a NULL pointer yields an empty result.
 * uni_out - destination buffer; the caller must make it large enough,
 *           no bound is checked here.
 */
void gbk2uni_str (u8 *gbk_in,u8 *uni_out)
{
    u8 *dst = uni_out;
    u8 *src;

    /* gbk2uni() hands back 0 once it reaches the terminator */
    for (src = gbk_in; src != NULL; src = gbk2uni(src, &dst)) {
    }
    /* two zero bytes end the 16-bit string */
    dst[0] = 0;
    dst[1] = 0;
}
2025-07-05 19:47:28 +08:00
//Unicode string to GBK string
//High byte first
2025-06-27 00:32:57 +08:00
/*
 * Convert a Unicode string (2 bytes per character, high byte first, ended
 * by a 0x0000 character) to a NUL-terminated GBK string.
 *
 * uni_in  - source string; a NULL pointer yields an empty result.
 * gbk_out - destination buffer; the caller must make it large enough,
 *           no bound is checked here.
 */
void uni2gbk_str (u8 *uni_in,u8 *gbk_out)
{
    u8 *dst = gbk_out;
    u8 *src;

    /* uni2gbk() hands back 0 once it reaches the terminator */
    for (src = uni_in; src != NULL; src = uni2gbk(src, &dst)) {
    }
    *dst = 0;
}
2025-07-05 19:47:28 +08:00
//GBK string to UTF-8 string
2025-06-27 00:32:57 +08:00
/*
 * Convert a NUL-terminated GBK string to a NUL-terminated UTF-8 string,
 * going through a temporary Unicode string allocated with mymalloc().
 *
 * gbk_in   - source string.
 * utf8_out - destination buffer; the caller must make it large enough,
 *            no bound is checked here.
 *
 * If gbk_in is NULL or the temporary buffer cannot be allocated, utf8_out
 * receives an empty string. A NULL utf8_out makes the call a no-op.
 */
void gbk2utf8_str (u8 *gbk_in,u8 *utf8_out)
{
    size_t str_size;
    u8 *char_uni;

    if (utf8_out == NULL)
        return;
    if (gbk_in == NULL) {
        utf8_out[0] = 0;
        return;
    }

    str_size = strlen((const char *)gbk_in);
    /* Worst case is all ASCII: one 2-byte unit per input byte, plus the
       2-byte terminator. The previous str_size*3 was too small for inputs
       of 0 or 1 byte. */
    char_uni = mymalloc((str_size + 1) * 2);
    if (char_uni == NULL) {
        utf8_out[0] = 0;
        return;
    }

    gbk2uni_str (gbk_in,char_uni);
    uni2utf8_str (char_uni,utf8_out);
    myfree(char_uni);
}
2025-07-05 19:47:28 +08:00
//UTF-8 string to GBK string
2025-06-27 00:32:57 +08:00
/*
 * Convert a NUL-terminated UTF-8 string to a NUL-terminated GBK string,
 * going through a temporary Unicode string allocated with mymalloc().
 *
 * utf8_in - source string.
 * gbk_out - destination buffer; the caller must make it large enough,
 *           no bound is checked here.
 *
 * If utf8_in is NULL or the temporary buffer cannot be allocated, gbk_out
 * receives an empty string. A NULL gbk_out makes the call a no-op.
 */
void utf82gbk_str (u8 *utf8_in,u8 *gbk_out)
{
    size_t str_size;
    u8 *char_uni;

    if (gbk_out == NULL)
        return;
    if (utf8_in == NULL) {
        gbk_out[0] = 0;
        return;
    }

    str_size = strlen((const char *)utf8_in);
    /* Worst case is all ASCII: one 2-byte unit per input byte, plus the
       2-byte terminator. The previous str_size*3 was too small for inputs
       of 0 or 1 byte. */
    char_uni = mymalloc((str_size + 1) * 2);
    if (char_uni == NULL) {
        gbk_out[0] = 0;
        return;
    }

    utf82uni_str (utf8_in,char_uni);
    uni2gbk_str (char_uni,gbk_out);
    myfree(char_uni);
}
2025-07-05 19:47:28 +08:00
//Chinese character encoding conversion test
//Return value:
//0: test passed; non-zero: failed
2025-06-27 00:32:57 +08:00
int char_encode_test (char *str_input)
{
int str_size=strlen(str_input);
u8 *char_gbk=mymalloc (str_size*3);
u8 *char_uni=mymalloc (str_size*3);
u8 *char_utf8=mymalloc (str_size*3);
mymemset(char_gbk,0,str_size*3);
mymemset(char_uni,0,str_size*3);
mymemset(char_utf8,0,str_size*3);
u8 *pt_char_gbk=char_gbk;
u8 *pt_char_uni=char_uni;
u8 *pt_char_utf8=char_utf8;
mymemcpy (char_gbk,str_input,str_size);
2025-07-05 19:47:28 +08:00
//GBKתUnicode
2025-06-27 00:32:57 +08:00
while(pt_char_gbk)
{
pt_char_gbk=gbk2uni(pt_char_gbk,&pt_char_uni);
}
mymemset(char_gbk,0,str_size*3);
pt_char_gbk=char_gbk;
pt_char_uni=char_uni;
pt_char_utf8=char_utf8;
2025-07-05 19:47:28 +08:00
//UnicodeתUTF8
2025-06-27 00:32:57 +08:00
while(pt_char_uni)
{
pt_char_uni=uni2utf8(pt_char_uni,&pt_char_utf8);
}
mymemset(char_uni,0,str_size*3);
pt_char_gbk=char_gbk;
pt_char_uni=char_uni;
pt_char_utf8=char_utf8;
2025-07-05 19:47:28 +08:00
//UTF8תUnicode
2025-06-27 00:32:57 +08:00
while(pt_char_utf8)
{
pt_char_utf8=utf82uni(pt_char_utf8,&pt_char_uni);
}
pt_char_gbk=char_gbk;
pt_char_uni=char_uni;
pt_char_utf8=char_utf8;
2025-07-05 19:47:28 +08:00
//UnicodeתGBK
2025-06-27 00:32:57 +08:00
while (pt_char_uni)
{
pt_char_uni=uni2gbk(pt_char_uni,&pt_char_gbk);
}
int ret=strcmp((const char *)char_gbk,str_input);
myfree(char_gbk);
myfree(char_uni);
myfree(char_utf8);
return ret;
}