Files
player/Project/Src/MyApp/char_encode.c

285 lines
6.9 KiB
C
Raw Permalink Normal View History

#include "char_encode.h"
#include <stddef.h>
#include <stdint.h>
#include "mymem.h"
#include "string.h"
#include "unigbk_table.h"
// Convert one big-endian UCS-2 character to UTF-8.
//
// Characters below 0x80 are written as a single byte; every other character
// is written with the three-byte UTF-8 layout.
// NOTE(review): characters 0x0080-0x07FF therefore get a three-byte form
// instead of the standard two-byte form. utf82uni() in this file decodes
// exactly that form, so the layout is intentionally left unchanged.
//
// uni_in:   input character, high byte first. May be NULL.
// utf8_out: address of the output cursor. The cursor is advanced past the
//           bytes written (1 or 3); the caller must provide that much room.
// returns:  pointer to the next input character (uni_in + 2), or NULL when
//           uni_in is NULL, the character is 0x0000 (end of string), or no
//           output cursor was supplied. Nothing is written in those cases.
uint8_t *uni2utf8(const uint8_t *uni_in, uint8_t **utf8_out) {
  if (uni_in == NULL || utf8_out == NULL || *utf8_out == NULL) {
    return NULL;
  }

  const uint16_t code = (uint16_t)((uni_in[0] << 8) | uni_in[1]);
  if (code == 0) {
    return NULL;  // 0x0000 terminates the input string
  }

  uint8_t *dst = *utf8_out;
  if (code < 0x80) {
    dst[0] = (uint8_t)code;
    *utf8_out = dst + 1;
  } else {
    // 1110xxxx 10xxxxxx 10xxxxxx
    dst[0] = (uint8_t)(0xe0 | (code >> 12));
    dst[1] = (uint8_t)(0x80 | ((code >> 6) & 0x3f));
    dst[2] = (uint8_t)(0x80 | (code & 0x3f));
    *utf8_out = dst + 3;
  }
  return (uint8_t *)uni_in + 2;
}
// Convert a big-endian UCS-2 string to a NUL-terminated UTF-8 string.
//
// uni_in:   input string, terminated by a 0x0000 character. May be NULL, in
//           which case only the terminator is written.
// utf8_out: output buffer. uni2utf8() writes at most 3 bytes per input
//           character, so the caller must provide 3 bytes per character
//           plus 1 for the terminator. Ignored (nothing written) when NULL.
void uni2utf8_str(uint8_t *uni_in, uint8_t *utf8_out) {
  if (utf8_out == NULL) {
    return;
  }

  const uint8_t *src = uni_in;
  uint8_t *dst = utf8_out;
  // uni2utf8() returns NULL once it reaches the 0x0000 terminator.
  while (src != NULL) {
    src = uni2utf8(src, &dst);
  }
  *dst = 0;
}
// Convert one UTF-8 character to a big-endian UCS-2 character.
//
// Handles one-, two- and three-byte sequences. Three-byte sequences are
// decoded without rejecting overlong forms, because uni2utf8() in this file
// emits the three-byte layout for every non-ASCII character.
// Anything that cannot be decoded into 16 bits (stray continuation byte,
// sequence cut short by the terminator, four-byte lead, or a sequence that
// decodes to 0x0000) is replaced by '?' and the bad bytes are skipped, so
// the returned pointer never steps over the NUL terminator.
//
// uft8_in: input character. May be NULL.
// uni_out: address of the output cursor. Two bytes are written (high byte
//          first) and the cursor is advanced by 2.
// returns: pointer to the next input character, or NULL when uft8_in is
//          NULL, points at the NUL terminator, or no output cursor was
//          supplied. Nothing is written in those cases.
uint8_t *utf82uni(const uint8_t *uft8_in, uint8_t **uni_out) {
  if (uft8_in == NULL || uni_out == NULL || *uni_out == NULL) {
    return NULL;
  }

  const uint8_t lead = uft8_in[0];
  if (lead == 0) {
    return NULL;  // end of string
  }

  uint16_t code = 0;
  const uint8_t *next = uft8_in + 1;

  if (lead < 0x80) {
    code = lead;
  } else if ((lead & 0xe0) == 0xc0 && (uft8_in[1] & 0xc0) == 0x80) {
    // 110xxxxx 10xxxxxx
    code = (uint16_t)(((lead & 0x1f) << 6) | (uft8_in[1] & 0x3f));
    next = uft8_in + 2;
  } else if ((lead & 0xf0) == 0xe0 && (uft8_in[1] & 0xc0) == 0x80 &&
             (uft8_in[2] & 0xc0) == 0x80) {
    // 1110xxxx 10xxxxxx 10xxxxxx; the third byte is only read after the
    // second one proved to be a continuation byte (hence not the NUL).
    code = (uint16_t)(((lead & 0x0f) << 12) | ((uft8_in[1] & 0x3f) << 6) |
                      (uft8_in[2] & 0x3f));
    next = uft8_in + 3;
  } else {
    // Undecodable: drop the lead byte and any continuation bytes behind it.
    while ((*next & 0xc0) == 0x80) {
      next++;
    }
  }

  if (code == 0) {
    code = '?';  // never emit 0x0000, it would terminate the output string
  }

  (*uni_out)[0] = (uint8_t)(code >> 8);
  (*uni_out)[1] = (uint8_t)(code & 0xff);
  *uni_out += 2;
  return (uint8_t *)next;
}
// Convert a NUL-terminated UTF-8 string to a big-endian UCS-2 string.
//
// uft8_in: input string. May be NULL, in which case only the terminator is
//          written.
// uni_out: output buffer. Two bytes are written per decoded character plus
//          a two-byte 0x0000 terminator; the caller must provide that much
//          room. Ignored (nothing written) when NULL.
void utf82uni_str(uint8_t *uft8_in, uint8_t *uni_out) {
  if (uni_out == NULL) {
    return;
  }

  const uint8_t *src = uft8_in;
  uint8_t *dst = uni_out;
  // utf82uni() returns NULL once it reaches the NUL terminator.
  while (src != NULL) {
    src = utf82uni(src, &dst);
  }
  dst[0] = 0;
  dst[1] = 0;
}
// Cached return value of unigbk_open(), filled in lazily by gbk2uni() and
// uni2gbk(); 0 means the table has not been opened yet (or the open call
// returned 0). Both converters treat it as the table size: the lookup for
// Unicode->GBK starts at offset 0 and the one for GBK->Unicode at size / 2.
// Presumably the size is in bytes -- TODO confirm against unigbk_table.h.
static int g_unigbk_size = 0;
// Binary-search one half of the conversion table for `key`.
//
// Each entry is 4 bytes and is read into two uint16_t values in host byte
// order: entry[0] is the key, entry[1] the mapped value. Entries are assumed
// to be sorted ascending by key (the search relies on it).
// Returns the mapped value, or 0 when the key is not present.
static uint16_t gbk2uni_lookup(uint16_t key, uint32_t base_offset,
                               uint32_t entry_count) {
  uint32_t lo = 0;
  uint32_t hi = entry_count;  // half-open range [lo, hi)

  while (lo < hi) {
    const uint32_t mid = lo + (hi - lo) / 2;
    // Zero-initialised so a failed read cannot leave indeterminate values.
    uint16_t entry[2] = {0, 0};
    (void)unigbk_read(mid * 4 + base_offset, &entry, 4);
    if (entry[0] == key) {
      return entry[1];
    }
    if (entry[0] < key) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return 0;
}

// Convert one GBK character to a big-endian UCS-2 character.
//
// Bytes below 0x80 are copied as-is. Any other byte is taken as the first
// byte of a two-byte GBK code and looked up in the second half of the
// conversion table (opened on first use through unigbk_open()).
// A character that cannot be converted (table unavailable, code not in the
// table, or a lead byte directly followed by the NUL terminator) is replaced
// by '?', so the output never contains an embedded 0x0000.
//
// gbk_in:  input character. May be NULL.
// uni_out: address of the output cursor. Two bytes are written (high byte
//          first) and the cursor is advanced by 2.
// returns: pointer to the next input character, or NULL when gbk_in is NULL,
//          points at the NUL terminator, or no output cursor was supplied.
uint8_t *gbk2uni(uint8_t *gbk_in, uint8_t **uni_out) {
  if (gbk_in == NULL || *gbk_in == 0) {
    return NULL;
  }
  if (uni_out == NULL || *uni_out == NULL) {
    return NULL;
  }

  uint16_t code = 0;
  uint8_t *next = gbk_in + 1;

  if (gbk_in[0] < 0x80) {
    code = gbk_in[0];
  } else if (gbk_in[1] != 0) {
    const uint16_t gbk = (uint16_t)((gbk_in[0] << 8) | gbk_in[1]);
    if (!g_unigbk_size) {
      g_unigbk_size = unigbk_open();  // table not opened yet
    }
    if (g_unigbk_size > 0) {
      // The GBK->Unicode entries occupy the second half of the table.
      const uint32_t half = (uint32_t)g_unigbk_size / 2;
      code = gbk2uni_lookup(gbk, half, half / 4);
    }
    next = gbk_in + 2;
  }
  // else: lead byte cut off by the terminator; consume only that byte so the
  // caller's next call sees the NUL.

  if (code == 0) {
    code = '?';
  }

  (*uni_out)[0] = (uint8_t)(code >> 8);  // high byte first
  (*uni_out)[1] = (uint8_t)(code & 0xff);
  *uni_out += 2;
  return next;
}
// Binary-search the Unicode->GBK part of the conversion table for `key`.
//
// Each entry is 4 bytes and is read into two uint16_t values in host byte
// order: entry[0] is the key, entry[1] the mapped value. Entries are assumed
// to be sorted ascending by key (the search relies on it).
// Returns the mapped value, or 0 when the key is not present.
static uint16_t uni2gbk_lookup(uint16_t key, uint32_t base_offset,
                               uint32_t entry_count) {
  uint32_t lo = 0;
  uint32_t hi = entry_count;  // half-open range [lo, hi)

  while (lo < hi) {
    const uint32_t mid = lo + (hi - lo) / 2;
    // Zero-initialised so a failed read cannot leave indeterminate values.
    uint16_t entry[2] = {0, 0};
    (void)unigbk_read(mid * 4 + base_offset, &entry, 4);
    if (entry[0] == key) {
      return entry[1];
    }
    if (entry[0] < key) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return 0;
}

// Convert one big-endian UCS-2 character to GBK.
//
// Characters below 0x80 are written as a single byte. Any other character is
// looked up in the first half of the conversion table (opened on first use
// through unigbk_open()) and written as two bytes, high byte first.
// A character that cannot be converted (table unavailable or character not
// in the table) is written as a single '?', so the output never contains an
// embedded NUL byte.
//
// uni_in:  input character, high byte first. May be NULL.
// gbk_out: address of the output cursor; advanced past the 1 or 2 bytes
//          written.
// returns: pointer to the next input character (uni_in + 2), or NULL when
//          uni_in is NULL, the character is 0x0000 (end of string), or no
//          output cursor was supplied.
uint8_t *uni2gbk(uint8_t *uni_in, uint8_t **gbk_out) {
  if (uni_in == NULL || gbk_out == NULL || *gbk_out == NULL) {
    return NULL;
  }

  const uint16_t uni = (uint16_t)((uni_in[0] << 8) | uni_in[1]);
  if (uni == 0) {
    return NULL;  // 0x0000 terminates the input string
  }

  uint8_t *dst = *gbk_out;
  if (uni < 0x80) {
    *dst++ = (uint8_t)uni;
  } else {
    uint16_t gbk = 0;
    if (!g_unigbk_size) {
      g_unigbk_size = unigbk_open();  // table not opened yet
    }
    if (g_unigbk_size > 0) {
      // The Unicode->GBK entries occupy the first half of the table.
      const uint32_t half = (uint32_t)g_unigbk_size / 2;
      gbk = uni2gbk_lookup(uni, 0, half / 4);
    }

    if (gbk == 0) {
      *dst++ = '?';
    } else if (gbk < 0x100) {
      // A zero high byte would terminate the GBK string; emit one byte.
      *dst++ = (uint8_t)gbk;
    } else {
      *dst++ = (uint8_t)(gbk >> 8);  // high byte first
      *dst++ = (uint8_t)(gbk & 0xff);
    }
  }

  *gbk_out = dst;
  return uni_in + 2;
}
// Convert a NUL-terminated GBK string to a big-endian UCS-2 string.
//
// gbk_in:  input string. May be NULL, in which case only the terminator is
//          written.
// uni_out: output buffer. Two bytes are written per converted character
//          plus a two-byte 0x0000 terminator; the caller must provide that
//          much room. Ignored (nothing written) when NULL.
void gbk2uni_str(uint8_t *gbk_in, uint8_t *uni_out) {
  if (uni_out == NULL) {
    return;
  }

  uint8_t *src = gbk_in;
  uint8_t *dst = uni_out;
  // gbk2uni() returns NULL once it reaches the NUL terminator.
  while (src != NULL) {
    src = gbk2uni(src, &dst);
  }
  dst[0] = 0;
  dst[1] = 0;
}
// Convert a big-endian UCS-2 string to a NUL-terminated GBK string.
//
// uni_in:  input string, terminated by a 0x0000 character. May be NULL, in
//          which case only the terminator is written.
// gbk_out: output buffer. uni2gbk() writes at most 2 bytes per input
//          character, so the caller must provide 2 bytes per character plus
//          1 for the terminator. Ignored (nothing written) when NULL.
void uni2gbk_str(uint8_t *uni_in, uint8_t *gbk_out) {
  if (gbk_out == NULL) {
    return;
  }

  uint8_t *src = uni_in;
  uint8_t *dst = gbk_out;
  // uni2gbk() returns NULL once it reaches the 0x0000 terminator.
  while (src != NULL) {
    src = uni2gbk(src, &dst);
  }
  *dst = 0;
}
// Convert a NUL-terminated GBK string to a NUL-terminated UTF-8 string.
//
// The conversion goes through a temporary UCS-2 buffer obtained from
// mymalloc(). Each input byte produces at most one UCS-2 character, so the
// buffer holds 2 bytes per input byte plus the 2-byte terminator.
//
// gbk_in:   input string. NULL is treated as an empty string.
// utf8_out: output buffer; the caller must provide up to 3 bytes per
//           converted character plus 1. Ignored when NULL. If the temporary
//           buffer cannot be allocated, an empty string is written.
void gbk2utf8_str(uint8_t *gbk_in, uint8_t *utf8_out) {
  if (utf8_out == NULL) {
    return;
  }
  if (gbk_in == NULL) {
    utf8_out[0] = 0;
    return;
  }

  const size_t gbk_len = strlen((const char *)gbk_in);
  uint8_t *char_uni = mymalloc(gbk_len * 2 + 2);
  if (char_uni == NULL) {
    utf8_out[0] = 0;
    return;
  }

  gbk2uni_str(gbk_in, char_uni);
  uni2utf8_str(char_uni, utf8_out);
  myfree(char_uni);
}
// Convert a NUL-terminated UTF-8 string to a NUL-terminated GBK string.
//
// The conversion goes through a temporary UCS-2 buffer obtained from
// mymalloc(). Each input byte produces at most one UCS-2 character, so the
// buffer holds 2 bytes per input byte plus the 2-byte terminator.
//
// utf8_in: input string. NULL is treated as an empty string.
// gbk_out: output buffer; the caller must provide up to 2 bytes per
//          converted character plus 1. Ignored when NULL. If the temporary
//          buffer cannot be allocated, an empty string is written.
void utf82gbk_str(uint8_t *utf8_in, uint8_t *gbk_out) {
  if (gbk_out == NULL) {
    return;
  }
  if (utf8_in == NULL) {
    gbk_out[0] = 0;
    return;
  }

  const size_t utf8_len = strlen((const char *)utf8_in);
  uint8_t *char_uni = mymalloc(utf8_len * 2 + 2);
  if (char_uni == NULL) {
    gbk_out[0] = 0;
    return;
  }

  utf82uni_str(utf8_in, char_uni);
  uni2gbk_str(char_uni, gbk_out);
  myfree(char_uni);
}
// Round-trip self test for the converters in this file:
// GBK -> UCS-2 -> UTF-8 -> UCS-2 -> GBK, then compare with the input.
//
// str_input: NUL-terminated GBK string to run through the chain.
// returns:   0 when the round trip reproduces the input exactly; non-zero
//            otherwise (the strcmp() result), or -1 when str_input is NULL
//            or a work buffer could not be allocated.
int char_encode_test(char *str_input) {
  if (str_input == NULL) {
    return -1;
  }

  // Worst case per input byte is 3 output bytes (UTF-8); the extra 4 bytes
  // leave room for the terminators even when the input is empty.
  const size_t capacity = strlen(str_input) * 3 + 4;
  uint8_t *char_gbk = mymalloc(capacity);
  uint8_t *char_uni = mymalloc(capacity);
  uint8_t *char_utf8 = mymalloc(capacity);
  int ret = -1;

  if (char_gbk != NULL && char_uni != NULL && char_utf8 != NULL) {
    // The *_str helpers write their own terminators, so the buffers do not
    // need to be cleared between stages.
    gbk2uni_str((uint8_t *)str_input, char_uni);
    uni2utf8_str(char_uni, char_utf8);
    utf82uni_str(char_utf8, char_uni);
    uni2gbk_str(char_uni, char_gbk);
    ret = strcmp((const char *)char_gbk, str_input);
  }

  // myfree() is a project allocator; whether it accepts NULL is not visible
  // here, so only release what was actually allocated.
  if (char_gbk != NULL) {
    myfree(char_gbk);
  }
  if (char_uni != NULL) {
    myfree(char_uni);
  }
  if (char_utf8 != NULL) {
    myfree(char_utf8);
  }
  return ret;
}