#include "char_encode.h"
#include "mymem.h"
#include "string.h"
#include "unigbk_table.h"

/*
 * Character-set conversion helpers between three encodings:
 *   - "Unicode": a stream of 16-bit code units stored high byte first,
 *     terminated by the unit 0x0000;
 *   - UTF-8: NUL-terminated byte string (1- to 3-byte sequences only);
 *   - GBK: NUL-terminated byte string, bytes < 0x80 are single-byte
 *     characters, anything else is the first byte of a two-byte character.
 *
 * Only 16-bit code units are handled; characters that need 4-byte UTF-8
 * sequences are outside the scope of this module.
 */

enum {
    /* Emitted for characters without a mapping. Writing 0 instead would
     * terminate the output string and silently drop the rest of the text. */
    CHAR_ENCODE_SUBSTITUTE = 0x3F /* '?' */
};

/*
 * Convert one 16-bit Unicode code unit to UTF-8.
 *
 * uni_in:   address of the code unit to convert, high byte first.
 * utf8_out: address of the output cursor; the encoded bytes (1, 2 or 3) are
 *           written there and the cursor is advanced past them.
 *
 * Returns the address of the next code unit (uni_in + 2), or NULL when
 * uni_in is NULL or the code unit is the 0x0000 terminator (nothing is
 * written in that case).
 */
uint8_t *uni2utf8(const uint8_t *uni_in, uint8_t **utf8_out)
{
    if (uni_in == NULL) {
        return NULL;
    }

    const uint16_t code = (uint16_t)((uni_in[0] << 8) | uni_in[1]);
    if (code == 0) {
        return NULL;
    }

    uint8_t *dst = *utf8_out;
    if (code < 0x80) {
        /* 0xxxxxxx */
        dst[0] = (uint8_t)code;
        dst += 1;
    } else if (code < 0x800) {
        /* 110xxxxx 10xxxxxx - the shortest form is mandatory in UTF-8 */
        dst[0] = (uint8_t)(0xC0 | (code >> 6));
        dst[1] = (uint8_t)(0x80 | (code & 0x3F));
        dst += 2;
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        dst[0] = (uint8_t)(0xE0 | (code >> 12));
        dst[1] = (uint8_t)(0x80 | ((code >> 6) & 0x3F));
        dst[2] = (uint8_t)(0x80 | (code & 0x3F));
        dst += 3;
    }
    *utf8_out = dst;

    return (uint8_t *)uni_in + 2;
}

/*
 * Convert a 0x0000-terminated Unicode string to a NUL-terminated UTF-8
 * string. utf8_out must have room for up to 3 bytes per code unit plus the
 * terminator. A NULL uni_in produces an empty string.
 */
void uni2utf8_str(uint8_t *uni_in, uint8_t *utf8_out)
{
    if (utf8_out == NULL) {
        return;
    }

    uint8_t *dst = utf8_out;
    for (uint8_t *src = uni_in; src != NULL;) {
        src = uni2utf8(src, &dst);
    }
    dst[0] = 0;
}

/*
 * Convert one UTF-8 sequence (1, 2 or 3 bytes) to a 16-bit Unicode code unit.
 *
 * uft8_in: address of the first byte of the sequence.
 * uni_out: address of the output cursor; two bytes (high byte first) are
 *          written there and the cursor is advanced by 2.
 *
 * Returns the address of the next UTF-8 sequence, or NULL when uft8_in is
 * NULL, points at the NUL terminator, or the sequence is cut short by the
 * terminator (nothing is written in those cases).
 *
 * Lead bytes 0xC0..0xDF are decoded as 2-byte sequences; every other
 * non-ASCII lead byte is decoded as a 3-byte sequence. Continuation bytes
 * are masked rather than validated, so the overlong 3-byte forms are still
 * accepted.
 */
uint8_t *utf82uni(const uint8_t *uft8_in, uint8_t **uni_out)
{
    if (uft8_in == NULL || uft8_in[0] == 0) {
        return NULL;
    }

    const uint8_t lead = uft8_in[0];
    uint16_t code;
    unsigned int used;

    if (lead < 0x80) {
        code = lead;
        used = 1;
    } else if ((lead & 0xE0) == 0xC0) {
        if (uft8_in[1] == 0) {
            return NULL; /* truncated: do not step over the terminator */
        }
        code = (uint16_t)(((lead & 0x1F) << 6) | (uft8_in[1] & 0x3F));
        used = 2;
    } else {
        if (uft8_in[1] == 0 || uft8_in[2] == 0) {
            return NULL; /* truncated: do not step over the terminator */
        }
        code = (uint16_t)(((lead & 0x0F) << 12) |
                          ((uft8_in[1] & 0x3F) << 6) |
                          (uft8_in[2] & 0x3F));
        used = 3;
    }

    (*uni_out)[0] = (uint8_t)(code >> 8);
    (*uni_out)[1] = (uint8_t)(code & 0xFF);
    *uni_out += 2;

    return (uint8_t *)uft8_in + used;
}

/*
 * Convert a NUL-terminated UTF-8 string to a 0x0000-terminated Unicode
 * string. uni_out must have room for 2 bytes per input byte plus the 2-byte
 * terminator. A NULL uft8_in produces an empty string.
 */
void utf82uni_str(uint8_t *uft8_in, uint8_t *uni_out)
{
    if (uni_out == NULL) {
        return;
    }

    uint8_t *dst = uni_out;
    for (uint8_t *src = uft8_in; src != NULL;) {
        src = utf82uni(src, &dst);
    }
    dst[0] = 0;
    dst[1] = 0;
}

/* Value returned by unigbk_open(), used as the table size in bytes;
 * 0 means the table has not been opened yet. */
static int g_unigbk_size = 0;

/*
 * Return the size in bytes of the conversion table, opening it on first use.
 * Returns 0 when the table is unavailable.
 * NOTE(review): a non-positive result from unigbk_open() is treated as
 * "no table" - confirm against its contract.
 */
static uint32_t unigbk_table_size(void)
{
    if (!g_unigbk_size) {
        g_unigbk_size = unigbk_open();
    }
    return (g_unigbk_size > 0) ? (uint32_t)g_unigbk_size : 0;
}

/*
 * Look up `key` in one half of the conversion table.
 *
 * The table is read as two halves of equal size: the first half is searched
 * for Unicode -> GBK, the second half for GBK -> Unicode. Each entry is
 * 4 bytes, read as two 16-bit values {key, value}. The binary search assumes
 * the keys of each half are in ascending order.
 * NOTE(review): entries are read in native byte order; the original comments
 * say the file stores the low byte first - confirm on big-endian targets.
 *
 * gbk_to_uni: non-zero selects the second half, zero the first half.
 * Returns the mapped value, or 0 when the key is not found or the table is
 * unavailable.
 */
static uint16_t unigbk_lookup(uint16_t key, int gbk_to_uni)
{
    const uint32_t half_bytes = unigbk_table_size() / 2;
    const uint32_t base = gbk_to_uni ? half_bytes : 0;
    uint32_t lo = 0;
    uint32_t hi = half_bytes / 4; /* number of entries, exclusive bound */

    while (lo < hi) {
        const uint32_t mid = lo + (hi - lo) / 2;
        uint16_t entry[2] = {0, 0}; /* stays zeroed if the read fails */

        (void)unigbk_read(mid * 4 + base, &entry, 4);
        if (entry[0] == key) {
            return entry[1];
        }
        if (entry[0] < key) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }
    return 0;
}

/*
 * Convert one GBK character to a 16-bit Unicode code unit.
 *
 * gbk_in:  address of the GBK character (1 byte if < 0x80, else 2 bytes).
 * uni_out: address of the output cursor; two bytes (high byte first) are
 *          written there and the cursor is advanced by 2.
 *
 * Returns the address of the next GBK character, or NULL when gbk_in is
 * NULL, points at the NUL terminator, or a two-byte character is cut short
 * by the terminator (nothing is written in those cases). Characters without
 * a mapping are converted to '?'.
 */
uint8_t *gbk2uni(uint8_t *gbk_in, uint8_t **uni_out)
{
    if (gbk_in == NULL || gbk_in[0] == 0) {
        return NULL;
    }

    uint16_t code;
    unsigned int used;

    if (gbk_in[0] < 0x80) {
        code = gbk_in[0];
        used = 1;
    } else {
        if (gbk_in[1] == 0) {
            return NULL; /* truncated: do not step over the terminator */
        }
        code = unigbk_lookup((uint16_t)((gbk_in[0] << 8) | gbk_in[1]), 1);
        if (code == 0) {
            code = CHAR_ENCODE_SUBSTITUTE;
        }
        used = 2;
    }

    (*uni_out)[0] = (uint8_t)(code >> 8); /* high byte first */
    (*uni_out)[1] = (uint8_t)(code & 0xFF);
    *uni_out += 2;

    return gbk_in + used;
}

/*
 * Convert one 16-bit Unicode code unit to GBK.
 *
 * uni_in:  address of the code unit to convert, high byte first.
 * gbk_out: address of the output cursor; 1 or 2 bytes (high byte first) are
 *          written there and the cursor is advanced past them.
 *
 * Returns the address of the next code unit (uni_in + 2), or NULL when
 * uni_in is NULL or the code unit is the 0x0000 terminator (nothing is
 * written in that case). Characters without a mapping are converted to '?'.
 */
uint8_t *uni2gbk(uint8_t *uni_in, uint8_t **gbk_out)
{
    if (uni_in == NULL) {
        return NULL;
    }

    const uint16_t code = (uint16_t)((uni_in[0] << 8) | uni_in[1]);
    if (code == 0) {
        return NULL;
    }

    uint8_t *dst = *gbk_out;
    if (code < 0x80) {
        *dst++ = (uint8_t)code;
    } else {
        const uint16_t gbk = unigbk_lookup(code, 0);

        if (gbk == 0) {
            *dst++ = (uint8_t)CHAR_ENCODE_SUBSTITUTE;
        } else {
            /* Never emit a zero high byte: it would end the GBK string. */
            if (gbk > 0xFF) {
                *dst++ = (uint8_t)(gbk >> 8);
            }
            *dst++ = (uint8_t)(gbk & 0xFF);
        }
    }
    *gbk_out = dst;

    return uni_in + 2;
}

/*
 * Convert a NUL-terminated GBK string to a 0x0000-terminated Unicode string.
 * uni_out must have room for 2 bytes per input byte plus the 2-byte
 * terminator. A NULL gbk_in produces an empty string.
 */
void gbk2uni_str(uint8_t *gbk_in, uint8_t *uni_out)
{
    if (uni_out == NULL) {
        return;
    }

    uint8_t *dst = uni_out;
    for (uint8_t *src = gbk_in; src != NULL;) {
        src = gbk2uni(src, &dst);
    }
    dst[0] = 0;
    dst[1] = 0;
}

/*
 * Convert a 0x0000-terminated Unicode string to a NUL-terminated GBK string.
 * gbk_out must have room for up to 2 bytes per code unit plus the
 * terminator. A NULL uni_in produces an empty string.
 */
void uni2gbk_str(uint8_t *uni_in, uint8_t *gbk_out)
{
    if (gbk_out == NULL) {
        return;
    }

    uint8_t *dst = gbk_out;
    for (uint8_t *src = uni_in; src != NULL;) {
        src = uni2gbk(src, &dst);
    }
    dst[0] = 0;
}

/*
 * Convert a NUL-terminated GBK string to UTF-8 through a temporary Unicode
 * buffer obtained from mymalloc(). utf8_out must have room for the result
 * (at most 3 bytes per GBK character plus the terminator). If gbk_in is NULL
 * or the temporary buffer cannot be allocated, utf8_out receives an empty
 * string.
 */
void gbk2utf8_str(uint8_t *gbk_in, uint8_t *utf8_out)
{
    if (utf8_out == NULL) {
        return;
    }
    if (gbk_in == NULL) {
        utf8_out[0] = 0;
        return;
    }

    /* Worst case is pure ASCII: one 2-byte unit per input byte, plus the
     * 2-byte terminator. */
    const size_t len = strlen((const char *)gbk_in);
    uint8_t *char_uni = mymalloc(len * 2 + 2);
    if (char_uni == NULL) {
        utf8_out[0] = 0;
        return;
    }

    gbk2uni_str(gbk_in, char_uni);
    uni2utf8_str(char_uni, utf8_out);
    myfree(char_uni);
}

/*
 * Convert a NUL-terminated UTF-8 string to GBK through a temporary Unicode
 * buffer obtained from mymalloc(). gbk_out must have room for the result
 * (at most 2 bytes per character plus the terminator). If utf8_in is NULL or
 * the temporary buffer cannot be allocated, gbk_out receives an empty
 * string.
 */
void utf82gbk_str(uint8_t *utf8_in, uint8_t *gbk_out)
{
    if (gbk_out == NULL) {
        return;
    }
    if (utf8_in == NULL) {
        gbk_out[0] = 0;
        return;
    }

    /* Worst case is pure ASCII: one 2-byte unit per input byte, plus the
     * 2-byte terminator. */
    const size_t len = strlen((const char *)utf8_in);
    uint8_t *char_uni = mymalloc(len * 2 + 2);
    if (char_uni == NULL) {
        gbk_out[0] = 0;
        return;
    }

    utf82uni_str(utf8_in, char_uni);
    uni2gbk_str(char_uni, gbk_out);
    myfree(char_uni);
}

/*
 * Round-trip self test: GBK -> Unicode -> UTF-8 -> Unicode -> GBK.
 *
 * str_input: NUL-terminated GBK string.
 * Returns 0 when the round trip reproduces str_input, non-zero otherwise
 * (including a NULL input or a failed allocation).
 */
int char_encode_test(char *str_input)
{
    if (str_input == NULL) {
        return -1;
    }

    /* 3 bytes per input byte covers the largest intermediate (UTF-8); the
     * extra bytes leave room for terminators even for an empty input. */
    const size_t capacity = strlen(str_input) * 3 + 4;
    uint8_t *char_gbk = mymalloc(capacity);
    uint8_t *char_uni = mymalloc(capacity);
    uint8_t *char_utf8 = mymalloc(capacity);
    int ret = -1;

    if (char_gbk != NULL && char_uni != NULL && char_utf8 != NULL) {
        gbk2uni_str((uint8_t *)str_input, char_uni);
        uni2utf8_str(char_uni, char_utf8);
        utf82uni_str(char_utf8, char_uni);
        uni2gbk_str(char_uni, char_gbk);
        ret = strcmp((const char *)char_gbk, str_input);
    }

    /* myfree() is a project allocator; NULL is not passed to it because its
     * handling of NULL is not visible here. */
    if (char_gbk != NULL) {
        myfree(char_gbk);
    }
    if (char_uni != NULL) {
        myfree(char_uni);
    }
    if (char_utf8 != NULL) {
        myfree(char_utf8);
    }

    return ret;
}