CString UTF8ToUnicode(char* UTF8) { DWORD dwUnicodeLen; //轉(zhuǎn)換后Unicode的長度 TCHAR *pwText; //保存Unicode的指針 CString strUnicode; //返回值 //獲得轉(zhuǎn)換后的長度,,并分配內(nèi)存 dwUnicodeLen = MultiByteToWideChar(CP_UTF8,0,UTF8,-1,NULL,0); pwText = new TCHAR[dwUnicodeLen]; if (!pwText) { return strUnicode; } //轉(zhuǎn)為Unicode MultiByteToWideChar(CP_UTF8,0,UTF8,-1,pwText,dwUnicodeLen); //轉(zhuǎn)為CString strUnicode.Format(_T("%s"),pwText); //清除內(nèi)存 delete []pwText; //返回轉(zhuǎn)換好的Unicode字串 return strUnicode; } char* UnicodeToUTF8( const CString& str ) { char* pElementText = NULL; int iTextLen; // wide char to multi char iTextLen = WideCharToMultiByte( CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL ); pElementText = new char[iTextLen + 1]; memset( ( void* )pElementText, 0, sizeof( char ) * ( iTextLen + 1 ) ); WideCharToMultiByte( CP_UTF8, 0, str, -1, pElementText, iTextLen, NULL, NULL ); return pElementText; } size_t g_f_wctou8(char * dest_str, const wchar_t src_wchar) { int count_bytes = 0; wchar_t byte_one = 0, byte_other = 0x3f; // 用于位與運(yùn)算以提取位值0x3f--->00111111 unsigned char utf_one = 0, utf_other = 0x80; // 用于"位或"置標(biāo)UTF-8編碼0x80--->1000000 wchar_t tmp_wchar =L'0'; // 用于寬字符位置析取和位移(右移位) unsigned char tmp_char =L'0'; if (!src_wchar)// return (size_t)-1; for (;;) // 檢測字節(jié)序列長度 { if (src_wchar <= 0x7f){ // <=01111111 count_bytes = 1; // ASCII字符: 0xxxxxxx( ~ 01111111) byte_one = 0x7f; // 用于位與運(yùn)算, 提取有效位值, 下同 utf_one = 0x0; break; } if ( (src_wchar > 0x7f) && (src_wchar <= 0x7ff) ){ // <=0111,11111111 count_bytes = 2; // 110xxxxx 10xxxxxx[1](最多個位, 簡寫為*1) byte_one = 0x1f; // 00011111, 下類推(1位的數(shù)量遞減) utf_one = 0xc0; // 11000000 break; } if ( (src_wchar > 0x7ff) && (src_wchar <= 0xffff) ){ //0111,11111111<=11111111,11111111 count_bytes = 3; // 1110xxxx 10xxxxxx[2](MaxBits: 16*1) byte_one = 0xf; // 00001111 utf_one = 0xe0; // 11100000 break; } if ( (src_wchar > 0xffff) && (src_wchar <= 0x1fffff) ){ //對UCS-4的支持.. count_bytes = 4; // 11110xxx 10xxxxxx[3](MaxBits: 21*1) byte_one = 0x7; // 00000111 utf_one = 0xf0; // 11110000 break; } if ( (src_wchar > 0x1fffff) && (src_wchar <= 0x3ffffff) ){ count_bytes = 5; // 111110xx 10xxxxxx[4](MaxBits: 26*1) byte_one = 0x3; // 00000011 utf_one = 0xf8; // 11111000 break; } if ( (src_wchar > 0x3ffffff) && (src_wchar <= 0x7fffffff) ){ count_bytes = 6; // 1111110x 10xxxxxx[5](MaxBits: 31*1) byte_one = 0x1; // 00000001 utf_one = 0xfc; // 11111100 break; } return (size_t)-1; // 以上皆不滿足則為非法序列 } // 以下幾行析取寬字節(jié)中的相應(yīng)位, 并分組為UTF-8編碼的各個字節(jié) tmp_wchar = src_wchar; for (int i = count_bytes; i > 1; i--) { // 一個寬字符的多字節(jié)降序賦值 tmp_char = (unsigned char)(tmp_wchar & byte_other);///后位與byte_other 00111111 dest_str[i - 1] = (tmp_char | utf_other);/// 在前面加----跟或 tmp_wchar >>= 6;//右移位 } //這個時候i=1 //對UTF-8第一個字節(jié)位處理, //第一個字節(jié)的開頭"1"的數(shù)目就是整個串中字節(jié)的數(shù)目 tmp_char = (unsigned char)(tmp_wchar & byte_one);//根據(jù)上面附值得來,,有效位個數(shù) dest_str[0] = (tmp_char | utf_one);//根據(jù)上面附值得來1的個數(shù) // 位值析取分組__End! return count_bytes; } CString g_f_wcs_to_pchar(CString& wstr) { char* p = new char[1024]; ZeroMemory(p, 1024); wchar_t wc=L'1'; char c[10]="1";//申請一個緩存 size_t r=0; //size_t unsigned integer Result of sizeof operator int i=0; int j=0; for(i=0;i<wstr.GetLength();i++) { wc=wstr.GetAt(i);//得到一個寬字符 r=g_f_wctou8(c,wc);//將一個寬字符按UTF-8格式轉(zhuǎn)換到p地址 if(r==-1)//出錯判斷 AfxMessageBox(_T("wcs_to_pchar error")); p[j]=c[0];//第一個值附給p j++; if(r>1) { for(size_t x=1;x<r;x++) { p[j]=c[x]; j++; } } } //p[j]='0'; USES_CONVERSION; CString strText; strText = A2W(p); delete[] p; return strText;; }
|
|