Unicode,UTF8互轉

3D建模仿真 2013-06-19

展開全文

/**
 * Convert a NUL-terminated UTF-8 byte string to a CString.
 *
 * @param UTF8  NUL-terminated UTF-8 text; may be NULL.
 * @return      The converted text, or an empty CString when UTF8 is NULL
 *              or MultiByteToWideChar reports a failure.
 */
CString UTF8ToUnicode(char* UTF8)
{
    CString strUnicode; // return value; stays empty on every failure path

    if (UTF8 == NULL)
    {
        return strUnicode;
    }

    // First pass: ask for the required length in wide characters.
    // Because the source length is -1, the count includes the terminating NUL.
    int cchWide = MultiByteToWideChar(CP_UTF8, 0, UTF8, -1, NULL, 0);
    if (cchWide <= 0)
    {
        // Conversion failed; never hand an empty buffer to the string
        // copy below, it would read past the allocation.
        return strUnicode;
    }

    // MultiByteToWideChar always writes wchar_t, so the buffer is wchar_t
    // rather than TCHAR.
    wchar_t* pwText = new wchar_t[cchWide];

    // Second pass: perform the actual conversion.
    int cchWritten = MultiByteToWideChar(CP_UTF8, 0, UTF8, -1, pwText, cchWide);
    if (cchWritten > 0)
    {
        // Plain assignment copies the NUL-terminated text; no Format() needed.
        strUnicode = pwText;
    }

    // Release the temporary buffer.
    delete[] pwText;

    return strUnicode;
}

/**
 * Convert a CString to a freshly allocated, NUL-terminated UTF-8 string.
 *
 * @param str  Text to convert (passed straight to WideCharToMultiByte).
 * @return     Buffer allocated with new[]; the caller must release it
 *             with delete[].
 */
char* UnicodeToUTF8( const CString& str )
{
    // Size query: number of bytes the UTF-8 form needs.
    const int cbNeeded = WideCharToMultiByte( CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL );

    // One spare byte, and the whole buffer zeroed, so the result is always
    // NUL-terminated even if the conversion writes nothing.
    const int cbAlloc = cbNeeded + 1;
    char* pUtf8 = new char[cbAlloc];
    memset( ( void* )pUtf8, 0, sizeof( char ) * cbAlloc );

    // Actual wide-char -> UTF-8 conversion into the new buffer.
    WideCharToMultiByte( CP_UTF8, 0, str, -1, pUtf8, cbNeeded, NULL, NULL );

    return pUtf8;
}

/*
 * Encode one wide character as a UTF-8 byte sequence.
 *
 * dest_str   Output buffer; receives 1 to 6 bytes, so it must have room for
 *            at least 6. No terminating NUL is written.
 * src_wchar  Character to encode.
 *
 * Returns the number of bytes written, or (size_t)-1 when dest_str is NULL,
 * src_wchar is 0, or the value lies outside 1..0x7fffffff (this includes
 * negative values on platforms where wchar_t is signed).
 *
 * NOTE(review): the 5- and 6-byte forms and surrogate values
 * (0xD800..0xDFFF) are still encoded, as before, although RFC 3629 does not
 * allow them; callers needing strict UTF-8 must filter them out themselves.
 */
size_t g_f_wctou8(char * dest_str, const wchar_t src_wchar)
{
    /* One row per sequence length (row index + 1 == byte count). */
    static const struct {
        unsigned long max_cp;     /* largest value this length can hold      */
        unsigned char lead_mask;  /* payload bits kept in the first byte     */
        unsigned char lead_tag;   /* length marker OR-ed into the first byte */
    } forms[] = {
        { 0x7fUL,       0x7f, 0x00 },  /* 0xxxxxxx                      */
        { 0x7ffUL,      0x1f, 0xc0 },  /* 110xxxxx 10xxxxxx             */
        { 0xffffUL,     0x0f, 0xe0 },  /* 1110xxxx 10xxxxxx x2          */
        { 0x1fffffUL,   0x07, 0xf0 },  /* 11110xxx 10xxxxxx x3          */
        { 0x3ffffffUL,  0x03, 0xf8 },  /* 111110xx 10xxxxxx x4          */
        { 0x7fffffffUL, 0x01, 0xfc }   /* 1111110x 10xxxxxx x5          */
    };
    const size_t form_count = sizeof forms / sizeof forms[0];

    /* Work on an unsigned copy so a negative wchar_t becomes a huge value
     * that fails every range check instead of passing as ASCII. */
    const unsigned long code_point = (unsigned long)src_wchar;
    unsigned long remaining = code_point;
    size_t count_bytes = 0;
    size_t k;
    size_t i;

    if (!dest_str || code_point == 0)
        return (size_t)-1;

    /* Pick the shortest form able to hold the value. */
    for (k = 0; k < form_count; k++) {
        if (code_point <= forms[k].max_cp) {
            count_bytes = k + 1;
            break;
        }
    }
    if (count_bytes == 0)
        return (size_t)-1;  /* out of range: not encodable */

    /* Fill continuation bytes from last to first: 6 payload bits each,
     * tagged with the 10xxxxxx marker. */
    for (i = count_bytes; i > 1; i--) {
        dest_str[i - 1] = (char)(unsigned char)(0x80 | (remaining & 0x3f));
        remaining >>= 6;
    }

    /* First byte: the remaining high bits plus the length marker. */
    dest_str[0] = (char)(unsigned char)(forms[count_bytes - 1].lead_tag |
                                        (remaining & forms[count_bytes - 1].lead_mask));

    return count_bytes;
}

/**
 * Encode every character of wstr as UTF-8 with g_f_wctou8, then turn the
 * resulting byte string back into a CString via A2W.
 *
 * Characters that g_f_wctou8 rejects are reported with a message box and
 * left out of the result.
 *
 * NOTE(review): A2W appears to interpret the bytes using the ANSI code page
 * rather than UTF-8, so non-ASCII text will not round-trip -- confirm this
 * is the intended result. A2W also converts on the stack, so very long
 * inputs may need a heap-based conversion instead.
 *
 * @param wstr  Source text.
 * @return      CString built from the UTF-8 bytes of wstr.
 */
CString g_f_wcs_to_pchar(CString& wstr)
{
    const int nChars = wstr.GetLength();

    // g_f_wctou8 emits at most 6 bytes per character; size the buffer for
    // the worst case plus a terminating NUL instead of a fixed 1024 bytes.
    const size_t cbCapacity = (size_t)nChars * 6 + 1;
    char* p = new char[cbCapacity];
    ZeroMemory(p, cbCapacity); // zero fill guarantees NUL termination

    char c[10] = "1"; // scratch buffer for one encoded character
    size_t j = 0;     // write position in p

    for (int i = 0; i < nChars; i++)
    {
        wchar_t wc = wstr.GetAt(i);    // next character
        size_t r = g_f_wctou8(c, wc);  // encode it into c

        if (r == (size_t)-1)
        {
            // Encoding failed: report and skip. Copying here would use the
            // error value as a byte count and overrun both c and p.
            AfxMessageBox(_T("wcs_to_pchar error"));
            continue;
        }

        // Append the r encoded bytes to the output.
        for (size_t x = 0; x < r; x++)
        {
            p[j] = c[x];
            j++;
        }
    }

    USES_CONVERSION;

    CString strText;
    strText = A2W(p);

    delete[] p;

    return strText;
}

本站是提供個人知識管理的網絡存儲空間，所有內容均由用戶發布，不代表本站觀點。請注意甄別內容中的聯系方式、誘導購買等信息，謹防詐騙。如發現有害或侵權內容，請點擊一鍵舉報。

轉(zhuǎn)藏 分享

QQ空間 QQ好友新浪微博微信

獻花（0） +1

來自： 3D建模仿真 > 《學習筆記》

舉報/認領