编码方式
编码方式
下面的字节串用来表示一个字符。用什么串依照该字符在 UCS 编码中的序号来定:
0x00000000 - 0x0000007F:
0xxxxxxx
0x00000080 - 0x000007FF:
110xxxxx 10xxxxxx
0x00000800 - 0x0000FFFF:
1110xxxx 10xxxxxx 10xxxxxx
0x00010000 - 0x001FFFFF:
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
0x00200000 - 0x03FFFFFF:
111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
0x04000000 - 0x7FFFFFFF:
1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
这里 xxx 的位置由字符编码数的二进制表示的各个位依次填入,越靠右的 x 对应越低的位。只用最短的那个足够表达一个字符编码数的多字节串。
举例说明
Unicode 字符 0xa9 = 1010 1001 (版权所有的符号) 在 UTF-8 中被编码为:
11000010 10101001 = 0xc2 0xa9
字符0x2260 = 0010 0010 0110 0000 (“不等于”符号)被编码为:
11100010 10001001 10100000 = 0xe2 0x89 0xa0
''encode - append strIN to strOUT as a run of %XX escapes, one per byte of its ANSI
'' (system code page) form, then show the result in txtOUT.
'' Working on bytes instead of calling Asc per character keeps every escape at exactly
'' two hex digits: Asc yields a 16-bit value for a double-byte character, which Hex
'' would render as four digits ("%XXXX") and break the three-characters-per-byte layout.
Dim ansiBytes() As Byte
If Len(strIN) > 0 Then
    ansiBytes = StrConv(strIN, vbFromUnicode)
    For i = LBound(ansiBytes) To UBound(ansiBytes)
        s = Right$("0" & Hex$(ansiBytes(i)), 2) '' left-pad values below &H10 with "0"
        strOUT = strOUT & "%" & s
    Next i
End If
txtOUT.Text = strOUT '' display the encoded text
___________________________________________________________
''decode - turn a run of %XX groups in strIN back into text, append it to strOUT and
'' show the result in txtOUT.
'' Each group is treated as one byte of ANSI (system code page) text. The bytes are
'' collected first and converted in a single StrConv call, so double-byte characters are
'' reassembled instead of being mapped byte-by-byte to Unicode code points via ChrW$.
'' NOTE(review): this assumes the input was produced from ANSI bytes (as an Asc/StrConv
'' based encoder does); percent-encoded UTF-8 would need a UTF-8 decoder instead - confirm.
Dim decodedBytes() As Byte
Dim decodedCount As Long
If Len(strIN) > 0 Then
    '' One byte per (possibly incomplete) three-character group.
    ReDim decodedBytes(0 To (Len(strIN) + 2) \ 3 - 1)
    For i = 1 To Len(strIN) Step 3 '' Step 3: every byte is written as "%XX"
        s1 = UCase$(Mid$(strIN, i + 1, 1)) '' first hex digit  = high nibble
        s2 = UCase$(Mid$(strIN, i + 2, 1)) '' second hex digit = low nibble
        Select Case s1
            Case "A" To "F"
                n = (Asc(s1) - Asc("A") + 10) * 16
            Case Else
                n = Val(s1) * 16 '' "0".."9"; anything else counts as 0
        End Select
        Select Case s2
            Case "A" To "F"
                n = n + (Asc(s2) - Asc("A") + 10)
            Case Else
                n = n + Val(s2)
        End Select
        decodedBytes(decodedCount) = n '' n is always within 0..255 here
        decodedCount = decodedCount + 1
    Next i
    strOUT = strOUT & StrConv(decodedBytes, vbUnicode)
End If
txtOUT.Text = strOUT
怎么把字符串转成UTF-8格式????
下面的代码不成功:
// NOTE(review): this snippet is quoted from a question as NOT working. The comments below
// mark the likely causes, based on the Win32 MultiByteToWideChar / WideCharToMultiByte
// documentation - confirm against that documentation before relying on them.
WCHAR wstr[10];
char cstr[20];
memset( wstr, 0, sizeof(wstr) );
// Decodes the literal as UTF-8 into wstr. The last argument is documented as a count of
// wide characters, but sizeof(wstr) is a byte count, not the element count of the array.
// Whether the literal's bytes really are UTF-8 depends on how the source file is compiled,
// which is not visible here.
MultiByteToWideChar(CP_UTF8, 0, "我是小于", -1, wstr, sizeof(wstr));
// Encodes wstr into the ANSI code page (CP_ACP), not into UTF-8, so this call cannot yield
// UTF-8 output. The output-size argument is -1 rather than the capacity of cstr, and ""
// is passed as the default character.
WideCharToMultiByte(CP_ACP, 0, wstr, -1, cstr, -1, "", 0);
求助。。。。
---------------------------------------------------------------
int main(void)
{
char str [ 256 ] = "唐志国" ; //一段UTF-8编码
WCHAR* strA;
int i= MultiByteToWideChar ( CP_ACP , 0 ,(char*) str ,-1 ,NULL,0);
strA = new WCHAR[i];
MultiByteToWideChar ( CP_ACP , 0 ,( char * ) str, -1, strA , i);
i= WideCharToMultiByte(CP_UTF8,0,strA,-1,NULL,0,NULL,NULL);
char *strB=new char[i];
WideCharToMultiByte (CP_UTF8,0,strA,-1,strB,i,NULL,NULL);
//strB即为所求
delete []strA;
delete []strB;
return 0;
}
---------------------------------------------------------------
这是转换函数:
/**
 * Re-encodes a byte buffer from one Windows code page into another, using UTF-16 as the
 * intermediate form (MultiByteToWideChar followed by WideCharToMultiByte).
 *
 * @param SrcCodePage   code page the input bytes are decoded with
 * @param pBuff         input buffer, handed unchanged to MultiByteToWideChar
 * @param iBuffLen      input length, handed unchanged to MultiByteToWideChar
 * @param DestCodePage  code page to encode into; 54936 is replaced by 936 when
 *                      IsValidCodePage(54936) reports it as unavailable
 * @param lpCodePage    receives a buffer allocated with new[]; it is zero-filled and one
 *                      byte longer than the measured output, so it ends in a NUL byte.
 *                      The caller releases it with delete[].
 * @return the value returned by the final WideCharToMultiByte call
 *
 * NOTE(review): pBuff is declared LPCTSTR but is passed where MultiByteToWideChar takes
 * narrow characters; presumably this is only built without UNICODE defined - confirm.
 */
int CodePageConvert(UINT SrcCodePage, LPCTSTR pBuff, int iBuffLen, UINT DestCodePage, char* &lpCodePage)
{
    const UINT kCodePageGb18030 = 54936;
    const UINT kCodePageGbk = 936;

    // Measure, then decode the source bytes into a zero-filled wide buffer that has one
    // spare element beyond the measured length.
    int wideLen = ::MultiByteToWideChar(SrcCodePage, 0, pBuff, iBuffLen, NULL, 0);
    const int wideCapacity = wideLen + 1;
    LPWSTR wideBuf = new wchar_t[wideCapacity];
    memset(wideBuf, 0, wideCapacity * sizeof(WCHAR));
    wideLen = MultiByteToWideChar(SrcCodePage, 0, pBuff, iBuffLen, wideBuf, wideLen);

    // Fall back to 936 on systems where 54936 is not a valid code page.
    if (DestCodePage == kCodePageGb18030)
    {
        if (!IsValidCodePage(kCodePageGb18030))
        {
            DestCodePage = kCodePageGbk;
        }
    }

    // Measure, then encode into a zero-filled output buffer with one spare byte.
    int destLen = WideCharToMultiByte(DestCodePage, 0, wideBuf, wideLen, NULL, 0, NULL, NULL);
    lpCodePage = new char[destLen + 1];
    memset(lpCodePage, 0, destLen + 1);
    destLen = WideCharToMultiByte(DestCodePage, 0, wideBuf, wideLen, lpCodePage, destLen, NULL, NULL);

    delete[] wideBuf;
    return destLen;
}
下面是调用方法:
utf-8 到 gbk
// Usage example. NOTE(review): CodePageConvertUnix is not defined in this excerpt;
// presumably its arguments are (source charset, input, input length, destination
// charset, output pointer) - confirm against its definition.
int nLen = CodePageConvertUnix("UTF-8",_T("标准"),2,"GBK",lpOut);
gbk 到utf-8
// GBK -> UTF-8: the source charset is "GBK" and the destination is "UTF-8".
// NOTE(review): CodePageConvertUnix is not defined in this excerpt; this assumes its first
// and fourth arguments name the source and destination charsets - confirm. The length
// argument (2) is kept unchanged.
int nLen = CodePageConvertUnix("GBK",_T("标准"),2,"UTF-8",lpOut);
解决Servu命令漏洞 Servu命令过滤插件 | 正则表达式语法 |
utf8 编码方式 | |
[ 发布日期:20年前 (2004-09-16) ] [ 来自:Original ] | [分类:C# .Net编程] |
[ 分类:C# .Net编程
| 查看:1123 ]
下一篇: 正则表达式语法
暂时没有评论,快来发表一个评论吧。
发表评论 |