|
1 // UTFConvert.cpp |
|
2 |
|
3 #include "StdAfx.h" |
|
4 |
|
5 #include "UTFConvert.h" |
|
6 #include "Types.h" |
|
7 |
|
8 static Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; |
|
9 |
|
10 // These functions are for UTF8 <-> UTF16 conversion. |
|
11 |
|
12 bool ConvertUTF8ToUnicode(const AString &src, UString &dest) |
|
13 { |
|
14 dest.Empty(); |
|
15 for(int i = 0; i < src.Length();) |
|
16 { |
|
17 Byte c = (Byte)src[i++]; |
|
18 if (c < 0x80) |
|
19 { |
|
20 dest += (wchar_t)c; |
|
21 continue; |
|
22 } |
|
23 if(c < 0xC0) |
|
24 return false; |
|
25 int numAdds; |
|
26 for (numAdds = 1; numAdds < 5; numAdds++) |
|
27 if (c < kUtf8Limits[numAdds]) |
|
28 break; |
|
29 UInt32 value = (c - kUtf8Limits[numAdds - 1]); |
|
30 do |
|
31 { |
|
32 if (i >= src.Length()) |
|
33 return false; |
|
34 Byte c2 = (Byte)src[i++]; |
|
35 if (c2 < 0x80 || c2 >= 0xC0) |
|
36 return false; |
|
37 value <<= 6; |
|
38 value |= (c2 - 0x80); |
|
39 numAdds--; |
|
40 } |
|
41 while(numAdds > 0); |
|
42 if (value < 0x10000) |
|
43 dest += (wchar_t)(value); |
|
44 else |
|
45 { |
|
46 value -= 0x10000; |
|
47 if (value >= 0x100000) |
|
48 return false; |
|
49 dest += (wchar_t)(0xD800 + (value >> 10)); |
|
50 dest += (wchar_t)(0xDC00 + (value & 0x3FF)); |
|
51 } |
|
52 } |
|
53 return true; |
|
54 } |
|
55 |
|
56 bool ConvertUnicodeToUTF8(const UString &src, AString &dest) |
|
57 { |
|
58 dest.Empty(); |
|
59 for(int i = 0; i < src.Length();) |
|
60 { |
|
61 UInt32 value = (UInt32)src[i++]; |
|
62 if (value < 0x80) |
|
63 { |
|
64 dest += (char)value; |
|
65 continue; |
|
66 } |
|
67 if (value >= 0xD800 && value < 0xE000) |
|
68 { |
|
69 if (value >= 0xDC00) |
|
70 return false; |
|
71 if (i >= src.Length()) |
|
72 return false; |
|
73 UInt32 c2 = (UInt32)src[i++]; |
|
74 if (c2 < 0xDC00 || c2 >= 0xE000) |
|
75 return false; |
|
76 value = ((value - 0xD800) << 10) | (c2 - 0xDC00); |
|
77 } |
|
78 int numAdds; |
|
79 for (numAdds = 1; numAdds < 5; numAdds++) |
|
80 if (value < (((UInt32)1) << (numAdds * 5 + 6))) |
|
81 break; |
|
82 dest += (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); |
|
83 do |
|
84 { |
|
85 numAdds--; |
|
86 dest += (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); |
|
87 } |
|
88 while(numAdds > 0); |
|
89 } |
|
90 return true; |
|
91 } |