use*_*841 2 c++ character-encoding
如何在 C++ 应用程序中转换 ú:应用程序收到的字符是 UTF-8 编码的 %C3%BA,需要把它存储为等效的 Unicode 码点 %FA。我只想知道如何编写代码来完成这个转换过程。
我昨天写了一些代码来做这件事......
我并不是说这是"完美"的做法,但它似乎适用于我测试过的所有用例(为此我写了两个方向的转换).
把"%NN"转换成整数值这一步就留给你自己完成了.
#include <iostream>
#include <deque>
/// Encodes a code point as a sequence of UTF-8 byte values.
///
/// Values below 128 are returned unchanged as a single element. Larger values
/// are split into 6-bit groups, least significant first; each group becomes a
/// continuation byte (0x80 | bits) and whatever is left over is merged into
/// the lead byte, whose marker bits grow by one for every continuation byte.
///
/// @param charcode  Code point to encode. No range validation is performed.
/// @return Byte values in transmission order (lead byte first).
std::deque<int> unicode_to_utf8(int charcode)
{
    std::deque<int> bytes;

    // Single-byte form: the value is its own encoding.
    if (charcode < 0x80)
    {
        bytes.push_back(charcode);
        return bytes;
    }

    constexpr int kPayloadBits = 6;
    constexpr int kPayloadMask = (1 << kPayloadBits) - 1;

    int lead_marker = 0xC0;
    int remaining = charcode;

    // lead_capacity is the number of payload bits the lead byte could still
    // hold; it shrinks by one each time another continuation byte is emitted.
    for (int lead_capacity = 6; remaining >= (1 << lead_capacity); --lead_capacity)
    {
        bytes.push_front(0x80 | (remaining & kPayloadMask));
        remaining >>= kPayloadBits;
        lead_marker |= 1 << lead_capacity;
    }

    bytes.push_front(lead_marker | remaining);
    return bytes;
}
/// Decodes one UTF-8 sequence from the front of `coded` into its code point.
///
/// Each element of `coded` is expected to hold one byte value (0..255). The
/// elements that form the decoded character are removed from the deque, so
/// repeated calls walk through a buffer holding several characters.
///
/// In addition to the 1-4 byte forms, the legacy 5- and 6-byte forms (lead
/// bytes 0xF8..0xFD) are accepted so that every sequence unicode_to_utf8 can
/// produce still round-trips.
///
/// @param coded  Byte values to decode; consumed from the front.
/// @return The decoded code point, or -1 when the input is malformed: the
///         deque is empty, the first element is not a valid lead byte (out of
///         the 0..255 range, a lone continuation byte, or 0xFE/0xFF), or the
///         sequence ends or hits a non-continuation byte before it is
///         complete. On failure the lead byte and any continuation bytes
///         already accepted have been consumed; an offending element is left
///         in place.
int utf8_to_unicode(std::deque<int> &coded)
{
    constexpr int kInvalid = -1;
    constexpr int kContinuationBits = 6;
    constexpr int kContinuationMask = (1 << kContinuationBits) - 1;
    constexpr int kMaxContinuations = 5;  // 6-byte legacy form: lead + 5

    // front()/pop_front() on an empty deque is undefined behaviour.
    if (coded.empty())
    {
        return kInvalid;
    }

    const int lead = coded.front();
    coded.pop_front();

    if (lead < 0 || lead > 0xFF)
    {
        return kInvalid;
    }
    if (lead < 0x80)
    {
        return lead;  // single-byte (ASCII) form
    }

    // Count the marker bits that follow the top bit of the lead byte: each one
    // announces a continuation byte and takes one bit away from the payload.
    int continuation_count = 0;
    int payload_mask = kContinuationMask;
    for (int probe = 0x40; (lead & probe) != 0; probe >>= 1)
    {
        ++continuation_count;
        payload_mask >>= 1;
    }

    // 0 => lone continuation byte (10xxxxxx); > 5 => 0xFE / 0xFF, which are
    // never valid and would overflow int if decoded.
    if (continuation_count == 0 || continuation_count > kMaxContinuations)
    {
        return kInvalid;
    }

    int charcode = lead & payload_mask;
    for (int i = 0; i < continuation_count; ++i)
    {
        if (coded.empty())
        {
            return kInvalid;  // truncated sequence
        }
        const int next = coded.front();
        if ((next & ~kContinuationMask) != 0x80)
        {
            return kInvalid;  // not of the form 10xxxxxx
        }
        coded.pop_front();
        charcode = (charcode << kContinuationBits) | (next & kContinuationMask);
    }
    return charcode;
}
/// Interactive round-trip check: repeatedly reads a code point from standard
/// input, prints its UTF-8 byte values in hex, then decodes those bytes again
/// and prints the result in decimal and hex.
///
/// The loop ends when a value can no longer be read (end of input or
/// non-numeric text); without that check a failed stream would make the loop
/// spin forever.
int main()
{
    int charcode;
    for (;;)
    {
        std::cout << "Enter unicode value:" << std::endl;
        if (!(std::cin >> charcode))
        {
            break;  // EOF or unparsable input: stop instead of looping on a dead stream
        }

        auto encoded = unicode_to_utf8(charcode);
        for (auto byte : encoded)
        {
            std::cout << "\\x" << std::hex << byte << " ";
        }
        std::cout << std::endl;

        // utf8_to_unicode consumes the deque, so it is decoded only after printing.
        int decoded = utf8_to_unicode(encoded);
        std::cout << "reversed:" << std::dec << decoded << std::hex << " in hex:" << decoded << std::endl;
    }
    return 0;
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
10956 次 |
| 最近记录: |