Sve*_*ven 5 c# sqlite encoding utf-8
当我在 Sqlite 浏览器中打开旧数据库时,文本已显示错误。我唯一可以设置的编码是 UTF-8 和 UTF-16。

当我查询数据库时,Visual Studio 中的编码已经错误。

我假设文本是用 ANSI (Windows-1252) 编码的(在评论中确认)。我尝试将其转换为 UTF-8
// Look up the Windows-1252 (ANSI) code page the stored text is assumed to use.
var encoding = Encoding.GetEncoding(1252);
// Encode the already-decoded string (result, presumably the text read from the database) into Windows-1252 bytes.
byte[] encBytes = encoding.GetBytes(result);
// Transcode those bytes from Windows-1252 to UTF-8.
byte[] utf8Bytes = Encoding.Convert(encoding, Encoding.UTF8, encBytes);
// Decode the UTF-8 bytes back into a string.
// NOTE(review): every step here is undone by the next one, so this appears to return the same text it was given — confirm.
return Encoding.UTF8.GetString(utf8Bytes);
Run Code Online (Sandbox Code Playgroud)
不知何故,外部遗留应用程序可以正确显示它,所以似乎有一种方法。但我不确定接下来我可以尝试什么。
我曾经遇到过同样的问题
\n\n约翰·斯基特在这里回答了这个问题:
\n\n基本上获取字符串,以错误的编码方式获取字节,然后以实际的编码方式获取字符串:
// Sample of damaged text: "México" whose UTF-8 bytes (C3 A9) were decoded as ISO-8859-1,
// giving U+00C3 U+00A9. Written with \u escapes so the sample does not depend on how
// this file itself is encoded.
string broken = "Brokers M\u00c3\u00a9xico, Intermediario de Aseguro,S.A."; // Get text from database
// Re-encode with the encoding that was wrongly used for decoding (ISO-8859-1, code page 28591)
// to recover the original bytes...
byte[] encoded = Encoding.GetEncoding(28591).GetBytes(broken);
// ...then decode those bytes with the encoding they were really written in.
string corrected = Encoding.UTF8.GetString(encoded);

所以你的应该只是
// Same repair recipe for text that was wrongly decoded as Windows-1252:
// turn the string back into its raw bytes, then read those bytes as UTF-8.
var wronglyAssumed = Encoding.GetEncoding(1252);
string broken = "Whatever";
string corrected = Encoding.UTF8.GetString(wronglyAssumed.GetBytes(broken));

基本上,现在您知道重新转换程序是正确的,我会尝试使用此处提到的编码:
https://msdn.microsoft.com/en-us/library/system.text.encodinginfo.getencoding(v=vs.110).aspx
(只需编写一个程序,测试那里列出的所有编码组合,并查看哪一对能产生匹配的结果……)
如果您知道源文本,您甚至可以自动执行检查:
/// <summary>
/// Test form that brute-forces which pair of encodings turns a mis-decoded
/// ("mojibake") string back into its known-correct form.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Result table shown in the grid; one row per matching encoding pair.</summary>
    public System.Data.DataTable dt;

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Encodes the broken text with each candidate code page, decodes the resulting bytes
    /// with each candidate code page, and lists every pair that reproduces the correct text.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void btnTest_Click(object sender, EventArgs e)
    {
        dt = new System.Data.DataTable();

        // Written with \u escapes so the samples do not depend on how this file is encoded:
        // "correct" is "México" (é = U+00E9); "broken" is the same text after its UTF-8
        // bytes (C3 A9) were decoded with a single-byte code page (U+00C3 U+00A9).
        string correct = "Brokers M\u00e9xico, Intermediario de Aseguro,S.A.";
        string broken = "Brokers M\u00c3\u00a9xico, Intermediario de Aseguro,S.A."; // Get text from database

        dt.Columns.Add("SourceEncoding", typeof(string));
        dt.Columns.Add("TargetEncoding", typeof(string));
        dt.Columns.Add("Result", typeof(string));
        dt.Columns.Add("SourceEncodingName", typeof(string));
        dt.Columns.Add("TargetEncodingName", typeof(string));

        // For reference
        // https://msdn.microsoft.com/en-us/library/system.text.encodinginfo.getencoding(v=vs.110).aspx
        int[] encs = new int[] {
             20127 // US-ASCII
            ,28591 // iso-8859-1 Western European (ISO)
            ,28592 // iso-8859-2 Central European (ISO)
            ,28593 // iso-8859-3 Latin 3 (ISO)
            ,28594 // iso-8859-4 Baltic (ISO)
            ,28595 // iso-8859-5 Cyrillic (ISO)
            ,28596 // iso-8859-6 Arabic (ISO)
            ,28597 // iso-8859-7 Greek (ISO)
            ,28598 // iso-8859-8 Hebrew (ISO-Visual)
            ,28599 // iso-8859-9 Turkish (ISO)
            ,28603 // iso-8859-13 Estonian (ISO)
            ,28605 // iso-8859-15 Latin 9 (ISO)

            ,1250 // windows-1250 Central European (Windows)
            ,1251 // windows-1251 Cyrillic (Windows)
            ,1252 // Windows-1252 Western European (Windows)
            ,1253 // windows-1253 Greek (Windows)
            ,1254 // windows-1254 Turkish (Windows)
            ,1255 // windows-1255 Hebrew (Windows)
            ,1256 // windows-1256 Arabic (Windows)
            ,1257 // windows-1257 Baltic (Windows)
            ,1258 // windows-1258 Vietnamese (Windows)

            ,20866 // Cyrillic (KOI8-R)
            ,21866 // Cyrillic (KOI8-U)

            ,65000 // UTF-7
            ,65001 // UTF-8
            ,1200 // UTF-16
            ,1201 // Unicode (Big-Endian)

            ,12000 // UTF-32
            ,12001 // UTF-32BE (UTF-32 Big-Endian)
        };

        // Resolve every code page once. Code pages this runtime cannot provide are skipped
        // so that one unavailable encoding does not abort the whole search.
        var candidates = new System.Collections.Generic.List<System.Text.Encoding>(encs.Length);
        foreach (int codePage in encs)
        {
            try
            {
                candidates.Add(System.Text.Encoding.GetEncoding(codePage));
            }
            catch (ArgumentException)
            {
                // Not a valid/available code page on this platform: leave it out.
            }
            catch (NotSupportedException)
            {
                // Known code page, but not supported by this runtime: leave it out.
            }
        }

        foreach (System.Text.Encoding source in candidates)
        {
            // The bytes depend only on the source encoding, so compute them once per source.
            byte[] encoded = source.GetBytes(broken);

            foreach (System.Text.Encoding target in candidates)
            {
                string corrected = target.GetString(encoded);

                // Ordinal comparison: a pair only counts when it reproduces the expected
                // text exactly; culture-aware or case-insensitive matching could report
                // pairs that merely come close.
                if (!string.Equals(correct, corrected, StringComparison.Ordinal))
                {
                    continue;
                }

                System.Data.DataRow dr = dt.NewRow();
                dr["SourceEncoding"] = source.CodePage;
                dr["TargetEncoding"] = target.CodePage;
                dr["Result"] = corrected;
                dr["SourceEncodingName"] = source.BodyName;
                dr["TargetEncodingName"] = target.BodyName;
                dt.Rows.Add(dr);
            }
        }

        this.dataGridView1.DataSource = dt;
    }
}

或者更彻底,只需测试所有编码:
/// <summary>
/// Like the fixed-list test, but tries every encoding the runtime reports through
/// <see cref="System.Text.Encoding.GetEncodings"/>: encodes the broken text with each one,
/// decodes the bytes with each one, and lists every pair that reproduces the correct text.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnTestAll_Click(object sender, EventArgs e)
{
    dt = new System.Data.DataTable();

    // Written with \u escapes so the samples do not depend on how this file is encoded:
    // "correct" is "México" (é = U+00E9); "broken" is the same text after its UTF-8
    // bytes (C3 A9) were decoded with a single-byte code page (U+00C3 U+00A9).
    string correct = "Brokers M\u00e9xico, Intermediario de Aseguro,S.A.";
    string broken = "Brokers M\u00c3\u00a9xico, Intermediario de Aseguro,S.A."; // Get text from database

    dt.Columns.Add("SourceEncoding", typeof(string));
    dt.Columns.Add("TargetEncoding", typeof(string));
    dt.Columns.Add("Result", typeof(string));
    dt.Columns.Add("SourceEncodingName", typeof(string));
    dt.Columns.Add("TargetEncodingName", typeof(string));

    System.Text.EncodingInfo[] encs = System.Text.Encoding.GetEncodings();

    // Resolve each encoding once instead of once per pair; anything the runtime
    // cannot actually instantiate is skipped rather than aborting the search.
    var candidates = new System.Collections.Generic.List<System.Text.Encoding>(encs.Length);
    foreach (System.Text.EncodingInfo info in encs)
    {
        try
        {
            candidates.Add(System.Text.Encoding.GetEncoding(info.CodePage));
        }
        catch (ArgumentException)
        {
            // Not a valid/available code page on this platform: leave it out.
        }
        catch (NotSupportedException)
        {
            // Known code page, but not supported by this runtime: leave it out.
        }
    }

    foreach (System.Text.Encoding source in candidates)
    {
        // The bytes depend only on the source encoding, so compute them once per source.
        byte[] encoded = source.GetBytes(broken);

        foreach (System.Text.Encoding target in candidates)
        {
            string corrected = target.GetString(encoded);

            // Ordinal comparison: a pair only counts when it reproduces the expected
            // text exactly; culture-aware or case-insensitive matching could report
            // pairs that merely come close.
            if (!string.Equals(correct, corrected, StringComparison.Ordinal))
            {
                continue;
            }

            System.Data.DataRow dr = dt.NewRow();
            dr["SourceEncoding"] = source.CodePage;
            dr["TargetEncoding"] = target.CodePage;
            dr["Result"] = corrected;
            dr["SourceEncodingName"] = source.BodyName;
            dr["TargetEncodingName"] = target.BodyName;
            dt.Rows.Add(dr);
        }
    }

    this.dataGridView1.DataSource = dt;
}

您可以在此处下载结果:
\n\n很奇怪,看起来你可以从 German/ANSI(或 ISO-8859-1)转换为 ASCII,但没有办法将其转换回来(信息丢失)...
/// <summary>
/// Demonstrates an irreversible conversion: the text is encoded as ISO-8859-1 and the
/// resulting bytes are then decoded as ASCII.
/// </summary>
/// <returns>
/// The decoded text. Bytes outside the ASCII range cannot be represented and come back
/// as '?', so the original characters are lost.
/// </returns>
public static string lol()
{
    // "Alu-Dreieckstütze"; the ü is written as \u00fc so the sample does not depend on
    // how this file itself is encoded.
    string source = "Alu-Dreieckst\u00fctze";

    // ISO-8859-1 (code page 28591) is named explicitly; System.Text.Encoding.Default
    // was the alternative considered for the source encoding.
    System.Text.Encoding encSource = System.Text.Encoding.GetEncoding(28591);
    System.Text.Encoding encTarget = System.Text.Encoding.ASCII;

    byte[] encoded = encSource.GetBytes(source);
    string broken = encTarget.GetString(encoded);

    return broken;
}

有趣的是,由于旧版应用程序可以正确显示它,因此它不可能丢失信息。
\n那么您确定没有在 Sqlite connectionString 中输入错误(或没有)的编码吗?
例如
"Data Source=C:\\Users\\USERNAME\\Desktop\\location.db; Version=3; UseUTF16Encoding=True;Synchronous=Normal;New=False"; // set up the connection string

https://www.sqlite.org/c3ref/c_any.html
看来您可以使用 PRAGMA encoding 来检查数据库的编码
\n| 归档时间: |
|
| 查看次数: |
7037 次 |
| 最近记录: |