hem*_*ani 3 c# persian unicode-string
我想将 Unicode 波斯字符放在一个单词中。
例如转换“????” 到 '?' '?'?'?
我发现ArabicLigaturizer可以完成我在阿拉伯语中的要求。
有没有一种简单的方法可以在 .net 中为波斯语做到这一点?
我目前使用的是下面这段代码:
// Run the ligaturizer only when the text contains at least one character
// matched by the \p{IsArabic} named block.
if (System.Text.RegularExpressions.Regex.IsMatch(SearchText, @"\p{IsArabic}"))
{
    SearchText = (new iTextSharp.text.pdf.ArabicLigaturizer()).Process(SearchText);
}
Run Code Online (Sandbox Code Playgroud)
但仍然有些文件有问题。我认为这取决于 PdfWriter。
小智 5
我已经为我的一个项目编写了一个 C# 代码,对波斯文本进行硬编码并转换它们,希望这会有所帮助(它也可以反转它们,因为我为不支持 RTL 的 Unity3D 编写了该代码):
// This library maps Persian characters to their contextual glyph forms (isolated/begin/middle/end) and reverses them for right-to-left display, for use in software such as Unity3D.
//Author: Shayan Edalatmanesh
using System.Collections.Generic;
using System.Text.RegularExpressions;
public static class PersianMap
{
// Every character that counts as Persian text. The string is spliced verbatim into the
// character classes of wordRE and persianRE.
// NOTE(review): the characters appear as '?' in this copy — presumably the original letters
// were lost in an encoding conversion; restore them from the original source before use.
static string persianChars = "?????????????????????????????????????????????????????????????";
// Characters that convertWord skips over when it looks for the neighbouring letters of a
// character (the "tashkil" marks). NOTE(review): literals lost to '?' here as well.
static string tashkil = "?????????";
// Maps a base character to its four glyph forms. The array is indexed by the glyph type
// returned by getGlyphType: 0 = isolated, 1 = begin, 2 = middle, 3 = end.
// NOTE(review): every key and value appears as '?' in this copy — presumably lost in an
// encoding conversion. As written the initializer adds the same key repeatedly, which
// Dictionary rejects with an exception when the type is initialized; restore the
// original characters before using this class.
static Dictionary<char?, char[]> map = new Dictionary<char?, char[]>()
{
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] {'?', '?', '?', '?'}},
{'?', new char[] { '?', '?', '?', '?'}},
};
// Two-letter combinations that are replaced by a single glyph. convertWord builds the key
// from the current letter followed by the next non-tashkil letter, and indexes the array
// with the glyph type of the current letter (0 isolated, 1 begin, 2 middle, 3 end).
// NOTE(review): keys and values appear as '?' in this copy — presumably lost in an encoding
// conversion; the repeated "??" key would be rejected by the dictionary initializer.
static Dictionary<string, char[]> doubleMap = new Dictionary<string, char[]>()
{
{"??", new char[] {'?', '?', '?', '?'}},
{"??", new char[] {'?', '?', '?', '?'}},
{"??", new char[] {'?', '?', '?', '?'}},
{"??", new char[] {'?', '?', '?', '?'}},
};
// Letters that join to the letter that follows them. getGlyphType consults this list for
// both the current letter and the letter before it.
// NOTE(review): the entries appear as '?' in this copy — presumably lost in an encoding
// conversion; restore the original letters.
static List<char?> afterStick = new List<char?>()
{
'?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
'?','?','?','?','?','?','?','?',
};
// Picks which of a letter's four glyph forms to use, given its neighbours.
// Return value is the index into the arrays stored in map / doubleMap:
//   0: Isolated
//   1: Begin
//   2: Middle
//   3: End
// chr    - the letter being shaped
// before - nearest preceding letter, or null when there is none
// after  - nearest following letter, or null when there is none
static int getGlyphType(char chr, char? before, char? after)
{
    // A letter with no neighbours at all always stands alone.
    if (before == null && after == null)
        return 0;

    // Joined on the leading side when the previous letter is one that sticks to its successor.
    bool joinsPrevious = before != null && afterStick.Contains(before);

    // Joined on the trailing side when this letter sticks to its successor and the
    // successor is a mapped letter.
    bool joinsNext = afterStick.Contains(chr) && after != null && map.ContainsKey(after);

    if (joinsNext)
        return joinsPrevious ? 2 : 1;

    return joinsPrevious ? 3 : 0;
}
// Shapes one run of Persian characters and returns it reversed.
//
// Each character found in map is replaced by the glyph form chosen by getGlyphType from its
// nearest non-tashkil neighbours; characters not in map are copied through unchanged.
// A letter whose pairing with the next non-tashkil letter is a doubleMap key is replaced,
// together with that next letter, by the single combined glyph.
// The shaped text is then reversed character by character.
//
// word - the run to convert; callers in this class pass the "fa" group of wordRE.
// Returns the shaped, reversed string.
static string convertWord(string word)
{
    var shaped = new System.Text.StringBuilder(word.Length);

    for (int i = 0; i < word.Length; i++)
    {
        char current = word[i];

        // Single lookup instead of ContainsKey followed by the indexer.
        char[] forms;
        if (!map.TryGetValue(current, out forms))
        {
            shaped.Append(current);
            continue;
        }

        // Find the previous letter that is not tashkil.
        char? before = null;
        for (int k = i - 1; k >= 0; k--)
        {
            if (tashkil.IndexOf(word[k]) < 0)
            {
                before = word[k];
                break;
            }
        }

        // Find the next letter that is not tashkil, remembering where it sits.
        char? after = null;
        int afterIndex = -1;
        for (int k = i + 1; k < word.Length; k++)
        {
            if (tashkil.IndexOf(word[k]) < 0)
            {
                after = word[k];
                afterIndex = k;
                break;
            }
        }

        int glyph = getGlyphType(current, before, after);

        // Consider doubleMap: the current letter plus the next letter may form one glyph.
        if (after != null)
        {
            string pair = current.ToString() + after.Value.ToString();
            char[] combinedForms;
            if (doubleMap.TryGetValue(pair, out combinedForms))
            {
                shaped.Append(combinedForms[glyph]);
                // Tashkil may sit between the two letters, so the consumed letter is not
                // necessarily at i + 1. Keep any marks in between and jump to the consumed
                // letter itself; skipping just one position would drop a mark and then
                // emit the consumed letter a second time.
                shaped.Append(word, i + 1, afterIndex - i - 1);
                i = afterIndex;
                continue;
            }
        }

        shaped.Append(forms[glyph]);
    }

    // Reverse the shaped text.
    char[] reversed = shaped.ToString().ToCharArray();
    System.Array.Reverse(reversed);
    return new string(reversed);
}
// wordRE: each match is a run of persianChars characters (group "fa") followed by a run of
// any other characters (group "nfa"); either run may be empty.
// The patterns below are built from persianChars, so they depend on that field being
// initialized first (static field initializers run in textual order).
static Regex wordRE = new Regex("(?<fa>[" + persianChars + "]*)(?<nfa>[^" + persianChars + "]*)");
// spaceRE: applied by convertLine with the replacement "$3$2$1" to move the spaces around a
// space-free token to its opposite side.
// NOTE(review): "( )*" repeats a one-space capturing group, so $1/$3 presumably hold only
// the last space captured and a run of several spaces would collapse to one — confirm
// whether that is intended.
static Regex spaceRE = new Regex("( )*([^ ]*)( )*");
// persianRE: matches any single character from persianChars; convertLine uses it to skip
// lines that contain none.
static Regex persianRE = new Regex("[" + persianChars + "]");
// Converts one line of text.
//
// A line without any persianChars character is returned untouched. Otherwise the line is
// split by wordRE into (Persian run, non-Persian run) pairs, and the pairs are emitted from
// last to first: the non-Persian run with its surrounding spaces swapped by spaceRE,
// followed by the Persian run shaped and reversed by convertWord.
//
// line - a single line (ConvertPersian splits on '\n' before calling this).
// Returns the converted line.
static string convertLine(string line)
{
    // Does not contain any Persian characters.
    if (!persianRE.IsMatch(line))
        return line;

    MatchCollection matches = wordRE.Matches(line);

    // Build the output in a StringBuilder rather than with repeated string concatenation,
    // which copies the whole accumulated text on every iteration.
    var output = new System.Text.StringBuilder(line.Length);

    for (int i = matches.Count - 1; i >= 0; i--)
    {
        GroupCollection groups = matches[i].Groups;

        // If the non-Persian part contains a space, put the space on its other side.
        output.Append(spaceRE.Replace(groups["nfa"].Value, "$3$2$1"));

        // Convert the Persian part.
        output.Append(convertWord(groups["fa"].Value));
    }

    return output.ToString();
}
// Converts a (possibly multi-line) string line by line with convertLine.
//
// str - the text to convert. A null or empty value is returned as-is.
// Returns the converted text with the original line breaks kept in place.
public static string ConvertPersian(string str)
{
    // Nothing to convert; also avoids a NullReferenceException on null input.
    if (string.IsNullOrEmpty(str))
        return str;

    string[] lines = str.Split('\n');
    for (int i = 0; i < lines.Length; i++)
    {
        string line = lines[i];

        // For "\r\n" input every line still ends in '\r' after the split. Handing it to
        // convertLine would treat it as ordinary non-Persian text and move it to the front
        // of the reordered line, so detach it here and put it back afterwards.
        bool endsWithCarriageReturn = line.Length > 0 && line[line.Length - 1] == '\r';
        if (endsWithCarriageReturn)
            line = line.Substring(0, line.Length - 1);

        string converted = convertLine(line);
        lines[i] = endsWithCarriageReturn ? converted + "\r" : converted;
    }
    return string.Join("\n", lines);
}
// Extension-method form of ConvertPersian, so the conversion can be written as
// text.FixPersian().
// str - the text to convert.
// Returns whatever ConvertPersian returns for the same input.
public static string FixPersian(this string str)
{
    string converted = ConvertPersian(str);
    return converted;
}
}
Run Code Online (Sandbox Code Playgroud)