获取波斯语字符的词首、词中、词尾形式的 Unicode

hem*_*ani 3 c# persian unicode-string

我想得到一个单词中每个波斯字符在其所处位置(词首、词中、词尾)对应的 Unicode 字形字符。

例如转换“????” 到 '?' '?'?'?

我发现ArabicLigaturizer可以完成我在阿拉伯语中的要求。

有没有一种简单的方法可以在 .net 中为波斯语做到这一点?

我用这个代码

    // Only run the ligaturizer when the text contains at least one character
    // from the Arabic Unicode block (Persian letters live in that block too).
    if (System.Text.RegularExpressions.Regex.IsMatch(SearchText, @"\p{IsArabic}"))
        SearchText = (new iTextSharp.text.pdf.ArabicLigaturizer()).Process(SearchText);
Run Code Online (Sandbox Code Playgroud)

但仍然有些文件有问题。我认为这取决于 PdfWriter。

小智 5

我曾为自己的一个项目写过一段 C# 代码,其中硬编码了波斯字母的各种字形并据此转换文本,希望对你有帮助(它还会把文本反转,因为这段代码是为不支持 RTL 的 Unity3D 编写的):

//This library maps Persian characters to their contextual Unicode glyph forms and reverses them for right-to-left display, for use in software like Unity3D
//Author: Shayan Edalatmanesh

using System.Collections.Generic;
using System.Text.RegularExpressions;

public static class PersianMap
{
    // Set of characters treated as Persian text. It is spliced into the regex
    // character classes (wordRE, persianRE) that split a line into Persian and
    // non-Persian runs.
    // NOTE(review): every character of this literal shows up as '?' in this copy -
    // presumably the original Persian letters were lost to an encoding problem.
    // Restore the real characters before using the class.
    static string persianChars = "?????????????????????????????????????????????????????????????";
    // Characters that convertWord skips over when it looks for the neighbouring
    // letters of a character (presumably the diacritic marks, going by the name -
    // confirm once the literal is restored; it is garbled to '?' here as well).
    static string tashkil = "?????????";
// Maps a base letter to its four contextual glyphs. The array is indexed by the
// value getGlyphType returns: [0] isolated, [1] begin, [2] middle, [3] end.
// NOTE(review): all keys and glyphs appear as '?' in this copy - the original
// characters seem to have been lost to an encoding problem. As written, the
// initializer adds the key '?' more than once, which makes Dictionary.Add throw
// an ArgumentException when the class is first used. Restore the real letters.
static Dictionary<char?, char[]> map = new Dictionary<char?, char[]>()
{
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] {'?', '?', '?', '?'}}, 
        {'?', new char[] { '?', '?', '?', '?'}},
};

// Two-letter sequences that are rendered as a single glyph. convertWord looks up
// the current letter followed by the next non-tashkil letter here; the array is
// indexed by the glyph type computed for the first letter of the pair.
// NOTE(review): keys and glyphs are garbled to '?' in this copy, so the key "??"
// is repeated and the initializer throws on the duplicate. Restore the real
// characters before use.
static Dictionary<string, char[]> doubleMap = new Dictionary<string, char[]>()
{
    {"??", new char[] {'?', '?', '?', '?'}}, 
    {"??", new char[] {'?', '?', '?', '?'}}, 
    {"??", new char[] {'?', '?', '?', '?'}}, 
    {"??", new char[] {'?', '?', '?', '?'}}, 
};

//Letters that join to the letter that follows them. getGlyphType tests both the
//current and the previous letter for membership in this list.
//NOTE(review): the entries are garbled to '?' in this copy; restore the real letters.
static List<char?> afterStick = new List<char?>()
{
    '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
    '?','?','?','?','?','?','?','?',
};

// Picks which contextual form of chr to draw, given its neighbouring letters
// (null means there is no neighbour on that side).
// Return value, used as an index into the glyph arrays:
//   0 = isolated, 1 = begin, 2 = middle, 3 = end
static int getGlyphType(char chr, char? before, char? after)
{
    // A letter with no neighbours at all stands alone.
    if (before == null && after == null)
        return 0;

    // The previous letter reaches forward to this one.
    bool joinsPrevious = before != null && afterStick.Contains(before);

    // This letter reaches forward, and there is a mapped letter to reach.
    bool joinsNext = afterStick.Contains(chr)
        && after != null
        && map.ContainsKey(after);

    if (joinsNext)
        return joinsPrevious ? 2 : 1;

    return joinsPrevious ? 3 : 0;
}

// Converts one run of Persian characters to its contextual glyphs and returns
// the result in reversed character order (for renderers without RTL support).
//
// Characters that have no entry in map are copied through unchanged. For mapped
// letters the neighbouring letters are located while skipping tashkil
// characters, the glyph type is computed by getGlyphType, and two-letter
// sequences listed in doubleMap are replaced by their single combined glyph.
static string convertWord(string word)
{
    var shaped = new System.Text.StringBuilder(word.Length);

    for (int i = 0; i < word.Length; i++)
    {
        char current = word[i];

        char[] forms;
        if (!map.TryGetValue(current, out forms))
        {
            shaped.Append(current);
            continue;
        }

        char? before = null, after = null;
        int afterIndex = -1;

        //Find the previous letter that is not tashkil
        for (int k = i - 1; k >= 0; k--)
        {
            if (tashkil.IndexOf(word[k]) < 0)
            {
                before = word[k];
                break;
            }
        }

        //Find the next letter that is not tashkil, remembering where it is
        for (int k = i + 1; k < word.Length; k++)
        {
            if (tashkil.IndexOf(word[k]) < 0)
            {
                after = word[k];
                afterIndex = k;
                break;
            }
        }

        int glyph = getGlyphType(current, before, after);

        //Consider doubleMap
        if (after != null)
        {
            string pair = current.ToString() + after.Value.ToString();
            char[] combinedForms;
            if (doubleMap.TryGetValue(pair, out combinedForms))
            {
                shaped.Append(combinedForms[glyph]);

                //The second letter of the pair may be separated from the first
                //by tashkil characters. Keep those marks, then jump to the
                //second letter itself so it is not emitted a second time.
                shaped.Append(word, i + 1, afterIndex - i - 1);
                i = afterIndex;
                continue;
            }
        }

        shaped.Append(forms[glyph]);
    }

    //Reverse
    char[] reversed = shaped.ToString().ToCharArray();
    System.Array.Reverse(reversed);
    return new string(reversed);
}

// Splits a line into consecutive runs: "fa" is a (possibly empty) run of
// Persian characters and "nfa" the run of non-Persian characters after it.
static readonly Regex wordRE = new Regex("(?<fa>[" + persianChars + "]*)(?<nfa>[^" + persianChars + "]*)");
// A space-free token with optional spaces on either side. Each run of spaces is
// captured as a whole: with "( )*" a group only keeps its last iteration, so
// the "$3$2$1" replacement would shrink every run of spaces to a single space.
static readonly Regex spaceRE = new Regex("( *)([^ ]*)( *)");
// Matches any single Persian character.
static readonly Regex persianRE = new Regex("[" + persianChars + "]");

// Converts one line of text. A line without Persian characters is returned
// unchanged. Otherwise the runs found by wordRE are emitted last-to-first:
// the non-Persian run with its leading and trailing spaces swapped, followed by
// the Persian run converted (and reversed) by convertWord.
static string convertLine(string line)
{
    //Does not contain any persian characters
    if (!persianRE.IsMatch(line))
        return line;

    var rebuilt = new System.Text.StringBuilder(line.Length);
    MatchCollection matches = wordRE.Matches(line);
    for (int i = matches.Count - 1; i >= 0; i--)
    {
        Match run = matches[i];

        //If the non-persian part has spaces around it, move them to its other side
        rebuilt.Append(spaceRE.Replace(run.Groups["nfa"].Value, "$3$2$1"));

        //Convert the persian part
        rebuilt.Append(convertWord(run.Groups["fa"].Value));
    }
    return rebuilt.ToString();
}

// Converts every line of str with convertLine and joins the lines back together
// with '\n'. Null or empty input is returned as-is.
public static string ConvertPersian(string str)
{
    if (string.IsNullOrEmpty(str))
        return str;

    string[] lines = str.Split('\n');
    for (int i = 0; i < lines.Length; i++)
    {
        string line = lines[i];

        // For CRLF text the '\r' stays attached to the line after the split.
        // convertLine emits the runs of a line in reverse order, which would
        // move that '\r' to the front, so take it off and put it back afterwards.
        bool endsWithCarriageReturn = line.Length > 0 && line[line.Length - 1] == '\r';
        if (endsWithCarriageReturn)
            line = line.Substring(0, line.Length - 1);

        line = convertLine(line);
        lines[i] = endsWithCarriageReturn ? line + "\r" : line;
    }
    return string.Join("\n", lines);
}

// Extension-method form of ConvertPersian, so callers can write text.FixPersian().
public static string FixPersian(this string str)
{
    string converted = ConvertPersian(str);
    return converted;
}
}
Run Code Online (Sandbox Code Playgroud)