以最高分打印两个序列的所有比对

dis*_*kit 5 c# algorithm dynamic-programming data-structures

序列比对是一个非常标准的问题,可用于生物信息学领域的DNA或蛋白质比对.我最近遇到了这个问题的不同版本.

给定两个输入字符串(假设字符串仅由A,C,G,T组成),问题基本上是根据以下矩阵找到最大对齐分数 -

   A  C  G  T  -
A  5 -1 -2 -1 -3  
C -1  5 -3 -2 -4
G -2 -3  5 -2 -2
T -1 -2 -2  5 -1
- -3 -4 -2 -1 Not Allowed
Run Code Online (Sandbox Code Playgroud)

因此,如果A与 - 对齐,我们在对齐分数上加-3,或者如果G与T对齐,我们在分数上加-2,或者如果C与C对齐,我们加5.因此,对于输入字符串AGTGATG和GTTAG,最大比对得分为14,其中一个具有最大得分的比对可以表示为

AGTGATG
-GTTA-G
Run Code Online (Sandbox Code Playgroud)

对齐得分计算如下:A- = -3,GG = 5,TT = 5,GT = -2,AA = 5,T- = -1和GG = 5.加起来,-3 + 5 + 5-2 + 5-1 + 5 = 14这是这对弦的最大可能对齐分数.

我能够使用动态编程对其进行编码并获得Alignment得分矩阵,但我在打印两个字符串的所有可能对齐方面遇到问题,并且最大对齐得分.我试图像在LCS中一样回溯,但无法使其正常工作.我附上了我的代码.

static Dictionary<string, int> dict;

    static void Main(string[] args)
    {
        //This has been assumed that the strings contain only A,C,G,T and -(?)..caps

        Console.WriteLine("Enter first string : ");
        string a = Console.ReadLine();
        a = "-" + a;
        Console.WriteLine("Enter second string : ");
        string b = Console.ReadLine();
        b = "-" + b;
        int[,] SQ = new int[a.Length, b.Length];
        #region Create Dictionary
        dict = new Dictionary<string, int>();
        dict.Add("AA", 5);
        dict.Add("AC", -1);
        dict.Add("AG", -2);
        dict.Add("AT", -1);
        dict.Add("A-", -3);

        dict.Add("CA", -1);
        dict.Add("CC", 5);
        dict.Add("CG", -3);
        dict.Add("CT", -2);
        dict.Add("C-", -4);

        dict.Add("GA", -2);
        dict.Add("GC", -3);
        dict.Add("GG", 5);
        dict.Add("GT", -2);
        dict.Add("G-", -2);

        dict.Add("TA", -1);
        dict.Add("TC", -2);
        dict.Add("TG", -2);
        dict.Add("TT", 5);
        dict.Add("T-", -1);

        dict.Add("-A", -3);
        dict.Add("-C", -4);
        dict.Add("-G", -2);
        dict.Add("-T", -1);
        dict.Add("--", 0);
        #endregion Create Dictionary

        for (int i = 0; i < a.Length; i++)
        {
            for (int j = 0; j < b.Length; j++)
            {
                int key = 0, key1 = 0, key2 = 0;
                dict.TryGetValue(a[i].ToString() + b[j].ToString(), out key);
                dict.TryGetValue("-" + b[j].ToString(), out key1);
                dict.TryGetValue(a[i].ToString() + "-", out key2);
                if (i == 0)
                    SQ[i, j] = key1;
                else if (j == 0)
                    SQ[i, j] = key2;
                else
                    SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + key, Math.Max(SQ[i - 1, j] + key1, SQ[i, j - 1] + key2));
            }
        }
        for (int i = 0; i < a.Length; i++)
        {
            for (int j = 0; j < b.Length; j++)
            {
                Console.Write(SQ[i, j] + "   ");
            }
            Console.WriteLine();
        }

        Console.WriteLine("Alignment Score : " + SQ[a.Length - 1, b.Length - 1]);            
        printAllAlignmentsWithHighestAlignmentScore();
        Console.Read();
    }
Run Code Online (Sandbox Code Playgroud)

有人可以帮我实现printAllAlignmentsWithHighestAlignmentScore()函数吗?

dis*_*kit 2

最后,我有了工作代码来完全完成我想做的事情。\n问题实际上是Needleman\xe2\x80\x93Wunsch 算法的细微变化

\n\n

代码 :

\n\n
class Program\n{\n    static Dictionary<string, int> dict;\n    static void printAllAlignments(int[,] SQ, string a, string b, int p, int q, string str1, string str2){\n        if (p == 0 || q == 0){\n            while (p == 0 && q != 0){\n                str1 = "-" + str1;\n                str2 = b[--q]+str2;\n            }\n            while (q == 0 && p != 0){\n                str1 = a[--p]+str1;\n                str2 = \'-\' + str2;\n            }\n            Console.WriteLine("\\n"+str1+"\\n"+str2+"\\n");\n            return;\n        }\n\n        if (SQ[p, q] == (dict[a[p - 1] + b[q - 1].ToString()] + SQ[p - 1, q - 1]))\n            printAllAlignments(SQ, a, b, p - 1, q - 1, a[p-1]+str1, b[q-1]+str2);\n\n        if (SQ[p, q] == (dict[a[p - 1]+ "-"] + SQ[p - 1, q]))\n            printAllAlignments(SQ, a, b, p - 1, q, a[p-1]+str1, "-"+str2);\n\n        if (SQ[p, q] == (dict["-" + b[q-1]] + SQ[p, q - 1]))            \n            printAllAlignments(SQ, a, b, p, q - 1, "-"+str1, b[q-1]+str2);\n\n\n    }\n    static void Main(string[] args)\n    {\n        //This has been assumed that the strings contain only A,C,G,T and -(?)..caps\n\n        Console.WriteLine("Enter first string : ");\n        string a = Console.ReadLine();         \n        Console.WriteLine("Enter second string : ");\n        string b = Console.ReadLine();          \n        int[,] SQ = new int[a.Length+1, b.Length+1];\n\n        #region Create Dictionary\n        dict = new Dictionary<string, int>();\n        dict.Add("AA", 5);\n        dict.Add("AC", -1);\n        dict.Add("AG", -2);\n        dict.Add("AT", -1);\n        dict.Add("A-", -3);\n\n        dict.Add("CA", -1);\n        dict.Add("CC", 5);\n        dict.Add("CG", -3);\n        dict.Add("CT", -2);\n        dict.Add("C-", -4);\n\n        dict.Add("GA", -2);\n        dict.Add("GC", -3);\n        dict.Add("GG", 5);\n        dict.Add("GT", -2);\n        dict.Add("G-", -2);\n\n        dict.Add("TA", -1);\n        dict.Add("TC", -2);\n        dict.Add("TG", -2);\n        dict.Add("TT", 5);\n        dict.Add("T-", -1);\n\n        dict.Add("-A", -3);\n        dict.Add("-C", -4);\n        dict.Add("-G", -2);\n        dict.Add("-T", -1);\n        dict.Add("--", 0);\n        #endregion Create Dictionary\n\n        SQ[0, 0] = 0;            \n        for (int i = 1; i <= a.Length; i++)            \n            SQ[i, 0] = dict["-" + a[i - 1].ToString()] + SQ[i - 1, 0];\n\n        for (int i = 1; i <= b.Length; i++)\n            SQ[0, i] = dict[b[i - 1].ToString() + "-"] + SQ[0, i - 1];\n\n        for (int i = 1; i <= a.Length; i++)\n            for (int j = 1; j <= b.Length; j++)\n                SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + dict[a[i-1].ToString() + b[j-1]], Math.Max(SQ[i - 1, j] + dict[a[i-1] + "-"], SQ[i, j - 1] + dict["-" + b[j-1]]));           \n\n\n        Console.WriteLine("Max Alignment Score : " + SQ[a.Length, b.Length]);\n        printAllAlignments(SQ, a, b, a.Length , b.Length,"","");\n        Console.Read();\n    }\n}\n
Run Code Online (Sandbox Code Playgroud)\n