dis*_*kit 5 c# algorithm dynamic-programming data-structures
序列比对是一个非常标准的问题,可用于生物信息学领域的DNA或蛋白质比对.我最近遇到了这个问题的不同版本.
给定两个输入字符串(假设字符串仅由A,C,G,T组成),问题基本上是根据以下矩阵找到最大对齐分数 -
A C G T -
A 5 -1 -2 -1 -3
C -1 5 -3 -2 -4
G -2 -3 5 -2 -2
T -1 -2 -2 5 -1
- -3 -4 -2 -1 Not Allowed
Run Code Online (Sandbox Code Playgroud)
因此,如果A与 - 对齐,我们在对齐分数上加-3,或者如果G与T对齐,我们在分数上加-2,或者如果C与C对齐,我们加5.因此,对于输入字符串AGTGATG和GTTAG,最大比对得分为14,其中一个具有最大得分的比对可以表示为
AGTGATG
-GTTA-G
Run Code Online (Sandbox Code Playgroud)
对齐得分计算如下:A- = -3,GG = 5,TT = 5,GT = -2,AA = 5,T- = -1和GG = 5.加起来,-3 + 5 + 5-2 + 5-1 + 5 = 14这是这对弦的最大可能对齐分数.
我能够使用动态编程对其进行编码并获得Alignment得分矩阵,但我在打印两个字符串的所有可能对齐方面遇到问题,并且最大对齐得分.我试图像在LCS中一样回溯,但无法使其正常工作.我附上了我的代码.
static Dictionary<string, int> dict;
static void Main(string[] args)
{
//This has been assumed that the strings contain only A,C,G,T and -(?)..caps
Console.WriteLine("Enter first string : ");
string a = Console.ReadLine();
a = "-" + a;
Console.WriteLine("Enter second string : ");
string b = Console.ReadLine();
b = "-" + b;
int[,] SQ = new int[a.Length, b.Length];
#region Create Dictionary
dict = new Dictionary<string, int>();
dict.Add("AA", 5);
dict.Add("AC", -1);
dict.Add("AG", -2);
dict.Add("AT", -1);
dict.Add("A-", -3);
dict.Add("CA", -1);
dict.Add("CC", 5);
dict.Add("CG", -3);
dict.Add("CT", -2);
dict.Add("C-", -4);
dict.Add("GA", -2);
dict.Add("GC", -3);
dict.Add("GG", 5);
dict.Add("GT", -2);
dict.Add("G-", -2);
dict.Add("TA", -1);
dict.Add("TC", -2);
dict.Add("TG", -2);
dict.Add("TT", 5);
dict.Add("T-", -1);
dict.Add("-A", -3);
dict.Add("-C", -4);
dict.Add("-G", -2);
dict.Add("-T", -1);
dict.Add("--", 0);
#endregion Create Dictionary
for (int i = 0; i < a.Length; i++)
{
for (int j = 0; j < b.Length; j++)
{
int key = 0, key1 = 0, key2 = 0;
dict.TryGetValue(a[i].ToString() + b[j].ToString(), out key);
dict.TryGetValue("-" + b[j].ToString(), out key1);
dict.TryGetValue(a[i].ToString() + "-", out key2);
if (i == 0)
SQ[i, j] = key1;
else if (j == 0)
SQ[i, j] = key2;
else
SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + key, Math.Max(SQ[i - 1, j] + key1, SQ[i, j - 1] + key2));
}
}
for (int i = 0; i < a.Length; i++)
{
for (int j = 0; j < b.Length; j++)
{
Console.Write(SQ[i, j] + " ");
}
Console.WriteLine();
}
Console.WriteLine("Alignment Score : " + SQ[a.Length - 1, b.Length - 1]);
printAllAlignmentsWithHighestAlignmentScore();
Console.Read();
}
Run Code Online (Sandbox Code Playgroud)
有人可以帮我实现printAllAlignmentsWithHighestAlignmentScore()函数吗?
最后,我有了工作代码来完全完成我想做的事情。\n问题实际上是Needleman\xe2\x80\x93Wunsch 算法的细微变化
\n\n代码 :
\n\nclass Program\n{\n static Dictionary<string, int> dict;\n static void printAllAlignments(int[,] SQ, string a, string b, int p, int q, string str1, string str2){\n if (p == 0 || q == 0){\n while (p == 0 && q != 0){\n str1 = "-" + str1;\n str2 = b[--q]+str2;\n }\n while (q == 0 && p != 0){\n str1 = a[--p]+str1;\n str2 = \'-\' + str2;\n }\n Console.WriteLine("\\n"+str1+"\\n"+str2+"\\n");\n return;\n }\n\n if (SQ[p, q] == (dict[a[p - 1] + b[q - 1].ToString()] + SQ[p - 1, q - 1]))\n printAllAlignments(SQ, a, b, p - 1, q - 1, a[p-1]+str1, b[q-1]+str2);\n\n if (SQ[p, q] == (dict[a[p - 1]+ "-"] + SQ[p - 1, q]))\n printAllAlignments(SQ, a, b, p - 1, q, a[p-1]+str1, "-"+str2);\n\n if (SQ[p, q] == (dict["-" + b[q-1]] + SQ[p, q - 1])) \n printAllAlignments(SQ, a, b, p, q - 1, "-"+str1, b[q-1]+str2);\n\n\n }\n static void Main(string[] args)\n {\n //This has been assumed that the strings contain only A,C,G,T and -(?)..caps\n\n Console.WriteLine("Enter first string : ");\n string a = Console.ReadLine(); \n Console.WriteLine("Enter second string : ");\n string b = Console.ReadLine(); \n int[,] SQ = new int[a.Length+1, b.Length+1];\n\n #region Create Dictionary\n dict = new Dictionary<string, int>();\n dict.Add("AA", 5);\n dict.Add("AC", -1);\n dict.Add("AG", -2);\n dict.Add("AT", -1);\n dict.Add("A-", -3);\n\n dict.Add("CA", -1);\n dict.Add("CC", 5);\n dict.Add("CG", -3);\n dict.Add("CT", -2);\n dict.Add("C-", -4);\n\n dict.Add("GA", -2);\n dict.Add("GC", -3);\n dict.Add("GG", 5);\n dict.Add("GT", -2);\n dict.Add("G-", -2);\n\n dict.Add("TA", -1);\n dict.Add("TC", -2);\n dict.Add("TG", -2);\n dict.Add("TT", 5);\n dict.Add("T-", -1);\n\n dict.Add("-A", -3);\n dict.Add("-C", -4);\n dict.Add("-G", -2);\n dict.Add("-T", -1);\n dict.Add("--", 0);\n #endregion Create Dictionary\n\n SQ[0, 0] = 0; \n for (int i = 1; i <= a.Length; i++) \n SQ[i, 0] = dict["-" + a[i - 1].ToString()] + SQ[i - 1, 0];\n\n for (int i = 1; i <= b.Length; i++)\n SQ[0, i] = dict[b[i - 1].ToString() + "-"] + SQ[0, i - 1];\n\n for (int i = 1; i <= a.Length; i++)\n for (int j = 1; j <= b.Length; j++)\n SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + dict[a[i-1].ToString() + b[j-1]], Math.Max(SQ[i - 1, j] + dict[a[i-1] + "-"], SQ[i, j - 1] + dict["-" + b[j-1]])); \n\n\n Console.WriteLine("Max Alignment Score : " + SQ[a.Length, b.Length]);\n printAllAlignments(SQ, a, b, a.Length , b.Length,"","");\n Console.Read();\n }\n}\nRun Code Online (Sandbox Code Playgroud)\n
| 归档时间: |
|
| 查看次数: |
1129 次 |
| 最近记录: |