annotate commons/tools/tests/Test_F_PostAnalyzeTELib.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 from commons.core.utils.FileUtils import FileUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 from commons.core.sql.DbFactory import DbFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 from commons.tools.PostAnalyzeTELib import PostAnalyzeTELib
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 import subprocess
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 import unittest
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 class Test_F_PostAnalyzeTELib(unittest.TestCase):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 def setUp(self):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 self._expStatFileName = "expStats.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 self._obsStatFileName = ""
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 self._genomeSize = 1281640
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 self._pathTableName = "dummyDmelChr4_chr_allTEs_nr_noSSR_join_path"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 self._seqTableName = "dummyDmelChr4_denovoLibTEs_seq"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 def tearDown(self):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 try:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 os.remove(self._expStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 except: pass
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 try:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 os.remove(self._obsStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 except: pass
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 def test_run_analysis1(self):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 libFileName = "TElib.fa"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 self._writeInputFasta_analysis1(libFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 expClusterFileName = "expClusters.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 self._writeExpClusterFile_analysis1(expClusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 expGlobalStatFileName = "expGlobalStats.txt"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32 self._writeExpGlobalStats_analysis1(expGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 self._writeExpStatsFile_analysis1(self._expStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 obsClusterFileName = "TElib.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 obsGlobalStatFileName = "TElib.globalStatsPerCluster.txt"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 self._obsStatFileName = "TElib.statsPerCluster.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 iPATEL = PostAnalyzeTELib(analysis=1, fastaFileName=libFileName, doClean=True)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 iPATEL.run()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42 self.assertTrue(FileUtils.are2FilesIdentical(expGlobalStatFileName, obsGlobalStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 self.assertTrue(FileUtils.are2FilesIdentical(self._expStatFileName, self._obsStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45 os.remove(libFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46 os.remove(expClusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47 os.remove(expGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
48 os.remove(obsClusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
49 os.remove(obsGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51 def test_run_as_script_analysis1(self):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52 libFileName = "TElib.fa"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
53 with open(libFileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
54 f.write(">transib2\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
55 f.write("GGCCAGTCACAATGGGGGTTTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
56 f.write("ATAAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
57 f.write("GATTCGTTTCATTCACCGGATCTCTTGCGTCCGCCTCCGCCGTGCGACCTCCGCATTC\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
58 f.write(">transib3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
59 f.write("ATAAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
60 f.write("TGAAACTCGTCAGCGTCGTTTCCAAGTCCT\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
61 f.write(">transib4\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
62 f.write("GGCCAGTCACAATGGGGGTTTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
63 f.write("ATAAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
64 f.write("GATTCGTTTCATTCACCGGATCTCTTGCGTCCGCCTCCGCCGTGCGACCTCCGCATTCAT\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
65 f.write("AAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
66 expClusterFileName = "expClusters.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
67 with open(expClusterFileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
68 f.write("transib4 \n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
69 f.write("transib2 \n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
70 f.write("transib3 \n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
71 with open(self._expStatFileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
72 f.write("cluster\tsequencesNb\tsizeOfSmallestSeq\tsizeOfLargestSeq\taverageSize\tmedSize\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
73 f.write("1\t1\t238\t238\t238\t238\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
74 f.write("2\t1\t178\t178\t178\t178\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
75 f.write("3\t1\t90\t90\t90\t90\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
76 expGlobalStatFileName = "expGlobalStats.txt"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
77 with open(expGlobalStatFileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
78 f.write("nb of clusters: 3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
79 f.write("nb of clusters with 1 sequence: 3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
80 f.write("nb of clusters with 2 sequences: 0\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
81 f.write("nb of clusters with >2 sequences: 0 (0 sequences)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
82 f.write("nb of sequences: 3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
83 f.write("nb of sequences in the largest cluster: 1\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
84 f.write("nb of sequences in the smallest cluster: 1\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
85 f.write("size of the smallest sequence: 90\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
86 f.write("size of the largest sequence: 238\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
87 f.write("average sequences size: 168\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
88 f.write("median sequences size: 178\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
89 obsClusterFileName = "TElib.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
90 obsGlobalStatFileName = "TElib.globalStatsPerCluster.txt"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
91 self._obsStatFileName = "TElib.statsPerCluster.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
92
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
93 cmd = "PostAnalyzeTELib.py -i %s -L 98 -S 95 -b -c -v 3" % libFileName
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
94 process = subprocess.Popen(cmd, shell = True)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
95 process.communicate()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
96
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
97 self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
98 self.assertTrue(FileUtils.are2FilesIdentical(expGlobalStatFileName, obsGlobalStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
99 self.assertTrue(FileUtils.are2FilesIdentical(self._expStatFileName, self._obsStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
100
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
101 os.remove(libFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
102 os.remove(expClusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
103 os.remove(expGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
104 os.remove(obsClusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
105 os.remove(obsGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
106
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
107 def test_run_analysis2(self):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
108 clusterFileName = "ConsensusClusters.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
109 self._writeClusterFile_analysis2(clusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
110 self._writeExpStatsFile_analysis2(self._expStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
111 self._obsStatFileName = "ConsensusClusters.classifStatsPerCluster.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
112
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
113 iPATEL = PostAnalyzeTELib(analysis=2, clusterFileName=clusterFileName, verbosity=3)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
114 iPATEL.run()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
115 print "exp: %s, obs: %s" % (self._expStatFileName, self._obsStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
116 self.assertTrue(FileUtils.are2FilesIdentical(self._expStatFileName, self._obsStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
117
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
118 os.remove(clusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
119
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
120 def test_run_analysis3(self):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
121 iDb = DbFactory.createInstance()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
122 iDb.createTable(self._pathTableName, "path", "%s/Tools/DmelChr4_chr_allTEs_nr_noSSR_join_path.path" % os.environ['REPET_DATA'], True)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
123 iDb.createTable(self._seqTableName, "seq", "%s/TEannot/DmelChr4_denovoLibTEs.fa" % os.environ['REPET_DATA'], True)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
124
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
125 expGlobalStatFileName = "expGlobalStats.txt"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
126 self._writeExpGlobalStats_analysis3(expGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
127 self._writeExpStatsFile_analysis3(self._expStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
128 obsGlobalStatFileName = "%s.globalAnnotStatsPerTE.txt" % self._pathTableName
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
129 self._obsStatFileName = "%s.annotStatsPerTE.tab" % self._pathTableName
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
130
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
131 iPATEL = PostAnalyzeTELib(analysis=3, pathTableName=self._pathTableName, seqTableName=self._seqTableName, genomeSize=self._genomeSize)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
132 iPATEL.run()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
133
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
134 self.assertTrue(FileUtils.are2FilesIdentical(expGlobalStatFileName, obsGlobalStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
135 self.assertTrue(FileUtils.are2FilesIdentical(self._expStatFileName, self._obsStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
136
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
137 iDb.dropTable(self._pathTableName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
138 iDb.dropTable(self._seqTableName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
139 iDb.close()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
140 os.remove(expGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
141 os.remove(obsGlobalStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
142
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
143 def test_run_analysis4(self):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
144 iDb = DbFactory.createInstance()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
145 iDb.createTable(self._pathTableName, "path", "%s/Tools/DmelChr4_chr_allTEs_nr_noSSR_join_path.path" % os.environ['REPET_DATA'], True)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
146 iDb.createTable(self._seqTableName, "seq", "%s/TEannot/DmelChr4_denovoLibTEs.fa" % os.environ['REPET_DATA'], True)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
147
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
148 clusterFileName = "clusters.tab"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
149 self._writeClusterFile_analysis4(clusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
150 self._writeExpStatsFile_analysis4(self._expStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
151 self._obsStatFileName = "%s.annotStatsPerCluster.tab" % self._pathTableName
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
152
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
153 iPATEL = PostAnalyzeTELib(analysis=4, clusterFileName=clusterFileName, pathTableName=self._pathTableName, seqTableName=self._seqTableName, genomeSize=self._genomeSize)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
154 iPATEL.run()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
155
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
156 self.assertTrue(FileUtils.are2FilesIdentical(self._expStatFileName, self._obsStatFileName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
157
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
158 iDb.dropTable(self._pathTableName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
159 iDb.dropTable(self._seqTableName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
160 iDb.close()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
161 os.remove(clusterFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
162
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
163 def _writeInputFasta_analysis1(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
164 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
165 f.write(">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
166 f.write("CATTAGATTCAAGGCATCATGGATCAGCACATTTACACAGATATCCTGGAAAATGTGATG\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
167 f.write("CTGCCATATGCCGGGGATGAAATGCCGTTGGTTTGGACATTTCAACAGGATAACGATTCA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
168 f.write("AAACACACGAGCAAGAAAGCTTGAAAGTGGTTTGAGCAGAAATCGATCCGAGTAATGAAA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
169 f.write("TGGCCTGCTCTGTCATCCGACTTGAATCCAATCGAAAACCTTTGGGCGGACGTGGAAAAA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
170 f.write(">DTX-incomp_DmelChr4-B-R10-Map3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
171 f.write("CATTAGATTCAAGGCATCATGGATCAGCACATTTACACAGATATCCTGGAAAATGTGATG\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
172 f.write("CTGCCATATGCCGGGGATGAAATGCCGTTGGTTTGGACATTTCAACAGGATAACGATTCA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
173 f.write("AAACACACGAGCAAGAAAGCTTGAAAGTGGTTTGAGCAGAAATCGATCCGAGTAATGAAA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
174 f.write("TGGCCTGCTCTGTCATCCGACTTGAATCCAATCGAAAACCTTTGGGCGGACGTGGAAAAA\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
175 f.write(">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
176 f.write("TACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACGATTT\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
177 f.write("TTTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAG\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
178 f.write("AGCGGAGAGCGCTACAGCGAACAGCTCTTTTCAACGCATAAAGTGATAGCAGACAACTGT\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
179
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
180 def _writeExpClusterFile_analysis1(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
181 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
182 f.write("DTX-incomp_DmelChr4-B-R10-Map3 DTX-incomp_DmelChr4-B-R9-Map3_reversed \n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
183 f.write("PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed \n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
184
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
185 def _writeExpStatsFile_analysis1(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
186 #TODO: header in option ?
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
187 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
188 f.write("cluster\tsequencesNb\tsizeOfSmallestSeq\tsizeOfLargestSeq\taverageSize\tmedSize\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
189 f.write("1\t2\t240\t240\t240\t240\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
190 f.write("2\t1\t180\t180\t180\t180\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
191
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
192 def _writeExpGlobalStats_analysis1(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
193 #TODO: file or STDOUT ?
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
194 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
195 f.write("nb of clusters: 2\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
196 f.write("nb of clusters with 1 sequence: 1\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
197 f.write("nb of clusters with 2 sequences: 1\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
198 f.write("nb of clusters with >2 sequences: 0 (0 sequences)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
199 f.write("nb of sequences: 3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
200 f.write("nb of sequences in the largest cluster: 2\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
201 f.write("nb of sequences in the smallest cluster: 1\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
202 f.write("size of the smallest sequence: 180\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
203 f.write("size of the largest sequence: 240\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
204 f.write("average sequences size: 220\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
205 f.write("median sequences size: 240\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
206
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
207 def _writeClusterFile_analysis2(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
208 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
209 f.write("DTX-incomp_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_DmelChr4-B-R10-Map3_reversed\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
210 f.write("DTX-incomp_Blc1_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_Blc1_DmelChr4-B-R10-Map3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
211 f.write("DXX-comp_DmelChr4-B-R9-Map3\tDTX-comp_DmelChr4-B-R10-Map3_reversed\tDTX-incomp_DmelChr4-B-R10-Map3_reversed\tnoCat_DmelChr4-B-G1-Map3\tnoCat_DmelChr4-B-R1-Map4\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
212 f.write("RXX-MITE_DmelChr4-B-G7-Map3\tRXX-MITE_DmelChr4-B-G5-Map3\tRXX-MITE_DmelChr4-B-G2-Map3\tRXX-MITE_DmelChr4-B-G23-Map3\tRXX-MITE_DmelChr4-B-G6-Map3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
213
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
214 def _writeExpStatsFile_analysis2(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
215 #TODO: header in option ?
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
216 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
217 f.write("cluster\tnoCat\tPotentialChimeric\tcomp\tincomp\tclassifs (nbTEs)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
218 f.write("1\t0\t1\t0\t2\tDTX (2)\tPotentialHostGene (1)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
219 f.write("2\t0\t0\t0\t2\tDTX (2)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
220 f.write("3\t2\t0\t2\t1\tDTX (2)\tDXX (1)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
221 f.write("4\t0\t0\t0\t0\tMITE (5)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
222
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
223 def _writeExpStatsFile_analysis3(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
224 #TODO: header in option ?
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
225 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
226 f.write("TE\tlength\tcovg\tfrags\tfullLgthFrags\tcopies\tfullLgthCopies\tmeanId\tmeanLgth\tmeanLgthPerc\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
227 f.write("DmelChr4-B-G1-Map3_NoCat\t542\t3701\t12\t4\t10\t4\t95.72\t370.10\t68.28\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
228 f.write("DmelChr4-B-G11-Map20_classII-TIR-incomp\t1240\t8216\t27\t0\t22\t0\t88.80\t375.00\t30.24\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
229 f.write("DmelChr4-B-G7-Map3_classII-TIR-incomp\t1944\t15212\t49\t1\t42\t1\t89.44\t382.36\t19.67\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
230 f.write("DmelChr4-B-G9-Map3_NoCat\t1590\t11564\t24\t0\t21\t1\t92.03\t550.67\t34.63\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
231 f.write("DmelChr4-B-P0.0-Map3_classII-TIR-incomp\t1042\t4001\t13\t3\t11\t3\t85.11\t366.36\t35.16\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
232 f.write("DmelChr4-B-R1-Map4_NoCat\t2367\t66031\t484\t0\t361\t0\t77.84\t182.91\t7.73\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
233 f.write("DmelChr4-B-R12-Map3_NoCat\t2284\t4938\t3\t2\t3\t2\t99.26\t1646.00\t72.07\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
234 f.write("DmelChr4-B-R19-Map4_NoCat\t705\t3328\t10\t3\t10\t3\t88.51\t332.80\t47.21\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
235 f.write("DmelChr4-B-R2-Map6_NoCat\t4638\t20539\t34\t2\t29\t3\t80.93\t708.24\t15.27\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
236 f.write("DmelChr4-B-R4-Map5_NoCat\t1067\t7292\t35\t1\t28\t1\t86.50\t260.54\t24.42\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
237 f.write("DmelChr4-B-R9-Map3_NoCat\t714\t5453\t19\t2\t16\t2\t81.18\t340.81\t47.73\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
238
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
239 def _writeExpGlobalStats_analysis3(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
240 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
241 f.write("nb of sequences: 11\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
242 f.write("nb of matched sequences: 11\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
243 f.write("cumulative coverage: 150275 bp\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
244 f.write("coverage percentage: 11.73%\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
245 f.write("\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
246 f.write("total nb of TE fragments: 710\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
247 f.write("total nb full-length fragments: 18 (2.54%)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
248 f.write("total nb of TE copies: 553\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
249 f.write("total nb full-length copies: 20 (3.62%)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
250 f.write("families with full-length fragments: 8 (72.73%)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
251 f.write(" with only one full-length fragment: 2\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
252 f.write(" with only two full-length fragments: 3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
253 f.write(" with only three full-length fragments: 2\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
254 f.write(" with more than three full-length fragments: 1\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
255 f.write("families with full-length copies: 9 (81.82%)\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
256 f.write(" with only one full-length copy: 3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
257 f.write(" with only two full-length copies: 2\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
258 f.write(" with only three full-length copies: 3\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
259 f.write(" with more than three full-length copies: 1\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
260 f.write("mean of median identity of all families: 88.30 +- 8.33\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
261 f.write("mean of median length percentage of all families: 30.83 +- 32.30\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
262
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
263 def _writeClusterFile_analysis4(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
264 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
265 f.write("1\tDmelChr4-B-R1-Map4_NoCat\tDmelChr4-B-R2-Map6_NoCat\tDmelChr4-B-R4-Map5_NoCat\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
266 f.write("2\tDmelChr4-B-G7-Map3_classII-TIR-incomp\tDmelChr4-B-P0.0-Map3_classII-TIR-incomp\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
267
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
268 def _writeExpStatsFile_analysis4(self, fileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
269 with open(fileName, "w") as f:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
270 f.write("Cluster\tcovg\tfrags\tcopies\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
271 f.write("1\t93862\t553\t418\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
272 f.write("2\t19213\t62\t53\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
273
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
274 def _writeConfigFile(self, configFileName):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
275 with open(configFileName, "w") as fHandle:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
276 fHandle.write("[repet_env]\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
277 fHandle.write("repet_host: %s\n" % os.environ["REPET_HOST"])
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
278 fHandle.write("repet_user: %s\n" % os.environ["REPET_USER"])
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
279 fHandle.write("repet_pw: %s\n" % os.environ["REPET_PW"])
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
280 fHandle.write("repet_db: %s\n" % os.environ["REPET_DB"])
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
281 fHandle.write("repet_port: 3306\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
282 fHandle.write("[analysis1]\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
283 fHandle.write("fasta_name: %s\n" % self._expStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
284
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
285 fHandle.write("[analysis2]\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
286 fHandle.write("clusterFileName: %s\n" % self._expStatFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
287
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
288 fHandle.write("[analysis3]\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
289 fHandle.write("pathTableName: %s\n" % self._pathTableName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
290 fHandle.write("seqTableName: %s\n" % self._seqTableName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
291 fHandle.write("genomeSize: %s\n" % self._genomeSize)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
292
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Run all tests of this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()