comparison commons/core/coord/test/Test_F_ConvCoord.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 from commons.core.utils.FileUtils import FileUtils
2 from commons.core.sql.DbFactory import DbFactory
3 from commons.core.coord.ConvCoord import ConvCoord
4 import time
5 import subprocess
6 import os
7 import unittest
8
class Test_F_ConvCoord(unittest.TestCase):
    """Functional tests for ConvCoord.

    The fixtures describe hits located on "chunks" and the expected results
    describe the same hits located on "chromosomes", using the chunk map
    written by `_writeMapFile`.  Most tests launch the `ConvCoord.py`
    script (it must be reachable through PATH) and compare the produced
    output with a reference file; `test_run` drives the class directly.

    Every temporary file or table a test creates is registered in
    `self._tmpFiles` / `self._tmpTables` and released in `tearDown`, so
    that a failing assertion does not leave fixtures behind.
    """

    def setUp(self):
        """Create the converter, unique fixture names and the DB handle."""
        self._i = ConvCoord()
        # Timestamp + pid keeps fixture names distinct between runs.
        self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S"), os.getpid())
        self._inData = "dummyInData_%s" % self._uniqId
        self._mapData = "dummyMapData_%s" % self._uniqId
        self._expData = "dummyExpData_%s" % self._uniqId
        self._obsData = "dummyObsData_%s" % self._uniqId
        # Cleanup registries consumed by tearDown().
        self._tmpFiles = []
        self._tmpTables = []
        self._iDb = DbFactory.createInstance()
        self._i._iDb = self._iDb

    def tearDown(self):
        """Remove registered files, drop registered tables, close the DB."""
        try:
            for path in self._tmpFiles:
                # lexists() also sees dangling symlinks; skipping missing
                # paths keeps cleanup from hiding the real test failure.
                if os.path.lexists(path):
                    os.remove(path)
            for table in self._tmpTables:
                self._iDb.dropTable(table)
        finally:
            self._iDb.close()

    def test_run_as_script_alignFile_query(self):
        """Convert query coordinates of an 'align' file, with a config file."""
        configFile = "%s/dummyConfigFile_%s" % (os.getcwd(), self._uniqId)
        self._tmpFiles.extend([self._inData, configFile, self._mapData,
                               self._expData, self._obsData])
        self._writeConfigFile(configFile)
        self._writeMapFile(self._mapData)

        tsv = self._tsv
        linesToProcess = [
            tsv("chunk1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.800000"),  # hit within the 1st chunk
            tsv("chunk1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.800000"),  # hit included within the chunk overlap, on the 1st chunk
            tsv("chunk2", "2", "9", "TE1", "1", "8", "8e-58", "11", "97.800000"),  # hit included within the chunk overlap, on the 2nd chunk
            tsv("chunk2", "51", "58", "TE1", "1", "8", "8e-58", "11", "97.800000"),  # hit inside the 2nd chunk
            tsv("chunk2", "51", "70", "TE1", "8", "1", "8e-58", "11", "97.800000"),  # subject on reverse strand
        ]
        FileUtils.writeLineListInFile(self._inData, linesToProcess)

        # The two hits lying in the chunk overlap are expected only once.
        refLines = [
            tsv("chromosome1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.800000"),
            tsv("chromosome1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.800000"),
            tsv("chromosome1", "141", "148", "TE1", "1", "8", "8e-58", "11", "97.800000"),
            tsv("chromosome1", "141", "160", "TE1", "8", "1", "8e-58", "11", "97.800000"),
        ]
        FileUtils.writeLineListInFile(self._expData, refLines)

        self._runConvCoordScript([
            ("-i", self._inData),
            ("-f", "align"),
            ("-c", "q"),
            ("-m", self._mapData),
            ("-o", self._obsData),
            ("-C", configFile),
        ])

        self.assertTrue(FileUtils.are2FilesIdentical(self._expData, self._obsData))

    def test_run_as_script_alignFile_queryAndSubject(self):
        """Convert query and subject coordinates of an 'align' file."""
        self._tmpFiles.extend([self._inData, self._expData,
                               self._obsData, self._mapData])
        # NOTE(review): the map is given as a file here; the table drop kept
        # from the original presumably targets a table the script creates
        # from that file -- confirm against ConvCoord.
        self._tmpTables.append(self._mapData)
        self._writeMapFile(self._mapData)

        tsv = self._tsv
        linesToProcess = [
            tsv("chunk1", "21", "37", "chunk3", "1", "27", "8e-58", "30", "97.800000"),  # hit within the 1st chunk
            tsv("chunk1", "92", "99", "chunk2", "2", "9", "8e-58", "11", "97.800000"),  # hit included within the chunk overlap, on the 1st chunk
            tsv("chunk2", "51", "58", "chunk1", "1", "8", "8e-58", "11", "97.800000"),  # hit inside the 2nd chunk
            # The comma after the next entry was missing in the original,
            # which silently glued the last two lines into one list element.
            tsv("chunk2", "51", "70", "chunk1", "8", "1", "8e-58", "11", "97.800000"),  # subject on reverse strand
            tsv("chunk2", "51", "70", "chunk1", "8", "1", "8e-58", "11", "97.800000"),  # duplicate of previous line
        ]
        FileUtils.writeLineListInFile(self._inData, linesToProcess)

        refLines = [
            tsv("chromosome1", "21", "37", "chromosome2", "1", "27", "8e-58", "30", "97.800000"),
            tsv("chromosome1", "92", "99", "chromosome1", "92", "99", "8e-58", "11", "97.800000"),
            tsv("chromosome1", "141", "148", "chromosome1", "1", "8", "8e-58", "11", "97.800000"),
            tsv("chromosome1", "141", "160", "chromosome1", "8", "1", "8e-58", "11", "97.800000"),
        ]
        FileUtils.writeLineListInFile(self._expData, refLines)

        self._runConvCoordScript([
            ("-i", self._inData),
            ("-f", "align"),
            ("-c", "qs"),
            ("-m", self._mapData),
            ("-o", self._obsData),
        ])

        self.assertTrue(FileUtils.are2FilesIdentical(self._expData, self._obsData))

    def test_run_as_script_pathTable_query(self):
        """Convert query coordinates of a 'path' table, map given as a table."""
        self._tmpFiles.extend([self._obsData, self._expData])
        self._tmpTables.extend([self._mapData, self._inData,
                                self._expData, self._obsData])
        self._loadMapTable()
        self._loadPathTable()

        tsv = self._tsv
        # Path 3 (overlap hit seen from the 2nd chunk) is not expected here.
        refLines = [
            tsv("1", "chromosome1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.8"),
            tsv("2", "chromosome1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.8"),
            tsv("4", "chromosome1", "141", "148", "TE1", "1", "8", "8e-58", "11", "97.8"),
            tsv("5", "chromosome1", "141", "160", "TE1", "8", "1", "8e-58", "11", "97.8"),
        ]
        FileUtils.writeLineListInFile(self._expData, refLines)

        self._runConvCoordScript([
            ("-i", self._inData),
            ("-f", "path"),
            ("-c", "q"),
            ("-m", self._mapData),
            ("-o", self._obsData),
        ])

        # The result is a table: dump it to a file before comparing.
        self._iDb.exportDataToFile(self._obsData, self._obsData)
        self.assertTrue(FileUtils.are2FilesIdentical(self._expData, self._obsData))

    def test_run_as_script_pathTable_query_noMergeChunkOverlaps(self):
        """Same as the 'path' table test, but with option '-M no'."""
        self._tmpFiles.extend([self._obsData, self._expData])
        self._tmpTables.extend([self._mapData, self._inData,
                                self._expData, self._obsData])
        self._loadMapTable()
        self._loadPathTable()

        tsv = self._tsv
        # With '-M no' both copies of the overlap hit (paths 2 and 3) stay.
        refLines = [
            tsv("1", "chromosome1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.8"),
            tsv("2", "chromosome1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.8"),
            tsv("3", "chromosome1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.8"),  # hit included within the chunk overlap, on the 2nd chunk
            tsv("4", "chromosome1", "141", "148", "TE1", "1", "8", "8e-58", "11", "97.8"),
            tsv("5", "chromosome1", "141", "160", "TE1", "8", "1", "8e-58", "11", "97.8"),
        ]
        FileUtils.writeLineListInFile(self._expData, refLines)

        self._runConvCoordScript([
            ("-i", self._inData),
            ("-f", "path"),
            ("-c", "q"),
            ("-m", self._mapData),
            ("-M", "no"),
            ("-o", self._obsData),
        ])

        # The result is a table: dump it to a file before comparing.
        self._iDb.exportDataToFile(self._obsData, self._obsData)
        self.assertTrue(FileUtils.are2FilesIdentical(self._expData, self._obsData))

    def test_run(self):
        """Run ConvCoord through its API on the data set under $REPET_DATA."""
        inFileName = "DmelChr4_chk.align.not_over.filtered"
        expFileName = "%s/Tools/DmelChr4_chr.align.not_over.filtered" % os.environ["REPET_DATA"]
        obsFileName = "obs.align"
        os.symlink("%s/Tools/%s" % (os.environ["REPET_DATA"], inFileName), inFileName)
        # Registered only once the link exists, so that a pre-existing file
        # with the same name is never deleted by tearDown().
        self._tmpFiles.extend([inFileName, obsFileName])

        iConvCoord = ConvCoord()
        iConvCoord.setInputData(inFileName)
        iConvCoord.setMapData("%s/Tools/DmelChr4_chunks.map" % os.environ["REPET_DATA"])
        iConvCoord.setCoordinatesToConvert("qs")
        iConvCoord.setMergeChunkOverlaps(False)
        iConvCoord.setOutputData(obsFileName)
        iConvCoord.run()

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

    @staticmethod
    def _tsv(*fields):
        """Return `fields` joined by tabs and terminated by a newline."""
        return "\t".join(fields) + "\n"

    def _writeConfigFile(self, configFile):
        """Write a '[repet_env]' section filled from the REPET_* variables."""
        with open(configFile, "w") as configF:
            configF.write("[repet_env]\n")
            for key in ("host", "user", "pw", "db", "port"):
                value = os.environ["REPET_%s" % key.upper()]
                configF.write("repet_%s: %s\n" % (key, value))

    def _runConvCoordScript(self, options):
        """Run 'ConvCoord.py' through the shell and wait for it to finish.

        `options` is a sequence of (flag, value) pairs appended to the
        command line in the given order.
        """
        cmd = "ConvCoord.py"
        for flag, value in options:
            cmd += " %s %s" % (flag, value)
        process = subprocess.Popen(cmd, shell=True)
        process.communicate()

    def _loadMapTable(self):
        """Load the chunk map into table `self._mapData`, then drop the file."""
        self._tmpFiles.append(self._mapData)
        self._writeMapFile(self._mapData)
        self._iDb.createTable(self._mapData, "map", self._mapData, True)
        # The file is removed right away: the script receives the table name.
        os.remove(self._mapData)

    def _loadPathTable(self):
        """Load the five-path fixture into table `self._inData`."""
        tsv = self._tsv
        linesToProcess = [
            tsv("1", "chunk1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.800000"),  # hit within the 1st chunk
            tsv("2", "chunk1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.800000"),  # hit included within the chunk overlap, on the 1st chunk
            tsv("3", "chunk2", "2", "9", "TE1", "1", "8", "8e-58", "11", "97.800000"),  # hit included within the chunk overlap, on the 2nd chunk
            tsv("4", "chunk2", "51", "58", "TE1", "1", "8", "8e-58", "11", "97.800000"),  # hit inside the 2nd chunk
            tsv("5", "chunk2", "51", "70", "TE1", "8", "1", "8e-58", "11", "97.800000"),  # subject on reverse strand
        ]
        self._tmpFiles.append(self._inData)
        FileUtils.writeLineListInFile(self._inData, linesToProcess)
        self._iDb.createTable(self._inData, "path", self._inData, True)
        os.remove(self._inData)

    def _writeMapFile(self, mapFile):
        """Write the three-chunk map used by the script tests to `mapFile`.

        chunk1 and chunk2 are placed on chromosome1 (1-100 and 91-190, hence
        a 10-position overlap); chunk3 is placed on chromosome2 (1-100).
        """
        with open(mapFile, "w") as mapF:
            mapF.write("chunk1\tchromosome1\t1\t100\n")
            mapF.write("chunk2\tchromosome1\t91\t190\n")
            mapF.write("chunk3\tchromosome2\t1\t100\n")
211
# Script entry point: when this module is executed directly, hand control
# to the unittest runner so that the test case defined above is run.
if __name__ == "__main__":
    unittest.main()