0
|
1 #!/usr/bin/python
|
|
2 """
|
|
3 This Galaxy tool prepares your files so that they are ready for
|
|
4 Assembly Hub visualization.
|
|
5 Program test arguments:
|
|
6 hubArchiveCreator.py -g test_data/augustusDbia3.gff3 -f test_data/dbia3.fa -d . -o output.zip
|
|
7 """
|
|
8
|
|
9 import sys
|
|
10 import tempfile
|
|
11 import getopt
|
|
12 import zipfile
|
|
13 import subprocess
|
|
14 import os
|
|
15 import argparse
|
|
16
|
|
17 from mako.template import Template
|
|
18 from mako.lookup import TemplateLookup
|
|
19
|
|
20 # Internal dependencies
|
|
21 from twoBitCreator import twoBitFileCreator
|
|
22
|
|
# Shared paths read by the functions of this module; main() overwrites them
# from the -d/--directory and -e/--extra_files_path command line options.
# TODO: stop passing these around as global variables
toolDirectory = '.'
extra_files_path = '.'
|
|
26
|
|
def _runTool(command):
    """Run an external command to completion and return its exit status.

    A non-zero status is reported on stderr rather than silently ignored;
    no exception is raised, so the caller decides whether to go on.
    """
    status = subprocess.Popen(command).wait()
    if status != 0:
        sys.stderr.write('Warning: %s exited with status %d\n' % (command[0], status))
    return status


def _buildHubArchive(gff3Path, inputFastaFile):
    """Create the hub folder, its bigBed track and 2bit genome, then zip it.

    gff3Path: path of the GFF3 annotation converted into the bigBed track.
    inputFastaFile: open file object of the genome FASTA; it is handed
        unchanged to twoBitFileCreator.

    The archive is written to <extra_files_path>/myHub.zip.
    """
    # TODO: Merge the following processing into a function as it is also used in twoBitCreator
    baseNameFasta = os.path.basename(inputFastaFile.name)
    nameTwoBit = os.path.splitext(baseNameFasta)[0] + '.2bit'

    temporaryFiles = []

    def newTemporaryFile(suffix=''):
        # No bufsize/buffering keyword: its name differs between Python 2 and 3.
        # The only file written from Python is flushed explicitly below.
        handle = tempfile.NamedTemporaryFile(suffix=suffix)
        temporaryFiles.append(handle)
        return handle

    outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w')
    try:
        # Create the structure of the Assembly Hub
        rootAssemblyHub = createAssemblyHub(outputZip, twoBitName=nameTwoBit)

        # TODO: See if we need these temporary files as part of the generated files
        genePredFile = newTemporaryFile(".genePred")
        unsortedBedFile = newTemporaryFile(".unsortedBed")
        sortedBedFile = newTemporaryFile(".sortedBed")
        twoBitInfoFile = newTemporaryFile()
        chromSizesFile = newTemporaryFile(".chrom.sizes")

        # gff3 -> genePred -> bed; each step must finish before the next starts
        _runTool([os.path.join(toolDirectory, 'tools/gff3ToGenePred'),
                  gff3Path,
                  genePredFile.name])
        _runTool([os.path.join(toolDirectory, 'tools/genePredToBed'),
                  genePredFile.name,
                  unsortedBedFile.name])

        # Sort the bed by chromosome name, then numerically by start position
        _runTool(['sort',
                  '-k1,1',
                  '-k2,2n',
                  unsortedBedFile.name,
                  '-o',
                  sortedBedFile.name])

        mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3")

        # 2bit file creation from input fasta
        twoBitFile = twoBitFileCreator(inputFastaFile, toolDirectory, mySpecieFolderPath)

        # Generate the chrom.sizes: first dump the twoBit infos...
        # TODO: Isolate in a function
        p = subprocess.Popen(
            [os.path.join(toolDirectory, 'tools/twoBitInfo'),
             twoBitFile.name,
             'stdout'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        twoBitInfo_out, twoBitInfo_err = p.communicate()
        if p.returncode != 0:
            # stderr is captured by the pipe, so forward it instead of losing it
            sys.stderr.write('Warning: twoBitInfo exited with status %d: %s\n'
                             % (p.returncode, twoBitInfo_err))
        twoBitInfoFile.write(twoBitInfo_out)
        # sort reads this file by name: the data must be on disk first
        twoBitInfoFile.flush()

        # ...then order them by decreasing size
        _runTool(['sort',
                  '-k2rn',
                  twoBitInfoFile.name,
                  '-o',
                  chromSizesFile.name])

        # bedToBigBed processing
        # bedToBigBed augustusDbia3.sortbed chrom.sizes augustusDbia3.bb
        # TODO: Find the best to get this path without hardcoding it
        myTrackFolderPath = os.path.join(mySpecieFolderPath, "tracks")
        # TODO: Change the name of the bb, to tool + genome + .bb
        myBigBedFilePath = os.path.join(myTrackFolderPath, 'augustusDbia3.bb')
        _runTool([os.path.join(toolDirectory, 'tools/bedToBigBed'),
                  sortedBedFile.name,
                  chromSizesFile.name,
                  myBigBedFilePath])

        # The hub folder now holds every generated file, including the .bb
        createZip(outputZip, rootAssemblyHub)
    finally:
        outputZip.close()
        for handle in temporaryFiles:
            try:
                handle.close()
            except OSError:
                # Closing also unlinks the file; an external tool may already
                # have removed or replaced it, which is harmless here.
                pass


def _writeHtmlReport(reportPath):
    """Write an HTML page linking every file found under extra_files_path."""
    with open(reportPath, 'w') as htmlOutput:
        htmlOutput.write('<html>')
        htmlOutput.write('<body>')
        htmlOutput.write('<p>')
        htmlOutput.write('The following generated by Hub Archive Creator:')
        htmlOutput.write('</p>')
        htmlOutput.write('<ul>')
        for root, dirs, files in os.walk(extra_files_path):
            # Links are relative to extra_files_path
            relDir = os.path.relpath(root, extra_files_path)
            for fileName in files:
                relPath = os.path.join(relDir, fileName)
                htmlOutput.write('<li><a href="{0}">{0}</a></li>'.format(relPath))
        htmlOutput.write('</ul>')
        htmlOutput.write('</body>')
        htmlOutput.write('</html>')


def main(argv):
    """Build an Assembly Hub archive from a GFF3 annotation and a FASTA genome.

    argv is accepted for the usual entry-point signature but is not used:
    the options are parsed by argparse from sys.argv.

    Side effects: updates the module globals toolDirectory and
    extra_files_path, writes the hub folder and myHub.zip under
    extra_files_path, and writes an HTML report to the -o path.
    Always ends with sys.exit(0) once the report is written.
    """
    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Create an Assembly Hub archive from a GFF3 annotation and a FASTA genome.')

    parser.add_argument('-g', '--gff3', required=True,
                        help='GFF3 annotation file to convert into a bigBed track')
    parser.add_argument('-f', '--fasta', required=True,
                        help='FASTA file of the genome, converted into a 2bit file')
    parser.add_argument('-d', '--directory',
                        help='Directory of the tool, holding the tools/ and templates/ folders (default: .)')
    parser.add_argument('-e', '--extra_files_path',
                        help='Directory where the hub files and myHub.zip are written (default: .)')
    parser.add_argument('-o', '--output', required=True,
                        help='Path of the HTML report listing the generated files')

    global toolDirectory
    global extra_files_path

    # Get the args passed in parameter
    args = parser.parse_args()

    if args.directory:
        toolDirectory = args.directory
    if args.extra_files_path:
        extra_files_path = args.extra_files_path

    # Opening both inputs up front fails early on a missing or unreadable file
    with open(args.gff3, 'r') as inputGFF3File, open(args.fasta, 'r') as inputFastaFile:
        _buildHubArchive(inputGFF3File.name, inputFastaFile)

    # Just a test to output a simple HTML
    _writeHtmlReport(args.output)

    sys.exit(0)
|
|
170
|
|
171
|
|
def createAssemblyHub(outputZip, twoBitName):
    """Create the myHub folder tree under extra_files_path and fill its text files.

    outputZip: accepted but not used by this function.
    twoBitName: file name of the 2bit genome, forwarded to fillGenomesTxt.

    Returns the path of the hub root folder (<extra_files_path>/myHub).
    """
    # TODO: Manage to put every fill Function in a file dedicated for reading reasons

    def ensureFolder(folderPath):
        # Create the folder (and missing parents) only when it is absent
        if not os.path.exists(folderPath):
            os.makedirs(folderPath)

    hubRoot = os.path.join(extra_files_path, "myHub")
    # TODO: Generate the name depending on the specie
    specieFolder = os.path.join(hubRoot, "dbia3")
    tracksFolder = os.path.join(specieFolder, "tracks")

    # Hub root and its three descriptive files
    ensureFolder(hubRoot)
    fillGenomesTxt(os.path.join(hubRoot, 'genomes.txt'), twoBitName)
    fillHubTxt(os.path.join(hubRoot, 'hub.txt'))
    # TODO: Change the name and get it depending on the specie
    fillHubHtmlFile(os.path.join(hubRoot, 'dbia.html'))

    # Specie folder and the files describing the genome
    ensureFolder(specieFolder)
    fillTrackDbTxtFile(os.path.join(specieFolder, 'trackDb.txt'))
    fillDescriptionHtmlFile(os.path.join(specieFolder, 'description.html'))
    # TODO: If not inputs for this, do no create the file
    fillGroupsTxtFile(os.path.join(specieFolder, 'groups.txt'))

    # Folder receiving the track data files
    ensureFolder(tracksFolder)

    return hubRoot
|
|
217
|
|
218
|
|
def fillGenomesTxt(genomesTxtFilePath, twoBitName):
    """Render the genomesAssembly template into the file at *genomesTxtFilePath*.

    genomesTxtFilePath: destination path of genomes.txt.
    twoBitName: file name of the 2bit genome; referenced as dbia3/<twoBitName>.

    The template ``layout.txt`` is looked up under
    ``<toolDirectory>/templates/genomesAssembly``. All other values are
    currently hard-coded for Drosophila biarmipes (dbia3).
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly')
    mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
    mytemplate = mylookup.get_template("layout.txt")
    with open(genomesTxtFilePath, 'w') as genomesTxtFile:
        # Write the content of the file genomes.txt
        twoBitPath = os.path.join('dbia3/', twoBitName)
        htmlMakoRendered = mytemplate.render(
            genomeName="dbia3",
            trackDbPath="dbia3/trackDb.txt",
            groupsPath="dbia3/groups.txt",
            # Genus spelled "Drosophila", consistent with fillHubHtmlFile
            genomeDescription="March 2013 Drosophila biarmipes unplaced genomic scaffold",
            twoBitPath=twoBitPath,
            organismName="Drosophila biarmipes",
            defaultPosition="contig1",
            orderKey="4500",
            scientificName="Drosophila biarmipes",
            pathAssemblyHtmlDescription="dbia3/description.html"
        )
        genomesTxtFile.write(htmlMakoRendered)
|
|
242
|
|
243
|
|
def fillHubTxt(hubTxtFilePath):
    """Render the hubTxt template into the file at *hubTxtFilePath*.

    The template ``layout.txt`` is looked up under
    ``<toolDirectory>/templates/hubTxt``; every value is currently hard-coded.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateFolder = os.path.join(toolDirectory, 'templates/hubTxt')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    hubTemplate = lookup.get_template('layout.txt')

    hubValues = {
        'hubName': 'dbiaOnly',
        'shortLabel': 'dbia',
        'longLabel': 'This hub only contains dbia with the gene predictions',
        'genomesFile': 'genomes.txt',
        'email': 'rmarenco@gwu.edu',
        'descriptionUrl': 'dbia.html',
    }

    with open(hubTxtFilePath, 'w') as hubTxtFile:
        # Write the content of the file hub.txt
        hubTxtFile.write(hubTemplate.render(**hubValues))
|
|
260
|
|
261
|
|
def fillHubHtmlFile(hubHtmlFilePath):
    """Render the hubDescription template into the file at *hubHtmlFilePath*.

    The template ``layout.txt`` is looked up under
    ``<toolDirectory>/templates/hubDescription``; every value is currently
    hard-coded.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateFolder = os.path.join(toolDirectory, 'templates/hubDescription')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    descriptionTemplate = lookup.get_template("layout.txt")

    descriptionValues = {
        'specie': 'Dbia',
        'toolUsed': 'Augustus',
        'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
        'genomeID': '3499',
        'specieFullName': 'Drosophila biarmipes',
    }

    with open(hubHtmlFilePath, 'w') as hubHtmlFile:
        # Write the content of the hub description page
        hubHtmlFile.write(descriptionTemplate.render(**descriptionValues))
|
|
287
|
|
288
|
|
def fillTrackDbTxtFile(trackDbTxtFilePath):
    """Render the trackDb template into the file at *trackDbTxtFilePath*.

    The template ``layout.txt`` is looked up under
    ``<toolDirectory>/templates/trackDb``; every value is currently hard-coded.
    """
    # TODO: Modify according to the files passed in parameter
    templateFolder = os.path.join(toolDirectory, 'templates/trackDb')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    trackDbTemplate = lookup.get_template("layout.txt")

    # NOTE(review): trackDataURL holds a label-like value while longLabel holds
    # the bigBed path; they look swapped. Confirm against the template before
    # changing either one.
    trackValues = {
        'trackName': 'augustusTrack',
        'trackDataURL': 'Augustus_dbia3',
        'shortLabel': 'a_dbia',
        'longLabel': 'tracks/augustusDbia3.bb',
        'trackType': 'bigBed 12 +',
        'visibility': 'dense',
    }

    with open(trackDbTxtFilePath, 'w') as trackDbFile:
        trackDbFile.write(trackDbTemplate.render(**trackValues))
|
|
303
|
|
304
|
|
def fillDescriptionHtmlFile(descriptionHtmlFilePath):
    """Render the specieDescription template into *descriptionHtmlFilePath*.

    The template ``layout.txt`` is looked up under
    ``<toolDirectory>/templates/specieDescription``; the description text is
    currently hard-coded.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateFolder = os.path.join(toolDirectory, 'templates/specieDescription')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    specieTemplate = lookup.get_template("layout.txt")

    with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
        # Write the content of the specie description page
        renderedDescription = specieTemplate.render(
            specieDescription='This is the description of the dbia'
        )
        descriptionHtmlFile.write(renderedDescription)
|
|
316
|
|
317
|
|
def fillGroupsTxtFile(groupsTxtFilePath):
    """Create (or truncate) the file at *groupsTxtFilePath* and render the groupsTxt template.

    The template ``layout.txt`` is looked up under
    ``<toolDirectory>/templates/groupsTxt``. The rendered text is not written,
    so the file is left empty.
    """
    templateFolder = os.path.join(toolDirectory, 'templates/groupsTxt')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    groupsTemplate = lookup.get_template("layout.txt")

    # NOTE(review): 'prioriy' looks like a typo of 'priority'; the template is
    # not visible here, so confirm which name it uses before renaming the key.
    groupValues = {
        'mapName': 'map',
        'labelMapping': 'Mapping',
        'prioriy': '2',
        'isClosed': '0',
    }

    with open(groupsTxtFilePath, 'w'):
        # Opening in 'w' mode is what creates the empty groups.txt.
        # NOTE(review): the render result is discarded; presumably writing it
        # was disabled on purpose. Confirm before re-enabling.
        groupsTemplate.render(**groupValues)
|
|
331
|
|
332
|
|
def createZip(myZip, folder):
    """Add every file found under *folder*, recursively, to the open archive *myZip*.

    Each file is stored under the path it was found at (folder path included).
    """
    for currentDir, _subDirs, fileNames in os.walk(folder):
        # Directories are not added on their own: they appear through their files
        filePaths = [os.path.join(currentDir, fileName) for fileName in fileNames]
        for filePath in filePaths:
            myZip.write(filePath)
|
|
338
|
|
# Run the tool only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main(sys.argv)
|