comparison hub-archive-creator-1.6/hubArchiveCreator.py @ 0:163b2de763ea draft

Upload the full hubArchiveCreator archive
author rmarenco
date Tue, 01 Mar 2016 19:43:25 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:163b2de763ea
1 #!/usr/bin/python
2 """
3 This Galaxy tool prepares your files for
4 Assembly Hub visualization.
5 Program test arguments:
6 hubArchiveCreator.py -g test_data/augustusDbia3.gff3 -f test_data/dbia3.fa -d . -o output.zip
7 """
8
9 import sys
10 import tempfile
11 import getopt
12 import zipfile
13 import subprocess
14 import os
15 import argparse
16
17 from mako.template import Template
18 from mako.lookup import TemplateLookup
19
20 # Internal dependencies
21 from twoBitCreator import twoBitFileCreator
22
# TODO: REMOVE THIS FROM BEING A GLOBAL VARIABLE
# Base directory under which the 'tools/...' binaries and 'templates/...'
# folders are looked up; main() overrides it with -d/--directory.
toolDirectory = '.'
# Directory in which the "myHub" tree and 'myHub.zip' are created;
# main() overrides it with -e/--extra_files_path.
extra_files_path = '.'
26
def main(argv):
    """Build the Assembly Hub tree and its zip archive from a GFF3 and a FASTA file.

    Steps, in order:
      1. create the hub skeleton (createAssemblyHub);
      2. gff3ToGenePred -> genePredToBed -> sort to get a sorted BED;
      3. twoBitFileCreator + twoBitInfo + sort to get the chrom.sizes;
      4. bedToBigBed to write the track into the hub "tracks" folder;
      5. zip the hub tree into 'myHub.zip' (createZip);
      6. write an HTML index of everything under extra_files_path to --output.

    :param argv: full argument vector, program name first (e.g. sys.argv).
        When empty or None, argparse falls back to sys.argv.
    :raises subprocess.CalledProcessError: if an external tool exits non-zero.

    On success the interpreter is terminated through sys.exit(0).
    """
    global toolDirectory
    global extra_files_path

    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Create an Assembly Hub archive from a GFF3 annotation and a FASTA genome.')
    parser.add_argument('-g', '--gff3', required=True,
                        help='Input GFF3 annotation file')
    parser.add_argument('-f', '--fasta', required=True,
                        help='Input FASTA genome file')
    parser.add_argument('-d', '--directory',
                        help='Directory containing the "tools" and "templates" folders (default: .)')
    parser.add_argument('-e', '--extra_files_path',
                        help='Directory where the hub tree and myHub.zip are written (default: .)')
    parser.add_argument('-o', '--output', required=True,
                        help='Path of the HTML file listing the generated files')

    args = parser.parse_args(argv[1:] if argv else None)

    # Fail early, with a readable message, when an input is missing
    for inputPath in (args.gff3, args.fasta):
        if not os.path.isfile(inputPath):
            parser.error('Input file not found: ' + inputPath)

    if args.directory:
        toolDirectory = args.directory
    if args.extra_files_path:
        extra_files_path = args.extra_files_path

    # The zip is created directly inside extra_files_path, so it must exist first
    if not os.path.isdir(extra_files_path):
        os.makedirs(extra_files_path)

    # TODO: Merge the following processing into a function as it is also used in twoBitCreator
    nameTwoBit = os.path.splitext(os.path.basename(args.fasta))[0] + '.2bit'

    # TODO: See if we need these temporary files as part of the generated files
    # They are only used through their names and are removed when closed,
    # which happens at interpreter exit.
    genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
    unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
    sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
    twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
    chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")

    zipPath = os.path.join(extra_files_path, 'myHub.zip')
    with open(args.fasta, 'r') as inputFastaFile, \
            zipfile.ZipFile(zipPath, 'w') as outputZip:
        # Create the structure of the Assembly Hub
        rootAssemblyHub = createAssemblyHub(outputZip, twoBitName=nameTwoBit)

        # gff3ToGenePred processing; check_call waits for completion and
        # raises if the tool fails, so the next step never runs on bad input
        subprocess.check_call(
            [os.path.join(toolDirectory, 'tools/gff3ToGenePred'),
             args.gff3,
             genePredFile.name])

        # genePredToBed processing
        subprocess.check_call(
            [os.path.join(toolDirectory, 'tools/genePredToBed'),
             genePredFile.name,
             unsortedBedFile.name])

        # Sort processing: by chromosome name, then numerically by start
        subprocess.check_call(
            ['sort',
             '-k1,1',
             '-k2,2n',
             unsortedBedFile.name,
             '-o',
             sortedBedFile.name])

        # TODO: Generate the name depending on the specie
        mySpecieFolderPath = os.path.join(rootAssemblyHub, "dbia3")

        # 2bit file creation from input fasta
        twoBitFile = twoBitFileCreator(inputFastaFile, toolDirectory, mySpecieFolderPath)

        # Generate the chrom.sizes
        # TODO: Isolate in a function
        # We first get the twoBit Infos
        twoBitInfoCommand = [os.path.join(toolDirectory, 'tools/twoBitInfo'),
                             twoBitFile.name,
                             'stdout']
        p = subprocess.Popen(twoBitInfoCommand,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        twoBitInfo_out, twoBitInfo_err = p.communicate()
        if p.returncode != 0:
            # Surface the captured diagnostics before aborting
            sys.stderr.write(twoBitInfo_err)
            raise subprocess.CalledProcessError(p.returncode, twoBitInfoCommand, twoBitInfo_out)
        twoBitInfoFile.write(twoBitInfo_out)
        twoBitInfoFile.flush()

        # Then we sort it by decreasing size to obtain the chrom.sizes
        subprocess.check_call(
            ['sort',
             '-k2rn',
             twoBitInfoFile.name,
             '-o',
             chromSizesFile.name])

        # bedToBigBed processing
        # bedToBigBed augustusDbia3.sortbed chrom.sizes augustusDbia3.bb
        # TODO: Find the best to get this path without hardcoding it
        myTrackFolderPath = os.path.join(mySpecieFolderPath, "tracks")
        # TODO: Change the name of the bb, to tool + genome + .bb
        myBigBedFilePath = os.path.join(myTrackFolderPath, 'augustusDbia3.bb')
        # The tool writes the output itself: only hand it the path
        subprocess.check_call(
            [os.path.join(toolDirectory, 'tools/bedToBigBed'),
             sortedBedFile.name,
             chromSizesFile.name,
             myBigBedFilePath])

        # The .bb file lives inside the hub tree, so it is zipped with the rest
        createZip(outputZip, rootAssemblyHub)

    # Just a test to output a simple HTML
    with open(args.output, 'w') as htmlOutput:
        htmlOutput.write('<html>')
        htmlOutput.write('<body>')
        htmlOutput.write('<p>')
        htmlOutput.write('The following generated by Hub Archive Creator:')
        htmlOutput.write('</p>')
        htmlOutput.write('<ul>')
        for root, dirs, files in os.walk(extra_files_path):
            # Get all files and get all relative links at the same time
            relDir = os.path.relpath(root, extra_files_path)
            for fileName in files:
                relPath = os.path.join(relDir, fileName)
                htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', relPath, relPath))
        htmlOutput.write('</ul>')
        htmlOutput.write('</body>')
        htmlOutput.write('</html>')

    sys.exit(0)
170
171
def createAssemblyHub(outputZip, twoBitName):
    """Create the "myHub" directory tree under extra_files_path and fill it.

    Writes genomes.txt, hub.txt and dbia.html at the hub root, then
    trackDb.txt, description.html and groups.txt in the "dbia3" folder,
    and finally creates the "dbia3/tracks" folder.

    :param outputZip: not used by this function.
    :param twoBitName: 2bit file name forwarded to fillGenomesTxt.
    :return: path of the hub root directory.
    """
    # TODO: Manage to put every fill Function in a file dedicated for reading reasons

    def ensureFolder(folderPath):
        # Only create the folder when nothing exists at that path yet
        if not os.path.exists(folderPath):
            os.makedirs(folderPath)

    # Root directory of the hub
    hubRoot = os.path.join(extra_files_path, "myHub")
    ensureFolder(hubRoot)

    # Hub level files
    fillGenomesTxt(os.path.join(hubRoot, 'genomes.txt'), twoBitName)
    fillHubTxt(os.path.join(hubRoot, 'hub.txt'))
    # TODO: Change the name and get it depending on the specie
    fillHubHtmlFile(os.path.join(hubRoot, 'dbia.html'))

    # Specie folder
    # TODO: Generate the name depending on the specie
    specieFolder = os.path.join(hubRoot, "dbia3")
    ensureFolder(specieFolder)

    # Specie level files
    fillTrackDbTxtFile(os.path.join(specieFolder, 'trackDb.txt'))
    fillDescriptionHtmlFile(os.path.join(specieFolder, 'description.html'))
    # TODO: If not inputs for this, do no create the file
    fillGroupsTxtFile(os.path.join(specieFolder, 'groups.txt'))

    # Folder receiving the track files
    ensureFolder(os.path.join(specieFolder, "tracks"))

    return hubRoot
217
218
def fillGenomesTxt(genomesTxtFilePath, twoBitName):
    """Render the 'templates/genomesAssembly' layout into genomes.txt.

    :param genomesTxtFilePath: path of the file to (over)write.
    :param twoBitName: file name of the 2bit file; it is referenced in the
        rendered file as 'dbia3/<twoBitName>'.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly')
    mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
    mytemplate = mylookup.get_template("layout.txt")
    twoBitPath = os.path.join('dbia3/', twoBitName)
    with open(genomesTxtFilePath, 'w') as genomesTxtFile:
        # Write the content of the file genomes.txt
        # The genus is spelled "Drosophila", as in fillHubHtmlFile
        htmlMakoRendered = mytemplate.render(
            genomeName="dbia3",
            trackDbPath="dbia3/trackDb.txt",
            groupsPath="dbia3/groups.txt",
            genomeDescription="March 2013 Drosophila biarmipes unplaced genomic scaffold",
            twoBitPath=twoBitPath,
            organismName="Drosophila biarmipes",
            defaultPosition="contig1",
            orderKey="4500",
            scientificName="Drosophila biarmipes",
            pathAssemblyHtmlDescription="dbia3/description.html"
        )
        genomesTxtFile.write(htmlMakoRendered)
242
243
def fillHubTxt(hubTxtFilePath):
    """Render the 'templates/hubTxt' layout and write it to hubTxtFilePath.

    :param hubTxtFilePath: path of the hub.txt file to (over)write.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateFolder = os.path.join(toolDirectory, 'templates/hubTxt')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    hubTemplate = lookup.get_template('layout.txt')

    # Values injected into the template
    hubFields = {
        'hubName': 'dbiaOnly',
        'shortLabel': 'dbia',
        'longLabel': 'This hub only contains dbia with the gene predictions',
        'genomesFile': 'genomes.txt',
        'email': 'rmarenco@gwu.edu',
        'descriptionUrl': 'dbia.html',
    }

    with open(hubTxtFilePath, 'w') as hubTxtFile:
        # Write the content of the file hub.txt
        hubTxtFile.write(hubTemplate.render(**hubFields))
260
261
def fillHubHtmlFile(hubHtmlFilePath):
    """Render the 'templates/hubDescription' layout and write it to hubHtmlFilePath.

    :param hubHtmlFilePath: path of the hub description file to (over)write.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateFolder = os.path.join(toolDirectory, 'templates/hubDescription')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    descriptionTemplate = lookup.get_template("layout.txt")

    # Values injected into the template
    descriptionFields = {
        'specie': 'Dbia',
        'toolUsed': 'Augustus',
        'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
        'genomeID': '3499',
        'specieFullName': 'Drosophila biarmipes',
    }

    with open(hubHtmlFilePath, 'w') as hubHtmlFile:
        # Write the content of the hub description file
        hubHtmlFile.write(descriptionTemplate.render(**descriptionFields))
287
288
def fillTrackDbTxtFile(trackDbTxtFilePath):
    """Render the 'templates/trackDb' layout and write it to trackDbTxtFilePath.

    :param trackDbTxtFilePath: path of the trackDb.txt file to (over)write.
    """
    # TODO: Modify according to the files passed in parameter
    templateFolder = os.path.join(toolDirectory, 'templates/trackDb')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    trackDbTemplate = lookup.get_template("layout.txt")

    # Values injected into the template
    # NOTE(review): trackDataURL and longLabel look swapped (the .bb path is
    # passed as the label) - confirm against templates/trackDb/layout.txt
    trackFields = {
        'trackName': 'augustusTrack',
        'trackDataURL': 'Augustus_dbia3',
        'shortLabel': 'a_dbia',
        'longLabel': 'tracks/augustusDbia3.bb',
        'trackType': 'bigBed 12 +',
        'visibility': 'dense',
    }

    with open(trackDbTxtFilePath, 'w') as trackDbFile:
        trackDbFile.write(trackDbTemplate.render(**trackFields))
303
304
def fillDescriptionHtmlFile(descriptionHtmlFilePath):
    """Render the 'templates/specieDescription' layout and write it to descriptionHtmlFilePath.

    :param descriptionHtmlFilePath: path of the description file to (over)write.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateFolder = os.path.join(toolDirectory, 'templates/specieDescription')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    specieTemplate = lookup.get_template("layout.txt")

    with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
        # Write the content of the description file
        renderedDescription = specieTemplate.render(
            specieDescription='This is the description of the dbia',
        )
        descriptionHtmlFile.write(renderedDescription)
316
317
def fillGroupsTxtFile(groupsTxtFilePath):
    """Create (or truncate) groupsTxtFilePath and render the 'templates/groupsTxt' layout.

    The rendered text is not written: the file is left empty.

    :param groupsTxtFilePath: path of the groups.txt file to create.
    """
    templateFolder = os.path.join(toolDirectory, 'templates/groupsTxt')
    lookup = TemplateLookup(directories=[templateFolder], output_encoding='utf-8', encoding_errors='replace')
    groupsTemplate = lookup.get_template("layout.txt")

    # Values injected into the template
    # NOTE(review): 'prioriy' looks like a typo for 'priority' - confirm
    # against the variable name used in templates/groupsTxt/layout.txt
    groupsFields = {
        'mapName': 'map',
        'labelMapping': 'Mapping',
        'prioriy': '2',
        'isClosed': '0',
    }

    with open(groupsTxtFilePath, 'w'):
        # NOTE(review): the write of the rendered content is disabled, so
        # groups.txt stays empty - confirm whether this is intended
        groupsTemplate.render(**groupsFields)
331
332
def createZip(myZip, folder):
    """Add every file found while walking *folder* to the open archive *myZip*.

    No archive name is given, so each entry is stored under the path that
    was walked (folder prefix included).

    :param myZip: object exposing write(path), e.g. a writable zipfile.ZipFile.
    :param folder: directory walked recursively with os.walk.
    """
    for currentDir, _subDirs, fileNames in os.walk(folder):
        # Rebuild the path of each file from the directory being visited
        for fileName in fileNames:
            filePath = os.path.join(currentDir, fileName)
            myZip.write(filePath)
338
# Script entry point: only runs when the file is executed directly, and
# hands the full process argument vector to main().
if __name__ == "__main__":
    main(sys.argv)