Mercurial > repos > rmarenco > hub_archive_creator
comparison hub-archive-creator-1.6/hubArchiveCreator.py @ 0:163b2de763ea draft
Upload the full hubArchiveCreator archive
author | rmarenco |
---|---|
date | Tue, 01 Mar 2016 19:43:25 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:163b2de763ea |
---|---|
1 #!/usr/bin/python | |
2 """ | |
3 This Galaxy tool prepares your files for | |
4 Assembly Hub visualization. | |
5 Program test arguments: | |
6 hubArchiveCreator.py -g test_data/augustusDbia3.gff3 -f test_data/dbia3.fa -d . -o output.zip | |
7 """ | |
8 | |
9 import sys | |
10 import tempfile | |
11 import getopt | |
12 import zipfile | |
13 import subprocess | |
14 import os | |
15 import argparse | |
16 | |
17 from mako.template import Template | |
18 from mako.lookup import TemplateLookup | |
19 | |
20 # Internal dependencies | |
21 from twoBitCreator import twoBitFileCreator | |
22 | |
# TODO: REMOVE THIS FROM BEING A GLOBAL VARIABLE
# Base directory joined with 'tools/...' and 'templates/...' to locate the
# helper binaries and the Mako templates; main() overrides it from
# -d/--directory.
toolDirectory = '.'
# Directory under which the myHub/ tree and myHub.zip are created; main()
# overrides it from -e/--extra_files_path.
extra_files_path = '.'
26 | |
def _parseArguments(argv):
    """Parse the command line.

    `argv` is the full argument vector including the program name (as in
    sys.argv). When it is empty or None, argparse falls back to sys.argv.
    """
    parser = argparse.ArgumentParser(
        description='Create an Assembly Hub archive from a GFF3 annotation and a FASTA genome.')

    parser.add_argument('-g', '--gff3', required=True,
                        help='GFF3 file holding the gene predictions')
    parser.add_argument('-f', '--fasta', required=True,
                        help='FASTA file of the genome, converted to 2bit')
    parser.add_argument('-d', '--directory',
                        help='Directory containing the tools/ and templates/ folders (default: .)')
    parser.add_argument('-e', '--extra_files_path',
                        help='Directory where myHub/ and myHub.zip are created (default: .)')
    parser.add_argument('-o', '--output', required=True,
                        help='HTML file listing the generated files')

    return parser.parse_args(argv[1:] if argv else None)


def _runTool(command):
    """Run an external command, wait for it to finish and return its exit status."""
    # TODO: Check if no errors (the exit status is currently not enforced by callers)
    return subprocess.call(command)


def _writeHtmlIndex(outputPath, rootDirectory):
    """Write an HTML page linking every file found under `rootDirectory`.

    Links are relative to `rootDirectory`.
    """
    with open(outputPath, 'w') as htmlOutput:
        htmlOutput.write('<html>')
        htmlOutput.write('<body>')
        htmlOutput.write('<p>')
        htmlOutput.write('The following generated by Hub Archive Creator:')
        htmlOutput.write('</p>')
        htmlOutput.write('<ul>')
        for root, dirs, files in os.walk(rootDirectory):
            # Get all files and get all relative links at the same time
            relDir = os.path.relpath(root, rootDirectory)
            for fileName in files:
                link = os.path.join(relDir, fileName)
                htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', link, link))
        # The list must be closed, not re-opened
        htmlOutput.write('</ul>')
        htmlOutput.write('</body>')
        htmlOutput.write('</html>')


def main(argv):
    """Build the Assembly Hub tree and archive, then exit with status 0.

    Steps: parse the arguments, create the hub skeleton, convert the GFF3
    into a sorted BED (gff3ToGenePred, genePredToBed, sort), create the 2bit
    file and its chrom.sizes (twoBitInfo, sort), build the bigBed track
    (bedToBigBed), zip the hub folder and finally write an HTML page listing
    every generated file.

    `argv` is the full argument vector, program name included.
    """
    global toolDirectory
    global extra_files_path

    # Get the args passed in parameter
    args = _parseArguments(argv)

    # Every handle opened below is registered here so it is always closed
    openedFiles = []
    outputZip = None
    try:
        inputGFF3File = open(args.gff3, 'r')
        openedFiles.append(inputGFF3File)
        inputFastaFile = open(args.fasta, 'r')
        openedFiles.append(inputFastaFile)

        if args.directory:
            toolDirectory = args.directory
        if args.extra_files_path:
            extra_files_path = args.extra_files_path

        # The archive is created directly inside extra_files_path, so that
        # directory has to exist before the zip is opened
        if not os.path.isdir(extra_files_path):
            os.makedirs(extra_files_path)
        outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w')

        # Create the structure of the Assembly Hub
        # TODO: Merge the following processing into a function as it is also used in twoBitCreator
        baseNameFasta = os.path.basename(inputFastaFile.name)
        suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
        nameTwoBit = suffixTwoBit + '.2bit'

        rootAssemblyHub = createAssemblyHub(outputZip, twoBitName=nameTwoBit)

        # TODO: See if we need these temporary files as part of the generated files
        # NOTE: the bufsize keyword only exists in Python 2's tempfile module
        genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
        openedFiles.append(genePredFile)
        unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
        openedFiles.append(unsortedBedFile)
        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
        openedFiles.append(sortedBedFile)
        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
        openedFiles.append(twoBitInfoFile)
        chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
        openedFiles.append(chromSizesFile)

        # gff3ToGenePred processing; it has to finish before genePredToBed can begin
        _runTool([os.path.join(toolDirectory, 'tools/gff3ToGenePred'),
                  inputGFF3File.name,
                  genePredFile.name])

        # genePredToBed processing
        _runTool([os.path.join(toolDirectory, 'tools/genePredToBed'),
                  genePredFile.name,
                  unsortedBedFile.name])

        # Sort processing: first column as text, then second column numerically
        _runTool(['sort',
                  '-k1,1',
                  '-k2,2n',
                  unsortedBedFile.name,
                  '-o',
                  sortedBedFile.name])

        mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3")

        # 2bit file creation from input fasta
        twoBitFile = twoBitFileCreator(inputFastaFile, toolDirectory, mySpecieFolderPath)

        # Generate the chrom.sizes
        # TODO: Isolate in a function
        # We first get the twoBit Infos
        p = subprocess.Popen(
            [os.path.join(toolDirectory, 'tools/twoBitInfo'),
             twoBitFile.name,
             'stdout'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        twoBitInfo_out, twoBitInfo_err = p.communicate()
        twoBitInfoFile.write(twoBitInfo_out)
        # Make sure sort sees the full content when it reads the file by name
        twoBitInfoFile.flush()

        # Then we get the output to inject into the sort
        _runTool(['sort',
                  '-k2rn',
                  twoBitInfoFile.name,
                  '-o',
                  chromSizesFile.name])

        # bedToBigBed processing
        # bedToBigBed augustusDbia3.sortbed chrom.sizes augustusDbia3.bb
        # TODO: Find the best to get this path without hardcoding it
        myTrackFolderPath = os.path.join(mySpecieFolderPath, "tracks")
        # TODO: Change the name of the bb, to tool + genome + .bb
        myBigBedFilePath = os.path.join(myTrackFolderPath, 'augustusDbia3.bb')
        # bedToBigBed writes the file itself; only the path is handed over, so
        # no Python handle is left open on it while the tool runs
        _runTool([os.path.join(toolDirectory, 'tools/bedToBigBed'),
                  sortedBedFile.name,
                  chromSizesFile.name,
                  myBigBedFilePath])

        createZip(outputZip, rootAssemblyHub)
    finally:
        if outputZip is not None:
            outputZip.close()
        for openedFile in openedFiles:
            openedFile.close()

    # Just a test to output a simple HTML
    _writeHtmlIndex(args.output, extra_files_path)

    sys.exit(0)
170 | |
171 | |
def createAssemblyHub(outputZip, twoBitName):
    """Create the on-disk skeleton of the Assembly Hub and return its root path.

    The tree is built under the module-level `extra_files_path`:
    myHub/{genomes.txt, hub.txt, dbia.html} and
    myHub/dbia3/{trackDb.txt, description.html, groups.txt, tracks/}.

    `outputZip` is accepted but not used by this function.
    `twoBitName` is forwarded to fillGenomesTxt.
    """
    # TODO: Manage to put every fill Function in a file dedicated for reading reasons

    def ensureDirectory(path):
        # Create the directory (and its parents) only when it is missing
        if not os.path.exists(path):
            os.makedirs(path)

    # Root directory of the hub
    hubRoot = os.path.join(extra_files_path, "myHub")
    ensureDirectory(hubRoot)

    # Files living at the root of the hub
    fillGenomesTxt(os.path.join(hubRoot, 'genomes.txt'), twoBitName)
    fillHubTxt(os.path.join(hubRoot, 'hub.txt'))
    # TODO: Change the name and get it depending on the specie
    fillHubHtmlFile(os.path.join(hubRoot, 'dbia.html'))

    # Specie folder
    # TODO: Generate the name depending on the specie
    specieRoot = os.path.join(hubRoot, "dbia3")
    ensureDirectory(specieRoot)

    # Files living in the specie folder
    fillTrackDbTxtFile(os.path.join(specieRoot, 'trackDb.txt'))
    fillDescriptionHtmlFile(os.path.join(specieRoot, 'description.html'))
    # TODO: If not inputs for this, do no create the file
    fillGroupsTxtFile(os.path.join(specieRoot, 'groups.txt'))

    # Folder receiving the track files
    ensureDirectory(os.path.join(specieRoot, "tracks"))

    return hubRoot
217 | |
218 | |
def fillGenomesTxt(genomesTxtFilePath, twoBitName):
    """Render templates/genomesAssembly/layout.txt into `genomesTxtFilePath`.

    `twoBitName` is the 2bit file name; it is referenced as dbia3/<twoBitName>
    in the rendered content. All other values are currently hard-coded.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateDirectory = os.path.join(toolDirectory, 'templates/genomesAssembly')
    lookup = TemplateLookup(directories=[templateDirectory],
                            output_encoding='utf-8',
                            encoding_errors='replace')
    layout = lookup.get_template("layout.txt")

    with open(genomesTxtFilePath, 'w') as outputFile:
        fields = {
            'genomeName': "dbia3",
            'trackDbPath': "dbia3/trackDb.txt",
            'groupsPath': "dbia3/groups.txt",
            'genomeDescription': "March 2013 Drosophilia biarmipes unplaced genomic scaffold",
            'twoBitPath': os.path.join('dbia3/', twoBitName),
            'organismName': "Drosophilia biarmipes",
            'defaultPosition': "contig1",
            'orderKey': "4500",
            'scientificName': "Drosophilia biarmipes",
            'pathAssemblyHtmlDescription': "dbia3/description.html",
        }
        outputFile.write(layout.render(**fields))
242 | |
243 | |
def fillHubTxt(hubTxtFilePath):
    """Render templates/hubTxt/layout.txt into `hubTxtFilePath` with hard-coded values."""
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateDirectory = os.path.join(toolDirectory, 'templates/hubTxt')
    lookup = TemplateLookup(directories=[templateDirectory],
                            output_encoding='utf-8',
                            encoding_errors='replace')
    layout = lookup.get_template('layout.txt')

    with open(hubTxtFilePath, 'w') as hubTxtFile:
        fields = {
            'hubName': 'dbiaOnly',
            'shortLabel': 'dbia',
            'longLabel': 'This hub only contains dbia with the gene predictions',
            'genomesFile': 'genomes.txt',
            'email': 'rmarenco@gwu.edu',
            'descriptionUrl': 'dbia.html',
        }
        hubTxtFile.write(layout.render(**fields))
260 | |
261 | |
def fillHubHtmlFile(hubHtmlFilePath):
    """Render templates/hubDescription/layout.txt into `hubHtmlFilePath` with hard-coded values."""
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateDirectory = os.path.join(toolDirectory, 'templates/hubDescription')
    lookup = TemplateLookup(directories=[templateDirectory],
                            output_encoding='utf-8',
                            encoding_errors='replace')
    layout = lookup.get_template("layout.txt")

    with open(hubHtmlFilePath, 'w') as outputFile:
        fields = {
            'specie': 'Dbia',
            'toolUsed': 'Augustus',
            'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
            'genomeID': '3499',
            'specieFullName': 'Drosophila biarmipes',
        }
        outputFile.write(layout.render(**fields))
287 | |
288 | |
def fillTrackDbTxtFile(trackDbTxtFilePath):
    """Render templates/trackDb/layout.txt into `trackDbTxtFilePath` with hard-coded values."""
    # TODO: Modify according to the files passed in parameter
    templateDirectory = os.path.join(toolDirectory, 'templates/trackDb')
    lookup = TemplateLookup(directories=[templateDirectory],
                            output_encoding='utf-8',
                            encoding_errors='replace')
    layout = lookup.get_template("layout.txt")

    with open(trackDbTxtFilePath, 'w') as outputFile:
        # NOTE(review): the values of trackDataURL and longLabel look swapped
        # (a label in the URL field, a file path in the label field); confirm
        # against templates/trackDb/layout.txt before changing them.
        fields = {
            'trackName': 'augustusTrack',
            'trackDataURL': 'Augustus_dbia3',
            'shortLabel': 'a_dbia',
            'longLabel': 'tracks/augustusDbia3.bb',
            'trackType': 'bigBed 12 +',
            'visibility': 'dense',
        }
        outputFile.write(layout.render(**fields))
303 | |
304 | |
def fillDescriptionHtmlFile(descriptionHtmlFilePath):
    """Render templates/specieDescription/layout.txt into `descriptionHtmlFilePath`."""
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateDirectory = os.path.join(toolDirectory, 'templates/specieDescription')
    lookup = TemplateLookup(directories=[templateDirectory],
                            output_encoding='utf-8',
                            encoding_errors='replace')
    layout = lookup.get_template("layout.txt")

    with open(descriptionHtmlFilePath, 'w') as outputFile:
        rendered = layout.render(specieDescription='This is the description of the dbia')
        outputFile.write(rendered)
316 | |
317 | |
def fillGroupsTxtFile(groupsTxtFilePath):
    """Create (or truncate) `groupsTxtFilePath` and render the groups template.

    The template templates/groupsTxt/layout.txt is rendered but its output is
    not written, so the file is left empty.
    """
    templateDirectory = os.path.join(toolDirectory, 'templates/groupsTxt')
    lookup = TemplateLookup(directories=[templateDirectory],
                            output_encoding='utf-8',
                            encoding_errors='replace')
    layout = lookup.get_template("layout.txt")

    with open(groupsTxtFilePath, 'w'):
        # NOTE(review): the key 'prioriy' looks like a typo for 'priority';
        # confirm against the template before renaming it.
        fields = {
            'mapName': 'map',
            'labelMapping': 'Mapping',
            'prioriy': '2',
            'isClosed': '0',
        }
        # NOTE(review): the rendered text is discarded; writing it to the file
        # is disabled in the original code - confirm whether that is intended.
        layout.render(**fields)
331 | |
332 | |
def createZip(myZip, folder):
    """Add every file found under `folder` to the open archive `myZip`.

    Members are named relative to the parent directory of `folder`, so the
    archive always contains `<folder name>/...` whatever directory prefix the
    caller used to reach `folder` (for a relative folder such as './myHub'
    this is the same layout as before).
    """
    # abspath also drops a trailing separator, which would otherwise make
    # dirname() return the folder itself instead of its parent
    parentDirectory = os.path.dirname(os.path.abspath(folder))
    for root, dirs, files in os.walk(folder):
        # Get all files and construct the dir at the same time
        for fileName in files:
            filePath = os.path.join(root, fileName)
            myZip.write(filePath, os.path.relpath(filePath, parentDirectory))
338 | |
# Script entry point: run main() with the full argument vector when the file
# is executed directly.
if __name__ == "__main__":
    main(sys.argv)