Mercurial > repos > rmarenco > hub_archive_creator
comparison hub-archive-creator-1.6/hubArchiveCreator.py @ 0:163b2de763ea draft
Upload the full hubArchiveCreator archive
| author | rmarenco |
|---|---|
| date | Tue, 01 Mar 2016 19:43:25 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:163b2de763ea |
|---|---|
| 1 #!/usr/bin/python | |
| 2 """ | |
| 3 This Galaxy tool prepares your files for | |
| 4 Assembly Hub visualization. | |
| 5 Program test arguments: | |
| 6 hubArchiveCreator.py -g test_data/augustusDbia3.gff3 -f test_data/dbia3.fa -d . -o output.zip | |
| 7 """ | |
| 8 | |
| 9 import sys | |
| 10 import tempfile | |
| 11 import getopt | |
| 12 import zipfile | |
| 13 import subprocess | |
| 14 import os | |
| 15 import argparse | |
| 16 | |
| 17 from mako.template import Template | |
| 18 from mako.lookup import TemplateLookup | |
| 19 | |
| 20 # Internal dependencies | |
| 21 from twoBitCreator import twoBitFileCreator | |
| 22 | |
# Module-level defaults read by main() and by the hub-building helpers.
# main() rebinds them when -d / -e are given on the command line.
# TODO: REMOVE THIS FROM BEING A GLOBAL VARIABLE
toolDirectory = extra_files_path = '.'
| 26 | |
def _buildHubArchive(inputGFF3File, inputFastaFile):
    """Generate the hub files from the open inputs and zip them into myHub.zip.

    Reads the module globals ``toolDirectory`` (where the ``tools`` binaries
    live) and ``extra_files_path`` (where the hub and the zip are written).

    Args:
        inputGFF3File: open file object of the GFF3 annotation; only its
            ``name`` is used here.
        inputFastaFile: open file object of the FASTA genome, handed to
            ``twoBitFileCreator``.

    Raises:
        subprocess.CalledProcessError: if an external tool exits with a
            non-zero status.
    """
    outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w')
    temporaryFiles = []

    def newTemporaryFile(**options):
        # Remember every temporary file so it is closed (and removed) below.
        temporaryFile = tempfile.NamedTemporaryFile(**options)
        temporaryFiles.append(temporaryFile)
        return temporaryFile

    try:
        # Create the structure of the Assembly Hub
        # TODO: Merge the following processing into a function as it is also used in twoBitCreator
        baseNameFasta = os.path.basename(inputFastaFile.name)
        nameTwoBit = os.path.splitext(baseNameFasta)[0] + '.2bit'

        rootAssemblyHub = createAssemblyHub(outputZip, twoBitName=nameTwoBit)

        # TODO: See if we need these temporary files as part of the generated files
        # NOTE: ``bufsize`` is the Python 2 spelling of the buffering argument.
        genePredFile = newTemporaryFile(bufsize=0, suffix=".genePred")
        unsortedBedFile = newTemporaryFile(bufsize=0, suffix=".unsortedBed")
        sortedBedFile = newTemporaryFile(suffix=".sortedBed")
        twoBitInfoFile = newTemporaryFile(bufsize=0)
        chromSizesFile = newTemporaryFile(bufsize=0, suffix=".chrom.sizes")

        # gff3ToGenePred processing; check_call waits for the tool to finish
        # (genePredToBed needs its output) and fails loudly on a bad exit status.
        subprocess.check_call(
            [os.path.join(toolDirectory, 'tools/gff3ToGenePred'),
             inputGFF3File.name,
             genePredFile.name])

        # genePredToBed processing
        subprocess.check_call(
            [os.path.join(toolDirectory, 'tools/genePredToBed'),
             genePredFile.name,
             unsortedBedFile.name])

        # Sort processing: first key as text, second key numerically
        subprocess.check_call(
            ['sort',
             '-k1,1',
             '-k2,2n',
             unsortedBedFile.name,
             '-o',
             sortedBedFile.name])

        mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3")

        # 2bit file creation from input fasta
        twoBitFile = twoBitFileCreator(inputFastaFile, toolDirectory, mySpecieFolderPath)

        # Generate the chrom.sizes
        # TODO: Isolate in a function
        # We first get the twoBit Infos
        twoBitInfoCommand = [os.path.join(toolDirectory, 'tools/twoBitInfo'),
                             twoBitFile.name,
                             'stdout']
        p = subprocess.Popen(
            twoBitInfoCommand,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        twoBitInfo_out, twoBitInfo_err = p.communicate()
        if p.returncode != 0:
            raise subprocess.CalledProcessError(p.returncode, twoBitInfoCommand, twoBitInfo_err)
        twoBitInfoFile.write(twoBitInfo_out)
        # Make sure sort sees the data even if the file object buffers writes.
        twoBitInfoFile.flush()

        # Then we get the output to inject into the sort
        subprocess.check_call(
            ['sort',
             '-k2rn',
             twoBitInfoFile.name,
             '-o',
             chromSizesFile.name])

        # bedToBigBed processing
        # bedToBigBed augustusDbia3.sortbed chrom.sizes augustusDbia3.bb
        # TODO: Find the best to get this path without hardcoding it
        myTrackFolderPath = os.path.join(mySpecieFolderPath, "tracks")
        # TODO: Change the name of the bb, to tool + genome + .bb
        myBigBedFilePath = os.path.join(myTrackFolderPath, 'augustusDbia3.bb')
        subprocess.check_call(
            [os.path.join(toolDirectory, 'tools/bedToBigBed'),
             sortedBedFile.name,
             chromSizesFile.name,
             myBigBedFilePath])

        # TODO: Add the .bb file in the zip, at the right place
        createZip(outputZip, rootAssemblyHub)
    finally:
        for temporaryFile in temporaryFiles:
            temporaryFile.close()
        outputZip.close()


def _writeHtmlListing(htmlPath, rootDirectory):
    """Write to htmlPath an HTML page linking every file found under rootDirectory.

    Links are relative to ``rootDirectory``.
    """
    with open(htmlPath, 'w') as htmlOutput:
        htmlOutput.write('<html>')
        htmlOutput.write('<body>')
        htmlOutput.write('<p>')
        htmlOutput.write('The following generated by Hub Archive Creator:')
        htmlOutput.write('</p>')
        htmlOutput.write('<ul>')
        for root, dirs, files in os.walk(rootDirectory):
            # Get all files and get all relative links at the same time
            relDir = os.path.relpath(root, rootDirectory)
            for fileName in files:
                relPath = os.path.join(relDir, fileName)
                htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', relPath, relPath))
        # Close the list (the tag used to be a second opening '<ul>')
        htmlOutput.write('</ul>')
        htmlOutput.write('</body>')
        htmlOutput.write('</html>')


def main(argv):
    """Command-line entry point: build an Assembly Hub archive and an HTML index.

    Parses the command line, creates the ``myHub`` tree and ``myHub.zip``
    under the extra-files directory, writes an HTML page listing every
    generated file to the ``--output`` path, then exits with status 0.

    Args:
        argv: accepted for compatibility with the caller; the options are
            read from ``sys.argv`` by ``argparse``.

    Raises:
        SystemExit: always; status 0 on success, or argparse's error status
            when the command line is invalid.
        subprocess.CalledProcessError: if an external tool fails.
    """
    global toolDirectory
    global extra_files_path

    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Create an Assembly Hub archive from a GFF3 annotation and a FASTA genome.')

    parser.add_argument('-g', '--gff3', required=True,
                        help='GFF3 annotation file to convert into a bigBed track')
    parser.add_argument('-f', '--fasta', required=True,
                        help='FASTA file of the genome, converted to 2bit')
    parser.add_argument('-d', '--directory',
                        help='Tool directory containing the "tools" and "templates" folders (default: .)')
    parser.add_argument('-e', '--extra_files_path',
                        help='Directory where myHub and myHub.zip are written (default: .)')
    parser.add_argument('-o', '--output', required=True,
                        help='HTML file listing the generated files')

    # Get the args passed in parameter
    args = parser.parse_args()

    if args.directory:
        toolDirectory = args.directory
    if args.extra_files_path:
        extra_files_path = args.extra_files_path

    # The inputs stay open only while the hub is being built.
    with open(args.gff3, 'r') as inputGFF3File, open(args.fasta, 'r') as inputFastaFile:
        _buildHubArchive(inputGFF3File, inputFastaFile)

    # Just a test to output a simple HTML
    _writeHtmlListing(args.output, extra_files_path)

    sys.exit(0)
| 170 | |
| 171 | |
def createAssemblyHub(outputZip, twoBitName):
    """Create the ``myHub`` directory tree and its text/HTML files.

    The tree is rooted at ``<extra_files_path>/myHub`` and holds
    ``genomes.txt``, ``hub.txt`` and ``dbia.html``, plus a ``dbia3`` folder
    with ``trackDb.txt``, ``description.html``, ``groups.txt`` and an empty
    ``tracks`` folder (created only if missing).

    Args:
        outputZip: not used by this function.
        twoBitName: file name of the 2bit file, forwarded to ``fillGenomesTxt``.

    Returns:
        The path of the hub root directory.
    """
    # TODO: Manage to put every fill Function in a file dedicated for reading reasons
    def ensureDirectory(path):
        # Create the directory (and missing parents) unless it already exists.
        if not os.path.exists(path):
            os.makedirs(path)
        return path

    # Hub root and the three files that sit directly inside it
    hubRoot = ensureDirectory(os.path.join(extra_files_path, "myHub"))
    fillGenomesTxt(os.path.join(hubRoot, 'genomes.txt'), twoBitName)
    fillHubTxt(os.path.join(hubRoot, 'hub.txt'))
    # TODO: Change the name and get it depending on the specie
    fillHubHtmlFile(os.path.join(hubRoot, 'dbia.html'))

    # Specie folder and its files
    # TODO: Generate the name depending on the specie
    specieFolder = ensureDirectory(os.path.join(hubRoot, "dbia3"))
    fillTrackDbTxtFile(os.path.join(specieFolder, 'trackDb.txt'))
    fillDescriptionHtmlFile(os.path.join(specieFolder, 'description.html'))
    # TODO: If not inputs for this, do no create the file
    fillGroupsTxtFile(os.path.join(specieFolder, 'groups.txt'))

    # Folder that will receive the track files
    ensureDirectory(os.path.join(specieFolder, "tracks"))

    return hubRoot
| 217 | |
| 218 | |
def fillGenomesTxt(genomesTxtFilePath, twoBitName):
    """Render the genomes template into ``genomesTxtFilePath``.

    The template ``layout.txt`` is looked up in
    ``<toolDirectory>/templates/genomesAssembly``. All values are currently
    hard-coded for the dbia3 assembly except the 2bit path.

    Args:
        genomesTxtFilePath: path of the file to (over)write.
        twoBitName: file name of the 2bit file; referenced in the output as
            ``dbia3/<twoBitName>``.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly')
    mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
    mytemplate = mylookup.get_template("layout.txt")
    with open(genomesTxtFilePath, 'w') as genomesTxtFile:
        # Write the content of the file genomes.txt
        twoBitPath = os.path.join('dbia3/', twoBitName)
        # The genus is spelled "Drosophila", matching fillHubHtmlFile.
        htmlMakoRendered = mytemplate.render(
            genomeName="dbia3",
            trackDbPath="dbia3/trackDb.txt",
            groupsPath="dbia3/groups.txt",
            genomeDescription="March 2013 Drosophila biarmipes unplaced genomic scaffold",
            twoBitPath=twoBitPath,
            organismName="Drosophila biarmipes",
            defaultPosition="contig1",
            orderKey="4500",
            scientificName="Drosophila biarmipes",
            pathAssemblyHtmlDescription="dbia3/description.html"
        )
        genomesTxtFile.write(htmlMakoRendered)
| 242 | |
| 243 | |
def fillHubTxt(hubTxtFilePath):
    """Render the hub template into ``hubTxtFilePath``.

    The template ``layout.txt`` is looked up in
    ``<toolDirectory>/templates/hubTxt``; every value is currently hard-coded.

    Args:
        hubTxtFilePath: path of the file to (over)write.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateDirectory = os.path.join(toolDirectory, 'templates/hubTxt')
    lookup = TemplateLookup(
        directories=[templateDirectory],
        output_encoding='utf-8',
        encoding_errors='replace')
    hubTemplate = lookup.get_template('layout.txt')

    hubSettings = dict(
        hubName='dbiaOnly',
        shortLabel='dbia',
        longLabel='This hub only contains dbia with the gene predictions',
        genomesFile='genomes.txt',
        email='rmarenco@gwu.edu',
        descriptionUrl='dbia.html',
    )

    with open(hubTxtFilePath, 'w') as hubTxtFile:
        # Write the content of the file hub.txt
        hubTxtFile.write(hubTemplate.render(**hubSettings))
| 260 | |
| 261 | |
def fillHubHtmlFile(hubHtmlFilePath):
    """Render the hub description template into ``hubHtmlFilePath``.

    The template ``layout.txt`` is looked up in
    ``<toolDirectory>/templates/hubDescription``; every value is currently
    hard-coded.

    Args:
        hubHtmlFilePath: path of the file to (over)write.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateDirectory = os.path.join(toolDirectory, 'templates/hubDescription')
    lookup = TemplateLookup(
        directories=[templateDirectory],
        output_encoding='utf-8',
        encoding_errors='replace')
    descriptionTemplate = lookup.get_template("layout.txt")

    descriptionValues = dict(
        specie='Dbia',
        toolUsed='Augustus',
        ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
        genomeID='3499',
        specieFullName='Drosophila biarmipes',
    )

    with open(hubHtmlFilePath, 'w') as hubHtmlFile:
        # Write the rendered hub description page
        hubHtmlFile.write(descriptionTemplate.render(**descriptionValues))
| 287 | |
| 288 | |
def fillTrackDbTxtFile(trackDbTxtFilePath):
    """Render the trackDb template into ``trackDbTxtFilePath``.

    The template ``layout.txt`` is looked up in
    ``<toolDirectory>/templates/trackDb``; every value is currently
    hard-coded for the Augustus track.

    Args:
        trackDbTxtFilePath: path of the file to (over)write.
    """
    # TODO: Modify according to the files passed in parameter
    templateDirectory = os.path.join(toolDirectory, 'templates/trackDb')
    lookup = TemplateLookup(
        directories=[templateDirectory],
        output_encoding='utf-8',
        encoding_errors='replace')
    trackDbTemplate = lookup.get_template("layout.txt")

    # NOTE(review): the values given to trackDataURL and longLabel look
    # swapped (a label vs. a .bb path) -- confirm against the template
    # before changing them.
    trackValues = dict(
        trackName='augustusTrack',
        trackDataURL='Augustus_dbia3',
        shortLabel='a_dbia',
        longLabel='tracks/augustusDbia3.bb',
        trackType='bigBed 12 +',
        visibility='dense',
    )

    with open(trackDbTxtFilePath, 'w') as trackDbFile:
        trackDbFile.write(trackDbTemplate.render(**trackValues))
| 303 | |
| 304 | |
def fillDescriptionHtmlFile(descriptionHtmlFilePath):
    """Render the specie description template into ``descriptionHtmlFilePath``.

    The template ``layout.txt`` is looked up in
    ``<toolDirectory>/templates/specieDescription``; the description text is
    currently hard-coded.

    Args:
        descriptionHtmlFilePath: path of the file to (over)write.
    """
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    templateDirectory = os.path.join(toolDirectory, 'templates/specieDescription')
    lookup = TemplateLookup(
        directories=[templateDirectory],
        output_encoding='utf-8',
        encoding_errors='replace')
    specieTemplate = lookup.get_template("layout.txt")

    with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
        # Write the rendered description page
        renderedDescription = specieTemplate.render(
            specieDescription='This is the description of the dbia')
        descriptionHtmlFile.write(renderedDescription)
| 316 | |
| 317 | |
def fillGroupsTxtFile(groupsTxtFilePath):
    """Create ``groupsTxtFilePath`` as an empty file.

    The groups template (``layout.txt`` in
    ``<toolDirectory>/templates/groupsTxt``) is rendered, but the result is
    not written: the file is only opened for writing, which creates or
    truncates it.

    Args:
        groupsTxtFilePath: path of the file to create/truncate.
    """
    templateDirectory = os.path.join(toolDirectory, 'templates/groupsTxt')
    lookup = TemplateLookup(
        directories=[templateDirectory],
        output_encoding='utf-8',
        encoding_errors='replace')
    groupsTemplate = lookup.get_template("layout.txt")

    # NOTE(review): the keyword is spelled 'prioriy'; presumably the template
    # uses the same spelling -- confirm before renaming it.
    groupValues = dict(
        mapName='map',
        labelMapping='Mapping',
        prioriy='2',
        isClosed='0',
    )

    with open(groupsTxtFilePath, 'w') as groupsTxtFile:
        # NOTE(review): the rendered text is discarded and the file stays
        # empty -- confirm whether writing it was disabled on purpose.
        groupsTemplate.render(**groupValues)
| 331 | |
| 332 | |
def createZip(myZip, folder):
    """Add every file found under ``folder`` to the open archive ``myZip``.

    Members are stored under names relative to the parent directory of
    ``folder``, so the archive always starts with the folder's own name
    (e.g. ``myHub/genomes.txt``) regardless of where the folder lives on
    disk. Directories and files are visited in sorted order so the archive
    content order is reproducible.

    Args:
        myZip: a ``zipfile.ZipFile`` opened for writing.
        folder: path of the directory to archive recursively.
    """
    # abspath also drops a trailing separator, so dirname() is the real parent.
    parentDirectory = os.path.dirname(os.path.abspath(folder))
    for root, dirs, files in os.walk(folder):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for fileName in sorted(files):
            filePath = os.path.join(root, fileName)
            myZip.write(filePath, os.path.relpath(filePath, parentDirectory))
| 338 | |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main(sys.argv)
