Mercurial > repos > rmarenco > hub_archive_creator
diff hub-archive-creator-1.6/hubArchiveCreator.py @ 0:163b2de763ea draft
Upload the full hubArchiveCreator archive
author | rmarenco |
---|---|
date | Tue, 01 Mar 2016 19:43:25 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hub-archive-creator-1.6/hubArchiveCreator.py Tue Mar 01 19:43:25 2016 -0500 @@ -0,0 +1,340 @@ +#!/usr/bin/python +""" +This Galaxy tool permits to prepare your files to be ready for +Assembly Hub visualization. +Program test arguments: +hubArchiveCreator.py -g test_data/augustusDbia3.gff3 -f test_data/dbia3.fa -d . -o output.zip +""" + +import sys +import tempfile +import getopt +import zipfile +import subprocess +import os +import argparse + +from mako.template import Template +from mako.lookup import TemplateLookup + +# Internal dependencies +from twoBitCreator import twoBitFileCreator + +# TODO: REMOVE THIS FROM BEING A GLOBAL VARIABLE +toolDirectory = '.' +extra_files_path = '.' + +def main(argv): + # Command Line parsing init + parser = argparse.ArgumentParser(description='Create a foo.txt inside the given folder.') + + parser.add_argument('-g', '--gff3', help='Directory where to put the foo.txt') + parser.add_argument('-f', '--fasta', help='Directory where to put the foo.txt') + parser.add_argument('-d', '--directory', help='Directory where to put the foo.txt') + parser.add_argument('-e', '--extra_files_path', help='Directory where to put the foo.txt') + parser.add_argument('-o', '--output', help='Directory where to put the foo.txt') + + + global toolDirectory + global extra_files_path + inputGFF3File = '' + inputFastaFile = '' + + # Get the args passed in parameter + args = parser.parse_args() + + inputGFF3File = open(args.gff3, 'r') + inputFastaFile = open(args.fasta, 'r') + + if args.directory: + toolDirectory = args.directory + if args.extra_files_path: + extra_files_path = args.extra_files_path + + outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w') + + + # Create the structure of the Assembly Hub + # TODO: Merge the following processing into a function as it is also used in twoBitCreator + baseNameFasta = os.path.basename(inputFastaFile.name) + suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) + nameTwoBit = suffixTwoBit + '.2bit' + + rootAssemblyHub = createAssemblyHub(outputZip, twoBitName=nameTwoBit) + + # TODO: See if we need these temporary files as part of the generated files + genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred") + unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed") + sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") + twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) + chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") + + # gff3ToGenePred processing + p = subprocess.Popen( + [os.path.join(toolDirectory, 'tools/gff3ToGenePred'), + inputGFF3File.name, + genePredFile.name]) + # We need to wait the time gff3ToGenePred terminate so genePredToBed can begin + # TODO: Check if we should use communicate instead of wait + p.wait() + + # genePredToBed processing + p = subprocess.Popen( + [os.path.join(toolDirectory, 'tools/genePredToBed'), + genePredFile.name, + unsortedBedFile.name]) + p.wait() + + # Sort processing + p = subprocess.Popen( + ['sort', + '-k' + '1,1', + '-k' + '2,2n', + unsortedBedFile.name, + '-o', + sortedBedFile.name]) + p.wait() + + mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3") + + # 2bit file creation from input fasta + twoBitFile = twoBitFileCreator(inputFastaFile, toolDirectory, mySpecieFolderPath) + + # Generate the chrom.sizes + # TODO: Isolate in a function + # We first get the twoBit Infos + p = subprocess.Popen( + [os.path.join(toolDirectory, 'tools/twoBitInfo'), + twoBitFile.name, + 'stdout'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + twoBitInfo_out, twoBitInfo_err = p.communicate() + twoBitInfoFile.write(twoBitInfo_out) + + # Then we get the output to inject into the sort + # TODO: Check if no errors + p = subprocess.Popen( + ['sort', + '-k2rn', + twoBitInfoFile.name, + '-o', + chromSizesFile.name]) + p.wait() + + # bedToBigBed processing + # bedToBigBed augustusDbia3.sortbed chrom.sizes augustusDbia3.bb + # TODO: Find the best to get this path without hardcoding it + myTrackFolderPath = os.path.join(mySpecieFolderPath, "tracks") + # TODO: Change the name of the bb, to tool + genome + .bb + myBigBedFilePath = os.path.join(myTrackFolderPath, 'augustusDbia3.bb') + with open(myBigBedFilePath, 'w') as bigBedFile: + p = subprocess.Popen( + [os.path.join(toolDirectory, 'tools/bedToBigBed'), + sortedBedFile.name, + chromSizesFile.name, + bigBedFile.name]) + p.wait() + + # TODO: Add the .bb file in the zip, at the right place + + createZip(outputZip, rootAssemblyHub) + + # outputZip.write(sortedBedFile.name) + # TODO: Find the best to get this path without hardcoding it + + # outputZip.write(bigBedFile.name) + outputZip.close() + + # Just a test to output a simple HTML + with open(args.output, 'w') as htmlOutput: + htmlOutput.write('<html>') + htmlOutput.write('<body>') + htmlOutput.write('<p>') + htmlOutput.write('The following generated by Hub Archive Creator:') + htmlOutput.write('</p>') + htmlOutput.write('<ul>') + for root, dirs, files in os.walk(extra_files_path): + # Get all files and get all relative links at the same time + for file in files: + relDir = os.path.relpath(root, extra_files_path) + htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file), os.path.join(relDir, file))) + htmlOutput.write('<ul>') + htmlOutput.write('</body>') + htmlOutput.write('</html>') + + sys.exit(0) + + +def createAssemblyHub(outputZip, twoBitName): + # TODO: Manage to put every fill Function in a file dedicated for reading reasons + # Create the root directory + myHubPath = os.path.join(extra_files_path, "myHub") + if not os.path.exists(myHubPath): + os.makedirs(myHubPath) + + # Add the genomes.txt file + genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') + fillGenomesTxt(genomesTxtFilePath, twoBitName) + + # Add the hub.txt file + hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') + fillHubTxt(hubTxtFilePath) + + # Add the hub.html file + # TODO: Change the name and get it depending on the specie + hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') + fillHubHtmlFile(hubHtmlFilePath) + + # Create the specie folder + # TODO: Generate the name depending on the specie + mySpecieFolderPath = os.path.join(myHubPath, "dbia3") + if not os.path.exists(mySpecieFolderPath): + os.makedirs(mySpecieFolderPath) + + # Create the trackDb.txt file in the specie folder + trackDbTxtFilePath = os.path.join(mySpecieFolderPath, 'trackDb.txt') + fillTrackDbTxtFile(trackDbTxtFilePath) + + # Create the description html file in the specie folder + descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') + fillDescriptionHtmlFile(descriptionHtmlFilePath) + + # Create the file groups.txt + # TODO: If not inputs for this, do no create the file + groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') + fillGroupsTxtFile(groupsTxtFilePath) + + # Create the folder tracks into the specie folder + tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") + if not os.path.exists(tracksFolderPath): + os.makedirs(tracksFolderPath) + + return myHubPath + + +def fillGenomesTxt(genomesTxtFilePath, twoBitName): + # TODO: Think about the inputs and outputs + # TODO: Manage the template of this file + # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") + pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly') + mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') + mytemplate = mylookup.get_template("layout.txt") + with open(genomesTxtFilePath, 'w') as genomesTxtFile: + # Write the content of the file genomes.txt + twoBitPath = os.path.join('dbia3/', twoBitName) + htmlMakoRendered = mytemplate.render( + genomeName="dbia3", + trackDbPath="dbia3/trackDb.txt", + groupsPath="dbia3/groups.txt", + genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold", + twoBitPath=twoBitPath, + organismName="Drosophilia biarmipes", + defaultPosition="contig1", + orderKey="4500", + scientificName="Drosophilia biarmipes", + pathAssemblyHtmlDescription="dbia3/description.html" + ) + genomesTxtFile.write(htmlMakoRendered) + + +def fillHubTxt(hubTxtFilePath): + # TODO: Think about the inputs and outputs + # TODO: Manage the template of this file + mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')], output_encoding='utf-8', encoding_errors='replace') + mytemplate = mylookup.get_template('layout.txt') + with open(hubTxtFilePath, 'w') as genomesTxtFile: + # Write the content of the file genomes.txt + htmlMakoRendered = mytemplate.render( + hubName='dbiaOnly', + shortLabel='dbia', + longLabel='This hub only contains dbia with the gene predictions', + genomesFile='genomes.txt', + email='rmarenco@gwu.edu', + descriptionUrl='dbia.html' + ) + genomesTxtFile.write(htmlMakoRendered) + + +def fillHubHtmlFile(hubHtmlFilePath): + # TODO: Think about the inputs and outputs + # TODO: Manage the template of this file + # renderer = pystache.Renderer(search_dirs="templates/hubDescription") + # t = Template(templates.hubDescription.layout.html) + mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')], output_encoding='utf-8', encoding_errors='replace') + mytemplate = mylookup.get_template("layout.txt") + with open(hubHtmlFilePath, 'w') as hubHtmlFile: + # Write the content of the file genomes.txt + # htmlPystached = renderer.render_name( + # "layout", + # {'specie': 'Dbia', + # 'toolUsed': 'Augustus', + # 'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499', + # 'genomeID': '3499', + # 'SpecieFullName': 'Drosophila biarmipes'}) + htmlMakoRendered = mytemplate.render( + specie='Dbia', + toolUsed='Augustus', + ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499', + genomeID='3499', + specieFullName='Drosophila biarmipes' + ) + # hubHtmlFile.write(htmlPystached) + hubHtmlFile.write(htmlMakoRendered) + + +def fillTrackDbTxtFile(trackDbTxtFilePath): + # TODO: Modify according to the files passed in parameter + mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/trackDb')], output_encoding='utf-8', encoding_errors='replace') + mytemplate = mylookup.get_template("layout.txt") + with open(trackDbTxtFilePath, 'w') as trackDbFile: + htmlMakoRendered = mytemplate.render( + trackName='augustusTrack', + trackDataURL='Augustus_dbia3', + shortLabel='a_dbia', + longLabel='tracks/augustusDbia3.bb', + trackType='bigBed 12 +', + visibility='dense' + ) + trackDbFile.write(htmlMakoRendered) + + +def fillDescriptionHtmlFile(descriptionHtmlFilePath): + # TODO: Think about the inputs and outputs + # TODO: Manage the template of this file + mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')], output_encoding='utf-8', encoding_errors='replace') + mytemplate = mylookup.get_template("layout.txt") + with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: + # Write the content of the file genomes.txt + htmlMakoRendered = mytemplate.render( + specieDescription='This is the description of the dbia', + ) + descriptionHtmlFile.write(htmlMakoRendered) + + +def fillGroupsTxtFile(groupsTxtFilePath): + mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')], output_encoding='utf-8', encoding_errors='replace') + mytemplate = mylookup.get_template("layout.txt") + with open(groupsTxtFilePath, 'w') as groupsTxtFile: + # Write the content of groups.txt + # groupsTxtFile.write('name map') + htmlMakoRendered = mytemplate.render( + mapName='map', + labelMapping='Mapping', + prioriy='2', + isClosed='0' + ) + # groupsTxtFile.write(htmlMakoRendered) + + +def createZip(myZip, folder): + for root, dirs, files in os.walk(folder): + # Get all files and construct the dir at the same time + for file in files: + myZip.write(os.path.join(root, file)) + +if __name__ == "__main__": + main(sys.argv)