diff hub-archive-creator-1.6/hubArchiveCreator.py @ 0:163b2de763ea draft

Upload the full hubArchiveCreator archive
author rmarenco
date Tue, 01 Mar 2016 19:43:25 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hub-archive-creator-1.6/hubArchiveCreator.py	Tue Mar 01 19:43:25 2016 -0500
@@ -0,0 +1,340 @@
+#!/usr/bin/python
+"""
+This Galaxy tool prepares your files for
+Assembly Hub visualization.
+Program test arguments:
+hubArchiveCreator.py -g test_data/augustusDbia3.gff3 -f test_data/dbia3.fa -d . -o output.zip
+"""
+
+import sys
+import tempfile
+import getopt
+import zipfile
+import subprocess
+import os
+import argparse
+
+from mako.template import Template
+from mako.lookup import TemplateLookup
+
+# Internal dependencies
+from twoBitCreator import twoBitFileCreator
+
+# TODO: REMOVE THIS FROM BEING A GLOBAL VARIABLE
+# Base directory under which the external binaries (tools/) and the Mako
+# templates (templates/) are looked up; main() overrides it from -d/--directory.
+toolDirectory = '.'
+# Directory in which the myHub/ tree and myHub.zip are generated;
+# main() overrides it from -e/--extra_files_path.
+extra_files_path = '.'
+
+def main(argv):
+    # Command Line parsing init
+    parser = argparse.ArgumentParser(description='Create a foo.txt inside the given folder.')
+
+    parser.add_argument('-g', '--gff3', help='Directory where to put the foo.txt')
+    parser.add_argument('-f', '--fasta', help='Directory where to put the foo.txt')
+    parser.add_argument('-d', '--directory', help='Directory where to put the foo.txt')
+    parser.add_argument('-e', '--extra_files_path', help='Directory where to put the foo.txt')
+    parser.add_argument('-o', '--output', help='Directory where to put the foo.txt')
+
+
+    global toolDirectory
+    global extra_files_path
+    inputGFF3File = ''
+    inputFastaFile = ''
+
+    # Get the args passed in parameter
+    args = parser.parse_args()
+
+    inputGFF3File = open(args.gff3, 'r')
+    inputFastaFile = open(args.fasta, 'r')
+
+    if args.directory:
+        toolDirectory = args.directory
+    if args.extra_files_path:
+        extra_files_path = args.extra_files_path
+
+    outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w')
+
+
+    # Create the structure of the Assembly Hub
+    # TODO: Merge the following processing into a function as it is also used in twoBitCreator
+    baseNameFasta = os.path.basename(inputFastaFile.name)
+    suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
+    nameTwoBit = suffixTwoBit + '.2bit'
+
+    rootAssemblyHub = createAssemblyHub(outputZip, twoBitName=nameTwoBit)
+
+    # TODO: See if we need these temporary files as part of the generated files
+    genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
+    unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
+    sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+    twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+    chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+
+    # gff3ToGenePred processing
+    p = subprocess.Popen(
+        [os.path.join(toolDirectory, 'tools/gff3ToGenePred'),
+            inputGFF3File.name,
+            genePredFile.name])
+    # We need to wait the time gff3ToGenePred terminate so genePredToBed can begin
+    # TODO: Check if we should use communicate instead of wait
+    p.wait()
+
+    # genePredToBed processing
+    p = subprocess.Popen(
+        [os.path.join(toolDirectory, 'tools/genePredToBed'),
+            genePredFile.name,
+            unsortedBedFile.name])
+    p.wait()
+
+    # Sort processing
+    p = subprocess.Popen(
+        ['sort',
+            '-k'
+            '1,1',
+            '-k'
+            '2,2n',
+            unsortedBedFile.name,
+            '-o',
+            sortedBedFile.name])
+    p.wait()
+
+    mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3")
+
+    # 2bit file creation from input fasta
+    twoBitFile = twoBitFileCreator(inputFastaFile, toolDirectory, mySpecieFolderPath)
+
+    # Generate the chrom.sizes
+    # TODO: Isolate in a function
+    # We first get the twoBit Infos
+    p = subprocess.Popen(
+        [os.path.join(toolDirectory, 'tools/twoBitInfo'),
+            twoBitFile.name,
+            'stdout'],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE)
+
+    twoBitInfo_out, twoBitInfo_err = p.communicate()
+    twoBitInfoFile.write(twoBitInfo_out)
+
+    # Then we get the output to inject into the sort
+    # TODO: Check if no errors
+    p = subprocess.Popen(
+        ['sort',
+            '-k2rn',
+            twoBitInfoFile.name,
+            '-o',
+            chromSizesFile.name])
+    p.wait()
+
+    # bedToBigBed processing
+    # bedToBigBed augustusDbia3.sortbed chrom.sizes augustusDbia3.bb
+    # TODO: Find the best to get this path without hardcoding it
+    myTrackFolderPath = os.path.join(mySpecieFolderPath, "tracks")
+    # TODO: Change the name of the bb, to tool + genome + .bb
+    myBigBedFilePath = os.path.join(myTrackFolderPath, 'augustusDbia3.bb')
+    with open(myBigBedFilePath, 'w') as bigBedFile:
+        p = subprocess.Popen(
+            [os.path.join(toolDirectory, 'tools/bedToBigBed'),
+                sortedBedFile.name,
+                chromSizesFile.name,
+                bigBedFile.name])
+        p.wait()
+
+    # TODO: Add the .bb file in the zip, at the right place
+
+    createZip(outputZip, rootAssemblyHub)
+
+    # outputZip.write(sortedBedFile.name)
+    # TODO: Find the best to get this path without hardcoding it
+
+    # outputZip.write(bigBedFile.name)
+    outputZip.close()
+
+    # Just a test to output a simple HTML
+    with open(args.output, 'w') as htmlOutput:
+        htmlOutput.write('<html>')
+        htmlOutput.write('<body>')
+        htmlOutput.write('<p>')
+        htmlOutput.write('The following generated by Hub Archive Creator:')
+        htmlOutput.write('</p>')
+        htmlOutput.write('<ul>')
+        for root, dirs, files in os.walk(extra_files_path):
+            # Get all files and get all relative links at the same time
+            for file in files:
+                relDir = os.path.relpath(root, extra_files_path)
+                htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file), os.path.join(relDir, file)))
+        htmlOutput.write('<ul>')
+        htmlOutput.write('</body>')
+        htmlOutput.write('</html>')
+
+    sys.exit(0)
+
+
+def createAssemblyHub(outputZip, twoBitName):
+    # TODO: Manage to put every fill Function in a file dedicated for reading reasons
+    # Create the root directory
+    myHubPath = os.path.join(extra_files_path, "myHub")
+    if not os.path.exists(myHubPath):
+        os.makedirs(myHubPath)
+
+    # Add the genomes.txt file
+    genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt')
+    fillGenomesTxt(genomesTxtFilePath, twoBitName)
+
+    # Add the hub.txt file
+    hubTxtFilePath = os.path.join(myHubPath, 'hub.txt')
+    fillHubTxt(hubTxtFilePath)
+
+    # Add the hub.html file
+    # TODO: Change the name and get it depending on the specie
+    hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html')
+    fillHubHtmlFile(hubHtmlFilePath)
+
+    # Create the specie folder
+    # TODO: Generate the name depending on the specie
+    mySpecieFolderPath = os.path.join(myHubPath, "dbia3")
+    if not os.path.exists(mySpecieFolderPath):
+        os.makedirs(mySpecieFolderPath)
+
+    # Create the trackDb.txt file in the specie folder
+    trackDbTxtFilePath = os.path.join(mySpecieFolderPath, 'trackDb.txt')
+    fillTrackDbTxtFile(trackDbTxtFilePath)
+
+    # Create the description html file in the specie folder
+    descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html')
+    fillDescriptionHtmlFile(descriptionHtmlFilePath)
+
+    # Create the file groups.txt
+    # TODO: If not inputs for this, do no create the file
+    groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt')
+    fillGroupsTxtFile(groupsTxtFilePath)
+
+    # Create the folder tracks into the specie folder
+    tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
+    if not os.path.exists(tracksFolderPath):
+        os.makedirs(tracksFolderPath)
+
+    return myHubPath
+
+
+def fillGenomesTxt(genomesTxtFilePath, twoBitName):
+    # TODO: Think about the inputs and outputs
+    # TODO: Manage the template of this file
+    # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly")
+    pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly')
+    mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
+    mytemplate = mylookup.get_template("layout.txt")
+    with open(genomesTxtFilePath, 'w') as genomesTxtFile:
+        # Write the content of the file genomes.txt
+        twoBitPath = os.path.join('dbia3/', twoBitName)
+        htmlMakoRendered = mytemplate.render(
+            genomeName="dbia3",
+            trackDbPath="dbia3/trackDb.txt",
+            groupsPath="dbia3/groups.txt",
+            genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold",
+            twoBitPath=twoBitPath,
+            organismName="Drosophilia biarmipes",
+            defaultPosition="contig1",
+            orderKey="4500",
+            scientificName="Drosophilia biarmipes",
+            pathAssemblyHtmlDescription="dbia3/description.html"
+        )
+        genomesTxtFile.write(htmlMakoRendered)
+
+
+def fillHubTxt(hubTxtFilePath):
+    # TODO: Think about the inputs and outputs
+    # TODO: Manage the template of this file
+    mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')], output_encoding='utf-8', encoding_errors='replace')
+    mytemplate = mylookup.get_template('layout.txt')
+    with open(hubTxtFilePath, 'w') as genomesTxtFile:
+        # Write the content of the file genomes.txt
+        htmlMakoRendered = mytemplate.render(
+            hubName='dbiaOnly',
+            shortLabel='dbia',
+            longLabel='This hub only contains dbia with the gene predictions',
+            genomesFile='genomes.txt',
+            email='rmarenco@gwu.edu',
+            descriptionUrl='dbia.html'
+        )
+        genomesTxtFile.write(htmlMakoRendered)
+
+
+def fillHubHtmlFile(hubHtmlFilePath):
+    # TODO: Think about the inputs and outputs
+    # TODO: Manage the template of this file
+    # renderer = pystache.Renderer(search_dirs="templates/hubDescription")
+    # t = Template(templates.hubDescription.layout.html)
+    mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')], output_encoding='utf-8', encoding_errors='replace')
+    mytemplate = mylookup.get_template("layout.txt")
+    with open(hubHtmlFilePath, 'w') as hubHtmlFile:
+        # Write the content of the file genomes.txt
+        # htmlPystached = renderer.render_name(
+        #     "layout",
+        #     {'specie': 'Dbia',
+        #     'toolUsed': 'Augustus',
+        #     'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
+        #     'genomeID': '3499',
+        #     'SpecieFullName': 'Drosophila biarmipes'})
+        htmlMakoRendered = mytemplate.render(
+            specie='Dbia',
+            toolUsed='Augustus',
+            ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
+            genomeID='3499',
+            specieFullName='Drosophila biarmipes'
+        )
+        # hubHtmlFile.write(htmlPystached)
+        hubHtmlFile.write(htmlMakoRendered)
+
+
+def fillTrackDbTxtFile(trackDbTxtFilePath):
+    # TODO: Modify according to the files passed in parameter
+    mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/trackDb')], output_encoding='utf-8', encoding_errors='replace')
+    mytemplate = mylookup.get_template("layout.txt")
+    with open(trackDbTxtFilePath, 'w') as trackDbFile:
+        htmlMakoRendered = mytemplate.render(
+            trackName='augustusTrack',
+            trackDataURL='Augustus_dbia3',
+            shortLabel='a_dbia',
+            longLabel='tracks/augustusDbia3.bb',
+            trackType='bigBed 12 +',
+            visibility='dense'
+        )
+        trackDbFile.write(htmlMakoRendered)
+
+
+def fillDescriptionHtmlFile(descriptionHtmlFilePath):
+    # TODO: Think about the inputs and outputs
+    # TODO: Manage the template of this file
+    mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')], output_encoding='utf-8', encoding_errors='replace')
+    mytemplate = mylookup.get_template("layout.txt")
+    with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
+        # Write the content of the file genomes.txt
+        htmlMakoRendered = mytemplate.render(
+            specieDescription='This is the description of the dbia',
+        )
+        descriptionHtmlFile.write(htmlMakoRendered)
+
+
+def fillGroupsTxtFile(groupsTxtFilePath):
+    mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')], output_encoding='utf-8', encoding_errors='replace')
+    mytemplate = mylookup.get_template("layout.txt")
+    with open(groupsTxtFilePath, 'w') as groupsTxtFile:
+        # Write the content of groups.txt
+        # groupsTxtFile.write('name map')
+        htmlMakoRendered = mytemplate.render(
+            mapName='map',
+            labelMapping='Mapping',
+            prioriy='2',
+            isClosed='0'
+        )
+        # groupsTxtFile.write(htmlMakoRendered)
+
+
+def createZip(myZip, folder):
+    for root, dirs, files in os.walk(folder):
+        # Get all files and construct the dir at the same time
+        for file in files:
+            myZip.write(os.path.join(root, file))
+
+# Script entry point: hand the command line over to main(), which terminates
+# the process itself through sys.exit().
+if __name__ == "__main__":
+    main(sys.argv)